diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -37918,3 +37918,7477 @@ Use FP16 precision: False 03/04/2022 18:49:20 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) 03/04/2022 18:49:22 - INFO - codeparrot_training - Step 24999: {'lr': 0.00047079472643895784, 'samples': 12800000, 'steps': 24999, 'loss/train': 1.3369230031967163} 03/04/2022 18:49:22 - INFO - codeparrot_training - Evaluating and saving model checkpoint +03/04/2022 18:49:37 - WARNING - huggingface_hub.repository - Several commits (5) will be pushed upstream. +03/04/2022 18:49:37 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +03/04/2022 18:50:21 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/cm_code_clippy + 3033721..20b2be6 glowing-puddle-3 -> glowing-puddle-3 + +03/04/2022 18:50:26 - INFO - codeparrot_training - Step 25000: {'lr': 0.00047079223733365234, 'samples': 12800512, 'steps': 25000, 'loss/train': 2.2720863819122314} +03/04/2022 18:50:29 - INFO - codeparrot_training - Step 25001: {'lr': 0.0004707897481288612, 'samples': 12801024, 'steps': 25001, 'loss/train': 2.1621482372283936} +03/04/2022 18:50:29 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 18:50:34 - INFO - codeparrot_training - Step 25002: {'lr': 0.00047078725882458575, 'samples': 12801536, 'steps': 25002, 'loss/train': 1.4488753080368042} +03/04/2022 18:50:38 - INFO - codeparrot_training - Step 25003: {'lr': 0.0004707847694208269, 'samples': 12802048, 'steps': 25003, 'loss/train': 0.9972745180130005} +03/04/2022 18:50:38 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 18:50:43 - INFO - codeparrot_training - Step 25004: {'lr': 0.0004707822799175858, 'samples': 12802560, 'steps': 25004, 'loss/train': 2.1641221046447754} +03/04/2022 18:50:46 - INFO - codeparrot_training - Step 25005: {'lr': 0.00047077979031486363, 'samples': 12803072, 'steps': 25005, 'loss/train': 1.856127142906189} +03/04/2022 18:50:46 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 18:50:51 - INFO - codeparrot_training - Step 25006: {'lr': 0.0004707773006126615, 'samples': 12803584, 'steps': 25006, 'loss/train': 1.4223352670669556} +03/04/2022 18:50:54 - INFO - codeparrot_training - Step 25007: {'lr': 0.0004707748108109805, 'samples': 12804096, 'steps': 25007, 'loss/train': 0.637409508228302} +03/04/2022 18:50:55 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 18:51:00 - INFO - codeparrot_training - Step 25008: {'lr': 0.0004707723209098218, 'samples': 12804608, 'steps': 25008, 'loss/train': 2.6404271125793457} +03/04/2022 18:51:03 - INFO - codeparrot_training - Step 25009: {'lr': 0.0004707698309091865, 'samples': 12805120, 'steps': 25009, 'loss/train': 1.7266995906829834} +03/04/2022 18:51:04 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/04/2022 18:51:08 - INFO - codeparrot_training - Step 25010: {'lr': 0.00047076734080907576, 'samples': 12805632, 'steps': 25010, 'loss/train': 1.8949495553970337} +03/04/2022 18:51:12 - INFO - codeparrot_training - Step 25011: {'lr': 0.0004707648506094906, 'samples': 12806144, 'steps': 25011, 'loss/train': 1.8175896406173706} +03/04/2022 18:51:12 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 18:51:17 - INFO - codeparrot_training - Step 25012: {'lr': 0.0004707623603104322, 'samples': 12806656, 'steps': 25012, 'loss/train': 1.9081592559814453} +03/04/2022 18:51:20 - INFO - codeparrot_training - Step 25013: {'lr': 0.0004707598699119018, 'samples': 12807168, 'steps': 25013, 'loss/train': 2.038910388946533} +03/04/2022 18:51:21 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 18:51:25 - INFO - codeparrot_training - Step 25014: {'lr': 0.0004707573794139003, 'samples': 12807680, 'steps': 25014, 'loss/train': 2.047226905822754} +03/04/2022 18:51:28 - INFO - codeparrot_training - Step 25015: {'lr': 0.0004707548888164289, 'samples': 12808192, 'steps': 25015, 'loss/train': 1.7910176515579224} +03/04/2022 18:51:29 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 18:51:34 - INFO - codeparrot_training - Step 25016: {'lr': 0.0004707523981194889, 'samples': 12808704, 'steps': 25016, 'loss/train': 3.326096296310425} +03/04/2022 18:51:37 - INFO - codeparrot_training - Step 25017: {'lr': 0.00047074990732308116, 'samples': 12809216, 'steps': 25017, 'loss/train': 1.117077112197876} +03/04/2022 18:51:37 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 18:51:42 - INFO - codeparrot_training - Step 25018: {'lr': 0.00047074741642720694, 'samples': 12809728, 'steps': 25018, 'loss/train': 0.4787640869617462} +03/04/2022 18:51:45 - INFO - codeparrot_training - Step 25019: {'lr': 0.0004707449254318673, 'samples': 12810240, 'steps': 25019, 'loss/train': 2.1188161373138428} +03/04/2022 18:51:46 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/04/2022 18:51:51 - INFO - codeparrot_training - Step 25020: {'lr': 0.0004707424343370635, 'samples': 12810752, 'steps': 25020, 'loss/train': 1.9546939134597778} +03/04/2022 18:51:54 - INFO - codeparrot_training - Step 25021: {'lr': 0.00047073994314279647, 'samples': 12811264, 'steps': 25021, 'loss/train': 1.2242841720581055} +03/04/2022 18:51:55 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/04/2022 18:51:59 - INFO - codeparrot_training - Step 25022: {'lr': 0.0004707374518490675, 'samples': 12811776, 'steps': 25022, 'loss/train': 1.6900838613510132} +03/04/2022 18:52:02 - INFO - codeparrot_training - Step 25023: {'lr': 0.0004707349604558776, 'samples': 12812288, 'steps': 25023, 'loss/train': 1.8855787515640259} +03/04/2022 18:52:03 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/04/2022 18:52:07 - INFO - codeparrot_training - Step 25024: {'lr': 0.00047073246896322797, 'samples': 12812800, 'steps': 25024, 'loss/train': 1.83464515209198} +03/04/2022 18:52:11 - INFO - codeparrot_training - Step 25025: {'lr': 0.00047072997737111966, 'samples': 12813312, 'steps': 25025, 'loss/train': 0.9294046759605408} +03/04/2022 18:52:12 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 18:52:16 - INFO - codeparrot_training - Step 25026: {'lr': 0.0004707274856795538, 'samples': 12813824, 'steps': 25026, 'loss/train': 1.5606272220611572} +03/04/2022 18:52:19 - INFO - codeparrot_training - Step 25027: {'lr': 0.00047072499388853164, 'samples': 12814336, 'steps': 25027, 'loss/train': 1.9171658754348755} +03/04/2022 18:52:20 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 18:52:24 - INFO - codeparrot_training - Step 25028: {'lr': 0.0004707225019980541, 'samples': 12814848, 'steps': 25028, 'loss/train': 1.8090293407440186} +03/04/2022 18:52:28 - INFO - codeparrot_training - Step 25029: {'lr': 0.00047072001000812247, 'samples': 12815360, 'steps': 25029, 'loss/train': 2.993135929107666} +03/04/2022 18:52:29 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 18:52:33 - INFO - codeparrot_training - Step 25030: {'lr': 0.00047071751791873774, 'samples': 12815872, 'steps': 25030, 'loss/train': 1.5688202381134033} +03/04/2022 18:52:36 - INFO - codeparrot_training - Step 25031: {'lr': 0.0004707150257299012, 'samples': 12816384, 'steps': 25031, 'loss/train': 1.249319076538086} +03/04/2022 18:52:37 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/04/2022 18:52:41 - INFO - codeparrot_training - Step 25032: {'lr': 0.0004707125334416138, 'samples': 12816896, 'steps': 25032, 'loss/train': 1.584047794342041} +03/04/2022 18:52:45 - INFO - codeparrot_training - Step 25033: {'lr': 0.00047071004105387677, 'samples': 12817408, 'steps': 25033, 'loss/train': 2.4073853492736816} +03/04/2022 18:52:46 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/04/2022 18:52:50 - INFO - codeparrot_training - Step 25034: {'lr': 0.00047070754856669115, 'samples': 12817920, 'steps': 25034, 'loss/train': 0.6741406321525574} +03/04/2022 18:52:53 - INFO - codeparrot_training - Step 25035: {'lr': 0.0004707050559800582, 'samples': 12818432, 'steps': 25035, 'loss/train': 1.9144023656845093} +03/04/2022 18:52:54 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 18:52:58 - INFO - codeparrot_training - Step 25036: {'lr': 0.00047070256329397893, 'samples': 12818944, 'steps': 25036, 'loss/train': 1.4518067836761475} +03/04/2022 18:53:02 - INFO - codeparrot_training - Step 25037: {'lr': 0.0004707000705084545, 'samples': 12819456, 'steps': 25037, 'loss/train': 1.837268590927124} +03/04/2022 18:53:03 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/04/2022 18:53:07 - INFO - codeparrot_training - Step 25038: {'lr': 0.000470697577623486, 'samples': 12819968, 'steps': 25038, 'loss/train': 2.016571283340454} +03/04/2022 18:53:10 - INFO - codeparrot_training - Step 25039: {'lr': 0.0004706950846390746, 'samples': 12820480, 'steps': 25039, 'loss/train': 2.1503210067749023} +03/04/2022 18:53:13 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 18:53:16 - INFO - codeparrot_training - Step 25040: {'lr': 0.00047069259155522135, 'samples': 12820992, 'steps': 25040, 'loss/train': 2.202559471130371} +03/04/2022 18:53:19 - INFO - codeparrot_training - Step 25041: {'lr': 0.0004706900983719274, 'samples': 12821504, 'steps': 25041, 'loss/train': 2.034719467163086} +03/04/2022 18:53:22 - INFO - codeparrot_training - Step 25042: {'lr': 0.000470687605089194, 'samples': 12822016, 'steps': 25042, 'loss/train': 1.992671251296997} +03/04/2022 18:53:22 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 18:53:27 - INFO - codeparrot_training - Step 25043: {'lr': 0.0004706851117070221, 'samples': 12822528, 'steps': 25043, 'loss/train': 0.6675636768341064} +03/04/2022 18:53:30 - INFO - codeparrot_training - Step 25044: {'lr': 0.0004706826182254129, 'samples': 12823040, 'steps': 25044, 'loss/train': 2.1059088706970215} +03/04/2022 18:53:30 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/04/2022 18:53:36 - INFO - codeparrot_training - Step 25045: {'lr': 0.0004706801246443676, 'samples': 12823552, 'steps': 25045, 'loss/train': 1.8442326784133911} +03/04/2022 18:53:39 - INFO - codeparrot_training - Step 25046: {'lr': 0.00047067763096388717, 'samples': 12824064, 'steps': 25046, 'loss/train': 2.0886495113372803} +03/04/2022 18:53:39 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/04/2022 18:53:44 - INFO - codeparrot_training - Step 25047: {'lr': 0.00047067513718397283, 'samples': 12824576, 'steps': 25047, 'loss/train': 2.031420946121216} +03/04/2022 18:53:48 - INFO - codeparrot_training - Step 25048: {'lr': 0.0004706726433046256, 'samples': 12825088, 'steps': 25048, 'loss/train': 1.1181421279907227} +03/04/2022 18:53:48 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/04/2022 18:53:53 - INFO - codeparrot_training - Step 25049: {'lr': 0.00047067014932584674, 'samples': 12825600, 'steps': 25049, 'loss/train': 1.6002109050750732} +03/04/2022 18:53:56 - INFO - codeparrot_training - Step 25050: {'lr': 0.0004706676552476373, 'samples': 12826112, 'steps': 25050, 'loss/train': 2.216784715652466} +03/04/2022 18:53:56 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 18:54:01 - INFO - codeparrot_training - Step 25051: {'lr': 0.0004706651610699985, 'samples': 12826624, 'steps': 25051, 'loss/train': 2.6303858757019043} +03/04/2022 18:54:04 - INFO - codeparrot_training - Step 25052: {'lr': 0.00047066266679293125, 'samples': 12827136, 'steps': 25052, 'loss/train': 1.6276215314865112} +03/04/2022 18:54:04 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/04/2022 18:54:10 - INFO - codeparrot_training - Step 25053: {'lr': 0.0004706601724164369, 'samples': 12827648, 'steps': 25053, 'loss/train': 2.3101158142089844} +03/04/2022 18:54:13 - INFO - codeparrot_training - Step 25054: {'lr': 0.0004706576779405165, 'samples': 12828160, 'steps': 25054, 'loss/train': 1.8676996231079102} +03/04/2022 18:54:13 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/04/2022 18:54:18 - INFO - codeparrot_training - Step 25055: {'lr': 0.0004706551833651711, 'samples': 12828672, 'steps': 25055, 'loss/train': 2.157733678817749} +03/04/2022 18:54:22 - INFO - codeparrot_training - Step 25056: {'lr': 0.0004706526886904019, 'samples': 12829184, 'steps': 25056, 'loss/train': 1.7926851511001587} +03/04/2022 18:54:22 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 18:54:27 - INFO - codeparrot_training - Step 25057: {'lr': 0.00047065019391621, 'samples': 12829696, 'steps': 25057, 'loss/train': 2.1089589595794678} +03/04/2022 18:54:30 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 18:54:32 - INFO - codeparrot_training - Step 25058: {'lr': 0.0004706476990425965, 'samples': 12830208, 'steps': 25058, 'loss/train': 1.7082737684249878} +03/04/2022 18:54:35 - INFO - codeparrot_training - Step 25059: {'lr': 0.0004706452040695626, 'samples': 12830720, 'steps': 25059, 'loss/train': 1.017737627029419} +03/04/2022 18:54:38 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 18:54:41 - INFO - codeparrot_training - Step 25060: {'lr': 0.0004706427089971093, 'samples': 12831232, 'steps': 25060, 'loss/train': 1.5445334911346436} +03/04/2022 18:54:44 - INFO - codeparrot_training - Step 25061: {'lr': 0.0004706402138252379, 'samples': 12831744, 'steps': 25061, 'loss/train': 1.8106290102005005} +03/04/2022 18:54:46 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 18:54:49 - INFO - codeparrot_training - Step 25062: {'lr': 0.00047063771855394935, 'samples': 12832256, 'steps': 25062, 'loss/train': 1.4975013732910156} +03/04/2022 18:54:52 - INFO - codeparrot_training - Step 25063: {'lr': 0.00047063522318324484, 'samples': 12832768, 'steps': 25063, 'loss/train': 1.8618700504302979} +03/04/2022 18:54:55 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 18:54:58 - INFO - codeparrot_training - Step 25064: {'lr': 0.00047063272771312556, 'samples': 12833280, 'steps': 25064, 'loss/train': 1.593176245689392} +03/04/2022 18:55:01 - INFO - codeparrot_training - Step 25065: {'lr': 0.0004706302321435926, 'samples': 12833792, 'steps': 25065, 'loss/train': 1.29747474193573} +03/04/2022 18:55:03 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 18:55:06 - INFO - codeparrot_training - Step 25066: {'lr': 0.00047062773647464694, 'samples': 12834304, 'steps': 25066, 'loss/train': 1.6648106575012207} +03/04/2022 18:55:09 - INFO - codeparrot_training - Step 25067: {'lr': 0.00047062524070628993, 'samples': 12834816, 'steps': 25067, 'loss/train': 1.6909915208816528} +03/04/2022 18:55:11 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 18:55:14 - INFO - codeparrot_training - Step 25068: {'lr': 0.00047062274483852253, 'samples': 12835328, 'steps': 25068, 'loss/train': 1.6439026594161987} +03/04/2022 18:55:17 - INFO - codeparrot_training - Step 25069: {'lr': 0.000470620248871346, 'samples': 12835840, 'steps': 25069, 'loss/train': 2.0186049938201904} +03/04/2022 18:55:20 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/04/2022 18:55:23 - INFO - codeparrot_training - Step 25070: {'lr': 0.00047061775280476134, 'samples': 12836352, 'steps': 25070, 'loss/train': 1.5581883192062378} +03/04/2022 18:55:26 - INFO - codeparrot_training - Step 25071: {'lr': 0.0004706152566387697, 'samples': 12836864, 'steps': 25071, 'loss/train': 2.0358872413635254} +03/04/2022 18:55:28 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 18:55:31 - INFO - codeparrot_training - Step 25072: {'lr': 0.0004706127603733723, 'samples': 12837376, 'steps': 25072, 'loss/train': 2.5719916820526123} +03/04/2022 18:55:35 - INFO - codeparrot_training - Step 25073: {'lr': 0.00047061026400857015, 'samples': 12837888, 'steps': 25073, 'loss/train': 2.193042755126953} +03/04/2022 18:55:37 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 18:55:40 - INFO - codeparrot_training - Step 25074: {'lr': 0.0004706077675443644, 'samples': 12838400, 'steps': 25074, 'loss/train': 1.1790868043899536} +03/04/2022 18:55:43 - INFO - codeparrot_training - Step 25075: {'lr': 0.00047060527098075625, 'samples': 12838912, 'steps': 25075, 'loss/train': 0.6706814169883728} +03/04/2022 18:55:45 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/04/2022 18:55:48 - INFO - codeparrot_training - Step 25076: {'lr': 0.0004706027743177467, 'samples': 12839424, 'steps': 25076, 'loss/train': 1.542373776435852} +03/04/2022 18:55:51 - INFO - codeparrot_training - Step 25077: {'lr': 0.000470600277555337, 'samples': 12839936, 'steps': 25077, 'loss/train': 1.8712016344070435} +03/04/2022 18:55:54 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 18:55:57 - INFO - codeparrot_training - Step 25078: {'lr': 0.0004705977806935282, 'samples': 12840448, 'steps': 25078, 'loss/train': 2.2446393966674805} +03/04/2022 18:56:00 - INFO - codeparrot_training - Step 25079: {'lr': 0.00047059528373232147, 'samples': 12840960, 'steps': 25079, 'loss/train': 0.7199758291244507} +03/04/2022 18:56:02 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/04/2022 18:56:05 - INFO - codeparrot_training - Step 25080: {'lr': 0.0004705927866717179, 'samples': 12841472, 'steps': 25080, 'loss/train': 2.435274124145508} +03/04/2022 18:56:08 - INFO - codeparrot_training - Step 25081: {'lr': 0.0004705902895117186, 'samples': 12841984, 'steps': 25081, 'loss/train': 2.277270555496216} +03/04/2022 18:56:11 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/04/2022 18:56:14 - INFO - codeparrot_training - Step 25082: {'lr': 0.00047058779225232474, 'samples': 12842496, 'steps': 25082, 'loss/train': 2.2829880714416504} +03/04/2022 18:56:17 - INFO - codeparrot_training - Step 25083: {'lr': 0.0004705852948935374, 'samples': 12843008, 'steps': 25083, 'loss/train': 2.3227596282958984} +03/04/2022 18:56:19 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 18:56:22 - INFO - codeparrot_training - Step 25084: {'lr': 0.00047058279743535775, 'samples': 12843520, 'steps': 25084, 'loss/train': 2.129922389984131} +03/04/2022 18:56:25 - INFO - codeparrot_training - Step 25085: {'lr': 0.0004705802998777869, 'samples': 12844032, 'steps': 25085, 'loss/train': 1.6463955640792847} +03/04/2022 18:56:27 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 18:56:30 - INFO - codeparrot_training - Step 25086: {'lr': 0.0004705778022208259, 'samples': 12844544, 'steps': 25086, 'loss/train': 0.408038467168808} +03/04/2022 18:56:34 - INFO - codeparrot_training - Step 25087: {'lr': 0.000470575304464476, 'samples': 12845056, 'steps': 25087, 'loss/train': 1.2450834512710571} +03/04/2022 18:56:35 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 18:56:39 - INFO - codeparrot_training - Step 25088: {'lr': 0.00047057280660873835, 'samples': 12845568, 'steps': 25088, 'loss/train': 1.9729628562927246} +03/04/2022 18:56:42 - INFO - codeparrot_training - Step 25089: {'lr': 0.00047057030865361397, 'samples': 12846080, 'steps': 25089, 'loss/train': 2.626375198364258} +03/04/2022 18:56:44 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 18:56:47 - INFO - codeparrot_training - Step 25090: {'lr': 0.0004705678105991039, 'samples': 12846592, 'steps': 25090, 'loss/train': 1.944675087928772} +03/04/2022 18:56:50 - INFO - codeparrot_training - Step 25091: {'lr': 0.00047056531244520945, 'samples': 12847104, 'steps': 25091, 'loss/train': 1.8182783126831055} +03/04/2022 18:56:52 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 18:56:56 - INFO - codeparrot_training - Step 25092: {'lr': 0.0004705628141919317, 'samples': 12847616, 'steps': 25092, 'loss/train': 1.8274507522583008} +03/04/2022 18:56:59 - INFO - codeparrot_training - Step 25093: {'lr': 0.00047056031583927175, 'samples': 12848128, 'steps': 25093, 'loss/train': 1.9040497541427612} +03/04/2022 18:57:01 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 18:57:04 - INFO - codeparrot_training - Step 25094: {'lr': 0.00047055781738723063, 'samples': 12848640, 'steps': 25094, 'loss/train': 1.5787038803100586} +03/04/2022 18:57:07 - INFO - codeparrot_training - Step 25095: {'lr': 0.0004705553188358096, 'samples': 12849152, 'steps': 25095, 'loss/train': 1.8234866857528687} +03/04/2022 18:57:09 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 18:57:13 - INFO - codeparrot_training - Step 25096: {'lr': 0.00047055282018500976, 'samples': 12849664, 'steps': 25096, 'loss/train': 0.7234331369400024} +03/04/2022 18:57:16 - INFO - codeparrot_training - Step 25097: {'lr': 0.0004705503214348323, 'samples': 12850176, 'steps': 25097, 'loss/train': 2.4037978649139404} +03/04/2022 18:57:18 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/04/2022 18:57:21 - INFO - codeparrot_training - Step 25098: {'lr': 0.0004705478225852782, 'samples': 12850688, 'steps': 25098, 'loss/train': 0.9351075291633606} +03/04/2022 18:57:24 - INFO - codeparrot_training - Step 25099: {'lr': 0.0004705453236363486, 'samples': 12851200, 'steps': 25099, 'loss/train': 1.5597867965698242} +03/04/2022 18:57:26 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 18:57:30 - INFO - codeparrot_training - Step 25100: {'lr': 0.00047054282458804477, 'samples': 12851712, 'steps': 25100, 'loss/train': 1.3071184158325195} +03/04/2022 18:57:33 - INFO - codeparrot_training - Step 25101: {'lr': 0.0004705403254403677, 'samples': 12852224, 'steps': 25101, 'loss/train': 0.7174893021583557} +03/04/2022 18:57:35 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/04/2022 18:57:38 - INFO - codeparrot_training - Step 25102: {'lr': 0.0004705378261933186, 'samples': 12852736, 'steps': 25102, 'loss/train': 1.067669153213501} +03/04/2022 18:57:41 - INFO - codeparrot_training - Step 25103: {'lr': 0.0004705353268468985, 'samples': 12853248, 'steps': 25103, 'loss/train': 1.7714552879333496} +03/04/2022 18:57:44 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 18:57:47 - INFO - codeparrot_training - Step 25104: {'lr': 0.00047053282740110863, 'samples': 12853760, 'steps': 25104, 'loss/train': 2.1500978469848633} +03/04/2022 18:57:50 - INFO - codeparrot_training - Step 25105: {'lr': 0.00047053032785595005, 'samples': 12854272, 'steps': 25105, 'loss/train': 1.275953769683838} +03/04/2022 18:57:52 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 18:57:55 - INFO - codeparrot_training - Step 25106: {'lr': 0.0004705278282114239, 'samples': 12854784, 'steps': 25106, 'loss/train': 1.6331745386123657} +03/04/2022 18:57:58 - INFO - codeparrot_training - Step 25107: {'lr': 0.0004705253284675314, 'samples': 12855296, 'steps': 25107, 'loss/train': 1.9976993799209595} +03/04/2022 18:58:01 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 18:58:04 - INFO - codeparrot_training - Step 25108: {'lr': 0.00047052282862427355, 'samples': 12855808, 'steps': 25108, 'loss/train': 1.6986401081085205} +03/04/2022 18:58:07 - INFO - codeparrot_training - Step 25109: {'lr': 0.0004705203286816514, 'samples': 12856320, 'steps': 25109, 'loss/train': 2.0565803050994873} +03/04/2022 18:58:09 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 18:58:12 - INFO - codeparrot_training - Step 25110: {'lr': 0.0004705178286396663, 'samples': 12856832, 'steps': 25110, 'loss/train': 2.383732795715332} +03/04/2022 18:58:15 - INFO - codeparrot_training - Step 25111: {'lr': 0.0004705153284983192, 'samples': 12857344, 'steps': 25111, 'loss/train': 2.3178975582122803} +03/04/2022 18:58:19 - INFO - codeparrot_training - Step 25112: {'lr': 0.00047051282825761145, 'samples': 12857856, 'steps': 25112, 'loss/train': 6.633519172668457} +03/04/2022 18:58:19 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 18:58:24 - INFO - codeparrot_training - Step 25113: {'lr': 0.0004705103279175439, 'samples': 12858368, 'steps': 25113, 'loss/train': 1.6179064512252808} +03/04/2022 18:58:27 - INFO - codeparrot_training - Step 25114: {'lr': 0.0004705078274781178, 'samples': 12858880, 'steps': 25114, 'loss/train': 2.0531108379364014} +03/04/2022 18:58:27 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 18:58:32 - INFO - codeparrot_training - Step 25115: {'lr': 0.0004705053269393343, 'samples': 12859392, 'steps': 25115, 'loss/train': 1.9238201379776} +03/04/2022 18:58:35 - INFO - codeparrot_training - Step 25116: {'lr': 0.00047050282630119444, 'samples': 12859904, 'steps': 25116, 'loss/train': 1.5421780347824097} +03/04/2022 18:58:36 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 18:58:41 - INFO - codeparrot_training - Step 25117: {'lr': 0.0004705003255636995, 'samples': 12860416, 'steps': 25117, 'loss/train': 0.8592211008071899} +03/04/2022 18:58:44 - INFO - codeparrot_training - Step 25118: {'lr': 0.0004704978247268505, 'samples': 12860928, 'steps': 25118, 'loss/train': 0.7966042757034302} +03/04/2022 18:58:44 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/04/2022 18:58:49 - INFO - codeparrot_training - Step 25119: {'lr': 0.0004704953237906485, 'samples': 12861440, 'steps': 25119, 'loss/train': 0.9785674214363098} +03/04/2022 18:58:52 - INFO - codeparrot_training - Step 25120: {'lr': 0.0004704928227550949, 'samples': 12861952, 'steps': 25120, 'loss/train': 2.816507577896118} +03/04/2022 18:58:52 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/04/2022 18:58:58 - INFO - codeparrot_training - Step 25121: {'lr': 0.00047049032162019044, 'samples': 12862464, 'steps': 25121, 'loss/train': 0.7571011185646057} +03/04/2022 18:59:01 - INFO - codeparrot_training - Step 25122: {'lr': 0.0004704878203859365, 'samples': 12862976, 'steps': 25122, 'loss/train': 2.089536666870117} +03/04/2022 18:59:01 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 18:59:06 - INFO - codeparrot_training - Step 25123: {'lr': 0.0004704853190523342, 'samples': 12863488, 'steps': 25123, 'loss/train': 1.6418256759643555} +03/04/2022 18:59:09 - INFO - codeparrot_training - Step 25124: {'lr': 0.00047048281761938456, 'samples': 12864000, 'steps': 25124, 'loss/train': 2.2529735565185547} +03/04/2022 18:59:09 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/04/2022 18:59:15 - INFO - codeparrot_training - Step 25125: {'lr': 0.00047048031608708875, 'samples': 12864512, 'steps': 25125, 'loss/train': 1.518971562385559} +03/04/2022 18:59:18 - INFO - codeparrot_training - Step 25126: {'lr': 0.000470477814455448, 'samples': 12865024, 'steps': 25126, 'loss/train': 0.5627090930938721} +03/04/2022 18:59:18 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 18:59:23 - INFO - codeparrot_training - Step 25127: {'lr': 0.0004704753127244633, 'samples': 12865536, 'steps': 25127, 'loss/train': 1.6445062160491943} +03/04/2022 18:59:26 - INFO - codeparrot_training - Step 25128: {'lr': 0.0004704728108941358, 'samples': 12866048, 'steps': 25128, 'loss/train': 1.6332228183746338} +03/04/2022 18:59:26 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/04/2022 18:59:32 - INFO - codeparrot_training - Step 25129: {'lr': 0.00047047030896446665, 'samples': 12866560, 'steps': 25129, 'loss/train': 1.6114519834518433} +03/04/2022 18:59:35 - INFO - codeparrot_training - Step 25130: {'lr': 0.000470467806935457, 'samples': 12867072, 'steps': 25130, 'loss/train': 1.2869783639907837} +03/04/2022 18:59:35 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 18:59:40 - INFO - codeparrot_training - Step 25131: {'lr': 0.000470465304807108, 'samples': 12867584, 'steps': 25131, 'loss/train': 2.123750925064087} +03/04/2022 18:59:43 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 18:59:45 - INFO - codeparrot_training - Step 25132: {'lr': 0.00047046280257942067, 'samples': 12868096, 'steps': 25132, 'loss/train': 1.4471428394317627} +03/04/2022 18:59:49 - INFO - codeparrot_training - Step 25133: {'lr': 0.0004704603002523962, 'samples': 12868608, 'steps': 25133, 'loss/train': 2.197831153869629} +03/04/2022 18:59:52 - INFO - codeparrot_training - Step 25134: {'lr': 0.00047045779782603584, 'samples': 12869120, 'steps': 25134, 'loss/train': 2.5113508701324463} +03/04/2022 18:59:52 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 18:59:57 - INFO - codeparrot_training - Step 25135: {'lr': 0.0004704552953003405, 'samples': 12869632, 'steps': 25135, 'loss/train': 1.752384066581726} +03/04/2022 19:00:00 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/04/2022 19:00:03 - INFO - codeparrot_training - Step 25136: {'lr': 0.0004704527926753114, 'samples': 12870144, 'steps': 25136, 'loss/train': 1.596863865852356} +03/04/2022 19:00:06 - INFO - codeparrot_training - Step 25137: {'lr': 0.00047045028995094967, 'samples': 12870656, 'steps': 25137, 'loss/train': 1.8330895900726318} +03/04/2022 19:00:09 - INFO - codeparrot_training - Step 25138: {'lr': 0.0004704477871272564, 'samples': 12871168, 'steps': 25138, 'loss/train': 0.15386617183685303} +03/04/2022 19:00:09 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 19:00:14 - INFO - codeparrot_training - Step 25139: {'lr': 0.0004704452842042329, 'samples': 12871680, 'steps': 25139, 'loss/train': 1.7043108940124512} +03/04/2022 19:00:17 - INFO - codeparrot_training - Step 25140: {'lr': 0.00047044278118188004, 'samples': 12872192, 'steps': 25140, 'loss/train': 2.0515506267547607} +03/04/2022 19:00:17 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/04/2022 19:00:23 - INFO - codeparrot_training - Step 25141: {'lr': 0.00047044027806019914, 'samples': 12872704, 'steps': 25141, 'loss/train': 1.908578634262085} +03/04/2022 19:00:26 - INFO - codeparrot_training - Step 25142: {'lr': 0.0004704377748391912, 'samples': 12873216, 'steps': 25142, 'loss/train': 1.8420010805130005} +03/04/2022 19:00:26 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/04/2022 19:00:31 - INFO - codeparrot_training - Step 25143: {'lr': 0.0004704352715188574, 'samples': 12873728, 'steps': 25143, 'loss/train': 1.9331549406051636} +03/04/2022 19:00:34 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/04/2022 19:00:37 - INFO - codeparrot_training - Step 25144: {'lr': 0.0004704327680991989, 'samples': 12874240, 'steps': 25144, 'loss/train': 1.4206304550170898} +03/04/2022 19:00:40 - INFO - codeparrot_training - Step 25145: {'lr': 0.00047043026458021677, 'samples': 12874752, 'steps': 25145, 'loss/train': 1.3277177810668945} +03/04/2022 19:00:43 - INFO - codeparrot_training - Step 25146: {'lr': 0.0004704277609619122, 'samples': 12875264, 'steps': 25146, 'loss/train': 1.930651307106018} +03/04/2022 19:00:43 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/04/2022 19:00:48 - INFO - codeparrot_training - Step 25147: {'lr': 0.0004704252572442862, 'samples': 12875776, 'steps': 25147, 'loss/train': 1.958849310874939} +03/04/2022 19:00:51 - INFO - codeparrot_training - Step 25148: {'lr': 0.00047042275342734006, 'samples': 12876288, 'steps': 25148, 'loss/train': 1.7760303020477295} +03/04/2022 19:00:51 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 19:00:57 - INFO - codeparrot_training - Step 25149: {'lr': 0.0004704202495110748, 'samples': 12876800, 'steps': 25149, 'loss/train': 1.5492703914642334} +03/04/2022 19:01:00 - INFO - codeparrot_training - Step 25150: {'lr': 0.00047041774549549156, 'samples': 12877312, 'steps': 25150, 'loss/train': 1.0460854768753052} +03/04/2022 19:01:00 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 19:01:05 - INFO - codeparrot_training - Step 25151: {'lr': 0.00047041524138059153, 'samples': 12877824, 'steps': 25151, 'loss/train': 1.840155005455017} +03/04/2022 19:01:08 - INFO - codeparrot_training - Step 25152: {'lr': 0.00047041273716637576, 'samples': 12878336, 'steps': 25152, 'loss/train': 1.597869634628296} +03/04/2022 19:01:08 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 19:01:14 - INFO - codeparrot_training - Step 25153: {'lr': 0.00047041023285284545, 'samples': 12878848, 'steps': 25153, 'loss/train': 2.067610740661621} +03/04/2022 19:01:17 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 19:01:19 - INFO - codeparrot_training - Step 25154: {'lr': 0.0004704077284400017, 'samples': 12879360, 'steps': 25154, 'loss/train': 2.047703981399536} +03/04/2022 19:01:22 - INFO - codeparrot_training - Step 25155: {'lr': 0.0004704052239278456, 'samples': 12879872, 'steps': 25155, 'loss/train': 0.6569011807441711} +03/04/2022 19:01:25 - INFO - codeparrot_training - Step 25156: {'lr': 0.00047040271931637824, 'samples': 12880384, 'steps': 25156, 'loss/train': 1.9719747304916382} +03/04/2022 19:01:26 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/04/2022 19:01:31 - INFO - codeparrot_training - Step 25157: {'lr': 0.0004704002146056009, 'samples': 12880896, 'steps': 25157, 'loss/train': 1.5556350946426392} +03/04/2022 19:01:34 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 19:01:36 - INFO - codeparrot_training - Step 25158: {'lr': 0.0004703977097955146, 'samples': 12881408, 'steps': 25158, 'loss/train': 1.2840830087661743} +03/04/2022 19:01:39 - INFO - codeparrot_training - Step 25159: {'lr': 0.0004703952048861204, 'samples': 12881920, 'steps': 25159, 'loss/train': 1.9405949115753174} +03/04/2022 19:01:42 - INFO - codeparrot_training - Step 25160: {'lr': 0.00047039269987741967, 'samples': 12882432, 'steps': 25160, 'loss/train': 1.6845134496688843} +03/04/2022 19:01:42 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 19:01:48 - INFO - codeparrot_training - Step 25161: {'lr': 0.0004703901947694134, 'samples': 12882944, 'steps': 25161, 'loss/train': 1.17493736743927} +03/04/2022 19:01:51 - INFO - codeparrot_training - Step 25162: {'lr': 0.0004703876895621025, 'samples': 12883456, 'steps': 25162, 'loss/train': 1.9157634973526} +03/04/2022 19:01:51 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 19:01:56 - INFO - codeparrot_training - Step 25163: {'lr': 0.0004703851842554885, 'samples': 12883968, 'steps': 25163, 'loss/train': 1.5450847148895264} +03/04/2022 19:01:59 - INFO - codeparrot_training - Step 25164: {'lr': 0.0004703826788495723, 'samples': 12884480, 'steps': 25164, 'loss/train': 1.941747784614563} +03/04/2022 19:01:59 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 19:02:05 - INFO - codeparrot_training - Step 25165: {'lr': 0.00047038017334435504, 'samples': 12884992, 'steps': 25165, 'loss/train': 1.9387246370315552} +03/04/2022 19:02:08 - INFO - codeparrot_training - Step 25166: {'lr': 0.00047037766773983794, 'samples': 12885504, 'steps': 25166, 'loss/train': 1.6091663837432861} +03/04/2022 19:02:08 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 19:02:13 - INFO - codeparrot_training - Step 25167: {'lr': 0.00047037516203602195, 'samples': 12886016, 'steps': 25167, 'loss/train': 1.8177059888839722} +03/04/2022 19:02:16 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/04/2022 19:02:18 - INFO - codeparrot_training - Step 25168: {'lr': 0.0004703726562329084, 'samples': 12886528, 'steps': 25168, 'loss/train': 2.1129581928253174} +03/04/2022 19:02:22 - INFO - codeparrot_training - Step 25169: {'lr': 0.0004703701503304983, 'samples': 12887040, 'steps': 25169, 'loss/train': 1.8308436870574951} +03/04/2022 19:02:24 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 19:02:27 - INFO - codeparrot_training - Step 25170: {'lr': 0.0004703676443287928, 'samples': 12887552, 'steps': 25170, 'loss/train': 2.165034294128418} +03/04/2022 19:02:30 - INFO - codeparrot_training - Step 25171: {'lr': 0.000470365138227793, 'samples': 12888064, 'steps': 25171, 'loss/train': 2.0599124431610107} +03/04/2022 19:02:33 - INFO - codeparrot_training - Step 25172: {'lr': 0.0004703626320275002, 'samples': 12888576, 'steps': 25172, 'loss/train': 1.6414638757705688} +03/04/2022 19:02:33 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 19:02:39 - INFO - codeparrot_training - Step 25173: {'lr': 0.0004703601257279153, 'samples': 12889088, 'steps': 25173, 'loss/train': 2.1862542629241943} +03/04/2022 19:02:42 - INFO - codeparrot_training - Step 25174: {'lr': 0.0004703576193290395, 'samples': 12889600, 'steps': 25174, 'loss/train': 1.4821419715881348} +03/04/2022 19:02:42 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 19:02:47 - INFO - codeparrot_training - Step 25175: {'lr': 0.0004703551128308741, 'samples': 12890112, 'steps': 25175, 'loss/train': 1.5592763423919678} +03/04/2022 19:02:50 - INFO - codeparrot_training - Step 25176: {'lr': 0.00047035260623341996, 'samples': 12890624, 'steps': 25176, 'loss/train': 1.7259502410888672} +03/04/2022 19:02:50 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/04/2022 19:02:56 - INFO - codeparrot_training - Step 25177: {'lr': 0.0004703500995366784, 'samples': 12891136, 'steps': 25177, 'loss/train': 3.2288177013397217} +03/04/2022 19:02:59 - INFO - codeparrot_training - Step 25178: {'lr': 0.00047034759274065043, 'samples': 12891648, 'steps': 25178, 'loss/train': 1.9151241779327393} +03/04/2022 19:02:59 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/04/2022 19:03:05 - INFO - codeparrot_training - Step 25179: {'lr': 0.00047034508584533724, 'samples': 12892160, 'steps': 25179, 'loss/train': 1.3826673030853271} +03/04/2022 19:03:08 - INFO - codeparrot_training - Step 25180: {'lr': 0.00047034257885074, 'samples': 12892672, 'steps': 25180, 'loss/train': 1.7321468591690063} +03/04/2022 19:03:09 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 19:03:13 - INFO - codeparrot_training - Step 25181: {'lr': 0.00047034007175685976, 'samples': 12893184, 'steps': 25181, 'loss/train': 1.6443660259246826} +03/04/2022 19:03:16 - INFO - codeparrot_training - Step 25182: {'lr': 0.0004703375645636977, 'samples': 12893696, 'steps': 25182, 'loss/train': 1.483567237854004} +03/04/2022 19:03:17 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 19:03:22 - INFO - codeparrot_training - Step 25183: {'lr': 0.0004703350572712549, 'samples': 12894208, 'steps': 25183, 'loss/train': 1.6034239530563354} +03/04/2022 19:03:25 - INFO - codeparrot_training - Step 25184: {'lr': 0.00047033254987953254, 'samples': 12894720, 'steps': 25184, 'loss/train': 1.7478406429290771} +03/04/2022 19:03:27 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 19:03:30 - INFO - codeparrot_training - Step 25185: {'lr': 0.0004703300423885318, 'samples': 12895232, 'steps': 25185, 'loss/train': 1.6723581552505493} +03/04/2022 19:03:33 - INFO - codeparrot_training - Step 25186: {'lr': 0.0004703275347982536, 'samples': 12895744, 'steps': 25186, 'loss/train': 1.8128533363342285} +03/04/2022 19:03:36 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 19:03:39 - INFO - codeparrot_training - Step 25187: {'lr': 0.00047032502710869935, 'samples': 12896256, 'steps': 25187, 'loss/train': 0.7747647166252136} +03/04/2022 19:03:42 - INFO - codeparrot_training - Step 25188: {'lr': 0.00047032251931987, 'samples': 12896768, 'steps': 25188, 'loss/train': 1.749839425086975} +03/04/2022 19:03:44 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 19:03:47 - INFO - codeparrot_training - Step 25189: {'lr': 0.0004703200114317667, 'samples': 12897280, 'steps': 25189, 'loss/train': 2.5225415229797363} +03/04/2022 19:03:50 - INFO - codeparrot_training - Step 25190: {'lr': 0.0004703175034443906, 'samples': 12897792, 'steps': 25190, 'loss/train': 1.805006980895996} +03/04/2022 19:03:52 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/04/2022 19:03:55 - INFO - codeparrot_training - Step 25191: {'lr': 0.00047031499535774284, 'samples': 12898304, 'steps': 25191, 'loss/train': 2.043449878692627} +03/04/2022 19:03:59 - INFO - codeparrot_training - Step 25192: {'lr': 0.00047031248717182455, 'samples': 12898816, 'steps': 25192, 'loss/train': 1.819394588470459} +03/04/2022 19:04:01 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 19:04:04 - INFO - codeparrot_training - Step 25193: {'lr': 0.00047030997888663687, 'samples': 12899328, 'steps': 25193, 'loss/train': 1.368503451347351} +03/04/2022 19:04:07 - INFO - codeparrot_training - Step 25194: {'lr': 0.00047030747050218094, 'samples': 12899840, 'steps': 25194, 'loss/train': 1.6617664098739624} +03/04/2022 19:04:09 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 19:04:13 - INFO - codeparrot_training - Step 25195: {'lr': 0.0004703049620184578, 'samples': 12900352, 'steps': 25195, 'loss/train': 2.135477066040039} +03/04/2022 19:04:16 - INFO - codeparrot_training - Step 25196: {'lr': 0.0004703024534354686, 'samples': 12900864, 'steps': 25196, 'loss/train': 1.9510955810546875} +03/04/2022 19:04:18 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/04/2022 19:04:21 - INFO - codeparrot_training - Step 25197: {'lr': 0.0004702999447532146, 'samples': 12901376, 'steps': 25197, 'loss/train': 2.524611234664917} +03/04/2022 19:04:24 - INFO - codeparrot_training - Step 25198: {'lr': 0.00047029743597169684, 'samples': 12901888, 'steps': 25198, 'loss/train': 2.1209006309509277} +03/04/2022 19:04:26 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 19:04:30 - INFO - codeparrot_training - Step 25199: {'lr': 0.0004702949270909164, 'samples': 12902400, 'steps': 25199, 'loss/train': 1.1638249158859253} +03/04/2022 19:04:33 - INFO - codeparrot_training - Step 25200: {'lr': 0.0004702924181108745, 'samples': 12902912, 'steps': 25200, 'loss/train': 0.9145562052726746} +03/04/2022 19:04:35 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/04/2022 19:04:38 - INFO - codeparrot_training - Step 25201: {'lr': 0.00047028990903157233, 'samples': 12903424, 'steps': 25201, 'loss/train': 2.6583075523376465} +03/04/2022 19:04:41 - INFO - codeparrot_training - Step 25202: {'lr': 0.0004702873998530108, 'samples': 12903936, 'steps': 25202, 'loss/train': 2.027649402618408} +03/04/2022 19:04:43 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 19:04:46 - INFO - codeparrot_training - Step 25203: {'lr': 0.0004702848905751912, 'samples': 12904448, 'steps': 25203, 'loss/train': 2.4998562335968018} +03/04/2022 19:04:50 - INFO - codeparrot_training - Step 25204: {'lr': 0.0004702823811981146, 'samples': 12904960, 'steps': 25204, 'loss/train': 1.6686561107635498} +03/04/2022 19:04:52 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 19:04:55 - INFO - codeparrot_training - Step 25205: {'lr': 0.0004702798717217822, 'samples': 12905472, 'steps': 25205, 'loss/train': 2.0256083011627197} +03/04/2022 19:04:58 - INFO - codeparrot_training - Step 25206: {'lr': 0.0004702773621461951, 'samples': 12905984, 'steps': 25206, 'loss/train': 2.2310848236083984} +03/04/2022 19:05:00 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 19:05:04 - INFO - codeparrot_training - Step 25207: {'lr': 0.0004702748524713544, 'samples': 12906496, 'steps': 25207, 'loss/train': 2.092449188232422} +03/04/2022 19:05:07 - INFO - codeparrot_training - Step 25208: {'lr': 0.00047027234269726123, 'samples': 12907008, 'steps': 25208, 'loss/train': 1.7238491773605347} +03/04/2022 19:05:10 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 19:05:13 - INFO - codeparrot_training - Step 25209: {'lr': 0.0004702698328239167, 'samples': 12907520, 'steps': 25209, 'loss/train': 0.3580975830554962} +03/04/2022 19:05:16 - INFO - codeparrot_training - Step 25210: {'lr': 0.0004702673228513221, 'samples': 12908032, 'steps': 25210, 'loss/train': 1.4008368253707886} +03/04/2022 19:05:19 - INFO - codeparrot_training - Step 25211: {'lr': 0.00047026481277947835, 'samples': 12908544, 'steps': 25211, 'loss/train': 2.0943400859832764} +03/04/2022 19:05:21 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 19:05:24 - INFO - codeparrot_training - Step 25212: {'lr': 0.0004702623026083867, 'samples': 12909056, 'steps': 25212, 'loss/train': 2.0550129413604736} +03/04/2022 19:05:28 - INFO - codeparrot_training - Step 25213: {'lr': 0.00047025979233804825, 'samples': 12909568, 'steps': 25213, 'loss/train': 1.938783049583435} +03/04/2022 19:05:29 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/04/2022 19:05:33 - INFO - codeparrot_training - Step 25214: {'lr': 0.00047025728196846417, 'samples': 12910080, 'steps': 25214, 'loss/train': 1.9267405271530151} +03/04/2022 19:05:36 - INFO - codeparrot_training - Step 25215: {'lr': 0.0004702547714996355, 'samples': 12910592, 'steps': 25215, 'loss/train': 2.0269620418548584} +03/04/2022 19:05:38 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/04/2022 19:05:41 - INFO - codeparrot_training - Step 25216: {'lr': 0.00047025226093156346, 'samples': 12911104, 'steps': 25216, 'loss/train': 1.0818992853164673} +03/04/2022 19:05:44 - INFO - codeparrot_training - Step 25217: {'lr': 0.0004702497502642492, 'samples': 12911616, 'steps': 25217, 'loss/train': 1.3142998218536377} +03/04/2022 19:05:46 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 19:05:50 - INFO - codeparrot_training - Step 25218: {'lr': 0.0004702472394976938, 'samples': 12912128, 'steps': 25218, 'loss/train': 1.9675078392028809} +03/04/2022 19:05:53 - INFO - codeparrot_training - Step 25219: {'lr': 0.0004702447286318983, 'samples': 12912640, 'steps': 25219, 'loss/train': 1.5353033542633057} +03/04/2022 19:05:55 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 19:05:58 - INFO - codeparrot_training - Step 25220: {'lr': 0.0004702422176668639, 'samples': 12913152, 'steps': 25220, 'loss/train': 1.6003278493881226} +03/04/2022 19:06:01 - INFO - codeparrot_training - Step 25221: {'lr': 0.00047023970660259193, 'samples': 12913664, 'steps': 25221, 'loss/train': 2.666259527206421} +03/04/2022 19:06:03 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 19:06:07 - INFO - codeparrot_training - Step 25222: {'lr': 0.0004702371954390832, 'samples': 12914176, 'steps': 25222, 'loss/train': 1.8576408624649048} +03/04/2022 19:06:10 - INFO - codeparrot_training - Step 25223: {'lr': 0.00047023468417633905, 'samples': 12914688, 'steps': 25223, 'loss/train': 1.40735924243927} +03/04/2022 19:06:12 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/04/2022 19:06:15 - INFO - codeparrot_training - Step 25224: {'lr': 0.0004702321728143605, 'samples': 12915200, 'steps': 25224, 'loss/train': 2.7171647548675537} +03/04/2022 19:06:18 - INFO - codeparrot_training - Step 25225: {'lr': 0.0004702296613531488, 'samples': 12915712, 'steps': 25225, 'loss/train': 1.6771979331970215} +03/04/2022 19:06:20 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 19:06:24 - INFO - codeparrot_training - Step 25226: {'lr': 0.00047022714979270497, 'samples': 12916224, 'steps': 25226, 'loss/train': 2.799003839492798} +03/04/2022 19:06:27 - INFO - codeparrot_training - Step 25227: {'lr': 0.0004702246381330302, 'samples': 12916736, 'steps': 25227, 'loss/train': 1.7640670537948608} +03/04/2022 19:06:29 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 19:06:32 - INFO - codeparrot_training - Step 25228: {'lr': 0.00047022212637412553, 'samples': 12917248, 'steps': 25228, 'loss/train': 1.6470550298690796} +03/04/2022 19:06:35 - INFO - codeparrot_training - Step 25229: {'lr': 0.00047021961451599226, 'samples': 12917760, 'steps': 25229, 'loss/train': 1.139792799949646} +03/04/2022 19:06:38 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 19:06:41 - INFO - codeparrot_training - Step 25230: {'lr': 0.00047021710255863144, 'samples': 12918272, 'steps': 25230, 'loss/train': 2.0125138759613037} +03/04/2022 19:06:44 - INFO - codeparrot_training - Step 25231: {'lr': 0.0004702145905020442, 'samples': 12918784, 'steps': 25231, 'loss/train': 1.4043655395507812} +03/04/2022 19:06:46 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 19:06:49 - INFO - codeparrot_training - Step 25232: {'lr': 0.0004702120783462316, 'samples': 12919296, 'steps': 25232, 'loss/train': 2.2226333618164062} +03/04/2022 19:06:53 - INFO - codeparrot_training - Step 25233: {'lr': 0.00047020956609119483, 'samples': 12919808, 'steps': 25233, 'loss/train': 1.7125468254089355} +03/04/2022 19:06:56 - INFO - codeparrot_training - Step 25234: {'lr': 0.0004702070537369351, 'samples': 12920320, 'steps': 25234, 'loss/train': 2.105487585067749} +03/04/2022 19:06:56 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 19:07:01 - INFO - codeparrot_training - Step 25235: {'lr': 0.00047020454128345333, 'samples': 12920832, 'steps': 25235, 'loss/train': 1.6363765001296997} +03/04/2022 19:07:04 - INFO - codeparrot_training - Step 25236: {'lr': 0.00047020202873075093, 'samples': 12921344, 'steps': 25236, 'loss/train': 1.5417019128799438} +03/04/2022 19:07:04 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 19:07:10 - INFO - codeparrot_training - Step 25237: {'lr': 0.00047019951607882884, 'samples': 12921856, 'steps': 25237, 'loss/train': 0.3437754511833191} +03/04/2022 19:07:13 - INFO - codeparrot_training - Step 25238: {'lr': 0.0004701970033276882, 'samples': 12922368, 'steps': 25238, 'loss/train': 2.44795560836792} +03/04/2022 19:07:13 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 19:07:18 - INFO - codeparrot_training - Step 25239: {'lr': 0.0004701944904773303, 'samples': 12922880, 'steps': 25239, 'loss/train': 2.28684401512146} +03/04/2022 19:07:21 - INFO - codeparrot_training - Step 25240: {'lr': 0.0004701919775277561, 'samples': 12923392, 'steps': 25240, 'loss/train': 2.272980213165283} +03/04/2022 19:07:21 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/04/2022 19:07:26 - INFO - codeparrot_training - Step 25241: {'lr': 0.0004701894644789668, 'samples': 12923904, 'steps': 25241, 'loss/train': 2.385653257369995} +03/04/2022 19:07:30 - INFO - codeparrot_training - Step 25242: {'lr': 0.0004701869513309635, 'samples': 12924416, 'steps': 25242, 'loss/train': 1.9426751136779785} +03/04/2022 19:07:30 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 19:07:35 - INFO - codeparrot_training - Step 25243: {'lr': 0.0004701844380837474, 'samples': 12924928, 'steps': 25243, 'loss/train': 1.654362440109253} +03/04/2022 19:07:38 - INFO - codeparrot_training - Step 25244: {'lr': 0.00047018192473731956, 'samples': 12925440, 'steps': 25244, 'loss/train': 1.473850965499878} +03/04/2022 19:07:38 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 19:07:44 - INFO - codeparrot_training - Step 25245: {'lr': 0.0004701794112916812, 'samples': 12925952, 'steps': 25245, 'loss/train': 2.4931082725524902} +03/04/2022 19:07:47 - INFO - codeparrot_training - Step 25246: {'lr': 0.00047017689774683325, 'samples': 12926464, 'steps': 25246, 'loss/train': 1.8041152954101562} +03/04/2022 19:07:47 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/04/2022 19:07:52 - INFO - codeparrot_training - Step 25247: {'lr': 0.0004701743841027771, 'samples': 12926976, 'steps': 25247, 'loss/train': 1.866872787475586} +03/04/2022 19:07:55 - INFO - codeparrot_training - Step 25248: {'lr': 0.0004701718703595138, 'samples': 12927488, 'steps': 25248, 'loss/train': 1.9223510026931763} +03/04/2022 19:07:56 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 19:08:01 - INFO - codeparrot_training - Step 25249: {'lr': 0.0004701693565170444, 'samples': 12928000, 'steps': 25249, 'loss/train': 1.2357670068740845} +03/04/2022 19:08:04 - INFO - codeparrot_training - Step 25250: {'lr': 0.0004701668425753701, 'samples': 12928512, 'steps': 25250, 'loss/train': 2.2135913372039795} +03/04/2022 19:08:04 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/04/2022 19:08:09 - INFO - codeparrot_training - Step 25251: {'lr': 0.000470164328534492, 'samples': 12929024, 'steps': 25251, 'loss/train': 1.73197603225708} +03/04/2022 19:08:12 - INFO - codeparrot_training - Step 25252: {'lr': 0.00047016181439441126, 'samples': 12929536, 'steps': 25252, 'loss/train': 2.3281567096710205} +03/04/2022 19:08:12 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 19:08:17 - INFO - codeparrot_training - Step 25253: {'lr': 0.000470159300155129, 'samples': 12930048, 'steps': 25253, 'loss/train': 2.4120144844055176} +03/04/2022 19:08:21 - INFO - codeparrot_training - Step 25254: {'lr': 0.00047015678581664635, 'samples': 12930560, 'steps': 25254, 'loss/train': 1.4932515621185303} +03/04/2022 19:08:21 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 19:08:26 - INFO - codeparrot_training - Step 25255: {'lr': 0.00047015427137896446, 'samples': 12931072, 'steps': 25255, 'loss/train': 1.0346206426620483} +03/04/2022 19:08:29 - INFO - codeparrot_training - Step 25256: {'lr': 0.0004701517568420844, 'samples': 12931584, 'steps': 25256, 'loss/train': 2.016472816467285} +03/04/2022 19:08:29 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/04/2022 19:08:34 - INFO - codeparrot_training - Step 25257: {'lr': 0.0004701492422060074, 'samples': 12932096, 'steps': 25257, 'loss/train': 3.011521577835083} +03/04/2022 19:08:37 - INFO - codeparrot_training - Step 25258: {'lr': 0.0004701467274707346, 'samples': 12932608, 'steps': 25258, 'loss/train': 1.5322874784469604} +03/04/2022 19:08:38 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/04/2022 19:08:43 - INFO - codeparrot_training - Step 25259: {'lr': 0.0004701442126362671, 'samples': 12933120, 'steps': 25259, 'loss/train': 1.9460442066192627} +03/04/2022 19:08:46 - INFO - codeparrot_training - Step 25260: {'lr': 0.0004701416977026059, 'samples': 12933632, 'steps': 25260, 'loss/train': 1.175583839416504} +03/04/2022 19:08:46 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/04/2022 19:08:51 - INFO - codeparrot_training - Step 25261: {'lr': 0.0004701391826697523, 'samples': 12934144, 'steps': 25261, 'loss/train': 1.6859688758850098} +03/04/2022 19:08:54 - INFO - codeparrot_training - Step 25262: {'lr': 0.00047013666753770736, 'samples': 12934656, 'steps': 25262, 'loss/train': 1.7589534521102905} +03/04/2022 19:08:55 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/04/2022 19:09:00 - INFO - codeparrot_training - Step 25263: {'lr': 0.00047013415230647227, 'samples': 12935168, 'steps': 25263, 'loss/train': 2.5282318592071533} +03/04/2022 19:09:03 - INFO - codeparrot_training - Step 25264: {'lr': 0.0004701316369760481, 'samples': 12935680, 'steps': 25264, 'loss/train': 2.1080193519592285} +03/04/2022 19:09:03 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 19:09:08 - INFO - codeparrot_training - Step 25265: {'lr': 0.00047012912154643607, 'samples': 12936192, 'steps': 25265, 'loss/train': 2.1533796787261963} +03/04/2022 19:09:11 - INFO - codeparrot_training - Step 25266: {'lr': 0.0004701266060176372, 'samples': 12936704, 'steps': 25266, 'loss/train': 1.4148755073547363} +03/04/2022 19:09:12 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 19:09:17 - INFO - codeparrot_training - Step 25267: {'lr': 0.00047012409038965267, 'samples': 12937216, 'steps': 25267, 'loss/train': 1.7201858758926392} +03/04/2022 19:09:20 - INFO - codeparrot_training - Step 25268: {'lr': 0.0004701215746624836, 'samples': 12937728, 'steps': 25268, 'loss/train': 1.5146760940551758} +03/04/2022 19:09:20 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 19:09:25 - INFO - codeparrot_training - Step 25269: {'lr': 0.0004701190588361312, 'samples': 12938240, 'steps': 25269, 'loss/train': 1.3812400102615356} +03/04/2022 19:09:28 - INFO - codeparrot_training - Step 25270: {'lr': 0.0004701165429105966, 'samples': 12938752, 'steps': 25270, 'loss/train': 1.2600101232528687} +03/04/2022 19:09:29 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/04/2022 19:09:33 - INFO - codeparrot_training - Step 25271: {'lr': 0.0004701140268858808, 'samples': 12939264, 'steps': 25271, 'loss/train': 1.79182767868042} +03/04/2022 19:09:37 - INFO - codeparrot_training - Step 25272: {'lr': 0.000470111510761985, 'samples': 12939776, 'steps': 25272, 'loss/train': 1.163543939590454} +03/04/2022 19:09:37 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/04/2022 19:09:42 - INFO - codeparrot_training - Step 25273: {'lr': 0.0004701089945389104, 'samples': 12940288, 'steps': 25273, 'loss/train': 2.542271137237549} +03/04/2022 19:09:45 - INFO - codeparrot_training - Step 25274: {'lr': 0.00047010647821665803, 'samples': 12940800, 'steps': 25274, 'loss/train': 1.8514363765716553} +03/04/2022 19:09:45 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 19:09:50 - INFO - codeparrot_training - Step 25275: {'lr': 0.0004701039617952291, 'samples': 12941312, 'steps': 25275, 'loss/train': 1.8375102281570435} +03/04/2022 19:09:53 - INFO - codeparrot_training - Step 25276: {'lr': 0.00047010144527462474, 'samples': 12941824, 'steps': 25276, 'loss/train': 2.6323704719543457} +03/04/2022 19:09:54 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 19:09:59 - INFO - codeparrot_training - Step 25277: {'lr': 0.00047009892865484607, 'samples': 12942336, 'steps': 25277, 'loss/train': 1.118599772453308} +03/04/2022 19:10:02 - INFO - codeparrot_training - Step 25278: {'lr': 0.00047009641193589423, 'samples': 12942848, 'steps': 25278, 'loss/train': 1.6042462587356567} +03/04/2022 19:10:02 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 19:10:07 - INFO - codeparrot_training - Step 25279: {'lr': 0.00047009389511777036, 'samples': 12943360, 'steps': 25279, 'loss/train': 2.020631790161133} +03/04/2022 19:10:11 - INFO - codeparrot_training - Step 25280: {'lr': 0.0004700913782004755, 'samples': 12943872, 'steps': 25280, 'loss/train': 2.071779727935791} +03/04/2022 19:10:11 - INFO - codeparrot_training - Skipping example with length 285 (seq_length=1024) +03/04/2022 19:10:16 - INFO - codeparrot_training - Step 25281: {'lr': 0.00047008886118401084, 'samples': 12944384, 'steps': 25281, 'loss/train': 1.9672846794128418} +03/04/2022 19:10:20 - INFO - codeparrot_training - Step 25282: {'lr': 0.0004700863440683776, 'samples': 12944896, 'steps': 25282, 'loss/train': 0.8893557190895081} +03/04/2022 19:10:21 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 19:10:25 - INFO - codeparrot_training - Step 25283: {'lr': 0.00047008382685357686, 'samples': 12945408, 'steps': 25283, 'loss/train': 1.433991551399231} +03/04/2022 19:10:28 - INFO - codeparrot_training - Step 25284: {'lr': 0.0004700813095396098, 'samples': 12945920, 'steps': 25284, 'loss/train': 1.5477524995803833} +03/04/2022 19:10:30 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 19:10:33 - INFO - codeparrot_training - Step 25285: {'lr': 0.00047007879212647744, 'samples': 12946432, 'steps': 25285, 'loss/train': 2.614014148712158} +03/04/2022 19:10:37 - INFO - codeparrot_training - Step 25286: {'lr': 0.0004700762746141809, 'samples': 12946944, 'steps': 25286, 'loss/train': 2.3048131465911865} +03/04/2022 19:10:38 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/04/2022 19:10:42 - INFO - codeparrot_training - Step 25287: {'lr': 0.0004700737570027214, 'samples': 12947456, 'steps': 25287, 'loss/train': 1.5815726518630981} +03/04/2022 19:10:45 - INFO - codeparrot_training - Step 25288: {'lr': 0.00047007123929210015, 'samples': 12947968, 'steps': 25288, 'loss/train': 1.3431769609451294} +03/04/2022 19:10:47 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/04/2022 19:10:50 - INFO - codeparrot_training - Step 25289: {'lr': 0.00047006872148231814, 'samples': 12948480, 'steps': 25289, 'loss/train': 2.1170506477355957} +03/04/2022 19:10:53 - INFO - codeparrot_training - Step 25290: {'lr': 0.0004700662035733766, 'samples': 12948992, 'steps': 25290, 'loss/train': 1.7611955404281616} +03/04/2022 19:10:55 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 19:10:59 - INFO - codeparrot_training - Step 25291: {'lr': 0.0004700636855652766, 'samples': 12949504, 'steps': 25291, 'loss/train': 1.6154122352600098} +03/04/2022 19:11:02 - INFO - codeparrot_training - Step 25292: {'lr': 0.0004700611674580193, 'samples': 12950016, 'steps': 25292, 'loss/train': 2.052398443222046} +03/04/2022 19:11:03 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/04/2022 19:11:07 - INFO - codeparrot_training - Step 25293: {'lr': 0.0004700586492516058, 'samples': 12950528, 'steps': 25293, 'loss/train': 0.8360694050788879} +03/04/2022 19:11:10 - INFO - codeparrot_training - Step 25294: {'lr': 0.00047005613094603727, 'samples': 12951040, 'steps': 25294, 'loss/train': 1.6595330238342285} +03/04/2022 19:11:12 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 19:11:16 - INFO - codeparrot_training - Step 25295: {'lr': 0.0004700536125413149, 'samples': 12951552, 'steps': 25295, 'loss/train': 1.4597535133361816} +03/04/2022 19:11:19 - INFO - codeparrot_training - Step 25296: {'lr': 0.00047005109403743976, 'samples': 12952064, 'steps': 25296, 'loss/train': 1.4352736473083496} +03/04/2022 19:11:20 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 19:11:24 - INFO - codeparrot_training - Step 25297: {'lr': 0.00047004857543441294, 'samples': 12952576, 'steps': 25297, 'loss/train': 2.0718212127685547} +03/04/2022 19:11:27 - INFO - codeparrot_training - Step 25298: {'lr': 0.00047004605673223567, 'samples': 12953088, 'steps': 25298, 'loss/train': 1.810469388961792} +03/04/2022 19:11:28 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 19:11:32 - INFO - codeparrot_training - Step 25299: {'lr': 0.00047004353793090903, 'samples': 12953600, 'steps': 25299, 'loss/train': 1.957377552986145} +03/04/2022 19:11:36 - INFO - codeparrot_training - Step 25300: {'lr': 0.00047004101903043416, 'samples': 12954112, 'steps': 25300, 'loss/train': 1.8267908096313477} +03/04/2022 19:11:37 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 19:11:41 - INFO - codeparrot_training - Step 25301: {'lr': 0.00047003850003081215, 'samples': 12954624, 'steps': 25301, 'loss/train': 2.164577007293701} +03/04/2022 19:11:44 - INFO - codeparrot_training - Step 25302: {'lr': 0.0004700359809320443, 'samples': 12955136, 'steps': 25302, 'loss/train': 1.6552242040634155} +03/04/2022 19:11:46 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/04/2022 19:11:49 - INFO - codeparrot_training - Step 25303: {'lr': 0.0004700334617341316, 'samples': 12955648, 'steps': 25303, 'loss/train': 1.6860713958740234} +03/04/2022 19:11:52 - INFO - codeparrot_training - Step 25304: {'lr': 0.0004700309424370752, 'samples': 12956160, 'steps': 25304, 'loss/train': 2.3178820610046387} +03/04/2022 19:11:54 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 19:11:58 - INFO - codeparrot_training - Step 25305: {'lr': 0.00047002842304087625, 'samples': 12956672, 'steps': 25305, 'loss/train': 1.5979934930801392} +03/04/2022 19:12:01 - INFO - codeparrot_training - Step 25306: {'lr': 0.00047002590354553586, 'samples': 12957184, 'steps': 25306, 'loss/train': 2.5537147521972656} +03/04/2022 19:12:03 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 19:12:06 - INFO - codeparrot_training - Step 25307: {'lr': 0.0004700233839510552, 'samples': 12957696, 'steps': 25307, 'loss/train': 2.104651927947998} +03/04/2022 19:12:09 - INFO - codeparrot_training - Step 25308: {'lr': 0.00047002086425743545, 'samples': 12958208, 'steps': 25308, 'loss/train': 1.7871196269989014} +03/04/2022 19:12:11 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/04/2022 19:12:15 - INFO - codeparrot_training - Step 25309: {'lr': 0.0004700183444646776, 'samples': 12958720, 'steps': 25309, 'loss/train': 2.587916612625122} +03/04/2022 19:12:18 - INFO - codeparrot_training - Step 25310: {'lr': 0.000470015824572783, 'samples': 12959232, 'steps': 25310, 'loss/train': 0.13098278641700745} +03/04/2022 19:12:19 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/04/2022 19:12:24 - INFO - codeparrot_training - Step 25311: {'lr': 0.00047001330458175264, 'samples': 12959744, 'steps': 25311, 'loss/train': 6.453415870666504} +03/04/2022 19:12:27 - INFO - codeparrot_training - Step 25312: {'lr': 0.0004700107844915876, 'samples': 12960256, 'steps': 25312, 'loss/train': 1.5737439393997192} +03/04/2022 19:12:30 - INFO - codeparrot_training - Step 25313: {'lr': 0.00047000826430228915, 'samples': 12960768, 'steps': 25313, 'loss/train': 2.7380714416503906} +03/04/2022 19:12:30 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/04/2022 19:12:35 - INFO - codeparrot_training - Step 25314: {'lr': 0.00047000574401385835, 'samples': 12961280, 'steps': 25314, 'loss/train': 1.9988138675689697} +03/04/2022 19:12:38 - INFO - codeparrot_training - Step 25315: {'lr': 0.0004700032236262964, 'samples': 12961792, 'steps': 25315, 'loss/train': 1.5793505907058716} +03/04/2022 19:12:39 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/04/2022 19:12:44 - INFO - codeparrot_training - Step 25316: {'lr': 0.00047000070313960436, 'samples': 12962304, 'steps': 25316, 'loss/train': 1.8534351587295532} +03/04/2022 19:12:47 - INFO - codeparrot_training - Step 25317: {'lr': 0.00046999818255378335, 'samples': 12962816, 'steps': 25317, 'loss/train': 1.8523732423782349} +03/04/2022 19:12:47 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 19:12:52 - INFO - codeparrot_training - Step 25318: {'lr': 0.00046999566186883466, 'samples': 12963328, 'steps': 25318, 'loss/train': 1.1400641202926636} +03/04/2022 19:12:55 - INFO - codeparrot_training - Step 25319: {'lr': 0.0004699931410847592, 'samples': 12963840, 'steps': 25319, 'loss/train': 1.8348878622055054} +03/04/2022 19:12:55 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/04/2022 19:13:01 - INFO - codeparrot_training - Step 25320: {'lr': 0.00046999062020155834, 'samples': 12964352, 'steps': 25320, 'loss/train': 0.44005560874938965} +03/04/2022 19:13:04 - INFO - codeparrot_training - Step 25321: {'lr': 0.00046998809921923305, 'samples': 12964864, 'steps': 25321, 'loss/train': 1.711197853088379} +03/04/2022 19:13:04 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 19:13:09 - INFO - codeparrot_training - Step 25322: {'lr': 0.0004699855781377845, 'samples': 12965376, 'steps': 25322, 'loss/train': 1.533366322517395} +03/04/2022 19:13:12 - INFO - codeparrot_training - Step 25323: {'lr': 0.0004699830569572139, 'samples': 12965888, 'steps': 25323, 'loss/train': 2.9759814739227295} +03/04/2022 19:13:12 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/04/2022 19:13:17 - INFO - codeparrot_training - Step 25324: {'lr': 0.00046998053567752225, 'samples': 12966400, 'steps': 25324, 'loss/train': 0.6103813052177429} +03/04/2022 19:13:21 - INFO - codeparrot_training - Step 25325: {'lr': 0.0004699780142987108, 'samples': 12966912, 'steps': 25325, 'loss/train': 1.7392432689666748} +03/04/2022 19:13:21 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 19:13:26 - INFO - codeparrot_training - Step 25326: {'lr': 0.0004699754928207807, 'samples': 12967424, 'steps': 25326, 'loss/train': 1.7384638786315918} +03/04/2022 19:13:29 - INFO - codeparrot_training - Step 25327: {'lr': 0.00046997297124373293, 'samples': 12967936, 'steps': 25327, 'loss/train': 1.2617100477218628} +03/04/2022 19:13:29 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 19:13:34 - INFO - codeparrot_training - Step 25328: {'lr': 0.00046997044956756883, 'samples': 12968448, 'steps': 25328, 'loss/train': 1.7598261833190918} +03/04/2022 19:13:37 - INFO - codeparrot_training - Step 25329: {'lr': 0.00046996792779228935, 'samples': 12968960, 'steps': 25329, 'loss/train': 1.651097297668457} +03/04/2022 19:13:38 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 19:13:43 - INFO - codeparrot_training - Step 25330: {'lr': 0.00046996540591789584, 'samples': 12969472, 'steps': 25330, 'loss/train': 1.7799313068389893} +03/04/2022 19:13:46 - INFO - codeparrot_training - Step 25331: {'lr': 0.00046996288394438924, 'samples': 12969984, 'steps': 25331, 'loss/train': 1.2760562896728516} +03/04/2022 19:13:46 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 19:13:51 - INFO - codeparrot_training - Step 25332: {'lr': 0.00046996036187177073, 'samples': 12970496, 'steps': 25332, 'loss/train': 1.7225462198257446} +03/04/2022 19:13:54 - INFO - codeparrot_training - Step 25333: {'lr': 0.0004699578397000415, 'samples': 12971008, 'steps': 25333, 'loss/train': 1.6820789575576782} +03/04/2022 19:13:55 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 19:14:00 - INFO - codeparrot_training - Step 25334: {'lr': 0.00046995531742920264, 'samples': 12971520, 'steps': 25334, 'loss/train': 1.9915714263916016} +03/04/2022 19:14:03 - INFO - codeparrot_training - Step 25335: {'lr': 0.00046995279505925535, 'samples': 12972032, 'steps': 25335, 'loss/train': 2.4880597591400146} +03/04/2022 19:14:03 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 19:14:08 - INFO - codeparrot_training - Step 25336: {'lr': 0.00046995027259020075, 'samples': 12972544, 'steps': 25336, 'loss/train': 2.4904191493988037} +03/04/2022 19:14:11 - INFO - codeparrot_training - Step 25337: {'lr': 0.00046994775002203994, 'samples': 12973056, 'steps': 25337, 'loss/train': 1.7000421285629272} +03/04/2022 19:14:12 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 19:14:17 - INFO - codeparrot_training - Step 25338: {'lr': 0.000469945227354774, 'samples': 12973568, 'steps': 25338, 'loss/train': 1.7487350702285767} +03/04/2022 19:14:20 - INFO - codeparrot_training - Step 25339: {'lr': 0.00046994270458840416, 'samples': 12974080, 'steps': 25339, 'loss/train': 1.43644380569458} +03/04/2022 19:14:20 - INFO - codeparrot_training - Skipping example with length 39 (seq_length=1024) +03/04/2022 19:14:25 - INFO - codeparrot_training - Step 25340: {'lr': 0.0004699401817229316, 'samples': 12974592, 'steps': 25340, 'loss/train': 0.9968238472938538} +03/04/2022 19:14:28 - INFO - codeparrot_training - Step 25341: {'lr': 0.0004699376587583573, 'samples': 12975104, 'steps': 25341, 'loss/train': 1.6751258373260498} +03/04/2022 19:14:29 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 19:14:33 - INFO - codeparrot_training - Step 25342: {'lr': 0.0004699351356946825, 'samples': 12975616, 'steps': 25342, 'loss/train': 2.0064308643341064} +03/04/2022 19:14:37 - INFO - codeparrot_training - Step 25343: {'lr': 0.00046993261253190833, 'samples': 12976128, 'steps': 25343, 'loss/train': 1.7874807119369507} +03/04/2022 19:14:37 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 19:14:42 - INFO - codeparrot_training - Step 25344: {'lr': 0.000469930089270036, 'samples': 12976640, 'steps': 25344, 'loss/train': 1.7672526836395264} +03/04/2022 19:14:45 - INFO - codeparrot_training - Step 25345: {'lr': 0.0004699275659090665, 'samples': 12977152, 'steps': 25345, 'loss/train': 1.9256064891815186} +03/04/2022 19:14:45 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 19:14:50 - INFO - codeparrot_training - Step 25346: {'lr': 0.000469925042449001, 'samples': 12977664, 'steps': 25346, 'loss/train': 1.6666514873504639} +03/04/2022 19:14:53 - INFO - codeparrot_training - Step 25347: {'lr': 0.0004699225188898407, 'samples': 12978176, 'steps': 25347, 'loss/train': 2.028059482574463} +03/04/2022 19:14:54 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/04/2022 19:14:59 - INFO - codeparrot_training - Step 25348: {'lr': 0.00046991999523158666, 'samples': 12978688, 'steps': 25348, 'loss/train': 1.7633355855941772} +03/04/2022 19:15:02 - INFO - codeparrot_training - Step 25349: {'lr': 0.0004699174714742401, 'samples': 12979200, 'steps': 25349, 'loss/train': 1.8247781991958618} +03/04/2022 19:15:02 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 19:15:07 - INFO - codeparrot_training - Step 25350: {'lr': 0.0004699149476178022, 'samples': 12979712, 'steps': 25350, 'loss/train': 2.3249549865722656} +03/04/2022 19:15:10 - INFO - codeparrot_training - Step 25351: {'lr': 0.00046991242366227395, 'samples': 12980224, 'steps': 25351, 'loss/train': 2.033646821975708} +03/04/2022 19:15:10 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 19:15:15 - INFO - codeparrot_training - Step 25352: {'lr': 0.0004699098996076565, 'samples': 12980736, 'steps': 25352, 'loss/train': 2.1840810775756836} +03/04/2022 19:15:19 - INFO - codeparrot_training - Step 25353: {'lr': 0.0004699073754539511, 'samples': 12981248, 'steps': 25353, 'loss/train': 0.9812184572219849} +03/04/2022 19:15:19 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/04/2022 19:15:24 - INFO - codeparrot_training - Step 25354: {'lr': 0.0004699048512011588, 'samples': 12981760, 'steps': 25354, 'loss/train': 2.208390474319458} +03/04/2022 19:15:27 - INFO - codeparrot_training - Step 25355: {'lr': 0.0004699023268492808, 'samples': 12982272, 'steps': 25355, 'loss/train': 1.762279748916626} +03/04/2022 19:15:27 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 19:15:33 - INFO - codeparrot_training - Step 25356: {'lr': 0.0004698998023983182, 'samples': 12982784, 'steps': 25356, 'loss/train': 1.1417272090911865} +03/04/2022 19:15:36 - INFO - codeparrot_training - Step 25357: {'lr': 0.0004698972778482722, 'samples': 12983296, 'steps': 25357, 'loss/train': 1.4787989854812622} +03/04/2022 19:15:36 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/04/2022 19:15:41 - INFO - codeparrot_training - Step 25358: {'lr': 0.0004698947531991438, 'samples': 12983808, 'steps': 25358, 'loss/train': 1.863358736038208} +03/04/2022 19:15:44 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/04/2022 19:15:47 - INFO - codeparrot_training - Step 25359: {'lr': 0.0004698922284509342, 'samples': 12984320, 'steps': 25359, 'loss/train': 1.9527350664138794} +03/04/2022 19:15:50 - INFO - codeparrot_training - Step 25360: {'lr': 0.00046988970360364456, 'samples': 12984832, 'steps': 25360, 'loss/train': 1.7599468231201172} +03/04/2022 19:15:53 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 19:15:55 - INFO - codeparrot_training - Step 25361: {'lr': 0.0004698871786572761, 'samples': 12985344, 'steps': 25361, 'loss/train': 1.5590951442718506} +03/04/2022 19:15:58 - INFO - codeparrot_training - Step 25362: {'lr': 0.0004698846536118298, 'samples': 12985856, 'steps': 25362, 'loss/train': 1.8527084589004517} +03/04/2022 19:16:01 - INFO - codeparrot_training - Step 25363: {'lr': 0.00046988212846730686, 'samples': 12986368, 'steps': 25363, 'loss/train': 1.8146284818649292} +03/04/2022 19:16:02 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 19:16:07 - INFO - codeparrot_training - Step 25364: {'lr': 0.0004698796032237085, 'samples': 12986880, 'steps': 25364, 'loss/train': 1.93575918674469} +03/04/2022 19:16:10 - INFO - codeparrot_training - Step 25365: {'lr': 0.0004698770778810357, 'samples': 12987392, 'steps': 25365, 'loss/train': 1.7189249992370605} +03/04/2022 19:16:10 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 19:16:15 - INFO - codeparrot_training - Step 25366: {'lr': 0.00046987455243928974, 'samples': 12987904, 'steps': 25366, 'loss/train': 1.3808679580688477} +03/04/2022 19:16:19 - INFO - codeparrot_training - Step 25367: {'lr': 0.00046987202689847165, 'samples': 12988416, 'steps': 25367, 'loss/train': 3.013519763946533} +03/04/2022 19:16:19 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 19:16:24 - INFO - codeparrot_training - Step 25368: {'lr': 0.00046986950125858264, 'samples': 12988928, 'steps': 25368, 'loss/train': 1.7318766117095947} +03/04/2022 19:16:27 - INFO - codeparrot_training - Step 25369: {'lr': 0.0004698669755196239, 'samples': 12989440, 'steps': 25369, 'loss/train': 2.2340152263641357} +03/04/2022 19:16:28 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/04/2022 19:16:33 - INFO - codeparrot_training - Step 25370: {'lr': 0.0004698644496815964, 'samples': 12989952, 'steps': 25370, 'loss/train': 3.018932342529297} +03/04/2022 19:16:36 - INFO - codeparrot_training - Step 25371: {'lr': 0.0004698619237445013, 'samples': 12990464, 'steps': 25371, 'loss/train': 2.163050413131714} +03/04/2022 19:16:37 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 19:16:41 - INFO - codeparrot_training - Step 25372: {'lr': 0.00046985939770834, 'samples': 12990976, 'steps': 25372, 'loss/train': 1.3070533275604248} +03/04/2022 19:16:45 - INFO - codeparrot_training - Step 25373: {'lr': 0.0004698568715731133, 'samples': 12991488, 'steps': 25373, 'loss/train': 2.114276885986328} +03/04/2022 19:16:45 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/04/2022 19:16:50 - INFO - codeparrot_training - Step 25374: {'lr': 0.00046985434533882255, 'samples': 12992000, 'steps': 25374, 'loss/train': 1.9455182552337646} +03/04/2022 19:16:53 - INFO - codeparrot_training - Step 25375: {'lr': 0.00046985181900546883, 'samples': 12992512, 'steps': 25375, 'loss/train': 1.8896756172180176} +03/04/2022 19:16:54 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 19:16:58 - INFO - codeparrot_training - Step 25376: {'lr': 0.0004698492925730532, 'samples': 12993024, 'steps': 25376, 'loss/train': 2.2577056884765625} +03/04/2022 19:17:01 - INFO - codeparrot_training - Step 25377: {'lr': 0.00046984676604157696, 'samples': 12993536, 'steps': 25377, 'loss/train': 1.6013085842132568} +03/04/2022 19:17:02 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/04/2022 19:17:07 - INFO - codeparrot_training - Step 25378: {'lr': 0.0004698442394110411, 'samples': 12994048, 'steps': 25378, 'loss/train': 1.1733742952346802} +03/04/2022 19:17:10 - INFO - codeparrot_training - Step 25379: {'lr': 0.0004698417126814468, 'samples': 12994560, 'steps': 25379, 'loss/train': 1.5479508638381958} +03/04/2022 19:17:11 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 19:17:15 - INFO - codeparrot_training - Step 25380: {'lr': 0.0004698391858527953, 'samples': 12995072, 'steps': 25380, 'loss/train': 1.0326913595199585} +03/04/2022 19:17:18 - INFO - codeparrot_training - Step 25381: {'lr': 0.0004698366589250876, 'samples': 12995584, 'steps': 25381, 'loss/train': 1.8304898738861084} +03/04/2022 19:17:19 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 19:17:24 - INFO - codeparrot_training - Step 25382: {'lr': 0.0004698341318983249, 'samples': 12996096, 'steps': 25382, 'loss/train': 2.170163154602051} +03/04/2022 19:17:27 - INFO - codeparrot_training - Step 25383: {'lr': 0.00046983160477250837, 'samples': 12996608, 'steps': 25383, 'loss/train': 1.8382006883621216} +03/04/2022 19:17:30 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/04/2022 19:17:33 - INFO - codeparrot_training - Step 25384: {'lr': 0.00046982907754763905, 'samples': 12997120, 'steps': 25384, 'loss/train': 2.0447144508361816} +03/04/2022 19:17:36 - INFO - codeparrot_training - Step 25385: {'lr': 0.0004698265502237182, 'samples': 12997632, 'steps': 25385, 'loss/train': 2.876307249069214} +03/04/2022 19:17:38 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/04/2022 19:17:41 - INFO - codeparrot_training - Step 25386: {'lr': 0.0004698240228007469, 'samples': 12998144, 'steps': 25386, 'loss/train': 1.3803784847259521} +03/04/2022 19:17:44 - INFO - codeparrot_training - Step 25387: {'lr': 0.0004698214952787262, 'samples': 12998656, 'steps': 25387, 'loss/train': 2.783010959625244} +03/04/2022 19:17:47 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 19:17:50 - INFO - codeparrot_training - Step 25388: {'lr': 0.0004698189676576574, 'samples': 12999168, 'steps': 25388, 'loss/train': 1.868465542793274} +03/04/2022 19:17:53 - INFO - codeparrot_training - Step 25389: {'lr': 0.00046981643993754155, 'samples': 12999680, 'steps': 25389, 'loss/train': 1.3487191200256348} +03/04/2022 19:17:55 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 19:17:59 - INFO - codeparrot_training - Step 25390: {'lr': 0.0004698139121183798, 'samples': 13000192, 'steps': 25390, 'loss/train': 1.7889069318771362} +03/04/2022 19:18:02 - INFO - codeparrot_training - Step 25391: {'lr': 0.00046981138420017335, 'samples': 13000704, 'steps': 25391, 'loss/train': 1.2679407596588135} +03/04/2022 19:18:05 - INFO - codeparrot_training - Step 25392: {'lr': 0.00046980885618292317, 'samples': 13001216, 'steps': 25392, 'loss/train': 0.7383412718772888} +03/04/2022 19:18:07 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 19:18:10 - INFO - codeparrot_training - Step 25393: {'lr': 0.0004698063280666306, 'samples': 13001728, 'steps': 25393, 'loss/train': 0.6906705498695374} +03/04/2022 19:18:13 - INFO - codeparrot_training - Step 25394: {'lr': 0.0004698037998512966, 'samples': 13002240, 'steps': 25394, 'loss/train': 1.7247951030731201} +03/04/2022 19:18:16 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/04/2022 19:18:19 - INFO - codeparrot_training - Step 25395: {'lr': 0.00046980127153692256, 'samples': 13002752, 'steps': 25395, 'loss/train': 2.4694480895996094} +03/04/2022 19:18:22 - INFO - codeparrot_training - Step 25396: {'lr': 0.00046979874312350935, 'samples': 13003264, 'steps': 25396, 'loss/train': 2.3787992000579834} +03/04/2022 19:18:25 - INFO - codeparrot_training - Step 25397: {'lr': 0.00046979621461105817, 'samples': 13003776, 'steps': 25397, 'loss/train': 1.1216151714324951} +03/04/2022 19:18:26 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/04/2022 19:18:32 - INFO - codeparrot_training - Step 25398: {'lr': 0.0004697936859995703, 'samples': 13004288, 'steps': 25398, 'loss/train': 1.9707132577896118} +03/04/2022 19:18:35 - INFO - codeparrot_training - Step 25399: {'lr': 0.00046979115728904675, 'samples': 13004800, 'steps': 25399, 'loss/train': 1.8095448017120361} +03/04/2022 19:18:38 - INFO - codeparrot_training - Step 25400: {'lr': 0.0004697886284794887, 'samples': 13005312, 'steps': 25400, 'loss/train': 1.9364482164382935} +03/04/2022 19:18:39 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/04/2022 19:18:43 - INFO - codeparrot_training - Step 25401: {'lr': 0.00046978609957089724, 'samples': 13005824, 'steps': 25401, 'loss/train': 2.0713348388671875} +03/04/2022 19:18:46 - INFO - codeparrot_training - Step 25402: {'lr': 0.0004697835705632736, 'samples': 13006336, 'steps': 25402, 'loss/train': 2.3738913536071777} +03/04/2022 19:18:47 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 19:18:52 - INFO - codeparrot_training - Step 25403: {'lr': 0.00046978104145661885, 'samples': 13006848, 'steps': 25403, 'loss/train': 6.628108501434326} +03/04/2022 19:18:55 - INFO - codeparrot_training - Step 25404: {'lr': 0.00046977851225093423, 'samples': 13007360, 'steps': 25404, 'loss/train': 1.9153882265090942} +03/04/2022 19:18:56 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 19:19:00 - INFO - codeparrot_training - Step 25405: {'lr': 0.0004697759829462207, 'samples': 13007872, 'steps': 25405, 'loss/train': 2.217949151992798} +03/04/2022 19:19:03 - INFO - codeparrot_training - Step 25406: {'lr': 0.0004697734535424796, 'samples': 13008384, 'steps': 25406, 'loss/train': 1.9899888038635254} +03/04/2022 19:19:04 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 19:19:09 - INFO - codeparrot_training - Step 25407: {'lr': 0.0004697709240397119, 'samples': 13008896, 'steps': 25407, 'loss/train': 1.1458783149719238} +03/04/2022 19:19:12 - INFO - codeparrot_training - Step 25408: {'lr': 0.00046976839443791887, 'samples': 13009408, 'steps': 25408, 'loss/train': 2.013826847076416} +03/04/2022 19:19:13 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 19:19:17 - INFO - codeparrot_training - Step 25409: {'lr': 0.00046976586473710156, 'samples': 13009920, 'steps': 25409, 'loss/train': 1.3955961465835571} +03/04/2022 19:19:20 - INFO - codeparrot_training - Step 25410: {'lr': 0.0004697633349372611, 'samples': 13010432, 'steps': 25410, 'loss/train': 1.5838645696640015} +03/04/2022 19:19:22 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/04/2022 19:19:26 - INFO - codeparrot_training - Step 25411: {'lr': 0.00046976080503839874, 'samples': 13010944, 'steps': 25411, 'loss/train': 2.2237675189971924} +03/04/2022 19:19:29 - INFO - codeparrot_training - Step 25412: {'lr': 0.0004697582750405155, 'samples': 13011456, 'steps': 25412, 'loss/train': 0.7739744186401367} +03/04/2022 19:19:31 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 19:19:34 - INFO - codeparrot_training - Step 25413: {'lr': 0.00046975574494361263, 'samples': 13011968, 'steps': 25413, 'loss/train': 2.140094757080078} +03/04/2022 19:19:37 - INFO - codeparrot_training - Step 25414: {'lr': 0.00046975321474769115, 'samples': 13012480, 'steps': 25414, 'loss/train': 1.7429466247558594} +03/04/2022 19:19:39 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/04/2022 19:19:43 - INFO - codeparrot_training - Step 25415: {'lr': 0.0004697506844527523, 'samples': 13012992, 'steps': 25415, 'loss/train': 2.6152782440185547} +03/04/2022 19:19:46 - INFO - codeparrot_training - Step 25416: {'lr': 0.0004697481540587972, 'samples': 13013504, 'steps': 25416, 'loss/train': 1.7161967754364014} +03/04/2022 19:19:48 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 19:19:51 - INFO - codeparrot_training - Step 25417: {'lr': 0.00046974562356582694, 'samples': 13014016, 'steps': 25417, 'loss/train': 2.0265867710113525} +03/04/2022 19:19:54 - INFO - codeparrot_training - Step 25418: {'lr': 0.0004697430929738427, 'samples': 13014528, 'steps': 25418, 'loss/train': 1.7055197954177856} +03/04/2022 19:19:56 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 19:20:00 - INFO - codeparrot_training - Step 25419: {'lr': 0.0004697405622828456, 'samples': 13015040, 'steps': 25419, 'loss/train': 0.8557844758033752} +03/04/2022 19:20:03 - INFO - codeparrot_training - Step 25420: {'lr': 0.00046973803149283686, 'samples': 13015552, 'steps': 25420, 'loss/train': 2.39294695854187} +03/04/2022 19:20:05 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 19:20:08 - INFO - codeparrot_training - Step 25421: {'lr': 0.0004697355006038175, 'samples': 13016064, 'steps': 25421, 'loss/train': 1.6991658210754395} +03/04/2022 19:20:11 - INFO - codeparrot_training - Step 25422: {'lr': 0.0004697329696157887, 'samples': 13016576, 'steps': 25422, 'loss/train': 1.942380666732788} +03/04/2022 19:20:14 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 19:20:17 - INFO - codeparrot_training - Step 25423: {'lr': 0.00046973043852875163, 'samples': 13017088, 'steps': 25423, 'loss/train': 1.4704680442810059} +03/04/2022 19:20:20 - INFO - codeparrot_training - Step 25424: {'lr': 0.00046972790734270745, 'samples': 13017600, 'steps': 25424, 'loss/train': 1.440824270248413} +03/04/2022 19:20:22 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 19:20:25 - INFO - codeparrot_training - Step 25425: {'lr': 0.0004697253760576572, 'samples': 13018112, 'steps': 25425, 'loss/train': 1.66150963306427} +03/04/2022 19:20:28 - INFO - codeparrot_training - Step 25426: {'lr': 0.00046972284467360217, 'samples': 13018624, 'steps': 25426, 'loss/train': 1.2757622003555298} +03/04/2022 19:20:31 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/04/2022 19:20:34 - INFO - codeparrot_training - Step 25427: {'lr': 0.0004697203131905433, 'samples': 13019136, 'steps': 25427, 'loss/train': 2.876934766769409} +03/04/2022 19:20:37 - INFO - codeparrot_training - Step 25428: {'lr': 0.00046971778160848196, 'samples': 13019648, 'steps': 25428, 'loss/train': 1.770692229270935} +03/04/2022 19:20:39 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/04/2022 19:20:42 - INFO - codeparrot_training - Step 25429: {'lr': 0.0004697152499274191, 'samples': 13020160, 'steps': 25429, 'loss/train': 0.806185781955719} +03/04/2022 19:20:45 - INFO - codeparrot_training - Step 25430: {'lr': 0.00046971271814735593, 'samples': 13020672, 'steps': 25430, 'loss/train': 1.650345802307129} +03/04/2022 19:20:48 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 19:20:51 - INFO - codeparrot_training - Step 25431: {'lr': 0.0004697101862682936, 'samples': 13021184, 'steps': 25431, 'loss/train': 1.8754361867904663} +03/04/2022 19:20:54 - INFO - codeparrot_training - Step 25432: {'lr': 0.00046970765429023336, 'samples': 13021696, 'steps': 25432, 'loss/train': 2.203113555908203} +03/04/2022 19:20:56 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/04/2022 19:20:59 - INFO - codeparrot_training - Step 25433: {'lr': 0.00046970512221317616, 'samples': 13022208, 'steps': 25433, 'loss/train': 1.2450807094573975} +03/04/2022 19:21:02 - INFO - codeparrot_training - Step 25434: {'lr': 0.00046970259003712323, 'samples': 13022720, 'steps': 25434, 'loss/train': 1.3730324506759644} +03/04/2022 19:21:05 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/04/2022 19:21:07 - INFO - codeparrot_training - Step 25435: {'lr': 0.00046970005776207575, 'samples': 13023232, 'steps': 25435, 'loss/train': 2.3566360473632812} +03/04/2022 19:21:11 - INFO - codeparrot_training - Step 25436: {'lr': 0.00046969752538803477, 'samples': 13023744, 'steps': 25436, 'loss/train': 0.9843683242797852} +03/04/2022 19:21:13 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/04/2022 19:21:16 - INFO - codeparrot_training - Step 25437: {'lr': 0.0004696949929150015, 'samples': 13024256, 'steps': 25437, 'loss/train': 1.8300721645355225} +03/04/2022 19:21:19 - INFO - codeparrot_training - Step 25438: {'lr': 0.00046969246034297697, 'samples': 13024768, 'steps': 25438, 'loss/train': 2.2559244632720947} +03/04/2022 19:21:22 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 19:21:24 - INFO - codeparrot_training - Step 25439: {'lr': 0.0004696899276719625, 'samples': 13025280, 'steps': 25439, 'loss/train': 2.0386831760406494} +03/04/2022 19:21:28 - INFO - codeparrot_training - Step 25440: {'lr': 0.0004696873949019591, 'samples': 13025792, 'steps': 25440, 'loss/train': 1.8705074787139893} +03/04/2022 19:21:30 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 19:21:33 - INFO - codeparrot_training - Step 25441: {'lr': 0.000469684862032968, 'samples': 13026304, 'steps': 25441, 'loss/train': 1.8020484447479248} +03/04/2022 19:21:36 - INFO - codeparrot_training - Step 25442: {'lr': 0.0004696823290649902, 'samples': 13026816, 'steps': 25442, 'loss/train': 1.6336922645568848} +03/04/2022 19:21:38 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 19:21:41 - INFO - codeparrot_training - Step 25443: {'lr': 0.000469679795998027, 'samples': 13027328, 'steps': 25443, 'loss/train': 2.300718307495117} +03/04/2022 19:21:44 - INFO - codeparrot_training - Step 25444: {'lr': 0.00046967726283207945, 'samples': 13027840, 'steps': 25444, 'loss/train': 2.009284496307373} +03/04/2022 19:21:47 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/04/2022 19:21:50 - INFO - codeparrot_training - Step 25445: {'lr': 0.0004696747295671487, 'samples': 13028352, 'steps': 25445, 'loss/train': 2.7127432823181152} +03/04/2022 19:21:53 - INFO - codeparrot_training - Step 25446: {'lr': 0.000469672196203236, 'samples': 13028864, 'steps': 25446, 'loss/train': 2.504605531692505} +03/04/2022 19:21:55 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 19:21:58 - INFO - codeparrot_training - Step 25447: {'lr': 0.0004696696627403423, 'samples': 13029376, 'steps': 25447, 'loss/train': 0.8868412375450134} +03/04/2022 19:22:01 - INFO - codeparrot_training - Step 25448: {'lr': 0.00046966712917846887, 'samples': 13029888, 'steps': 25448, 'loss/train': 2.0862550735473633} +03/04/2022 19:22:04 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 19:22:07 - INFO - codeparrot_training - Step 25449: {'lr': 0.00046966459551761684, 'samples': 13030400, 'steps': 25449, 'loss/train': 0.8388962149620056} +03/04/2022 19:22:10 - INFO - codeparrot_training - Step 25450: {'lr': 0.00046966206175778723, 'samples': 13030912, 'steps': 25450, 'loss/train': 1.3482890129089355} +03/04/2022 19:22:12 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 19:22:15 - INFO - codeparrot_training - Step 25451: {'lr': 0.0004696595278989814, 'samples': 13031424, 'steps': 25451, 'loss/train': 1.2773497104644775} +03/04/2022 19:22:18 - INFO - codeparrot_training - Step 25452: {'lr': 0.00046965699394120033, 'samples': 13031936, 'steps': 25452, 'loss/train': 1.2560234069824219} +03/04/2022 19:22:20 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 19:22:24 - INFO - codeparrot_training - Step 25453: {'lr': 0.0004696544598844452, 'samples': 13032448, 'steps': 25453, 'loss/train': 0.8781254887580872} +03/04/2022 19:22:27 - INFO - codeparrot_training - Step 25454: {'lr': 0.00046965192572871723, 'samples': 13032960, 'steps': 25454, 'loss/train': 2.633864164352417} +03/04/2022 19:22:29 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 19:22:32 - INFO - codeparrot_training - Step 25455: {'lr': 0.0004696493914740174, 'samples': 13033472, 'steps': 25455, 'loss/train': 0.208157017827034} +03/04/2022 19:22:35 - INFO - codeparrot_training - Step 25456: {'lr': 0.00046964685712034697, 'samples': 13033984, 'steps': 25456, 'loss/train': 2.106882333755493} +03/04/2022 19:22:38 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/04/2022 19:22:41 - INFO - codeparrot_training - Step 25457: {'lr': 0.00046964432266770713, 'samples': 13034496, 'steps': 25457, 'loss/train': 1.3349844217300415} +03/04/2022 19:22:44 - INFO - codeparrot_training - Step 25458: {'lr': 0.0004696417881160989, 'samples': 13035008, 'steps': 25458, 'loss/train': 1.5720466375350952} +03/04/2022 19:22:46 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 19:22:49 - INFO - codeparrot_training - Step 25459: {'lr': 0.0004696392534655234, 'samples': 13035520, 'steps': 25459, 'loss/train': 1.8213374614715576} +03/04/2022 19:22:52 - INFO - codeparrot_training - Step 25460: {'lr': 0.0004696367187159819, 'samples': 13036032, 'steps': 25460, 'loss/train': 1.1172958612442017} +03/04/2022 19:22:55 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 19:22:58 - INFO - codeparrot_training - Step 25461: {'lr': 0.00046963418386747547, 'samples': 13036544, 'steps': 25461, 'loss/train': 1.5118916034698486} +03/04/2022 19:23:01 - INFO - codeparrot_training - Step 25462: {'lr': 0.0004696316489200053, 'samples': 13037056, 'steps': 25462, 'loss/train': 2.1516897678375244} +03/04/2022 19:23:06 - INFO - codeparrot_training - Step 25463: {'lr': 0.00046962911387357246, 'samples': 13037568, 'steps': 25463, 'loss/train': 1.5250990390777588} +03/04/2022 19:23:09 - INFO - codeparrot_training - Step 25464: {'lr': 0.0004696265787281782, 'samples': 13038080, 'steps': 25464, 'loss/train': 1.4249238967895508} +03/04/2022 19:23:12 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 19:23:15 - INFO - codeparrot_training - Step 25465: {'lr': 0.0004696240434838235, 'samples': 13038592, 'steps': 25465, 'loss/train': 1.3011730909347534} +03/04/2022 19:23:18 - INFO - codeparrot_training - Step 25466: {'lr': 0.00046962150814050963, 'samples': 13039104, 'steps': 25466, 'loss/train': 1.488167643547058} +03/04/2022 19:23:21 - INFO - codeparrot_training - Step 25467: {'lr': 0.0004696189726982377, 'samples': 13039616, 'steps': 25467, 'loss/train': 2.148378610610962} +03/04/2022 19:23:22 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/04/2022 19:23:26 - INFO - codeparrot_training - Step 25468: {'lr': 0.00046961643715700885, 'samples': 13040128, 'steps': 25468, 'loss/train': 2.2054636478424072} +03/04/2022 19:23:30 - INFO - codeparrot_training - Step 25469: {'lr': 0.00046961390151682426, 'samples': 13040640, 'steps': 25469, 'loss/train': 2.311095952987671} +03/04/2022 19:23:30 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 19:23:35 - INFO - codeparrot_training - Step 25470: {'lr': 0.000469611365777685, 'samples': 13041152, 'steps': 25470, 'loss/train': 1.5300289392471313} +03/04/2022 19:23:38 - INFO - codeparrot_training - Step 25471: {'lr': 0.0004696088299395922, 'samples': 13041664, 'steps': 25471, 'loss/train': 2.0319151878356934} +03/04/2022 19:23:38 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 19:23:43 - INFO - codeparrot_training - Step 25472: {'lr': 0.0004696062940025471, 'samples': 13042176, 'steps': 25472, 'loss/train': 1.9446732997894287} +03/04/2022 19:23:46 - INFO - codeparrot_training - Step 25473: {'lr': 0.0004696037579665509, 'samples': 13042688, 'steps': 25473, 'loss/train': 2.5790936946868896} +03/04/2022 19:23:47 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/04/2022 19:23:52 - INFO - codeparrot_training - Step 25474: {'lr': 0.00046960122183160446, 'samples': 13043200, 'steps': 25474, 'loss/train': 1.8887279033660889} +03/04/2022 19:23:55 - INFO - codeparrot_training - Step 25475: {'lr': 0.00046959868559770914, 'samples': 13043712, 'steps': 25475, 'loss/train': 1.4355084896087646} +03/04/2022 19:23:55 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/04/2022 19:24:00 - INFO - codeparrot_training - Step 25476: {'lr': 0.00046959614926486606, 'samples': 13044224, 'steps': 25476, 'loss/train': 1.659017562866211} +03/04/2022 19:24:03 - INFO - codeparrot_training - Step 25477: {'lr': 0.00046959361283307636, 'samples': 13044736, 'steps': 25477, 'loss/train': 1.377712607383728} +03/04/2022 19:24:04 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 19:24:09 - INFO - codeparrot_training - Step 25478: {'lr': 0.0004695910763023412, 'samples': 13045248, 'steps': 25478, 'loss/train': 2.275122880935669} +03/04/2022 19:24:12 - INFO - codeparrot_training - Step 25479: {'lr': 0.0004695885396726616, 'samples': 13045760, 'steps': 25479, 'loss/train': 0.8331573605537415} +03/04/2022 19:24:12 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/04/2022 19:24:17 - INFO - codeparrot_training - Step 25480: {'lr': 0.00046958600294403887, 'samples': 13046272, 'steps': 25480, 'loss/train': 0.5317729711532593} +03/04/2022 19:24:20 - INFO - codeparrot_training - Step 25481: {'lr': 0.000469583466116474, 'samples': 13046784, 'steps': 25481, 'loss/train': 1.9140645265579224} +03/04/2022 19:24:21 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 19:24:25 - INFO - codeparrot_training - Step 25482: {'lr': 0.00046958092918996823, 'samples': 13047296, 'steps': 25482, 'loss/train': 1.4599580764770508} +03/04/2022 19:24:28 - INFO - codeparrot_training - Step 25483: {'lr': 0.0004695783921645227, 'samples': 13047808, 'steps': 25483, 'loss/train': 1.858157992362976} +03/04/2022 19:24:29 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/04/2022 19:24:34 - INFO - codeparrot_training - Step 25484: {'lr': 0.00046957585504013853, 'samples': 13048320, 'steps': 25484, 'loss/train': 1.5703688859939575} +03/04/2022 19:24:37 - INFO - codeparrot_training - Step 25485: {'lr': 0.0004695733178168169, 'samples': 13048832, 'steps': 25485, 'loss/train': 1.8283672332763672} +03/04/2022 19:24:37 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 19:24:42 - INFO - codeparrot_training - Step 25486: {'lr': 0.00046957078049455895, 'samples': 13049344, 'steps': 25486, 'loss/train': 1.4905426502227783} +03/04/2022 19:24:46 - INFO - codeparrot_training - Step 25487: {'lr': 0.00046956824307336565, 'samples': 13049856, 'steps': 25487, 'loss/train': 2.2556517124176025} +03/04/2022 19:24:46 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 19:24:51 - INFO - codeparrot_training - Step 25488: {'lr': 0.0004695657055532384, 'samples': 13050368, 'steps': 25488, 'loss/train': 2.299039125442505} +03/04/2022 19:24:54 - INFO - codeparrot_training - Step 25489: {'lr': 0.0004695631679341782, 'samples': 13050880, 'steps': 25489, 'loss/train': 1.6763784885406494} +03/04/2022 19:24:54 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 19:24:59 - INFO - codeparrot_training - Step 25490: {'lr': 0.0004695606302161862, 'samples': 13051392, 'steps': 25490, 'loss/train': 1.3162503242492676} +03/04/2022 19:25:03 - INFO - codeparrot_training - Step 25491: {'lr': 0.0004695580923992636, 'samples': 13051904, 'steps': 25491, 'loss/train': 1.248085856437683} +03/04/2022 19:25:03 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 19:25:08 - INFO - codeparrot_training - Step 25492: {'lr': 0.0004695555544834116, 'samples': 13052416, 'steps': 25492, 'loss/train': 3.236177921295166} +03/04/2022 19:25:11 - INFO - codeparrot_training - Step 25493: {'lr': 0.00046955301646863114, 'samples': 13052928, 'steps': 25493, 'loss/train': 2.4744601249694824} +03/04/2022 19:25:11 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/04/2022 19:25:16 - INFO - codeparrot_training - Step 25494: {'lr': 0.0004695504783549235, 'samples': 13053440, 'steps': 25494, 'loss/train': 2.267122268676758} +03/04/2022 19:25:20 - INFO - codeparrot_training - Step 25495: {'lr': 0.0004695479401422898, 'samples': 13053952, 'steps': 25495, 'loss/train': 1.790039300918579} +03/04/2022 19:25:20 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 19:25:25 - INFO - codeparrot_training - Step 25496: {'lr': 0.0004695454018307312, 'samples': 13054464, 'steps': 25496, 'loss/train': 1.5867931842803955} +03/04/2022 19:25:28 - INFO - codeparrot_training - Step 25497: {'lr': 0.0004695428634202488, 'samples': 13054976, 'steps': 25497, 'loss/train': 1.3060795068740845} +03/04/2022 19:25:30 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/04/2022 19:25:34 - INFO - codeparrot_training - Step 25498: {'lr': 0.0004695403249108438, 'samples': 13055488, 'steps': 25498, 'loss/train': 1.9687601327896118} +03/04/2022 19:25:37 - INFO - codeparrot_training - Step 25499: {'lr': 0.0004695377863025173, 'samples': 13056000, 'steps': 25499, 'loss/train': 2.364687919616699} +03/04/2022 19:25:38 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 19:25:42 - INFO - codeparrot_training - Step 25500: {'lr': 0.00046953524759527055, 'samples': 13056512, 'steps': 25500, 'loss/train': 1.4824386835098267} +03/04/2022 19:25:45 - INFO - codeparrot_training - Step 25501: {'lr': 0.0004695327087891045, 'samples': 13057024, 'steps': 25501, 'loss/train': 1.6287752389907837} +03/04/2022 19:25:46 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/04/2022 19:25:50 - INFO - codeparrot_training - Step 25502: {'lr': 0.00046953016988402044, 'samples': 13057536, 'steps': 25502, 'loss/train': 2.4598002433776855} +03/04/2022 19:25:53 - INFO - codeparrot_training - Step 25503: {'lr': 0.0004695276308800194, 'samples': 13058048, 'steps': 25503, 'loss/train': 2.146589517593384} +03/04/2022 19:25:54 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/04/2022 19:25:59 - INFO - codeparrot_training - Step 25504: {'lr': 0.00046952509177710267, 'samples': 13058560, 'steps': 25504, 'loss/train': 2.2702624797821045} +03/04/2022 19:26:02 - INFO - codeparrot_training - Step 25505: {'lr': 0.00046952255257527134, 'samples': 13059072, 'steps': 25505, 'loss/train': 2.3818323612213135} +03/04/2022 19:26:03 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 19:26:08 - INFO - codeparrot_training - Step 25506: {'lr': 0.0004695200132745265, 'samples': 13059584, 'steps': 25506, 'loss/train': 1.735205054283142} +03/04/2022 19:26:11 - INFO - codeparrot_training - Step 25507: {'lr': 0.00046951747387486933, 'samples': 13060096, 'steps': 25507, 'loss/train': 0.24268989264965057} +03/04/2022 19:26:14 - INFO - codeparrot_training - Step 25508: {'lr': 0.00046951493437630097, 'samples': 13060608, 'steps': 25508, 'loss/train': 1.9178614616394043} +03/04/2022 19:26:14 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 19:26:19 - INFO - codeparrot_training - Step 25509: {'lr': 0.0004695123947788226, 'samples': 13061120, 'steps': 25509, 'loss/train': 2.8096487522125244} +03/04/2022 19:26:22 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/04/2022 19:26:25 - INFO - codeparrot_training - Step 25510: {'lr': 0.0004695098550824353, 'samples': 13061632, 'steps': 25510, 'loss/train': 1.8710687160491943} +03/04/2022 19:26:28 - INFO - codeparrot_training - Step 25511: {'lr': 0.0004695073152871403, 'samples': 13062144, 'steps': 25511, 'loss/train': 1.6735628843307495} +03/04/2022 19:26:31 - INFO - codeparrot_training - Step 25512: {'lr': 0.00046950477539293864, 'samples': 13062656, 'steps': 25512, 'loss/train': 1.9222924709320068} +03/04/2022 19:26:31 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/04/2022 19:26:36 - INFO - codeparrot_training - Step 25513: {'lr': 0.0004695022353998315, 'samples': 13063168, 'steps': 25513, 'loss/train': 1.86252760887146} +03/04/2022 19:26:40 - INFO - codeparrot_training - Step 25514: {'lr': 0.0004694996953078201, 'samples': 13063680, 'steps': 25514, 'loss/train': 2.654667615890503} +03/04/2022 19:26:40 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 19:26:45 - INFO - codeparrot_training - Step 25515: {'lr': 0.0004694971551169055, 'samples': 13064192, 'steps': 25515, 'loss/train': 1.5680612325668335} +03/04/2022 19:26:48 - INFO - codeparrot_training - Step 25516: {'lr': 0.00046949461482708875, 'samples': 13064704, 'steps': 25516, 'loss/train': 2.2980082035064697} +03/04/2022 19:26:48 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 19:26:53 - INFO - codeparrot_training - Step 25517: {'lr': 0.0004694920744383713, 'samples': 13065216, 'steps': 25517, 'loss/train': 1.0531436204910278} +03/04/2022 19:26:57 - INFO - codeparrot_training - Step 25518: {'lr': 0.000469489533950754, 'samples': 13065728, 'steps': 25518, 'loss/train': 2.043027639389038} +03/04/2022 19:26:57 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 19:27:02 - INFO - codeparrot_training - Step 25519: {'lr': 0.00046948699336423817, 'samples': 13066240, 'steps': 25519, 'loss/train': 2.1827847957611084} +03/04/2022 19:27:05 - INFO - codeparrot_training - Step 25520: {'lr': 0.0004694844526788248, 'samples': 13066752, 'steps': 25520, 'loss/train': 0.9641440510749817} +03/04/2022 19:27:05 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/04/2022 19:27:10 - INFO - codeparrot_training - Step 25521: {'lr': 0.0004694819118945152, 'samples': 13067264, 'steps': 25521, 'loss/train': 1.4785652160644531} +03/04/2022 19:27:13 - INFO - codeparrot_training - Step 25522: {'lr': 0.00046947937101131046, 'samples': 13067776, 'steps': 25522, 'loss/train': 1.9741688966751099} +03/04/2022 19:27:14 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/04/2022 19:27:19 - INFO - codeparrot_training - Step 25523: {'lr': 0.0004694768300292116, 'samples': 13068288, 'steps': 25523, 'loss/train': 1.9820784330368042} +03/04/2022 19:27:22 - INFO - codeparrot_training - Step 25524: {'lr': 0.0004694742889482199, 'samples': 13068800, 'steps': 25524, 'loss/train': 1.8756790161132812} +03/04/2022 19:27:22 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 19:27:27 - INFO - codeparrot_training - Step 25525: {'lr': 0.0004694717477683365, 'samples': 13069312, 'steps': 25525, 'loss/train': 2.252000331878662} +03/04/2022 19:27:30 - INFO - codeparrot_training - Step 25526: {'lr': 0.0004694692064895625, 'samples': 13069824, 'steps': 25526, 'loss/train': 1.2726101875305176} +03/04/2022 19:27:30 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 19:27:36 - INFO - codeparrot_training - Step 25527: {'lr': 0.0004694666651118991, 'samples': 13070336, 'steps': 25527, 'loss/train': 1.942238211631775} +03/04/2022 19:27:39 - INFO - codeparrot_training - Step 25528: {'lr': 0.00046946412363534735, 'samples': 13070848, 'steps': 25528, 'loss/train': 1.694091558456421} +03/04/2022 19:27:39 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/04/2022 19:27:44 - INFO - codeparrot_training - Step 25529: {'lr': 0.0004694615820599085, 'samples': 13071360, 'steps': 25529, 'loss/train': 1.6104822158813477} +03/04/2022 19:27:47 - INFO - codeparrot_training - Step 25530: {'lr': 0.00046945904038558364, 'samples': 13071872, 'steps': 25530, 'loss/train': 2.3564205169677734} +03/04/2022 19:27:47 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/04/2022 19:27:53 - INFO - codeparrot_training - Step 25531: {'lr': 0.00046945649861237387, 'samples': 13072384, 'steps': 25531, 'loss/train': 2.644564628601074} +03/04/2022 19:27:56 - INFO - codeparrot_training - Step 25532: {'lr': 0.00046945395674028047, 'samples': 13072896, 'steps': 25532, 'loss/train': 1.0451500415802002} +03/04/2022 19:27:57 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 19:28:01 - INFO - codeparrot_training - Step 25533: {'lr': 0.0004694514147693044, 'samples': 13073408, 'steps': 25533, 'loss/train': 6.538641929626465} +03/04/2022 19:28:04 - INFO - codeparrot_training - Step 25534: {'lr': 0.000469448872699447, 'samples': 13073920, 'steps': 25534, 'loss/train': 1.4924501180648804} +03/04/2022 19:28:06 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/04/2022 19:28:09 - INFO - codeparrot_training - Step 25535: {'lr': 0.0004694463305307093, 'samples': 13074432, 'steps': 25535, 'loss/train': 1.0473151206970215} +03/04/2022 19:28:13 - INFO - codeparrot_training - Step 25536: {'lr': 0.00046944378826309244, 'samples': 13074944, 'steps': 25536, 'loss/train': 1.9682713747024536} +03/04/2022 19:28:14 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 19:28:18 - INFO - codeparrot_training - Step 25537: {'lr': 0.00046944124589659765, 'samples': 13075456, 'steps': 25537, 'loss/train': 2.4293124675750732} +03/04/2022 19:28:21 - INFO - codeparrot_training - Step 25538: {'lr': 0.00046943870343122595, 'samples': 13075968, 'steps': 25538, 'loss/train': 2.119398832321167} +03/04/2022 19:28:22 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/04/2022 19:28:27 - INFO - codeparrot_training - Step 25539: {'lr': 0.0004694361608669786, 'samples': 13076480, 'steps': 25539, 'loss/train': 2.2235565185546875} +03/04/2022 19:28:30 - INFO - codeparrot_training - Step 25540: {'lr': 0.0004694336182038567, 'samples': 13076992, 'steps': 25540, 'loss/train': 1.9418087005615234} +03/04/2022 19:28:33 - INFO - codeparrot_training - Step 25541: {'lr': 0.00046943107544186144, 'samples': 13077504, 'steps': 25541, 'loss/train': 2.049295663833618} +03/04/2022 19:28:33 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 19:28:38 - INFO - codeparrot_training - Step 25542: {'lr': 0.0004694285325809938, 'samples': 13078016, 'steps': 25542, 'loss/train': 2.1261496543884277} +03/04/2022 19:28:41 - INFO - codeparrot_training - Step 25543: {'lr': 0.00046942598962125515, 'samples': 13078528, 'steps': 25543, 'loss/train': 1.9301607608795166} +03/04/2022 19:28:42 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 19:28:47 - INFO - codeparrot_training - Step 25544: {'lr': 0.00046942344656264657, 'samples': 13079040, 'steps': 25544, 'loss/train': 2.0735669136047363} +03/04/2022 19:28:50 - INFO - codeparrot_training - Step 25545: {'lr': 0.0004694209034051691, 'samples': 13079552, 'steps': 25545, 'loss/train': 1.5645065307617188} +03/04/2022 19:28:50 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 19:28:55 - INFO - codeparrot_training - Step 25546: {'lr': 0.00046941836014882394, 'samples': 13080064, 'steps': 25546, 'loss/train': 2.1198136806488037} +03/04/2022 19:28:58 - INFO - codeparrot_training - Step 25547: {'lr': 0.00046941581679361234, 'samples': 13080576, 'steps': 25547, 'loss/train': 1.9883184432983398} +03/04/2022 19:28:59 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 19:29:04 - INFO - codeparrot_training - Step 25548: {'lr': 0.00046941327333953526, 'samples': 13081088, 'steps': 25548, 'loss/train': 1.9838398694992065} +03/04/2022 19:29:07 - INFO - codeparrot_training - Step 25549: {'lr': 0.00046941072978659397, 'samples': 13081600, 'steps': 25549, 'loss/train': 2.0995378494262695} +03/04/2022 19:29:07 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/04/2022 19:29:12 - INFO - codeparrot_training - Step 25550: {'lr': 0.00046940818613478964, 'samples': 13082112, 'steps': 25550, 'loss/train': 1.506577968597412} +03/04/2022 19:29:15 - INFO - codeparrot_training - Step 25551: {'lr': 0.0004694056423841233, 'samples': 13082624, 'steps': 25551, 'loss/train': 2.5039942264556885} +03/04/2022 19:29:16 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 19:29:20 - INFO - codeparrot_training - Step 25552: {'lr': 0.00046940309853459625, 'samples': 13083136, 'steps': 25552, 'loss/train': 2.0975401401519775} +03/04/2022 19:29:24 - INFO - codeparrot_training - Step 25553: {'lr': 0.00046940055458620945, 'samples': 13083648, 'steps': 25553, 'loss/train': 2.0207152366638184} +03/04/2022 19:29:24 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/04/2022 19:29:29 - INFO - codeparrot_training - Step 25554: {'lr': 0.0004693980105389642, 'samples': 13084160, 'steps': 25554, 'loss/train': 2.4204697608947754} +03/04/2022 19:29:32 - INFO - codeparrot_training - Step 25555: {'lr': 0.00046939546639286156, 'samples': 13084672, 'steps': 25555, 'loss/train': 2.580156087875366} +03/04/2022 19:29:32 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/04/2022 19:29:37 - INFO - codeparrot_training - Step 25556: {'lr': 0.00046939292214790275, 'samples': 13085184, 'steps': 25556, 'loss/train': 2.19118595123291} +03/04/2022 19:29:41 - INFO - codeparrot_training - Step 25557: {'lr': 0.0004693903778040889, 'samples': 13085696, 'steps': 25557, 'loss/train': 1.671200156211853} +03/04/2022 19:29:41 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 19:29:46 - INFO - codeparrot_training - Step 25558: {'lr': 0.0004693878333614211, 'samples': 13086208, 'steps': 25558, 'loss/train': 1.713773250579834} +03/04/2022 19:29:49 - INFO - codeparrot_training - Step 25559: {'lr': 0.0004693852888199005, 'samples': 13086720, 'steps': 25559, 'loss/train': 1.9031437635421753} +03/04/2022 19:29:49 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 19:29:54 - INFO - codeparrot_training - Step 25560: {'lr': 0.0004693827441795283, 'samples': 13087232, 'steps': 25560, 'loss/train': 1.929993748664856} +03/04/2022 19:29:58 - INFO - codeparrot_training - Step 25561: {'lr': 0.00046938019944030556, 'samples': 13087744, 'steps': 25561, 'loss/train': 1.4844731092453003} +03/04/2022 19:29:59 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 19:30:03 - INFO - codeparrot_training - Step 25562: {'lr': 0.00046937765460223357, 'samples': 13088256, 'steps': 25562, 'loss/train': 2.0093929767608643} +03/04/2022 19:30:06 - INFO - codeparrot_training - Step 25563: {'lr': 0.0004693751096653134, 'samples': 13088768, 'steps': 25563, 'loss/train': 1.611359715461731} +03/04/2022 19:30:07 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/04/2022 19:30:11 - INFO - codeparrot_training - Step 25564: {'lr': 0.00046937256462954615, 'samples': 13089280, 'steps': 25564, 'loss/train': 1.8652870655059814} +03/04/2022 19:30:15 - INFO - codeparrot_training - Step 25565: {'lr': 0.00046937001949493294, 'samples': 13089792, 'steps': 25565, 'loss/train': 2.514896869659424} +03/04/2022 19:30:15 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 19:30:20 - INFO - codeparrot_training - Step 25566: {'lr': 0.0004693674742614751, 'samples': 13090304, 'steps': 25566, 'loss/train': 2.0481574535369873} +03/04/2022 19:30:23 - INFO - codeparrot_training - Step 25567: {'lr': 0.0004693649289291736, 'samples': 13090816, 'steps': 25567, 'loss/train': 1.851012110710144} +03/04/2022 19:30:24 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 19:30:28 - INFO - codeparrot_training - Step 25568: {'lr': 0.0004693623834980297, 'samples': 13091328, 'steps': 25568, 'loss/train': 6.5985307693481445} +03/04/2022 19:30:32 - INFO - codeparrot_training - Step 25569: {'lr': 0.00046935983796804443, 'samples': 13091840, 'steps': 25569, 'loss/train': 1.994016170501709} +03/04/2022 19:30:33 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 19:30:37 - INFO - codeparrot_training - Step 25570: {'lr': 0.000469357292339219, 'samples': 13092352, 'steps': 25570, 'loss/train': 2.1103549003601074} +03/04/2022 19:30:40 - INFO - codeparrot_training - Step 25571: {'lr': 0.00046935474661155465, 'samples': 13092864, 'steps': 25571, 'loss/train': 2.151684045791626} +03/04/2022 19:30:41 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 19:30:45 - INFO - codeparrot_training - Step 25572: {'lr': 0.00046935220078505235, 'samples': 13093376, 'steps': 25572, 'loss/train': 1.9302334785461426} +03/04/2022 19:30:48 - INFO - codeparrot_training - Step 25573: {'lr': 0.00046934965485971337, 'samples': 13093888, 'steps': 25573, 'loss/train': 1.9277316331863403} +03/04/2022 19:30:50 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 19:30:54 - INFO - codeparrot_training - Step 25574: {'lr': 0.00046934710883553884, 'samples': 13094400, 'steps': 25574, 'loss/train': 1.8211218118667603} +03/04/2022 19:30:57 - INFO - codeparrot_training - Step 25575: {'lr': 0.00046934456271252985, 'samples': 13094912, 'steps': 25575, 'loss/train': 2.893122911453247} +03/04/2022 19:30:58 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 19:31:02 - INFO - codeparrot_training - Step 25576: {'lr': 0.0004693420164906876, 'samples': 13095424, 'steps': 25576, 'loss/train': 2.557299852371216} +03/04/2022 19:31:06 - INFO - codeparrot_training - Step 25577: {'lr': 0.0004693394701700132, 'samples': 13095936, 'steps': 25577, 'loss/train': 2.520155906677246} +03/04/2022 19:31:07 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 19:31:11 - INFO - codeparrot_training - Step 25578: {'lr': 0.00046933692375050783, 'samples': 13096448, 'steps': 25578, 'loss/train': 1.8020070791244507} +03/04/2022 19:31:14 - INFO - codeparrot_training - Step 25579: {'lr': 0.00046933437723217265, 'samples': 13096960, 'steps': 25579, 'loss/train': 2.3446097373962402} +03/04/2022 19:31:15 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 19:31:19 - INFO - codeparrot_training - Step 25580: {'lr': 0.0004693318306150087, 'samples': 13097472, 'steps': 25580, 'loss/train': 1.7016338109970093} +03/04/2022 19:31:22 - INFO - codeparrot_training - Step 25581: {'lr': 0.0004693292838990173, 'samples': 13097984, 'steps': 25581, 'loss/train': 1.2174557447433472} +03/04/2022 19:31:23 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/04/2022 19:31:28 - INFO - codeparrot_training - Step 25582: {'lr': 0.0004693267370841995, 'samples': 13098496, 'steps': 25582, 'loss/train': 0.9873405694961548} +03/04/2022 19:31:31 - INFO - codeparrot_training - Step 25583: {'lr': 0.00046932419017055646, 'samples': 13099008, 'steps': 25583, 'loss/train': 1.4384894371032715} +03/04/2022 19:31:31 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 19:31:36 - INFO - codeparrot_training - Step 25584: {'lr': 0.0004693216431580893, 'samples': 13099520, 'steps': 25584, 'loss/train': 1.1552149057388306} +03/04/2022 19:31:39 - INFO - codeparrot_training - Step 25585: {'lr': 0.00046931909604679925, 'samples': 13100032, 'steps': 25585, 'loss/train': 2.5182080268859863} +03/04/2022 19:31:40 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 19:31:44 - INFO - codeparrot_training - Step 25586: {'lr': 0.0004693165488366873, 'samples': 13100544, 'steps': 25586, 'loss/train': 2.044692039489746} +03/04/2022 19:31:48 - INFO - codeparrot_training - Step 25587: {'lr': 0.00046931400152775473, 'samples': 13101056, 'steps': 25587, 'loss/train': 1.7909234762191772} +03/04/2022 19:31:48 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/04/2022 19:31:53 - INFO - codeparrot_training - Step 25588: {'lr': 0.00046931145412000265, 'samples': 13101568, 'steps': 25588, 'loss/train': 1.7947889566421509} +03/04/2022 19:31:56 - INFO - codeparrot_training - Step 25589: {'lr': 0.00046930890661343226, 'samples': 13102080, 'steps': 25589, 'loss/train': 1.3816215991973877} +03/04/2022 19:31:56 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 19:32:01 - INFO - codeparrot_training - Step 25590: {'lr': 0.00046930635900804466, 'samples': 13102592, 'steps': 25590, 'loss/train': 2.793059825897217} +03/04/2022 19:32:05 - INFO - codeparrot_training - Step 25591: {'lr': 0.0004693038113038409, 'samples': 13103104, 'steps': 25591, 'loss/train': 1.6341270208358765} +03/04/2022 19:32:05 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 19:32:10 - INFO - codeparrot_training - Step 25592: {'lr': 0.0004693012635008224, 'samples': 13103616, 'steps': 25592, 'loss/train': 2.15512752532959} +03/04/2022 19:32:13 - INFO - codeparrot_training - Step 25593: {'lr': 0.00046929871559898994, 'samples': 13104128, 'steps': 25593, 'loss/train': 1.784670352935791} +03/04/2022 19:32:13 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/04/2022 19:32:18 - INFO - codeparrot_training - Step 25594: {'lr': 0.00046929616759834505, 'samples': 13104640, 'steps': 25594, 'loss/train': 2.7549431324005127} +03/04/2022 19:32:21 - INFO - codeparrot_training - Step 25595: {'lr': 0.00046929361949888857, 'samples': 13105152, 'steps': 25595, 'loss/train': 2.017996311187744} +03/04/2022 19:32:22 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 19:32:27 - INFO - codeparrot_training - Step 25596: {'lr': 0.00046929107130062176, 'samples': 13105664, 'steps': 25596, 'loss/train': 2.2522010803222656} +03/04/2022 19:32:30 - INFO - codeparrot_training - Step 25597: {'lr': 0.00046928852300354585, 'samples': 13106176, 'steps': 25597, 'loss/train': 2.138850688934326} +03/04/2022 19:32:30 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/04/2022 19:32:35 - INFO - codeparrot_training - Step 25598: {'lr': 0.0004692859746076619, 'samples': 13106688, 'steps': 25598, 'loss/train': 1.3575628995895386} +03/04/2022 19:32:38 - INFO - codeparrot_training - Step 25599: {'lr': 0.00046928342611297105, 'samples': 13107200, 'steps': 25599, 'loss/train': 1.8996742963790894} +03/04/2022 19:32:39 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/04/2022 19:32:44 - INFO - codeparrot_training - Step 25600: {'lr': 0.00046928087751947444, 'samples': 13107712, 'steps': 25600, 'loss/train': 1.523504614830017} +03/04/2022 19:32:47 - INFO - codeparrot_training - Step 25601: {'lr': 0.00046927832882717323, 'samples': 13108224, 'steps': 25601, 'loss/train': 1.3956414461135864} +03/04/2022 19:32:47 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 19:32:52 - INFO - codeparrot_training - Step 25602: {'lr': 0.0004692757800360687, 'samples': 13108736, 'steps': 25602, 'loss/train': 2.287830352783203} +03/04/2022 19:32:55 - INFO - codeparrot_training - Step 25603: {'lr': 0.0004692732311461618, 'samples': 13109248, 'steps': 25603, 'loss/train': 2.1512067317962646} +03/04/2022 19:32:56 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 19:33:00 - INFO - codeparrot_training - Step 25604: {'lr': 0.0004692706821574538, 'samples': 13109760, 'steps': 25604, 'loss/train': 1.6540371179580688} +03/04/2022 19:33:04 - INFO - codeparrot_training - Step 25605: {'lr': 0.00046926813306994586, 'samples': 13110272, 'steps': 25605, 'loss/train': 1.6433981657028198} +03/04/2022 19:33:04 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/04/2022 19:33:09 - INFO - codeparrot_training - Step 25606: {'lr': 0.00046926558388363904, 'samples': 13110784, 'steps': 25606, 'loss/train': 1.4688023328781128} +03/04/2022 19:33:12 - INFO - codeparrot_training - Step 25607: {'lr': 0.00046926303459853447, 'samples': 13111296, 'steps': 25607, 'loss/train': 0.9257676601409912} +03/04/2022 19:33:12 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 19:33:17 - INFO - codeparrot_training - Step 25608: {'lr': 0.00046926048521463344, 'samples': 13111808, 'steps': 25608, 'loss/train': 1.3039506673812866} +03/04/2022 19:33:20 - INFO - codeparrot_training - Step 25609: {'lr': 0.000469257935731937, 'samples': 13112320, 'steps': 25609, 'loss/train': 1.5367895364761353} +03/04/2022 19:33:21 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/04/2022 19:33:26 - INFO - codeparrot_training - Step 25610: {'lr': 0.0004692553861504463, 'samples': 13112832, 'steps': 25610, 'loss/train': 2.0313289165496826} +03/04/2022 19:33:29 - INFO - codeparrot_training - Step 25611: {'lr': 0.00046925283647016253, 'samples': 13113344, 'steps': 25611, 'loss/train': 1.483324408531189} +03/04/2022 19:33:29 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 19:33:34 - INFO - codeparrot_training - Step 25612: {'lr': 0.0004692502866910868, 'samples': 13113856, 'steps': 25612, 'loss/train': 1.3099238872528076} +03/04/2022 19:33:37 - INFO - codeparrot_training - Step 25613: {'lr': 0.0004692477368132203, 'samples': 13114368, 'steps': 25613, 'loss/train': 2.0489625930786133} +03/04/2022 19:33:38 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 19:33:43 - INFO - codeparrot_training - Step 25614: {'lr': 0.0004692451868365641, 'samples': 13114880, 'steps': 25614, 'loss/train': 2.0967164039611816} +03/04/2022 19:33:46 - INFO - codeparrot_training - Step 25615: {'lr': 0.00046924263676111945, 'samples': 13115392, 'steps': 25615, 'loss/train': 1.089672565460205} +03/04/2022 19:33:46 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 19:33:51 - INFO - codeparrot_training - Step 25616: {'lr': 0.00046924008658688745, 'samples': 13115904, 'steps': 25616, 'loss/train': 1.4431177377700806} +03/04/2022 19:33:54 - INFO - codeparrot_training - Step 25617: {'lr': 0.00046923753631386924, 'samples': 13116416, 'steps': 25617, 'loss/train': 1.235486626625061} +03/04/2022 19:33:54 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 19:33:59 - INFO - codeparrot_training - Step 25618: {'lr': 0.0004692349859420659, 'samples': 13116928, 'steps': 25618, 'loss/train': 1.563315987586975} +03/04/2022 19:34:03 - INFO - codeparrot_training - Step 25619: {'lr': 0.00046923243547147874, 'samples': 13117440, 'steps': 25619, 'loss/train': 2.0744869709014893} +03/04/2022 19:34:03 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/04/2022 19:34:08 - INFO - codeparrot_training - Step 25620: {'lr': 0.0004692298849021088, 'samples': 13117952, 'steps': 25620, 'loss/train': 1.7623610496520996} +03/04/2022 19:34:11 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 19:34:13 - INFO - codeparrot_training - Step 25621: {'lr': 0.00046922733423395736, 'samples': 13118464, 'steps': 25621, 'loss/train': 1.1628296375274658} +03/04/2022 19:34:16 - INFO - codeparrot_training - Step 25622: {'lr': 0.0004692247834670253, 'samples': 13118976, 'steps': 25622, 'loss/train': 1.7978562116622925} +03/04/2022 19:34:20 - INFO - codeparrot_training - Step 25623: {'lr': 0.000469222232601314, 'samples': 13119488, 'steps': 25623, 'loss/train': 1.954198956489563} +03/04/2022 19:34:20 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 19:34:25 - INFO - codeparrot_training - Step 25624: {'lr': 0.0004692196816368246, 'samples': 13120000, 'steps': 25624, 'loss/train': 1.76058030128479} +03/04/2022 19:34:28 - INFO - codeparrot_training - Step 25625: {'lr': 0.00046921713057355817, 'samples': 13120512, 'steps': 25625, 'loss/train': 0.46784284710884094} +03/04/2022 19:34:28 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/04/2022 19:34:34 - INFO - codeparrot_training - Step 25626: {'lr': 0.0004692145794115159, 'samples': 13121024, 'steps': 25626, 'loss/train': 1.738781452178955} +03/04/2022 19:34:37 - INFO - codeparrot_training - Step 25627: {'lr': 0.00046921202815069883, 'samples': 13121536, 'steps': 25627, 'loss/train': 2.209368944168091} +03/04/2022 19:34:37 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 19:34:42 - INFO - codeparrot_training - Step 25628: {'lr': 0.00046920947679110833, 'samples': 13122048, 'steps': 25628, 'loss/train': 1.9466670751571655} +03/04/2022 19:34:45 - INFO - codeparrot_training - Step 25629: {'lr': 0.00046920692533274533, 'samples': 13122560, 'steps': 25629, 'loss/train': 1.9629749059677124} +03/04/2022 19:34:45 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 19:34:50 - INFO - codeparrot_training - Step 25630: {'lr': 0.0004692043737756111, 'samples': 13123072, 'steps': 25630, 'loss/train': 1.9756267070770264} +03/04/2022 19:34:54 - INFO - codeparrot_training - Step 25631: {'lr': 0.00046920182211970677, 'samples': 13123584, 'steps': 25631, 'loss/train': 0.9821460247039795} +03/04/2022 19:34:54 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 19:34:59 - INFO - codeparrot_training - Step 25632: {'lr': 0.00046919927036503353, 'samples': 13124096, 'steps': 25632, 'loss/train': 2.324683904647827} +03/04/2022 19:35:02 - INFO - codeparrot_training - Step 25633: {'lr': 0.0004691967185115924, 'samples': 13124608, 'steps': 25633, 'loss/train': 1.4814739227294922} +03/04/2022 19:35:02 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 19:35:07 - INFO - codeparrot_training - Step 25634: {'lr': 0.00046919416655938465, 'samples': 13125120, 'steps': 25634, 'loss/train': 1.478456974029541} +03/04/2022 19:35:11 - INFO - codeparrot_training - Step 25635: {'lr': 0.0004691916145084113, 'samples': 13125632, 'steps': 25635, 'loss/train': 2.4840168952941895} +03/04/2022 19:35:11 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/04/2022 19:35:16 - INFO - codeparrot_training - Step 25636: {'lr': 0.0004691890623586737, 'samples': 13126144, 'steps': 25636, 'loss/train': 2.018822193145752} +03/04/2022 19:35:19 - INFO - codeparrot_training - Step 25637: {'lr': 0.00046918651011017287, 'samples': 13126656, 'steps': 25637, 'loss/train': 2.5675034523010254} +03/04/2022 19:35:19 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 19:35:24 - INFO - codeparrot_training - Step 25638: {'lr': 0.00046918395776290997, 'samples': 13127168, 'steps': 25638, 'loss/train': 1.7031505107879639} +03/04/2022 19:35:27 - INFO - codeparrot_training - Step 25639: {'lr': 0.0004691814053168861, 'samples': 13127680, 'steps': 25639, 'loss/train': 1.558652400970459} +03/04/2022 19:35:28 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 19:35:33 - INFO - codeparrot_training - Step 25640: {'lr': 0.0004691788527721026, 'samples': 13128192, 'steps': 25640, 'loss/train': 1.2536239624023438} +03/04/2022 19:35:36 - INFO - codeparrot_training - Step 25641: {'lr': 0.0004691763001285604, 'samples': 13128704, 'steps': 25641, 'loss/train': 1.5706579685211182} +03/04/2022 19:35:36 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 19:35:41 - INFO - codeparrot_training - Step 25642: {'lr': 0.0004691737473862607, 'samples': 13129216, 'steps': 25642, 'loss/train': 1.7827893495559692} +03/04/2022 19:35:44 - INFO - codeparrot_training - Step 25643: {'lr': 0.00046917119454520487, 'samples': 13129728, 'steps': 25643, 'loss/train': 1.0207802057266235} +03/04/2022 19:35:45 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 19:35:50 - INFO - codeparrot_training - Step 25644: {'lr': 0.00046916864160539376, 'samples': 13130240, 'steps': 25644, 'loss/train': 2.112990140914917} +03/04/2022 19:35:53 - INFO - codeparrot_training - Step 25645: {'lr': 0.00046916608856682865, 'samples': 13130752, 'steps': 25645, 'loss/train': 1.4446650743484497} +03/04/2022 19:35:53 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/04/2022 19:35:58 - INFO - codeparrot_training - Step 25646: {'lr': 0.0004691635354295106, 'samples': 13131264, 'steps': 25646, 'loss/train': 1.5009338855743408} +03/04/2022 19:36:01 - INFO - codeparrot_training - Step 25647: {'lr': 0.00046916098219344093, 'samples': 13131776, 'steps': 25647, 'loss/train': 1.0889884233474731} +03/04/2022 19:36:01 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 19:36:06 - INFO - codeparrot_training - Step 25648: {'lr': 0.0004691584288586207, 'samples': 13132288, 'steps': 25648, 'loss/train': 1.8314423561096191} +03/04/2022 19:36:09 - INFO - codeparrot_training - Step 25649: {'lr': 0.0004691558754250511, 'samples': 13132800, 'steps': 25649, 'loss/train': 1.8279274702072144} +03/04/2022 19:36:10 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 19:36:15 - INFO - codeparrot_training - Step 25650: {'lr': 0.0004691533218927332, 'samples': 13133312, 'steps': 25650, 'loss/train': 1.9700393676757812} +03/04/2022 19:36:18 - INFO - codeparrot_training - Step 25651: {'lr': 0.00046915076826166814, 'samples': 13133824, 'steps': 25651, 'loss/train': 2.469473123550415} +03/04/2022 19:36:18 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 19:36:24 - INFO - codeparrot_training - Step 25652: {'lr': 0.0004691482145318572, 'samples': 13134336, 'steps': 25652, 'loss/train': 1.3389499187469482} +03/04/2022 19:36:27 - INFO - codeparrot_training - Step 25653: {'lr': 0.00046914566070330144, 'samples': 13134848, 'steps': 25653, 'loss/train': 2.1133339405059814} +03/04/2022 19:36:28 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/04/2022 19:36:32 - INFO - codeparrot_training - Step 25654: {'lr': 0.00046914310677600204, 'samples': 13135360, 'steps': 25654, 'loss/train': 1.6804629564285278} +03/04/2022 19:36:35 - INFO - codeparrot_training - Step 25655: {'lr': 0.00046914055274996017, 'samples': 13135872, 'steps': 25655, 'loss/train': 2.234924077987671} +03/04/2022 19:36:37 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/04/2022 19:36:40 - INFO - codeparrot_training - Step 25656: {'lr': 0.00046913799862517686, 'samples': 13136384, 'steps': 25656, 'loss/train': 0.47969186305999756} +03/04/2022 19:36:43 - INFO - codeparrot_training - Step 25657: {'lr': 0.0004691354444016534, 'samples': 13136896, 'steps': 25657, 'loss/train': 1.9764001369476318} +03/04/2022 19:36:45 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 19:36:49 - INFO - codeparrot_training - Step 25658: {'lr': 0.00046913289007939087, 'samples': 13137408, 'steps': 25658, 'loss/train': 1.7756062746047974} +03/04/2022 19:36:52 - INFO - codeparrot_training - Step 25659: {'lr': 0.00046913033565839046, 'samples': 13137920, 'steps': 25659, 'loss/train': 1.8282790184020996} +03/04/2022 19:36:53 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 19:36:57 - INFO - codeparrot_training - Step 25660: {'lr': 0.0004691277811386533, 'samples': 13138432, 'steps': 25660, 'loss/train': 2.614138126373291} +03/04/2022 19:37:00 - INFO - codeparrot_training - Step 25661: {'lr': 0.0004691252265201805, 'samples': 13138944, 'steps': 25661, 'loss/train': 1.8260549306869507} +03/04/2022 19:37:02 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/04/2022 19:37:06 - INFO - codeparrot_training - Step 25662: {'lr': 0.00046912267180297337, 'samples': 13139456, 'steps': 25662, 'loss/train': 1.4136120080947876} +03/04/2022 19:37:09 - INFO - codeparrot_training - Step 25663: {'lr': 0.0004691201169870328, 'samples': 13139968, 'steps': 25663, 'loss/train': 1.2220109701156616} +03/04/2022 19:37:10 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 19:37:14 - INFO - codeparrot_training - Step 25664: {'lr': 0.00046911756207236024, 'samples': 13140480, 'steps': 25664, 'loss/train': 2.043792724609375} +03/04/2022 19:37:17 - INFO - codeparrot_training - Step 25665: {'lr': 0.0004691150070589566, 'samples': 13140992, 'steps': 25665, 'loss/train': 1.6563245058059692} +03/04/2022 19:37:19 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 19:37:23 - INFO - codeparrot_training - Step 25666: {'lr': 0.00046911245194682306, 'samples': 13141504, 'steps': 25666, 'loss/train': 2.127086877822876} +03/04/2022 19:37:26 - INFO - codeparrot_training - Step 25667: {'lr': 0.00046910989673596093, 'samples': 13142016, 'steps': 25667, 'loss/train': 1.2142102718353271} +03/04/2022 19:37:27 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/04/2022 19:37:31 - INFO - codeparrot_training - Step 25668: {'lr': 0.00046910734142637124, 'samples': 13142528, 'steps': 25668, 'loss/train': 1.6071752309799194} +03/04/2022 19:37:34 - INFO - codeparrot_training - Step 25669: {'lr': 0.00046910478601805514, 'samples': 13143040, 'steps': 25669, 'loss/train': 1.6032277345657349} +03/04/2022 19:37:35 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 19:37:39 - INFO - codeparrot_training - Step 25670: {'lr': 0.0004691022305110138, 'samples': 13143552, 'steps': 25670, 'loss/train': 1.4956276416778564} +03/04/2022 19:37:43 - INFO - codeparrot_training - Step 25671: {'lr': 0.0004690996749052484, 'samples': 13144064, 'steps': 25671, 'loss/train': 1.9554494619369507} +03/04/2022 19:37:44 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 19:37:48 - INFO - codeparrot_training - Step 25672: {'lr': 0.00046909711920076, 'samples': 13144576, 'steps': 25672, 'loss/train': 2.5203769207000732} +03/04/2022 19:37:51 - INFO - codeparrot_training - Step 25673: {'lr': 0.0004690945633975499, 'samples': 13145088, 'steps': 25673, 'loss/train': 2.047031879425049} +03/04/2022 19:37:53 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/04/2022 19:37:56 - INFO - codeparrot_training - Step 25674: {'lr': 0.00046909200749561914, 'samples': 13145600, 'steps': 25674, 'loss/train': 1.3079192638397217} +03/04/2022 19:38:00 - INFO - codeparrot_training - Step 25675: {'lr': 0.00046908945149496897, 'samples': 13146112, 'steps': 25675, 'loss/train': 2.713205099105835} +03/04/2022 19:38:01 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 19:38:05 - INFO - codeparrot_training - Step 25676: {'lr': 0.00046908689539560034, 'samples': 13146624, 'steps': 25676, 'loss/train': 0.8642597198486328} +03/04/2022 19:38:08 - INFO - codeparrot_training - Step 25677: {'lr': 0.0004690843391975146, 'samples': 13147136, 'steps': 25677, 'loss/train': 2.11598801612854} +03/04/2022 19:38:09 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/04/2022 19:38:13 - INFO - codeparrot_training - Step 25678: {'lr': 0.0004690817829007129, 'samples': 13147648, 'steps': 25678, 'loss/train': 1.6526800394058228} +03/04/2022 19:38:17 - INFO - codeparrot_training - Step 25679: {'lr': 0.00046907922650519623, 'samples': 13148160, 'steps': 25679, 'loss/train': 2.2532341480255127} +03/04/2022 19:38:18 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 19:38:22 - INFO - codeparrot_training - Step 25680: {'lr': 0.0004690766700109659, 'samples': 13148672, 'steps': 25680, 'loss/train': 2.280402421951294} +03/04/2022 19:38:25 - INFO - codeparrot_training - Step 25681: {'lr': 0.00046907411341802295, 'samples': 13149184, 'steps': 25681, 'loss/train': 0.8179460763931274} +03/04/2022 19:38:26 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/04/2022 19:38:30 - INFO - codeparrot_training - Step 25682: {'lr': 0.0004690715567263687, 'samples': 13149696, 'steps': 25682, 'loss/train': 2.325756549835205} +03/04/2022 19:38:34 - INFO - codeparrot_training - Step 25683: {'lr': 0.00046906899993600406, 'samples': 13150208, 'steps': 25683, 'loss/train': 1.8430765867233276} +03/04/2022 19:38:35 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/04/2022 19:38:39 - INFO - codeparrot_training - Step 25684: {'lr': 0.00046906644304693033, 'samples': 13150720, 'steps': 25684, 'loss/train': 1.4834637641906738} +03/04/2022 19:38:42 - INFO - codeparrot_training - Step 25685: {'lr': 0.0004690638860591487, 'samples': 13151232, 'steps': 25685, 'loss/train': 2.361208200454712} +03/04/2022 19:38:43 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 19:38:47 - INFO - codeparrot_training - Step 25686: {'lr': 0.00046906132897266026, 'samples': 13151744, 'steps': 25686, 'loss/train': 1.2955799102783203} +03/04/2022 19:38:51 - INFO - codeparrot_training - Step 25687: {'lr': 0.00046905877178746614, 'samples': 13152256, 'steps': 25687, 'loss/train': 2.484726667404175} +03/04/2022 19:38:52 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/04/2022 19:38:56 - INFO - codeparrot_training - Step 25688: {'lr': 0.0004690562145035675, 'samples': 13152768, 'steps': 25688, 'loss/train': 1.6255322694778442} +03/04/2022 19:38:59 - INFO - codeparrot_training - Step 25689: {'lr': 0.00046905365712096553, 'samples': 13153280, 'steps': 25689, 'loss/train': 2.10624098777771} +03/04/2022 19:39:00 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 19:39:04 - INFO - codeparrot_training - Step 25690: {'lr': 0.0004690510996396614, 'samples': 13153792, 'steps': 25690, 'loss/train': 1.2157890796661377} +03/04/2022 19:39:07 - INFO - codeparrot_training - Step 25691: {'lr': 0.0004690485420596561, 'samples': 13154304, 'steps': 25691, 'loss/train': 2.579500913619995} +03/04/2022 19:39:09 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 19:39:13 - INFO - codeparrot_training - Step 25692: {'lr': 0.000469045984380951, 'samples': 13154816, 'steps': 25692, 'loss/train': 1.5140951871871948} +03/04/2022 19:39:16 - INFO - codeparrot_training - Step 25693: {'lr': 0.0004690434266035471, 'samples': 13155328, 'steps': 25693, 'loss/train': 1.475740909576416} +03/04/2022 19:39:17 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/04/2022 19:39:21 - INFO - codeparrot_training - Step 25694: {'lr': 0.00046904086872744577, 'samples': 13155840, 'steps': 25694, 'loss/train': 2.270071029663086} +03/04/2022 19:39:24 - INFO - codeparrot_training - Step 25695: {'lr': 0.0004690383107526479, 'samples': 13156352, 'steps': 25695, 'loss/train': 1.8595331907272339} +03/04/2022 19:39:26 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/04/2022 19:39:30 - INFO - codeparrot_training - Step 25696: {'lr': 0.0004690357526791547, 'samples': 13156864, 'steps': 25696, 'loss/train': 1.9620486497879028} +03/04/2022 19:39:33 - INFO - codeparrot_training - Step 25697: {'lr': 0.00046903319450696744, 'samples': 13157376, 'steps': 25697, 'loss/train': 2.904324769973755} +03/04/2022 19:39:35 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/04/2022 19:39:38 - INFO - codeparrot_training - Step 25698: {'lr': 0.00046903063623608714, 'samples': 13157888, 'steps': 25698, 'loss/train': 1.9329532384872437} +03/04/2022 19:39:41 - INFO - codeparrot_training - Step 25699: {'lr': 0.00046902807786651507, 'samples': 13158400, 'steps': 25699, 'loss/train': 3.061762571334839} +03/04/2022 19:39:43 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 19:39:47 - INFO - codeparrot_training - Step 25700: {'lr': 0.00046902551939825236, 'samples': 13158912, 'steps': 25700, 'loss/train': 2.0464911460876465} +03/04/2022 19:39:50 - INFO - codeparrot_training - Step 25701: {'lr': 0.00046902296083130003, 'samples': 13159424, 'steps': 25701, 'loss/train': 1.8379894495010376} +03/04/2022 19:39:52 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 19:39:55 - INFO - codeparrot_training - Step 25702: {'lr': 0.00046902040216565945, 'samples': 13159936, 'steps': 25702, 'loss/train': 1.6784310340881348} +03/04/2022 19:39:58 - INFO - codeparrot_training - Step 25703: {'lr': 0.0004690178434013316, 'samples': 13160448, 'steps': 25703, 'loss/train': 1.1337891817092896} +03/04/2022 19:40:00 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 19:40:03 - INFO - codeparrot_training - Step 25704: {'lr': 0.00046901528453831764, 'samples': 13160960, 'steps': 25704, 'loss/train': 2.459540843963623} +03/04/2022 19:40:07 - INFO - codeparrot_training - Step 25705: {'lr': 0.0004690127255766188, 'samples': 13161472, 'steps': 25705, 'loss/train': 1.7593038082122803} +03/04/2022 19:40:09 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 19:40:12 - INFO - codeparrot_training - Step 25706: {'lr': 0.0004690101665162362, 'samples': 13161984, 'steps': 25706, 'loss/train': 1.7941027879714966} +03/04/2022 19:40:15 - INFO - codeparrot_training - Step 25707: {'lr': 0.00046900760735717103, 'samples': 13162496, 'steps': 25707, 'loss/train': 2.2718758583068848} +03/04/2022 19:40:17 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 19:40:20 - INFO - codeparrot_training - Step 25708: {'lr': 0.00046900504809942433, 'samples': 13163008, 'steps': 25708, 'loss/train': 0.9856160879135132} +03/04/2022 19:40:23 - INFO - codeparrot_training - Step 25709: {'lr': 0.00046900248874299746, 'samples': 13163520, 'steps': 25709, 'loss/train': 2.0409157276153564} +03/04/2022 19:40:26 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 19:40:29 - INFO - codeparrot_training - Step 25710: {'lr': 0.0004689999292878914, 'samples': 13164032, 'steps': 25710, 'loss/train': 2.0801544189453125} +03/04/2022 19:40:32 - INFO - codeparrot_training - Step 25711: {'lr': 0.00046899736973410734, 'samples': 13164544, 'steps': 25711, 'loss/train': 1.0832571983337402} +03/04/2022 19:40:34 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 19:40:37 - INFO - codeparrot_training - Step 25712: {'lr': 0.0004689948100816465, 'samples': 13165056, 'steps': 25712, 'loss/train': 1.4017027616500854} +03/04/2022 19:40:40 - INFO - codeparrot_training - Step 25713: {'lr': 0.00046899225033050985, 'samples': 13165568, 'steps': 25713, 'loss/train': 2.0863749980926514} +03/04/2022 19:40:43 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/04/2022 19:40:46 - INFO - codeparrot_training - Step 25714: {'lr': 0.0004689896904806987, 'samples': 13166080, 'steps': 25714, 'loss/train': 1.7175102233886719} +03/04/2022 19:40:49 - INFO - codeparrot_training - Step 25715: {'lr': 0.0004689871305322143, 'samples': 13166592, 'steps': 25715, 'loss/train': 2.223897695541382} +03/04/2022 19:40:51 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 19:40:54 - INFO - codeparrot_training - Step 25716: {'lr': 0.0004689845704850576, 'samples': 13167104, 'steps': 25716, 'loss/train': 1.4767804145812988} +03/04/2022 19:40:57 - INFO - codeparrot_training - Step 25717: {'lr': 0.0004689820103392298, 'samples': 13167616, 'steps': 25717, 'loss/train': 2.8384833335876465} +03/04/2022 19:41:00 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/04/2022 19:41:03 - INFO - codeparrot_training - Step 25718: {'lr': 0.0004689794500947321, 'samples': 13168128, 'steps': 25718, 'loss/train': 1.5797239542007446} +03/04/2022 19:41:06 - INFO - codeparrot_training - Step 25719: {'lr': 0.0004689768897515657, 'samples': 13168640, 'steps': 25719, 'loss/train': 1.79481041431427} +03/04/2022 19:41:08 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 19:41:11 - INFO - codeparrot_training - Step 25720: {'lr': 0.0004689743293097316, 'samples': 13169152, 'steps': 25720, 'loss/train': 1.9758723974227905} +03/04/2022 19:41:14 - INFO - codeparrot_training - Step 25721: {'lr': 0.0004689717687692311, 'samples': 13169664, 'steps': 25721, 'loss/train': 2.1605889797210693} +03/04/2022 19:41:17 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 19:41:20 - INFO - codeparrot_training - Step 25722: {'lr': 0.0004689692081300653, 'samples': 13170176, 'steps': 25722, 'loss/train': 1.4824808835983276} +03/04/2022 19:41:23 - INFO - codeparrot_training - Step 25723: {'lr': 0.0004689666473922354, 'samples': 13170688, 'steps': 25723, 'loss/train': 1.045037031173706} +03/04/2022 19:41:25 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 19:41:28 - INFO - codeparrot_training - Step 25724: {'lr': 0.0004689640865557424, 'samples': 13171200, 'steps': 25724, 'loss/train': 2.0227856636047363} +03/04/2022 19:41:31 - INFO - codeparrot_training - Step 25725: {'lr': 0.0004689615256205876, 'samples': 13171712, 'steps': 25725, 'loss/train': 2.1369924545288086} +03/04/2022 19:41:33 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 19:41:37 - INFO - codeparrot_training - Step 25726: {'lr': 0.0004689589645867721, 'samples': 13172224, 'steps': 25726, 'loss/train': 2.2324066162109375} +03/04/2022 19:41:40 - INFO - codeparrot_training - Step 25727: {'lr': 0.0004689564034542971, 'samples': 13172736, 'steps': 25727, 'loss/train': 1.7525347471237183} +03/04/2022 19:41:42 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 19:41:45 - INFO - codeparrot_training - Step 25728: {'lr': 0.00046895384222316375, 'samples': 13173248, 'steps': 25728, 'loss/train': 1.9723695516586304} +03/04/2022 19:41:48 - INFO - codeparrot_training - Step 25729: {'lr': 0.0004689512808933731, 'samples': 13173760, 'steps': 25729, 'loss/train': 2.0829074382781982} +03/04/2022 19:41:50 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 19:41:54 - INFO - codeparrot_training - Step 25730: {'lr': 0.0004689487194649265, 'samples': 13174272, 'steps': 25730, 'loss/train': 1.7009503841400146} +03/04/2022 19:41:57 - INFO - codeparrot_training - Step 25731: {'lr': 0.0004689461579378249, 'samples': 13174784, 'steps': 25731, 'loss/train': 1.3001179695129395} +03/04/2022 19:41:59 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/04/2022 19:42:02 - INFO - codeparrot_training - Step 25732: {'lr': 0.0004689435963120696, 'samples': 13175296, 'steps': 25732, 'loss/train': 1.304050087928772} +03/04/2022 19:42:05 - INFO - codeparrot_training - Step 25733: {'lr': 0.00046894103458766163, 'samples': 13175808, 'steps': 25733, 'loss/train': 0.9679852724075317} +03/04/2022 19:42:08 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 19:42:11 - INFO - codeparrot_training - Step 25734: {'lr': 0.0004689384727646022, 'samples': 13176320, 'steps': 25734, 'loss/train': 1.7003874778747559} +03/04/2022 19:42:14 - INFO - codeparrot_training - Step 25735: {'lr': 0.00046893591084289256, 'samples': 13176832, 'steps': 25735, 'loss/train': 1.4563628435134888} +03/04/2022 19:42:16 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/04/2022 19:42:19 - INFO - codeparrot_training - Step 25736: {'lr': 0.0004689333488225337, 'samples': 13177344, 'steps': 25736, 'loss/train': 2.0191643238067627} +03/04/2022 19:42:22 - INFO - codeparrot_training - Step 25737: {'lr': 0.00046893078670352686, 'samples': 13177856, 'steps': 25737, 'loss/train': 1.7665600776672363} +03/04/2022 19:42:24 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/04/2022 19:42:27 - INFO - codeparrot_training - Step 25738: {'lr': 0.0004689282244858732, 'samples': 13178368, 'steps': 25738, 'loss/train': 2.5812454223632812} +03/04/2022 19:42:31 - INFO - codeparrot_training - Step 25739: {'lr': 0.00046892566216957387, 'samples': 13178880, 'steps': 25739, 'loss/train': 1.050881028175354} +03/04/2022 19:42:33 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/04/2022 19:42:36 - INFO - codeparrot_training - Step 25740: {'lr': 0.00046892309975463, 'samples': 13179392, 'steps': 25740, 'loss/train': 2.2541086673736572} +03/04/2022 19:42:39 - INFO - codeparrot_training - Step 25741: {'lr': 0.0004689205372410427, 'samples': 13179904, 'steps': 25741, 'loss/train': 1.3961035013198853} +03/04/2022 19:42:42 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/04/2022 19:42:44 - INFO - codeparrot_training - Step 25742: {'lr': 0.00046891797462881327, 'samples': 13180416, 'steps': 25742, 'loss/train': 2.1329171657562256} +03/04/2022 19:42:48 - INFO - codeparrot_training - Step 25743: {'lr': 0.0004689154119179427, 'samples': 13180928, 'steps': 25743, 'loss/train': 2.0172040462493896} +03/04/2022 19:42:50 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/04/2022 19:42:53 - INFO - codeparrot_training - Step 25744: {'lr': 0.00046891284910843237, 'samples': 13181440, 'steps': 25744, 'loss/train': 1.993381381034851} +03/04/2022 19:42:56 - INFO - codeparrot_training - Step 25745: {'lr': 0.0004689102862002832, 'samples': 13181952, 'steps': 25745, 'loss/train': 2.4731855392456055} +03/04/2022 19:42:58 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/04/2022 19:43:01 - INFO - codeparrot_training - Step 25746: {'lr': 0.00046890772319349637, 'samples': 13182464, 'steps': 25746, 'loss/train': 2.4853122234344482} +03/04/2022 19:43:05 - INFO - codeparrot_training - Step 25747: {'lr': 0.00046890516008807315, 'samples': 13182976, 'steps': 25747, 'loss/train': 1.6481667757034302} +03/04/2022 19:43:08 - INFO - codeparrot_training - Step 25748: {'lr': 0.0004689025968840147, 'samples': 13183488, 'steps': 25748, 'loss/train': 2.0766830444335938} +03/04/2022 19:43:08 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 19:43:13 - INFO - codeparrot_training - Step 25749: {'lr': 0.00046890003358132204, 'samples': 13184000, 'steps': 25749, 'loss/train': 1.4666603803634644} +03/04/2022 19:43:16 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/04/2022 19:43:18 - INFO - codeparrot_training - Step 25750: {'lr': 0.0004688974701799964, 'samples': 13184512, 'steps': 25750, 'loss/train': 1.6807059049606323} +03/04/2022 19:43:22 - INFO - codeparrot_training - Step 25751: {'lr': 0.00046889490668003896, 'samples': 13185024, 'steps': 25751, 'loss/train': 1.9575626850128174} +03/04/2022 19:43:25 - INFO - codeparrot_training - Step 25752: {'lr': 0.0004688923430814509, 'samples': 13185536, 'steps': 25752, 'loss/train': 6.606766700744629} +03/04/2022 19:43:25 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 19:43:30 - INFO - codeparrot_training - Step 25753: {'lr': 0.00046888977938423326, 'samples': 13186048, 'steps': 25753, 'loss/train': 1.017731785774231} +03/04/2022 19:43:33 - INFO - codeparrot_training - Step 25754: {'lr': 0.00046888721558838734, 'samples': 13186560, 'steps': 25754, 'loss/train': 1.6070107221603394} +03/04/2022 19:43:34 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 19:43:38 - INFO - codeparrot_training - Step 25755: {'lr': 0.00046888465169391414, 'samples': 13187072, 'steps': 25755, 'loss/train': 1.8441321849822998} +03/04/2022 19:43:42 - INFO - codeparrot_training - Step 25756: {'lr': 0.00046888208770081493, 'samples': 13187584, 'steps': 25756, 'loss/train': 1.8144243955612183} +03/04/2022 19:43:42 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/04/2022 19:43:47 - INFO - codeparrot_training - Step 25757: {'lr': 0.0004688795236090908, 'samples': 13188096, 'steps': 25757, 'loss/train': 1.4829559326171875} +03/04/2022 19:43:50 - INFO - codeparrot_training - Step 25758: {'lr': 0.000468876959418743, 'samples': 13188608, 'steps': 25758, 'loss/train': 1.6481634378433228} +03/04/2022 19:43:50 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 19:43:56 - INFO - codeparrot_training - Step 25759: {'lr': 0.0004688743951297726, 'samples': 13189120, 'steps': 25759, 'loss/train': 1.9870381355285645} +03/04/2022 19:43:59 - INFO - codeparrot_training - Step 25760: {'lr': 0.0004688718307421807, 'samples': 13189632, 'steps': 25760, 'loss/train': 2.1658453941345215} +03/04/2022 19:43:59 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 19:44:04 - INFO - codeparrot_training - Step 25761: {'lr': 0.0004688692662559686, 'samples': 13190144, 'steps': 25761, 'loss/train': 2.271631956100464} +03/04/2022 19:44:07 - INFO - codeparrot_training - Step 25762: {'lr': 0.00046886670167113734, 'samples': 13190656, 'steps': 25762, 'loss/train': 1.7112658023834229} +03/04/2022 19:44:08 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/04/2022 19:44:13 - INFO - codeparrot_training - Step 25763: {'lr': 0.00046886413698768816, 'samples': 13191168, 'steps': 25763, 'loss/train': 2.1874313354492188} +03/04/2022 19:44:16 - INFO - codeparrot_training - Step 25764: {'lr': 0.0004688615722056222, 'samples': 13191680, 'steps': 25764, 'loss/train': 1.6382384300231934} +03/04/2022 19:44:19 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 19:44:21 - INFO - codeparrot_training - Step 25765: {'lr': 0.00046885900732494053, 'samples': 13192192, 'steps': 25765, 'loss/train': 1.7944228649139404} +03/04/2022 19:44:25 - INFO - codeparrot_training - Step 25766: {'lr': 0.0004688564423456444, 'samples': 13192704, 'steps': 25766, 'loss/train': 2.163855791091919} +03/04/2022 19:44:27 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/04/2022 19:44:30 - INFO - codeparrot_training - Step 25767: {'lr': 0.00046885387726773494, 'samples': 13193216, 'steps': 25767, 'loss/train': 2.276728391647339} +03/04/2022 19:44:33 - INFO - codeparrot_training - Step 25768: {'lr': 0.0004688513120912133, 'samples': 13193728, 'steps': 25768, 'loss/train': 1.882814884185791} +03/04/2022 19:44:36 - INFO - codeparrot_training - Step 25769: {'lr': 0.0004688487468160806, 'samples': 13194240, 'steps': 25769, 'loss/train': 2.0701003074645996} +03/04/2022 19:44:36 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 19:44:42 - INFO - codeparrot_training - Step 25770: {'lr': 0.000468846181442338, 'samples': 13194752, 'steps': 25770, 'loss/train': 2.5071463584899902} +03/04/2022 19:44:45 - INFO - codeparrot_training - Step 25771: {'lr': 0.0004688436159699868, 'samples': 13195264, 'steps': 25771, 'loss/train': 2.086479663848877} +03/04/2022 19:44:46 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 19:44:50 - INFO - codeparrot_training - Step 25772: {'lr': 0.000468841050399028, 'samples': 13195776, 'steps': 25772, 'loss/train': 1.8353196382522583} +03/04/2022 19:44:53 - INFO - codeparrot_training - Step 25773: {'lr': 0.0004688384847294628, 'samples': 13196288, 'steps': 25773, 'loss/train': 1.824231743812561} +03/04/2022 19:44:54 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/04/2022 19:44:59 - INFO - codeparrot_training - Step 25774: {'lr': 0.0004688359189612923, 'samples': 13196800, 'steps': 25774, 'loss/train': 2.586979389190674} +03/04/2022 19:45:02 - INFO - codeparrot_training - Step 25775: {'lr': 0.0004688333530945178, 'samples': 13197312, 'steps': 25775, 'loss/train': 2.59271502494812} +03/04/2022 19:45:03 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/04/2022 19:45:07 - INFO - codeparrot_training - Step 25776: {'lr': 0.0004688307871291403, 'samples': 13197824, 'steps': 25776, 'loss/train': 1.7258427143096924} +03/04/2022 19:45:10 - INFO - codeparrot_training - Step 25777: {'lr': 0.0004688282210651611, 'samples': 13198336, 'steps': 25777, 'loss/train': 1.5032305717468262} +03/04/2022 19:45:11 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 19:45:16 - INFO - codeparrot_training - Step 25778: {'lr': 0.00046882565490258125, 'samples': 13198848, 'steps': 25778, 'loss/train': 1.2018293142318726} +03/04/2022 19:45:19 - INFO - codeparrot_training - Step 25779: {'lr': 0.0004688230886414019, 'samples': 13199360, 'steps': 25779, 'loss/train': 1.6057734489440918} +03/04/2022 19:45:20 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 19:45:24 - INFO - codeparrot_training - Step 25780: {'lr': 0.0004688205222816242, 'samples': 13199872, 'steps': 25780, 'loss/train': 0.26883500814437866} +03/04/2022 19:45:27 - INFO - codeparrot_training - Step 25781: {'lr': 0.00046881795582324944, 'samples': 13200384, 'steps': 25781, 'loss/train': 2.0397567749023438} +03/04/2022 19:45:29 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/04/2022 19:45:33 - INFO - codeparrot_training - Step 25782: {'lr': 0.00046881538926627864, 'samples': 13200896, 'steps': 25782, 'loss/train': 0.980804443359375} +03/04/2022 19:45:36 - INFO - codeparrot_training - Step 25783: {'lr': 0.000468812822610713, 'samples': 13201408, 'steps': 25783, 'loss/train': 1.9646778106689453} +03/04/2022 19:45:37 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 19:45:41 - INFO - codeparrot_training - Step 25784: {'lr': 0.00046881025585655367, 'samples': 13201920, 'steps': 25784, 'loss/train': 0.8479981422424316} +03/04/2022 19:45:44 - INFO - codeparrot_training - Step 25785: {'lr': 0.0004688076890038019, 'samples': 13202432, 'steps': 25785, 'loss/train': 1.5140366554260254} +03/04/2022 19:45:46 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 19:45:50 - INFO - codeparrot_training - Step 25786: {'lr': 0.00046880512205245867, 'samples': 13202944, 'steps': 25786, 'loss/train': 1.815520167350769} +03/04/2022 19:45:53 - INFO - codeparrot_training - Step 25787: {'lr': 0.00046880255500252526, 'samples': 13203456, 'steps': 25787, 'loss/train': 1.4638351202011108} +03/04/2022 19:45:54 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/04/2022 19:45:58 - INFO - codeparrot_training - Step 25788: {'lr': 0.0004687999878540028, 'samples': 13203968, 'steps': 25788, 'loss/train': 2.747626304626465} +03/04/2022 19:46:01 - INFO - codeparrot_training - Step 25789: {'lr': 0.00046879742060689243, 'samples': 13204480, 'steps': 25789, 'loss/train': 0.11578322947025299} +03/04/2022 19:46:02 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 19:46:07 - INFO - codeparrot_training - Step 25790: {'lr': 0.0004687948532611953, 'samples': 13204992, 'steps': 25790, 'loss/train': 2.3247017860412598} +03/04/2022 19:46:10 - INFO - codeparrot_training - Step 25791: {'lr': 0.0004687922858169126, 'samples': 13205504, 'steps': 25791, 'loss/train': 0.9731686115264893} +03/04/2022 19:46:11 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 19:46:15 - INFO - codeparrot_training - Step 25792: {'lr': 0.0004687897182740455, 'samples': 13206016, 'steps': 25792, 'loss/train': 1.597493290901184} +03/04/2022 19:46:18 - INFO - codeparrot_training - Step 25793: {'lr': 0.0004687871506325951, 'samples': 13206528, 'steps': 25793, 'loss/train': 1.8939908742904663} +03/04/2022 19:46:20 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 19:46:24 - INFO - codeparrot_training - Step 25794: {'lr': 0.00046878458289256264, 'samples': 13207040, 'steps': 25794, 'loss/train': 1.6569936275482178} +03/04/2022 19:46:27 - INFO - codeparrot_training - Step 25795: {'lr': 0.00046878201505394913, 'samples': 13207552, 'steps': 25795, 'loss/train': 1.6879701614379883} +03/04/2022 19:46:28 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 19:46:32 - INFO - codeparrot_training - Step 25796: {'lr': 0.0004687794471167559, 'samples': 13208064, 'steps': 25796, 'loss/train': 2.192411422729492} +03/04/2022 19:46:35 - INFO - codeparrot_training - Step 25797: {'lr': 0.00046877687908098396, 'samples': 13208576, 'steps': 25797, 'loss/train': 1.340955376625061} +03/04/2022 19:46:36 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/04/2022 19:46:40 - INFO - codeparrot_training - Step 25798: {'lr': 0.0004687743109466346, 'samples': 13209088, 'steps': 25798, 'loss/train': 2.3971335887908936} +03/04/2022 19:46:44 - INFO - codeparrot_training - Step 25799: {'lr': 0.00046877174271370894, 'samples': 13209600, 'steps': 25799, 'loss/train': 1.605688452720642} +03/04/2022 19:46:45 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 19:46:49 - INFO - codeparrot_training - Step 25800: {'lr': 0.000468769174382208, 'samples': 13210112, 'steps': 25800, 'loss/train': 1.1746312379837036} +03/04/2022 19:46:52 - INFO - codeparrot_training - Step 25801: {'lr': 0.0004687666059521331, 'samples': 13210624, 'steps': 25801, 'loss/train': 2.1345129013061523} +03/04/2022 19:46:53 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 19:46:57 - INFO - codeparrot_training - Step 25802: {'lr': 0.0004687640374234854, 'samples': 13211136, 'steps': 25802, 'loss/train': 1.4977260828018188} +03/04/2022 19:47:00 - INFO - codeparrot_training - Step 25803: {'lr': 0.0004687614687962659, 'samples': 13211648, 'steps': 25803, 'loss/train': 1.2754597663879395} +03/04/2022 19:47:01 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 19:47:06 - INFO - codeparrot_training - Step 25804: {'lr': 0.0004687589000704759, 'samples': 13212160, 'steps': 25804, 'loss/train': 2.2735788822174072} +03/04/2022 19:47:09 - INFO - codeparrot_training - Step 25805: {'lr': 0.0004687563312461165, 'samples': 13212672, 'steps': 25805, 'loss/train': 2.396667242050171} +03/04/2022 19:47:10 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/04/2022 19:47:14 - INFO - codeparrot_training - Step 25806: {'lr': 0.00046875376232318887, 'samples': 13213184, 'steps': 25806, 'loss/train': 1.6642447710037231} +03/04/2022 19:47:17 - INFO - codeparrot_training - Step 25807: {'lr': 0.00046875119330169426, 'samples': 13213696, 'steps': 25807, 'loss/train': 1.6901277303695679} +03/04/2022 19:47:19 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 19:47:23 - INFO - codeparrot_training - Step 25808: {'lr': 0.00046874862418163363, 'samples': 13214208, 'steps': 25808, 'loss/train': 1.4000072479248047} +03/04/2022 19:47:26 - INFO - codeparrot_training - Step 25809: {'lr': 0.00046874605496300824, 'samples': 13214720, 'steps': 25809, 'loss/train': 1.4770371913909912} +03/04/2022 19:47:27 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 19:47:31 - INFO - codeparrot_training - Step 25810: {'lr': 0.00046874348564581933, 'samples': 13215232, 'steps': 25810, 'loss/train': 1.1583110094070435} +03/04/2022 19:47:34 - INFO - codeparrot_training - Step 25811: {'lr': 0.00046874091623006793, 'samples': 13215744, 'steps': 25811, 'loss/train': 1.7334119081497192} +03/04/2022 19:47:36 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 19:47:39 - INFO - codeparrot_training - Step 25812: {'lr': 0.0004687383467157553, 'samples': 13216256, 'steps': 25812, 'loss/train': 2.045461416244507} +03/04/2022 19:47:43 - INFO - codeparrot_training - Step 25813: {'lr': 0.0004687357771028825, 'samples': 13216768, 'steps': 25813, 'loss/train': 1.922663927078247} +03/04/2022 19:47:44 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/04/2022 19:47:48 - INFO - codeparrot_training - Step 25814: {'lr': 0.00046873320739145073, 'samples': 13217280, 'steps': 25814, 'loss/train': 0.6600019335746765} +03/04/2022 19:47:51 - INFO - codeparrot_training - Step 25815: {'lr': 0.0004687306375814612, 'samples': 13217792, 'steps': 25815, 'loss/train': 1.7463972568511963} +03/04/2022 19:47:52 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/04/2022 19:47:56 - INFO - codeparrot_training - Step 25816: {'lr': 0.000468728067672915, 'samples': 13218304, 'steps': 25816, 'loss/train': 1.9042831659317017} +03/04/2022 19:48:00 - INFO - codeparrot_training - Step 25817: {'lr': 0.00046872549766581326, 'samples': 13218816, 'steps': 25817, 'loss/train': 2.002692937850952} +03/04/2022 19:48:01 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/04/2022 19:48:05 - INFO - codeparrot_training - Step 25818: {'lr': 0.00046872292756015724, 'samples': 13219328, 'steps': 25818, 'loss/train': 2.0099923610687256} +03/04/2022 19:48:08 - INFO - codeparrot_training - Step 25819: {'lr': 0.000468720357355948, 'samples': 13219840, 'steps': 25819, 'loss/train': 0.819439709186554} +03/04/2022 19:48:09 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/04/2022 19:48:13 - INFO - codeparrot_training - Step 25820: {'lr': 0.00046871778705318673, 'samples': 13220352, 'steps': 25820, 'loss/train': 1.730955719947815} +03/04/2022 19:48:16 - INFO - codeparrot_training - Step 25821: {'lr': 0.0004687152166518747, 'samples': 13220864, 'steps': 25821, 'loss/train': 1.5430030822753906} +03/04/2022 19:48:18 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/04/2022 19:48:22 - INFO - codeparrot_training - Step 25822: {'lr': 0.0004687126461520128, 'samples': 13221376, 'steps': 25822, 'loss/train': 1.933743953704834} +03/04/2022 19:48:25 - INFO - codeparrot_training - Step 25823: {'lr': 0.0004687100755536025, 'samples': 13221888, 'steps': 25823, 'loss/train': 1.3258787393569946} +03/04/2022 19:48:26 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 19:48:30 - INFO - codeparrot_training - Step 25824: {'lr': 0.00046870750485664484, 'samples': 13222400, 'steps': 25824, 'loss/train': 1.5260109901428223} +03/04/2022 19:48:34 - INFO - codeparrot_training - Step 25825: {'lr': 0.00046870493406114084, 'samples': 13222912, 'steps': 25825, 'loss/train': 2.4076874256134033} +03/04/2022 19:48:34 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/04/2022 19:48:39 - INFO - codeparrot_training - Step 25826: {'lr': 0.0004687023631670918, 'samples': 13223424, 'steps': 25826, 'loss/train': 1.7408114671707153} +03/04/2022 19:48:42 - INFO - codeparrot_training - Step 25827: {'lr': 0.0004686997921744989, 'samples': 13223936, 'steps': 25827, 'loss/train': 1.4906857013702393} +03/04/2022 19:48:43 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 19:48:47 - INFO - codeparrot_training - Step 25828: {'lr': 0.0004686972210833632, 'samples': 13224448, 'steps': 25828, 'loss/train': 1.7101078033447266} +03/04/2022 19:48:50 - INFO - codeparrot_training - Step 25829: {'lr': 0.0004686946498936859, 'samples': 13224960, 'steps': 25829, 'loss/train': 1.2496514320373535} +03/04/2022 19:48:51 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 19:48:56 - INFO - codeparrot_training - Step 25830: {'lr': 0.00046869207860546826, 'samples': 13225472, 'steps': 25830, 'loss/train': 1.8885293006896973} +03/04/2022 19:48:59 - INFO - codeparrot_training - Step 25831: {'lr': 0.00046868950721871126, 'samples': 13225984, 'steps': 25831, 'loss/train': 1.4806569814682007} +03/04/2022 19:48:59 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 19:49:04 - INFO - codeparrot_training - Step 25832: {'lr': 0.00046868693573341616, 'samples': 13226496, 'steps': 25832, 'loss/train': 1.8638274669647217} +03/04/2022 19:49:07 - INFO - codeparrot_training - Step 25833: {'lr': 0.00046868436414958405, 'samples': 13227008, 'steps': 25833, 'loss/train': 2.0784008502960205} +03/04/2022 19:49:08 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 19:49:12 - INFO - codeparrot_training - Step 25834: {'lr': 0.00046868179246721623, 'samples': 13227520, 'steps': 25834, 'loss/train': 1.7019493579864502} +03/04/2022 19:49:15 - INFO - codeparrot_training - Step 25835: {'lr': 0.00046867922068631374, 'samples': 13228032, 'steps': 25835, 'loss/train': 2.199866533279419} +03/04/2022 19:49:16 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 19:49:21 - INFO - codeparrot_training - Step 25836: {'lr': 0.00046867664880687775, 'samples': 13228544, 'steps': 25836, 'loss/train': 2.098139762878418} +03/04/2022 19:49:24 - INFO - codeparrot_training - Step 25837: {'lr': 0.00046867407682890937, 'samples': 13229056, 'steps': 25837, 'loss/train': 1.9209764003753662} +03/04/2022 19:49:25 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 19:49:29 - INFO - codeparrot_training - Step 25838: {'lr': 0.00046867150475240994, 'samples': 13229568, 'steps': 25838, 'loss/train': 1.697674036026001} +03/04/2022 19:49:32 - INFO - codeparrot_training - Step 25839: {'lr': 0.0004686689325773805, 'samples': 13230080, 'steps': 25839, 'loss/train': 1.5586429834365845} +03/04/2022 19:49:33 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 19:49:38 - INFO - codeparrot_training - Step 25840: {'lr': 0.00046866636030382217, 'samples': 13230592, 'steps': 25840, 'loss/train': 1.5807653665542603} +03/04/2022 19:49:41 - INFO - codeparrot_training - Step 25841: {'lr': 0.00046866378793173616, 'samples': 13231104, 'steps': 25841, 'loss/train': 1.206275224685669} +03/04/2022 19:49:42 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 19:49:46 - INFO - codeparrot_training - Step 25842: {'lr': 0.0004686612154611236, 'samples': 13231616, 'steps': 25842, 'loss/train': 1.5061042308807373} +03/04/2022 19:49:49 - INFO - codeparrot_training - Step 25843: {'lr': 0.0004686586428919857, 'samples': 13232128, 'steps': 25843, 'loss/train': 1.8210333585739136} +03/04/2022 19:49:51 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 19:49:55 - INFO - codeparrot_training - Step 25844: {'lr': 0.00046865607022432356, 'samples': 13232640, 'steps': 25844, 'loss/train': 2.423743724822998} +03/04/2022 19:49:58 - INFO - codeparrot_training - Step 25845: {'lr': 0.00046865349745813835, 'samples': 13233152, 'steps': 25845, 'loss/train': 1.5761553049087524} +03/04/2022 19:49:59 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/04/2022 19:50:03 - INFO - codeparrot_training - Step 25846: {'lr': 0.00046865092459343126, 'samples': 13233664, 'steps': 25846, 'loss/train': 1.4360899925231934} +03/04/2022 19:50:06 - INFO - codeparrot_training - Step 25847: {'lr': 0.00046864835163020353, 'samples': 13234176, 'steps': 25847, 'loss/train': 1.5536714792251587} +03/04/2022 19:50:08 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 19:50:12 - INFO - codeparrot_training - Step 25848: {'lr': 0.00046864577856845613, 'samples': 13234688, 'steps': 25848, 'loss/train': 1.4378796815872192} +03/04/2022 19:50:15 - INFO - codeparrot_training - Step 25849: {'lr': 0.0004686432054081904, 'samples': 13235200, 'steps': 25849, 'loss/train': 1.5790094137191772} +03/04/2022 19:50:16 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/04/2022 19:50:20 - INFO - codeparrot_training - Step 25850: {'lr': 0.00046864063214940735, 'samples': 13235712, 'steps': 25850, 'loss/train': 1.5388582944869995} +03/04/2022 19:50:23 - INFO - codeparrot_training - Step 25851: {'lr': 0.0004686380587921082, 'samples': 13236224, 'steps': 25851, 'loss/train': 1.8229191303253174} +03/04/2022 19:50:24 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 19:50:28 - INFO - codeparrot_training - Step 25852: {'lr': 0.00046863548533629406, 'samples': 13236736, 'steps': 25852, 'loss/train': 1.728854775428772} +03/04/2022 19:50:32 - INFO - codeparrot_training - Step 25853: {'lr': 0.00046863291178196625, 'samples': 13237248, 'steps': 25853, 'loss/train': 1.4876281023025513} +03/04/2022 19:50:32 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 19:50:37 - INFO - codeparrot_training - Step 25854: {'lr': 0.0004686303381291258, 'samples': 13237760, 'steps': 25854, 'loss/train': 1.4468914270401} +03/04/2022 19:50:40 - INFO - codeparrot_training - Step 25855: {'lr': 0.00046862776437777386, 'samples': 13238272, 'steps': 25855, 'loss/train': 1.7548192739486694} +03/04/2022 19:50:41 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 19:50:45 - INFO - codeparrot_training - Step 25856: {'lr': 0.00046862519052791166, 'samples': 13238784, 'steps': 25856, 'loss/train': 1.580428957939148} +03/04/2022 19:50:49 - INFO - codeparrot_training - Step 25857: {'lr': 0.00046862261657954033, 'samples': 13239296, 'steps': 25857, 'loss/train': 0.580955982208252} +03/04/2022 19:50:50 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 19:50:54 - INFO - codeparrot_training - Step 25858: {'lr': 0.000468620042532661, 'samples': 13239808, 'steps': 25858, 'loss/train': 2.3391287326812744} +03/04/2022 19:50:57 - INFO - codeparrot_training - Step 25859: {'lr': 0.0004686174683872748, 'samples': 13240320, 'steps': 25859, 'loss/train': 1.8355076313018799} +03/04/2022 19:50:58 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 19:51:02 - INFO - codeparrot_training - Step 25860: {'lr': 0.00046861489414338304, 'samples': 13240832, 'steps': 25860, 'loss/train': 1.988729476928711} +03/04/2022 19:51:06 - INFO - codeparrot_training - Step 25861: {'lr': 0.0004686123198009867, 'samples': 13241344, 'steps': 25861, 'loss/train': 2.1283059120178223} +03/04/2022 19:51:06 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/04/2022 19:51:11 - INFO - codeparrot_training - Step 25862: {'lr': 0.00046860974536008706, 'samples': 13241856, 'steps': 25862, 'loss/train': 1.9020934104919434} +03/04/2022 19:51:14 - INFO - codeparrot_training - Step 25863: {'lr': 0.0004686071708206853, 'samples': 13242368, 'steps': 25863, 'loss/train': 2.4686341285705566} +03/04/2022 19:51:15 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 19:51:19 - INFO - codeparrot_training - Step 25864: {'lr': 0.0004686045961827824, 'samples': 13242880, 'steps': 25864, 'loss/train': 1.9187458753585815} +03/04/2022 19:51:22 - INFO - codeparrot_training - Step 25865: {'lr': 0.00046860202144637976, 'samples': 13243392, 'steps': 25865, 'loss/train': 1.670817494392395} +03/04/2022 19:51:23 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 19:51:28 - INFO - codeparrot_training - Step 25866: {'lr': 0.00046859944661147837, 'samples': 13243904, 'steps': 25866, 'loss/train': 2.1109752655029297} +03/04/2022 19:51:31 - INFO - codeparrot_training - Step 25867: {'lr': 0.00046859687167807943, 'samples': 13244416, 'steps': 25867, 'loss/train': 1.5801206827163696} +03/04/2022 19:51:34 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 19:51:37 - INFO - codeparrot_training - Step 25868: {'lr': 0.0004685942966461841, 'samples': 13244928, 'steps': 25868, 'loss/train': 1.6933605670928955} +03/04/2022 19:51:40 - INFO - codeparrot_training - Step 25869: {'lr': 0.00046859172151579354, 'samples': 13245440, 'steps': 25869, 'loss/train': 2.3044238090515137} +03/04/2022 19:51:43 - INFO - codeparrot_training - Step 25870: {'lr': 0.00046858914628690896, 'samples': 13245952, 'steps': 25870, 'loss/train': 0.3217429220676422} +03/04/2022 19:51:43 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 19:51:48 - INFO - codeparrot_training - Step 25871: {'lr': 0.0004685865709595315, 'samples': 13246464, 'steps': 25871, 'loss/train': 1.3033314943313599} +03/04/2022 19:51:52 - INFO - codeparrot_training - Step 25872: {'lr': 0.00046858399553366224, 'samples': 13246976, 'steps': 25872, 'loss/train': 1.4432501792907715} +03/04/2022 19:51:52 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 19:51:57 - INFO - codeparrot_training - Step 25873: {'lr': 0.0004685814200093025, 'samples': 13247488, 'steps': 25873, 'loss/train': 1.7649273872375488} +03/04/2022 19:52:00 - INFO - codeparrot_training - Step 25874: {'lr': 0.00046857884438645327, 'samples': 13248000, 'steps': 25874, 'loss/train': 1.8365867137908936} +03/04/2022 19:52:00 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 19:52:05 - INFO - codeparrot_training - Step 25875: {'lr': 0.0004685762686651158, 'samples': 13248512, 'steps': 25875, 'loss/train': 0.6330443024635315} +03/04/2022 19:52:08 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 19:52:11 - INFO - codeparrot_training - Step 25876: {'lr': 0.0004685736928452913, 'samples': 13249024, 'steps': 25876, 'loss/train': 1.733134150505066} +03/04/2022 19:52:14 - INFO - codeparrot_training - Step 25877: {'lr': 0.00046857111692698083, 'samples': 13249536, 'steps': 25877, 'loss/train': 1.5421223640441895} +03/04/2022 19:52:17 - INFO - codeparrot_training - Step 25878: {'lr': 0.0004685685409101855, 'samples': 13250048, 'steps': 25878, 'loss/train': 1.3682268857955933} +03/04/2022 19:52:17 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 19:52:22 - INFO - codeparrot_training - Step 25879: {'lr': 0.00046856596479490667, 'samples': 13250560, 'steps': 25879, 'loss/train': 1.7628313302993774} +03/04/2022 19:52:26 - INFO - codeparrot_training - Step 25880: {'lr': 0.0004685633885811453, 'samples': 13251072, 'steps': 25880, 'loss/train': 1.2176059484481812} +03/04/2022 19:52:26 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 19:52:31 - INFO - codeparrot_training - Step 25881: {'lr': 0.0004685608122689027, 'samples': 13251584, 'steps': 25881, 'loss/train': 1.5149364471435547} +03/04/2022 19:52:34 - INFO - codeparrot_training - Step 25882: {'lr': 0.00046855823585818004, 'samples': 13252096, 'steps': 25882, 'loss/train': 0.4379826486110687} +03/04/2022 19:52:34 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 19:52:39 - INFO - codeparrot_training - Step 25883: {'lr': 0.0004685556593489783, 'samples': 13252608, 'steps': 25883, 'loss/train': 1.1206492185592651} +03/04/2022 19:52:42 - INFO - codeparrot_training - Step 25884: {'lr': 0.0004685530827412988, 'samples': 13253120, 'steps': 25884, 'loss/train': 1.8788888454437256} +03/04/2022 19:52:43 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 19:52:48 - INFO - codeparrot_training - Step 25885: {'lr': 0.0004685505060351426, 'samples': 13253632, 'steps': 25885, 'loss/train': 1.9359797239303589} +03/04/2022 19:52:51 - INFO - codeparrot_training - Step 25886: {'lr': 0.00046854792923051094, 'samples': 13254144, 'steps': 25886, 'loss/train': 1.5377318859100342} +03/04/2022 19:52:51 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/04/2022 19:52:56 - INFO - codeparrot_training - Step 25887: {'lr': 0.00046854535232740505, 'samples': 13254656, 'steps': 25887, 'loss/train': 1.8176591396331787} +03/04/2022 19:52:59 - INFO - codeparrot_training - Step 25888: {'lr': 0.00046854277532582585, 'samples': 13255168, 'steps': 25888, 'loss/train': 1.9865645170211792} +03/04/2022 19:52:59 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 19:53:05 - INFO - codeparrot_training - Step 25889: {'lr': 0.0004685401982257747, 'samples': 13255680, 'steps': 25889, 'loss/train': 1.9261173009872437} +03/04/2022 19:53:08 - INFO - codeparrot_training - Step 25890: {'lr': 0.0004685376210272527, 'samples': 13256192, 'steps': 25890, 'loss/train': 1.6129933595657349} +03/04/2022 19:53:08 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/04/2022 19:53:14 - INFO - codeparrot_training - Step 25891: {'lr': 0.00046853504373026107, 'samples': 13256704, 'steps': 25891, 'loss/train': 2.1686131954193115} +03/04/2022 19:53:17 - INFO - codeparrot_training - Step 25892: {'lr': 0.00046853246633480087, 'samples': 13257216, 'steps': 25892, 'loss/train': 2.083625078201294} +03/04/2022 19:53:19 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 19:53:22 - INFO - codeparrot_training - Step 25893: {'lr': 0.0004685298888408733, 'samples': 13257728, 'steps': 25893, 'loss/train': 2.0017762184143066} +03/04/2022 19:53:25 - INFO - codeparrot_training - Step 25894: {'lr': 0.0004685273112484796, 'samples': 13258240, 'steps': 25894, 'loss/train': 1.5308396816253662} +03/04/2022 19:53:27 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 19:53:31 - INFO - codeparrot_training - Step 25895: {'lr': 0.0004685247335576209, 'samples': 13258752, 'steps': 25895, 'loss/train': 2.1260509490966797} +03/04/2022 19:53:34 - INFO - codeparrot_training - Step 25896: {'lr': 0.00046852215576829824, 'samples': 13259264, 'steps': 25896, 'loss/train': 1.7930532693862915} +03/04/2022 19:53:36 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 19:53:39 - INFO - codeparrot_training - Step 25897: {'lr': 0.0004685195778805129, 'samples': 13259776, 'steps': 25897, 'loss/train': 1.9097622632980347} +03/04/2022 19:53:42 - INFO - codeparrot_training - Step 25898: {'lr': 0.000468516999894266, 'samples': 13260288, 'steps': 25898, 'loss/train': 2.222501754760742} +03/04/2022 19:53:44 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 19:53:48 - INFO - codeparrot_training - Step 25899: {'lr': 0.0004685144218095587, 'samples': 13260800, 'steps': 25899, 'loss/train': 1.7289777994155884} +03/04/2022 19:53:51 - INFO - codeparrot_training - Step 25900: {'lr': 0.00046851184362639223, 'samples': 13261312, 'steps': 25900, 'loss/train': 1.6573745012283325} +03/04/2022 19:53:53 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 19:53:56 - INFO - codeparrot_training - Step 25901: {'lr': 0.0004685092653447676, 'samples': 13261824, 'steps': 25901, 'loss/train': 1.396908164024353} +03/04/2022 19:53:59 - INFO - codeparrot_training - Step 25902: {'lr': 0.00046850668696468614, 'samples': 13262336, 'steps': 25902, 'loss/train': 1.5410982370376587} +03/04/2022 19:54:01 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 19:54:05 - INFO - codeparrot_training - Step 25903: {'lr': 0.0004685041084861489, 'samples': 13262848, 'steps': 25903, 'loss/train': 1.790032148361206} +03/04/2022 19:54:08 - INFO - codeparrot_training - Step 25904: {'lr': 0.00046850152990915705, 'samples': 13263360, 'steps': 25904, 'loss/train': 2.288675308227539} +03/04/2022 19:54:09 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 19:54:13 - INFO - codeparrot_training - Step 25905: {'lr': 0.0004684989512337119, 'samples': 13263872, 'steps': 25905, 'loss/train': 1.39580500125885} +03/04/2022 19:54:16 - INFO - codeparrot_training - Step 25906: {'lr': 0.00046849637245981434, 'samples': 13264384, 'steps': 25906, 'loss/train': 1.4886870384216309} +03/04/2022 19:54:18 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 19:54:21 - INFO - codeparrot_training - Step 25907: {'lr': 0.0004684937935874658, 'samples': 13264896, 'steps': 25907, 'loss/train': 2.217146396636963} +03/04/2022 19:54:25 - INFO - codeparrot_training - Step 25908: {'lr': 0.00046849121461666734, 'samples': 13265408, 'steps': 25908, 'loss/train': 1.1402108669281006} +03/04/2022 19:54:26 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/04/2022 19:54:30 - INFO - codeparrot_training - Step 25909: {'lr': 0.00046848863554742006, 'samples': 13265920, 'steps': 25909, 'loss/train': 2.503039836883545} +03/04/2022 19:54:33 - INFO - codeparrot_training - Step 25910: {'lr': 0.0004684860563797252, 'samples': 13266432, 'steps': 25910, 'loss/train': 2.386862277984619} +03/04/2022 19:54:34 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 19:54:38 - INFO - codeparrot_training - Step 25911: {'lr': 0.00046848347711358384, 'samples': 13266944, 'steps': 25911, 'loss/train': 1.451749324798584} +03/04/2022 19:54:41 - INFO - codeparrot_training - Step 25912: {'lr': 0.0004684808977489973, 'samples': 13267456, 'steps': 25912, 'loss/train': 2.6808433532714844} +03/04/2022 19:54:43 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/04/2022 19:54:47 - INFO - codeparrot_training - Step 25913: {'lr': 0.00046847831828596647, 'samples': 13267968, 'steps': 25913, 'loss/train': 1.3301246166229248} +03/04/2022 19:54:50 - INFO - codeparrot_training - Step 25914: {'lr': 0.0004684757387244928, 'samples': 13268480, 'steps': 25914, 'loss/train': 1.0148881673812866} +03/04/2022 19:54:51 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 19:54:55 - INFO - codeparrot_training - Step 25915: {'lr': 0.00046847315906457733, 'samples': 13268992, 'steps': 25915, 'loss/train': 2.179424524307251} +03/04/2022 19:54:58 - INFO - codeparrot_training - Step 25916: {'lr': 0.0004684705793062212, 'samples': 13269504, 'steps': 25916, 'loss/train': 1.5378541946411133} +03/04/2022 19:55:00 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/04/2022 19:55:04 - INFO - codeparrot_training - Step 25917: {'lr': 0.00046846799944942564, 'samples': 13270016, 'steps': 25917, 'loss/train': 1.903385043144226} +03/04/2022 19:55:07 - INFO - codeparrot_training - Step 25918: {'lr': 0.00046846541949419177, 'samples': 13270528, 'steps': 25918, 'loss/train': 2.0768959522247314} +03/04/2022 19:55:08 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 19:55:12 - INFO - codeparrot_training - Step 25919: {'lr': 0.00046846283944052073, 'samples': 13271040, 'steps': 25919, 'loss/train': 2.4806063175201416} +03/04/2022 19:55:15 - INFO - codeparrot_training - Step 25920: {'lr': 0.0004684602592884136, 'samples': 13271552, 'steps': 25920, 'loss/train': 2.472268581390381} +03/04/2022 19:55:17 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 19:55:21 - INFO - codeparrot_training - Step 25921: {'lr': 0.0004684576790378718, 'samples': 13272064, 'steps': 25921, 'loss/train': 2.091535806655884} +03/04/2022 19:55:24 - INFO - codeparrot_training - Step 25922: {'lr': 0.00046845509868889625, 'samples': 13272576, 'steps': 25922, 'loss/train': 0.7834211587905884} +03/04/2022 19:55:25 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 19:55:29 - INFO - codeparrot_training - Step 25923: {'lr': 0.00046845251824148825, 'samples': 13273088, 'steps': 25923, 'loss/train': 1.7245408296585083} +03/04/2022 19:55:32 - INFO - codeparrot_training - Step 25924: {'lr': 0.0004684499376956489, 'samples': 13273600, 'steps': 25924, 'loss/train': 1.3210349082946777} +03/04/2022 19:55:34 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 19:55:38 - INFO - codeparrot_training - Step 25925: {'lr': 0.00046844735705137944, 'samples': 13274112, 'steps': 25925, 'loss/train': 1.2078791856765747} +03/04/2022 19:55:41 - INFO - codeparrot_training - Step 25926: {'lr': 0.0004684447763086809, 'samples': 13274624, 'steps': 25926, 'loss/train': 1.9099268913269043} +03/04/2022 19:55:42 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 19:55:46 - INFO - codeparrot_training - Step 25927: {'lr': 0.00046844219546755454, 'samples': 13275136, 'steps': 25927, 'loss/train': 1.9665889739990234} +03/04/2022 19:55:49 - INFO - codeparrot_training - Step 25928: {'lr': 0.0004684396145280014, 'samples': 13275648, 'steps': 25928, 'loss/train': 1.132936716079712} +03/04/2022 19:55:51 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/04/2022 19:55:55 - INFO - codeparrot_training - Step 25929: {'lr': 0.00046843703349002286, 'samples': 13276160, 'steps': 25929, 'loss/train': 1.3434978723526} +03/04/2022 19:55:58 - INFO - codeparrot_training - Step 25930: {'lr': 0.00046843445235361994, 'samples': 13276672, 'steps': 25930, 'loss/train': 1.3434816598892212} +03/04/2022 19:55:59 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 19:56:03 - INFO - codeparrot_training - Step 25931: {'lr': 0.0004684318711187938, 'samples': 13277184, 'steps': 25931, 'loss/train': 1.2936205863952637} +03/04/2022 19:56:06 - INFO - codeparrot_training - Step 25932: {'lr': 0.0004684292897855457, 'samples': 13277696, 'steps': 25932, 'loss/train': 0.7396809458732605} +03/04/2022 19:56:07 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 19:56:12 - INFO - codeparrot_training - Step 25933: {'lr': 0.00046842670835387667, 'samples': 13278208, 'steps': 25933, 'loss/train': 2.231295585632324} +03/04/2022 19:56:15 - INFO - codeparrot_training - Step 25934: {'lr': 0.00046842412682378796, 'samples': 13278720, 'steps': 25934, 'loss/train': 2.269412040710449} +03/04/2022 19:56:16 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/04/2022 19:56:20 - INFO - codeparrot_training - Step 25935: {'lr': 0.0004684215451952807, 'samples': 13279232, 'steps': 25935, 'loss/train': 2.0541248321533203} +03/04/2022 19:56:23 - INFO - codeparrot_training - Step 25936: {'lr': 0.000468418963468356, 'samples': 13279744, 'steps': 25936, 'loss/train': 2.3254141807556152} +03/04/2022 19:56:25 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 19:56:29 - INFO - codeparrot_training - Step 25937: {'lr': 0.0004684163816430152, 'samples': 13280256, 'steps': 25937, 'loss/train': 1.1571645736694336} +03/04/2022 19:56:32 - INFO - codeparrot_training - Step 25938: {'lr': 0.00046841379971925923, 'samples': 13280768, 'steps': 25938, 'loss/train': 1.0219831466674805} +03/04/2022 19:56:33 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 19:56:37 - INFO - codeparrot_training - Step 25939: {'lr': 0.0004684112176970895, 'samples': 13281280, 'steps': 25939, 'loss/train': 1.697920560836792} +03/04/2022 19:56:40 - INFO - codeparrot_training - Step 25940: {'lr': 0.0004684086355765069, 'samples': 13281792, 'steps': 25940, 'loss/train': 1.6999151706695557} +03/04/2022 19:56:42 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 19:56:45 - INFO - codeparrot_training - Step 25941: {'lr': 0.00046840605335751284, 'samples': 13282304, 'steps': 25941, 'loss/train': 1.7646182775497437} +03/04/2022 19:56:49 - INFO - codeparrot_training - Step 25942: {'lr': 0.0004684034710401084, 'samples': 13282816, 'steps': 25942, 'loss/train': 1.4748141765594482} +03/04/2022 19:56:50 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 19:56:54 - INFO - codeparrot_training - Step 25943: {'lr': 0.00046840088862429465, 'samples': 13283328, 'steps': 25943, 'loss/train': 2.6214559078216553} +03/04/2022 19:56:57 - INFO - codeparrot_training - Step 25944: {'lr': 0.00046839830611007297, 'samples': 13283840, 'steps': 25944, 'loss/train': 1.9332501888275146} +03/04/2022 19:57:00 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 19:57:03 - INFO - codeparrot_training - Step 25945: {'lr': 0.00046839572349744417, 'samples': 13284352, 'steps': 25945, 'loss/train': 1.9477851390838623} +03/04/2022 19:57:06 - INFO - codeparrot_training - Step 25946: {'lr': 0.0004683931407864098, 'samples': 13284864, 'steps': 25946, 'loss/train': 1.5146911144256592} +03/04/2022 19:57:08 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 19:57:11 - INFO - codeparrot_training - Step 25947: {'lr': 0.0004683905579769708, 'samples': 13285376, 'steps': 25947, 'loss/train': 1.7338972091674805} +03/04/2022 19:57:14 - INFO - codeparrot_training - Step 25948: {'lr': 0.0004683879750691283, 'samples': 13285888, 'steps': 25948, 'loss/train': 1.0507495403289795} +03/04/2022 19:57:16 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 19:57:19 - INFO - codeparrot_training - Step 25949: {'lr': 0.00046838539206288366, 'samples': 13286400, 'steps': 25949, 'loss/train': 1.6068230867385864} +03/04/2022 19:57:23 - INFO - codeparrot_training - Step 25950: {'lr': 0.00046838280895823795, 'samples': 13286912, 'steps': 25950, 'loss/train': 1.342026710510254} +03/04/2022 19:57:25 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/04/2022 19:57:28 - INFO - codeparrot_training - Step 25951: {'lr': 0.0004683802257551922, 'samples': 13287424, 'steps': 25951, 'loss/train': 1.280750036239624} +03/04/2022 19:57:31 - INFO - codeparrot_training - Step 25952: {'lr': 0.00046837764245374777, 'samples': 13287936, 'steps': 25952, 'loss/train': 1.242254376411438} +03/04/2022 19:57:34 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/04/2022 19:57:37 - INFO - codeparrot_training - Step 25953: {'lr': 0.0004683750590539057, 'samples': 13288448, 'steps': 25953, 'loss/train': 2.5195491313934326} +03/04/2022 19:57:40 - INFO - codeparrot_training - Step 25954: {'lr': 0.00046837247555566727, 'samples': 13288960, 'steps': 25954, 'loss/train': 0.6278606653213501} +03/04/2022 19:57:43 - INFO - codeparrot_training - Step 25955: {'lr': 0.00046836989195903344, 'samples': 13289472, 'steps': 25955, 'loss/train': 1.9989103078842163} +03/04/2022 19:57:45 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 19:57:49 - INFO - codeparrot_training - Step 25956: {'lr': 0.00046836730826400565, 'samples': 13289984, 'steps': 25956, 'loss/train': 2.1316897869110107} +03/04/2022 19:57:52 - INFO - codeparrot_training - Step 25957: {'lr': 0.00046836472447058485, 'samples': 13290496, 'steps': 25957, 'loss/train': 1.4017983675003052} +03/04/2022 19:57:54 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 19:57:57 - INFO - codeparrot_training - Step 25958: {'lr': 0.0004683621405787723, 'samples': 13291008, 'steps': 25958, 'loss/train': 1.9236096143722534} +03/04/2022 19:58:00 - INFO - codeparrot_training - Step 25959: {'lr': 0.0004683595565885691, 'samples': 13291520, 'steps': 25959, 'loss/train': 1.9333937168121338} +03/04/2022 19:58:02 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 19:58:06 - INFO - codeparrot_training - Step 25960: {'lr': 0.0004683569724999765, 'samples': 13292032, 'steps': 25960, 'loss/train': 1.951561689376831} +03/04/2022 19:58:09 - INFO - codeparrot_training - Step 25961: {'lr': 0.0004683543883129956, 'samples': 13292544, 'steps': 25961, 'loss/train': 1.8304283618927002} +03/04/2022 19:58:12 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 19:58:14 - INFO - codeparrot_training - Step 25962: {'lr': 0.00046835180402762756, 'samples': 13293056, 'steps': 25962, 'loss/train': 1.6901862621307373} +03/04/2022 19:58:17 - INFO - codeparrot_training - Step 25963: {'lr': 0.00046834921964387363, 'samples': 13293568, 'steps': 25963, 'loss/train': 5.451969623565674} +03/04/2022 19:58:20 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 19:58:23 - INFO - codeparrot_training - Step 25964: {'lr': 0.0004683466351617348, 'samples': 13294080, 'steps': 25964, 'loss/train': 1.5150420665740967} +03/04/2022 19:58:26 - INFO - codeparrot_training - Step 25965: {'lr': 0.00046834405058121244, 'samples': 13294592, 'steps': 25965, 'loss/train': 1.571437954902649} +03/04/2022 19:58:28 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 19:58:31 - INFO - codeparrot_training - Step 25966: {'lr': 0.0004683414659023076, 'samples': 13295104, 'steps': 25966, 'loss/train': 1.7864043712615967} +03/04/2022 19:58:34 - INFO - codeparrot_training - Step 25967: {'lr': 0.0004683388811250214, 'samples': 13295616, 'steps': 25967, 'loss/train': 1.7730915546417236} +03/04/2022 19:58:37 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 19:58:40 - INFO - codeparrot_training - Step 25968: {'lr': 0.0004683362962493552, 'samples': 13296128, 'steps': 25968, 'loss/train': 1.475606083869934} +03/04/2022 19:58:43 - INFO - codeparrot_training - Step 25969: {'lr': 0.00046833371127530995, 'samples': 13296640, 'steps': 25969, 'loss/train': 2.481719732284546} +03/04/2022 19:58:45 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/04/2022 19:58:48 - INFO - codeparrot_training - Step 25970: {'lr': 0.00046833112620288684, 'samples': 13297152, 'steps': 25970, 'loss/train': 1.379334807395935} +03/04/2022 19:58:51 - INFO - codeparrot_training - Step 25971: {'lr': 0.0004683285410320872, 'samples': 13297664, 'steps': 25971, 'loss/train': 1.6995652914047241} +03/04/2022 19:58:54 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 19:58:57 - INFO - codeparrot_training - Step 25972: {'lr': 0.000468325955762912, 'samples': 13298176, 'steps': 25972, 'loss/train': 1.784519076347351} +03/04/2022 19:59:00 - INFO - codeparrot_training - Step 25973: {'lr': 0.0004683233703953626, 'samples': 13298688, 'steps': 25973, 'loss/train': 2.130711793899536} +03/04/2022 19:59:02 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 19:59:05 - INFO - codeparrot_training - Step 25974: {'lr': 0.00046832078492944, 'samples': 13299200, 'steps': 25974, 'loss/train': 1.400537133216858} +03/04/2022 19:59:08 - INFO - codeparrot_training - Step 25975: {'lr': 0.0004683181993651454, 'samples': 13299712, 'steps': 25975, 'loss/train': 1.9857072830200195} +03/04/2022 19:59:11 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/04/2022 19:59:14 - INFO - codeparrot_training - Step 25976: {'lr': 0.0004683156137024801, 'samples': 13300224, 'steps': 25976, 'loss/train': 3.087407112121582} +03/04/2022 19:59:17 - INFO - codeparrot_training - Step 25977: {'lr': 0.00046831302794144504, 'samples': 13300736, 'steps': 25977, 'loss/train': 2.095113515853882} +03/04/2022 19:59:19 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 19:59:22 - INFO - codeparrot_training - Step 25978: {'lr': 0.00046831044208204154, 'samples': 13301248, 'steps': 25978, 'loss/train': 2.3248891830444336} +03/04/2022 19:59:25 - INFO - codeparrot_training - Step 25979: {'lr': 0.0004683078561242707, 'samples': 13301760, 'steps': 25979, 'loss/train': 2.0172057151794434} +03/04/2022 19:59:28 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 19:59:30 - INFO - codeparrot_training - Step 25980: {'lr': 0.00046830527006813373, 'samples': 13302272, 'steps': 25980, 'loss/train': 1.6450740098953247} +03/04/2022 19:59:34 - INFO - codeparrot_training - Step 25981: {'lr': 0.00046830268391363176, 'samples': 13302784, 'steps': 25981, 'loss/train': 2.0067148208618164} +03/04/2022 19:59:36 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 19:59:39 - INFO - codeparrot_training - Step 25982: {'lr': 0.0004683000976607659, 'samples': 13303296, 'steps': 25982, 'loss/train': 0.389595627784729} +03/04/2022 19:59:42 - INFO - codeparrot_training - Step 25983: {'lr': 0.00046829751130953747, 'samples': 13303808, 'steps': 25983, 'loss/train': 1.8984497785568237} +03/04/2022 19:59:45 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 19:59:47 - INFO - codeparrot_training - Step 25984: {'lr': 0.0004682949248599476, 'samples': 13304320, 'steps': 25984, 'loss/train': 0.3006818890571594} +03/04/2022 19:59:51 - INFO - codeparrot_training - Step 25985: {'lr': 0.0004682923383119973, 'samples': 13304832, 'steps': 25985, 'loss/train': 1.7708948850631714} +03/04/2022 19:59:54 - INFO - codeparrot_training - Step 25986: {'lr': 0.0004682897516656879, 'samples': 13305344, 'steps': 25986, 'loss/train': 1.1965590715408325} +03/04/2022 19:59:54 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/04/2022 19:59:59 - INFO - codeparrot_training - Step 25987: {'lr': 0.00046828716492102043, 'samples': 13305856, 'steps': 25987, 'loss/train': 1.3682154417037964} +03/04/2022 20:00:02 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 20:00:04 - INFO - codeparrot_training - Step 25988: {'lr': 0.0004682845780779962, 'samples': 13306368, 'steps': 25988, 'loss/train': 2.382455825805664} +03/04/2022 20:00:08 - INFO - codeparrot_training - Step 25989: {'lr': 0.00046828199113661627, 'samples': 13306880, 'steps': 25989, 'loss/train': 1.4804397821426392} +03/04/2022 20:00:10 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 20:00:13 - INFO - codeparrot_training - Step 25990: {'lr': 0.0004682794040968819, 'samples': 13307392, 'steps': 25990, 'loss/train': 2.6654675006866455} +03/04/2022 20:00:16 - INFO - codeparrot_training - Step 25991: {'lr': 0.0004682768169587942, 'samples': 13307904, 'steps': 25991, 'loss/train': 0.4490848779678345} +03/04/2022 20:00:19 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/04/2022 20:00:21 - INFO - codeparrot_training - Step 25992: {'lr': 0.0004682742297223543, 'samples': 13308416, 'steps': 25992, 'loss/train': 1.8544172048568726} +03/04/2022 20:00:24 - INFO - codeparrot_training - Step 25993: {'lr': 0.00046827164238756337, 'samples': 13308928, 'steps': 25993, 'loss/train': 2.208698034286499} +03/04/2022 20:00:27 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 20:00:30 - INFO - codeparrot_training - Step 25994: {'lr': 0.00046826905495442263, 'samples': 13309440, 'steps': 25994, 'loss/train': 1.708541750907898} +03/04/2022 20:00:33 - INFO - codeparrot_training - Step 25995: {'lr': 0.00046826646742293326, 'samples': 13309952, 'steps': 25995, 'loss/train': 1.9661489725112915} +03/04/2022 20:00:36 - INFO - codeparrot_training - Step 25996: {'lr': 0.00046826387979309635, 'samples': 13310464, 'steps': 25996, 'loss/train': 1.159864068031311} +03/04/2022 20:00:38 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 20:00:42 - INFO - codeparrot_training - Step 25997: {'lr': 0.0004682612920649131, 'samples': 13310976, 'steps': 25997, 'loss/train': 1.7186181545257568} +03/04/2022 20:00:45 - INFO - codeparrot_training - Step 25998: {'lr': 0.00046825870423838466, 'samples': 13311488, 'steps': 25998, 'loss/train': 2.6319777965545654} +03/04/2022 20:00:46 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/04/2022 20:00:50 - INFO - codeparrot_training - Step 25999: {'lr': 0.00046825611631351227, 'samples': 13312000, 'steps': 25999, 'loss/train': 1.9062258005142212} +03/04/2022 20:00:53 - INFO - codeparrot_training - Step 26000: {'lr': 0.00046825352829029705, 'samples': 13312512, 'steps': 26000, 'loss/train': 1.642259120941162} +03/04/2022 20:00:54 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 20:00:58 - INFO - codeparrot_training - Step 26001: {'lr': 0.00046825094016874014, 'samples': 13313024, 'steps': 26001, 'loss/train': 1.9957187175750732} +03/04/2022 20:01:02 - INFO - codeparrot_training - Step 26002: {'lr': 0.00046824835194884273, 'samples': 13313536, 'steps': 26002, 'loss/train': 2.7465879917144775} +03/04/2022 20:01:03 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 20:01:07 - INFO - codeparrot_training - Step 26003: {'lr': 0.0004682457636306059, 'samples': 13314048, 'steps': 26003, 'loss/train': 2.3993852138519287} +03/04/2022 20:01:10 - INFO - codeparrot_training - Step 26004: {'lr': 0.000468243175214031, 'samples': 13314560, 'steps': 26004, 'loss/train': 1.9481501579284668} +03/04/2022 20:01:11 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/04/2022 20:01:15 - INFO - codeparrot_training - Step 26005: {'lr': 0.00046824058669911906, 'samples': 13315072, 'steps': 26005, 'loss/train': 1.878860354423523} +03/04/2022 20:01:18 - INFO - codeparrot_training - Step 26006: {'lr': 0.00046823799808587126, 'samples': 13315584, 'steps': 26006, 'loss/train': 2.114102840423584} +03/04/2022 20:01:19 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 20:01:24 - INFO - codeparrot_training - Step 26007: {'lr': 0.00046823540937428876, 'samples': 13316096, 'steps': 26007, 'loss/train': 1.279818058013916} +03/04/2022 20:01:27 - INFO - codeparrot_training - Step 26008: {'lr': 0.0004682328205643728, 'samples': 13316608, 'steps': 26008, 'loss/train': 1.2121267318725586} +03/04/2022 20:01:27 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 20:01:32 - INFO - codeparrot_training - Step 26009: {'lr': 0.00046823023165612455, 'samples': 13317120, 'steps': 26009, 'loss/train': 1.4796291589736938} +03/04/2022 20:01:35 - INFO - codeparrot_training - Step 26010: {'lr': 0.000468227642649545, 'samples': 13317632, 'steps': 26010, 'loss/train': 1.2600312232971191} +03/04/2022 20:01:36 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 20:01:41 - INFO - codeparrot_training - Step 26011: {'lr': 0.00046822505354463553, 'samples': 13318144, 'steps': 26011, 'loss/train': 1.5071678161621094} +03/04/2022 20:01:44 - INFO - codeparrot_training - Step 26012: {'lr': 0.0004682224643413972, 'samples': 13318656, 'steps': 26012, 'loss/train': 1.6324503421783447} +03/04/2022 20:01:44 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 20:01:49 - INFO - codeparrot_training - Step 26013: {'lr': 0.0004682198750398312, 'samples': 13319168, 'steps': 26013, 'loss/train': 1.8580371141433716} +03/04/2022 20:01:52 - INFO - codeparrot_training - Step 26014: {'lr': 0.00046821728563993867, 'samples': 13319680, 'steps': 26014, 'loss/train': 1.4944595098495483} +03/04/2022 20:01:53 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 20:01:58 - INFO - codeparrot_training - Step 26015: {'lr': 0.0004682146961417208, 'samples': 13320192, 'steps': 26015, 'loss/train': 2.1687328815460205} +03/04/2022 20:02:01 - INFO - codeparrot_training - Step 26016: {'lr': 0.00046821210654517874, 'samples': 13320704, 'steps': 26016, 'loss/train': 1.8499046564102173} +03/04/2022 20:02:01 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/04/2022 20:02:06 - INFO - codeparrot_training - Step 26017: {'lr': 0.0004682095168503137, 'samples': 13321216, 'steps': 26017, 'loss/train': 1.6416001319885254} +03/04/2022 20:02:09 - INFO - codeparrot_training - Step 26018: {'lr': 0.00046820692705712685, 'samples': 13321728, 'steps': 26018, 'loss/train': 1.6696640253067017} +03/04/2022 20:02:10 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 20:02:15 - INFO - codeparrot_training - Step 26019: {'lr': 0.00046820433716561927, 'samples': 13322240, 'steps': 26019, 'loss/train': 2.091885805130005} +03/04/2022 20:02:18 - INFO - codeparrot_training - Step 26020: {'lr': 0.0004682017471757922, 'samples': 13322752, 'steps': 26020, 'loss/train': 1.6339701414108276} +03/04/2022 20:02:18 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 20:02:23 - INFO - codeparrot_training - Step 26021: {'lr': 0.0004681991570876468, 'samples': 13323264, 'steps': 26021, 'loss/train': 1.8215528726577759} +03/04/2022 20:02:26 - INFO - codeparrot_training - Step 26022: {'lr': 0.00046819656690118424, 'samples': 13323776, 'steps': 26022, 'loss/train': 3.4259986877441406} +03/04/2022 20:02:27 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 20:02:31 - INFO - codeparrot_training - Step 26023: {'lr': 0.00046819397661640563, 'samples': 13324288, 'steps': 26023, 'loss/train': 2.394054651260376} +03/04/2022 20:02:35 - INFO - codeparrot_training - Step 26024: {'lr': 0.0004681913862333122, 'samples': 13324800, 'steps': 26024, 'loss/train': 1.49062180519104} +03/04/2022 20:02:35 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 20:02:40 - INFO - codeparrot_training - Step 26025: {'lr': 0.0004681887957519051, 'samples': 13325312, 'steps': 26025, 'loss/train': 2.31349778175354} +03/04/2022 20:02:43 - INFO - codeparrot_training - Step 26026: {'lr': 0.00046818620517218544, 'samples': 13325824, 'steps': 26026, 'loss/train': 0.6781665086746216} +03/04/2022 20:02:43 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 20:02:48 - INFO - codeparrot_training - Step 26027: {'lr': 0.00046818361449415456, 'samples': 13326336, 'steps': 26027, 'loss/train': 2.374075412750244} +03/04/2022 20:02:52 - INFO - codeparrot_training - Step 26028: {'lr': 0.00046818102371781343, 'samples': 13326848, 'steps': 26028, 'loss/train': 2.1996102333068848} +03/04/2022 20:02:52 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 20:02:57 - INFO - codeparrot_training - Step 26029: {'lr': 0.0004681784328431633, 'samples': 13327360, 'steps': 26029, 'loss/train': 1.759771704673767} +03/04/2022 20:03:00 - INFO - codeparrot_training - Step 26030: {'lr': 0.0004681758418702054, 'samples': 13327872, 'steps': 26030, 'loss/train': 1.7152024507522583} +03/04/2022 20:03:00 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 20:03:05 - INFO - codeparrot_training - Step 26031: {'lr': 0.0004681732507989408, 'samples': 13328384, 'steps': 26031, 'loss/train': 2.422318458557129} +03/04/2022 20:03:09 - INFO - codeparrot_training - Step 26032: {'lr': 0.00046817065962937067, 'samples': 13328896, 'steps': 26032, 'loss/train': 1.7547481060028076} +03/04/2022 20:03:09 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 20:03:14 - INFO - codeparrot_training - Step 26033: {'lr': 0.00046816806836149624, 'samples': 13329408, 'steps': 26033, 'loss/train': 1.7028642892837524} +03/04/2022 20:03:17 - INFO - codeparrot_training - Step 26034: {'lr': 0.00046816547699531866, 'samples': 13329920, 'steps': 26034, 'loss/train': 1.9825752973556519} +03/04/2022 20:03:17 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 20:03:24 - INFO - codeparrot_training - Step 26035: {'lr': 0.000468162885530839, 'samples': 13330432, 'steps': 26035, 'loss/train': 1.625740647315979} +03/04/2022 20:03:27 - INFO - codeparrot_training - Step 26036: {'lr': 0.00046816029396805857, 'samples': 13330944, 'steps': 26036, 'loss/train': 1.21263587474823} +03/04/2022 20:03:30 - INFO - codeparrot_training - Step 26037: {'lr': 0.00046815770230697844, 'samples': 13331456, 'steps': 26037, 'loss/train': 1.2467689514160156} +03/04/2022 20:03:30 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 20:03:35 - INFO - codeparrot_training - Step 26038: {'lr': 0.0004681551105475999, 'samples': 13331968, 'steps': 26038, 'loss/train': 1.9564452171325684} +03/04/2022 20:03:39 - INFO - codeparrot_training - Step 26039: {'lr': 0.0004681525186899239, 'samples': 13332480, 'steps': 26039, 'loss/train': 0.5865878462791443} +03/04/2022 20:03:39 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 20:03:44 - INFO - codeparrot_training - Step 26040: {'lr': 0.00046814992673395185, 'samples': 13332992, 'steps': 26040, 'loss/train': 1.656312346458435} +03/04/2022 20:03:47 - INFO - codeparrot_training - Step 26041: {'lr': 0.0004681473346796848, 'samples': 13333504, 'steps': 26041, 'loss/train': 1.2797900438308716} +03/04/2022 20:03:48 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 20:03:52 - INFO - codeparrot_training - Step 26042: {'lr': 0.0004681447425271239, 'samples': 13334016, 'steps': 26042, 'loss/train': 1.9386578798294067} +03/04/2022 20:03:56 - INFO - codeparrot_training - Step 26043: {'lr': 0.0004681421502762704, 'samples': 13334528, 'steps': 26043, 'loss/train': 1.8510576486587524} +03/04/2022 20:03:56 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 20:04:01 - INFO - codeparrot_training - Step 26044: {'lr': 0.0004681395579271253, 'samples': 13335040, 'steps': 26044, 'loss/train': 1.896577000617981} +03/04/2022 20:04:04 - INFO - codeparrot_training - Step 26045: {'lr': 0.00046813696547969, 'samples': 13335552, 'steps': 26045, 'loss/train': 1.9619214534759521} +03/04/2022 20:04:06 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 20:04:10 - INFO - codeparrot_training - Step 26046: {'lr': 0.00046813437293396543, 'samples': 13336064, 'steps': 26046, 'loss/train': 1.7232069969177246} +03/04/2022 20:04:13 - INFO - codeparrot_training - Step 26047: {'lr': 0.000468131780289953, 'samples': 13336576, 'steps': 26047, 'loss/train': 2.3391799926757812} +03/04/2022 20:04:15 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/04/2022 20:04:18 - INFO - codeparrot_training - Step 26048: {'lr': 0.00046812918754765364, 'samples': 13337088, 'steps': 26048, 'loss/train': 1.3230880498886108} +03/04/2022 20:04:21 - INFO - codeparrot_training - Step 26049: {'lr': 0.00046812659470706877, 'samples': 13337600, 'steps': 26049, 'loss/train': 2.23012638092041} +03/04/2022 20:04:23 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 20:04:27 - INFO - codeparrot_training - Step 26050: {'lr': 0.0004681240017681993, 'samples': 13338112, 'steps': 26050, 'loss/train': 1.5336859226226807} +03/04/2022 20:04:30 - INFO - codeparrot_training - Step 26051: {'lr': 0.00046812140873104657, 'samples': 13338624, 'steps': 26051, 'loss/train': 1.8654184341430664} +03/04/2022 20:04:31 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 20:04:35 - INFO - codeparrot_training - Step 26052: {'lr': 0.00046811881559561167, 'samples': 13339136, 'steps': 26052, 'loss/train': 1.5603783130645752} +03/04/2022 20:04:38 - INFO - codeparrot_training - Step 26053: {'lr': 0.00046811622236189585, 'samples': 13339648, 'steps': 26053, 'loss/train': 1.9893840551376343} +03/04/2022 20:04:40 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 20:04:43 - INFO - codeparrot_training - Step 26054: {'lr': 0.0004681136290299002, 'samples': 13340160, 'steps': 26054, 'loss/train': 1.6339237689971924} +03/04/2022 20:04:47 - INFO - codeparrot_training - Step 26055: {'lr': 0.00046811103559962585, 'samples': 13340672, 'steps': 26055, 'loss/train': 1.1602602005004883} +03/04/2022 20:04:49 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 20:04:52 - INFO - codeparrot_training - Step 26056: {'lr': 0.00046810844207107415, 'samples': 13341184, 'steps': 26056, 'loss/train': 1.939034104347229} +03/04/2022 20:04:55 - INFO - codeparrot_training - Step 26057: {'lr': 0.0004681058484442461, 'samples': 13341696, 'steps': 26057, 'loss/train': 2.1607604026794434} +03/04/2022 20:04:57 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/04/2022 20:05:01 - INFO - codeparrot_training - Step 26058: {'lr': 0.00046810325471914295, 'samples': 13342208, 'steps': 26058, 'loss/train': 1.9978156089782715} +03/04/2022 20:05:04 - INFO - codeparrot_training - Step 26059: {'lr': 0.00046810066089576573, 'samples': 13342720, 'steps': 26059, 'loss/train': 2.2285096645355225} +03/04/2022 20:05:06 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 20:05:09 - INFO - codeparrot_training - Step 26060: {'lr': 0.00046809806697411583, 'samples': 13343232, 'steps': 26060, 'loss/train': 1.9420771598815918} +03/04/2022 20:05:12 - INFO - codeparrot_training - Step 26061: {'lr': 0.0004680954729541942, 'samples': 13343744, 'steps': 26061, 'loss/train': 1.859422206878662} +03/04/2022 20:05:15 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/04/2022 20:05:18 - INFO - codeparrot_training - Step 26062: {'lr': 0.00046809287883600227, 'samples': 13344256, 'steps': 26062, 'loss/train': 2.1467933654785156} +03/04/2022 20:05:21 - INFO - codeparrot_training - Step 26063: {'lr': 0.00046809028461954093, 'samples': 13344768, 'steps': 26063, 'loss/train': 1.8985170125961304} +03/04/2022 20:05:24 - INFO - codeparrot_training - Step 26064: {'lr': 0.00046808769030481153, 'samples': 13345280, 'steps': 26064, 'loss/train': 2.155360221862793} +03/04/2022 20:05:24 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 20:05:29 - INFO - codeparrot_training - Step 26065: {'lr': 0.00046808509589181513, 'samples': 13345792, 'steps': 26065, 'loss/train': 2.5804591178894043} +03/04/2022 20:05:33 - INFO - codeparrot_training - Step 26066: {'lr': 0.00046808250138055305, 'samples': 13346304, 'steps': 26066, 'loss/train': 1.4153510332107544} +03/04/2022 20:05:33 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 20:05:38 - INFO - codeparrot_training - Step 26067: {'lr': 0.0004680799067710263, 'samples': 13346816, 'steps': 26067, 'loss/train': 1.696730136871338} +03/04/2022 20:05:41 - INFO - codeparrot_training - Step 26068: {'lr': 0.00046807731206323605, 'samples': 13347328, 'steps': 26068, 'loss/train': 1.9909054040908813} +03/04/2022 20:05:41 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 20:05:47 - INFO - codeparrot_training - Step 26069: {'lr': 0.00046807471725718357, 'samples': 13347840, 'steps': 26069, 'loss/train': 2.2206437587738037} +03/04/2022 20:05:50 - INFO - codeparrot_training - Step 26070: {'lr': 0.00046807212235287, 'samples': 13348352, 'steps': 26070, 'loss/train': 1.504632592201233} +03/04/2022 20:05:51 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 20:05:55 - INFO - codeparrot_training - Step 26071: {'lr': 0.0004680695273502965, 'samples': 13348864, 'steps': 26071, 'loss/train': 1.7281231880187988} +03/04/2022 20:05:58 - INFO - codeparrot_training - Step 26072: {'lr': 0.00046806693224946426, 'samples': 13349376, 'steps': 26072, 'loss/train': 1.9475656747817993} +03/04/2022 20:05:59 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/04/2022 20:06:04 - INFO - codeparrot_training - Step 26073: {'lr': 0.00046806433705037445, 'samples': 13349888, 'steps': 26073, 'loss/train': 1.2138975858688354} +03/04/2022 20:06:07 - INFO - codeparrot_training - Step 26074: {'lr': 0.00046806174175302806, 'samples': 13350400, 'steps': 26074, 'loss/train': 1.6533374786376953} +03/04/2022 20:06:07 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 20:06:12 - INFO - codeparrot_training - Step 26075: {'lr': 0.00046805914635742656, 'samples': 13350912, 'steps': 26075, 'loss/train': 1.3776440620422363} +03/04/2022 20:06:15 - INFO - codeparrot_training - Step 26076: {'lr': 0.0004680565508635709, 'samples': 13351424, 'steps': 26076, 'loss/train': 1.4836182594299316} +03/04/2022 20:06:16 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 20:06:21 - INFO - codeparrot_training - Step 26077: {'lr': 0.00046805395527146237, 'samples': 13351936, 'steps': 26077, 'loss/train': 0.08600012958049774} +03/04/2022 20:06:24 - INFO - codeparrot_training - Step 26078: {'lr': 0.0004680513595811021, 'samples': 13352448, 'steps': 26078, 'loss/train': 2.2343058586120605} +03/04/2022 20:06:24 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 20:06:29 - INFO - codeparrot_training - Step 26079: {'lr': 0.0004680487637924912, 'samples': 13352960, 'steps': 26079, 'loss/train': 1.3945581912994385} +03/04/2022 20:06:32 - INFO - codeparrot_training - Step 26080: {'lr': 0.0004680461679056309, 'samples': 13353472, 'steps': 26080, 'loss/train': 1.9323657751083374} +03/04/2022 20:06:32 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 20:06:37 - INFO - codeparrot_training - Step 26081: {'lr': 0.00046804357192052246, 'samples': 13353984, 'steps': 26081, 'loss/train': 1.6812783479690552} +03/04/2022 20:06:40 - INFO - codeparrot_training - Step 26082: {'lr': 0.00046804097583716685, 'samples': 13354496, 'steps': 26082, 'loss/train': 1.9438518285751343} +03/04/2022 20:06:41 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 20:06:46 - INFO - codeparrot_training - Step 26083: {'lr': 0.0004680383796555654, 'samples': 13355008, 'steps': 26083, 'loss/train': 1.2943382263183594} +03/04/2022 20:06:49 - INFO - codeparrot_training - Step 26084: {'lr': 0.00046803578337571917, 'samples': 13355520, 'steps': 26084, 'loss/train': 1.0002782344818115} +03/04/2022 20:06:49 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/04/2022 20:06:54 - INFO - codeparrot_training - Step 26085: {'lr': 0.00046803318699762937, 'samples': 13356032, 'steps': 26085, 'loss/train': 1.9299094676971436} +03/04/2022 20:06:57 - INFO - codeparrot_training - Step 26086: {'lr': 0.0004680305905212972, 'samples': 13356544, 'steps': 26086, 'loss/train': 0.6169664859771729} +03/04/2022 20:06:58 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 20:07:03 - INFO - codeparrot_training - Step 26087: {'lr': 0.0004680279939467238, 'samples': 13357056, 'steps': 26087, 'loss/train': 1.0394940376281738} +03/04/2022 20:07:06 - INFO - codeparrot_training - Step 26088: {'lr': 0.00046802539727391033, 'samples': 13357568, 'steps': 26088, 'loss/train': 0.7880287170410156} +03/04/2022 20:07:06 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 20:07:11 - INFO - codeparrot_training - Step 26089: {'lr': 0.0004680228005028581, 'samples': 13358080, 'steps': 26089, 'loss/train': 2.655686378479004} +03/04/2022 20:07:14 - INFO - codeparrot_training - Step 26090: {'lr': 0.000468020203633568, 'samples': 13358592, 'steps': 26090, 'loss/train': 2.0334312915802} +03/04/2022 20:07:15 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 20:07:20 - INFO - codeparrot_training - Step 26091: {'lr': 0.0004680176066660415, 'samples': 13359104, 'steps': 26091, 'loss/train': 1.6737325191497803} +03/04/2022 20:07:23 - INFO - codeparrot_training - Step 26092: {'lr': 0.00046801500960027957, 'samples': 13359616, 'steps': 26092, 'loss/train': 0.2667890787124634} +03/04/2022 20:07:23 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 20:07:28 - INFO - codeparrot_training - Step 26093: {'lr': 0.00046801241243628344, 'samples': 13360128, 'steps': 26093, 'loss/train': 1.9876550436019897} +03/04/2022 20:07:32 - INFO - codeparrot_training - Step 26094: {'lr': 0.00046800981517405426, 'samples': 13360640, 'steps': 26094, 'loss/train': 1.8479565382003784} +03/04/2022 20:07:32 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 20:07:37 - INFO - codeparrot_training - Step 26095: {'lr': 0.0004680072178135932, 'samples': 13361152, 'steps': 26095, 'loss/train': 2.7933173179626465} +03/04/2022 20:07:40 - INFO - codeparrot_training - Step 26096: {'lr': 0.00046800462035490156, 'samples': 13361664, 'steps': 26096, 'loss/train': 1.850113034248352} +03/04/2022 20:07:40 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/04/2022 20:07:45 - INFO - codeparrot_training - Step 26097: {'lr': 0.0004680020227979803, 'samples': 13362176, 'steps': 26097, 'loss/train': 2.1076200008392334} +03/04/2022 20:07:48 - INFO - codeparrot_training - Step 26098: {'lr': 0.0004679994251428308, 'samples': 13362688, 'steps': 26098, 'loss/train': 1.461331844329834} +03/04/2022 20:07:49 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/04/2022 20:07:54 - INFO - codeparrot_training - Step 26099: {'lr': 0.00046799682738945397, 'samples': 13363200, 'steps': 26099, 'loss/train': 2.1057004928588867} +03/04/2022 20:07:57 - INFO - codeparrot_training - Step 26100: {'lr': 0.00046799422953785124, 'samples': 13363712, 'steps': 26100, 'loss/train': 1.9739357233047485} +03/04/2022 20:07:57 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 20:08:02 - INFO - codeparrot_training - Step 26101: {'lr': 0.00046799163158802365, 'samples': 13364224, 'steps': 26101, 'loss/train': 2.557032346725464} +03/04/2022 20:08:05 - INFO - codeparrot_training - Step 26102: {'lr': 0.00046798903353997243, 'samples': 13364736, 'steps': 26102, 'loss/train': 1.5120562314987183} +03/04/2022 20:08:06 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 20:08:11 - INFO - codeparrot_training - Step 26103: {'lr': 0.0004679864353936987, 'samples': 13365248, 'steps': 26103, 'loss/train': 1.576947569847107} +03/04/2022 20:08:14 - INFO - codeparrot_training - Step 26104: {'lr': 0.0004679838371492036, 'samples': 13365760, 'steps': 26104, 'loss/train': 0.8888798356056213} +03/04/2022 20:08:14 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 20:08:19 - INFO - codeparrot_training - Step 26105: {'lr': 0.00046798123880648833, 'samples': 13366272, 'steps': 26105, 'loss/train': 1.4169363975524902} +03/04/2022 20:08:22 - INFO - codeparrot_training - Step 26106: {'lr': 0.0004679786403655542, 'samples': 13366784, 'steps': 26106, 'loss/train': 1.9095174074172974} +03/04/2022 20:08:23 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 20:08:27 - INFO - codeparrot_training - Step 26107: {'lr': 0.0004679760418264021, 'samples': 13367296, 'steps': 26107, 'loss/train': 1.3199622631072998} +03/04/2022 20:08:31 - INFO - codeparrot_training - Step 26108: {'lr': 0.00046797344318903343, 'samples': 13367808, 'steps': 26108, 'loss/train': 1.3211076259613037} +03/04/2022 20:08:31 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/04/2022 20:08:36 - INFO - codeparrot_training - Step 26109: {'lr': 0.0004679708444534493, 'samples': 13368320, 'steps': 26109, 'loss/train': 1.430361032485962} +03/04/2022 20:08:39 - INFO - codeparrot_training - Step 26110: {'lr': 0.0004679682456196509, 'samples': 13368832, 'steps': 26110, 'loss/train': 1.5073020458221436} +03/04/2022 20:08:40 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 20:08:44 - INFO - codeparrot_training - Step 26111: {'lr': 0.0004679656466876393, 'samples': 13369344, 'steps': 26111, 'loss/train': 1.5049188137054443} +03/04/2022 20:08:47 - INFO - codeparrot_training - Step 26112: {'lr': 0.00046796304765741583, 'samples': 13369856, 'steps': 26112, 'loss/train': 2.1840643882751465} +03/04/2022 20:08:48 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/04/2022 20:08:53 - INFO - codeparrot_training - Step 26113: {'lr': 0.00046796044852898144, 'samples': 13370368, 'steps': 26113, 'loss/train': 1.8755805492401123} +03/04/2022 20:08:56 - INFO - codeparrot_training - Step 26114: {'lr': 0.0004679578493023375, 'samples': 13370880, 'steps': 26114, 'loss/train': 2.1485798358917236} +03/04/2022 20:08:56 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 20:09:01 - INFO - codeparrot_training - Step 26115: {'lr': 0.00046795524997748515, 'samples': 13371392, 'steps': 26115, 'loss/train': 1.7973297834396362} +03/04/2022 20:09:05 - INFO - codeparrot_training - Step 26116: {'lr': 0.0004679526505544256, 'samples': 13371904, 'steps': 26116, 'loss/train': 1.2531079053878784} +03/04/2022 20:09:06 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/04/2022 20:09:10 - INFO - codeparrot_training - Step 26117: {'lr': 0.0004679500510331598, 'samples': 13372416, 'steps': 26117, 'loss/train': 3.1542937755584717} +03/04/2022 20:09:13 - INFO - codeparrot_training - Step 26118: {'lr': 0.00046794745141368917, 'samples': 13372928, 'steps': 26118, 'loss/train': 2.1721999645233154} +03/04/2022 20:09:15 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 20:09:18 - INFO - codeparrot_training - Step 26119: {'lr': 0.00046794485169601474, 'samples': 13373440, 'steps': 26119, 'loss/train': 1.8792463541030884} +03/04/2022 20:09:22 - INFO - codeparrot_training - Step 26120: {'lr': 0.00046794225188013773, 'samples': 13373952, 'steps': 26120, 'loss/train': 1.7419171333312988} +03/04/2022 20:09:23 - INFO - codeparrot_training - Skipping example with length 5 (seq_length=1024) +03/04/2022 20:09:27 - INFO - codeparrot_training - Step 26121: {'lr': 0.00046793965196605927, 'samples': 13374464, 'steps': 26121, 'loss/train': 1.6223589181900024} +03/04/2022 20:09:30 - INFO - codeparrot_training - Step 26122: {'lr': 0.00046793705195378066, 'samples': 13374976, 'steps': 26122, 'loss/train': 2.656970500946045} +03/04/2022 20:09:32 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 20:09:35 - INFO - codeparrot_training - Step 26123: {'lr': 0.0004679344518433029, 'samples': 13375488, 'steps': 26123, 'loss/train': 0.9795936942100525} +03/04/2022 20:09:39 - INFO - codeparrot_training - Step 26124: {'lr': 0.0004679318516346273, 'samples': 13376000, 'steps': 26124, 'loss/train': 1.3444304466247559} +03/04/2022 20:09:41 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/04/2022 20:09:44 - INFO - codeparrot_training - Step 26125: {'lr': 0.0004679292513277549, 'samples': 13376512, 'steps': 26125, 'loss/train': 1.149139404296875} +03/04/2022 20:09:47 - INFO - codeparrot_training - Step 26126: {'lr': 0.0004679266509226869, 'samples': 13377024, 'steps': 26126, 'loss/train': 1.4356820583343506} +03/04/2022 20:09:49 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 20:09:52 - INFO - codeparrot_training - Step 26127: {'lr': 0.0004679240504194246, 'samples': 13377536, 'steps': 26127, 'loss/train': 1.981621265411377} +03/04/2022 20:09:56 - INFO - codeparrot_training - Step 26128: {'lr': 0.00046792144981796905, 'samples': 13378048, 'steps': 26128, 'loss/train': 1.70073664188385} +03/04/2022 20:09:58 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/04/2022 20:10:01 - INFO - codeparrot_training - Step 26129: {'lr': 0.0004679188491183215, 'samples': 13378560, 'steps': 26129, 'loss/train': 2.0027880668640137} +03/04/2022 20:10:04 - INFO - codeparrot_training - Step 26130: {'lr': 0.00046791624832048307, 'samples': 13379072, 'steps': 26130, 'loss/train': 2.084019422531128} +03/04/2022 20:10:06 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 20:10:09 - INFO - codeparrot_training - Step 26131: {'lr': 0.0004679136474244549, 'samples': 13379584, 'steps': 26131, 'loss/train': 2.7600772380828857} +03/04/2022 20:10:13 - INFO - codeparrot_training - Step 26132: {'lr': 0.00046791104643023823, 'samples': 13380096, 'steps': 26132, 'loss/train': 0.683334231376648} +03/04/2022 20:10:15 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/04/2022 20:10:18 - INFO - codeparrot_training - Step 26133: {'lr': 0.0004679084453378342, 'samples': 13380608, 'steps': 26133, 'loss/train': 1.2610054016113281} +03/04/2022 20:10:21 - INFO - codeparrot_training - Step 26134: {'lr': 0.00046790584414724404, 'samples': 13381120, 'steps': 26134, 'loss/train': 2.120995044708252} +03/04/2022 20:10:24 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 20:10:27 - INFO - codeparrot_training - Step 26135: {'lr': 0.0004679032428584687, 'samples': 13381632, 'steps': 26135, 'loss/train': 0.5777688026428223} +03/04/2022 20:10:30 - INFO - codeparrot_training - Step 26136: {'lr': 0.0004679006414715097, 'samples': 13382144, 'steps': 26136, 'loss/train': 1.680233120918274} +03/04/2022 20:10:32 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/04/2022 20:10:35 - INFO - codeparrot_training - Step 26137: {'lr': 0.00046789803998636796, 'samples': 13382656, 'steps': 26137, 'loss/train': 1.557360053062439} +03/04/2022 20:10:38 - INFO - codeparrot_training - Step 26138: {'lr': 0.0004678954384030448, 'samples': 13383168, 'steps': 26138, 'loss/train': 2.390317440032959} +03/04/2022 20:10:41 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/04/2022 20:10:44 - INFO - codeparrot_training - Step 26139: {'lr': 0.00046789283672154125, 'samples': 13383680, 'steps': 26139, 'loss/train': 3.7934465408325195} +03/04/2022 20:10:47 - INFO - codeparrot_training - Step 26140: {'lr': 0.00046789023494185855, 'samples': 13384192, 'steps': 26140, 'loss/train': 1.860734462738037} +03/04/2022 20:10:49 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 20:10:52 - INFO - codeparrot_training - Step 26141: {'lr': 0.0004678876330639978, 'samples': 13384704, 'steps': 26141, 'loss/train': 1.0154024362564087} +03/04/2022 20:10:55 - INFO - codeparrot_training - Step 26142: {'lr': 0.0004678850310879604, 'samples': 13385216, 'steps': 26142, 'loss/train': 1.0069926977157593} +03/04/2022 20:10:58 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/04/2022 20:11:01 - INFO - codeparrot_training - Step 26143: {'lr': 0.0004678824290137473, 'samples': 13385728, 'steps': 26143, 'loss/train': 2.7796263694763184} +03/04/2022 20:11:04 - INFO - codeparrot_training - Step 26144: {'lr': 0.0004678798268413597, 'samples': 13386240, 'steps': 26144, 'loss/train': 1.9897270202636719} +03/04/2022 20:11:06 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 20:11:09 - INFO - codeparrot_training - Step 26145: {'lr': 0.00046787722457079887, 'samples': 13386752, 'steps': 26145, 'loss/train': 1.4299817085266113} +03/04/2022 20:11:12 - INFO - codeparrot_training - Step 26146: {'lr': 0.00046787462220206587, 'samples': 13387264, 'steps': 26146, 'loss/train': 2.2799880504608154} +03/04/2022 20:11:15 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 20:11:18 - INFO - codeparrot_training - Step 26147: {'lr': 0.00046787201973516195, 'samples': 13387776, 'steps': 26147, 'loss/train': 3.047572612762451} +03/04/2022 20:11:21 - INFO - codeparrot_training - Step 26148: {'lr': 0.00046786941717008823, 'samples': 13388288, 'steps': 26148, 'loss/train': 1.9131559133529663} +03/04/2022 20:11:24 - INFO - codeparrot_training - Step 26149: {'lr': 0.00046786681450684597, 'samples': 13388800, 'steps': 26149, 'loss/train': 2.2449686527252197} +03/04/2022 20:11:24 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/04/2022 20:11:29 - INFO - codeparrot_training - Step 26150: {'lr': 0.00046786421174543625, 'samples': 13389312, 'steps': 26150, 'loss/train': 2.2072336673736572} +03/04/2022 20:11:32 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 20:11:35 - INFO - codeparrot_training - Step 26151: {'lr': 0.0004678616088858603, 'samples': 13389824, 'steps': 26151, 'loss/train': 1.7114315032958984} +03/04/2022 20:11:38 - INFO - codeparrot_training - Step 26152: {'lr': 0.0004678590059281193, 'samples': 13390336, 'steps': 26152, 'loss/train': 1.3850131034851074} +03/04/2022 20:11:41 - INFO - codeparrot_training - Step 26153: {'lr': 0.0004678564028722143, 'samples': 13390848, 'steps': 26153, 'loss/train': 1.811577320098877} +03/04/2022 20:11:41 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 20:11:46 - INFO - codeparrot_training - Step 26154: {'lr': 0.0004678537997181467, 'samples': 13391360, 'steps': 26154, 'loss/train': 2.5950026512145996} +03/04/2022 20:11:50 - INFO - codeparrot_training - Step 26155: {'lr': 0.00046785119646591746, 'samples': 13391872, 'steps': 26155, 'loss/train': 1.8332507610321045} +03/04/2022 20:11:51 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 20:11:55 - INFO - codeparrot_training - Step 26156: {'lr': 0.0004678485931155278, 'samples': 13392384, 'steps': 26156, 'loss/train': 1.0187549591064453} +03/04/2022 20:11:58 - INFO - codeparrot_training - Step 26157: {'lr': 0.000467845989666979, 'samples': 13392896, 'steps': 26157, 'loss/train': 1.6289196014404297} +03/04/2022 20:11:59 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/04/2022 20:12:03 - INFO - codeparrot_training - Step 26158: {'lr': 0.0004678433861202721, 'samples': 13393408, 'steps': 26158, 'loss/train': 2.2954211235046387} +03/04/2022 20:12:06 - INFO - codeparrot_training - Step 26159: {'lr': 0.0004678407824754083, 'samples': 13393920, 'steps': 26159, 'loss/train': 1.9970982074737549} +03/04/2022 20:12:07 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 20:12:12 - INFO - codeparrot_training - Step 26160: {'lr': 0.00046783817873238885, 'samples': 13394432, 'steps': 26160, 'loss/train': 1.2233350276947021} +03/04/2022 20:12:15 - INFO - codeparrot_training - Step 26161: {'lr': 0.0004678355748912149, 'samples': 13394944, 'steps': 26161, 'loss/train': 2.286184787750244} +03/04/2022 20:12:16 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/04/2022 20:12:20 - INFO - codeparrot_training - Step 26162: {'lr': 0.0004678329709518876, 'samples': 13395456, 'steps': 26162, 'loss/train': 1.8020402193069458} +03/04/2022 20:12:23 - INFO - codeparrot_training - Step 26163: {'lr': 0.0004678303669144081, 'samples': 13395968, 'steps': 26163, 'loss/train': 2.1140902042388916} +03/04/2022 20:12:24 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 20:12:29 - INFO - codeparrot_training - Step 26164: {'lr': 0.0004678277627787776, 'samples': 13396480, 'steps': 26164, 'loss/train': 2.2568657398223877} +03/04/2022 20:12:32 - INFO - codeparrot_training - Step 26165: {'lr': 0.0004678251585449973, 'samples': 13396992, 'steps': 26165, 'loss/train': 1.4765487909317017} +03/04/2022 20:12:33 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 20:12:37 - INFO - codeparrot_training - Step 26166: {'lr': 0.0004678225542130683, 'samples': 13397504, 'steps': 26166, 'loss/train': 1.8485795259475708} +03/04/2022 20:12:40 - INFO - codeparrot_training - Step 26167: {'lr': 0.0004678199497829919, 'samples': 13398016, 'steps': 26167, 'loss/train': 1.8126024007797241} +03/04/2022 20:12:41 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/04/2022 20:12:45 - INFO - codeparrot_training - Step 26168: {'lr': 0.0004678173452547691, 'samples': 13398528, 'steps': 26168, 'loss/train': 2.309438467025757} +03/04/2022 20:12:49 - INFO - codeparrot_training - Step 26169: {'lr': 0.00046781474062840126, 'samples': 13399040, 'steps': 26169, 'loss/train': 2.106666326522827} +03/04/2022 20:12:50 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 20:12:54 - INFO - codeparrot_training - Step 26170: {'lr': 0.0004678121359038894, 'samples': 13399552, 'steps': 26170, 'loss/train': 1.9035935401916504} +03/04/2022 20:12:57 - INFO - codeparrot_training - Step 26171: {'lr': 0.0004678095310812347, 'samples': 13400064, 'steps': 26171, 'loss/train': 2.0352907180786133} +03/04/2022 20:12:58 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/04/2022 20:13:02 - INFO - codeparrot_training - Step 26172: {'lr': 0.0004678069261604384, 'samples': 13400576, 'steps': 26172, 'loss/train': 1.241420030593872} +03/04/2022 20:13:06 - INFO - codeparrot_training - Step 26173: {'lr': 0.00046780432114150173, 'samples': 13401088, 'steps': 26173, 'loss/train': 2.4093480110168457} +03/04/2022 20:13:07 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 20:13:11 - INFO - codeparrot_training - Step 26174: {'lr': 0.0004678017160244258, 'samples': 13401600, 'steps': 26174, 'loss/train': 1.6024255752563477} +03/04/2022 20:13:14 - INFO - codeparrot_training - Step 26175: {'lr': 0.00046779911080921166, 'samples': 13402112, 'steps': 26175, 'loss/train': 1.385125756263733} +03/04/2022 20:13:15 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 20:13:19 - INFO - codeparrot_training - Step 26176: {'lr': 0.00046779650549586075, 'samples': 13402624, 'steps': 26176, 'loss/train': 2.995293140411377} +03/04/2022 20:13:23 - INFO - codeparrot_training - Step 26177: {'lr': 0.000467793900084374, 'samples': 13403136, 'steps': 26177, 'loss/train': 1.6859033107757568} +03/04/2022 20:13:24 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/04/2022 20:13:28 - INFO - codeparrot_training - Step 26178: {'lr': 0.0004677912945747527, 'samples': 13403648, 'steps': 26178, 'loss/train': 1.175668478012085} +03/04/2022 20:13:31 - INFO - codeparrot_training - Step 26179: {'lr': 0.000467788688966998, 'samples': 13404160, 'steps': 26179, 'loss/train': 0.9898331165313721} +03/04/2022 20:13:32 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 20:13:36 - INFO - codeparrot_training - Step 26180: {'lr': 0.00046778608326111104, 'samples': 13404672, 'steps': 26180, 'loss/train': 0.23517422378063202} +03/04/2022 20:13:40 - INFO - codeparrot_training - Step 26181: {'lr': 0.00046778347745709317, 'samples': 13405184, 'steps': 26181, 'loss/train': 2.474379062652588} +03/04/2022 20:13:41 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 20:13:45 - INFO - codeparrot_training - Step 26182: {'lr': 0.0004677808715549453, 'samples': 13405696, 'steps': 26182, 'loss/train': 2.048320770263672} +03/04/2022 20:13:48 - INFO - codeparrot_training - Step 26183: {'lr': 0.0004677782655546687, 'samples': 13406208, 'steps': 26183, 'loss/train': 2.0141658782958984} +03/04/2022 20:13:49 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 20:13:53 - INFO - codeparrot_training - Step 26184: {'lr': 0.00046777565945626463, 'samples': 13406720, 'steps': 26184, 'loss/train': 2.0025410652160645} +03/04/2022 20:13:57 - INFO - codeparrot_training - Step 26185: {'lr': 0.0004677730532597343, 'samples': 13407232, 'steps': 26185, 'loss/train': 1.4090379476547241} +03/04/2022 20:13:57 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 20:14:02 - INFO - codeparrot_training - Step 26186: {'lr': 0.00046777044696507867, 'samples': 13407744, 'steps': 26186, 'loss/train': 2.3672497272491455} +03/04/2022 20:14:05 - INFO - codeparrot_training - Step 26187: {'lr': 0.00046776784057229906, 'samples': 13408256, 'steps': 26187, 'loss/train': 2.6451077461242676} +03/04/2022 20:14:07 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 20:14:11 - INFO - codeparrot_training - Step 26188: {'lr': 0.00046776523408139666, 'samples': 13408768, 'steps': 26188, 'loss/train': 2.16650128364563} +03/04/2022 20:14:14 - INFO - codeparrot_training - Step 26189: {'lr': 0.0004677626274923726, 'samples': 13409280, 'steps': 26189, 'loss/train': 1.4568591117858887} +03/04/2022 20:14:15 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/04/2022 20:14:19 - INFO - codeparrot_training - Step 26190: {'lr': 0.000467760020805228, 'samples': 13409792, 'steps': 26190, 'loss/train': 3.015784502029419} +03/04/2022 20:14:22 - INFO - codeparrot_training - Step 26191: {'lr': 0.0004677574140199642, 'samples': 13410304, 'steps': 26191, 'loss/train': 2.0492053031921387} +03/04/2022 20:14:24 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 20:14:27 - INFO - codeparrot_training - Step 26192: {'lr': 0.00046775480713658215, 'samples': 13410816, 'steps': 26192, 'loss/train': 1.7014299631118774} +03/04/2022 20:14:31 - INFO - codeparrot_training - Step 26193: {'lr': 0.00046775220015508325, 'samples': 13411328, 'steps': 26193, 'loss/train': 1.665052890777588} +03/04/2022 20:14:32 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/04/2022 20:14:36 - INFO - codeparrot_training - Step 26194: {'lr': 0.0004677495930754685, 'samples': 13411840, 'steps': 26194, 'loss/train': 2.1564548015594482} +03/04/2022 20:14:39 - INFO - codeparrot_training - Step 26195: {'lr': 0.0004677469858977391, 'samples': 13412352, 'steps': 26195, 'loss/train': 1.7374427318572998} +03/04/2022 20:14:41 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 20:14:44 - INFO - codeparrot_training - Step 26196: {'lr': 0.00046774437862189634, 'samples': 13412864, 'steps': 26196, 'loss/train': 1.7755687236785889} +03/04/2022 20:14:47 - INFO - codeparrot_training - Step 26197: {'lr': 0.00046774177124794136, 'samples': 13413376, 'steps': 26197, 'loss/train': 2.2434895038604736} +03/04/2022 20:14:49 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/04/2022 20:14:53 - INFO - codeparrot_training - Step 26198: {'lr': 0.00046773916377587524, 'samples': 13413888, 'steps': 26198, 'loss/train': 1.881919264793396} +03/04/2022 20:14:56 - INFO - codeparrot_training - Step 26199: {'lr': 0.00046773655620569924, 'samples': 13414400, 'steps': 26199, 'loss/train': 2.724806785583496} +03/04/2022 20:14:58 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 20:15:01 - INFO - codeparrot_training - Step 26200: {'lr': 0.0004677339485374145, 'samples': 13414912, 'steps': 26200, 'loss/train': 1.735425591468811} +03/04/2022 20:15:05 - INFO - codeparrot_training - Step 26201: {'lr': 0.00046773134077102217, 'samples': 13415424, 'steps': 26201, 'loss/train': 1.7192927598953247} +03/04/2022 20:15:07 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 20:15:10 - INFO - codeparrot_training - Step 26202: {'lr': 0.00046772873290652344, 'samples': 13415936, 'steps': 26202, 'loss/train': 1.2810925245285034} +03/04/2022 20:15:13 - INFO - codeparrot_training - Step 26203: {'lr': 0.0004677261249439196, 'samples': 13416448, 'steps': 26203, 'loss/train': 1.9988727569580078} +03/04/2022 20:15:15 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 20:15:18 - INFO - codeparrot_training - Step 26204: {'lr': 0.0004677235168832117, 'samples': 13416960, 'steps': 26204, 'loss/train': 1.6794885396957397} +03/04/2022 20:15:21 - INFO - codeparrot_training - Step 26205: {'lr': 0.0004677209087244009, 'samples': 13417472, 'steps': 26205, 'loss/train': 1.7517313957214355} +03/04/2022 20:15:23 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 20:15:27 - INFO - codeparrot_training - Step 26206: {'lr': 0.0004677183004674884, 'samples': 13417984, 'steps': 26206, 'loss/train': 1.9873133897781372} +03/04/2022 20:15:30 - INFO - codeparrot_training - Step 26207: {'lr': 0.00046771569211247546, 'samples': 13418496, 'steps': 26207, 'loss/train': 1.7855371236801147} +03/04/2022 20:15:32 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/04/2022 20:15:35 - INFO - codeparrot_training - Step 26208: {'lr': 0.00046771308365936315, 'samples': 13419008, 'steps': 26208, 'loss/train': 6.60589599609375} +03/04/2022 20:15:38 - INFO - codeparrot_training - Step 26209: {'lr': 0.00046771047510815267, 'samples': 13419520, 'steps': 26209, 'loss/train': 0.8584921956062317} +03/04/2022 20:15:41 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 20:15:44 - INFO - codeparrot_training - Step 26210: {'lr': 0.0004677078664588452, 'samples': 13420032, 'steps': 26210, 'loss/train': 1.4996936321258545} +03/04/2022 20:15:47 - INFO - codeparrot_training - Step 26211: {'lr': 0.000467705257711442, 'samples': 13420544, 'steps': 26211, 'loss/train': 1.922645092010498} +03/04/2022 20:15:49 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 20:15:52 - INFO - codeparrot_training - Step 26212: {'lr': 0.0004677026488659441, 'samples': 13421056, 'steps': 26212, 'loss/train': 1.4073097705841064} +03/04/2022 20:15:55 - INFO - codeparrot_training - Step 26213: {'lr': 0.0004677000399223528, 'samples': 13421568, 'steps': 26213, 'loss/train': 1.3810511827468872} +03/04/2022 20:15:58 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 20:16:01 - INFO - codeparrot_training - Step 26214: {'lr': 0.0004676974308806692, 'samples': 13422080, 'steps': 26214, 'loss/train': 2.0653762817382812} +03/04/2022 20:16:04 - INFO - codeparrot_training - Step 26215: {'lr': 0.00046769482174089446, 'samples': 13422592, 'steps': 26215, 'loss/train': 1.5394554138183594} +03/04/2022 20:16:06 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 20:16:09 - INFO - codeparrot_training - Step 26216: {'lr': 0.00046769221250302984, 'samples': 13423104, 'steps': 26216, 'loss/train': 2.15212345123291} +03/04/2022 20:16:12 - INFO - codeparrot_training - Step 26217: {'lr': 0.0004676896031670764, 'samples': 13423616, 'steps': 26217, 'loss/train': 3.625394582748413} +03/04/2022 20:16:15 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/04/2022 20:16:17 - INFO - codeparrot_training - Step 26218: {'lr': 0.00046768699373303546, 'samples': 13424128, 'steps': 26218, 'loss/train': 1.6522254943847656} +03/04/2022 20:16:21 - INFO - codeparrot_training - Step 26219: {'lr': 0.00046768438420090807, 'samples': 13424640, 'steps': 26219, 'loss/train': 1.5275942087173462} +03/04/2022 20:16:23 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/04/2022 20:16:26 - INFO - codeparrot_training - Step 26220: {'lr': 0.0004676817745706955, 'samples': 13425152, 'steps': 26220, 'loss/train': 1.2405812740325928} +03/04/2022 20:16:29 - INFO - codeparrot_training - Step 26221: {'lr': 0.0004676791648423989, 'samples': 13425664, 'steps': 26221, 'loss/train': 2.0045218467712402} +03/04/2022 20:16:31 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 20:16:34 - INFO - codeparrot_training - Step 26222: {'lr': 0.00046767655501601935, 'samples': 13426176, 'steps': 26222, 'loss/train': 1.1902238130569458} +03/04/2022 20:16:37 - INFO - codeparrot_training - Step 26223: {'lr': 0.0004676739450915581, 'samples': 13426688, 'steps': 26223, 'loss/train': 1.1171594858169556} +03/04/2022 20:16:40 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 20:16:43 - INFO - codeparrot_training - Step 26224: {'lr': 0.0004676713350690164, 'samples': 13427200, 'steps': 26224, 'loss/train': 1.0749845504760742} +03/04/2022 20:16:46 - INFO - codeparrot_training - Step 26225: {'lr': 0.0004676687249483953, 'samples': 13427712, 'steps': 26225, 'loss/train': 0.5352584719657898} +03/04/2022 20:16:48 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 20:16:52 - INFO - codeparrot_training - Step 26226: {'lr': 0.0004676661147296961, 'samples': 13428224, 'steps': 26226, 'loss/train': 1.7981233596801758} +03/04/2022 20:16:55 - INFO - codeparrot_training - Step 26227: {'lr': 0.00046766350441291985, 'samples': 13428736, 'steps': 26227, 'loss/train': 2.2609095573425293} +03/04/2022 20:16:58 - INFO - codeparrot_training - Step 26228: {'lr': 0.00046766089399806775, 'samples': 13429248, 'steps': 26228, 'loss/train': 0.5924762487411499} +03/04/2022 20:16:59 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/04/2022 20:17:04 - INFO - codeparrot_training - Step 26229: {'lr': 0.0004676582834851411, 'samples': 13429760, 'steps': 26229, 'loss/train': 2.2630815505981445} +03/04/2022 20:17:07 - INFO - codeparrot_training - Step 26230: {'lr': 0.0004676556728741409, 'samples': 13430272, 'steps': 26230, 'loss/train': 1.156329870223999} +03/04/2022 20:17:09 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 20:17:12 - INFO - codeparrot_training - Step 26231: {'lr': 0.0004676530621650685, 'samples': 13430784, 'steps': 26231, 'loss/train': 2.157087564468384} +03/04/2022 20:17:15 - INFO - codeparrot_training - Step 26232: {'lr': 0.00046765045135792495, 'samples': 13431296, 'steps': 26232, 'loss/train': 1.550511360168457} +03/04/2022 20:17:17 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 20:17:21 - INFO - codeparrot_training - Step 26233: {'lr': 0.00046764784045271146, 'samples': 13431808, 'steps': 26233, 'loss/train': 1.531336784362793} +03/04/2022 20:17:24 - INFO - codeparrot_training - Step 26234: {'lr': 0.0004676452294494292, 'samples': 13432320, 'steps': 26234, 'loss/train': 1.3884377479553223} +03/04/2022 20:17:26 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 20:17:29 - INFO - codeparrot_training - Step 26235: {'lr': 0.00046764261834807944, 'samples': 13432832, 'steps': 26235, 'loss/train': 2.040224075317383} +03/04/2022 20:17:32 - INFO - codeparrot_training - Step 26236: {'lr': 0.0004676400071486632, 'samples': 13433344, 'steps': 26236, 'loss/train': 1.316050410270691} +03/04/2022 20:17:34 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 20:17:37 - INFO - codeparrot_training - Step 26237: {'lr': 0.0004676373958511817, 'samples': 13433856, 'steps': 26237, 'loss/train': 1.649372935295105} +03/04/2022 20:17:41 - INFO - codeparrot_training - Step 26238: {'lr': 0.00046763478445563617, 'samples': 13434368, 'steps': 26238, 'loss/train': 2.3910179138183594} +03/04/2022 20:17:43 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 20:17:46 - INFO - codeparrot_training - Step 26239: {'lr': 0.0004676321729620278, 'samples': 13434880, 'steps': 26239, 'loss/train': 1.8172529935836792} +03/04/2022 20:17:49 - INFO - codeparrot_training - Step 26240: {'lr': 0.0004676295613703577, 'samples': 13435392, 'steps': 26240, 'loss/train': 2.072261095046997} +03/04/2022 20:17:51 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 20:17:54 - INFO - codeparrot_training - Step 26241: {'lr': 0.00046762694968062706, 'samples': 13435904, 'steps': 26241, 'loss/train': 1.994399070739746} +03/04/2022 20:17:58 - INFO - codeparrot_training - Step 26242: {'lr': 0.0004676243378928371, 'samples': 13436416, 'steps': 26242, 'loss/train': 2.1013505458831787} +03/04/2022 20:17:59 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 20:18:03 - INFO - codeparrot_training - Step 26243: {'lr': 0.000467621726006989, 'samples': 13436928, 'steps': 26243, 'loss/train': 1.686649203300476} +03/04/2022 20:18:06 - INFO - codeparrot_training - Step 26244: {'lr': 0.0004676191140230839, 'samples': 13437440, 'steps': 26244, 'loss/train': 0.7103327512741089} +03/04/2022 20:18:08 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/04/2022 20:18:12 - INFO - codeparrot_training - Step 26245: {'lr': 0.0004676165019411229, 'samples': 13437952, 'steps': 26245, 'loss/train': 2.7181527614593506} +03/04/2022 20:18:15 - INFO - codeparrot_training - Step 26246: {'lr': 0.00046761388976110737, 'samples': 13438464, 'steps': 26246, 'loss/train': 2.2713632583618164} +03/04/2022 20:18:17 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 20:18:20 - INFO - codeparrot_training - Step 26247: {'lr': 0.00046761127748303833, 'samples': 13438976, 'steps': 26247, 'loss/train': 0.38205447793006897} +03/04/2022 20:18:23 - INFO - codeparrot_training - Step 26248: {'lr': 0.000467608665106917, 'samples': 13439488, 'steps': 26248, 'loss/train': 1.0538963079452515} +03/04/2022 20:18:26 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 20:18:29 - INFO - codeparrot_training - Step 26249: {'lr': 0.0004676060526327446, 'samples': 13440000, 'steps': 26249, 'loss/train': 2.1698355674743652} +03/04/2022 20:18:32 - INFO - codeparrot_training - Step 26250: {'lr': 0.00046760344006052223, 'samples': 13440512, 'steps': 26250, 'loss/train': 2.1941206455230713} +03/04/2022 20:18:35 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/04/2022 20:18:37 - INFO - codeparrot_training - Step 26251: {'lr': 0.00046760082739025113, 'samples': 13441024, 'steps': 26251, 'loss/train': 2.372011184692383} +03/04/2022 20:18:40 - INFO - codeparrot_training - Step 26252: {'lr': 0.0004675982146219324, 'samples': 13441536, 'steps': 26252, 'loss/train': 1.7368398904800415} +03/04/2022 20:18:43 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 20:18:45 - INFO - codeparrot_training - Step 26253: {'lr': 0.00046759560175556737, 'samples': 13442048, 'steps': 26253, 'loss/train': 2.2754759788513184} +03/04/2022 20:18:49 - INFO - codeparrot_training - Step 26254: {'lr': 0.0004675929887911571, 'samples': 13442560, 'steps': 26254, 'loss/train': 1.985810399055481} +03/04/2022 20:18:51 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 20:18:54 - INFO - codeparrot_training - Step 26255: {'lr': 0.0004675903757287027, 'samples': 13443072, 'steps': 26255, 'loss/train': 1.289853811264038} +03/04/2022 20:18:58 - INFO - codeparrot_training - Step 26256: {'lr': 0.0004675877625682055, 'samples': 13443584, 'steps': 26256, 'loss/train': 0.40469056367874146} +03/04/2022 20:19:01 - INFO - codeparrot_training - Step 26257: {'lr': 0.00046758514930966664, 'samples': 13444096, 'steps': 26257, 'loss/train': 1.4632779359817505} +03/04/2022 20:19:01 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/04/2022 20:19:06 - INFO - codeparrot_training - Step 26258: {'lr': 0.0004675825359530872, 'samples': 13444608, 'steps': 26258, 'loss/train': 1.2530070543289185} +03/04/2022 20:19:09 - INFO - codeparrot_training - Step 26259: {'lr': 0.0004675799224984685, 'samples': 13445120, 'steps': 26259, 'loss/train': 1.3655645847320557} +03/04/2022 20:19:10 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 20:19:14 - INFO - codeparrot_training - Step 26260: {'lr': 0.00046757730894581164, 'samples': 13445632, 'steps': 26260, 'loss/train': 1.6445448398590088} +03/04/2022 20:19:17 - INFO - codeparrot_training - Step 26261: {'lr': 0.00046757469529511777, 'samples': 13446144, 'steps': 26261, 'loss/train': 2.1156513690948486} +03/04/2022 20:19:18 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/04/2022 20:19:23 - INFO - codeparrot_training - Step 26262: {'lr': 0.0004675720815463881, 'samples': 13446656, 'steps': 26262, 'loss/train': 2.049163579940796} +03/04/2022 20:19:26 - INFO - codeparrot_training - Step 26263: {'lr': 0.00046756946769962375, 'samples': 13447168, 'steps': 26263, 'loss/train': 0.6978371143341064} +03/04/2022 20:19:27 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 20:19:31 - INFO - codeparrot_training - Step 26264: {'lr': 0.000467566853754826, 'samples': 13447680, 'steps': 26264, 'loss/train': 1.615500569343567} +03/04/2022 20:19:34 - INFO - codeparrot_training - Step 26265: {'lr': 0.00046756423971199603, 'samples': 13448192, 'steps': 26265, 'loss/train': 2.5839827060699463} +03/04/2022 20:19:35 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 20:19:40 - INFO - codeparrot_training - Step 26266: {'lr': 0.0004675616255711349, 'samples': 13448704, 'steps': 26266, 'loss/train': 2.0033435821533203} +03/04/2022 20:19:43 - INFO - codeparrot_training - Step 26267: {'lr': 0.0004675590113322439, 'samples': 13449216, 'steps': 26267, 'loss/train': 1.0274652242660522} +03/04/2022 20:19:43 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 20:19:48 - INFO - codeparrot_training - Step 26268: {'lr': 0.00046755639699532414, 'samples': 13449728, 'steps': 26268, 'loss/train': 1.7288697957992554} +03/04/2022 20:19:51 - INFO - codeparrot_training - Step 26269: {'lr': 0.00046755378256037685, 'samples': 13450240, 'steps': 26269, 'loss/train': 2.107079267501831} +03/04/2022 20:19:52 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/04/2022 20:19:56 - INFO - codeparrot_training - Step 26270: {'lr': 0.00046755116802740316, 'samples': 13450752, 'steps': 26270, 'loss/train': 2.227278470993042} +03/04/2022 20:20:00 - INFO - codeparrot_training - Step 26271: {'lr': 0.00046754855339640436, 'samples': 13451264, 'steps': 26271, 'loss/train': 1.3240013122558594} +03/04/2022 20:20:00 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/04/2022 20:20:05 - INFO - codeparrot_training - Step 26272: {'lr': 0.00046754593866738144, 'samples': 13451776, 'steps': 26272, 'loss/train': 0.5232560634613037} +03/04/2022 20:20:08 - INFO - codeparrot_training - Step 26273: {'lr': 0.0004675433238403357, 'samples': 13452288, 'steps': 26273, 'loss/train': 1.8712949752807617} +03/04/2022 20:20:10 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 20:20:13 - INFO - codeparrot_training - Step 26274: {'lr': 0.0004675407089152683, 'samples': 13452800, 'steps': 26274, 'loss/train': 2.0998947620391846} +03/04/2022 20:20:17 - INFO - codeparrot_training - Step 26275: {'lr': 0.00046753809389218036, 'samples': 13453312, 'steps': 26275, 'loss/train': 1.2897672653198242} +03/04/2022 20:20:18 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 20:20:22 - INFO - codeparrot_training - Step 26276: {'lr': 0.0004675354787710732, 'samples': 13453824, 'steps': 26276, 'loss/train': 2.0904479026794434} +03/04/2022 20:20:25 - INFO - codeparrot_training - Step 26277: {'lr': 0.0004675328635519479, 'samples': 13454336, 'steps': 26277, 'loss/train': 1.9342377185821533} +03/04/2022 20:20:27 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 20:20:31 - INFO - codeparrot_training - Step 26278: {'lr': 0.0004675302482348056, 'samples': 13454848, 'steps': 26278, 'loss/train': 1.812187910079956} +03/04/2022 20:20:34 - INFO - codeparrot_training - Step 26279: {'lr': 0.00046752763281964757, 'samples': 13455360, 'steps': 26279, 'loss/train': 2.391221761703491} +03/04/2022 20:20:37 - INFO - codeparrot_training - Step 26280: {'lr': 0.0004675250173064749, 'samples': 13455872, 'steps': 26280, 'loss/train': 3.3023781776428223} +03/04/2022 20:20:37 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/04/2022 20:20:42 - INFO - codeparrot_training - Step 26281: {'lr': 0.0004675224016952888, 'samples': 13456384, 'steps': 26281, 'loss/train': 1.9747198820114136} +03/04/2022 20:20:45 - INFO - codeparrot_training - Step 26282: {'lr': 0.00046751978598609056, 'samples': 13456896, 'steps': 26282, 'loss/train': 1.89765202999115} +03/04/2022 20:20:46 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/04/2022 20:20:51 - INFO - codeparrot_training - Step 26283: {'lr': 0.00046751717017888116, 'samples': 13457408, 'steps': 26283, 'loss/train': 2.4452362060546875} +03/04/2022 20:20:54 - INFO - codeparrot_training - Step 26284: {'lr': 0.00046751455427366194, 'samples': 13457920, 'steps': 26284, 'loss/train': 2.1030516624450684} +03/04/2022 20:20:54 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 20:20:59 - INFO - codeparrot_training - Step 26285: {'lr': 0.00046751193827043405, 'samples': 13458432, 'steps': 26285, 'loss/train': 1.8388447761535645} +03/04/2022 20:21:02 - INFO - codeparrot_training - Step 26286: {'lr': 0.0004675093221691985, 'samples': 13458944, 'steps': 26286, 'loss/train': 2.0483851432800293} +03/04/2022 20:21:02 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 20:21:08 - INFO - codeparrot_training - Step 26287: {'lr': 0.0004675067059699567, 'samples': 13459456, 'steps': 26287, 'loss/train': 0.9844334125518799} +03/04/2022 20:21:11 - INFO - codeparrot_training - Step 26288: {'lr': 0.00046750408967270973, 'samples': 13459968, 'steps': 26288, 'loss/train': 1.2988200187683105} +03/04/2022 20:21:11 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 20:21:16 - INFO - codeparrot_training - Step 26289: {'lr': 0.0004675014732774588, 'samples': 13460480, 'steps': 26289, 'loss/train': 1.4580541849136353} +03/04/2022 20:21:19 - INFO - codeparrot_training - Step 26290: {'lr': 0.000467498856784205, 'samples': 13460992, 'steps': 26290, 'loss/train': 1.080168604850769} +03/04/2022 20:21:20 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/04/2022 20:21:24 - INFO - codeparrot_training - Step 26291: {'lr': 0.0004674962401929496, 'samples': 13461504, 'steps': 26291, 'loss/train': 2.468744993209839} +03/04/2022 20:21:28 - INFO - codeparrot_training - Step 26292: {'lr': 0.0004674936235036938, 'samples': 13462016, 'steps': 26292, 'loss/train': 0.6624350547790527} +03/04/2022 20:21:29 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 20:21:33 - INFO - codeparrot_training - Step 26293: {'lr': 0.00046749100671643866, 'samples': 13462528, 'steps': 26293, 'loss/train': 2.2161104679107666} +03/04/2022 20:21:36 - INFO - codeparrot_training - Step 26294: {'lr': 0.00046748838983118546, 'samples': 13463040, 'steps': 26294, 'loss/train': 2.5154502391815186} +03/04/2022 20:21:37 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 20:21:41 - INFO - codeparrot_training - Step 26295: {'lr': 0.00046748577284793535, 'samples': 13463552, 'steps': 26295, 'loss/train': 1.425231695175171} +03/04/2022 20:21:45 - INFO - codeparrot_training - Step 26296: {'lr': 0.00046748315576668946, 'samples': 13464064, 'steps': 26296, 'loss/train': 1.9454972743988037} +03/04/2022 20:21:46 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 20:21:50 - INFO - codeparrot_training - Step 26297: {'lr': 0.0004674805385874491, 'samples': 13464576, 'steps': 26297, 'loss/train': 2.0682132244110107} +03/04/2022 20:21:53 - INFO - codeparrot_training - Step 26298: {'lr': 0.0004674779213102153, 'samples': 13465088, 'steps': 26298, 'loss/train': 1.9246418476104736} +03/04/2022 20:21:54 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 20:21:58 - INFO - codeparrot_training - Step 26299: {'lr': 0.00046747530393498934, 'samples': 13465600, 'steps': 26299, 'loss/train': 1.5850003957748413} +03/04/2022 20:22:01 - INFO - codeparrot_training - Step 26300: {'lr': 0.0004674726864617723, 'samples': 13466112, 'steps': 26300, 'loss/train': 2.080498456954956} +03/04/2022 20:22:07 - INFO - codeparrot_training - Step 26301: {'lr': 0.00046747006889056556, 'samples': 13466624, 'steps': 26301, 'loss/train': 2.675243616104126} +03/04/2022 20:22:10 - INFO - codeparrot_training - Step 26302: {'lr': 0.00046746745122137, 'samples': 13467136, 'steps': 26302, 'loss/train': 1.72771418094635} +03/04/2022 20:22:11 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 20:22:15 - INFO - codeparrot_training - Step 26303: {'lr': 0.000467464833454187, 'samples': 13467648, 'steps': 26303, 'loss/train': 2.593093156814575} +03/04/2022 20:22:18 - INFO - codeparrot_training - Step 26304: {'lr': 0.0004674622155890178, 'samples': 13468160, 'steps': 26304, 'loss/train': 1.752964735031128} +03/04/2022 20:22:20 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 20:22:24 - INFO - codeparrot_training - Step 26305: {'lr': 0.00046745959762586344, 'samples': 13468672, 'steps': 26305, 'loss/train': 1.8876134157180786} +03/04/2022 20:22:27 - INFO - codeparrot_training - Step 26306: {'lr': 0.0004674569795647251, 'samples': 13469184, 'steps': 26306, 'loss/train': 1.6933643817901611} +03/04/2022 20:22:28 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 20:22:32 - INFO - codeparrot_training - Step 26307: {'lr': 0.00046745436140560397, 'samples': 13469696, 'steps': 26307, 'loss/train': 2.034882068634033} +03/04/2022 20:22:35 - INFO - codeparrot_training - Step 26308: {'lr': 0.00046745174314850136, 'samples': 13470208, 'steps': 26308, 'loss/train': 2.1308157444000244} +03/04/2022 20:22:37 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 20:22:41 - INFO - codeparrot_training - Step 26309: {'lr': 0.00046744912479341826, 'samples': 13470720, 'steps': 26309, 'loss/train': 2.1435434818267822} +03/04/2022 20:22:44 - INFO - codeparrot_training - Step 26310: {'lr': 0.00046744650634035603, 'samples': 13471232, 'steps': 26310, 'loss/train': 1.6730660200119019} +03/04/2022 20:22:45 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 20:22:49 - INFO - codeparrot_training - Step 26311: {'lr': 0.0004674438877893157, 'samples': 13471744, 'steps': 26311, 'loss/train': 1.6656420230865479} +03/04/2022 20:22:52 - INFO - codeparrot_training - Step 26312: {'lr': 0.0004674412691402985, 'samples': 13472256, 'steps': 26312, 'loss/train': 2.204796552658081} +03/04/2022 20:22:53 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 20:22:57 - INFO - codeparrot_training - Step 26313: {'lr': 0.00046743865039330565, 'samples': 13472768, 'steps': 26313, 'loss/train': 1.7796519994735718} +03/04/2022 20:23:01 - INFO - codeparrot_training - Step 26314: {'lr': 0.00046743603154833827, 'samples': 13473280, 'steps': 26314, 'loss/train': 1.4883044958114624} +03/04/2022 20:23:02 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 20:23:06 - INFO - codeparrot_training - Step 26315: {'lr': 0.00046743341260539756, 'samples': 13473792, 'steps': 26315, 'loss/train': 1.938624382019043} +03/04/2022 20:23:09 - INFO - codeparrot_training - Step 26316: {'lr': 0.00046743079356448476, 'samples': 13474304, 'steps': 26316, 'loss/train': 1.7916821241378784} +03/04/2022 20:23:11 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 20:23:14 - INFO - codeparrot_training - Step 26317: {'lr': 0.000467428174425601, 'samples': 13474816, 'steps': 26317, 'loss/train': 0.30272534489631653} +03/04/2022 20:23:18 - INFO - codeparrot_training - Step 26318: {'lr': 0.0004674255551887474, 'samples': 13475328, 'steps': 26318, 'loss/train': 1.7239028215408325} +03/04/2022 20:23:19 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 20:23:23 - INFO - codeparrot_training - Step 26319: {'lr': 0.0004674229358539253, 'samples': 13475840, 'steps': 26319, 'loss/train': 1.121082067489624} +03/04/2022 20:23:26 - INFO - codeparrot_training - Step 26320: {'lr': 0.0004674203164211357, 'samples': 13476352, 'steps': 26320, 'loss/train': 2.201213836669922} +03/04/2022 20:23:27 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/04/2022 20:23:31 - INFO - codeparrot_training - Step 26321: {'lr': 0.00046741769689037985, 'samples': 13476864, 'steps': 26321, 'loss/train': 2.1501524448394775} +03/04/2022 20:23:35 - INFO - codeparrot_training - Step 26322: {'lr': 0.0004674150772616589, 'samples': 13477376, 'steps': 26322, 'loss/train': 1.5515556335449219} +03/04/2022 20:23:36 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/04/2022 20:23:40 - INFO - codeparrot_training - Step 26323: {'lr': 0.0004674124575349742, 'samples': 13477888, 'steps': 26323, 'loss/train': 1.6722420454025269} +03/04/2022 20:23:43 - INFO - codeparrot_training - Step 26324: {'lr': 0.00046740983771032674, 'samples': 13478400, 'steps': 26324, 'loss/train': 2.2152788639068604} +03/04/2022 20:23:45 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/04/2022 20:23:48 - INFO - codeparrot_training - Step 26325: {'lr': 0.0004674072177877178, 'samples': 13478912, 'steps': 26325, 'loss/train': 1.9207843542099} +03/04/2022 20:23:52 - INFO - codeparrot_training - Step 26326: {'lr': 0.0004674045977671484, 'samples': 13479424, 'steps': 26326, 'loss/train': 1.6756205558776855} +03/04/2022 20:23:53 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 20:23:57 - INFO - codeparrot_training - Step 26327: {'lr': 0.00046740197764862, 'samples': 13479936, 'steps': 26327, 'loss/train': 1.6817835569381714} +03/04/2022 20:24:00 - INFO - codeparrot_training - Step 26328: {'lr': 0.00046739935743213344, 'samples': 13480448, 'steps': 26328, 'loss/train': 1.5723381042480469} +03/04/2022 20:24:02 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 20:24:05 - INFO - codeparrot_training - Step 26329: {'lr': 0.00046739673711769026, 'samples': 13480960, 'steps': 26329, 'loss/train': 1.9467318058013916} +03/04/2022 20:24:08 - INFO - codeparrot_training - Step 26330: {'lr': 0.0004673941167052914, 'samples': 13481472, 'steps': 26330, 'loss/train': 1.6885074377059937} +03/04/2022 20:24:10 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/04/2022 20:24:14 - INFO - codeparrot_training - Step 26331: {'lr': 0.0004673914961949381, 'samples': 13481984, 'steps': 26331, 'loss/train': 2.7198257446289062} +03/04/2022 20:24:17 - INFO - codeparrot_training - Step 26332: {'lr': 0.0004673888755866316, 'samples': 13482496, 'steps': 26332, 'loss/train': 2.0507190227508545} +03/04/2022 20:24:19 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/04/2022 20:24:22 - INFO - codeparrot_training - Step 26333: {'lr': 0.0004673862548803729, 'samples': 13483008, 'steps': 26333, 'loss/train': 2.0036942958831787} +03/04/2022 20:24:25 - INFO - codeparrot_training - Step 26334: {'lr': 0.0004673836340761634, 'samples': 13483520, 'steps': 26334, 'loss/train': 1.7425588369369507} +03/04/2022 20:24:27 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/04/2022 20:24:31 - INFO - codeparrot_training - Step 26335: {'lr': 0.00046738101317400415, 'samples': 13484032, 'steps': 26335, 'loss/train': 1.3567665815353394} +03/04/2022 20:24:34 - INFO - codeparrot_training - Step 26336: {'lr': 0.00046737839217389645, 'samples': 13484544, 'steps': 26336, 'loss/train': 1.6014015674591064} +03/04/2022 20:24:36 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/04/2022 20:24:39 - INFO - codeparrot_training - Step 26337: {'lr': 0.0004673757710758413, 'samples': 13485056, 'steps': 26337, 'loss/train': 1.9751358032226562} +03/04/2022 20:24:42 - INFO - codeparrot_training - Step 26338: {'lr': 0.00046737314987984, 'samples': 13485568, 'steps': 26338, 'loss/train': 1.940842628479004} +03/04/2022 20:24:44 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/04/2022 20:24:47 - INFO - codeparrot_training - Step 26339: {'lr': 0.0004673705285858938, 'samples': 13486080, 'steps': 26339, 'loss/train': 1.567187786102295} +03/04/2022 20:24:51 - INFO - codeparrot_training - Step 26340: {'lr': 0.00046736790719400373, 'samples': 13486592, 'steps': 26340, 'loss/train': 2.185310125350952} +03/04/2022 20:24:52 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/04/2022 20:24:56 - INFO - codeparrot_training - Step 26341: {'lr': 0.000467365285704171, 'samples': 13487104, 'steps': 26341, 'loss/train': 1.4059953689575195} +03/04/2022 20:24:59 - INFO - codeparrot_training - Step 26342: {'lr': 0.00046736266411639694, 'samples': 13487616, 'steps': 26342, 'loss/train': 1.407268762588501} +03/04/2022 20:25:00 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 20:25:04 - INFO - codeparrot_training - Step 26343: {'lr': 0.00046736004243068255, 'samples': 13488128, 'steps': 26343, 'loss/train': 2.755145311355591} +03/04/2022 20:25:07 - INFO - codeparrot_training - Step 26344: {'lr': 0.00046735742064702904, 'samples': 13488640, 'steps': 26344, 'loss/train': 1.3670215606689453} +03/04/2022 20:25:09 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 20:25:13 - INFO - codeparrot_training - Step 26345: {'lr': 0.00046735479876543765, 'samples': 13489152, 'steps': 26345, 'loss/train': 1.0788187980651855} +03/04/2022 20:25:16 - INFO - codeparrot_training - Step 26346: {'lr': 0.00046735217678590957, 'samples': 13489664, 'steps': 26346, 'loss/train': 0.4471122622489929} +03/04/2022 20:25:17 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 20:25:21 - INFO - codeparrot_training - Step 26347: {'lr': 0.00046734955470844594, 'samples': 13490176, 'steps': 26347, 'loss/train': 2.2048699855804443} +03/04/2022 20:25:24 - INFO - codeparrot_training - Step 26348: {'lr': 0.00046734693253304795, 'samples': 13490688, 'steps': 26348, 'loss/train': 0.21905824542045593} +03/04/2022 20:25:26 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/04/2022 20:25:29 - INFO - codeparrot_training - Step 26349: {'lr': 0.0004673443102597168, 'samples': 13491200, 'steps': 26349, 'loss/train': 1.931807041168213} +03/04/2022 20:25:33 - INFO - codeparrot_training - Step 26350: {'lr': 0.00046734168788845363, 'samples': 13491712, 'steps': 26350, 'loss/train': 2.1042449474334717} +03/04/2022 20:25:34 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/04/2022 20:25:38 - INFO - codeparrot_training - Step 26351: {'lr': 0.00046733906541925963, 'samples': 13492224, 'steps': 26351, 'loss/train': 1.430544376373291} +03/04/2022 20:25:41 - INFO - codeparrot_training - Step 26352: {'lr': 0.00046733644285213604, 'samples': 13492736, 'steps': 26352, 'loss/train': 1.8092010021209717} +03/04/2022 20:25:42 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 20:25:46 - INFO - codeparrot_training - Step 26353: {'lr': 0.00046733382018708405, 'samples': 13493248, 'steps': 26353, 'loss/train': 2.2483410835266113} +03/04/2022 20:25:50 - INFO - codeparrot_training - Step 26354: {'lr': 0.00046733119742410476, 'samples': 13493760, 'steps': 26354, 'loss/train': 2.2999935150146484} +03/04/2022 20:25:52 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/04/2022 20:25:55 - INFO - codeparrot_training - Step 26355: {'lr': 0.0004673285745631993, 'samples': 13494272, 'steps': 26355, 'loss/train': 1.8416318893432617} +03/04/2022 20:25:58 - INFO - codeparrot_training - Step 26356: {'lr': 0.000467325951604369, 'samples': 13494784, 'steps': 26356, 'loss/train': 2.043569803237915} +03/04/2022 20:26:00 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 20:26:03 - INFO - codeparrot_training - Step 26357: {'lr': 0.00046732332854761507, 'samples': 13495296, 'steps': 26357, 'loss/train': 2.19012451171875} +03/04/2022 20:26:06 - INFO - codeparrot_training - Step 26358: {'lr': 0.00046732070539293847, 'samples': 13495808, 'steps': 26358, 'loss/train': 2.1480791568756104} +03/04/2022 20:26:09 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 20:26:12 - INFO - codeparrot_training - Step 26359: {'lr': 0.0004673180821403405, 'samples': 13496320, 'steps': 26359, 'loss/train': 1.4631963968276978} +03/04/2022 20:26:15 - INFO - codeparrot_training - Step 26360: {'lr': 0.00046731545878982253, 'samples': 13496832, 'steps': 26360, 'loss/train': 1.2111998796463013} +03/04/2022 20:26:18 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 20:26:21 - INFO - codeparrot_training - Step 26361: {'lr': 0.0004673128353413854, 'samples': 13497344, 'steps': 26361, 'loss/train': 0.8602479100227356} +03/04/2022 20:26:24 - INFO - codeparrot_training - Step 26362: {'lr': 0.00046731021179503054, 'samples': 13497856, 'steps': 26362, 'loss/train': 2.090641975402832} +03/04/2022 20:26:27 - INFO - codeparrot_training - Step 26363: {'lr': 0.00046730758815075903, 'samples': 13498368, 'steps': 26363, 'loss/train': 0.4611779451370239} +03/04/2022 20:26:28 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/04/2022 20:26:32 - INFO - codeparrot_training - Step 26364: {'lr': 0.0004673049644085721, 'samples': 13498880, 'steps': 26364, 'loss/train': 2.6320652961730957} +03/04/2022 20:26:35 - INFO - codeparrot_training - Step 26365: {'lr': 0.00046730234056847084, 'samples': 13499392, 'steps': 26365, 'loss/train': 2.3065810203552246} +03/04/2022 20:26:36 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 20:26:41 - INFO - codeparrot_training - Step 26366: {'lr': 0.00046729971663045654, 'samples': 13499904, 'steps': 26366, 'loss/train': 2.055612564086914} +03/04/2022 20:26:44 - INFO - codeparrot_training - Step 26367: {'lr': 0.00046729709259453033, 'samples': 13500416, 'steps': 26367, 'loss/train': 1.6176706552505493} +03/04/2022 20:26:45 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 20:26:50 - INFO - codeparrot_training - Step 26368: {'lr': 0.0004672944684606934, 'samples': 13500928, 'steps': 26368, 'loss/train': 1.3179558515548706} +03/04/2022 20:26:53 - INFO - codeparrot_training - Step 26369: {'lr': 0.000467291844228947, 'samples': 13501440, 'steps': 26369, 'loss/train': 1.3017600774765015} +03/04/2022 20:26:55 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/04/2022 20:26:58 - INFO - codeparrot_training - Step 26370: {'lr': 0.00046728921989929215, 'samples': 13501952, 'steps': 26370, 'loss/train': 1.774262547492981} +03/04/2022 20:27:01 - INFO - codeparrot_training - Step 26371: {'lr': 0.0004672865954717301, 'samples': 13502464, 'steps': 26371, 'loss/train': 2.459853410720825} +03/04/2022 20:27:04 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 20:27:06 - INFO - codeparrot_training - Step 26372: {'lr': 0.00046728397094626217, 'samples': 13502976, 'steps': 26372, 'loss/train': 2.0828793048858643} +03/04/2022 20:27:10 - INFO - codeparrot_training - Step 26373: {'lr': 0.0004672813463228894, 'samples': 13503488, 'steps': 26373, 'loss/train': 1.922803282737732} +03/04/2022 20:27:12 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 20:27:15 - INFO - codeparrot_training - Step 26374: {'lr': 0.00046727872160161305, 'samples': 13504000, 'steps': 26374, 'loss/train': 1.6921474933624268} +03/04/2022 20:27:18 - INFO - codeparrot_training - Step 26375: {'lr': 0.0004672760967824342, 'samples': 13504512, 'steps': 26375, 'loss/train': 2.1356189250946045} +03/04/2022 20:27:20 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 20:27:23 - INFO - codeparrot_training - Step 26376: {'lr': 0.0004672734718653541, 'samples': 13505024, 'steps': 26376, 'loss/train': 1.4718737602233887} +03/04/2022 20:27:26 - INFO - codeparrot_training - Step 26377: {'lr': 0.00046727084685037394, 'samples': 13505536, 'steps': 26377, 'loss/train': 2.175557851791382} +03/04/2022 20:27:29 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/04/2022 20:27:32 - INFO - codeparrot_training - Step 26378: {'lr': 0.00046726822173749497, 'samples': 13506048, 'steps': 26378, 'loss/train': 2.9148662090301514} +03/04/2022 20:27:35 - INFO - codeparrot_training - Step 26379: {'lr': 0.0004672655965267182, 'samples': 13506560, 'steps': 26379, 'loss/train': 1.0219379663467407} +03/04/2022 20:27:38 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 20:27:40 - INFO - codeparrot_training - Step 26380: {'lr': 0.0004672629712180448, 'samples': 13507072, 'steps': 26380, 'loss/train': 2.3088953495025635} +03/04/2022 20:27:43 - INFO - codeparrot_training - Step 26381: {'lr': 0.00046726034581147624, 'samples': 13507584, 'steps': 26381, 'loss/train': 2.821471929550171} +03/04/2022 20:27:46 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 20:27:49 - INFO - codeparrot_training - Step 26382: {'lr': 0.0004672577203070135, 'samples': 13508096, 'steps': 26382, 'loss/train': 2.200788974761963} +03/04/2022 20:27:52 - INFO - codeparrot_training - Step 26383: {'lr': 0.0004672550947046577, 'samples': 13508608, 'steps': 26383, 'loss/train': 2.0816233158111572} +03/04/2022 20:27:55 - INFO - codeparrot_training - Step 26384: {'lr': 0.0004672524690044102, 'samples': 13509120, 'steps': 26384, 'loss/train': 2.0663087368011475} +03/04/2022 20:27:55 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/04/2022 20:28:00 - INFO - codeparrot_training - Step 26385: {'lr': 0.000467249843206272, 'samples': 13509632, 'steps': 26385, 'loss/train': 2.1231961250305176} +03/04/2022 20:28:04 - INFO - codeparrot_training - Step 26386: {'lr': 0.00046724721731024446, 'samples': 13510144, 'steps': 26386, 'loss/train': 1.90706467628479} +03/04/2022 20:28:04 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/04/2022 20:28:09 - INFO - codeparrot_training - Step 26387: {'lr': 0.00046724459131632854, 'samples': 13510656, 'steps': 26387, 'loss/train': 1.4670147895812988} +03/04/2022 20:28:12 - INFO - codeparrot_training - Step 26388: {'lr': 0.00046724196522452565, 'samples': 13511168, 'steps': 26388, 'loss/train': 1.9441431760787964} +03/04/2022 20:28:12 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/04/2022 20:28:18 - INFO - codeparrot_training - Step 26389: {'lr': 0.00046723933903483687, 'samples': 13511680, 'steps': 26389, 'loss/train': 1.654375672340393} +03/04/2022 20:28:21 - INFO - codeparrot_training - Step 26390: {'lr': 0.00046723671274726344, 'samples': 13512192, 'steps': 26390, 'loss/train': 1.3300007581710815} +03/04/2022 20:28:21 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/04/2022 20:28:26 - INFO - codeparrot_training - Step 26391: {'lr': 0.00046723408636180645, 'samples': 13512704, 'steps': 26391, 'loss/train': 1.7215726375579834} +03/04/2022 20:28:29 - INFO - codeparrot_training - Step 26392: {'lr': 0.00046723145987846715, 'samples': 13513216, 'steps': 26392, 'loss/train': 1.7362573146820068} +03/04/2022 20:28:29 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/04/2022 20:28:34 - INFO - codeparrot_training - Step 26393: {'lr': 0.00046722883329724667, 'samples': 13513728, 'steps': 26393, 'loss/train': 1.1911818981170654} +03/04/2022 20:28:38 - INFO - codeparrot_training - Step 26394: {'lr': 0.0004672262066181463, 'samples': 13514240, 'steps': 26394, 'loss/train': 1.8602632284164429} +03/04/2022 20:28:38 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 20:28:43 - INFO - codeparrot_training - Step 26395: {'lr': 0.00046722357984116717, 'samples': 13514752, 'steps': 26395, 'loss/train': 1.9785295724868774} +03/04/2022 20:28:46 - INFO - codeparrot_training - Step 26396: {'lr': 0.0004672209529663103, 'samples': 13515264, 'steps': 26396, 'loss/train': 1.8130202293395996} +03/04/2022 20:28:46 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 20:28:52 - INFO - codeparrot_training - Step 26397: {'lr': 0.00046721832599357717, 'samples': 13515776, 'steps': 26397, 'loss/train': 1.79843008518219} +03/04/2022 20:28:55 - INFO - codeparrot_training - Step 26398: {'lr': 0.00046721569892296875, 'samples': 13516288, 'steps': 26398, 'loss/train': 2.376918315887451} +03/04/2022 20:28:55 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 20:29:00 - INFO - codeparrot_training - Step 26399: {'lr': 0.00046721307175448626, 'samples': 13516800, 'steps': 26399, 'loss/train': 1.7172563076019287} +03/04/2022 20:29:03 - INFO - codeparrot_training - Step 26400: {'lr': 0.000467210444488131, 'samples': 13517312, 'steps': 26400, 'loss/train': 1.6288255453109741} +03/04/2022 20:29:04 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 20:29:09 - INFO - codeparrot_training - Step 26401: {'lr': 0.000467207817123904, 'samples': 13517824, 'steps': 26401, 'loss/train': 1.9853402376174927} +03/04/2022 20:29:12 - INFO - codeparrot_training - Step 26402: {'lr': 0.0004672051896618065, 'samples': 13518336, 'steps': 26402, 'loss/train': 2.4832844734191895} +03/04/2022 20:29:12 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/04/2022 20:29:17 - INFO - codeparrot_training - Step 26403: {'lr': 0.0004672025621018397, 'samples': 13518848, 'steps': 26403, 'loss/train': 6.4857330322265625} +03/04/2022 20:29:20 - INFO - codeparrot_training - Step 26404: {'lr': 0.00046719993444400477, 'samples': 13519360, 'steps': 26404, 'loss/train': 0.782981812953949} +03/04/2022 20:29:21 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 20:29:26 - INFO - codeparrot_training - Step 26405: {'lr': 0.00046719730668830293, 'samples': 13519872, 'steps': 26405, 'loss/train': 0.9992297291755676} +03/04/2022 20:29:29 - INFO - codeparrot_training - Step 26406: {'lr': 0.0004671946788347353, 'samples': 13520384, 'steps': 26406, 'loss/train': 0.9563493728637695} +03/04/2022 20:29:30 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/04/2022 20:29:34 - INFO - codeparrot_training - Step 26407: {'lr': 0.00046719205088330317, 'samples': 13520896, 'steps': 26407, 'loss/train': 0.8506782054901123} +03/04/2022 20:29:37 - INFO - codeparrot_training - Step 26408: {'lr': 0.0004671894228340076, 'samples': 13521408, 'steps': 26408, 'loss/train': 1.7318669557571411} +03/04/2022 20:29:38 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 20:29:42 - INFO - codeparrot_training - Step 26409: {'lr': 0.0004671867946868499, 'samples': 13521920, 'steps': 26409, 'loss/train': 1.9564017057418823} +03/04/2022 20:29:46 - INFO - codeparrot_training - Step 26410: {'lr': 0.000467184166441831, 'samples': 13522432, 'steps': 26410, 'loss/train': 2.875936985015869} +03/04/2022 20:29:47 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 20:29:51 - INFO - codeparrot_training - Step 26411: {'lr': 0.0004671815380989525, 'samples': 13522944, 'steps': 26411, 'loss/train': 1.4288697242736816} +03/04/2022 20:29:54 - INFO - codeparrot_training - Step 26412: {'lr': 0.0004671789096582152, 'samples': 13523456, 'steps': 26412, 'loss/train': 1.0903258323669434} +03/04/2022 20:29:55 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 20:29:59 - INFO - codeparrot_training - Step 26413: {'lr': 0.00046717628111962045, 'samples': 13523968, 'steps': 26413, 'loss/train': 2.001708507537842} +03/04/2022 20:30:02 - INFO - codeparrot_training - Step 26414: {'lr': 0.00046717365248316947, 'samples': 13524480, 'steps': 26414, 'loss/train': 1.982118010520935} +03/04/2022 20:30:03 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/04/2022 20:30:08 - INFO - codeparrot_training - Step 26415: {'lr': 0.00046717102374886334, 'samples': 13524992, 'steps': 26415, 'loss/train': 2.015167474746704} +03/04/2022 20:30:11 - INFO - codeparrot_training - Step 26416: {'lr': 0.0004671683949167033, 'samples': 13525504, 'steps': 26416, 'loss/train': 2.158921003341675} +03/04/2022 20:30:14 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 20:30:17 - INFO - codeparrot_training - Step 26417: {'lr': 0.0004671657659866906, 'samples': 13526016, 'steps': 26417, 'loss/train': 2.13736629486084} +03/04/2022 20:30:20 - INFO - codeparrot_training - Step 26418: {'lr': 0.00046716313695882626, 'samples': 13526528, 'steps': 26418, 'loss/train': 1.5941184759140015} +03/04/2022 20:30:22 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 20:30:25 - INFO - codeparrot_training - Step 26419: {'lr': 0.00046716050783311166, 'samples': 13527040, 'steps': 26419, 'loss/train': 1.4115631580352783} +03/04/2022 20:30:28 - INFO - codeparrot_training - Step 26420: {'lr': 0.00046715787860954785, 'samples': 13527552, 'steps': 26420, 'loss/train': 2.2943177223205566} +03/04/2022 20:30:31 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 20:30:33 - INFO - codeparrot_training - Step 26421: {'lr': 0.000467155249288136, 'samples': 13528064, 'steps': 26421, 'loss/train': 2.1785802841186523} +03/04/2022 20:30:37 - INFO - codeparrot_training - Step 26422: {'lr': 0.00046715261986887734, 'samples': 13528576, 'steps': 26422, 'loss/train': 1.9694342613220215} +03/04/2022 20:30:39 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/04/2022 20:30:42 - INFO - codeparrot_training - Step 26423: {'lr': 0.0004671499903517732, 'samples': 13529088, 'steps': 26423, 'loss/train': 2.3398220539093018} +03/04/2022 20:30:45 - INFO - codeparrot_training - Step 26424: {'lr': 0.00046714736073682453, 'samples': 13529600, 'steps': 26424, 'loss/train': 2.203522205352783} +03/04/2022 20:30:47 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/04/2022 20:30:50 - INFO - codeparrot_training - Step 26425: {'lr': 0.00046714473102403255, 'samples': 13530112, 'steps': 26425, 'loss/train': 2.436027765274048} +03/04/2022 20:30:53 - INFO - codeparrot_training - Step 26426: {'lr': 0.0004671421012133986, 'samples': 13530624, 'steps': 26426, 'loss/train': 2.0419392585754395} +03/04/2022 20:30:56 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 20:30:59 - INFO - codeparrot_training - Step 26427: {'lr': 0.00046713947130492373, 'samples': 13531136, 'steps': 26427, 'loss/train': 1.4386550188064575} +03/04/2022 20:31:02 - INFO - codeparrot_training - Step 26428: {'lr': 0.0004671368412986091, 'samples': 13531648, 'steps': 26428, 'loss/train': 1.6365159749984741} +03/04/2022 20:31:04 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 20:31:07 - INFO - codeparrot_training - Step 26429: {'lr': 0.0004671342111944561, 'samples': 13532160, 'steps': 26429, 'loss/train': 2.3108408451080322} +03/04/2022 20:31:10 - INFO - codeparrot_training - Step 26430: {'lr': 0.00046713158099246564, 'samples': 13532672, 'steps': 26430, 'loss/train': 2.017540693283081} +03/04/2022 20:31:13 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 20:31:16 - INFO - codeparrot_training - Step 26431: {'lr': 0.00046712895069263917, 'samples': 13533184, 'steps': 26431, 'loss/train': 1.7505096197128296} +03/04/2022 20:31:19 - INFO - codeparrot_training - Step 26432: {'lr': 0.00046712632029497766, 'samples': 13533696, 'steps': 26432, 'loss/train': 1.755135178565979} +03/04/2022 20:31:22 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/04/2022 20:31:24 - INFO - codeparrot_training - Step 26433: {'lr': 0.0004671236897994824, 'samples': 13534208, 'steps': 26433, 'loss/train': 1.7604070901870728} +03/04/2022 20:31:27 - INFO - codeparrot_training - Step 26434: {'lr': 0.00046712105920615455, 'samples': 13534720, 'steps': 26434, 'loss/train': 1.7391496896743774} +03/04/2022 20:31:30 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/04/2022 20:31:33 - INFO - codeparrot_training - Step 26435: {'lr': 0.00046711842851499533, 'samples': 13535232, 'steps': 26435, 'loss/train': 1.6905876398086548} +03/04/2022 20:31:36 - INFO - codeparrot_training - Step 26436: {'lr': 0.0004671157977260059, 'samples': 13535744, 'steps': 26436, 'loss/train': 2.1517322063446045} +03/04/2022 20:31:39 - INFO - codeparrot_training - Step 26437: {'lr': 0.0004671131668391874, 'samples': 13536256, 'steps': 26437, 'loss/train': 0.8955194354057312} +03/04/2022 20:31:39 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 20:31:44 - INFO - codeparrot_training - Step 26438: {'lr': 0.00046711053585454104, 'samples': 13536768, 'steps': 26438, 'loss/train': 2.2363696098327637} +03/04/2022 20:31:48 - INFO - codeparrot_training - Step 26439: {'lr': 0.0004671079047720681, 'samples': 13537280, 'steps': 26439, 'loss/train': 1.9852031469345093} +03/04/2022 20:31:48 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/04/2022 20:31:53 - INFO - codeparrot_training - Step 26440: {'lr': 0.00046710527359176957, 'samples': 13537792, 'steps': 26440, 'loss/train': 1.3503477573394775} +03/04/2022 20:31:55 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 20:31:58 - INFO - codeparrot_training - Step 26441: {'lr': 0.0004671026423136469, 'samples': 13538304, 'steps': 26441, 'loss/train': 2.1063976287841797} +03/04/2022 20:32:01 - INFO - codeparrot_training - Step 26442: {'lr': 0.00046710001093770107, 'samples': 13538816, 'steps': 26442, 'loss/train': 2.0339794158935547} +03/04/2022 20:32:04 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 20:32:07 - INFO - codeparrot_training - Step 26443: {'lr': 0.0004670973794639333, 'samples': 13539328, 'steps': 26443, 'loss/train': 1.880011796951294} +03/04/2022 20:32:10 - INFO - codeparrot_training - Step 26444: {'lr': 0.0004670947478923447, 'samples': 13539840, 'steps': 26444, 'loss/train': 1.0665866136550903} +03/04/2022 20:32:12 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 20:32:15 - INFO - codeparrot_training - Step 26445: {'lr': 0.00046709211622293677, 'samples': 13540352, 'steps': 26445, 'loss/train': 1.7912287712097168} +03/04/2022 20:32:18 - INFO - codeparrot_training - Step 26446: {'lr': 0.00046708948445571037, 'samples': 13540864, 'steps': 26446, 'loss/train': 2.483994960784912} +03/04/2022 20:32:21 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/04/2022 20:32:24 - INFO - codeparrot_training - Step 26447: {'lr': 0.0004670868525906668, 'samples': 13541376, 'steps': 26447, 'loss/train': 1.478830099105835} +03/04/2022 20:32:27 - INFO - codeparrot_training - Step 26448: {'lr': 0.00046708422062780725, 'samples': 13541888, 'steps': 26448, 'loss/train': 1.729313850402832} +03/04/2022 20:32:30 - INFO - codeparrot_training - Step 26449: {'lr': 0.0004670815885671329, 'samples': 13542400, 'steps': 26449, 'loss/train': 1.9526331424713135} +03/04/2022 20:32:30 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/04/2022 20:32:35 - INFO - codeparrot_training - Step 26450: {'lr': 0.00046707895640864494, 'samples': 13542912, 'steps': 26450, 'loss/train': 1.9172755479812622} +03/04/2022 20:32:39 - INFO - codeparrot_training - Step 26451: {'lr': 0.0004670763241523446, 'samples': 13543424, 'steps': 26451, 'loss/train': 1.6826196908950806} +03/04/2022 20:32:39 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 20:32:44 - INFO - codeparrot_training - Step 26452: {'lr': 0.00046707369179823294, 'samples': 13543936, 'steps': 26452, 'loss/train': 2.079040765762329} +03/04/2022 20:32:47 - INFO - codeparrot_training - Step 26453: {'lr': 0.00046707105934631123, 'samples': 13544448, 'steps': 26453, 'loss/train': 1.9599032402038574} +03/04/2022 20:32:47 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 20:32:52 - INFO - codeparrot_training - Step 26454: {'lr': 0.00046706842679658067, 'samples': 13544960, 'steps': 26454, 'loss/train': 0.9750809669494629} +03/04/2022 20:32:55 - INFO - codeparrot_training - Step 26455: {'lr': 0.0004670657941490425, 'samples': 13545472, 'steps': 26455, 'loss/train': 1.2822140455245972} +03/04/2022 20:32:56 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/04/2022 20:33:01 - INFO - codeparrot_training - Step 26456: {'lr': 0.00046706316140369774, 'samples': 13545984, 'steps': 26456, 'loss/train': 2.9064910411834717} +03/04/2022 20:33:04 - INFO - codeparrot_training - Step 26457: {'lr': 0.0004670605285605477, 'samples': 13546496, 'steps': 26457, 'loss/train': 2.2233498096466064} +03/04/2022 20:33:05 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/04/2022 20:33:09 - INFO - codeparrot_training - Step 26458: {'lr': 0.0004670578956195935, 'samples': 13547008, 'steps': 26458, 'loss/train': 3.8833322525024414} +03/04/2022 20:33:12 - INFO - codeparrot_training - Step 26459: {'lr': 0.00046705526258083643, 'samples': 13547520, 'steps': 26459, 'loss/train': 1.307416558265686} +03/04/2022 20:33:13 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 20:33:18 - INFO - codeparrot_training - Step 26460: {'lr': 0.0004670526294442775, 'samples': 13548032, 'steps': 26460, 'loss/train': 2.1492719650268555} +03/04/2022 20:33:21 - INFO - codeparrot_training - Step 26461: {'lr': 0.0004670499962099181, 'samples': 13548544, 'steps': 26461, 'loss/train': 2.1291027069091797} +03/04/2022 20:33:23 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 20:33:26 - INFO - codeparrot_training - Step 26462: {'lr': 0.0004670473628777593, 'samples': 13549056, 'steps': 26462, 'loss/train': 1.5550085306167603} +03/04/2022 20:33:29 - INFO - codeparrot_training - Step 26463: {'lr': 0.0004670447294478023, 'samples': 13549568, 'steps': 26463, 'loss/train': 1.7049188613891602} +03/04/2022 20:33:31 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/04/2022 20:33:34 - INFO - codeparrot_training - Step 26464: {'lr': 0.0004670420959200483, 'samples': 13550080, 'steps': 26464, 'loss/train': 1.6637805700302124} +03/04/2022 20:33:38 - INFO - codeparrot_training - Step 26465: {'lr': 0.00046703946229449846, 'samples': 13550592, 'steps': 26465, 'loss/train': 2.4205026626586914} +03/04/2022 20:33:39 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 20:33:43 - INFO - codeparrot_training - Step 26466: {'lr': 0.00046703682857115406, 'samples': 13551104, 'steps': 26466, 'loss/train': 1.1018468141555786} +03/04/2022 20:33:46 - INFO - codeparrot_training - Step 26467: {'lr': 0.0004670341947500161, 'samples': 13551616, 'steps': 26467, 'loss/train': 1.527510643005371} +03/04/2022 20:33:47 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 20:33:51 - INFO - codeparrot_training - Step 26468: {'lr': 0.00046703156083108597, 'samples': 13552128, 'steps': 26468, 'loss/train': 1.5231901407241821} +03/04/2022 20:33:54 - INFO - codeparrot_training - Step 26469: {'lr': 0.0004670289268143647, 'samples': 13552640, 'steps': 26469, 'loss/train': 1.6364010572433472} +03/04/2022 20:33:56 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/04/2022 20:34:00 - INFO - codeparrot_training - Step 26470: {'lr': 0.0004670262926998536, 'samples': 13553152, 'steps': 26470, 'loss/train': 1.6553860902786255} +03/04/2022 20:34:03 - INFO - codeparrot_training - Step 26471: {'lr': 0.00046702365848755377, 'samples': 13553664, 'steps': 26471, 'loss/train': 0.3026265501976013} +03/04/2022 20:34:04 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 20:34:08 - INFO - codeparrot_training - Step 26472: {'lr': 0.0004670210241774664, 'samples': 13554176, 'steps': 26472, 'loss/train': 2.116995096206665} +03/04/2022 20:34:11 - INFO - codeparrot_training - Step 26473: {'lr': 0.0004670183897695928, 'samples': 13554688, 'steps': 26473, 'loss/train': 1.4347567558288574} +03/04/2022 20:34:13 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 20:34:17 - INFO - codeparrot_training - Step 26474: {'lr': 0.00046701575526393395, 'samples': 13555200, 'steps': 26474, 'loss/train': 1.3316739797592163} +03/04/2022 20:34:20 - INFO - codeparrot_training - Step 26475: {'lr': 0.00046701312066049126, 'samples': 13555712, 'steps': 26475, 'loss/train': 2.721540689468384} +03/04/2022 20:34:21 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 20:34:25 - INFO - codeparrot_training - Step 26476: {'lr': 0.00046701048595926574, 'samples': 13556224, 'steps': 26476, 'loss/train': 2.0841526985168457} +03/04/2022 20:34:28 - INFO - codeparrot_training - Step 26477: {'lr': 0.00046700785116025867, 'samples': 13556736, 'steps': 26477, 'loss/train': 1.8084744215011597} +03/04/2022 20:34:30 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/04/2022 20:34:33 - INFO - codeparrot_training - Step 26478: {'lr': 0.0004670052162634712, 'samples': 13557248, 'steps': 26478, 'loss/train': 2.3716490268707275} +03/04/2022 20:34:37 - INFO - codeparrot_training - Step 26479: {'lr': 0.0004670025812689045, 'samples': 13557760, 'steps': 26479, 'loss/train': 2.438819646835327} +03/04/2022 20:34:38 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 20:34:42 - INFO - codeparrot_training - Step 26480: {'lr': 0.00046699994617655985, 'samples': 13558272, 'steps': 26480, 'loss/train': 2.3289902210235596} +03/04/2022 20:34:45 - INFO - codeparrot_training - Step 26481: {'lr': 0.0004669973109864383, 'samples': 13558784, 'steps': 26481, 'loss/train': 1.9662460088729858} +03/04/2022 20:34:46 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 20:34:50 - INFO - codeparrot_training - Step 26482: {'lr': 0.00046699467569854115, 'samples': 13559296, 'steps': 26482, 'loss/train': 1.741187572479248} +03/04/2022 20:34:53 - INFO - codeparrot_training - Step 26483: {'lr': 0.0004669920403128696, 'samples': 13559808, 'steps': 26483, 'loss/train': 2.096268653869629} +03/04/2022 20:34:55 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 20:34:59 - INFO - codeparrot_training - Step 26484: {'lr': 0.00046698940482942466, 'samples': 13560320, 'steps': 26484, 'loss/train': 2.9663467407226562} +03/04/2022 20:35:02 - INFO - codeparrot_training - Step 26485: {'lr': 0.0004669867692482077, 'samples': 13560832, 'steps': 26485, 'loss/train': 1.6388397216796875} +03/04/2022 20:35:03 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 20:35:07 - INFO - codeparrot_training - Step 26486: {'lr': 0.00046698413356921985, 'samples': 13561344, 'steps': 26486, 'loss/train': 1.5816243886947632} +03/04/2022 20:35:11 - INFO - codeparrot_training - Step 26487: {'lr': 0.00046698149779246235, 'samples': 13561856, 'steps': 26487, 'loss/train': 2.0468482971191406} +03/04/2022 20:35:12 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 20:35:16 - INFO - codeparrot_training - Step 26488: {'lr': 0.0004669788619179363, 'samples': 13562368, 'steps': 26488, 'loss/train': 1.807116150856018} +03/04/2022 20:35:19 - INFO - codeparrot_training - Step 26489: {'lr': 0.0004669762259456429, 'samples': 13562880, 'steps': 26489, 'loss/train': 2.150627851486206} +03/04/2022 20:35:21 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 20:35:24 - INFO - codeparrot_training - Step 26490: {'lr': 0.00046697358987558336, 'samples': 13563392, 'steps': 26490, 'loss/train': 2.465322256088257} +03/04/2022 20:35:27 - INFO - codeparrot_training - Step 26491: {'lr': 0.0004669709537077589, 'samples': 13563904, 'steps': 26491, 'loss/train': 0.7522966861724854} +03/04/2022 20:35:29 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 20:35:33 - INFO - codeparrot_training - Step 26492: {'lr': 0.00046696831744217065, 'samples': 13564416, 'steps': 26492, 'loss/train': 1.259932279586792} +03/04/2022 20:35:36 - INFO - codeparrot_training - Step 26493: {'lr': 0.0004669656810788199, 'samples': 13564928, 'steps': 26493, 'loss/train': 1.414974570274353} +03/04/2022 20:35:37 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 20:35:41 - INFO - codeparrot_training - Step 26494: {'lr': 0.0004669630446177077, 'samples': 13565440, 'steps': 26494, 'loss/train': 1.3000133037567139} +03/04/2022 20:35:44 - INFO - codeparrot_training - Step 26495: {'lr': 0.0004669604080588352, 'samples': 13565952, 'steps': 26495, 'loss/train': 1.964637041091919} +03/04/2022 20:35:46 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 20:35:50 - INFO - codeparrot_training - Step 26496: {'lr': 0.0004669577714022039, 'samples': 13566464, 'steps': 26496, 'loss/train': 1.4004921913146973} +03/04/2022 20:35:53 - INFO - codeparrot_training - Step 26497: {'lr': 0.00046695513464781456, 'samples': 13566976, 'steps': 26497, 'loss/train': 1.7632145881652832} +03/04/2022 20:35:54 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 20:35:58 - INFO - codeparrot_training - Step 26498: {'lr': 0.00046695249779566875, 'samples': 13567488, 'steps': 26498, 'loss/train': 1.6646251678466797} +03/04/2022 20:36:01 - INFO - codeparrot_training - Step 26499: {'lr': 0.0004669498608457674, 'samples': 13568000, 'steps': 26499, 'loss/train': 1.4009953737258911} +03/04/2022 20:36:04 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 20:36:07 - INFO - codeparrot_training - Step 26500: {'lr': 0.0004669472237981118, 'samples': 13568512, 'steps': 26500, 'loss/train': 1.996139645576477} +03/04/2022 20:36:10 - INFO - codeparrot_training - Step 26501: {'lr': 0.00046694458665270315, 'samples': 13569024, 'steps': 26501, 'loss/train': 1.607911467552185} +03/04/2022 20:36:12 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/04/2022 20:36:15 - INFO - codeparrot_training - Step 26502: {'lr': 0.0004669419494095426, 'samples': 13569536, 'steps': 26502, 'loss/train': 1.9766838550567627} +03/04/2022 20:36:18 - INFO - codeparrot_training - Step 26503: {'lr': 0.0004669393120686314, 'samples': 13570048, 'steps': 26503, 'loss/train': 1.6975353956222534} +03/04/2022 20:36:21 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 20:36:23 - INFO - codeparrot_training - Step 26504: {'lr': 0.0004669366746299707, 'samples': 13570560, 'steps': 26504, 'loss/train': 2.2534005641937256} +03/04/2022 20:36:27 - INFO - codeparrot_training - Step 26505: {'lr': 0.00046693403709356163, 'samples': 13571072, 'steps': 26505, 'loss/train': 1.399840235710144} +03/04/2022 20:36:29 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/04/2022 20:36:32 - INFO - codeparrot_training - Step 26506: {'lr': 0.00046693139945940546, 'samples': 13571584, 'steps': 26506, 'loss/train': 1.7342591285705566} +03/04/2022 20:36:35 - INFO - codeparrot_training - Step 26507: {'lr': 0.0004669287617275033, 'samples': 13572096, 'steps': 26507, 'loss/train': 1.2373005151748657} +03/04/2022 20:36:37 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/04/2022 20:36:40 - INFO - codeparrot_training - Step 26508: {'lr': 0.0004669261238978564, 'samples': 13572608, 'steps': 26508, 'loss/train': 1.493643045425415} +03/04/2022 20:36:44 - INFO - codeparrot_training - Step 26509: {'lr': 0.00046692348597046596, 'samples': 13573120, 'steps': 26509, 'loss/train': 2.1045961380004883} +03/04/2022 20:36:46 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 20:36:49 - INFO - codeparrot_training - Step 26510: {'lr': 0.0004669208479453332, 'samples': 13573632, 'steps': 26510, 'loss/train': 1.3280003070831299} +03/04/2022 20:36:52 - INFO - codeparrot_training - Step 26511: {'lr': 0.00046691820982245913, 'samples': 13574144, 'steps': 26511, 'loss/train': 1.4315855503082275} +03/04/2022 20:36:55 - INFO - codeparrot_training - Step 26512: {'lr': 0.00046691557160184516, 'samples': 13574656, 'steps': 26512, 'loss/train': 1.377063274383545} +03/04/2022 20:36:56 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 20:37:01 - INFO - codeparrot_training - Step 26513: {'lr': 0.0004669129332834923, 'samples': 13575168, 'steps': 26513, 'loss/train': 1.4692057371139526} +03/04/2022 20:37:04 - INFO - codeparrot_training - Step 26514: {'lr': 0.0004669102948674019, 'samples': 13575680, 'steps': 26514, 'loss/train': 1.4055215120315552} +03/04/2022 20:37:05 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/04/2022 20:37:09 - INFO - codeparrot_training - Step 26515: {'lr': 0.000466907656353575, 'samples': 13576192, 'steps': 26515, 'loss/train': 1.5627578496932983} +03/04/2022 20:37:12 - INFO - codeparrot_training - Step 26516: {'lr': 0.0004669050177420129, 'samples': 13576704, 'steps': 26516, 'loss/train': 1.8977771997451782} +03/04/2022 20:37:13 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 20:37:18 - INFO - codeparrot_training - Step 26517: {'lr': 0.0004669023790327168, 'samples': 13577216, 'steps': 26517, 'loss/train': 2.461113929748535} +03/04/2022 20:37:21 - INFO - codeparrot_training - Step 26518: {'lr': 0.0004668997402256877, 'samples': 13577728, 'steps': 26518, 'loss/train': 1.8436012268066406} +03/04/2022 20:37:22 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 20:37:26 - INFO - codeparrot_training - Step 26519: {'lr': 0.00046689710132092704, 'samples': 13578240, 'steps': 26519, 'loss/train': 2.4113199710845947} +03/04/2022 20:37:29 - INFO - codeparrot_training - Step 26520: {'lr': 0.00046689446231843585, 'samples': 13578752, 'steps': 26520, 'loss/train': 1.2172058820724487} +03/04/2022 20:37:30 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/04/2022 20:37:35 - INFO - codeparrot_training - Step 26521: {'lr': 0.0004668918232182153, 'samples': 13579264, 'steps': 26521, 'loss/train': 1.2210732698440552} +03/04/2022 20:37:38 - INFO - codeparrot_training - Step 26522: {'lr': 0.0004668891840202668, 'samples': 13579776, 'steps': 26522, 'loss/train': 2.2023234367370605} +03/04/2022 20:37:39 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 20:37:43 - INFO - codeparrot_training - Step 26523: {'lr': 0.00046688654472459124, 'samples': 13580288, 'steps': 26523, 'loss/train': 1.5736870765686035} +03/04/2022 20:37:46 - INFO - codeparrot_training - Step 26524: {'lr': 0.00046688390533119003, 'samples': 13580800, 'steps': 26524, 'loss/train': 2.8879246711730957} +03/04/2022 20:37:47 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/04/2022 20:37:51 - INFO - codeparrot_training - Step 26525: {'lr': 0.00046688126584006425, 'samples': 13581312, 'steps': 26525, 'loss/train': 0.8541378974914551} +03/04/2022 20:37:55 - INFO - codeparrot_training - Step 26526: {'lr': 0.00046687862625121505, 'samples': 13581824, 'steps': 26526, 'loss/train': 1.7431772947311401} +03/04/2022 20:37:55 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 20:38:00 - INFO - codeparrot_training - Step 26527: {'lr': 0.0004668759865646438, 'samples': 13582336, 'steps': 26527, 'loss/train': 2.386282444000244} +03/04/2022 20:38:03 - INFO - codeparrot_training - Step 26528: {'lr': 0.00046687334678035153, 'samples': 13582848, 'steps': 26528, 'loss/train': 2.0503463745117188} +03/04/2022 20:38:03 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 20:38:08 - INFO - codeparrot_training - Step 26529: {'lr': 0.00046687070689833943, 'samples': 13583360, 'steps': 26529, 'loss/train': 1.8635592460632324} +03/04/2022 20:38:12 - INFO - codeparrot_training - Step 26530: {'lr': 0.00046686806691860884, 'samples': 13583872, 'steps': 26530, 'loss/train': 1.4422972202301025} +03/04/2022 20:38:12 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/04/2022 20:38:17 - INFO - codeparrot_training - Step 26531: {'lr': 0.00046686542684116073, 'samples': 13584384, 'steps': 26531, 'loss/train': 1.7908167839050293} +03/04/2022 20:38:20 - INFO - codeparrot_training - Step 26532: {'lr': 0.00046686278666599647, 'samples': 13584896, 'steps': 26532, 'loss/train': 2.5626654624938965} +03/04/2022 20:38:20 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 20:38:25 - INFO - codeparrot_training - Step 26533: {'lr': 0.0004668601463931172, 'samples': 13585408, 'steps': 26533, 'loss/train': 1.3456982374191284} +03/04/2022 20:38:29 - INFO - codeparrot_training - Step 26534: {'lr': 0.00046685750602252406, 'samples': 13585920, 'steps': 26534, 'loss/train': 1.7075285911560059} +03/04/2022 20:38:29 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 20:38:34 - INFO - codeparrot_training - Step 26535: {'lr': 0.0004668548655542183, 'samples': 13586432, 'steps': 26535, 'loss/train': 1.6753120422363281} +03/04/2022 20:38:37 - INFO - codeparrot_training - Step 26536: {'lr': 0.000466852224988201, 'samples': 13586944, 'steps': 26536, 'loss/train': 2.108614444732666} +03/04/2022 20:38:37 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 20:38:42 - INFO - codeparrot_training - Step 26537: {'lr': 0.00046684958432447355, 'samples': 13587456, 'steps': 26537, 'loss/train': 1.1829787492752075} +03/04/2022 20:38:45 - INFO - codeparrot_training - Step 26538: {'lr': 0.00046684694356303693, 'samples': 13587968, 'steps': 26538, 'loss/train': 2.3334717750549316} +03/04/2022 20:38:45 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 20:38:51 - INFO - codeparrot_training - Step 26539: {'lr': 0.0004668443027038925, 'samples': 13588480, 'steps': 26539, 'loss/train': 2.772888422012329} +03/04/2022 20:38:54 - INFO - codeparrot_training - Step 26540: {'lr': 0.00046684166174704134, 'samples': 13588992, 'steps': 26540, 'loss/train': 1.4010323286056519} +03/04/2022 20:38:54 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 20:38:59 - INFO - codeparrot_training - Step 26541: {'lr': 0.00046683902069248465, 'samples': 13589504, 'steps': 26541, 'loss/train': 0.9568489193916321} +03/04/2022 20:39:02 - INFO - codeparrot_training - Step 26542: {'lr': 0.0004668363795402237, 'samples': 13590016, 'steps': 26542, 'loss/train': 1.8775134086608887} +03/04/2022 20:39:02 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 20:39:08 - INFO - codeparrot_training - Step 26543: {'lr': 0.00046683373829025954, 'samples': 13590528, 'steps': 26543, 'loss/train': 1.5324357748031616} +03/04/2022 20:39:11 - INFO - codeparrot_training - Step 26544: {'lr': 0.0004668310969425935, 'samples': 13591040, 'steps': 26544, 'loss/train': 1.8269140720367432} +03/04/2022 20:39:11 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 20:39:16 - INFO - codeparrot_training - Step 26545: {'lr': 0.00046682845549722677, 'samples': 13591552, 'steps': 26545, 'loss/train': 1.9655989408493042} +03/04/2022 20:39:19 - INFO - codeparrot_training - Step 26546: {'lr': 0.0004668258139541604, 'samples': 13592064, 'steps': 26546, 'loss/train': 2.426006555557251} +03/04/2022 20:39:19 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 20:39:24 - INFO - codeparrot_training - Step 26547: {'lr': 0.00046682317231339565, 'samples': 13592576, 'steps': 26547, 'loss/train': 1.4301517009735107} +03/04/2022 20:39:27 - INFO - codeparrot_training - Step 26548: {'lr': 0.00046682053057493377, 'samples': 13593088, 'steps': 26548, 'loss/train': 1.7316124439239502} +03/04/2022 20:39:28 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 20:39:33 - INFO - codeparrot_training - Step 26549: {'lr': 0.00046681788873877595, 'samples': 13593600, 'steps': 26549, 'loss/train': 2.3400120735168457} +03/04/2022 20:39:36 - INFO - codeparrot_training - Step 26550: {'lr': 0.00046681524680492327, 'samples': 13594112, 'steps': 26550, 'loss/train': 1.68581223487854} +03/04/2022 20:39:36 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 20:39:41 - INFO - codeparrot_training - Step 26551: {'lr': 0.00046681260477337693, 'samples': 13594624, 'steps': 26551, 'loss/train': 1.6424542665481567} +03/04/2022 20:39:44 - INFO - codeparrot_training - Step 26552: {'lr': 0.0004668099626441383, 'samples': 13595136, 'steps': 26552, 'loss/train': 1.9305063486099243} +03/04/2022 20:39:45 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 20:39:50 - INFO - codeparrot_training - Step 26553: {'lr': 0.00046680732041720836, 'samples': 13595648, 'steps': 26553, 'loss/train': 1.9807507991790771} +03/04/2022 20:39:53 - INFO - codeparrot_training - Step 26554: {'lr': 0.0004668046780925884, 'samples': 13596160, 'steps': 26554, 'loss/train': 2.6153101921081543} +03/04/2022 20:39:53 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 20:39:58 - INFO - codeparrot_training - Step 26555: {'lr': 0.0004668020356702796, 'samples': 13596672, 'steps': 26555, 'loss/train': 3.038522481918335} +03/04/2022 20:40:01 - INFO - codeparrot_training - Step 26556: {'lr': 0.0004667993931502832, 'samples': 13597184, 'steps': 26556, 'loss/train': 1.7892401218414307} +03/04/2022 20:40:02 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/04/2022 20:40:07 - INFO - codeparrot_training - Step 26557: {'lr': 0.00046679675053260027, 'samples': 13597696, 'steps': 26557, 'loss/train': 2.2638485431671143} +03/04/2022 20:40:10 - INFO - codeparrot_training - Step 26558: {'lr': 0.00046679410781723206, 'samples': 13598208, 'steps': 26558, 'loss/train': 1.8912955522537231} +03/04/2022 20:40:11 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 20:40:15 - INFO - codeparrot_training - Step 26559: {'lr': 0.0004667914650041799, 'samples': 13598720, 'steps': 26559, 'loss/train': 2.029426097869873} +03/04/2022 20:40:18 - INFO - codeparrot_training - Step 26560: {'lr': 0.00046678882209344474, 'samples': 13599232, 'steps': 26560, 'loss/train': 2.237112045288086} +03/04/2022 20:40:19 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 20:40:24 - INFO - codeparrot_training - Step 26561: {'lr': 0.00046678617908502785, 'samples': 13599744, 'steps': 26561, 'loss/train': 2.0326857566833496} +03/04/2022 20:40:27 - INFO - codeparrot_training - Step 26562: {'lr': 0.00046678353597893053, 'samples': 13600256, 'steps': 26562, 'loss/train': 1.894349455833435} +03/04/2022 20:40:27 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 20:40:32 - INFO - codeparrot_training - Step 26563: {'lr': 0.0004667808927751539, 'samples': 13600768, 'steps': 26563, 'loss/train': 1.8986589908599854} +03/04/2022 20:40:35 - INFO - codeparrot_training - Step 26564: {'lr': 0.00046677824947369907, 'samples': 13601280, 'steps': 26564, 'loss/train': 1.5348010063171387} +03/04/2022 20:40:36 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 20:40:41 - INFO - codeparrot_training - Step 26565: {'lr': 0.0004667756060745674, 'samples': 13601792, 'steps': 26565, 'loss/train': 1.9123196601867676} +03/04/2022 20:40:44 - INFO - codeparrot_training - Step 26566: {'lr': 0.0004667729625777599, 'samples': 13602304, 'steps': 26566, 'loss/train': 2.6009438037872314} +03/04/2022 20:40:45 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 20:40:49 - INFO - codeparrot_training - Step 26567: {'lr': 0.0004667703189832779, 'samples': 13602816, 'steps': 26567, 'loss/train': 0.730574369430542} +03/04/2022 20:40:52 - INFO - codeparrot_training - Step 26568: {'lr': 0.00046676767529112254, 'samples': 13603328, 'steps': 26568, 'loss/train': 1.001646637916565} +03/04/2022 20:40:53 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 20:40:58 - INFO - codeparrot_training - Step 26569: {'lr': 0.000466765031501295, 'samples': 13603840, 'steps': 26569, 'loss/train': 1.1095993518829346} +03/04/2022 20:41:01 - INFO - codeparrot_training - Step 26570: {'lr': 0.0004667623876137965, 'samples': 13604352, 'steps': 26570, 'loss/train': 1.927314281463623} +03/04/2022 20:41:01 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 20:41:07 - INFO - codeparrot_training - Step 26571: {'lr': 0.00046675974362862815, 'samples': 13604864, 'steps': 26571, 'loss/train': 2.3595004081726074} +03/04/2022 20:41:10 - INFO - codeparrot_training - Step 26572: {'lr': 0.00046675709954579125, 'samples': 13605376, 'steps': 26572, 'loss/train': 1.8578994274139404} +03/04/2022 20:41:12 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 20:41:15 - INFO - codeparrot_training - Step 26573: {'lr': 0.0004667544553652869, 'samples': 13605888, 'steps': 26573, 'loss/train': 1.9144315719604492} +03/04/2022 20:41:18 - INFO - codeparrot_training - Step 26574: {'lr': 0.0004667518110871164, 'samples': 13606400, 'steps': 26574, 'loss/train': 2.3605785369873047} +03/04/2022 20:41:22 - INFO - codeparrot_training - Step 26575: {'lr': 0.0004667491667112809, 'samples': 13606912, 'steps': 26575, 'loss/train': 3.4777581691741943} +03/04/2022 20:41:22 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 20:41:27 - INFO - codeparrot_training - Step 26576: {'lr': 0.0004667465222377815, 'samples': 13607424, 'steps': 26576, 'loss/train': 2.103983163833618} +03/04/2022 20:41:30 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 20:41:32 - INFO - codeparrot_training - Step 26577: {'lr': 0.0004667438776666195, 'samples': 13607936, 'steps': 26577, 'loss/train': 1.3853412866592407} +03/04/2022 20:41:36 - INFO - codeparrot_training - Step 26578: {'lr': 0.00046674123299779603, 'samples': 13608448, 'steps': 26578, 'loss/train': 1.5102964639663696} +03/04/2022 20:41:38 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/04/2022 20:41:41 - INFO - codeparrot_training - Step 26579: {'lr': 0.0004667385882313123, 'samples': 13608960, 'steps': 26579, 'loss/train': 1.960842490196228} +03/04/2022 20:41:44 - INFO - codeparrot_training - Step 26580: {'lr': 0.0004667359433671695, 'samples': 13609472, 'steps': 26580, 'loss/train': 2.1305618286132812} +03/04/2022 20:41:46 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 20:41:49 - INFO - codeparrot_training - Step 26581: {'lr': 0.0004667332984053689, 'samples': 13609984, 'steps': 26581, 'loss/train': 1.7484407424926758} +03/04/2022 20:41:52 - INFO - codeparrot_training - Step 26582: {'lr': 0.00046673065334591155, 'samples': 13610496, 'steps': 26582, 'loss/train': 1.334031105041504} +03/04/2022 20:41:55 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/04/2022 20:41:58 - INFO - codeparrot_training - Step 26583: {'lr': 0.00046672800818879873, 'samples': 13611008, 'steps': 26583, 'loss/train': 1.5804671049118042} +03/04/2022 20:42:01 - INFO - codeparrot_training - Step 26584: {'lr': 0.0004667253629340316, 'samples': 13611520, 'steps': 26584, 'loss/train': 1.7392085790634155} +03/04/2022 20:42:03 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/04/2022 20:42:06 - INFO - codeparrot_training - Step 26585: {'lr': 0.0004667227175816114, 'samples': 13612032, 'steps': 26585, 'loss/train': 1.8505655527114868} +03/04/2022 20:42:09 - INFO - codeparrot_training - Step 26586: {'lr': 0.0004667200721315393, 'samples': 13612544, 'steps': 26586, 'loss/train': 2.9690558910369873} +03/04/2022 20:42:11 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 20:42:14 - INFO - codeparrot_training - Step 26587: {'lr': 0.00046671742658381646, 'samples': 13613056, 'steps': 26587, 'loss/train': 1.5246477127075195} +03/04/2022 20:42:18 - INFO - codeparrot_training - Step 26588: {'lr': 0.000466714780938444, 'samples': 13613568, 'steps': 26588, 'loss/train': 0.9926356673240662} +03/04/2022 20:42:20 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 20:42:23 - INFO - codeparrot_training - Step 26589: {'lr': 0.0004667121351954233, 'samples': 13614080, 'steps': 26589, 'loss/train': 2.129506826400757} +03/04/2022 20:42:26 - INFO - codeparrot_training - Step 26590: {'lr': 0.00046670948935475544, 'samples': 13614592, 'steps': 26590, 'loss/train': 3.4915058612823486} +03/04/2022 20:42:30 - INFO - codeparrot_training - Step 26591: {'lr': 0.00046670684341644167, 'samples': 13615104, 'steps': 26591, 'loss/train': 0.5684828758239746} +03/04/2022 20:42:30 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 20:42:35 - INFO - codeparrot_training - Step 26592: {'lr': 0.0004667041973804831, 'samples': 13615616, 'steps': 26592, 'loss/train': 1.9176748991012573} +03/04/2022 20:42:38 - INFO - codeparrot_training - Step 26593: {'lr': 0.00046670155124688096, 'samples': 13616128, 'steps': 26593, 'loss/train': 1.37889564037323} +03/04/2022 20:42:38 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/04/2022 20:42:43 - INFO - codeparrot_training - Step 26594: {'lr': 0.00046669890501563636, 'samples': 13616640, 'steps': 26594, 'loss/train': 1.6936659812927246} +03/04/2022 20:42:47 - INFO - codeparrot_training - Step 26595: {'lr': 0.0004666962586867507, 'samples': 13617152, 'steps': 26595, 'loss/train': 2.0882515907287598} +03/04/2022 20:42:47 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 20:42:52 - INFO - codeparrot_training - Step 26596: {'lr': 0.000466693612260225, 'samples': 13617664, 'steps': 26596, 'loss/train': 1.3531520366668701} +03/04/2022 20:42:55 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 20:42:57 - INFO - codeparrot_training - Step 26597: {'lr': 0.00046669096573606053, 'samples': 13618176, 'steps': 26597, 'loss/train': 1.4609912633895874} +03/04/2022 20:43:01 - INFO - codeparrot_training - Step 26598: {'lr': 0.00046668831911425844, 'samples': 13618688, 'steps': 26598, 'loss/train': 1.0594253540039062} +03/04/2022 20:43:03 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 20:43:06 - INFO - codeparrot_training - Step 26599: {'lr': 0.00046668567239481994, 'samples': 13619200, 'steps': 26599, 'loss/train': 1.6262317895889282} +03/04/2022 20:43:09 - INFO - codeparrot_training - Step 26600: {'lr': 0.0004666830255777462, 'samples': 13619712, 'steps': 26600, 'loss/train': 6.550108432769775} +03/04/2022 20:43:12 - INFO - codeparrot_training - Step 26601: {'lr': 0.00046668037866303845, 'samples': 13620224, 'steps': 26601, 'loss/train': 2.1563990116119385} +03/04/2022 20:43:13 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 20:43:17 - INFO - codeparrot_training - Step 26602: {'lr': 0.0004666777316506979, 'samples': 13620736, 'steps': 26602, 'loss/train': 1.7423055171966553} +03/04/2022 20:43:21 - INFO - codeparrot_training - Step 26603: {'lr': 0.00046667508454072566, 'samples': 13621248, 'steps': 26603, 'loss/train': 1.8265783786773682} +03/04/2022 20:43:21 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 20:43:26 - INFO - codeparrot_training - Step 26604: {'lr': 0.00046667243733312296, 'samples': 13621760, 'steps': 26604, 'loss/train': 1.8406914472579956} +03/04/2022 20:43:29 - INFO - codeparrot_training - Step 26605: {'lr': 0.000466669790027891, 'samples': 13622272, 'steps': 26605, 'loss/train': 2.4394304752349854} +03/04/2022 20:43:30 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 20:43:34 - INFO - codeparrot_training - Step 26606: {'lr': 0.00046666714262503107, 'samples': 13622784, 'steps': 26606, 'loss/train': 1.874596118927002} +03/04/2022 20:43:38 - INFO - codeparrot_training - Step 26607: {'lr': 0.00046666449512454416, 'samples': 13623296, 'steps': 26607, 'loss/train': 1.972298264503479} +03/04/2022 20:43:38 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 20:43:43 - INFO - codeparrot_training - Step 26608: {'lr': 0.0004666618475264316, 'samples': 13623808, 'steps': 26608, 'loss/train': 2.3511786460876465} +03/04/2022 20:43:46 - INFO - codeparrot_training - Step 26609: {'lr': 0.0004666591998306946, 'samples': 13624320, 'steps': 26609, 'loss/train': 1.5270026922225952} +03/04/2022 20:43:47 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 20:43:51 - INFO - codeparrot_training - Step 26610: {'lr': 0.0004666565520373343, 'samples': 13624832, 'steps': 26610, 'loss/train': 1.962636947631836} +03/04/2022 20:43:55 - INFO - codeparrot_training - Step 26611: {'lr': 0.00046665390414635184, 'samples': 13625344, 'steps': 26611, 'loss/train': 1.75454580783844} +03/04/2022 20:43:56 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/04/2022 20:44:00 - INFO - codeparrot_training - Step 26612: {'lr': 0.0004666512561577485, 'samples': 13625856, 'steps': 26612, 'loss/train': 2.0201120376586914} +03/04/2022 20:44:03 - INFO - codeparrot_training - Step 26613: {'lr': 0.0004666486080715255, 'samples': 13626368, 'steps': 26613, 'loss/train': 1.7885310649871826} +03/04/2022 20:44:04 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/04/2022 20:44:08 - INFO - codeparrot_training - Step 26614: {'lr': 0.0004666459598876839, 'samples': 13626880, 'steps': 26614, 'loss/train': 1.5661311149597168} +03/04/2022 20:44:12 - INFO - codeparrot_training - Step 26615: {'lr': 0.000466643311606225, 'samples': 13627392, 'steps': 26615, 'loss/train': 1.7631574869155884} +03/04/2022 20:44:13 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/04/2022 20:44:17 - INFO - codeparrot_training - Step 26616: {'lr': 0.00046664066322715006, 'samples': 13627904, 'steps': 26616, 'loss/train': 1.9867814779281616} +03/04/2022 20:44:20 - INFO - codeparrot_training - Step 26617: {'lr': 0.00046663801475046004, 'samples': 13628416, 'steps': 26617, 'loss/train': 1.7163591384887695} +03/04/2022 20:44:21 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 20:44:25 - INFO - codeparrot_training - Step 26618: {'lr': 0.0004666353661761563, 'samples': 13628928, 'steps': 26618, 'loss/train': 1.7444401979446411} +03/04/2022 20:44:29 - INFO - codeparrot_training - Step 26619: {'lr': 0.0004666327175042401, 'samples': 13629440, 'steps': 26619, 'loss/train': 2.0086004734039307} +03/04/2022 20:44:30 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 20:44:34 - INFO - codeparrot_training - Step 26620: {'lr': 0.00046663006873471247, 'samples': 13629952, 'steps': 26620, 'loss/train': 1.8757693767547607} +03/04/2022 20:44:37 - INFO - codeparrot_training - Step 26621: {'lr': 0.00046662741986757463, 'samples': 13630464, 'steps': 26621, 'loss/train': 1.9343801736831665} +03/04/2022 20:44:38 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 20:44:42 - INFO - codeparrot_training - Step 26622: {'lr': 0.0004666247709028279, 'samples': 13630976, 'steps': 26622, 'loss/train': 2.0473289489746094} +03/04/2022 20:44:45 - INFO - codeparrot_training - Step 26623: {'lr': 0.00046662212184047334, 'samples': 13631488, 'steps': 26623, 'loss/train': 2.001615524291992} +03/04/2022 20:44:47 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 20:44:51 - INFO - codeparrot_training - Step 26624: {'lr': 0.0004666194726805122, 'samples': 13632000, 'steps': 26624, 'loss/train': 1.8757197856903076} +03/04/2022 20:44:54 - INFO - codeparrot_training - Step 26625: {'lr': 0.0004666168234229457, 'samples': 13632512, 'steps': 26625, 'loss/train': 1.5612101554870605} +03/04/2022 20:44:55 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 20:44:59 - INFO - codeparrot_training - Step 26626: {'lr': 0.000466614174067775, 'samples': 13633024, 'steps': 26626, 'loss/train': 2.1403236389160156} +03/04/2022 20:45:02 - INFO - codeparrot_training - Step 26627: {'lr': 0.00046661152461500126, 'samples': 13633536, 'steps': 26627, 'loss/train': 1.7003254890441895} +03/04/2022 20:45:03 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/04/2022 20:45:08 - INFO - codeparrot_training - Step 26628: {'lr': 0.0004666088750646257, 'samples': 13634048, 'steps': 26628, 'loss/train': 1.1554498672485352} +03/04/2022 20:45:11 - INFO - codeparrot_training - Step 26629: {'lr': 0.0004666062254166496, 'samples': 13634560, 'steps': 26629, 'loss/train': 2.1569178104400635} +03/04/2022 20:45:12 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 20:45:16 - INFO - codeparrot_training - Step 26630: {'lr': 0.000466603575671074, 'samples': 13635072, 'steps': 26630, 'loss/train': 1.2858855724334717} +03/04/2022 20:45:19 - INFO - codeparrot_training - Step 26631: {'lr': 0.00046660092582790025, 'samples': 13635584, 'steps': 26631, 'loss/train': 2.555605888366699} +03/04/2022 20:45:20 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 20:45:25 - INFO - codeparrot_training - Step 26632: {'lr': 0.0004665982758871294, 'samples': 13636096, 'steps': 26632, 'loss/train': 1.899399995803833} +03/04/2022 20:45:28 - INFO - codeparrot_training - Step 26633: {'lr': 0.0004665956258487627, 'samples': 13636608, 'steps': 26633, 'loss/train': 0.2395668625831604} +03/04/2022 20:45:29 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/04/2022 20:45:33 - INFO - codeparrot_training - Step 26634: {'lr': 0.0004665929757128014, 'samples': 13637120, 'steps': 26634, 'loss/train': 2.7183055877685547} +03/04/2022 20:45:36 - INFO - codeparrot_training - Step 26635: {'lr': 0.0004665903254792466, 'samples': 13637632, 'steps': 26635, 'loss/train': 2.1655919551849365} +03/04/2022 20:45:37 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 20:45:42 - INFO - codeparrot_training - Step 26636: {'lr': 0.0004665876751480996, 'samples': 13638144, 'steps': 26636, 'loss/train': 2.7645320892333984} +03/04/2022 20:45:45 - INFO - codeparrot_training - Step 26637: {'lr': 0.0004665850247193615, 'samples': 13638656, 'steps': 26637, 'loss/train': 1.8570046424865723} +03/04/2022 20:45:47 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 20:45:50 - INFO - codeparrot_training - Step 26638: {'lr': 0.0004665823741930335, 'samples': 13639168, 'steps': 26638, 'loss/train': 1.9662957191467285} +03/04/2022 20:45:53 - INFO - codeparrot_training - Step 26639: {'lr': 0.00046657972356911696, 'samples': 13639680, 'steps': 26639, 'loss/train': 0.148898184299469} +03/04/2022 20:45:56 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 20:45:59 - INFO - codeparrot_training - Step 26640: {'lr': 0.00046657707284761274, 'samples': 13640192, 'steps': 26640, 'loss/train': 1.6434271335601807} +03/04/2022 20:46:02 - INFO - codeparrot_training - Step 26641: {'lr': 0.0004665744220285224, 'samples': 13640704, 'steps': 26641, 'loss/train': 1.7147456407546997} +03/04/2022 20:46:05 - INFO - codeparrot_training - Step 26642: {'lr': 0.0004665717711118469, 'samples': 13641216, 'steps': 26642, 'loss/train': 2.2712020874023438} +03/04/2022 20:46:05 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 20:46:10 - INFO - codeparrot_training - Step 26643: {'lr': 0.00046656912009758743, 'samples': 13641728, 'steps': 26643, 'loss/train': 1.6994085311889648} +03/04/2022 20:46:13 - INFO - codeparrot_training - Step 26644: {'lr': 0.0004665664689857454, 'samples': 13642240, 'steps': 26644, 'loss/train': 2.0453977584838867} +03/04/2022 20:46:14 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 20:46:19 - INFO - codeparrot_training - Step 26645: {'lr': 0.00046656381777632173, 'samples': 13642752, 'steps': 26645, 'loss/train': 2.4222054481506348} +03/04/2022 20:46:22 - INFO - codeparrot_training - Step 26646: {'lr': 0.0004665611664693178, 'samples': 13643264, 'steps': 26646, 'loss/train': 0.8537673950195312} +03/04/2022 20:46:22 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 20:46:27 - INFO - codeparrot_training - Step 26647: {'lr': 0.0004665585150647348, 'samples': 13643776, 'steps': 26647, 'loss/train': 1.301190972328186} +03/04/2022 20:46:30 - INFO - codeparrot_training - Step 26648: {'lr': 0.0004665558635625738, 'samples': 13644288, 'steps': 26648, 'loss/train': 1.9126513004302979} +03/04/2022 20:46:31 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 20:46:36 - INFO - codeparrot_training - Step 26649: {'lr': 0.00046655321196283604, 'samples': 13644800, 'steps': 26649, 'loss/train': 1.8300268650054932} +03/04/2022 20:46:39 - INFO - codeparrot_training - Step 26650: {'lr': 0.00046655056026552287, 'samples': 13645312, 'steps': 26650, 'loss/train': 1.8785401582717896} +03/04/2022 20:46:39 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 20:46:44 - INFO - codeparrot_training - Step 26651: {'lr': 0.0004665479084706353, 'samples': 13645824, 'steps': 26651, 'loss/train': 1.4458609819412231} +03/04/2022 20:46:47 - INFO - codeparrot_training - Step 26652: {'lr': 0.00046654525657817457, 'samples': 13646336, 'steps': 26652, 'loss/train': 1.6842485666275024} +03/04/2022 20:46:48 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 20:46:52 - INFO - codeparrot_training - Step 26653: {'lr': 0.0004665426045881419, 'samples': 13646848, 'steps': 26653, 'loss/train': 0.761953592300415} +03/04/2022 20:46:56 - INFO - codeparrot_training - Step 26654: {'lr': 0.00046653995250053843, 'samples': 13647360, 'steps': 26654, 'loss/train': 2.232832670211792} +03/04/2022 20:46:56 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 20:47:01 - INFO - codeparrot_training - Step 26655: {'lr': 0.00046653730031536545, 'samples': 13647872, 'steps': 26655, 'loss/train': 1.6327837705612183} +03/04/2022 20:47:04 - INFO - codeparrot_training - Step 26656: {'lr': 0.0004665346480326241, 'samples': 13648384, 'steps': 26656, 'loss/train': 1.8122210502624512} +03/04/2022 20:47:05 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/04/2022 20:47:09 - INFO - codeparrot_training - Step 26657: {'lr': 0.00046653199565231554, 'samples': 13648896, 'steps': 26657, 'loss/train': 2.063917875289917} +03/04/2022 20:47:12 - INFO - codeparrot_training - Step 26658: {'lr': 0.00046652934317444104, 'samples': 13649408, 'steps': 26658, 'loss/train': 1.4336347579956055} +03/04/2022 20:47:13 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/04/2022 20:47:18 - INFO - codeparrot_training - Step 26659: {'lr': 0.00046652669059900174, 'samples': 13649920, 'steps': 26659, 'loss/train': 2.055600166320801} +03/04/2022 20:47:21 - INFO - codeparrot_training - Step 26660: {'lr': 0.0004665240379259989, 'samples': 13650432, 'steps': 26660, 'loss/train': 0.545746922492981} +03/04/2022 20:47:22 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 20:47:26 - INFO - codeparrot_training - Step 26661: {'lr': 0.00046652138515543366, 'samples': 13650944, 'steps': 26661, 'loss/train': 1.6737637519836426} +03/04/2022 20:47:29 - INFO - codeparrot_training - Step 26662: {'lr': 0.00046651873228730715, 'samples': 13651456, 'steps': 26662, 'loss/train': 1.779179334640503} +03/04/2022 20:47:30 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 20:47:35 - INFO - codeparrot_training - Step 26663: {'lr': 0.0004665160793216207, 'samples': 13651968, 'steps': 26663, 'loss/train': 1.7342503070831299} +03/04/2022 20:47:38 - INFO - codeparrot_training - Step 26664: {'lr': 0.00046651342625837544, 'samples': 13652480, 'steps': 26664, 'loss/train': 1.9248305559158325} +03/04/2022 20:47:41 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/04/2022 20:47:44 - INFO - codeparrot_training - Step 26665: {'lr': 0.00046651077309757256, 'samples': 13652992, 'steps': 26665, 'loss/train': 2.652754783630371} +03/04/2022 20:47:47 - INFO - codeparrot_training - Step 26666: {'lr': 0.0004665081198392133, 'samples': 13653504, 'steps': 26666, 'loss/train': 2.037874221801758} +03/04/2022 20:47:49 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/04/2022 20:47:52 - INFO - codeparrot_training - Step 26667: {'lr': 0.0004665054664832988, 'samples': 13654016, 'steps': 26667, 'loss/train': 1.7718651294708252} +03/04/2022 20:47:55 - INFO - codeparrot_training - Step 26668: {'lr': 0.00046650281302983024, 'samples': 13654528, 'steps': 26668, 'loss/train': 1.981916069984436} +03/04/2022 20:47:58 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 20:48:00 - INFO - codeparrot_training - Step 26669: {'lr': 0.00046650015947880886, 'samples': 13655040, 'steps': 26669, 'loss/train': 1.6942044496536255} +03/04/2022 20:48:04 - INFO - codeparrot_training - Step 26670: {'lr': 0.00046649750583023595, 'samples': 13655552, 'steps': 26670, 'loss/train': 2.0728018283843994} +03/04/2022 20:48:07 - INFO - codeparrot_training - Step 26671: {'lr': 0.00046649485208411244, 'samples': 13656064, 'steps': 26671, 'loss/train': 1.7234209775924683} +03/04/2022 20:48:07 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 20:48:12 - INFO - codeparrot_training - Step 26672: {'lr': 0.00046649219824043984, 'samples': 13656576, 'steps': 26672, 'loss/train': 0.9366819262504578} +03/04/2022 20:48:15 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/04/2022 20:48:17 - INFO - codeparrot_training - Step 26673: {'lr': 0.00046648954429921914, 'samples': 13657088, 'steps': 26673, 'loss/train': 1.784995436668396} +03/04/2022 20:48:21 - INFO - codeparrot_training - Step 26674: {'lr': 0.00046648689026045157, 'samples': 13657600, 'steps': 26674, 'loss/train': 1.9957302808761597} +03/04/2022 20:48:24 - INFO - codeparrot_training - Step 26675: {'lr': 0.0004664842361241384, 'samples': 13658112, 'steps': 26675, 'loss/train': 0.6632347106933594} +03/04/2022 20:48:24 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 20:48:29 - INFO - codeparrot_training - Step 26676: {'lr': 0.00046648158189028073, 'samples': 13658624, 'steps': 26676, 'loss/train': 1.3486677408218384} +03/04/2022 20:48:32 - INFO - codeparrot_training - Step 26677: {'lr': 0.0004664789275588798, 'samples': 13659136, 'steps': 26677, 'loss/train': 0.1966020166873932} +03/04/2022 20:48:32 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/04/2022 20:48:38 - INFO - codeparrot_training - Step 26678: {'lr': 0.0004664762731299368, 'samples': 13659648, 'steps': 26678, 'loss/train': 1.8229998350143433} +03/04/2022 20:48:40 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 20:48:43 - INFO - codeparrot_training - Step 26679: {'lr': 0.00046647361860345293, 'samples': 13660160, 'steps': 26679, 'loss/train': 1.8731719255447388} +03/04/2022 20:48:46 - INFO - codeparrot_training - Step 26680: {'lr': 0.00046647096397942945, 'samples': 13660672, 'steps': 26680, 'loss/train': 1.4844691753387451} +03/04/2022 20:48:49 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 20:48:51 - INFO - codeparrot_training - Step 26681: {'lr': 0.0004664683092578674, 'samples': 13661184, 'steps': 26681, 'loss/train': 1.3100836277008057} +03/04/2022 20:48:55 - INFO - codeparrot_training - Step 26682: {'lr': 0.00046646565443876815, 'samples': 13661696, 'steps': 26682, 'loss/train': 1.40003502368927} +03/04/2022 20:48:57 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 20:49:00 - INFO - codeparrot_training - Step 26683: {'lr': 0.00046646299952213277, 'samples': 13662208, 'steps': 26683, 'loss/train': 1.8873366117477417} +03/04/2022 20:49:03 - INFO - codeparrot_training - Step 26684: {'lr': 0.00046646034450796255, 'samples': 13662720, 'steps': 26684, 'loss/train': 1.8637052774429321} +03/04/2022 20:49:06 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/04/2022 20:49:08 - INFO - codeparrot_training - Step 26685: {'lr': 0.0004664576893962586, 'samples': 13663232, 'steps': 26685, 'loss/train': 1.2652688026428223} +03/04/2022 20:49:12 - INFO - codeparrot_training - Step 26686: {'lr': 0.0004664550341870222, 'samples': 13663744, 'steps': 26686, 'loss/train': 0.8165706396102905} +03/04/2022 20:49:14 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/04/2022 20:49:17 - INFO - codeparrot_training - Step 26687: {'lr': 0.00046645237888025444, 'samples': 13664256, 'steps': 26687, 'loss/train': 1.6000189781188965} +03/04/2022 20:49:20 - INFO - codeparrot_training - Step 26688: {'lr': 0.0004664497234759566, 'samples': 13664768, 'steps': 26688, 'loss/train': 1.890013575553894} +03/04/2022 20:49:23 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 20:49:25 - INFO - codeparrot_training - Step 26689: {'lr': 0.00046644706797412984, 'samples': 13665280, 'steps': 26689, 'loss/train': 1.6908893585205078} +03/04/2022 20:49:29 - INFO - codeparrot_training - Step 26690: {'lr': 0.00046644441237477544, 'samples': 13665792, 'steps': 26690, 'loss/train': 2.1045820713043213} +03/04/2022 20:49:31 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 20:49:34 - INFO - codeparrot_training - Step 26691: {'lr': 0.00046644175667789444, 'samples': 13666304, 'steps': 26691, 'loss/train': 1.4937067031860352} +03/04/2022 20:49:37 - INFO - codeparrot_training - Step 26692: {'lr': 0.00046643910088348817, 'samples': 13666816, 'steps': 26692, 'loss/train': 1.9463579654693604} +03/04/2022 20:49:39 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/04/2022 20:49:42 - INFO - codeparrot_training - Step 26693: {'lr': 0.0004664364449915578, 'samples': 13667328, 'steps': 26693, 'loss/train': 1.8870209455490112} +03/04/2022 20:49:45 - INFO - codeparrot_training - Step 26694: {'lr': 0.0004664337890021044, 'samples': 13667840, 'steps': 26694, 'loss/train': 1.4400068521499634} +03/04/2022 20:49:48 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/04/2022 20:49:51 - INFO - codeparrot_training - Step 26695: {'lr': 0.0004664311329151294, 'samples': 13668352, 'steps': 26695, 'loss/train': 1.4525578022003174} +03/04/2022 20:49:54 - INFO - codeparrot_training - Step 26696: {'lr': 0.0004664284767306338, 'samples': 13668864, 'steps': 26696, 'loss/train': 0.8865824341773987} +03/04/2022 20:49:57 - INFO - codeparrot_training - Step 26697: {'lr': 0.0004664258204486189, 'samples': 13669376, 'steps': 26697, 'loss/train': 1.181544542312622} +03/04/2022 20:49:58 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/04/2022 20:50:03 - INFO - codeparrot_training - Step 26698: {'lr': 0.0004664231640690859, 'samples': 13669888, 'steps': 26698, 'loss/train': 2.6543309688568115} +03/04/2022 20:50:06 - INFO - codeparrot_training - Step 26699: {'lr': 0.0004664205075920359, 'samples': 13670400, 'steps': 26699, 'loss/train': 0.8751461505889893} +03/04/2022 20:50:06 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 20:50:11 - INFO - codeparrot_training - Step 26700: {'lr': 0.0004664178510174702, 'samples': 13670912, 'steps': 26700, 'loss/train': 1.6041452884674072} +03/04/2022 20:50:14 - INFO - codeparrot_training - Step 26701: {'lr': 0.0004664151943453899, 'samples': 13671424, 'steps': 26701, 'loss/train': 1.0287470817565918} +03/04/2022 20:50:14 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 20:50:19 - INFO - codeparrot_training - Step 26702: {'lr': 0.0004664125375757963, 'samples': 13671936, 'steps': 26702, 'loss/train': 1.899570345878601} +03/04/2022 20:50:23 - INFO - codeparrot_training - Step 26703: {'lr': 0.00046640988070869053, 'samples': 13672448, 'steps': 26703, 'loss/train': 1.9523086547851562} +03/04/2022 20:50:23 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 20:50:28 - INFO - codeparrot_training - Step 26704: {'lr': 0.00046640722374407384, 'samples': 13672960, 'steps': 26704, 'loss/train': 2.289858341217041} +03/04/2022 20:50:31 - INFO - codeparrot_training - Step 26705: {'lr': 0.00046640456668194737, 'samples': 13673472, 'steps': 26705, 'loss/train': 2.0405712127685547} +03/04/2022 20:50:31 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/04/2022 20:50:36 - INFO - codeparrot_training - Step 26706: {'lr': 0.0004664019095223123, 'samples': 13673984, 'steps': 26706, 'loss/train': 1.9924113750457764} +03/04/2022 20:50:39 - INFO - codeparrot_training - Step 26707: {'lr': 0.00046639925226517, 'samples': 13674496, 'steps': 26707, 'loss/train': 1.671492338180542} +03/04/2022 20:50:39 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/04/2022 20:50:45 - INFO - codeparrot_training - Step 26708: {'lr': 0.0004663965949105214, 'samples': 13675008, 'steps': 26708, 'loss/train': 2.14054274559021} +03/04/2022 20:50:48 - INFO - codeparrot_training - Step 26709: {'lr': 0.0004663939374583679, 'samples': 13675520, 'steps': 26709, 'loss/train': 1.7124980688095093} +03/04/2022 20:50:48 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 20:50:54 - INFO - codeparrot_training - Step 26710: {'lr': 0.00046639127990871055, 'samples': 13676032, 'steps': 26710, 'loss/train': 2.3323233127593994} +03/04/2022 20:50:57 - INFO - codeparrot_training - Step 26711: {'lr': 0.00046638862226155075, 'samples': 13676544, 'steps': 26711, 'loss/train': 5.955111503601074} +03/04/2022 20:51:00 - INFO - codeparrot_training - Step 26712: {'lr': 0.0004663859645168895, 'samples': 13677056, 'steps': 26712, 'loss/train': 2.474405527114868} +03/04/2022 20:51:00 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 20:51:05 - INFO - codeparrot_training - Step 26713: {'lr': 0.00046638330667472805, 'samples': 13677568, 'steps': 26713, 'loss/train': 1.805928111076355} +03/04/2022 20:51:08 - INFO - codeparrot_training - Step 26714: {'lr': 0.0004663806487350677, 'samples': 13678080, 'steps': 26714, 'loss/train': 6.692773818969727} +03/04/2022 20:51:09 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 20:51:14 - INFO - codeparrot_training - Step 26715: {'lr': 0.00046637799069790953, 'samples': 13678592, 'steps': 26715, 'loss/train': 2.2610812187194824} +03/04/2022 20:51:17 - INFO - codeparrot_training - Step 26716: {'lr': 0.00046637533256325476, 'samples': 13679104, 'steps': 26716, 'loss/train': 1.7981669902801514} +03/04/2022 20:51:18 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/04/2022 20:51:22 - INFO - codeparrot_training - Step 26717: {'lr': 0.0004663726743311046, 'samples': 13679616, 'steps': 26717, 'loss/train': 6.598403453826904} +03/04/2022 20:51:25 - INFO - codeparrot_training - Step 26718: {'lr': 0.00046637001600146027, 'samples': 13680128, 'steps': 26718, 'loss/train': 1.973952293395996} +03/04/2022 20:51:27 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 20:51:31 - INFO - codeparrot_training - Step 26719: {'lr': 0.000466367357574323, 'samples': 13680640, 'steps': 26719, 'loss/train': 1.7913126945495605} +03/04/2022 20:51:34 - INFO - codeparrot_training - Step 26720: {'lr': 0.00046636469904969387, 'samples': 13681152, 'steps': 26720, 'loss/train': 1.40028715133667} +03/04/2022 20:51:35 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 20:51:39 - INFO - codeparrot_training - Step 26721: {'lr': 0.0004663620404275741, 'samples': 13681664, 'steps': 26721, 'loss/train': 1.4481658935546875} +03/04/2022 20:51:42 - INFO - codeparrot_training - Step 26722: {'lr': 0.00046635938170796505, 'samples': 13682176, 'steps': 26722, 'loss/train': 0.897899866104126} +03/04/2022 20:51:44 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 20:51:48 - INFO - codeparrot_training - Step 26723: {'lr': 0.00046635672289086774, 'samples': 13682688, 'steps': 26723, 'loss/train': 1.9261603355407715} +03/04/2022 20:51:51 - INFO - codeparrot_training - Step 26724: {'lr': 0.00046635406397628346, 'samples': 13683200, 'steps': 26724, 'loss/train': 2.3305139541625977} +03/04/2022 20:51:52 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 20:51:56 - INFO - codeparrot_training - Step 26725: {'lr': 0.00046635140496421336, 'samples': 13683712, 'steps': 26725, 'loss/train': 1.206812858581543} +03/04/2022 20:51:59 - INFO - codeparrot_training - Step 26726: {'lr': 0.0004663487458546586, 'samples': 13684224, 'steps': 26726, 'loss/train': 2.1753625869750977} +03/04/2022 20:52:02 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 20:52:05 - INFO - codeparrot_training - Step 26727: {'lr': 0.0004663460866476205, 'samples': 13684736, 'steps': 26727, 'loss/train': 2.2715835571289062} +03/04/2022 20:52:08 - INFO - codeparrot_training - Step 26728: {'lr': 0.00046634342734310023, 'samples': 13685248, 'steps': 26728, 'loss/train': 1.3536500930786133} +03/04/2022 20:52:10 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/04/2022 20:52:13 - INFO - codeparrot_training - Step 26729: {'lr': 0.0004663407679410988, 'samples': 13685760, 'steps': 26729, 'loss/train': 2.0546627044677734} +03/04/2022 20:52:16 - INFO - codeparrot_training - Step 26730: {'lr': 0.0004663381084416177, 'samples': 13686272, 'steps': 26730, 'loss/train': 1.9673089981079102} +03/04/2022 20:52:19 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 20:52:22 - INFO - codeparrot_training - Step 26731: {'lr': 0.00046633544884465796, 'samples': 13686784, 'steps': 26731, 'loss/train': 1.8400992155075073} +03/04/2022 20:52:25 - INFO - codeparrot_training - Step 26732: {'lr': 0.0004663327891502208, 'samples': 13687296, 'steps': 26732, 'loss/train': 1.639454960823059} +03/04/2022 20:52:27 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 20:52:30 - INFO - codeparrot_training - Step 26733: {'lr': 0.0004663301293583073, 'samples': 13687808, 'steps': 26733, 'loss/train': 1.8162367343902588} +03/04/2022 20:52:33 - INFO - codeparrot_training - Step 26734: {'lr': 0.000466327469468919, 'samples': 13688320, 'steps': 26734, 'loss/train': 1.559457540512085} +03/04/2022 20:52:36 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 20:52:38 - INFO - codeparrot_training - Step 26735: {'lr': 0.0004663248094820567, 'samples': 13688832, 'steps': 26735, 'loss/train': 1.785496473312378} +03/04/2022 20:52:42 - INFO - codeparrot_training - Step 26736: {'lr': 0.00046632214939772187, 'samples': 13689344, 'steps': 26736, 'loss/train': 1.4931201934814453} +03/04/2022 20:52:44 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 20:52:47 - INFO - codeparrot_training - Step 26737: {'lr': 0.0004663194892159156, 'samples': 13689856, 'steps': 26737, 'loss/train': 1.0998592376708984} +03/04/2022 20:52:50 - INFO - codeparrot_training - Step 26738: {'lr': 0.0004663168289366391, 'samples': 13690368, 'steps': 26738, 'loss/train': 1.5991463661193848} +03/04/2022 20:52:53 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/04/2022 20:52:56 - INFO - codeparrot_training - Step 26739: {'lr': 0.0004663141685598936, 'samples': 13690880, 'steps': 26739, 'loss/train': 1.5006264448165894} +03/04/2022 20:52:59 - INFO - codeparrot_training - Step 26740: {'lr': 0.00046631150808568026, 'samples': 13691392, 'steps': 26740, 'loss/train': 2.0278966426849365} +03/04/2022 20:53:02 - INFO - codeparrot_training - Step 26741: {'lr': 0.00046630884751400024, 'samples': 13691904, 'steps': 26741, 'loss/train': 2.2093558311462402} +03/04/2022 20:53:02 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/04/2022 20:53:07 - INFO - codeparrot_training - Step 26742: {'lr': 0.0004663061868448548, 'samples': 13692416, 'steps': 26742, 'loss/train': 2.0519049167633057} +03/04/2022 20:53:10 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 20:53:13 - INFO - codeparrot_training - Step 26743: {'lr': 0.0004663035260782452, 'samples': 13692928, 'steps': 26743, 'loss/train': 1.7872018814086914} +03/04/2022 20:53:16 - INFO - codeparrot_training - Step 26744: {'lr': 0.0004663008652141726, 'samples': 13693440, 'steps': 26744, 'loss/train': 1.6538136005401611} +03/04/2022 20:53:19 - INFO - codeparrot_training - Step 26745: {'lr': 0.00046629820425263805, 'samples': 13693952, 'steps': 26745, 'loss/train': 2.3294997215270996} +03/04/2022 20:53:19 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 20:53:24 - INFO - codeparrot_training - Step 26746: {'lr': 0.00046629554319364293, 'samples': 13694464, 'steps': 26746, 'loss/train': 2.1605207920074463} +03/04/2022 20:53:27 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/04/2022 20:53:30 - INFO - codeparrot_training - Step 26747: {'lr': 0.00046629288203718834, 'samples': 13694976, 'steps': 26747, 'loss/train': 1.1768008470535278} +03/04/2022 20:53:33 - INFO - codeparrot_training - Step 26748: {'lr': 0.00046629022078327557, 'samples': 13695488, 'steps': 26748, 'loss/train': 1.8717012405395508} +03/04/2022 20:53:36 - INFO - codeparrot_training - Step 26749: {'lr': 0.0004662875594319057, 'samples': 13696000, 'steps': 26749, 'loss/train': 1.884063482284546} +03/04/2022 20:53:36 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/04/2022 20:53:41 - INFO - codeparrot_training - Step 26750: {'lr': 0.00046628489798308006, 'samples': 13696512, 'steps': 26750, 'loss/train': 2.0499110221862793} +03/04/2022 20:53:45 - INFO - codeparrot_training - Step 26751: {'lr': 0.0004662822364367997, 'samples': 13697024, 'steps': 26751, 'loss/train': 1.9151647090911865} +03/04/2022 20:53:45 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/04/2022 20:53:50 - INFO - codeparrot_training - Step 26752: {'lr': 0.000466279574793066, 'samples': 13697536, 'steps': 26752, 'loss/train': 0.960995078086853} +03/04/2022 20:53:53 - INFO - codeparrot_training - Step 26753: {'lr': 0.00046627691305188004, 'samples': 13698048, 'steps': 26753, 'loss/train': 2.462904214859009} +03/04/2022 20:53:53 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/04/2022 20:53:58 - INFO - codeparrot_training - Step 26754: {'lr': 0.00046627425121324294, 'samples': 13698560, 'steps': 26754, 'loss/train': 1.6544512510299683} +03/04/2022 20:54:02 - INFO - codeparrot_training - Step 26755: {'lr': 0.0004662715892771561, 'samples': 13699072, 'steps': 26755, 'loss/train': 0.49252554774284363} +03/04/2022 20:54:02 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 20:54:07 - INFO - codeparrot_training - Step 26756: {'lr': 0.0004662689272436206, 'samples': 13699584, 'steps': 26756, 'loss/train': 1.0155199766159058} +03/04/2022 20:54:10 - INFO - codeparrot_training - Step 26757: {'lr': 0.00046626626511263764, 'samples': 13700096, 'steps': 26757, 'loss/train': 2.544031858444214} +03/04/2022 20:54:10 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/04/2022 20:54:15 - INFO - codeparrot_training - Step 26758: {'lr': 0.00046626360288420845, 'samples': 13700608, 'steps': 26758, 'loss/train': 0.5484464764595032} +03/04/2022 20:54:18 - INFO - codeparrot_training - Step 26759: {'lr': 0.00046626094055833426, 'samples': 13701120, 'steps': 26759, 'loss/train': 1.8377954959869385} +03/04/2022 20:54:18 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/04/2022 20:54:24 - INFO - codeparrot_training - Step 26760: {'lr': 0.0004662582781350161, 'samples': 13701632, 'steps': 26760, 'loss/train': 1.4961811304092407} +03/04/2022 20:54:27 - INFO - codeparrot_training - Step 26761: {'lr': 0.00046625561561425543, 'samples': 13702144, 'steps': 26761, 'loss/train': 2.119016647338867} +03/04/2022 20:54:28 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 20:54:32 - INFO - codeparrot_training - Step 26762: {'lr': 0.00046625295299605323, 'samples': 13702656, 'steps': 26762, 'loss/train': 1.8316349983215332} +03/04/2022 20:54:35 - INFO - codeparrot_training - Step 26763: {'lr': 0.0004662502902804109, 'samples': 13703168, 'steps': 26763, 'loss/train': 1.5876240730285645} +03/04/2022 20:54:36 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 20:54:41 - INFO - codeparrot_training - Step 26764: {'lr': 0.0004662476274673294, 'samples': 13703680, 'steps': 26764, 'loss/train': 2.1906826496124268} +03/04/2022 20:54:44 - INFO - codeparrot_training - Step 26765: {'lr': 0.00046624496455681006, 'samples': 13704192, 'steps': 26765, 'loss/train': 1.8430958986282349} +03/04/2022 20:54:44 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 20:54:49 - INFO - codeparrot_training - Step 26766: {'lr': 0.00046624230154885415, 'samples': 13704704, 'steps': 26766, 'loss/train': 2.269381284713745} +03/04/2022 20:54:52 - INFO - codeparrot_training - Step 26767: {'lr': 0.0004662396384434627, 'samples': 13705216, 'steps': 26767, 'loss/train': 1.2148300409317017} +03/04/2022 20:54:53 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/04/2022 20:54:57 - INFO - codeparrot_training - Step 26768: {'lr': 0.00046623697524063713, 'samples': 13705728, 'steps': 26768, 'loss/train': 1.8138720989227295} +03/04/2022 20:55:01 - INFO - codeparrot_training - Step 26769: {'lr': 0.00046623431194037847, 'samples': 13706240, 'steps': 26769, 'loss/train': 1.5771387815475464} +03/04/2022 20:55:01 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 20:55:06 - INFO - codeparrot_training - Step 26770: {'lr': 0.000466231648542688, 'samples': 13706752, 'steps': 26770, 'loss/train': 2.971315860748291} +03/04/2022 20:55:09 - INFO - codeparrot_training - Step 26771: {'lr': 0.0004662289850475668, 'samples': 13707264, 'steps': 26771, 'loss/train': 1.7636375427246094} +03/04/2022 20:55:10 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 20:55:14 - INFO - codeparrot_training - Step 26772: {'lr': 0.0004662263214550162, 'samples': 13707776, 'steps': 26772, 'loss/train': 1.9939988851547241} +03/04/2022 20:55:17 - INFO - codeparrot_training - Step 26773: {'lr': 0.00046622365776503735, 'samples': 13708288, 'steps': 26773, 'loss/train': 1.5807050466537476} +03/04/2022 20:55:18 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 20:55:23 - INFO - codeparrot_training - Step 26774: {'lr': 0.0004662209939776315, 'samples': 13708800, 'steps': 26774, 'loss/train': 1.9121589660644531} +03/04/2022 20:55:26 - INFO - codeparrot_training - Step 26775: {'lr': 0.0004662183300927997, 'samples': 13709312, 'steps': 26775, 'loss/train': 2.4105429649353027} +03/04/2022 20:55:27 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 20:55:32 - INFO - codeparrot_training - Step 26776: {'lr': 0.0004662156661105433, 'samples': 13709824, 'steps': 26776, 'loss/train': 1.602795958518982} +03/04/2022 20:55:35 - INFO - codeparrot_training - Step 26777: {'lr': 0.0004662130020308635, 'samples': 13710336, 'steps': 26777, 'loss/train': 1.6779029369354248} +03/04/2022 20:55:36 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 20:55:40 - INFO - codeparrot_training - Step 26778: {'lr': 0.00046621033785376146, 'samples': 13710848, 'steps': 26778, 'loss/train': 2.1358914375305176} +03/04/2022 20:55:43 - INFO - codeparrot_training - Step 26779: {'lr': 0.00046620767357923834, 'samples': 13711360, 'steps': 26779, 'loss/train': 1.6286842823028564} +03/04/2022 20:55:44 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 20:55:48 - INFO - codeparrot_training - Step 26780: {'lr': 0.0004662050092072954, 'samples': 13711872, 'steps': 26780, 'loss/train': 1.2755171060562134} +03/04/2022 20:55:51 - INFO - codeparrot_training - Step 26781: {'lr': 0.0004662023447379338, 'samples': 13712384, 'steps': 26781, 'loss/train': 0.8289638757705688} +03/04/2022 20:55:53 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 20:55:57 - INFO - codeparrot_training - Step 26782: {'lr': 0.0004661996801711548, 'samples': 13712896, 'steps': 26782, 'loss/train': 2.3932621479034424} +03/04/2022 20:56:00 - INFO - codeparrot_training - Step 26783: {'lr': 0.0004661970155069595, 'samples': 13713408, 'steps': 26783, 'loss/train': 1.3078304529190063} +03/04/2022 20:56:01 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 20:56:05 - INFO - codeparrot_training - Step 26784: {'lr': 0.00046619435074534923, 'samples': 13713920, 'steps': 26784, 'loss/train': 2.1332814693450928} +03/04/2022 20:56:08 - INFO - codeparrot_training - Step 26785: {'lr': 0.0004661916858863251, 'samples': 13714432, 'steps': 26785, 'loss/train': 1.0629059076309204} +03/04/2022 20:56:09 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 20:56:13 - INFO - codeparrot_training - Step 26786: {'lr': 0.00046618902092988824, 'samples': 13714944, 'steps': 26786, 'loss/train': 0.9151431918144226} +03/04/2022 20:56:17 - INFO - codeparrot_training - Step 26787: {'lr': 0.00046618635587604006, 'samples': 13715456, 'steps': 26787, 'loss/train': 1.870697021484375} +03/04/2022 20:56:17 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/04/2022 20:56:22 - INFO - codeparrot_training - Step 26788: {'lr': 0.00046618369072478163, 'samples': 13715968, 'steps': 26788, 'loss/train': 1.8286612033843994} +03/04/2022 20:56:25 - INFO - codeparrot_training - Step 26789: {'lr': 0.0004661810254761141, 'samples': 13716480, 'steps': 26789, 'loss/train': 1.6959599256515503} +03/04/2022 20:56:25 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 20:56:31 - INFO - codeparrot_training - Step 26790: {'lr': 0.0004661783601300388, 'samples': 13716992, 'steps': 26790, 'loss/train': 1.1813629865646362} +03/04/2022 20:56:34 - INFO - codeparrot_training - Step 26791: {'lr': 0.00046617569468655686, 'samples': 13717504, 'steps': 26791, 'loss/train': 1.6389458179473877} +03/04/2022 20:56:37 - INFO - codeparrot_training - Step 26792: {'lr': 0.00046617302914566945, 'samples': 13718016, 'steps': 26792, 'loss/train': 2.364201784133911} +03/04/2022 20:56:38 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 20:56:43 - INFO - codeparrot_training - Step 26793: {'lr': 0.00046617036350737786, 'samples': 13718528, 'steps': 26793, 'loss/train': 1.9508531093597412} +03/04/2022 20:56:46 - INFO - codeparrot_training - Step 26794: {'lr': 0.0004661676977716832, 'samples': 13719040, 'steps': 26794, 'loss/train': 1.8392504453659058} +03/04/2022 20:56:46 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/04/2022 20:56:51 - INFO - codeparrot_training - Step 26795: {'lr': 0.0004661650319385867, 'samples': 13719552, 'steps': 26795, 'loss/train': 1.9261102676391602} +03/04/2022 20:56:54 - INFO - codeparrot_training - Step 26796: {'lr': 0.0004661623660080896, 'samples': 13720064, 'steps': 26796, 'loss/train': 1.9575016498565674} +03/04/2022 20:56:55 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/04/2022 20:56:59 - INFO - codeparrot_training - Step 26797: {'lr': 0.000466159699980193, 'samples': 13720576, 'steps': 26797, 'loss/train': 1.739269733428955} +03/04/2022 20:57:03 - INFO - codeparrot_training - Step 26798: {'lr': 0.0004661570338548983, 'samples': 13721088, 'steps': 26798, 'loss/train': 2.4262988567352295} +03/04/2022 20:57:03 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/04/2022 20:57:08 - INFO - codeparrot_training - Step 26799: {'lr': 0.00046615436763220645, 'samples': 13721600, 'steps': 26799, 'loss/train': 2.1114871501922607} +03/04/2022 20:57:11 - INFO - codeparrot_training - Step 26800: {'lr': 0.0004661517013121189, 'samples': 13722112, 'steps': 26800, 'loss/train': 1.958483338356018} +03/04/2022 20:57:11 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 20:57:16 - INFO - codeparrot_training - Step 26801: {'lr': 0.00046614903489463667, 'samples': 13722624, 'steps': 26801, 'loss/train': 1.774544596672058} +03/04/2022 20:57:19 - INFO - codeparrot_training - Step 26802: {'lr': 0.000466146368379761, 'samples': 13723136, 'steps': 26802, 'loss/train': 2.366953134536743} +03/04/2022 20:57:19 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/04/2022 20:57:25 - INFO - codeparrot_training - Step 26803: {'lr': 0.0004661437017674931, 'samples': 13723648, 'steps': 26803, 'loss/train': 1.7530736923217773} +03/04/2022 20:57:28 - INFO - codeparrot_training - Step 26804: {'lr': 0.00046614103505783423, 'samples': 13724160, 'steps': 26804, 'loss/train': 1.3962421417236328} +03/04/2022 20:57:28 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 20:57:33 - INFO - codeparrot_training - Step 26805: {'lr': 0.0004661383682507856, 'samples': 13724672, 'steps': 26805, 'loss/train': 1.9255813360214233} +03/04/2022 20:57:36 - INFO - codeparrot_training - Step 26806: {'lr': 0.00046613570134634825, 'samples': 13725184, 'steps': 26806, 'loss/train': 2.706526279449463} +03/04/2022 20:57:36 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 20:57:42 - INFO - codeparrot_training - Step 26807: {'lr': 0.00046613303434452346, 'samples': 13725696, 'steps': 26807, 'loss/train': 1.5704504251480103} +03/04/2022 20:57:45 - INFO - codeparrot_training - Step 26808: {'lr': 0.00046613036724531254, 'samples': 13726208, 'steps': 26808, 'loss/train': 2.387249708175659} +03/04/2022 20:57:46 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 20:57:50 - INFO - codeparrot_training - Step 26809: {'lr': 0.00046612770004871663, 'samples': 13726720, 'steps': 26809, 'loss/train': 2.4342329502105713} +03/04/2022 20:57:53 - INFO - codeparrot_training - Step 26810: {'lr': 0.00046612503275473687, 'samples': 13727232, 'steps': 26810, 'loss/train': 1.8359522819519043} +03/04/2022 20:57:54 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 20:57:59 - INFO - codeparrot_training - Step 26811: {'lr': 0.00046612236536337456, 'samples': 13727744, 'steps': 26811, 'loss/train': 2.5119807720184326} +03/04/2022 20:58:02 - INFO - codeparrot_training - Step 26812: {'lr': 0.00046611969787463083, 'samples': 13728256, 'steps': 26812, 'loss/train': 1.1894862651824951} +03/04/2022 20:58:03 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 20:58:07 - INFO - codeparrot_training - Step 26813: {'lr': 0.00046611703028850683, 'samples': 13728768, 'steps': 26813, 'loss/train': 2.3573427200317383} +03/04/2022 20:58:11 - INFO - codeparrot_training - Step 26814: {'lr': 0.00046611436260500386, 'samples': 13729280, 'steps': 26814, 'loss/train': 1.7114008665084839} +03/04/2022 20:58:12 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/04/2022 20:58:16 - INFO - codeparrot_training - Step 26815: {'lr': 0.00046611169482412305, 'samples': 13729792, 'steps': 26815, 'loss/train': 1.8820774555206299} +03/04/2022 20:58:19 - INFO - codeparrot_training - Step 26816: {'lr': 0.00046610902694586576, 'samples': 13730304, 'steps': 26816, 'loss/train': 1.5594134330749512} +03/04/2022 20:58:21 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 20:58:24 - INFO - codeparrot_training - Step 26817: {'lr': 0.00046610635897023303, 'samples': 13730816, 'steps': 26817, 'loss/train': 1.8868751525878906} +03/04/2022 20:58:28 - INFO - codeparrot_training - Step 26818: {'lr': 0.0004661036908972261, 'samples': 13731328, 'steps': 26818, 'loss/train': 1.5075949430465698} +03/04/2022 20:58:29 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 20:58:33 - INFO - codeparrot_training - Step 26819: {'lr': 0.0004661010227268462, 'samples': 13731840, 'steps': 26819, 'loss/train': 0.6462875604629517} +03/04/2022 20:58:36 - INFO - codeparrot_training - Step 26820: {'lr': 0.0004660983544590944, 'samples': 13732352, 'steps': 26820, 'loss/train': 1.8443554639816284} +03/04/2022 20:58:37 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/04/2022 20:58:41 - INFO - codeparrot_training - Step 26821: {'lr': 0.0004660956860939722, 'samples': 13732864, 'steps': 26821, 'loss/train': 1.1227312088012695} +03/04/2022 20:58:45 - INFO - codeparrot_training - Step 26822: {'lr': 0.0004660930176314805, 'samples': 13733376, 'steps': 26822, 'loss/train': 1.2645559310913086} +03/04/2022 20:58:46 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 20:58:50 - INFO - codeparrot_training - Step 26823: {'lr': 0.0004660903490716206, 'samples': 13733888, 'steps': 26823, 'loss/train': 1.5876251459121704} +03/04/2022 20:58:53 - INFO - codeparrot_training - Step 26824: {'lr': 0.0004660876804143938, 'samples': 13734400, 'steps': 26824, 'loss/train': 2.0230329036712646} +03/04/2022 20:58:55 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 20:58:58 - INFO - codeparrot_training - Step 26825: {'lr': 0.0004660850116598012, 'samples': 13734912, 'steps': 26825, 'loss/train': 1.7992655038833618} +03/04/2022 20:59:01 - INFO - codeparrot_training - Step 26826: {'lr': 0.00046608234280784406, 'samples': 13735424, 'steps': 26826, 'loss/train': 1.691169023513794} +03/04/2022 20:59:03 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/04/2022 20:59:07 - INFO - codeparrot_training - Step 26827: {'lr': 0.0004660796738585235, 'samples': 13735936, 'steps': 26827, 'loss/train': 1.6293751001358032} +03/04/2022 20:59:10 - INFO - codeparrot_training - Step 26828: {'lr': 0.0004660770048118408, 'samples': 13736448, 'steps': 26828, 'loss/train': 1.0595265626907349} +03/04/2022 20:59:12 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/04/2022 20:59:15 - INFO - codeparrot_training - Step 26829: {'lr': 0.00046607433566779713, 'samples': 13736960, 'steps': 26829, 'loss/train': 1.870438575744629} +03/04/2022 20:59:18 - INFO - codeparrot_training - Step 26830: {'lr': 0.00046607166642639365, 'samples': 13737472, 'steps': 26830, 'loss/train': 2.0925443172454834} +03/04/2022 20:59:20 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 20:59:24 - INFO - codeparrot_training - Step 26831: {'lr': 0.00046606899708763174, 'samples': 13737984, 'steps': 26831, 'loss/train': 2.2272346019744873} +03/04/2022 20:59:27 - INFO - codeparrot_training - Step 26832: {'lr': 0.0004660663276515124, 'samples': 13738496, 'steps': 26832, 'loss/train': 1.059781789779663} +03/04/2022 20:59:28 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 20:59:32 - INFO - codeparrot_training - Step 26833: {'lr': 0.00046606365811803686, 'samples': 13739008, 'steps': 26833, 'loss/train': 1.7172563076019287} +03/04/2022 20:59:35 - INFO - codeparrot_training - Step 26834: {'lr': 0.0004660609884872064, 'samples': 13739520, 'steps': 26834, 'loss/train': 1.5828953981399536} +03/04/2022 20:59:37 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 20:59:41 - INFO - codeparrot_training - Step 26835: {'lr': 0.00046605831875902215, 'samples': 13740032, 'steps': 26835, 'loss/train': 2.0359396934509277} +03/04/2022 20:59:44 - INFO - codeparrot_training - Step 26836: {'lr': 0.00046605564893348545, 'samples': 13740544, 'steps': 26836, 'loss/train': 3.368013381958008} +03/04/2022 20:59:48 - INFO - codeparrot_training - Step 26837: {'lr': 0.0004660529790105974, 'samples': 13741056, 'steps': 26837, 'loss/train': 2.1502716541290283} +03/04/2022 20:59:49 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/04/2022 20:59:53 - INFO - codeparrot_training - Step 26838: {'lr': 0.00046605030899035915, 'samples': 13741568, 'steps': 26838, 'loss/train': 1.1533163785934448} +03/04/2022 20:59:56 - INFO - codeparrot_training - Step 26839: {'lr': 0.000466047638872772, 'samples': 13742080, 'steps': 26839, 'loss/train': 1.6251182556152344} +03/04/2022 20:59:57 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/04/2022 21:00:01 - INFO - codeparrot_training - Step 26840: {'lr': 0.0004660449686578371, 'samples': 13742592, 'steps': 26840, 'loss/train': 1.8646130561828613} +03/04/2022 21:00:05 - INFO - codeparrot_training - Step 26841: {'lr': 0.0004660422983455557, 'samples': 13743104, 'steps': 26841, 'loss/train': 1.8938318490982056} +03/04/2022 21:00:06 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 21:00:10 - INFO - codeparrot_training - Step 26842: {'lr': 0.0004660396279359289, 'samples': 13743616, 'steps': 26842, 'loss/train': 1.8495512008666992} +03/04/2022 21:00:13 - INFO - codeparrot_training - Step 26843: {'lr': 0.000466036957428958, 'samples': 13744128, 'steps': 26843, 'loss/train': 1.705845832824707} +03/04/2022 21:00:15 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 21:00:18 - INFO - codeparrot_training - Step 26844: {'lr': 0.0004660342868246442, 'samples': 13744640, 'steps': 26844, 'loss/train': 1.7729588747024536} +03/04/2022 21:00:21 - INFO - codeparrot_training - Step 26845: {'lr': 0.0004660316161229887, 'samples': 13745152, 'steps': 26845, 'loss/train': 2.334062337875366} +03/04/2022 21:00:23 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/04/2022 21:00:27 - INFO - codeparrot_training - Step 26846: {'lr': 0.00046602894532399275, 'samples': 13745664, 'steps': 26846, 'loss/train': 2.180962324142456} +03/04/2022 21:00:30 - INFO - codeparrot_training - Step 26847: {'lr': 0.00046602627442765744, 'samples': 13746176, 'steps': 26847, 'loss/train': 1.7380377054214478} +03/04/2022 21:00:31 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 21:00:35 - INFO - codeparrot_training - Step 26848: {'lr': 0.00046602360343398397, 'samples': 13746688, 'steps': 26848, 'loss/train': 1.6574586629867554} +03/04/2022 21:00:38 - INFO - codeparrot_training - Step 26849: {'lr': 0.0004660209323429736, 'samples': 13747200, 'steps': 26849, 'loss/train': 1.5669090747833252} +03/04/2022 21:00:40 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 21:00:44 - INFO - codeparrot_training - Step 26850: {'lr': 0.0004660182611546276, 'samples': 13747712, 'steps': 26850, 'loss/train': 1.1924272775650024} +03/04/2022 21:00:47 - INFO - codeparrot_training - Step 26851: {'lr': 0.0004660155898689471, 'samples': 13748224, 'steps': 26851, 'loss/train': 1.6120572090148926} +03/04/2022 21:00:49 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 21:00:52 - INFO - codeparrot_training - Step 26852: {'lr': 0.0004660129184859332, 'samples': 13748736, 'steps': 26852, 'loss/train': 1.6276812553405762} +03/04/2022 21:00:55 - INFO - codeparrot_training - Step 26853: {'lr': 0.00046601024700558736, 'samples': 13749248, 'steps': 26853, 'loss/train': 2.6847753524780273} +03/04/2022 21:00:57 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 21:01:01 - INFO - codeparrot_training - Step 26854: {'lr': 0.0004660075754279105, 'samples': 13749760, 'steps': 26854, 'loss/train': 1.4832788705825806} +03/04/2022 21:01:04 - INFO - codeparrot_training - Step 26855: {'lr': 0.00046600490375290406, 'samples': 13750272, 'steps': 26855, 'loss/train': 1.7771449089050293} +03/04/2022 21:01:06 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 21:01:09 - INFO - codeparrot_training - Step 26856: {'lr': 0.0004660022319805691, 'samples': 13750784, 'steps': 26856, 'loss/train': 1.7487356662750244} +03/04/2022 21:01:12 - INFO - codeparrot_training - Step 26857: {'lr': 0.0004659995601109069, 'samples': 13751296, 'steps': 26857, 'loss/train': 1.9064791202545166} +03/04/2022 21:01:14 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 21:01:18 - INFO - codeparrot_training - Step 26858: {'lr': 0.0004659968881439186, 'samples': 13751808, 'steps': 26858, 'loss/train': 2.022096872329712} +03/04/2022 21:01:21 - INFO - codeparrot_training - Step 26859: {'lr': 0.00046599421607960545, 'samples': 13752320, 'steps': 26859, 'loss/train': 1.6218641996383667} +03/04/2022 21:01:23 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 21:01:26 - INFO - codeparrot_training - Step 26860: {'lr': 0.0004659915439179686, 'samples': 13752832, 'steps': 26860, 'loss/train': 1.7284876108169556} +03/04/2022 21:01:29 - INFO - codeparrot_training - Step 26861: {'lr': 0.0004659888716590094, 'samples': 13753344, 'steps': 26861, 'loss/train': 2.1894519329071045} +03/04/2022 21:01:31 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 21:01:34 - INFO - codeparrot_training - Step 26862: {'lr': 0.00046598619930272883, 'samples': 13753856, 'steps': 26862, 'loss/train': 2.325878858566284} +03/04/2022 21:01:38 - INFO - codeparrot_training - Step 26863: {'lr': 0.00046598352684912824, 'samples': 13754368, 'steps': 26863, 'loss/train': 2.099714994430542} +03/04/2022 21:01:40 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/04/2022 21:01:43 - INFO - codeparrot_training - Step 26864: {'lr': 0.0004659808542982088, 'samples': 13754880, 'steps': 26864, 'loss/train': 2.2697854042053223} +03/04/2022 21:01:46 - INFO - codeparrot_training - Step 26865: {'lr': 0.0004659781816499718, 'samples': 13755392, 'steps': 26865, 'loss/train': 1.641038417816162} +03/04/2022 21:01:48 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/04/2022 21:01:51 - INFO - codeparrot_training - Step 26866: {'lr': 0.0004659755089044183, 'samples': 13755904, 'steps': 26866, 'loss/train': 1.6898154020309448} +03/04/2022 21:01:54 - INFO - codeparrot_training - Step 26867: {'lr': 0.00046597283606154957, 'samples': 13756416, 'steps': 26867, 'loss/train': 1.8515864610671997} +03/04/2022 21:01:57 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 21:02:00 - INFO - codeparrot_training - Step 26868: {'lr': 0.0004659701631213668, 'samples': 13756928, 'steps': 26868, 'loss/train': 2.666245460510254} +03/04/2022 21:02:03 - INFO - codeparrot_training - Step 26869: {'lr': 0.00046596749008387124, 'samples': 13757440, 'steps': 26869, 'loss/train': 2.086841583251953} +03/04/2022 21:02:05 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 21:02:08 - INFO - codeparrot_training - Step 26870: {'lr': 0.00046596481694906403, 'samples': 13757952, 'steps': 26870, 'loss/train': 2.137169361114502} +03/04/2022 21:02:11 - INFO - codeparrot_training - Step 26871: {'lr': 0.00046596214371694643, 'samples': 13758464, 'steps': 26871, 'loss/train': 2.2387752532958984} +03/04/2022 21:02:14 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/04/2022 21:02:17 - INFO - codeparrot_training - Step 26872: {'lr': 0.00046595947038751963, 'samples': 13758976, 'steps': 26872, 'loss/train': 1.7299578189849854} +03/04/2022 21:02:20 - INFO - codeparrot_training - Step 26873: {'lr': 0.00046595679696078476, 'samples': 13759488, 'steps': 26873, 'loss/train': 1.876828670501709} +03/04/2022 21:02:22 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 21:02:25 - INFO - codeparrot_training - Step 26874: {'lr': 0.00046595412343674317, 'samples': 13760000, 'steps': 26874, 'loss/train': 1.9129383563995361} +03/04/2022 21:02:28 - INFO - codeparrot_training - Step 26875: {'lr': 0.00046595144981539596, 'samples': 13760512, 'steps': 26875, 'loss/train': 1.3868958950042725} +03/04/2022 21:02:30 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 21:02:33 - INFO - codeparrot_training - Step 26876: {'lr': 0.00046594877609674437, 'samples': 13761024, 'steps': 26876, 'loss/train': 0.19592063128948212} +03/04/2022 21:02:37 - INFO - codeparrot_training - Step 26877: {'lr': 0.00046594610228078954, 'samples': 13761536, 'steps': 26877, 'loss/train': 2.623413324356079} +03/04/2022 21:02:39 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/04/2022 21:02:42 - INFO - codeparrot_training - Step 26878: {'lr': 0.00046594342836753276, 'samples': 13762048, 'steps': 26878, 'loss/train': 2.1538541316986084} +03/04/2022 21:02:45 - INFO - codeparrot_training - Step 26879: {'lr': 0.0004659407543569752, 'samples': 13762560, 'steps': 26879, 'loss/train': 1.8404595851898193} +03/04/2022 21:02:48 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/04/2022 21:02:51 - INFO - codeparrot_training - Step 26880: {'lr': 0.0004659380802491181, 'samples': 13763072, 'steps': 26880, 'loss/train': 1.980209231376648} +03/04/2022 21:02:54 - INFO - codeparrot_training - Step 26881: {'lr': 0.00046593540604396256, 'samples': 13763584, 'steps': 26881, 'loss/train': 1.6889338493347168} +03/04/2022 21:02:56 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/04/2022 21:02:59 - INFO - codeparrot_training - Step 26882: {'lr': 0.00046593273174150995, 'samples': 13764096, 'steps': 26882, 'loss/train': 2.3550567626953125} +03/04/2022 21:03:02 - INFO - codeparrot_training - Step 26883: {'lr': 0.0004659300573417613, 'samples': 13764608, 'steps': 26883, 'loss/train': 2.1005825996398926} +03/04/2022 21:03:04 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 21:03:07 - INFO - codeparrot_training - Step 26884: {'lr': 0.00046592738284471794, 'samples': 13765120, 'steps': 26884, 'loss/train': 1.7370342016220093} +03/04/2022 21:03:11 - INFO - codeparrot_training - Step 26885: {'lr': 0.000465924708250381, 'samples': 13765632, 'steps': 26885, 'loss/train': 1.626375675201416} +03/04/2022 21:03:13 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 21:03:16 - INFO - codeparrot_training - Step 26886: {'lr': 0.00046592203355875177, 'samples': 13766144, 'steps': 26886, 'loss/train': 1.3553518056869507} +03/04/2022 21:03:19 - INFO - codeparrot_training - Step 26887: {'lr': 0.00046591935876983136, 'samples': 13766656, 'steps': 26887, 'loss/train': 1.6720315217971802} +03/04/2022 21:03:21 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 21:03:24 - INFO - codeparrot_training - Step 26888: {'lr': 0.0004659166838836211, 'samples': 13767168, 'steps': 26888, 'loss/train': 1.36971914768219} +03/04/2022 21:03:27 - INFO - codeparrot_training - Step 26889: {'lr': 0.000465914008900122, 'samples': 13767680, 'steps': 26889, 'loss/train': 2.0922796726226807} +03/04/2022 21:03:30 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 21:03:33 - INFO - codeparrot_training - Step 26890: {'lr': 0.00046591133381933546, 'samples': 13768192, 'steps': 26890, 'loss/train': 1.967210054397583} +03/04/2022 21:03:36 - INFO - codeparrot_training - Step 26891: {'lr': 0.0004659086586412626, 'samples': 13768704, 'steps': 26891, 'loss/train': 1.7520339488983154} +03/04/2022 21:03:38 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/04/2022 21:03:41 - INFO - codeparrot_training - Step 26892: {'lr': 0.0004659059833659046, 'samples': 13769216, 'steps': 26892, 'loss/train': 2.4216220378875732} +03/04/2022 21:03:44 - INFO - codeparrot_training - Step 26893: {'lr': 0.0004659033079932627, 'samples': 13769728, 'steps': 26893, 'loss/train': 1.9866973161697388} +03/04/2022 21:03:46 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/04/2022 21:03:50 - INFO - codeparrot_training - Step 26894: {'lr': 0.00046590063252333806, 'samples': 13770240, 'steps': 26894, 'loss/train': 1.7901705503463745} +03/04/2022 21:03:53 - INFO - codeparrot_training - Step 26895: {'lr': 0.000465897956956132, 'samples': 13770752, 'steps': 26895, 'loss/train': 1.4143060445785522} +03/04/2022 21:03:55 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 21:03:58 - INFO - codeparrot_training - Step 26896: {'lr': 0.0004658952812916456, 'samples': 13771264, 'steps': 26896, 'loss/train': 1.4676921367645264} +03/04/2022 21:04:01 - INFO - codeparrot_training - Step 26897: {'lr': 0.0004658926055298802, 'samples': 13771776, 'steps': 26897, 'loss/train': 2.439800977706909} +03/04/2022 21:04:03 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 21:04:07 - INFO - codeparrot_training - Step 26898: {'lr': 0.0004658899296708369, 'samples': 13772288, 'steps': 26898, 'loss/train': 1.5424342155456543} +03/04/2022 21:04:10 - INFO - codeparrot_training - Step 26899: {'lr': 0.00046588725371451685, 'samples': 13772800, 'steps': 26899, 'loss/train': 1.947596788406372} +03/04/2022 21:04:12 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 21:04:15 - INFO - codeparrot_training - Step 26900: {'lr': 0.00046588457766092134, 'samples': 13773312, 'steps': 26900, 'loss/train': 2.148871898651123} +03/04/2022 21:04:18 - INFO - codeparrot_training - Step 26901: {'lr': 0.00046588190151005163, 'samples': 13773824, 'steps': 26901, 'loss/train': 1.954790472984314} +03/04/2022 21:04:21 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 21:04:24 - INFO - codeparrot_training - Step 26902: {'lr': 0.00046587922526190883, 'samples': 13774336, 'steps': 26902, 'loss/train': 2.4236133098602295} +03/04/2022 21:04:27 - INFO - codeparrot_training - Step 26903: {'lr': 0.00046587654891649423, 'samples': 13774848, 'steps': 26903, 'loss/train': 2.181058883666992} +03/04/2022 21:04:29 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/04/2022 21:04:32 - INFO - codeparrot_training - Step 26904: {'lr': 0.00046587387247380897, 'samples': 13775360, 'steps': 26904, 'loss/train': 1.8829150199890137} +03/04/2022 21:04:35 - INFO - codeparrot_training - Step 26905: {'lr': 0.00046587119593385424, 'samples': 13775872, 'steps': 26905, 'loss/train': 0.2675618827342987} +03/04/2022 21:04:37 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 21:04:40 - INFO - codeparrot_training - Step 26906: {'lr': 0.00046586851929663134, 'samples': 13776384, 'steps': 26906, 'loss/train': 2.0829412937164307} +03/04/2022 21:04:44 - INFO - codeparrot_training - Step 26907: {'lr': 0.00046586584256214135, 'samples': 13776896, 'steps': 26907, 'loss/train': 1.401829481124878} +03/04/2022 21:04:46 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/04/2022 21:04:49 - INFO - codeparrot_training - Step 26908: {'lr': 0.0004658631657303856, 'samples': 13777408, 'steps': 26908, 'loss/train': 2.182494640350342} +03/04/2022 21:04:52 - INFO - codeparrot_training - Step 26909: {'lr': 0.0004658604888013652, 'samples': 13777920, 'steps': 26909, 'loss/train': 2.0812644958496094} +03/04/2022 21:04:54 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 21:04:58 - INFO - codeparrot_training - Step 26910: {'lr': 0.00046585781177508137, 'samples': 13778432, 'steps': 26910, 'loss/train': 2.4312736988067627} +03/04/2022 21:05:01 - INFO - codeparrot_training - Step 26911: {'lr': 0.0004658551346515354, 'samples': 13778944, 'steps': 26911, 'loss/train': 1.7564440965652466} +03/04/2022 21:05:03 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/04/2022 21:05:06 - INFO - codeparrot_training - Step 26912: {'lr': 0.00046585245743072833, 'samples': 13779456, 'steps': 26912, 'loss/train': 1.57561457157135} +03/04/2022 21:05:09 - INFO - codeparrot_training - Step 26913: {'lr': 0.0004658497801126616, 'samples': 13779968, 'steps': 26913, 'loss/train': 1.9252772331237793} +03/04/2022 21:05:12 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/04/2022 21:05:14 - INFO - codeparrot_training - Step 26914: {'lr': 0.00046584710269733623, 'samples': 13780480, 'steps': 26914, 'loss/train': 1.9885400533676147} +03/04/2022 21:05:18 - INFO - codeparrot_training - Step 26915: {'lr': 0.00046584442518475354, 'samples': 13780992, 'steps': 26915, 'loss/train': 1.9810930490493774} +03/04/2022 21:05:20 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 21:05:23 - INFO - codeparrot_training - Step 26916: {'lr': 0.0004658417475749146, 'samples': 13781504, 'steps': 26916, 'loss/train': 1.447359561920166} +03/04/2022 21:05:26 - INFO - codeparrot_training - Step 26917: {'lr': 0.00046583906986782074, 'samples': 13782016, 'steps': 26917, 'loss/train': 2.0656986236572266} +03/04/2022 21:05:28 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 21:05:31 - INFO - codeparrot_training - Step 26918: {'lr': 0.0004658363920634732, 'samples': 13782528, 'steps': 26918, 'loss/train': 2.407899856567383} +03/04/2022 21:05:34 - INFO - codeparrot_training - Step 26919: {'lr': 0.000465833714161873, 'samples': 13783040, 'steps': 26919, 'loss/train': 1.7505202293395996} +03/04/2022 21:05:37 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 21:05:40 - INFO - codeparrot_training - Step 26920: {'lr': 0.00046583103616302146, 'samples': 13783552, 'steps': 26920, 'loss/train': 1.1656569242477417} +03/04/2022 21:05:43 - INFO - codeparrot_training - Step 26921: {'lr': 0.0004658283580669198, 'samples': 13784064, 'steps': 26921, 'loss/train': 1.9668821096420288} +03/04/2022 21:05:45 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/04/2022 21:05:48 - INFO - codeparrot_training - Step 26922: {'lr': 0.0004658256798735693, 'samples': 13784576, 'steps': 26922, 'loss/train': 1.6807609796524048} +03/04/2022 21:05:51 - INFO - codeparrot_training - Step 26923: {'lr': 0.000465823001582971, 'samples': 13785088, 'steps': 26923, 'loss/train': 0.43089497089385986} +03/04/2022 21:05:54 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/04/2022 21:05:56 - INFO - codeparrot_training - Step 26924: {'lr': 0.00046582032319512624, 'samples': 13785600, 'steps': 26924, 'loss/train': 1.815053939819336} +03/04/2022 21:06:00 - INFO - codeparrot_training - Step 26925: {'lr': 0.00046581764471003605, 'samples': 13786112, 'steps': 26925, 'loss/train': 1.314481496810913} +03/04/2022 21:06:02 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/04/2022 21:06:05 - INFO - codeparrot_training - Step 26926: {'lr': 0.0004658149661277019, 'samples': 13786624, 'steps': 26926, 'loss/train': 1.7341303825378418} +03/04/2022 21:06:08 - INFO - codeparrot_training - Step 26927: {'lr': 0.0004658122874481248, 'samples': 13787136, 'steps': 26927, 'loss/train': 1.8964287042617798} +03/04/2022 21:06:11 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/04/2022 21:06:13 - INFO - codeparrot_training - Step 26928: {'lr': 0.000465809608671306, 'samples': 13787648, 'steps': 26928, 'loss/train': 1.298348069190979} +03/04/2022 21:06:17 - INFO - codeparrot_training - Step 26929: {'lr': 0.0004658069297972467, 'samples': 13788160, 'steps': 26929, 'loss/train': 2.526851177215576} +03/04/2022 21:06:19 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/04/2022 21:06:22 - INFO - codeparrot_training - Step 26930: {'lr': 0.00046580425082594823, 'samples': 13788672, 'steps': 26930, 'loss/train': 1.9725583791732788} +03/04/2022 21:06:25 - INFO - codeparrot_training - Step 26931: {'lr': 0.00046580157175741155, 'samples': 13789184, 'steps': 26931, 'loss/train': 1.2763549089431763} +03/04/2022 21:06:29 - INFO - codeparrot_training - Step 26932: {'lr': 0.0004657988925916381, 'samples': 13789696, 'steps': 26932, 'loss/train': 1.5090621709823608} +03/04/2022 21:06:30 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/04/2022 21:06:34 - INFO - codeparrot_training - Step 26933: {'lr': 0.000465796213328629, 'samples': 13790208, 'steps': 26933, 'loss/train': 1.8929084539413452} +03/04/2022 21:06:37 - INFO - codeparrot_training - Step 26934: {'lr': 0.00046579353396838545, 'samples': 13790720, 'steps': 26934, 'loss/train': 1.790312647819519} +03/04/2022 21:06:38 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 21:06:42 - INFO - codeparrot_training - Step 26935: {'lr': 0.00046579085451090864, 'samples': 13791232, 'steps': 26935, 'loss/train': 0.9979751706123352} +03/04/2022 21:06:45 - INFO - codeparrot_training - Step 26936: {'lr': 0.00046578817495619983, 'samples': 13791744, 'steps': 26936, 'loss/train': 2.4268741607666016} +03/04/2022 21:06:47 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 21:06:51 - INFO - codeparrot_training - Step 26937: {'lr': 0.0004657854953042602, 'samples': 13792256, 'steps': 26937, 'loss/train': 1.8120075464248657} +03/04/2022 21:06:54 - INFO - codeparrot_training - Step 26938: {'lr': 0.00046578281555509094, 'samples': 13792768, 'steps': 26938, 'loss/train': 2.019676685333252} +03/04/2022 21:06:56 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/04/2022 21:06:59 - INFO - codeparrot_training - Step 26939: {'lr': 0.00046578013570869325, 'samples': 13793280, 'steps': 26939, 'loss/train': 1.3692336082458496} +03/04/2022 21:07:02 - INFO - codeparrot_training - Step 26940: {'lr': 0.00046577745576506844, 'samples': 13793792, 'steps': 26940, 'loss/train': 2.395744800567627} +03/04/2022 21:07:04 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 21:07:08 - INFO - codeparrot_training - Step 26941: {'lr': 0.00046577477572421757, 'samples': 13794304, 'steps': 26941, 'loss/train': 3.3783013820648193} +03/04/2022 21:07:11 - INFO - codeparrot_training - Step 26942: {'lr': 0.0004657720955861419, 'samples': 13794816, 'steps': 26942, 'loss/train': 1.6694440841674805} +03/04/2022 21:07:13 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/04/2022 21:07:16 - INFO - codeparrot_training - Step 26943: {'lr': 0.00046576941535084274, 'samples': 13795328, 'steps': 26943, 'loss/train': 1.7654786109924316} +03/04/2022 21:07:19 - INFO - codeparrot_training - Step 26944: {'lr': 0.0004657667350183211, 'samples': 13795840, 'steps': 26944, 'loss/train': 2.0501928329467773} +03/04/2022 21:07:21 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/04/2022 21:07:25 - INFO - codeparrot_training - Step 26945: {'lr': 0.00046576405458857836, 'samples': 13796352, 'steps': 26945, 'loss/train': 2.2468926906585693} +03/04/2022 21:07:28 - INFO - codeparrot_training - Step 26946: {'lr': 0.0004657613740616157, 'samples': 13796864, 'steps': 26946, 'loss/train': 1.9141151905059814} +03/04/2022 21:07:30 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/04/2022 21:07:33 - INFO - codeparrot_training - Step 26947: {'lr': 0.0004657586934374342, 'samples': 13797376, 'steps': 26947, 'loss/train': 2.0254385471343994} +03/04/2022 21:07:36 - INFO - codeparrot_training - Step 26948: {'lr': 0.0004657560127160352, 'samples': 13797888, 'steps': 26948, 'loss/train': 0.7585595846176147} +03/04/2022 21:07:38 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/04/2022 21:07:42 - INFO - codeparrot_training - Step 26949: {'lr': 0.00046575333189741993, 'samples': 13798400, 'steps': 26949, 'loss/train': 2.778754234313965} +03/04/2022 21:07:45 - INFO - codeparrot_training - Step 26950: {'lr': 0.00046575065098158945, 'samples': 13798912, 'steps': 26950, 'loss/train': 2.0358493328094482} +03/04/2022 21:07:47 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 21:07:51 - INFO - codeparrot_training - Step 26951: {'lr': 0.0004657479699685451, 'samples': 13799424, 'steps': 26951, 'loss/train': 2.1740055084228516} +03/04/2022 21:07:54 - INFO - codeparrot_training - Step 26952: {'lr': 0.00046574528885828803, 'samples': 13799936, 'steps': 26952, 'loss/train': 2.1357152462005615} +03/04/2022 21:07:57 - INFO - codeparrot_training - Step 26953: {'lr': 0.0004657426076508195, 'samples': 13800448, 'steps': 26953, 'loss/train': 0.5794610977172852} +03/04/2022 21:07:57 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 21:08:02 - INFO - codeparrot_training - Step 26954: {'lr': 0.00046573992634614064, 'samples': 13800960, 'steps': 26954, 'loss/train': 2.048552989959717} +03/04/2022 21:08:05 - INFO - codeparrot_training - Step 26955: {'lr': 0.00046573724494425274, 'samples': 13801472, 'steps': 26955, 'loss/train': 1.7350014448165894} +03/04/2022 21:08:06 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 21:08:11 - INFO - codeparrot_training - Step 26956: {'lr': 0.00046573456344515694, 'samples': 13801984, 'steps': 26956, 'loss/train': 0.8301938772201538} +03/04/2022 21:08:14 - INFO - codeparrot_training - Step 26957: {'lr': 0.00046573188184885445, 'samples': 13802496, 'steps': 26957, 'loss/train': 1.6894348859786987} +03/04/2022 21:08:14 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/04/2022 21:08:19 - INFO - codeparrot_training - Step 26958: {'lr': 0.0004657292001553465, 'samples': 13803008, 'steps': 26958, 'loss/train': 0.6227928996086121} +03/04/2022 21:08:22 - INFO - codeparrot_training - Step 26959: {'lr': 0.0004657265183646344, 'samples': 13803520, 'steps': 26959, 'loss/train': 2.154426336288452} +03/04/2022 21:08:23 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 21:08:28 - INFO - codeparrot_training - Step 26960: {'lr': 0.00046572383647671913, 'samples': 13804032, 'steps': 26960, 'loss/train': 2.224898338317871} +03/04/2022 21:08:31 - INFO - codeparrot_training - Step 26961: {'lr': 0.0004657211544916021, 'samples': 13804544, 'steps': 26961, 'loss/train': 1.56977117061615} +03/04/2022 21:08:31 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 21:08:36 - INFO - codeparrot_training - Step 26962: {'lr': 0.00046571847240928444, 'samples': 13805056, 'steps': 26962, 'loss/train': 1.3864017724990845} +03/04/2022 21:08:39 - INFO - codeparrot_training - Step 26963: {'lr': 0.0004657157902297674, 'samples': 13805568, 'steps': 26963, 'loss/train': 2.0441770553588867} +03/04/2022 21:08:40 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 21:08:45 - INFO - codeparrot_training - Step 26964: {'lr': 0.00046571310795305213, 'samples': 13806080, 'steps': 26964, 'loss/train': 1.596448540687561} +03/04/2022 21:08:48 - INFO - codeparrot_training - Step 26965: {'lr': 0.0004657104255791398, 'samples': 13806592, 'steps': 26965, 'loss/train': 1.8522869348526} +03/04/2022 21:08:48 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 21:08:53 - INFO - codeparrot_training - Step 26966: {'lr': 0.0004657077431080317, 'samples': 13807104, 'steps': 26966, 'loss/train': 1.7474335432052612} +03/04/2022 21:08:56 - INFO - codeparrot_training - Step 26967: {'lr': 0.00046570506053972906, 'samples': 13807616, 'steps': 26967, 'loss/train': 1.3027578592300415} +03/04/2022 21:08:58 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 21:09:02 - INFO - codeparrot_training - Step 26968: {'lr': 0.000465702377874233, 'samples': 13808128, 'steps': 26968, 'loss/train': 1.7368673086166382} +03/04/2022 21:09:05 - INFO - codeparrot_training - Step 26969: {'lr': 0.00046569969511154485, 'samples': 13808640, 'steps': 26969, 'loss/train': 2.2720043659210205} +03/04/2022 21:09:06 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 21:09:10 - INFO - codeparrot_training - Step 26970: {'lr': 0.0004656970122516657, 'samples': 13809152, 'steps': 26970, 'loss/train': 2.381481885910034} +03/04/2022 21:09:13 - INFO - codeparrot_training - Step 26971: {'lr': 0.0004656943292945968, 'samples': 13809664, 'steps': 26971, 'loss/train': 1.7974470853805542} +03/04/2022 21:09:15 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 21:09:18 - INFO - codeparrot_training - Step 26972: {'lr': 0.0004656916462403394, 'samples': 13810176, 'steps': 26972, 'loss/train': 2.350710391998291} +03/04/2022 21:09:22 - INFO - codeparrot_training - Step 26973: {'lr': 0.0004656889630888946, 'samples': 13810688, 'steps': 26973, 'loss/train': 1.568495512008667} +03/04/2022 21:09:23 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 21:09:27 - INFO - codeparrot_training - Step 26974: {'lr': 0.0004656862798402638, 'samples': 13811200, 'steps': 26974, 'loss/train': 1.280421495437622} +03/04/2022 21:09:30 - INFO - codeparrot_training - Step 26975: {'lr': 0.00046568359649444796, 'samples': 13811712, 'steps': 26975, 'loss/train': 0.3861537575721741} +03/04/2022 21:09:32 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 21:09:35 - INFO - codeparrot_training - Step 26976: {'lr': 0.0004656809130514485, 'samples': 13812224, 'steps': 26976, 'loss/train': 1.720929741859436} +03/04/2022 21:09:39 - INFO - codeparrot_training - Step 26977: {'lr': 0.00046567822951126646, 'samples': 13812736, 'steps': 26977, 'loss/train': 1.8824357986450195} +03/04/2022 21:09:40 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 21:09:44 - INFO - codeparrot_training - Step 26978: {'lr': 0.00046567554587390324, 'samples': 13813248, 'steps': 26978, 'loss/train': 2.3506133556365967} +03/04/2022 21:09:47 - INFO - codeparrot_training - Step 26979: {'lr': 0.00046567286213935994, 'samples': 13813760, 'steps': 26979, 'loss/train': 0.2063003033399582} +03/04/2022 21:09:49 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 21:09:52 - INFO - codeparrot_training - Step 26980: {'lr': 0.00046567017830763776, 'samples': 13814272, 'steps': 26980, 'loss/train': 1.8373440504074097} +03/04/2022 21:09:56 - INFO - codeparrot_training - Step 26981: {'lr': 0.0004656674943787379, 'samples': 13814784, 'steps': 26981, 'loss/train': 0.5863098502159119} +03/04/2022 21:09:57 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 21:10:01 - INFO - codeparrot_training - Step 26982: {'lr': 0.0004656648103526616, 'samples': 13815296, 'steps': 26982, 'loss/train': 1.780055284500122} +03/04/2022 21:10:04 - INFO - codeparrot_training - Step 26983: {'lr': 0.00046566212622941005, 'samples': 13815808, 'steps': 26983, 'loss/train': 1.87108314037323} +03/04/2022 21:10:05 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/04/2022 21:10:09 - INFO - codeparrot_training - Step 26984: {'lr': 0.00046565944200898453, 'samples': 13816320, 'steps': 26984, 'loss/train': 1.3122005462646484} +03/04/2022 21:10:12 - INFO - codeparrot_training - Step 26985: {'lr': 0.00046565675769138614, 'samples': 13816832, 'steps': 26985, 'loss/train': 1.679057002067566} +03/04/2022 21:10:14 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 21:10:18 - INFO - codeparrot_training - Step 26986: {'lr': 0.00046565407327661614, 'samples': 13817344, 'steps': 26986, 'loss/train': 2.113560199737549} +03/04/2022 21:10:21 - INFO - codeparrot_training - Step 26987: {'lr': 0.0004656513887646758, 'samples': 13817856, 'steps': 26987, 'loss/train': 1.221604824066162} +03/04/2022 21:10:22 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 21:10:26 - INFO - codeparrot_training - Step 26988: {'lr': 0.00046564870415556625, 'samples': 13818368, 'steps': 26988, 'loss/train': 1.9061634540557861} +03/04/2022 21:10:29 - INFO - codeparrot_training - Step 26989: {'lr': 0.0004656460194492887, 'samples': 13818880, 'steps': 26989, 'loss/train': 1.4639053344726562} +03/04/2022 21:10:31 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 21:10:34 - INFO - codeparrot_training - Step 26990: {'lr': 0.0004656433346458444, 'samples': 13819392, 'steps': 26990, 'loss/train': 1.6703394651412964} +03/04/2022 21:10:38 - INFO - codeparrot_training - Step 26991: {'lr': 0.0004656406497452345, 'samples': 13819904, 'steps': 26991, 'loss/train': 1.7555326223373413} +03/04/2022 21:10:39 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/04/2022 21:10:43 - INFO - codeparrot_training - Step 26992: {'lr': 0.0004656379647474603, 'samples': 13820416, 'steps': 26992, 'loss/train': 2.2461001873016357} +03/04/2022 21:10:46 - INFO - codeparrot_training - Step 26993: {'lr': 0.0004656352796525229, 'samples': 13820928, 'steps': 26993, 'loss/train': 1.9018021821975708} +03/04/2022 21:10:47 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 21:10:51 - INFO - codeparrot_training - Step 26994: {'lr': 0.0004656325944604236, 'samples': 13821440, 'steps': 26994, 'loss/train': 1.6641300916671753} +03/04/2022 21:10:54 - INFO - codeparrot_training - Step 26995: {'lr': 0.00046562990917116366, 'samples': 13821952, 'steps': 26995, 'loss/train': 2.4000017642974854} +03/04/2022 21:10:55 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 21:11:00 - INFO - codeparrot_training - Step 26996: {'lr': 0.0004656272237847441, 'samples': 13822464, 'steps': 26996, 'loss/train': 2.3405776023864746} +03/04/2022 21:11:03 - INFO - codeparrot_training - Step 26997: {'lr': 0.0004656245383011663, 'samples': 13822976, 'steps': 26997, 'loss/train': 1.660452961921692} +03/04/2022 21:11:03 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 21:11:08 - INFO - codeparrot_training - Step 26998: {'lr': 0.00046562185272043137, 'samples': 13823488, 'steps': 26998, 'loss/train': 1.6345388889312744} +03/04/2022 21:11:11 - INFO - codeparrot_training - Step 26999: {'lr': 0.00046561916704254057, 'samples': 13824000, 'steps': 26999, 'loss/train': 1.7701505422592163} +03/04/2022 21:11:12 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 21:11:17 - INFO - codeparrot_training - Step 27000: {'lr': 0.0004656164812674951, 'samples': 13824512, 'steps': 27000, 'loss/train': 1.871762752532959} +03/04/2022 21:11:20 - INFO - codeparrot_training - Step 27001: {'lr': 0.00046561379539529626, 'samples': 13825024, 'steps': 27001, 'loss/train': 0.6935906410217285} +03/04/2022 21:11:20 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 21:11:25 - INFO - codeparrot_training - Step 27002: {'lr': 0.0004656111094259451, 'samples': 13825536, 'steps': 27002, 'loss/train': 1.907692790031433} +03/04/2022 21:11:28 - INFO - codeparrot_training - Step 27003: {'lr': 0.0004656084233594429, 'samples': 13826048, 'steps': 27003, 'loss/train': 1.893552303314209} +03/04/2022 21:11:30 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/04/2022 21:11:34 - INFO - codeparrot_training - Step 27004: {'lr': 0.0004656057371957908, 'samples': 13826560, 'steps': 27004, 'loss/train': 1.7349058389663696} +03/04/2022 21:11:37 - INFO - codeparrot_training - Step 27005: {'lr': 0.00046560305093499015, 'samples': 13827072, 'steps': 27005, 'loss/train': 2.0631167888641357} +03/04/2022 21:11:38 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 21:11:42 - INFO - codeparrot_training - Step 27006: {'lr': 0.00046560036457704215, 'samples': 13827584, 'steps': 27006, 'loss/train': 1.9291396141052246} +03/04/2022 21:11:45 - INFO - codeparrot_training - Step 27007: {'lr': 0.00046559767812194786, 'samples': 13828096, 'steps': 27007, 'loss/train': 1.5850939750671387} +03/04/2022 21:11:47 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 21:11:51 - INFO - codeparrot_training - Step 27008: {'lr': 0.0004655949915697086, 'samples': 13828608, 'steps': 27008, 'loss/train': 2.109180212020874} +03/04/2022 21:11:54 - INFO - codeparrot_training - Step 27009: {'lr': 0.0004655923049203256, 'samples': 13829120, 'steps': 27009, 'loss/train': 0.9794557094573975} +03/04/2022 21:11:58 - INFO - codeparrot_training - Step 27010: {'lr': 0.00046558961817380005, 'samples': 13829632, 'steps': 27010, 'loss/train': 0.9650289416313171} +03/04/2022 21:11:58 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 21:12:03 - INFO - codeparrot_training - Step 27011: {'lr': 0.00046558693133013306, 'samples': 13830144, 'steps': 27011, 'loss/train': 1.238690733909607} +03/04/2022 21:12:06 - INFO - codeparrot_training - Step 27012: {'lr': 0.000465584244389326, 'samples': 13830656, 'steps': 27012, 'loss/train': 2.2273831367492676} +03/04/2022 21:12:06 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/04/2022 21:12:11 - INFO - codeparrot_training - Step 27013: {'lr': 0.00046558155735137996, 'samples': 13831168, 'steps': 27013, 'loss/train': 2.580404758453369} +03/04/2022 21:12:14 - INFO - codeparrot_training - Step 27014: {'lr': 0.00046557887021629623, 'samples': 13831680, 'steps': 27014, 'loss/train': 1.9690332412719727} +03/04/2022 21:12:15 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/04/2022 21:12:20 - INFO - codeparrot_training - Step 27015: {'lr': 0.000465576182984076, 'samples': 13832192, 'steps': 27015, 'loss/train': 1.6849416494369507} +03/04/2022 21:12:23 - INFO - codeparrot_training - Step 27016: {'lr': 0.0004655734956547204, 'samples': 13832704, 'steps': 27016, 'loss/train': 1.5752344131469727} +03/04/2022 21:12:23 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 21:12:28 - INFO - codeparrot_training - Step 27017: {'lr': 0.00046557080822823076, 'samples': 13833216, 'steps': 27017, 'loss/train': 1.7027051448822021} +03/04/2022 21:12:31 - INFO - codeparrot_training - Step 27018: {'lr': 0.0004655681207046083, 'samples': 13833728, 'steps': 27018, 'loss/train': 2.068704128265381} +03/04/2022 21:12:31 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 21:12:36 - INFO - codeparrot_training - Step 27019: {'lr': 0.0004655654330838541, 'samples': 13834240, 'steps': 27019, 'loss/train': 1.6756867170333862} +03/04/2022 21:12:40 - INFO - codeparrot_training - Step 27020: {'lr': 0.00046556274536596945, 'samples': 13834752, 'steps': 27020, 'loss/train': 1.9486370086669922} +03/04/2022 21:12:40 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 21:12:45 - INFO - codeparrot_training - Step 27021: {'lr': 0.00046556005755095555, 'samples': 13835264, 'steps': 27021, 'loss/train': 1.689372181892395} +03/04/2022 21:12:48 - INFO - codeparrot_training - Step 27022: {'lr': 0.00046555736963881355, 'samples': 13835776, 'steps': 27022, 'loss/train': 1.9235975742340088} +03/04/2022 21:12:48 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/04/2022 21:12:53 - INFO - codeparrot_training - Step 27023: {'lr': 0.0004655546816295448, 'samples': 13836288, 'steps': 27023, 'loss/train': 1.2673035860061646} +03/04/2022 21:12:56 - INFO - codeparrot_training - Step 27024: {'lr': 0.0004655519935231505, 'samples': 13836800, 'steps': 27024, 'loss/train': 1.974639892578125} +03/04/2022 21:12:57 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 21:13:02 - INFO - codeparrot_training - Step 27025: {'lr': 0.00046554930531963166, 'samples': 13837312, 'steps': 27025, 'loss/train': 2.1420445442199707} +03/04/2022 21:13:05 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/04/2022 21:13:07 - INFO - codeparrot_training - Step 27026: {'lr': 0.0004655466170189897, 'samples': 13837824, 'steps': 27026, 'loss/train': 1.5582914352416992} +03/04/2022 21:13:10 - INFO - codeparrot_training - Step 27027: {'lr': 0.0004655439286212257, 'samples': 13838336, 'steps': 27027, 'loss/train': 2.1231091022491455} +03/04/2022 21:13:13 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 21:13:16 - INFO - codeparrot_training - Step 27028: {'lr': 0.00046554124012634105, 'samples': 13838848, 'steps': 27028, 'loss/train': 2.5949220657348633} +03/04/2022 21:13:19 - INFO - codeparrot_training - Step 27029: {'lr': 0.0004655385515343368, 'samples': 13839360, 'steps': 27029, 'loss/train': 0.4669930040836334} +03/04/2022 21:13:22 - INFO - codeparrot_training - Step 27030: {'lr': 0.0004655358628452142, 'samples': 13839872, 'steps': 27030, 'loss/train': 2.0689239501953125} +03/04/2022 21:13:27 - INFO - codeparrot_training - Step 27031: {'lr': 0.00046553317405897444, 'samples': 13840384, 'steps': 27031, 'loss/train': 1.818233847618103} +03/04/2022 21:13:30 - INFO - codeparrot_training - Step 27032: {'lr': 0.0004655304851756188, 'samples': 13840896, 'steps': 27032, 'loss/train': 1.768347978591919} +03/04/2022 21:13:30 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/04/2022 21:13:36 - INFO - codeparrot_training - Step 27033: {'lr': 0.0004655277961951484, 'samples': 13841408, 'steps': 27033, 'loss/train': 1.8392176628112793} +03/04/2022 21:13:39 - INFO - codeparrot_training - Step 27034: {'lr': 0.00046552510711756444, 'samples': 13841920, 'steps': 27034, 'loss/train': 1.8600338697433472} +03/04/2022 21:13:39 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 21:13:44 - INFO - codeparrot_training - Step 27035: {'lr': 0.0004655224179428683, 'samples': 13842432, 'steps': 27035, 'loss/train': 0.923130452632904} +03/04/2022 21:13:47 - INFO - codeparrot_training - Step 27036: {'lr': 0.00046551972867106106, 'samples': 13842944, 'steps': 27036, 'loss/train': 1.545621633529663} +03/04/2022 21:13:48 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/04/2022 21:13:53 - INFO - codeparrot_training - Step 27037: {'lr': 0.00046551703930214393, 'samples': 13843456, 'steps': 27037, 'loss/train': 2.0888125896453857} +03/04/2022 21:13:56 - INFO - codeparrot_training - Step 27038: {'lr': 0.00046551434983611823, 'samples': 13843968, 'steps': 27038, 'loss/train': 1.7190394401550293} +03/04/2022 21:13:56 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/04/2022 21:14:01 - INFO - codeparrot_training - Step 27039: {'lr': 0.00046551166027298505, 'samples': 13844480, 'steps': 27039, 'loss/train': 2.1900062561035156} +03/04/2022 21:14:04 - INFO - codeparrot_training - Step 27040: {'lr': 0.0004655089706127456, 'samples': 13844992, 'steps': 27040, 'loss/train': 1.9904708862304688} +03/04/2022 21:14:04 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 21:14:10 - INFO - codeparrot_training - Step 27041: {'lr': 0.00046550628085540114, 'samples': 13845504, 'steps': 27041, 'loss/train': 0.5434851050376892} +03/04/2022 21:14:13 - INFO - codeparrot_training - Step 27042: {'lr': 0.0004655035910009529, 'samples': 13846016, 'steps': 27042, 'loss/train': 1.2899688482284546} +03/04/2022 21:14:13 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 21:14:18 - INFO - codeparrot_training - Step 27043: {'lr': 0.00046550090104940207, 'samples': 13846528, 'steps': 27043, 'loss/train': 1.204062819480896} +03/04/2022 21:14:21 - INFO - codeparrot_training - Step 27044: {'lr': 0.00046549821100074987, 'samples': 13847040, 'steps': 27044, 'loss/train': 1.7272709608078003} +03/04/2022 21:14:21 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 21:14:27 - INFO - codeparrot_training - Step 27045: {'lr': 0.0004654955208549975, 'samples': 13847552, 'steps': 27045, 'loss/train': 1.7356256246566772} +03/04/2022 21:14:29 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 21:14:32 - INFO - codeparrot_training - Step 27046: {'lr': 0.0004654928306121461, 'samples': 13848064, 'steps': 27046, 'loss/train': 1.5773513317108154} +03/04/2022 21:14:35 - INFO - codeparrot_training - Step 27047: {'lr': 0.000465490140272197, 'samples': 13848576, 'steps': 27047, 'loss/train': 1.640571117401123} +03/04/2022 21:14:38 - INFO - codeparrot_training - Step 27048: {'lr': 0.00046548744983515133, 'samples': 13849088, 'steps': 27048, 'loss/train': 2.0742311477661133} +03/04/2022 21:14:38 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 21:14:43 - INFO - codeparrot_training - Step 27049: {'lr': 0.0004654847593010104, 'samples': 13849600, 'steps': 27049, 'loss/train': 1.584211826324463} +03/04/2022 21:14:47 - INFO - codeparrot_training - Step 27050: {'lr': 0.0004654820686697754, 'samples': 13850112, 'steps': 27050, 'loss/train': 2.6977977752685547} +03/04/2022 21:14:47 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 21:14:52 - INFO - codeparrot_training - Step 27051: {'lr': 0.00046547937794144743, 'samples': 13850624, 'steps': 27051, 'loss/train': 2.191925048828125} +03/04/2022 21:14:55 - INFO - codeparrot_training - Step 27052: {'lr': 0.00046547668711602774, 'samples': 13851136, 'steps': 27052, 'loss/train': 1.3612678050994873} +03/04/2022 21:14:55 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 21:15:00 - INFO - codeparrot_training - Step 27053: {'lr': 0.0004654739961935177, 'samples': 13851648, 'steps': 27053, 'loss/train': 1.8132718801498413} +03/04/2022 21:15:03 - INFO - codeparrot_training - Step 27054: {'lr': 0.0004654713051739183, 'samples': 13852160, 'steps': 27054, 'loss/train': 0.7594876885414124} +03/04/2022 21:15:03 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/04/2022 21:15:09 - INFO - codeparrot_training - Step 27055: {'lr': 0.000465468614057231, 'samples': 13852672, 'steps': 27055, 'loss/train': 2.0670742988586426} +03/04/2022 21:15:11 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 21:15:14 - INFO - codeparrot_training - Step 27056: {'lr': 0.0004654659228434567, 'samples': 13853184, 'steps': 27056, 'loss/train': 2.2369132041931152} +03/04/2022 21:15:17 - INFO - codeparrot_training - Step 27057: {'lr': 0.00046546323153259686, 'samples': 13853696, 'steps': 27057, 'loss/train': 1.8082598447799683} +03/04/2022 21:15:20 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/04/2022 21:15:22 - INFO - codeparrot_training - Step 27058: {'lr': 0.00046546054012465253, 'samples': 13854208, 'steps': 27058, 'loss/train': 0.7171269655227661} +03/04/2022 21:15:26 - INFO - codeparrot_training - Step 27059: {'lr': 0.00046545784861962516, 'samples': 13854720, 'steps': 27059, 'loss/train': 1.7160779237747192} +03/04/2022 21:15:28 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 21:15:31 - INFO - codeparrot_training - Step 27060: {'lr': 0.00046545515701751567, 'samples': 13855232, 'steps': 27060, 'loss/train': 2.1785833835601807} +03/04/2022 21:15:34 - INFO - codeparrot_training - Step 27061: {'lr': 0.00046545246531832547, 'samples': 13855744, 'steps': 27061, 'loss/train': 1.4626277685165405} +03/04/2022 21:15:37 - INFO - codeparrot_training - Step 27062: {'lr': 0.0004654497735220557, 'samples': 13856256, 'steps': 27062, 'loss/train': 1.1318156719207764} +03/04/2022 21:15:38 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 21:15:43 - INFO - codeparrot_training - Step 27063: {'lr': 0.0004654470816287076, 'samples': 13856768, 'steps': 27063, 'loss/train': 1.9653629064559937} +03/04/2022 21:15:46 - INFO - codeparrot_training - Step 27064: {'lr': 0.0004654443896382824, 'samples': 13857280, 'steps': 27064, 'loss/train': 1.7505704164505005} +03/04/2022 21:15:47 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/04/2022 21:15:51 - INFO - codeparrot_training - Step 27065: {'lr': 0.0004654416975507812, 'samples': 13857792, 'steps': 27065, 'loss/train': 1.4989372491836548} +03/04/2022 21:15:54 - INFO - codeparrot_training - Step 27066: {'lr': 0.0004654390053662053, 'samples': 13858304, 'steps': 27066, 'loss/train': 1.782608151435852} +03/04/2022 21:15:55 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 21:16:00 - INFO - codeparrot_training - Step 27067: {'lr': 0.000465436313084556, 'samples': 13858816, 'steps': 27067, 'loss/train': 1.6184111833572388} +03/04/2022 21:16:03 - INFO - codeparrot_training - Step 27068: {'lr': 0.0004654336207058344, 'samples': 13859328, 'steps': 27068, 'loss/train': 2.031097650527954} +03/04/2022 21:16:03 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 21:16:08 - INFO - codeparrot_training - Step 27069: {'lr': 0.0004654309282300416, 'samples': 13859840, 'steps': 27069, 'loss/train': 1.1097253561019897} +03/04/2022 21:16:11 - INFO - codeparrot_training - Step 27070: {'lr': 0.00046542823565717914, 'samples': 13860352, 'steps': 27070, 'loss/train': 1.6570360660552979} +03/04/2022 21:16:12 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 21:16:16 - INFO - codeparrot_training - Step 27071: {'lr': 0.00046542554298724793, 'samples': 13860864, 'steps': 27071, 'loss/train': 1.5371021032333374} +03/04/2022 21:16:20 - INFO - codeparrot_training - Step 27072: {'lr': 0.00046542285022024935, 'samples': 13861376, 'steps': 27072, 'loss/train': 2.0738537311553955} +03/04/2022 21:16:21 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 21:16:25 - INFO - codeparrot_training - Step 27073: {'lr': 0.0004654201573561845, 'samples': 13861888, 'steps': 27073, 'loss/train': 1.6445918083190918} +03/04/2022 21:16:28 - INFO - codeparrot_training - Step 27074: {'lr': 0.00046541746439505467, 'samples': 13862400, 'steps': 27074, 'loss/train': 1.8079501390457153} +03/04/2022 21:16:29 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 21:16:33 - INFO - codeparrot_training - Step 27075: {'lr': 0.00046541477133686107, 'samples': 13862912, 'steps': 27075, 'loss/train': 2.4497525691986084} +03/04/2022 21:16:37 - INFO - codeparrot_training - Step 27076: {'lr': 0.0004654120781816049, 'samples': 13863424, 'steps': 27076, 'loss/train': 2.1270925998687744} +03/04/2022 21:16:38 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 21:16:42 - INFO - codeparrot_training - Step 27077: {'lr': 0.00046540938492928735, 'samples': 13863936, 'steps': 27077, 'loss/train': 2.105628490447998} +03/04/2022 21:16:45 - INFO - codeparrot_training - Step 27078: {'lr': 0.0004654066915799097, 'samples': 13864448, 'steps': 27078, 'loss/train': 0.8273047208786011} +03/04/2022 21:16:47 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 21:16:50 - INFO - codeparrot_training - Step 27079: {'lr': 0.000465403998133473, 'samples': 13864960, 'steps': 27079, 'loss/train': 1.7959136962890625} +03/04/2022 21:16:53 - INFO - codeparrot_training - Step 27080: {'lr': 0.0004654013045899788, 'samples': 13865472, 'steps': 27080, 'loss/train': 1.8930463790893555} +03/04/2022 21:16:55 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 21:16:59 - INFO - codeparrot_training - Step 27081: {'lr': 0.00046539861094942794, 'samples': 13865984, 'steps': 27081, 'loss/train': 1.768653392791748} +03/04/2022 21:17:02 - INFO - codeparrot_training - Step 27082: {'lr': 0.00046539591721182175, 'samples': 13866496, 'steps': 27082, 'loss/train': 2.1163735389709473} +03/04/2022 21:17:03 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 21:17:07 - INFO - codeparrot_training - Step 27083: {'lr': 0.00046539322337716153, 'samples': 13867008, 'steps': 27083, 'loss/train': 2.0211970806121826} +03/04/2022 21:17:10 - INFO - codeparrot_training - Step 27084: {'lr': 0.00046539052944544846, 'samples': 13867520, 'steps': 27084, 'loss/train': 1.2630654573440552} +03/04/2022 21:17:11 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/04/2022 21:17:15 - INFO - codeparrot_training - Step 27085: {'lr': 0.0004653878354166838, 'samples': 13868032, 'steps': 27085, 'loss/train': 1.2577301263809204} +03/04/2022 21:17:19 - INFO - codeparrot_training - Step 27086: {'lr': 0.0004653851412908686, 'samples': 13868544, 'steps': 27086, 'loss/train': 1.9790763854980469} +03/04/2022 21:17:20 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/04/2022 21:17:24 - INFO - codeparrot_training - Step 27087: {'lr': 0.0004653824470680043, 'samples': 13869056, 'steps': 27087, 'loss/train': 2.145411252975464} +03/04/2022 21:17:27 - INFO - codeparrot_training - Step 27088: {'lr': 0.00046537975274809186, 'samples': 13869568, 'steps': 27088, 'loss/train': 1.602316975593567} +03/04/2022 21:17:28 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 21:17:32 - INFO - codeparrot_training - Step 27089: {'lr': 0.0004653770583311327, 'samples': 13870080, 'steps': 27089, 'loss/train': 1.7670884132385254} +03/04/2022 21:17:36 - INFO - codeparrot_training - Step 27090: {'lr': 0.00046537436381712796, 'samples': 13870592, 'steps': 27090, 'loss/train': 1.308382511138916} +03/04/2022 21:17:36 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/04/2022 21:17:41 - INFO - codeparrot_training - Step 27091: {'lr': 0.00046537166920607886, 'samples': 13871104, 'steps': 27091, 'loss/train': 1.324061393737793} +03/04/2022 21:17:44 - INFO - codeparrot_training - Step 27092: {'lr': 0.00046536897449798656, 'samples': 13871616, 'steps': 27092, 'loss/train': 1.8986238241195679} +03/04/2022 21:17:45 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/04/2022 21:17:49 - INFO - codeparrot_training - Step 27093: {'lr': 0.00046536627969285236, 'samples': 13872128, 'steps': 27093, 'loss/train': 1.3568392992019653} +03/04/2022 21:17:52 - INFO - codeparrot_training - Step 27094: {'lr': 0.0004653635847906774, 'samples': 13872640, 'steps': 27094, 'loss/train': 1.83497154712677} +03/04/2022 21:17:53 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 21:17:58 - INFO - codeparrot_training - Step 27095: {'lr': 0.000465360889791463, 'samples': 13873152, 'steps': 27095, 'loss/train': 0.9121497869491577} +03/04/2022 21:18:01 - INFO - codeparrot_training - Step 27096: {'lr': 0.0004653581946952103, 'samples': 13873664, 'steps': 27096, 'loss/train': 2.2708756923675537} +03/04/2022 21:18:02 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 21:18:06 - INFO - codeparrot_training - Step 27097: {'lr': 0.0004653554995019205, 'samples': 13874176, 'steps': 27097, 'loss/train': 2.119619607925415} +03/04/2022 21:18:10 - INFO - codeparrot_training - Step 27098: {'lr': 0.0004653528042115948, 'samples': 13874688, 'steps': 27098, 'loss/train': 1.403655767440796} +03/04/2022 21:18:12 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 21:18:15 - INFO - codeparrot_training - Step 27099: {'lr': 0.0004653501088242345, 'samples': 13875200, 'steps': 27099, 'loss/train': 2.693763494491577} +03/04/2022 21:18:18 - INFO - codeparrot_training - Step 27100: {'lr': 0.0004653474133398408, 'samples': 13875712, 'steps': 27100, 'loss/train': 1.301839828491211} +03/04/2022 21:18:20 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 21:18:23 - INFO - codeparrot_training - Step 27101: {'lr': 0.00046534471775841474, 'samples': 13876224, 'steps': 27101, 'loss/train': 1.7934544086456299} +03/04/2022 21:18:26 - INFO - codeparrot_training - Step 27102: {'lr': 0.0004653420220799578, 'samples': 13876736, 'steps': 27102, 'loss/train': 0.9241384863853455} +03/04/2022 21:18:29 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 21:18:32 - INFO - codeparrot_training - Step 27103: {'lr': 0.000465339326304471, 'samples': 13877248, 'steps': 27103, 'loss/train': 2.26963210105896} +03/04/2022 21:18:35 - INFO - codeparrot_training - Step 27104: {'lr': 0.0004653366304319556, 'samples': 13877760, 'steps': 27104, 'loss/train': 0.38335752487182617} +03/04/2022 21:18:37 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 21:18:40 - INFO - codeparrot_training - Step 27105: {'lr': 0.0004653339344624129, 'samples': 13878272, 'steps': 27105, 'loss/train': 1.5408906936645508} +03/04/2022 21:18:43 - INFO - codeparrot_training - Step 27106: {'lr': 0.00046533123839584406, 'samples': 13878784, 'steps': 27106, 'loss/train': 1.9943867921829224} +03/04/2022 21:18:45 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/04/2022 21:18:48 - INFO - codeparrot_training - Step 27107: {'lr': 0.0004653285422322503, 'samples': 13879296, 'steps': 27107, 'loss/train': 1.378122329711914} +03/04/2022 21:18:52 - INFO - codeparrot_training - Step 27108: {'lr': 0.00046532584597163275, 'samples': 13879808, 'steps': 27108, 'loss/train': 1.6634700298309326} +03/04/2022 21:18:54 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 21:18:57 - INFO - codeparrot_training - Step 27109: {'lr': 0.0004653231496139927, 'samples': 13880320, 'steps': 27109, 'loss/train': 1.0929324626922607} +03/04/2022 21:19:00 - INFO - codeparrot_training - Step 27110: {'lr': 0.0004653204531593315, 'samples': 13880832, 'steps': 27110, 'loss/train': 2.285989999771118} +03/04/2022 21:19:02 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/04/2022 21:19:05 - INFO - codeparrot_training - Step 27111: {'lr': 0.0004653177566076501, 'samples': 13881344, 'steps': 27111, 'loss/train': 1.6831626892089844} +03/04/2022 21:19:09 - INFO - codeparrot_training - Step 27112: {'lr': 0.0004653150599589498, 'samples': 13881856, 'steps': 27112, 'loss/train': 1.029488205909729} +03/04/2022 21:19:11 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 21:19:14 - INFO - codeparrot_training - Step 27113: {'lr': 0.0004653123632132319, 'samples': 13882368, 'steps': 27113, 'loss/train': 1.6527074575424194} +03/04/2022 21:19:17 - INFO - codeparrot_training - Step 27114: {'lr': 0.0004653096663704976, 'samples': 13882880, 'steps': 27114, 'loss/train': 2.0187742710113525} +03/04/2022 21:19:19 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 21:19:22 - INFO - codeparrot_training - Step 27115: {'lr': 0.0004653069694307481, 'samples': 13883392, 'steps': 27115, 'loss/train': 1.8555246591567993} +03/04/2022 21:19:25 - INFO - codeparrot_training - Step 27116: {'lr': 0.00046530427239398453, 'samples': 13883904, 'steps': 27116, 'loss/train': 1.7274163961410522} +03/04/2022 21:19:28 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 21:19:31 - INFO - codeparrot_training - Step 27117: {'lr': 0.0004653015752602082, 'samples': 13884416, 'steps': 27117, 'loss/train': 1.8092612028121948} +03/04/2022 21:19:34 - INFO - codeparrot_training - Step 27118: {'lr': 0.0004652988780294204, 'samples': 13884928, 'steps': 27118, 'loss/train': 0.1930076628923416} +03/04/2022 21:19:36 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 21:19:39 - INFO - codeparrot_training - Step 27119: {'lr': 0.00046529618070162215, 'samples': 13885440, 'steps': 27119, 'loss/train': 1.7767243385314941} +03/04/2022 21:19:42 - INFO - codeparrot_training - Step 27120: {'lr': 0.00046529348327681476, 'samples': 13885952, 'steps': 27120, 'loss/train': 1.3183544874191284} +03/04/2022 21:19:44 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 21:19:48 - INFO - codeparrot_training - Step 27121: {'lr': 0.0004652907857549995, 'samples': 13886464, 'steps': 27121, 'loss/train': 2.04373836517334} +03/04/2022 21:19:51 - INFO - codeparrot_training - Step 27122: {'lr': 0.0004652880881361775, 'samples': 13886976, 'steps': 27122, 'loss/train': 1.6949533224105835} +03/04/2022 21:19:53 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 21:19:56 - INFO - codeparrot_training - Step 27123: {'lr': 0.00046528539042035, 'samples': 13887488, 'steps': 27123, 'loss/train': 1.097642183303833} +03/04/2022 21:19:59 - INFO - codeparrot_training - Step 27124: {'lr': 0.0004652826926075183, 'samples': 13888000, 'steps': 27124, 'loss/train': 2.004002332687378} +03/04/2022 21:20:02 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 21:20:05 - INFO - codeparrot_training - Step 27125: {'lr': 0.00046527999469768346, 'samples': 13888512, 'steps': 27125, 'loss/train': 1.1716101169586182} +03/04/2022 21:20:08 - INFO - codeparrot_training - Step 27126: {'lr': 0.0004652772966908468, 'samples': 13889024, 'steps': 27126, 'loss/train': 2.0120861530303955} +03/04/2022 21:20:10 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 21:20:13 - INFO - codeparrot_training - Step 27127: {'lr': 0.0004652745985870095, 'samples': 13889536, 'steps': 27127, 'loss/train': 1.6473901271820068} +03/04/2022 21:20:16 - INFO - codeparrot_training - Step 27128: {'lr': 0.0004652719003861728, 'samples': 13890048, 'steps': 27128, 'loss/train': 2.232377529144287} +03/04/2022 21:20:19 - INFO - codeparrot_training - Step 27129: {'lr': 0.0004652692020883379, 'samples': 13890560, 'steps': 27129, 'loss/train': 1.521709680557251} +03/04/2022 21:20:19 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 21:20:25 - INFO - codeparrot_training - Step 27130: {'lr': 0.00046526650369350605, 'samples': 13891072, 'steps': 27130, 'loss/train': 1.2067619562149048} +03/04/2022 21:20:28 - INFO - codeparrot_training - Step 27131: {'lr': 0.0004652638052016784, 'samples': 13891584, 'steps': 27131, 'loss/train': 1.9540432691574097} +03/04/2022 21:20:28 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 21:20:33 - INFO - codeparrot_training - Step 27132: {'lr': 0.00046526110661285615, 'samples': 13892096, 'steps': 27132, 'loss/train': 1.8228216171264648} +03/04/2022 21:20:36 - INFO - codeparrot_training - Step 27133: {'lr': 0.00046525840792704064, 'samples': 13892608, 'steps': 27133, 'loss/train': 2.0725021362304688} +03/04/2022 21:20:36 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 21:20:42 - INFO - codeparrot_training - Step 27134: {'lr': 0.000465255709144233, 'samples': 13893120, 'steps': 27134, 'loss/train': 1.7328137159347534} +03/04/2022 21:20:45 - INFO - codeparrot_training - Step 27135: {'lr': 0.00046525301026443443, 'samples': 13893632, 'steps': 27135, 'loss/train': 1.2246339321136475} +03/04/2022 21:20:45 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 21:20:50 - INFO - codeparrot_training - Step 27136: {'lr': 0.0004652503112876463, 'samples': 13894144, 'steps': 27136, 'loss/train': 2.04892635345459} +03/04/2022 21:20:53 - INFO - codeparrot_training - Step 27137: {'lr': 0.00046524761221386956, 'samples': 13894656, 'steps': 27137, 'loss/train': 6.455905914306641} +03/04/2022 21:20:54 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 21:20:59 - INFO - codeparrot_training - Step 27138: {'lr': 0.0004652449130431056, 'samples': 13895168, 'steps': 27138, 'loss/train': 1.7534172534942627} +03/04/2022 21:21:02 - INFO - codeparrot_training - Step 27139: {'lr': 0.00046524221377535564, 'samples': 13895680, 'steps': 27139, 'loss/train': 1.3366974592208862} +03/04/2022 21:21:02 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 21:21:07 - INFO - codeparrot_training - Step 27140: {'lr': 0.00046523951441062087, 'samples': 13896192, 'steps': 27140, 'loss/train': 2.079841375350952} +03/04/2022 21:21:10 - INFO - codeparrot_training - Step 27141: {'lr': 0.0004652368149489024, 'samples': 13896704, 'steps': 27141, 'loss/train': 1.5781397819519043} +03/04/2022 21:21:11 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 21:21:16 - INFO - codeparrot_training - Step 27142: {'lr': 0.0004652341153902016, 'samples': 13897216, 'steps': 27142, 'loss/train': 2.0407118797302246} +03/04/2022 21:21:19 - INFO - codeparrot_training - Step 27143: {'lr': 0.00046523141573451965, 'samples': 13897728, 'steps': 27143, 'loss/train': 1.6682339906692505} +03/04/2022 21:21:20 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 21:21:24 - INFO - codeparrot_training - Step 27144: {'lr': 0.0004652287159818577, 'samples': 13898240, 'steps': 27144, 'loss/train': 2.272930383682251} +03/04/2022 21:21:27 - INFO - codeparrot_training - Step 27145: {'lr': 0.00046522601613221704, 'samples': 13898752, 'steps': 27145, 'loss/train': 2.5399696826934814} +03/04/2022 21:21:28 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/04/2022 21:21:33 - INFO - codeparrot_training - Step 27146: {'lr': 0.0004652233161855989, 'samples': 13899264, 'steps': 27146, 'loss/train': 1.852360486984253} +03/04/2022 21:21:36 - INFO - codeparrot_training - Step 27147: {'lr': 0.0004652206161420044, 'samples': 13899776, 'steps': 27147, 'loss/train': 2.4017114639282227} +03/04/2022 21:21:37 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 21:21:41 - INFO - codeparrot_training - Step 27148: {'lr': 0.00046521791600143483, 'samples': 13900288, 'steps': 27148, 'loss/train': 1.9020055532455444} +03/04/2022 21:21:44 - INFO - codeparrot_training - Step 27149: {'lr': 0.00046521521576389134, 'samples': 13900800, 'steps': 27149, 'loss/train': 0.9204671382904053} +03/04/2022 21:21:45 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 21:21:49 - INFO - codeparrot_training - Step 27150: {'lr': 0.00046521251542937524, 'samples': 13901312, 'steps': 27150, 'loss/train': 2.2946619987487793} +03/04/2022 21:21:53 - INFO - codeparrot_training - Step 27151: {'lr': 0.0004652098149978877, 'samples': 13901824, 'steps': 27151, 'loss/train': 2.7079246044158936} +03/04/2022 21:21:54 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 21:21:58 - INFO - codeparrot_training - Step 27152: {'lr': 0.00046520711446943, 'samples': 13902336, 'steps': 27152, 'loss/train': 2.4164223670959473} +03/04/2022 21:22:01 - INFO - codeparrot_training - Step 27153: {'lr': 0.0004652044138440032, 'samples': 13902848, 'steps': 27153, 'loss/train': 1.6536080837249756} +03/04/2022 21:22:03 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 21:22:06 - INFO - codeparrot_training - Step 27154: {'lr': 0.00046520171312160863, 'samples': 13903360, 'steps': 27154, 'loss/train': 2.0347158908843994} +03/04/2022 21:22:10 - INFO - codeparrot_training - Step 27155: {'lr': 0.00046519901230224756, 'samples': 13903872, 'steps': 27155, 'loss/train': 1.8984805345535278} +03/04/2022 21:22:11 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 21:22:15 - INFO - codeparrot_training - Step 27156: {'lr': 0.000465196311385921, 'samples': 13904384, 'steps': 27156, 'loss/train': 2.7802813053131104} +03/04/2022 21:22:18 - INFO - codeparrot_training - Step 27157: {'lr': 0.0004651936103726304, 'samples': 13904896, 'steps': 27157, 'loss/train': 2.2277681827545166} +03/04/2022 21:22:21 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 21:22:23 - INFO - codeparrot_training - Step 27158: {'lr': 0.0004651909092623769, 'samples': 13905408, 'steps': 27158, 'loss/train': 1.6847542524337769} +03/04/2022 21:22:26 - INFO - codeparrot_training - Step 27159: {'lr': 0.00046518820805516165, 'samples': 13905920, 'steps': 27159, 'loss/train': 0.9047752022743225} +03/04/2022 21:22:29 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 21:22:32 - INFO - codeparrot_training - Step 27160: {'lr': 0.0004651855067509859, 'samples': 13906432, 'steps': 27160, 'loss/train': 1.1378461122512817} +03/04/2022 21:22:35 - INFO - codeparrot_training - Step 27161: {'lr': 0.0004651828053498509, 'samples': 13906944, 'steps': 27161, 'loss/train': 1.6713186502456665} +03/04/2022 21:22:37 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/04/2022 21:22:40 - INFO - codeparrot_training - Step 27162: {'lr': 0.0004651801038517579, 'samples': 13907456, 'steps': 27162, 'loss/train': 2.3976619243621826} +03/04/2022 21:22:43 - INFO - codeparrot_training - Step 27163: {'lr': 0.000465177402256708, 'samples': 13907968, 'steps': 27163, 'loss/train': 2.3340606689453125} +03/04/2022 21:22:46 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/04/2022 21:22:49 - INFO - codeparrot_training - Step 27164: {'lr': 0.00046517470056470244, 'samples': 13908480, 'steps': 27164, 'loss/train': 1.6888781785964966} +03/04/2022 21:22:52 - INFO - codeparrot_training - Step 27165: {'lr': 0.00046517199877574257, 'samples': 13908992, 'steps': 27165, 'loss/train': 1.3941059112548828} +03/04/2022 21:22:54 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/04/2022 21:22:57 - INFO - codeparrot_training - Step 27166: {'lr': 0.0004651692968898295, 'samples': 13909504, 'steps': 27166, 'loss/train': 0.9268202781677246} +03/04/2022 21:23:00 - INFO - codeparrot_training - Step 27167: {'lr': 0.00046516659490696444, 'samples': 13910016, 'steps': 27167, 'loss/train': 2.930304527282715} +03/04/2022 21:23:03 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/04/2022 21:23:06 - INFO - codeparrot_training - Step 27168: {'lr': 0.0004651638928271487, 'samples': 13910528, 'steps': 27168, 'loss/train': 1.8597384691238403} +03/04/2022 21:23:09 - INFO - codeparrot_training - Step 27169: {'lr': 0.00046516119065038335, 'samples': 13911040, 'steps': 27169, 'loss/train': 1.8607821464538574} +03/04/2022 21:23:11 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/04/2022 21:23:14 - INFO - codeparrot_training - Step 27170: {'lr': 0.00046515848837666975, 'samples': 13911552, 'steps': 27170, 'loss/train': 1.6070096492767334} +03/04/2022 21:23:17 - INFO - codeparrot_training - Step 27171: {'lr': 0.00046515578600600895, 'samples': 13912064, 'steps': 27171, 'loss/train': 1.3144350051879883} +03/04/2022 21:23:20 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 21:23:23 - INFO - codeparrot_training - Step 27172: {'lr': 0.0004651530835384024, 'samples': 13912576, 'steps': 27172, 'loss/train': 2.287173271179199} +03/04/2022 21:23:26 - INFO - codeparrot_training - Step 27173: {'lr': 0.0004651503809738511, 'samples': 13913088, 'steps': 27173, 'loss/train': 2.231224775314331} +03/04/2022 21:23:28 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 21:23:31 - INFO - codeparrot_training - Step 27174: {'lr': 0.0004651476783123564, 'samples': 13913600, 'steps': 27174, 'loss/train': 1.8622173070907593} +03/04/2022 21:23:34 - INFO - codeparrot_training - Step 27175: {'lr': 0.00046514497555391946, 'samples': 13914112, 'steps': 27175, 'loss/train': 1.5029879808425903} +03/04/2022 21:23:37 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 21:23:40 - INFO - codeparrot_training - Step 27176: {'lr': 0.0004651422726985415, 'samples': 13914624, 'steps': 27176, 'loss/train': 1.9662816524505615} +03/04/2022 21:23:43 - INFO - codeparrot_training - Step 27177: {'lr': 0.00046513956974622377, 'samples': 13915136, 'steps': 27177, 'loss/train': 1.4699898958206177} +03/04/2022 21:23:45 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 21:23:48 - INFO - codeparrot_training - Step 27178: {'lr': 0.00046513686669696756, 'samples': 13915648, 'steps': 27178, 'loss/train': 1.7560211420059204} +03/04/2022 21:23:51 - INFO - codeparrot_training - Step 27179: {'lr': 0.00046513416355077386, 'samples': 13916160, 'steps': 27179, 'loss/train': 1.8316538333892822} +03/04/2022 21:23:53 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 21:23:56 - INFO - codeparrot_training - Step 27180: {'lr': 0.0004651314603076441, 'samples': 13916672, 'steps': 27180, 'loss/train': 2.3957161903381348} +03/04/2022 21:24:00 - INFO - codeparrot_training - Step 27181: {'lr': 0.00046512875696757937, 'samples': 13917184, 'steps': 27181, 'loss/train': 1.9911634922027588} +03/04/2022 21:24:02 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 21:24:05 - INFO - codeparrot_training - Step 27182: {'lr': 0.00046512605353058096, 'samples': 13917696, 'steps': 27182, 'loss/train': 1.1965152025222778} +03/04/2022 21:24:08 - INFO - codeparrot_training - Step 27183: {'lr': 0.00046512334999665006, 'samples': 13918208, 'steps': 27183, 'loss/train': 1.6050702333450317} +03/04/2022 21:24:11 - INFO - codeparrot_training - Step 27184: {'lr': 0.000465120646365788, 'samples': 13918720, 'steps': 27184, 'loss/train': 1.7720950841903687} +03/04/2022 21:24:11 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/04/2022 21:24:17 - INFO - codeparrot_training - Step 27185: {'lr': 0.0004651179426379958, 'samples': 13919232, 'steps': 27185, 'loss/train': 1.7109336853027344} +03/04/2022 21:24:20 - INFO - codeparrot_training - Step 27186: {'lr': 0.00046511523881327476, 'samples': 13919744, 'steps': 27186, 'loss/train': 1.0705214738845825} +03/04/2022 21:24:20 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 21:24:25 - INFO - codeparrot_training - Step 27187: {'lr': 0.00046511253489162616, 'samples': 13920256, 'steps': 27187, 'loss/train': 2.0839645862579346} +03/04/2022 21:24:28 - INFO - codeparrot_training - Step 27188: {'lr': 0.00046510983087305114, 'samples': 13920768, 'steps': 27188, 'loss/train': 0.9539011120796204} +03/04/2022 21:24:28 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 21:24:34 - INFO - codeparrot_training - Step 27189: {'lr': 0.00046510712675755094, 'samples': 13921280, 'steps': 27189, 'loss/train': 1.6290028095245361} +03/04/2022 21:24:37 - INFO - codeparrot_training - Step 27190: {'lr': 0.00046510442254512686, 'samples': 13921792, 'steps': 27190, 'loss/train': 2.3907692432403564} +03/04/2022 21:24:37 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 21:24:42 - INFO - codeparrot_training - Step 27191: {'lr': 0.00046510171823578, 'samples': 13922304, 'steps': 27191, 'loss/train': 1.9262198209762573} +03/04/2022 21:24:45 - INFO - codeparrot_training - Step 27192: {'lr': 0.0004650990138295116, 'samples': 13922816, 'steps': 27192, 'loss/train': 1.788630723953247} +03/04/2022 21:24:45 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/04/2022 21:24:51 - INFO - codeparrot_training - Step 27193: {'lr': 0.00046509630932632293, 'samples': 13923328, 'steps': 27193, 'loss/train': 1.9480499029159546} +03/04/2022 21:24:54 - INFO - codeparrot_training - Step 27194: {'lr': 0.0004650936047262152, 'samples': 13923840, 'steps': 27194, 'loss/train': 2.2159295082092285} +03/04/2022 21:24:54 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 21:24:59 - INFO - codeparrot_training - Step 27195: {'lr': 0.0004650909000291895, 'samples': 13924352, 'steps': 27195, 'loss/train': 1.3598031997680664} +03/04/2022 21:25:02 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 21:25:05 - INFO - codeparrot_training - Step 27196: {'lr': 0.00046508819523524724, 'samples': 13924864, 'steps': 27196, 'loss/train': 2.1528549194335938} +03/04/2022 21:25:08 - INFO - codeparrot_training - Step 27197: {'lr': 0.0004650854903443896, 'samples': 13925376, 'steps': 27197, 'loss/train': 1.8450297117233276} +03/04/2022 21:25:11 - INFO - codeparrot_training - Step 27198: {'lr': 0.00046508278535661775, 'samples': 13925888, 'steps': 27198, 'loss/train': 0.46189242601394653} +03/04/2022 21:25:16 - INFO - codeparrot_training - Step 27199: {'lr': 0.00046508008027193286, 'samples': 13926400, 'steps': 27199, 'loss/train': 1.4161683320999146} +03/04/2022 21:25:20 - INFO - codeparrot_training - Step 27200: {'lr': 0.0004650773750903363, 'samples': 13926912, 'steps': 27200, 'loss/train': 1.455668330192566} +03/04/2022 21:25:20 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/04/2022 21:25:25 - INFO - codeparrot_training - Step 27201: {'lr': 0.0004650746698118291, 'samples': 13927424, 'steps': 27201, 'loss/train': 1.8708664178848267} +03/04/2022 21:25:28 - INFO - codeparrot_training - Step 27202: {'lr': 0.0004650719644364126, 'samples': 13927936, 'steps': 27202, 'loss/train': 1.7596213817596436} +03/04/2022 21:25:28 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 21:25:33 - INFO - codeparrot_training - Step 27203: {'lr': 0.000465069258964088, 'samples': 13928448, 'steps': 27203, 'loss/train': 1.6614344120025635} +03/04/2022 21:25:37 - INFO - codeparrot_training - Step 27204: {'lr': 0.0004650665533948565, 'samples': 13928960, 'steps': 27204, 'loss/train': 1.894809365272522} +03/04/2022 21:25:37 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 21:25:42 - INFO - codeparrot_training - Step 27205: {'lr': 0.00046506384772871935, 'samples': 13929472, 'steps': 27205, 'loss/train': 1.5893926620483398} +03/04/2022 21:25:45 - INFO - codeparrot_training - Step 27206: {'lr': 0.0004650611419656777, 'samples': 13929984, 'steps': 27206, 'loss/train': 2.292898178100586} +03/04/2022 21:25:45 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 21:25:50 - INFO - codeparrot_training - Step 27207: {'lr': 0.0004650584361057328, 'samples': 13930496, 'steps': 27207, 'loss/train': 1.7369637489318848} +03/04/2022 21:25:53 - INFO - codeparrot_training - Step 27208: {'lr': 0.00046505573014888604, 'samples': 13931008, 'steps': 27208, 'loss/train': 2.0523242950439453} +03/04/2022 21:25:54 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/04/2022 21:25:59 - INFO - codeparrot_training - Step 27209: {'lr': 0.0004650530240951383, 'samples': 13931520, 'steps': 27209, 'loss/train': 2.0914368629455566} +03/04/2022 21:26:02 - INFO - codeparrot_training - Step 27210: {'lr': 0.0004650503179444911, 'samples': 13932032, 'steps': 27210, 'loss/train': 1.8827884197235107} +03/04/2022 21:26:03 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/04/2022 21:26:07 - INFO - codeparrot_training - Step 27211: {'lr': 0.00046504761169694555, 'samples': 13932544, 'steps': 27211, 'loss/train': 1.5371589660644531} +03/04/2022 21:26:10 - INFO - codeparrot_training - Step 27212: {'lr': 0.0004650449053525028, 'samples': 13933056, 'steps': 27212, 'loss/train': 1.7822381258010864} +03/04/2022 21:26:11 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 21:26:15 - INFO - codeparrot_training - Step 27213: {'lr': 0.00046504219891116416, 'samples': 13933568, 'steps': 27213, 'loss/train': 1.77534818649292} +03/04/2022 21:26:19 - INFO - codeparrot_training - Step 27214: {'lr': 0.0004650394923729309, 'samples': 13934080, 'steps': 27214, 'loss/train': 4.209066867828369} +03/04/2022 21:26:19 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 21:26:24 - INFO - codeparrot_training - Step 27215: {'lr': 0.00046503678573780403, 'samples': 13934592, 'steps': 27215, 'loss/train': 1.6322433948516846} +03/04/2022 21:26:27 - INFO - codeparrot_training - Step 27216: {'lr': 0.000465034079005785, 'samples': 13935104, 'steps': 27216, 'loss/train': 1.5035631656646729} +03/04/2022 21:26:27 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 21:26:32 - INFO - codeparrot_training - Step 27217: {'lr': 0.00046503137217687485, 'samples': 13935616, 'steps': 27217, 'loss/train': 0.9212533831596375} +03/04/2022 21:26:35 - INFO - codeparrot_training - Step 27218: {'lr': 0.0004650286652510749, 'samples': 13936128, 'steps': 27218, 'loss/train': 0.8663285374641418} +03/04/2022 21:26:36 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 21:26:41 - INFO - codeparrot_training - Step 27219: {'lr': 0.0004650259582283864, 'samples': 13936640, 'steps': 27219, 'loss/train': 1.5176759958267212} +03/04/2022 21:26:44 - INFO - codeparrot_training - Step 27220: {'lr': 0.0004650232511088105, 'samples': 13937152, 'steps': 27220, 'loss/train': 1.5205771923065186} +03/04/2022 21:26:44 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/04/2022 21:26:49 - INFO - codeparrot_training - Step 27221: {'lr': 0.00046502054389234844, 'samples': 13937664, 'steps': 27221, 'loss/train': 1.086166262626648} +03/04/2022 21:26:52 - INFO - codeparrot_training - Step 27222: {'lr': 0.0004650178365790014, 'samples': 13938176, 'steps': 27222, 'loss/train': 2.1769015789031982} +03/04/2022 21:26:52 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 21:26:58 - INFO - codeparrot_training - Step 27223: {'lr': 0.0004650151291687707, 'samples': 13938688, 'steps': 27223, 'loss/train': 1.879853367805481} +03/04/2022 21:27:01 - INFO - codeparrot_training - Step 27224: {'lr': 0.00046501242166165747, 'samples': 13939200, 'steps': 27224, 'loss/train': 1.0433379411697388} +03/04/2022 21:27:01 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 21:27:06 - INFO - codeparrot_training - Step 27225: {'lr': 0.000465009714057663, 'samples': 13939712, 'steps': 27225, 'loss/train': 2.0189249515533447} +03/04/2022 21:27:09 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/04/2022 21:27:11 - INFO - codeparrot_training - Step 27226: {'lr': 0.00046500700635678844, 'samples': 13940224, 'steps': 27226, 'loss/train': 2.252335786819458} +03/04/2022 21:27:14 - INFO - codeparrot_training - Step 27227: {'lr': 0.000465004298559035, 'samples': 13940736, 'steps': 27227, 'loss/train': 1.1944422721862793} +03/04/2022 21:27:17 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 21:27:20 - INFO - codeparrot_training - Step 27228: {'lr': 0.00046500159066440404, 'samples': 13941248, 'steps': 27228, 'loss/train': 0.44980719685554504} +03/04/2022 21:27:23 - INFO - codeparrot_training - Step 27229: {'lr': 0.0004649988826728966, 'samples': 13941760, 'steps': 27229, 'loss/train': 2.0220401287078857} +03/04/2022 21:27:26 - INFO - codeparrot_training - Step 27230: {'lr': 0.000464996174584514, 'samples': 13942272, 'steps': 27230, 'loss/train': 1.6536790132522583} +03/04/2022 21:27:26 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/04/2022 21:27:31 - INFO - codeparrot_training - Step 27231: {'lr': 0.00046499346639925746, 'samples': 13942784, 'steps': 27231, 'loss/train': 1.8055981397628784} +03/04/2022 21:27:34 - INFO - codeparrot_training - Step 27232: {'lr': 0.0004649907581171282, 'samples': 13943296, 'steps': 27232, 'loss/train': 2.357915163040161} +03/04/2022 21:27:34 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/04/2022 21:27:40 - INFO - codeparrot_training - Step 27233: {'lr': 0.00046498804973812735, 'samples': 13943808, 'steps': 27233, 'loss/train': 1.428297758102417} +03/04/2022 21:27:43 - INFO - codeparrot_training - Step 27234: {'lr': 0.00046498534126225625, 'samples': 13944320, 'steps': 27234, 'loss/train': 0.39849698543548584} +03/04/2022 21:27:44 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 21:27:49 - INFO - codeparrot_training - Step 27235: {'lr': 0.0004649826326895161, 'samples': 13944832, 'steps': 27235, 'loss/train': 1.4829034805297852} +03/04/2022 21:27:52 - INFO - codeparrot_training - Step 27236: {'lr': 0.0004649799240199081, 'samples': 13945344, 'steps': 27236, 'loss/train': 1.4370018243789673} +03/04/2022 21:27:52 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 21:27:57 - INFO - codeparrot_training - Step 27237: {'lr': 0.0004649772152534334, 'samples': 13945856, 'steps': 27237, 'loss/train': 2.1481642723083496} +03/04/2022 21:28:00 - INFO - codeparrot_training - Step 27238: {'lr': 0.0004649745063900933, 'samples': 13946368, 'steps': 27238, 'loss/train': 2.4609646797180176} +03/04/2022 21:28:01 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 21:28:05 - INFO - codeparrot_training - Step 27239: {'lr': 0.000464971797429889, 'samples': 13946880, 'steps': 27239, 'loss/train': 1.1149240732192993} +03/04/2022 21:28:08 - INFO - codeparrot_training - Step 27240: {'lr': 0.00046496908837282173, 'samples': 13947392, 'steps': 27240, 'loss/train': 1.6180450916290283} +03/04/2022 21:28:09 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/04/2022 21:28:14 - INFO - codeparrot_training - Step 27241: {'lr': 0.00046496637921889276, 'samples': 13947904, 'steps': 27241, 'loss/train': 2.0958292484283447} +03/04/2022 21:28:17 - INFO - codeparrot_training - Step 27242: {'lr': 0.0004649636699681031, 'samples': 13948416, 'steps': 27242, 'loss/train': 2.075373888015747} +03/04/2022 21:28:17 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/04/2022 21:28:22 - INFO - codeparrot_training - Step 27243: {'lr': 0.00046496096062045427, 'samples': 13948928, 'steps': 27243, 'loss/train': 1.2011469602584839} +03/04/2022 21:28:25 - INFO - codeparrot_training - Step 27244: {'lr': 0.00046495825117594735, 'samples': 13949440, 'steps': 27244, 'loss/train': 2.0066211223602295} +03/04/2022 21:28:26 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 21:28:31 - INFO - codeparrot_training - Step 27245: {'lr': 0.0004649555416345835, 'samples': 13949952, 'steps': 27245, 'loss/train': 2.4188411235809326} +03/04/2022 21:28:34 - INFO - codeparrot_training - Step 27246: {'lr': 0.0004649528319963641, 'samples': 13950464, 'steps': 27246, 'loss/train': 2.104572057723999} +03/04/2022 21:28:34 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 21:28:39 - INFO - codeparrot_training - Step 27247: {'lr': 0.0004649501222612901, 'samples': 13950976, 'steps': 27247, 'loss/train': 1.0064759254455566} +03/04/2022 21:28:42 - INFO - codeparrot_training - Step 27248: {'lr': 0.000464947412429363, 'samples': 13951488, 'steps': 27248, 'loss/train': 1.3773727416992188} +03/04/2022 21:28:42 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 21:28:48 - INFO - codeparrot_training - Step 27249: {'lr': 0.000464944702500584, 'samples': 13952000, 'steps': 27249, 'loss/train': 2.1093738079071045} +03/04/2022 21:28:51 - INFO - codeparrot_training - Step 27250: {'lr': 0.0004649419924749541, 'samples': 13952512, 'steps': 27250, 'loss/train': 1.824320673942566} +03/04/2022 21:28:52 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 21:28:56 - INFO - codeparrot_training - Step 27251: {'lr': 0.0004649392823524746, 'samples': 13953024, 'steps': 27251, 'loss/train': 1.4800001382827759} +03/04/2022 21:28:59 - INFO - codeparrot_training - Step 27252: {'lr': 0.0004649365721331469, 'samples': 13953536, 'steps': 27252, 'loss/train': 2.046095371246338} +03/04/2022 21:29:01 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 21:29:05 - INFO - codeparrot_training - Step 27253: {'lr': 0.00046493386181697206, 'samples': 13954048, 'steps': 27253, 'loss/train': 1.2677292823791504} +03/04/2022 21:29:08 - INFO - codeparrot_training - Step 27254: {'lr': 0.00046493115140395136, 'samples': 13954560, 'steps': 27254, 'loss/train': 2.1946308612823486} +03/04/2022 21:29:09 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/04/2022 21:29:13 - INFO - codeparrot_training - Step 27255: {'lr': 0.000464928440894086, 'samples': 13955072, 'steps': 27255, 'loss/train': 1.7067654132843018} +03/04/2022 21:29:16 - INFO - codeparrot_training - Step 27256: {'lr': 0.00046492573028737716, 'samples': 13955584, 'steps': 27256, 'loss/train': 1.880800485610962} +03/04/2022 21:29:18 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 21:29:22 - INFO - codeparrot_training - Step 27257: {'lr': 0.0004649230195838261, 'samples': 13956096, 'steps': 27257, 'loss/train': 1.8770759105682373} +03/04/2022 21:29:25 - INFO - codeparrot_training - Step 27258: {'lr': 0.00046492030878343406, 'samples': 13956608, 'steps': 27258, 'loss/train': 1.2926831245422363} +03/04/2022 21:29:26 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/04/2022 21:29:30 - INFO - codeparrot_training - Step 27259: {'lr': 0.00046491759788620227, 'samples': 13957120, 'steps': 27259, 'loss/train': 2.585660696029663} +03/04/2022 21:29:33 - INFO - codeparrot_training - Step 27260: {'lr': 0.0004649148868921319, 'samples': 13957632, 'steps': 27260, 'loss/train': 2.572108745574951} +03/04/2022 21:29:35 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/04/2022 21:29:38 - INFO - codeparrot_training - Step 27261: {'lr': 0.00046491217580122427, 'samples': 13958144, 'steps': 27261, 'loss/train': 1.8491166830062866} +03/04/2022 21:29:42 - INFO - codeparrot_training - Step 27262: {'lr': 0.00046490946461348045, 'samples': 13958656, 'steps': 27262, 'loss/train': 3.0542492866516113} +03/04/2022 21:29:43 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/04/2022 21:29:47 - INFO - codeparrot_training - Step 27263: {'lr': 0.00046490675332890177, 'samples': 13959168, 'steps': 27263, 'loss/train': 1.4080657958984375} +03/04/2022 21:29:50 - INFO - codeparrot_training - Step 27264: {'lr': 0.00046490404194748935, 'samples': 13959680, 'steps': 27264, 'loss/train': 0.6727156639099121} +03/04/2022 21:29:51 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 21:29:55 - INFO - codeparrot_training - Step 27265: {'lr': 0.00046490133046924457, 'samples': 13960192, 'steps': 27265, 'loss/train': 2.1778900623321533} +03/04/2022 21:29:59 - INFO - codeparrot_training - Step 27266: {'lr': 0.0004648986188941685, 'samples': 13960704, 'steps': 27266, 'loss/train': 1.8192154169082642} +03/04/2022 21:30:00 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 21:30:04 - INFO - codeparrot_training - Step 27267: {'lr': 0.0004648959072222625, 'samples': 13961216, 'steps': 27267, 'loss/train': 1.500523567199707} +03/04/2022 21:30:07 - INFO - codeparrot_training - Step 27268: {'lr': 0.0004648931954535277, 'samples': 13961728, 'steps': 27268, 'loss/train': 1.6341878175735474} +03/04/2022 21:30:08 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 21:30:12 - INFO - codeparrot_training - Step 27269: {'lr': 0.0004648904835879654, 'samples': 13962240, 'steps': 27269, 'loss/train': 1.3035786151885986} +03/04/2022 21:30:15 - INFO - codeparrot_training - Step 27270: {'lr': 0.0004648877716255766, 'samples': 13962752, 'steps': 27270, 'loss/train': 1.9258136749267578} +03/04/2022 21:30:17 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 21:30:21 - INFO - codeparrot_training - Step 27271: {'lr': 0.00046488505956636286, 'samples': 13963264, 'steps': 27271, 'loss/train': 2.2206928730010986} +03/04/2022 21:30:24 - INFO - codeparrot_training - Step 27272: {'lr': 0.0004648823474103251, 'samples': 13963776, 'steps': 27272, 'loss/train': 1.7389881610870361} +03/04/2022 21:30:25 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 21:30:29 - INFO - codeparrot_training - Step 27273: {'lr': 0.0004648796351574648, 'samples': 13964288, 'steps': 27273, 'loss/train': 1.4280964136123657} +03/04/2022 21:30:32 - INFO - codeparrot_training - Step 27274: {'lr': 0.0004648769228077829, 'samples': 13964800, 'steps': 27274, 'loss/train': 1.5894533395767212} +03/04/2022 21:30:38 - INFO - codeparrot_training - Step 27275: {'lr': 0.00046487421036128085, 'samples': 13965312, 'steps': 27275, 'loss/train': 1.7295185327529907} +03/04/2022 21:30:41 - INFO - codeparrot_training - Step 27276: {'lr': 0.00046487149781795976, 'samples': 13965824, 'steps': 27276, 'loss/train': 1.7408370971679688} +03/04/2022 21:30:42 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 21:30:46 - INFO - codeparrot_training - Step 27277: {'lr': 0.00046486878517782094, 'samples': 13966336, 'steps': 27277, 'loss/train': 1.9155091047286987} +03/04/2022 21:30:49 - INFO - codeparrot_training - Step 27278: {'lr': 0.0004648660724408656, 'samples': 13966848, 'steps': 27278, 'loss/train': 0.5985935926437378} +03/04/2022 21:30:50 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 21:30:55 - INFO - codeparrot_training - Step 27279: {'lr': 0.00046486335960709485, 'samples': 13967360, 'steps': 27279, 'loss/train': 1.571506381034851} +03/04/2022 21:30:58 - INFO - codeparrot_training - Step 27280: {'lr': 0.00046486064667651, 'samples': 13967872, 'steps': 27280, 'loss/train': 1.766096830368042} +03/04/2022 21:30:59 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/04/2022 21:31:03 - INFO - codeparrot_training - Step 27281: {'lr': 0.0004648579336491123, 'samples': 13968384, 'steps': 27281, 'loss/train': 1.37592613697052} +03/04/2022 21:31:06 - INFO - codeparrot_training - Step 27282: {'lr': 0.0004648552205249029, 'samples': 13968896, 'steps': 27282, 'loss/train': 1.9541445970535278} +03/04/2022 21:31:07 - INFO - codeparrot_training - Skipping example with length 553 (seq_length=1024) +03/04/2022 21:31:12 - INFO - codeparrot_training - Step 27283: {'lr': 0.000464852507303883, 'samples': 13969408, 'steps': 27283, 'loss/train': 2.6447255611419678} +03/04/2022 21:31:15 - INFO - codeparrot_training - Step 27284: {'lr': 0.0004648497939860539, 'samples': 13969920, 'steps': 27284, 'loss/train': 1.3173807859420776} +03/04/2022 21:31:16 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/04/2022 21:31:20 - INFO - codeparrot_training - Step 27285: {'lr': 0.0004648470805714169, 'samples': 13970432, 'steps': 27285, 'loss/train': 1.6244666576385498} +03/04/2022 21:31:23 - INFO - codeparrot_training - Step 27286: {'lr': 0.00046484436705997303, 'samples': 13970944, 'steps': 27286, 'loss/train': 2.420758008956909} +03/04/2022 21:31:25 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/04/2022 21:31:29 - INFO - codeparrot_training - Step 27287: {'lr': 0.0004648416534517236, 'samples': 13971456, 'steps': 27287, 'loss/train': 1.7559852600097656} +03/04/2022 21:31:32 - INFO - codeparrot_training - Step 27288: {'lr': 0.00046483893974666983, 'samples': 13971968, 'steps': 27288, 'loss/train': 1.3592007160186768} +03/04/2022 21:31:33 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 21:31:37 - INFO - codeparrot_training - Step 27289: {'lr': 0.000464836225944813, 'samples': 13972480, 'steps': 27289, 'loss/train': 0.6610035300254822} +03/04/2022 21:31:40 - INFO - codeparrot_training - Step 27290: {'lr': 0.00046483351204615423, 'samples': 13972992, 'steps': 27290, 'loss/train': 0.5085729956626892} +03/04/2022 21:31:42 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 21:31:45 - INFO - codeparrot_training - Step 27291: {'lr': 0.0004648307980506948, 'samples': 13973504, 'steps': 27291, 'loss/train': 1.7768157720565796} +03/04/2022 21:31:49 - INFO - codeparrot_training - Step 27292: {'lr': 0.00046482808395843594, 'samples': 13974016, 'steps': 27292, 'loss/train': 1.3744535446166992} +03/04/2022 21:31:50 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 21:31:54 - INFO - codeparrot_training - Step 27293: {'lr': 0.0004648253697693789, 'samples': 13974528, 'steps': 27293, 'loss/train': 1.9383039474487305} +03/04/2022 21:31:57 - INFO - codeparrot_training - Step 27294: {'lr': 0.0004648226554835248, 'samples': 13975040, 'steps': 27294, 'loss/train': 1.004044532775879} +03/04/2022 21:31:59 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 21:32:03 - INFO - codeparrot_training - Step 27295: {'lr': 0.000464819941100875, 'samples': 13975552, 'steps': 27295, 'loss/train': 0.9965910911560059} +03/04/2022 21:32:06 - INFO - codeparrot_training - Step 27296: {'lr': 0.00046481722662143057, 'samples': 13976064, 'steps': 27296, 'loss/train': 0.9804989099502563} +03/04/2022 21:32:07 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 21:32:11 - INFO - codeparrot_training - Step 27297: {'lr': 0.0004648145120451929, 'samples': 13976576, 'steps': 27297, 'loss/train': 2.134272336959839} +03/04/2022 21:32:14 - INFO - codeparrot_training - Step 27298: {'lr': 0.000464811797372163, 'samples': 13977088, 'steps': 27298, 'loss/train': 2.401169776916504} +03/04/2022 21:32:16 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 21:32:20 - INFO - codeparrot_training - Step 27299: {'lr': 0.00046480908260234234, 'samples': 13977600, 'steps': 27299, 'loss/train': 1.2475439310073853} +03/04/2022 21:32:23 - INFO - codeparrot_training - Step 27300: {'lr': 0.0004648063677357319, 'samples': 13978112, 'steps': 27300, 'loss/train': 2.148590564727783} +03/04/2022 21:32:24 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 21:32:28 - INFO - codeparrot_training - Step 27301: {'lr': 0.00046480365277233316, 'samples': 13978624, 'steps': 27301, 'loss/train': 3.2269961833953857} +03/04/2022 21:32:31 - INFO - codeparrot_training - Step 27302: {'lr': 0.00046480093771214716, 'samples': 13979136, 'steps': 27302, 'loss/train': 1.9284334182739258} +03/04/2022 21:32:33 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 21:32:37 - INFO - codeparrot_training - Step 27303: {'lr': 0.0004647982225551751, 'samples': 13979648, 'steps': 27303, 'loss/train': 2.0808684825897217} +03/04/2022 21:32:40 - INFO - codeparrot_training - Step 27304: {'lr': 0.0004647955073014184, 'samples': 13980160, 'steps': 27304, 'loss/train': 1.436845064163208} +03/04/2022 21:32:42 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 21:32:45 - INFO - codeparrot_training - Step 27305: {'lr': 0.00046479279195087804, 'samples': 13980672, 'steps': 27305, 'loss/train': 2.772162914276123} +03/04/2022 21:32:48 - INFO - codeparrot_training - Step 27306: {'lr': 0.0004647900765035554, 'samples': 13981184, 'steps': 27306, 'loss/train': 1.9187259674072266} +03/04/2022 21:32:50 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 21:32:54 - INFO - codeparrot_training - Step 27307: {'lr': 0.0004647873609594517, 'samples': 13981696, 'steps': 27307, 'loss/train': 2.2711098194122314} +03/04/2022 21:32:57 - INFO - codeparrot_training - Step 27308: {'lr': 0.0004647846453185681, 'samples': 13982208, 'steps': 27308, 'loss/train': 1.9040457010269165} +03/04/2022 21:32:59 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/04/2022 21:33:03 - INFO - codeparrot_training - Step 27309: {'lr': 0.0004647819295809059, 'samples': 13982720, 'steps': 27309, 'loss/train': 1.8028217554092407} +03/04/2022 21:33:06 - INFO - codeparrot_training - Step 27310: {'lr': 0.00046477921374646624, 'samples': 13983232, 'steps': 27310, 'loss/train': 3.1039977073669434} +03/04/2022 21:33:09 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 21:33:11 - INFO - codeparrot_training - Step 27311: {'lr': 0.0004647764978152503, 'samples': 13983744, 'steps': 27311, 'loss/train': 1.920505404472351} +03/04/2022 21:33:14 - INFO - codeparrot_training - Step 27312: {'lr': 0.0004647737817872595, 'samples': 13984256, 'steps': 27312, 'loss/train': 2.512624502182007} +03/04/2022 21:33:18 - INFO - codeparrot_training - Step 27313: {'lr': 0.0004647710656624949, 'samples': 13984768, 'steps': 27313, 'loss/train': 0.24730858206748962} +03/04/2022 21:33:18 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 21:33:23 - INFO - codeparrot_training - Step 27314: {'lr': 0.0004647683494409578, 'samples': 13985280, 'steps': 27314, 'loss/train': 2.483790159225464} +03/04/2022 21:33:26 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 21:33:28 - INFO - codeparrot_training - Step 27315: {'lr': 0.0004647656331226494, 'samples': 13985792, 'steps': 27315, 'loss/train': 1.7700674533843994} +03/04/2022 21:33:31 - INFO - codeparrot_training - Step 27316: {'lr': 0.0004647629167075709, 'samples': 13986304, 'steps': 27316, 'loss/train': 1.3436956405639648} +03/04/2022 21:33:35 - INFO - codeparrot_training - Step 27317: {'lr': 0.00046476020019572354, 'samples': 13986816, 'steps': 27317, 'loss/train': 1.9957822561264038} +03/04/2022 21:33:35 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 21:33:40 - INFO - codeparrot_training - Step 27318: {'lr': 0.00046475748358710856, 'samples': 13987328, 'steps': 27318, 'loss/train': 1.872269630432129} +03/04/2022 21:33:43 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 21:33:45 - INFO - codeparrot_training - Step 27319: {'lr': 0.0004647547668817271, 'samples': 13987840, 'steps': 27319, 'loss/train': 2.207102060317993} +03/04/2022 21:33:48 - INFO - codeparrot_training - Step 27320: {'lr': 0.00046475205007958054, 'samples': 13988352, 'steps': 27320, 'loss/train': 2.4627227783203125} +03/04/2022 21:33:51 - INFO - codeparrot_training - Step 27321: {'lr': 0.00046474933318067004, 'samples': 13988864, 'steps': 27321, 'loss/train': 1.6673823595046997} +03/04/2022 21:33:51 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/04/2022 21:33:57 - INFO - codeparrot_training - Step 27322: {'lr': 0.0004647466161849968, 'samples': 13989376, 'steps': 27322, 'loss/train': 1.8202558755874634} +03/04/2022 21:34:00 - INFO - codeparrot_training - Step 27323: {'lr': 0.000464743899092562, 'samples': 13989888, 'steps': 27323, 'loss/train': 2.1119251251220703} +03/04/2022 21:34:00 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 21:34:05 - INFO - codeparrot_training - Step 27324: {'lr': 0.0004647411819033669, 'samples': 13990400, 'steps': 27324, 'loss/train': 2.1649341583251953} +03/04/2022 21:34:08 - INFO - codeparrot_training - Skipping example with length 39 (seq_length=1024) +03/04/2022 21:34:10 - INFO - codeparrot_training - Step 27325: {'lr': 0.00046473846461741276, 'samples': 13990912, 'steps': 27325, 'loss/train': 1.8168530464172363} +03/04/2022 21:34:14 - INFO - codeparrot_training - Step 27326: {'lr': 0.0004647357472347008, 'samples': 13991424, 'steps': 27326, 'loss/train': 2.44339919090271} +03/04/2022 21:34:16 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 21:34:19 - INFO - codeparrot_training - Step 27327: {'lr': 0.00046473302975523224, 'samples': 13991936, 'steps': 27327, 'loss/train': 1.5548832416534424} +03/04/2022 21:34:22 - INFO - codeparrot_training - Step 27328: {'lr': 0.0004647303121790082, 'samples': 13992448, 'steps': 27328, 'loss/train': 1.8480143547058105} +03/04/2022 21:34:25 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 21:34:27 - INFO - codeparrot_training - Step 27329: {'lr': 0.0004647275945060301, 'samples': 13992960, 'steps': 27329, 'loss/train': 1.090482473373413} +03/04/2022 21:34:31 - INFO - codeparrot_training - Step 27330: {'lr': 0.000464724876736299, 'samples': 13993472, 'steps': 27330, 'loss/train': 2.0289106369018555} +03/04/2022 21:34:33 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 21:34:36 - INFO - codeparrot_training - Step 27331: {'lr': 0.00046472215886981616, 'samples': 13993984, 'steps': 27331, 'loss/train': 1.5530024766921997} +03/04/2022 21:34:39 - INFO - codeparrot_training - Step 27332: {'lr': 0.00046471944090658294, 'samples': 13994496, 'steps': 27332, 'loss/train': 1.7373859882354736} +03/04/2022 21:34:42 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 21:34:45 - INFO - codeparrot_training - Step 27333: {'lr': 0.0004647167228466004, 'samples': 13995008, 'steps': 27333, 'loss/train': 2.1907501220703125} +03/04/2022 21:34:48 - INFO - codeparrot_training - Step 27334: {'lr': 0.0004647140046898697, 'samples': 13995520, 'steps': 27334, 'loss/train': 2.283541679382324} +03/04/2022 21:34:51 - INFO - codeparrot_training - Step 27335: {'lr': 0.0004647112864363923, 'samples': 13996032, 'steps': 27335, 'loss/train': 3.3142378330230713} +03/04/2022 21:34:52 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 21:34:56 - INFO - codeparrot_training - Step 27336: {'lr': 0.00046470856808616934, 'samples': 13996544, 'steps': 27336, 'loss/train': 1.3447926044464111} +03/04/2022 21:34:59 - INFO - codeparrot_training - Step 27337: {'lr': 0.0004647058496392019, 'samples': 13997056, 'steps': 27337, 'loss/train': 1.9790986776351929} +03/04/2022 21:35:00 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/04/2022 21:35:05 - INFO - codeparrot_training - Step 27338: {'lr': 0.0004647031310954914, 'samples': 13997568, 'steps': 27338, 'loss/train': 2.673316240310669} +03/04/2022 21:35:08 - INFO - codeparrot_training - Step 27339: {'lr': 0.00046470041245503895, 'samples': 13998080, 'steps': 27339, 'loss/train': 1.678635835647583} +03/04/2022 21:35:09 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 21:35:13 - INFO - codeparrot_training - Step 27340: {'lr': 0.0004646976937178459, 'samples': 13998592, 'steps': 27340, 'loss/train': 1.8614041805267334} +03/04/2022 21:35:16 - INFO - codeparrot_training - Step 27341: {'lr': 0.0004646949748839132, 'samples': 13999104, 'steps': 27341, 'loss/train': 1.4324384927749634} +03/04/2022 21:35:18 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 21:35:22 - INFO - codeparrot_training - Step 27342: {'lr': 0.0004646922559532424, 'samples': 13999616, 'steps': 27342, 'loss/train': 2.4837899208068848} +03/04/2022 21:35:25 - INFO - codeparrot_training - Step 27343: {'lr': 0.0004646895369258345, 'samples': 14000128, 'steps': 27343, 'loss/train': 1.9539533853530884} +03/04/2022 21:35:26 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 21:35:30 - INFO - codeparrot_training - Step 27344: {'lr': 0.00046468681780169086, 'samples': 14000640, 'steps': 27344, 'loss/train': 1.9839603900909424} +03/04/2022 21:35:33 - INFO - codeparrot_training - Step 27345: {'lr': 0.0004646840985808126, 'samples': 14001152, 'steps': 27345, 'loss/train': 1.9265379905700684} +03/04/2022 21:35:34 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 21:35:39 - INFO - codeparrot_training - Step 27346: {'lr': 0.0004646813792632011, 'samples': 14001664, 'steps': 27346, 'loss/train': 1.5810306072235107} +03/04/2022 21:35:42 - INFO - codeparrot_training - Step 27347: {'lr': 0.00046467865984885736, 'samples': 14002176, 'steps': 27347, 'loss/train': 2.1293587684631348} +03/04/2022 21:35:45 - INFO - codeparrot_training - Step 27348: {'lr': 0.0004646759403377828, 'samples': 14002688, 'steps': 27348, 'loss/train': 2.6820826530456543} +03/04/2022 21:35:45 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/04/2022 21:35:51 - INFO - codeparrot_training - Step 27349: {'lr': 0.00046467322072997865, 'samples': 14003200, 'steps': 27349, 'loss/train': 0.4234185218811035} +03/04/2022 21:35:54 - INFO - codeparrot_training - Step 27350: {'lr': 0.00046467050102544594, 'samples': 14003712, 'steps': 27350, 'loss/train': 2.3875370025634766} +03/04/2022 21:35:54 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/04/2022 21:35:59 - INFO - codeparrot_training - Step 27351: {'lr': 0.0004646677812241861, 'samples': 14004224, 'steps': 27351, 'loss/train': 1.9230246543884277} +03/04/2022 21:36:02 - INFO - codeparrot_training - Step 27352: {'lr': 0.0004646650613262001, 'samples': 14004736, 'steps': 27352, 'loss/train': 0.4853203296661377} +03/04/2022 21:36:03 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 21:36:08 - INFO - codeparrot_training - Step 27353: {'lr': 0.00046466234133148957, 'samples': 14005248, 'steps': 27353, 'loss/train': 2.0545365810394287} +03/04/2022 21:36:11 - INFO - codeparrot_training - Step 27354: {'lr': 0.00046465962124005535, 'samples': 14005760, 'steps': 27354, 'loss/train': 1.6933728456497192} +03/04/2022 21:36:11 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/04/2022 21:36:16 - INFO - codeparrot_training - Step 27355: {'lr': 0.0004646569010518988, 'samples': 14006272, 'steps': 27355, 'loss/train': 1.5168442726135254} +03/04/2022 21:36:19 - INFO - codeparrot_training - Step 27356: {'lr': 0.00046465418076702125, 'samples': 14006784, 'steps': 27356, 'loss/train': 2.1320295333862305} +03/04/2022 21:36:19 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/04/2022 21:36:25 - INFO - codeparrot_training - Step 27357: {'lr': 0.00046465146038542375, 'samples': 14007296, 'steps': 27357, 'loss/train': 2.427990198135376} +03/04/2022 21:36:28 - INFO - codeparrot_training - Step 27358: {'lr': 0.0004646487399071077, 'samples': 14007808, 'steps': 27358, 'loss/train': 1.6325488090515137} +03/04/2022 21:36:28 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/04/2022 21:36:33 - INFO - codeparrot_training - Step 27359: {'lr': 0.00046464601933207417, 'samples': 14008320, 'steps': 27359, 'loss/train': 1.868369698524475} +03/04/2022 21:36:36 - INFO - codeparrot_training - Step 27360: {'lr': 0.0004646432986603245, 'samples': 14008832, 'steps': 27360, 'loss/train': 1.950134515762329} +03/04/2022 21:36:36 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/04/2022 21:36:42 - INFO - codeparrot_training - Step 27361: {'lr': 0.00046464057789185985, 'samples': 14009344, 'steps': 27361, 'loss/train': 1.0225882530212402} +03/04/2022 21:36:45 - INFO - codeparrot_training - Step 27362: {'lr': 0.00046463785702668156, 'samples': 14009856, 'steps': 27362, 'loss/train': 1.4361827373504639} +03/04/2022 21:36:45 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/04/2022 21:36:50 - INFO - codeparrot_training - Step 27363: {'lr': 0.0004646351360647907, 'samples': 14010368, 'steps': 27363, 'loss/train': 2.1005513668060303} +03/04/2022 21:36:53 - INFO - codeparrot_training - Step 27364: {'lr': 0.00046463241500618846, 'samples': 14010880, 'steps': 27364, 'loss/train': 1.7549381256103516} +03/04/2022 21:36:53 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 21:36:58 - INFO - codeparrot_training - Step 27365: {'lr': 0.00046462969385087626, 'samples': 14011392, 'steps': 27365, 'loss/train': 1.912890911102295} +03/04/2022 21:37:02 - INFO - codeparrot_training - Step 27366: {'lr': 0.00046462697259885523, 'samples': 14011904, 'steps': 27366, 'loss/train': 1.626736044883728} +03/04/2022 21:37:02 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 21:37:07 - INFO - codeparrot_training - Step 27367: {'lr': 0.0004646242512501266, 'samples': 14012416, 'steps': 27367, 'loss/train': 0.9463950991630554} +03/04/2022 21:37:10 - INFO - codeparrot_training - Step 27368: {'lr': 0.0004646215298046916, 'samples': 14012928, 'steps': 27368, 'loss/train': 1.5539534091949463} +03/04/2022 21:37:10 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 21:37:15 - INFO - codeparrot_training - Step 27369: {'lr': 0.00046461880826255143, 'samples': 14013440, 'steps': 27369, 'loss/train': 1.0024809837341309} +03/04/2022 21:37:19 - INFO - codeparrot_training - Step 27370: {'lr': 0.00046461608662370734, 'samples': 14013952, 'steps': 27370, 'loss/train': 2.000147819519043} +03/04/2022 21:37:19 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/04/2022 21:37:24 - INFO - codeparrot_training - Step 27371: {'lr': 0.0004646133648881606, 'samples': 14014464, 'steps': 27371, 'loss/train': 1.0557485818862915} +03/04/2022 21:37:27 - INFO - codeparrot_training - Step 27372: {'lr': 0.00046461064305591235, 'samples': 14014976, 'steps': 27372, 'loss/train': 1.2884052991867065} +03/04/2022 21:37:27 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 21:37:32 - INFO - codeparrot_training - Step 27373: {'lr': 0.00046460792112696384, 'samples': 14015488, 'steps': 27373, 'loss/train': 1.8098361492156982} +03/04/2022 21:37:35 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 21:37:38 - INFO - codeparrot_training - Step 27374: {'lr': 0.0004646051991013163, 'samples': 14016000, 'steps': 27374, 'loss/train': 1.5951982736587524} +03/04/2022 21:37:41 - INFO - codeparrot_training - Step 27375: {'lr': 0.000464602476978971, 'samples': 14016512, 'steps': 27375, 'loss/train': 2.246730089187622} +03/04/2022 21:37:44 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 21:37:46 - INFO - codeparrot_training - Step 27376: {'lr': 0.00046459975475992914, 'samples': 14017024, 'steps': 27376, 'loss/train': 2.352454423904419} +03/04/2022 21:37:49 - INFO - codeparrot_training - Step 27377: {'lr': 0.00046459703244419194, 'samples': 14017536, 'steps': 27377, 'loss/train': 0.49017319083213806} +03/04/2022 21:37:52 - INFO - codeparrot_training - Step 27378: {'lr': 0.0004645943100317606, 'samples': 14018048, 'steps': 27378, 'loss/train': 1.823350429534912} +03/04/2022 21:37:52 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/04/2022 21:37:58 - INFO - codeparrot_training - Step 27379: {'lr': 0.00046459158752263643, 'samples': 14018560, 'steps': 27379, 'loss/train': 0.17725194990634918} +03/04/2022 21:38:01 - INFO - codeparrot_training - Step 27380: {'lr': 0.0004645888649168205, 'samples': 14019072, 'steps': 27380, 'loss/train': 0.8929397463798523} +03/04/2022 21:38:01 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 21:38:06 - INFO - codeparrot_training - Step 27381: {'lr': 0.0004645861422143143, 'samples': 14019584, 'steps': 27381, 'loss/train': 1.661621332168579} +03/04/2022 21:38:10 - INFO - codeparrot_training - Step 27382: {'lr': 0.0004645834194151187, 'samples': 14020096, 'steps': 27382, 'loss/train': 0.8645858764648438} +03/04/2022 21:38:10 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/04/2022 21:38:15 - INFO - codeparrot_training - Step 27383: {'lr': 0.0004645806965192353, 'samples': 14020608, 'steps': 27383, 'loss/train': 2.057730197906494} +03/04/2022 21:38:18 - INFO - codeparrot_training - Step 27384: {'lr': 0.000464577973526665, 'samples': 14021120, 'steps': 27384, 'loss/train': 1.7803815603256226} +03/04/2022 21:38:18 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/04/2022 21:38:24 - INFO - codeparrot_training - Step 27385: {'lr': 0.00046457525043740926, 'samples': 14021632, 'steps': 27385, 'loss/train': 2.674163579940796} +03/04/2022 21:38:27 - INFO - codeparrot_training - Step 27386: {'lr': 0.0004645725272514693, 'samples': 14022144, 'steps': 27386, 'loss/train': 1.463183045387268} +03/04/2022 21:38:27 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 21:38:32 - INFO - codeparrot_training - Step 27387: {'lr': 0.0004645698039688461, 'samples': 14022656, 'steps': 27387, 'loss/train': 2.2811574935913086} +03/04/2022 21:38:35 - INFO - codeparrot_training - Step 27388: {'lr': 0.00046456708058954116, 'samples': 14023168, 'steps': 27388, 'loss/train': 2.0511343479156494} +03/04/2022 21:38:36 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/04/2022 21:38:40 - INFO - codeparrot_training - Step 27389: {'lr': 0.0004645643571135556, 'samples': 14023680, 'steps': 27389, 'loss/train': 1.2097456455230713} +03/04/2022 21:38:44 - INFO - codeparrot_training - Step 27390: {'lr': 0.00046456163354089065, 'samples': 14024192, 'steps': 27390, 'loss/train': 1.3479502201080322} +03/04/2022 21:38:44 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 21:38:49 - INFO - codeparrot_training - Step 27391: {'lr': 0.00046455890987154747, 'samples': 14024704, 'steps': 27391, 'loss/train': 1.5953457355499268} +03/04/2022 21:38:52 - INFO - codeparrot_training - Step 27392: {'lr': 0.0004645561861055274, 'samples': 14025216, 'steps': 27392, 'loss/train': 1.46274733543396} +03/04/2022 21:38:53 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 21:38:57 - INFO - codeparrot_training - Step 27393: {'lr': 0.00046455346224283167, 'samples': 14025728, 'steps': 27393, 'loss/train': 1.3379813432693481} +03/04/2022 21:39:00 - INFO - codeparrot_training - Step 27394: {'lr': 0.00046455073828346137, 'samples': 14026240, 'steps': 27394, 'loss/train': 1.7677438259124756} +03/04/2022 21:39:01 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 21:39:06 - INFO - codeparrot_training - Step 27395: {'lr': 0.0004645480142274179, 'samples': 14026752, 'steps': 27395, 'loss/train': 0.8622795939445496} +03/04/2022 21:39:09 - INFO - codeparrot_training - Step 27396: {'lr': 0.0004645452900747024, 'samples': 14027264, 'steps': 27396, 'loss/train': 1.7063603401184082} +03/04/2022 21:39:10 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 21:39:14 - INFO - codeparrot_training - Step 27397: {'lr': 0.00046454256582531604, 'samples': 14027776, 'steps': 27397, 'loss/train': 1.7681043148040771} +03/04/2022 21:39:17 - INFO - codeparrot_training - Step 27398: {'lr': 0.0004645398414792602, 'samples': 14028288, 'steps': 27398, 'loss/train': 1.0030204057693481} +03/04/2022 21:39:18 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 21:39:23 - INFO - codeparrot_training - Step 27399: {'lr': 0.000464537117036536, 'samples': 14028800, 'steps': 27399, 'loss/train': 1.2784544229507446} +03/04/2022 21:39:26 - INFO - codeparrot_training - Step 27400: {'lr': 0.00046453439249714466, 'samples': 14029312, 'steps': 27400, 'loss/train': 1.9824109077453613} +03/04/2022 21:39:26 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 21:39:31 - INFO - codeparrot_training - Step 27401: {'lr': 0.00046453166786108736, 'samples': 14029824, 'steps': 27401, 'loss/train': 2.03820538520813} +03/04/2022 21:39:34 - INFO - codeparrot_training - Step 27402: {'lr': 0.00046452894312836547, 'samples': 14030336, 'steps': 27402, 'loss/train': 2.2694809436798096} +03/04/2022 21:39:36 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 21:39:40 - INFO - codeparrot_training - Step 27403: {'lr': 0.0004645262182989802, 'samples': 14030848, 'steps': 27403, 'loss/train': 2.384436845779419} +03/04/2022 21:39:43 - INFO - codeparrot_training - Step 27404: {'lr': 0.0004645234933729327, 'samples': 14031360, 'steps': 27404, 'loss/train': 1.7128311395645142} +03/04/2022 21:39:44 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 21:39:48 - INFO - codeparrot_training - Step 27405: {'lr': 0.00046452076835022416, 'samples': 14031872, 'steps': 27405, 'loss/train': 1.5785454511642456} +03/04/2022 21:39:51 - INFO - codeparrot_training - Step 27406: {'lr': 0.0004645180432308559, 'samples': 14032384, 'steps': 27406, 'loss/train': 1.8436830043792725} +03/04/2022 21:39:52 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 21:39:57 - INFO - codeparrot_training - Step 27407: {'lr': 0.00046451531801482913, 'samples': 14032896, 'steps': 27407, 'loss/train': 2.10463809967041} +03/04/2022 21:40:00 - INFO - codeparrot_training - Step 27408: {'lr': 0.00046451259270214505, 'samples': 14033408, 'steps': 27408, 'loss/train': 2.0184102058410645} +03/04/2022 21:40:01 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 21:40:05 - INFO - codeparrot_training - Step 27409: {'lr': 0.00046450986729280495, 'samples': 14033920, 'steps': 27409, 'loss/train': 2.0944406986236572} +03/04/2022 21:40:08 - INFO - codeparrot_training - Step 27410: {'lr': 0.00046450714178680996, 'samples': 14034432, 'steps': 27410, 'loss/train': 1.5075424909591675} +03/04/2022 21:40:10 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 21:40:13 - INFO - codeparrot_training - Step 27411: {'lr': 0.0004645044161841614, 'samples': 14034944, 'steps': 27411, 'loss/train': 2.4431211948394775} +03/04/2022 21:40:17 - INFO - codeparrot_training - Step 27412: {'lr': 0.00046450169048486045, 'samples': 14035456, 'steps': 27412, 'loss/train': 1.0280897617340088} +03/04/2022 21:40:18 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 21:40:22 - INFO - codeparrot_training - Step 27413: {'lr': 0.0004644989646889084, 'samples': 14035968, 'steps': 27413, 'loss/train': 2.125209093093872} +03/04/2022 21:40:25 - INFO - codeparrot_training - Step 27414: {'lr': 0.0004644962387963063, 'samples': 14036480, 'steps': 27414, 'loss/train': 1.9043469429016113} +03/04/2022 21:40:26 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/04/2022 21:40:30 - INFO - codeparrot_training - Step 27415: {'lr': 0.0004644935128070556, 'samples': 14036992, 'steps': 27415, 'loss/train': 1.982437252998352} +03/04/2022 21:40:33 - INFO - codeparrot_training - Step 27416: {'lr': 0.0004644907867211574, 'samples': 14037504, 'steps': 27416, 'loss/train': 2.3460440635681152} +03/04/2022 21:40:35 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/04/2022 21:40:39 - INFO - codeparrot_training - Step 27417: {'lr': 0.000464488060538613, 'samples': 14038016, 'steps': 27417, 'loss/train': 1.9599082469940186} +03/04/2022 21:40:42 - INFO - codeparrot_training - Step 27418: {'lr': 0.0004644853342594235, 'samples': 14038528, 'steps': 27418, 'loss/train': 1.8385353088378906} +03/04/2022 21:40:43 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 21:40:47 - INFO - codeparrot_training - Step 27419: {'lr': 0.0004644826078835903, 'samples': 14039040, 'steps': 27419, 'loss/train': 1.6550085544586182} +03/04/2022 21:40:50 - INFO - codeparrot_training - Step 27420: {'lr': 0.00046447988141111457, 'samples': 14039552, 'steps': 27420, 'loss/train': 1.5975160598754883} +03/04/2022 21:40:52 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 21:40:56 - INFO - codeparrot_training - Step 27421: {'lr': 0.0004644771548419975, 'samples': 14040064, 'steps': 27421, 'loss/train': 2.035372734069824} +03/04/2022 21:40:59 - INFO - codeparrot_training - Step 27422: {'lr': 0.0004644744281762403, 'samples': 14040576, 'steps': 27422, 'loss/train': 1.3625017404556274} +03/04/2022 21:41:01 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 21:41:04 - INFO - codeparrot_training - Step 27423: {'lr': 0.0004644717014138442, 'samples': 14041088, 'steps': 27423, 'loss/train': 1.6563042402267456} +03/04/2022 21:41:07 - INFO - codeparrot_training - Step 27424: {'lr': 0.0004644689745548105, 'samples': 14041600, 'steps': 27424, 'loss/train': 1.3288251161575317} +03/04/2022 21:41:09 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 21:41:13 - INFO - codeparrot_training - Step 27425: {'lr': 0.00046446624759914043, 'samples': 14042112, 'steps': 27425, 'loss/train': 1.6382728815078735} +03/04/2022 21:41:16 - INFO - codeparrot_training - Step 27426: {'lr': 0.0004644635205468351, 'samples': 14042624, 'steps': 27426, 'loss/train': 1.8266234397888184} +03/04/2022 21:41:17 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 21:41:21 - INFO - codeparrot_training - Step 27427: {'lr': 0.00046446079339789587, 'samples': 14043136, 'steps': 27427, 'loss/train': 2.0857698917388916} +03/04/2022 21:41:24 - INFO - codeparrot_training - Step 27428: {'lr': 0.0004644580661523239, 'samples': 14043648, 'steps': 27428, 'loss/train': 1.900187611579895} +03/04/2022 21:41:26 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 21:41:30 - INFO - codeparrot_training - Step 27429: {'lr': 0.00046445533881012043, 'samples': 14044160, 'steps': 27429, 'loss/train': 1.4356448650360107} +03/04/2022 21:41:33 - INFO - codeparrot_training - Step 27430: {'lr': 0.0004644526113712867, 'samples': 14044672, 'steps': 27430, 'loss/train': 1.57810640335083} +03/04/2022 21:41:34 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 21:41:38 - INFO - codeparrot_training - Step 27431: {'lr': 0.00046444988383582394, 'samples': 14045184, 'steps': 27431, 'loss/train': 0.42720088362693787} +03/04/2022 21:41:41 - INFO - codeparrot_training - Step 27432: {'lr': 0.0004644471562037333, 'samples': 14045696, 'steps': 27432, 'loss/train': 1.293715238571167} +03/04/2022 21:41:42 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 21:41:46 - INFO - codeparrot_training - Step 27433: {'lr': 0.0004644444284750162, 'samples': 14046208, 'steps': 27433, 'loss/train': 2.1766974925994873} +03/04/2022 21:41:50 - INFO - codeparrot_training - Step 27434: {'lr': 0.0004644417006496737, 'samples': 14046720, 'steps': 27434, 'loss/train': 2.537862539291382} +03/04/2022 21:41:51 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 21:41:55 - INFO - codeparrot_training - Step 27435: {'lr': 0.0004644389727277071, 'samples': 14047232, 'steps': 27435, 'loss/train': 0.5788947343826294} +03/04/2022 21:41:58 - INFO - codeparrot_training - Step 27436: {'lr': 0.00046443624470911754, 'samples': 14047744, 'steps': 27436, 'loss/train': 1.9883496761322021} +03/04/2022 21:41:59 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 21:42:03 - INFO - codeparrot_training - Step 27437: {'lr': 0.00046443351659390637, 'samples': 14048256, 'steps': 27437, 'loss/train': 1.3385837078094482} +03/04/2022 21:42:06 - INFO - codeparrot_training - Step 27438: {'lr': 0.00046443078838207474, 'samples': 14048768, 'steps': 27438, 'loss/train': 2.25730037689209} +03/04/2022 21:42:07 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/04/2022 21:42:12 - INFO - codeparrot_training - Step 27439: {'lr': 0.00046442806007362394, 'samples': 14049280, 'steps': 27439, 'loss/train': 1.4275152683258057} +03/04/2022 21:42:15 - INFO - codeparrot_training - Step 27440: {'lr': 0.00046442533166855517, 'samples': 14049792, 'steps': 27440, 'loss/train': 1.123038649559021} +03/04/2022 21:42:16 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 21:42:20 - INFO - codeparrot_training - Step 27441: {'lr': 0.00046442260316686957, 'samples': 14050304, 'steps': 27441, 'loss/train': 1.8440301418304443} +03/04/2022 21:42:23 - INFO - codeparrot_training - Step 27442: {'lr': 0.0004644198745685685, 'samples': 14050816, 'steps': 27442, 'loss/train': 1.7701092958450317} +03/04/2022 21:42:24 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/04/2022 21:42:29 - INFO - codeparrot_training - Step 27443: {'lr': 0.00046441714587365317, 'samples': 14051328, 'steps': 27443, 'loss/train': 1.8733227252960205} +03/04/2022 21:42:32 - INFO - codeparrot_training - Step 27444: {'lr': 0.00046441441708212477, 'samples': 14051840, 'steps': 27444, 'loss/train': 1.5340734720230103} +03/04/2022 21:42:33 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 21:42:37 - INFO - codeparrot_training - Step 27445: {'lr': 0.00046441168819398457, 'samples': 14052352, 'steps': 27445, 'loss/train': 1.6654702425003052} +03/04/2022 21:42:40 - INFO - codeparrot_training - Step 27446: {'lr': 0.0004644089592092338, 'samples': 14052864, 'steps': 27446, 'loss/train': 2.098836898803711} +03/04/2022 21:42:41 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/04/2022 21:42:46 - INFO - codeparrot_training - Step 27447: {'lr': 0.0004644062301278735, 'samples': 14053376, 'steps': 27447, 'loss/train': 1.8823657035827637} +03/04/2022 21:42:49 - INFO - codeparrot_training - Step 27448: {'lr': 0.0004644035009499052, 'samples': 14053888, 'steps': 27448, 'loss/train': 0.9201361536979675} +03/04/2022 21:42:51 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 21:42:54 - INFO - codeparrot_training - Step 27449: {'lr': 0.0004644007716753299, 'samples': 14054400, 'steps': 27449, 'loss/train': 1.9657623767852783} +03/04/2022 21:42:57 - INFO - codeparrot_training - Step 27450: {'lr': 0.00046439804230414904, 'samples': 14054912, 'steps': 27450, 'loss/train': 6.4773478507995605} +03/04/2022 21:43:00 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 21:43:02 - INFO - codeparrot_training - Step 27451: {'lr': 0.0004643953128363637, 'samples': 14055424, 'steps': 27451, 'loss/train': 1.7805688381195068} +03/04/2022 21:43:06 - INFO - codeparrot_training - Step 27452: {'lr': 0.0004643925832719751, 'samples': 14055936, 'steps': 27452, 'loss/train': 1.4561102390289307} +03/04/2022 21:43:08 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 21:43:11 - INFO - codeparrot_training - Step 27453: {'lr': 0.0004643898536109845, 'samples': 14056448, 'steps': 27453, 'loss/train': 0.985674262046814} +03/04/2022 21:43:14 - INFO - codeparrot_training - Step 27454: {'lr': 0.0004643871238533931, 'samples': 14056960, 'steps': 27454, 'loss/train': 1.9698567390441895} +03/04/2022 21:43:18 - INFO - codeparrot_training - Step 27455: {'lr': 0.0004643843939992022, 'samples': 14057472, 'steps': 27455, 'loss/train': 0.48464033007621765} +03/04/2022 21:43:18 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 21:43:23 - INFO - codeparrot_training - Step 27456: {'lr': 0.0004643816640484131, 'samples': 14057984, 'steps': 27456, 'loss/train': 2.1968865394592285} +03/04/2022 21:43:26 - INFO - codeparrot_training - Step 27457: {'lr': 0.0004643789340010268, 'samples': 14058496, 'steps': 27457, 'loss/train': 1.6791248321533203} +03/04/2022 21:43:26 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 21:43:31 - INFO - codeparrot_training - Step 27458: {'lr': 0.00046437620385704476, 'samples': 14059008, 'steps': 27458, 'loss/train': 1.8809698820114136} +03/04/2022 21:43:35 - INFO - codeparrot_training - Step 27459: {'lr': 0.0004643734736164681, 'samples': 14059520, 'steps': 27459, 'loss/train': 2.0172295570373535} +03/04/2022 21:43:35 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 21:43:40 - INFO - codeparrot_training - Step 27460: {'lr': 0.00046437074327929795, 'samples': 14060032, 'steps': 27460, 'loss/train': 2.199040651321411} +03/04/2022 21:43:43 - INFO - codeparrot_training - Step 27461: {'lr': 0.0004643680128455358, 'samples': 14060544, 'steps': 27461, 'loss/train': 2.2491455078125} +03/04/2022 21:43:43 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 21:43:49 - INFO - codeparrot_training - Step 27462: {'lr': 0.00046436528231518263, 'samples': 14061056, 'steps': 27462, 'loss/train': 2.135902166366577} +03/04/2022 21:43:52 - INFO - codeparrot_training - Step 27463: {'lr': 0.0004643625516882398, 'samples': 14061568, 'steps': 27463, 'loss/train': 1.595616340637207} +03/04/2022 21:43:55 - INFO - codeparrot_training - Step 27464: {'lr': 0.0004643598209647085, 'samples': 14062080, 'steps': 27464, 'loss/train': 1.910935878753662} +03/04/2022 21:43:55 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/04/2022 21:44:01 - INFO - codeparrot_training - Step 27465: {'lr': 0.00046435709014459, 'samples': 14062592, 'steps': 27465, 'loss/train': 0.5192428827285767} +03/04/2022 21:44:04 - INFO - codeparrot_training - Step 27466: {'lr': 0.0004643543592278855, 'samples': 14063104, 'steps': 27466, 'loss/train': 1.8331623077392578} +03/04/2022 21:44:04 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/04/2022 21:44:09 - INFO - codeparrot_training - Step 27467: {'lr': 0.0004643516282145962, 'samples': 14063616, 'steps': 27467, 'loss/train': 2.0985190868377686} +03/04/2022 21:44:12 - INFO - codeparrot_training - Step 27468: {'lr': 0.0004643488971047234, 'samples': 14064128, 'steps': 27468, 'loss/train': 1.8761496543884277} +03/04/2022 21:44:13 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 21:44:17 - INFO - codeparrot_training - Step 27469: {'lr': 0.0004643461658982683, 'samples': 14064640, 'steps': 27469, 'loss/train': 1.3340861797332764} +03/04/2022 21:44:21 - INFO - codeparrot_training - Step 27470: {'lr': 0.00046434343459523207, 'samples': 14065152, 'steps': 27470, 'loss/train': 2.089254140853882} +03/04/2022 21:44:21 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 21:44:26 - INFO - codeparrot_training - Step 27471: {'lr': 0.00046434070319561604, 'samples': 14065664, 'steps': 27471, 'loss/train': 1.7469450235366821} +03/04/2022 21:44:29 - INFO - codeparrot_training - Step 27472: {'lr': 0.0004643379716994214, 'samples': 14066176, 'steps': 27472, 'loss/train': 1.3748592138290405} +03/04/2022 21:44:29 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 21:44:34 - INFO - codeparrot_training - Step 27473: {'lr': 0.0004643352401066494, 'samples': 14066688, 'steps': 27473, 'loss/train': 0.9277780055999756} +03/04/2022 21:44:38 - INFO - codeparrot_training - Step 27474: {'lr': 0.00046433250841730123, 'samples': 14067200, 'steps': 27474, 'loss/train': 0.9676547646522522} +03/04/2022 21:44:38 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 21:44:43 - INFO - codeparrot_training - Step 27475: {'lr': 0.0004643297766313781, 'samples': 14067712, 'steps': 27475, 'loss/train': 1.0953902006149292} +03/04/2022 21:44:46 - INFO - codeparrot_training - Step 27476: {'lr': 0.0004643270447488813, 'samples': 14068224, 'steps': 27476, 'loss/train': 1.6601144075393677} +03/04/2022 21:44:46 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 21:44:51 - INFO - codeparrot_training - Step 27477: {'lr': 0.000464324312769812, 'samples': 14068736, 'steps': 27477, 'loss/train': 1.5576833486557007} +03/04/2022 21:44:54 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/04/2022 21:44:57 - INFO - codeparrot_training - Step 27478: {'lr': 0.0004643215806941716, 'samples': 14069248, 'steps': 27478, 'loss/train': 1.5923259258270264} +03/04/2022 21:45:00 - INFO - codeparrot_training - Step 27479: {'lr': 0.00046431884852196105, 'samples': 14069760, 'steps': 27479, 'loss/train': 1.9775041341781616} +03/04/2022 21:45:02 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/04/2022 21:45:05 - INFO - codeparrot_training - Step 27480: {'lr': 0.0004643161162531818, 'samples': 14070272, 'steps': 27480, 'loss/train': 1.2686140537261963} +03/04/2022 21:45:08 - INFO - codeparrot_training - Step 27481: {'lr': 0.00046431338388783504, 'samples': 14070784, 'steps': 27481, 'loss/train': 1.7342884540557861} +03/04/2022 21:45:11 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/04/2022 21:45:13 - INFO - codeparrot_training - Step 27482: {'lr': 0.000464310651425922, 'samples': 14071296, 'steps': 27482, 'loss/train': 1.9429219961166382} +03/04/2022 21:45:17 - INFO - codeparrot_training - Step 27483: {'lr': 0.00046430791886744384, 'samples': 14071808, 'steps': 27483, 'loss/train': 1.7062933444976807} +03/04/2022 21:45:20 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/04/2022 21:45:22 - INFO - codeparrot_training - Step 27484: {'lr': 0.0004643051862124018, 'samples': 14072320, 'steps': 27484, 'loss/train': 1.6051735877990723} +03/04/2022 21:45:25 - INFO - codeparrot_training - Step 27485: {'lr': 0.0004643024534607973, 'samples': 14072832, 'steps': 27485, 'loss/train': 1.7099019289016724} +03/04/2022 21:45:28 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 21:45:30 - INFO - codeparrot_training - Step 27486: {'lr': 0.00046429972061263125, 'samples': 14073344, 'steps': 27486, 'loss/train': 1.3505125045776367} +03/04/2022 21:45:34 - INFO - codeparrot_training - Step 27487: {'lr': 0.0004642969876679051, 'samples': 14073856, 'steps': 27487, 'loss/train': 1.882491111755371} +03/04/2022 21:45:36 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/04/2022 21:45:39 - INFO - codeparrot_training - Step 27488: {'lr': 0.00046429425462662, 'samples': 14074368, 'steps': 27488, 'loss/train': 1.9467310905456543} +03/04/2022 21:45:42 - INFO - codeparrot_training - Step 27489: {'lr': 0.00046429152148877727, 'samples': 14074880, 'steps': 27489, 'loss/train': 1.5897464752197266} +03/04/2022 21:45:44 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 21:45:47 - INFO - codeparrot_training - Step 27490: {'lr': 0.00046428878825437815, 'samples': 14075392, 'steps': 27490, 'loss/train': 2.0209341049194336} +03/04/2022 21:45:51 - INFO - codeparrot_training - Step 27491: {'lr': 0.00046428605492342367, 'samples': 14075904, 'steps': 27491, 'loss/train': 6.542036056518555} +03/04/2022 21:45:54 - INFO - codeparrot_training - Step 27492: {'lr': 0.00046428332149591535, 'samples': 14076416, 'steps': 27492, 'loss/train': 1.5625042915344238} +03/04/2022 21:45:54 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/04/2022 21:45:59 - INFO - codeparrot_training - Step 27493: {'lr': 0.00046428058797185417, 'samples': 14076928, 'steps': 27493, 'loss/train': 1.4840084314346313} +03/04/2022 21:46:02 - INFO - codeparrot_training - Step 27494: {'lr': 0.00046427785435124147, 'samples': 14077440, 'steps': 27494, 'loss/train': 0.6383106112480164} +03/04/2022 21:46:04 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 21:46:08 - INFO - codeparrot_training - Step 27495: {'lr': 0.0004642751206340785, 'samples': 14077952, 'steps': 27495, 'loss/train': 1.5301226377487183} +03/04/2022 21:46:11 - INFO - codeparrot_training - Step 27496: {'lr': 0.00046427238682036643, 'samples': 14078464, 'steps': 27496, 'loss/train': 1.3929015398025513} +03/04/2022 21:46:12 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 21:46:16 - INFO - codeparrot_training - Step 27497: {'lr': 0.0004642696529101066, 'samples': 14078976, 'steps': 27497, 'loss/train': 2.1790637969970703} +03/04/2022 21:46:20 - INFO - codeparrot_training - Step 27498: {'lr': 0.0004642669189033001, 'samples': 14079488, 'steps': 27498, 'loss/train': 1.9886882305145264} +03/04/2022 21:46:21 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/04/2022 21:46:25 - INFO - codeparrot_training - Step 27499: {'lr': 0.0004642641847999483, 'samples': 14080000, 'steps': 27499, 'loss/train': 1.928977608680725} +03/04/2022 21:46:28 - INFO - codeparrot_training - Step 27500: {'lr': 0.0004642614506000523, 'samples': 14080512, 'steps': 27500, 'loss/train': 1.7993202209472656} +03/04/2022 21:46:30 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 21:46:33 - INFO - codeparrot_training - Step 27501: {'lr': 0.00046425871630361343, 'samples': 14081024, 'steps': 27501, 'loss/train': 1.6733847856521606} +03/04/2022 21:46:36 - INFO - codeparrot_training - Step 27502: {'lr': 0.0004642559819106329, 'samples': 14081536, 'steps': 27502, 'loss/train': 2.332489252090454} +03/04/2022 21:46:38 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 21:46:42 - INFO - codeparrot_training - Step 27503: {'lr': 0.0004642532474211119, 'samples': 14082048, 'steps': 27503, 'loss/train': 2.3540337085723877} +03/04/2022 21:46:45 - INFO - codeparrot_training - Step 27504: {'lr': 0.0004642505128350517, 'samples': 14082560, 'steps': 27504, 'loss/train': 1.924314022064209} +03/04/2022 21:46:47 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 21:46:50 - INFO - codeparrot_training - Step 27505: {'lr': 0.00046424777815245354, 'samples': 14083072, 'steps': 27505, 'loss/train': 0.6645641922950745} +03/04/2022 21:46:53 - INFO - codeparrot_training - Step 27506: {'lr': 0.0004642450433733186, 'samples': 14083584, 'steps': 27506, 'loss/train': 1.7985507249832153} +03/04/2022 21:46:55 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 21:46:58 - INFO - codeparrot_training - Step 27507: {'lr': 0.0004642423084976482, 'samples': 14084096, 'steps': 27507, 'loss/train': 2.5674941539764404} +03/04/2022 21:47:02 - INFO - codeparrot_training - Step 27508: {'lr': 0.0004642395735254435, 'samples': 14084608, 'steps': 27508, 'loss/train': 0.8497194647789001} +03/04/2022 21:47:04 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 21:47:07 - INFO - codeparrot_training - Step 27509: {'lr': 0.0004642368384567058, 'samples': 14085120, 'steps': 27509, 'loss/train': 1.680609107017517} +03/04/2022 21:47:10 - INFO - codeparrot_training - Step 27510: {'lr': 0.0004642341032914362, 'samples': 14085632, 'steps': 27510, 'loss/train': 2.2475357055664062} +03/04/2022 21:47:12 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 21:47:15 - INFO - codeparrot_training - Step 27511: {'lr': 0.00046423136802963607, 'samples': 14086144, 'steps': 27511, 'loss/train': 1.122145652770996} +03/04/2022 21:47:19 - INFO - codeparrot_training - Step 27512: {'lr': 0.0004642286326713065, 'samples': 14086656, 'steps': 27512, 'loss/train': 1.8663039207458496} +03/04/2022 21:47:21 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 21:47:24 - INFO - codeparrot_training - Step 27513: {'lr': 0.000464225897216449, 'samples': 14087168, 'steps': 27513, 'loss/train': 1.2194300889968872} +03/04/2022 21:47:27 - INFO - codeparrot_training - Step 27514: {'lr': 0.0004642231616650645, 'samples': 14087680, 'steps': 27514, 'loss/train': 1.1292306184768677} +03/04/2022 21:47:29 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 21:47:32 - INFO - codeparrot_training - Step 27515: {'lr': 0.00046422042601715433, 'samples': 14088192, 'steps': 27515, 'loss/train': 2.3064815998077393} +03/04/2022 21:47:35 - INFO - codeparrot_training - Step 27516: {'lr': 0.00046421769027271974, 'samples': 14088704, 'steps': 27516, 'loss/train': 1.3717312812805176} +03/04/2022 21:47:37 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 21:47:41 - INFO - codeparrot_training - Step 27517: {'lr': 0.00046421495443176204, 'samples': 14089216, 'steps': 27517, 'loss/train': 1.7045902013778687} +03/04/2022 21:47:44 - INFO - codeparrot_training - Step 27518: {'lr': 0.0004642122184942824, 'samples': 14089728, 'steps': 27518, 'loss/train': 1.7514593601226807} +03/04/2022 21:47:45 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/04/2022 21:47:50 - INFO - codeparrot_training - Step 27519: {'lr': 0.00046420948246028194, 'samples': 14090240, 'steps': 27519, 'loss/train': 2.223639726638794} +03/04/2022 21:47:53 - INFO - codeparrot_training - Step 27520: {'lr': 0.000464206746329762, 'samples': 14090752, 'steps': 27520, 'loss/train': 2.083322763442993} +03/04/2022 21:47:56 - INFO - codeparrot_training - Step 27521: {'lr': 0.00046420401010272385, 'samples': 14091264, 'steps': 27521, 'loss/train': 1.2613078355789185} +03/04/2022 21:47:57 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 21:48:01 - INFO - codeparrot_training - Step 27522: {'lr': 0.00046420127377916863, 'samples': 14091776, 'steps': 27522, 'loss/train': 1.9479728937149048} +03/04/2022 21:48:04 - INFO - codeparrot_training - Step 27523: {'lr': 0.0004641985373590977, 'samples': 14092288, 'steps': 27523, 'loss/train': 1.9705098867416382} +03/04/2022 21:48:05 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 21:48:10 - INFO - codeparrot_training - Step 27524: {'lr': 0.00046419580084251224, 'samples': 14092800, 'steps': 27524, 'loss/train': 2.94038462638855} +03/04/2022 21:48:13 - INFO - codeparrot_training - Step 27525: {'lr': 0.0004641930642294133, 'samples': 14093312, 'steps': 27525, 'loss/train': 1.4523680210113525} +03/04/2022 21:48:14 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/04/2022 21:48:18 - INFO - codeparrot_training - Step 27526: {'lr': 0.0004641903275198024, 'samples': 14093824, 'steps': 27526, 'loss/train': 1.8401007652282715} +03/04/2022 21:48:21 - INFO - codeparrot_training - Step 27527: {'lr': 0.0004641875907136806, 'samples': 14094336, 'steps': 27527, 'loss/train': 1.802200198173523} +03/04/2022 21:48:23 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/04/2022 21:48:27 - INFO - codeparrot_training - Step 27528: {'lr': 0.0004641848538110492, 'samples': 14094848, 'steps': 27528, 'loss/train': 1.7315595149993896} +03/04/2022 21:48:30 - INFO - codeparrot_training - Step 27529: {'lr': 0.00046418211681190937, 'samples': 14095360, 'steps': 27529, 'loss/train': 2.416125535964966} +03/04/2022 21:48:34 - INFO - codeparrot_training - Step 27530: {'lr': 0.00046417937971626245, 'samples': 14095872, 'steps': 27530, 'loss/train': 1.5735355615615845} +03/04/2022 21:48:34 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 21:48:39 - INFO - codeparrot_training - Step 27531: {'lr': 0.0004641766425241095, 'samples': 14096384, 'steps': 27531, 'loss/train': 2.508897304534912} +03/04/2022 21:48:42 - INFO - codeparrot_training - Step 27532: {'lr': 0.000464173905235452, 'samples': 14096896, 'steps': 27532, 'loss/train': 1.7210041284561157} +03/04/2022 21:48:42 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/04/2022 21:48:47 - INFO - codeparrot_training - Step 27533: {'lr': 0.0004641711678502909, 'samples': 14097408, 'steps': 27533, 'loss/train': 0.20753736793994904} +03/04/2022 21:48:50 - INFO - codeparrot_training - Step 27534: {'lr': 0.00046416843036862766, 'samples': 14097920, 'steps': 27534, 'loss/train': 1.9154161214828491} +03/04/2022 21:48:51 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 21:48:56 - INFO - codeparrot_training - Step 27535: {'lr': 0.0004641656927904634, 'samples': 14098432, 'steps': 27535, 'loss/train': 1.7031707763671875} +03/04/2022 21:48:59 - INFO - codeparrot_training - Step 27536: {'lr': 0.00046416295511579944, 'samples': 14098944, 'steps': 27536, 'loss/train': 0.9756758809089661} +03/04/2022 21:48:59 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/04/2022 21:49:04 - INFO - codeparrot_training - Step 27537: {'lr': 0.0004641602173446369, 'samples': 14099456, 'steps': 27537, 'loss/train': 0.7165545225143433} +03/04/2022 21:49:07 - INFO - codeparrot_training - Step 27538: {'lr': 0.00046415747947697704, 'samples': 14099968, 'steps': 27538, 'loss/train': 2.6045031547546387} +03/04/2022 21:49:08 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 21:49:13 - INFO - codeparrot_training - Step 27539: {'lr': 0.00046415474151282124, 'samples': 14100480, 'steps': 27539, 'loss/train': 1.4550666809082031} +03/04/2022 21:49:16 - INFO - codeparrot_training - Step 27540: {'lr': 0.0004641520034521705, 'samples': 14100992, 'steps': 27540, 'loss/train': 2.0749640464782715} +03/04/2022 21:49:17 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 21:49:21 - INFO - codeparrot_training - Step 27541: {'lr': 0.0004641492652950262, 'samples': 14101504, 'steps': 27541, 'loss/train': 1.7656759023666382} +03/04/2022 21:49:24 - INFO - codeparrot_training - Step 27542: {'lr': 0.0004641465270413896, 'samples': 14102016, 'steps': 27542, 'loss/train': 1.656904935836792} +03/04/2022 21:49:25 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 21:49:30 - INFO - codeparrot_training - Step 27543: {'lr': 0.00046414378869126185, 'samples': 14102528, 'steps': 27543, 'loss/train': 1.8351272344589233} +03/04/2022 21:49:33 - INFO - codeparrot_training - Step 27544: {'lr': 0.0004641410502446442, 'samples': 14103040, 'steps': 27544, 'loss/train': 2.211921453475952} +03/04/2022 21:49:34 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 21:49:38 - INFO - codeparrot_training - Step 27545: {'lr': 0.00046413831170153785, 'samples': 14103552, 'steps': 27545, 'loss/train': 2.222635269165039} +03/04/2022 21:49:41 - INFO - codeparrot_training - Step 27546: {'lr': 0.0004641355730619442, 'samples': 14104064, 'steps': 27546, 'loss/train': 1.6965638399124146} +03/04/2022 21:49:42 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 21:49:46 - INFO - codeparrot_training - Step 27547: {'lr': 0.0004641328343258643, 'samples': 14104576, 'steps': 27547, 'loss/train': 1.911508560180664} +03/04/2022 21:49:49 - INFO - codeparrot_training - Step 27548: {'lr': 0.00046413009549329946, 'samples': 14105088, 'steps': 27548, 'loss/train': 2.277571201324463} +03/04/2022 21:49:51 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 21:49:55 - INFO - codeparrot_training - Step 27549: {'lr': 0.0004641273565642509, 'samples': 14105600, 'steps': 27549, 'loss/train': 1.6523603200912476} +03/04/2022 21:49:58 - INFO - codeparrot_training - Step 27550: {'lr': 0.0004641246175387198, 'samples': 14106112, 'steps': 27550, 'loss/train': 1.9692319631576538} +03/04/2022 21:49:59 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 21:50:03 - INFO - codeparrot_training - Step 27551: {'lr': 0.0004641218784167075, 'samples': 14106624, 'steps': 27551, 'loss/train': 0.770227313041687} +03/04/2022 21:50:06 - INFO - codeparrot_training - Step 27552: {'lr': 0.0004641191391982152, 'samples': 14107136, 'steps': 27552, 'loss/train': 2.3484370708465576} +03/04/2022 21:50:07 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/04/2022 21:50:12 - INFO - codeparrot_training - Step 27553: {'lr': 0.00046411639988324407, 'samples': 14107648, 'steps': 27553, 'loss/train': 1.7093877792358398} +03/04/2022 21:50:15 - INFO - codeparrot_training - Step 27554: {'lr': 0.00046411366047179547, 'samples': 14108160, 'steps': 27554, 'loss/train': 2.1578946113586426} +03/04/2022 21:50:16 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 21:50:20 - INFO - codeparrot_training - Step 27555: {'lr': 0.00046411092096387054, 'samples': 14108672, 'steps': 27555, 'loss/train': 1.9369224309921265} +03/04/2022 21:50:23 - INFO - codeparrot_training - Step 27556: {'lr': 0.0004641081813594705, 'samples': 14109184, 'steps': 27556, 'loss/train': 1.0960320234298706} +03/04/2022 21:50:24 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/04/2022 21:50:29 - INFO - codeparrot_training - Step 27557: {'lr': 0.0004641054416585966, 'samples': 14109696, 'steps': 27557, 'loss/train': 2.1412601470947266} +03/04/2022 21:50:32 - INFO - codeparrot_training - Step 27558: {'lr': 0.00046410270186125014, 'samples': 14110208, 'steps': 27558, 'loss/train': 1.4084429740905762} +03/04/2022 21:50:33 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/04/2022 21:50:37 - INFO - codeparrot_training - Step 27559: {'lr': 0.0004640999619674323, 'samples': 14110720, 'steps': 27559, 'loss/train': 1.7194292545318604} +03/04/2022 21:50:41 - INFO - codeparrot_training - Step 27560: {'lr': 0.0004640972219771443, 'samples': 14111232, 'steps': 27560, 'loss/train': 3.113382339477539} +03/04/2022 21:50:41 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 21:50:46 - INFO - codeparrot_training - Step 27561: {'lr': 0.00046409448189038737, 'samples': 14111744, 'steps': 27561, 'loss/train': 2.201770305633545} +03/04/2022 21:50:49 - INFO - codeparrot_training - Step 27562: {'lr': 0.00046409174170716284, 'samples': 14112256, 'steps': 27562, 'loss/train': 0.9841717481613159} +03/04/2022 21:50:50 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 21:50:54 - INFO - codeparrot_training - Step 27563: {'lr': 0.0004640890014274718, 'samples': 14112768, 'steps': 27563, 'loss/train': 1.768831729888916} +03/04/2022 21:50:57 - INFO - codeparrot_training - Step 27564: {'lr': 0.0004640862610513156, 'samples': 14113280, 'steps': 27564, 'loss/train': 1.58949613571167} +03/04/2022 21:50:59 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 21:51:03 - INFO - codeparrot_training - Step 27565: {'lr': 0.00046408352057869545, 'samples': 14113792, 'steps': 27565, 'loss/train': 1.4789090156555176} +03/04/2022 21:51:06 - INFO - codeparrot_training - Step 27566: {'lr': 0.0004640807800096126, 'samples': 14114304, 'steps': 27566, 'loss/train': 2.222676992416382} +03/04/2022 21:51:07 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/04/2022 21:51:11 - INFO - codeparrot_training - Step 27567: {'lr': 0.0004640780393440682, 'samples': 14114816, 'steps': 27567, 'loss/train': 2.033569574356079} +03/04/2022 21:51:14 - INFO - codeparrot_training - Step 27568: {'lr': 0.0004640752985820635, 'samples': 14115328, 'steps': 27568, 'loss/train': 2.299964666366577} +03/04/2022 21:51:16 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/04/2022 21:51:20 - INFO - codeparrot_training - Step 27569: {'lr': 0.0004640725577235998, 'samples': 14115840, 'steps': 27569, 'loss/train': 1.8933861255645752} +03/04/2022 21:51:23 - INFO - codeparrot_training - Step 27570: {'lr': 0.00046406981676867836, 'samples': 14116352, 'steps': 27570, 'loss/train': 1.6625146865844727} +03/04/2022 21:51:25 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 21:51:28 - INFO - codeparrot_training - Step 27571: {'lr': 0.00046406707571730035, 'samples': 14116864, 'steps': 27571, 'loss/train': 1.603249430656433} +03/04/2022 21:51:31 - INFO - codeparrot_training - Step 27572: {'lr': 0.000464064334569467, 'samples': 14117376, 'steps': 27572, 'loss/train': 1.9846317768096924} +03/04/2022 21:51:33 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 21:51:36 - INFO - codeparrot_training - Step 27573: {'lr': 0.00046406159332517956, 'samples': 14117888, 'steps': 27573, 'loss/train': 1.8401986360549927} +03/04/2022 21:51:40 - INFO - codeparrot_training - Step 27574: {'lr': 0.00046405885198443926, 'samples': 14118400, 'steps': 27574, 'loss/train': 1.8662515878677368} +03/04/2022 21:51:41 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 21:51:45 - INFO - codeparrot_training - Step 27575: {'lr': 0.00046405611054724737, 'samples': 14118912, 'steps': 27575, 'loss/train': 1.8286628723144531} +03/04/2022 21:51:48 - INFO - codeparrot_training - Step 27576: {'lr': 0.00046405336901360507, 'samples': 14119424, 'steps': 27576, 'loss/train': 1.0668145418167114} +03/04/2022 21:51:50 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 21:51:54 - INFO - codeparrot_training - Step 27577: {'lr': 0.00046405062738351366, 'samples': 14119936, 'steps': 27577, 'loss/train': 2.066446542739868} +03/04/2022 21:51:57 - INFO - codeparrot_training - Step 27578: {'lr': 0.00046404788565697434, 'samples': 14120448, 'steps': 27578, 'loss/train': 1.9912432432174683} +03/04/2022 21:51:58 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/04/2022 21:52:02 - INFO - codeparrot_training - Step 27579: {'lr': 0.00046404514383398835, 'samples': 14120960, 'steps': 27579, 'loss/train': 2.641558885574341} +03/04/2022 21:52:05 - INFO - codeparrot_training - Step 27580: {'lr': 0.0004640424019145568, 'samples': 14121472, 'steps': 27580, 'loss/train': 2.41717529296875} +03/04/2022 21:52:07 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 21:52:10 - INFO - codeparrot_training - Step 27581: {'lr': 0.00046403965989868124, 'samples': 14121984, 'steps': 27581, 'loss/train': 2.020951986312866} +03/04/2022 21:52:14 - INFO - codeparrot_training - Step 27582: {'lr': 0.0004640369177863626, 'samples': 14122496, 'steps': 27582, 'loss/train': 1.296420931816101} +03/04/2022 21:52:15 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 21:52:19 - INFO - codeparrot_training - Step 27583: {'lr': 0.00046403417557760226, 'samples': 14123008, 'steps': 27583, 'loss/train': 1.6932988166809082} +03/04/2022 21:52:22 - INFO - codeparrot_training - Step 27584: {'lr': 0.00046403143327240136, 'samples': 14123520, 'steps': 27584, 'loss/train': 2.339085817337036} +03/04/2022 21:52:24 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 21:52:27 - INFO - codeparrot_training - Step 27585: {'lr': 0.00046402869087076127, 'samples': 14124032, 'steps': 27585, 'loss/train': 2.1542680263519287} +03/04/2022 21:52:30 - INFO - codeparrot_training - Step 27586: {'lr': 0.00046402594837268314, 'samples': 14124544, 'steps': 27586, 'loss/train': 1.9707403182983398} +03/04/2022 21:52:32 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 21:52:36 - INFO - codeparrot_training - Step 27587: {'lr': 0.0004640232057781682, 'samples': 14125056, 'steps': 27587, 'loss/train': 1.5464863777160645} +03/04/2022 21:52:39 - INFO - codeparrot_training - Step 27588: {'lr': 0.00046402046308721776, 'samples': 14125568, 'steps': 27588, 'loss/train': 1.7598837614059448} +03/04/2022 21:52:41 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 21:52:44 - INFO - codeparrot_training - Step 27589: {'lr': 0.0004640177202998329, 'samples': 14126080, 'steps': 27589, 'loss/train': 2.269519805908203} +03/04/2022 21:52:47 - INFO - codeparrot_training - Step 27590: {'lr': 0.00046401497741601505, 'samples': 14126592, 'steps': 27590, 'loss/train': 3.437429904937744} +03/04/2022 21:52:49 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/04/2022 21:52:53 - INFO - codeparrot_training - Step 27591: {'lr': 0.00046401223443576537, 'samples': 14127104, 'steps': 27591, 'loss/train': 1.6568045616149902} +03/04/2022 21:52:56 - INFO - codeparrot_training - Step 27592: {'lr': 0.00046400949135908497, 'samples': 14127616, 'steps': 27592, 'loss/train': 1.5045348405838013} +03/04/2022 21:52:58 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 21:53:01 - INFO - codeparrot_training - Step 27593: {'lr': 0.0004640067481859753, 'samples': 14128128, 'steps': 27593, 'loss/train': 1.4222878217697144} +03/04/2022 21:53:04 - INFO - codeparrot_training - Step 27594: {'lr': 0.00046400400491643744, 'samples': 14128640, 'steps': 27594, 'loss/train': 1.5131932497024536} +03/04/2022 21:53:06 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/04/2022 21:53:10 - INFO - codeparrot_training - Step 27595: {'lr': 0.00046400126155047265, 'samples': 14129152, 'steps': 27595, 'loss/train': 2.2484421730041504} +03/04/2022 21:53:13 - INFO - codeparrot_training - Step 27596: {'lr': 0.0004639985180880822, 'samples': 14129664, 'steps': 27596, 'loss/train': 0.7109977006912231} +03/04/2022 21:53:14 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 21:53:18 - INFO - codeparrot_training - Step 27597: {'lr': 0.0004639957745292674, 'samples': 14130176, 'steps': 27597, 'loss/train': 1.857573390007019} +03/04/2022 21:53:21 - INFO - codeparrot_training - Step 27598: {'lr': 0.00046399303087402935, 'samples': 14130688, 'steps': 27598, 'loss/train': 1.496458888053894} +03/04/2022 21:53:23 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/04/2022 21:53:26 - INFO - codeparrot_training - Step 27599: {'lr': 0.00046399028712236935, 'samples': 14131200, 'steps': 27599, 'loss/train': 1.9203948974609375} +03/04/2022 21:53:30 - INFO - codeparrot_training - Step 27600: {'lr': 0.0004639875432742886, 'samples': 14131712, 'steps': 27600, 'loss/train': 1.9752079248428345} +03/04/2022 21:53:31 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 21:53:35 - INFO - codeparrot_training - Step 27601: {'lr': 0.0004639847993297884, 'samples': 14132224, 'steps': 27601, 'loss/train': 2.3917438983917236} +03/04/2022 21:53:38 - INFO - codeparrot_training - Step 27602: {'lr': 0.00046398205528886994, 'samples': 14132736, 'steps': 27602, 'loss/train': 1.6044554710388184} +03/04/2022 21:53:40 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/04/2022 21:53:43 - INFO - codeparrot_training - Step 27603: {'lr': 0.00046397931115153444, 'samples': 14133248, 'steps': 27603, 'loss/train': 1.7703933715820312} +03/04/2022 21:53:46 - INFO - codeparrot_training - Step 27604: {'lr': 0.0004639765669177833, 'samples': 14133760, 'steps': 27604, 'loss/train': 1.1748124361038208} +03/04/2022 21:53:48 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 21:53:52 - INFO - codeparrot_training - Step 27605: {'lr': 0.00046397382258761744, 'samples': 14134272, 'steps': 27605, 'loss/train': 2.083744764328003} +03/04/2022 21:53:55 - INFO - codeparrot_training - Step 27606: {'lr': 0.0004639710781610384, 'samples': 14134784, 'steps': 27606, 'loss/train': 1.532667636871338} +03/04/2022 21:53:56 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 21:54:00 - INFO - codeparrot_training - Step 27607: {'lr': 0.00046396833363804724, 'samples': 14135296, 'steps': 27607, 'loss/train': 1.4918828010559082} +03/04/2022 21:54:03 - INFO - codeparrot_training - Step 27608: {'lr': 0.00046396558901864527, 'samples': 14135808, 'steps': 27608, 'loss/train': 1.8344670534133911} +03/04/2022 21:54:05 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 21:54:08 - INFO - codeparrot_training - Step 27609: {'lr': 0.0004639628443028337, 'samples': 14136320, 'steps': 27609, 'loss/train': 0.4375920295715332} +03/04/2022 21:54:12 - INFO - codeparrot_training - Step 27610: {'lr': 0.0004639600994906138, 'samples': 14136832, 'steps': 27610, 'loss/train': 1.5562015771865845} +03/04/2022 21:54:13 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/04/2022 21:54:17 - INFO - codeparrot_training - Step 27611: {'lr': 0.00046395735458198674, 'samples': 14137344, 'steps': 27611, 'loss/train': 1.7201288938522339} +03/04/2022 21:54:20 - INFO - codeparrot_training - Step 27612: {'lr': 0.0004639546095769538, 'samples': 14137856, 'steps': 27612, 'loss/train': 6.799593925476074} +03/04/2022 21:54:22 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 21:54:25 - INFO - codeparrot_training - Step 27613: {'lr': 0.00046395186447551617, 'samples': 14138368, 'steps': 27613, 'loss/train': 1.455855131149292} +03/04/2022 21:54:29 - INFO - codeparrot_training - Step 27614: {'lr': 0.00046394911927767526, 'samples': 14138880, 'steps': 27614, 'loss/train': 1.6548454761505127} +03/04/2022 21:54:31 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/04/2022 21:54:34 - INFO - codeparrot_training - Step 27615: {'lr': 0.0004639463739834321, 'samples': 14139392, 'steps': 27615, 'loss/train': 0.9600452184677124} +03/04/2022 21:54:37 - INFO - codeparrot_training - Step 27616: {'lr': 0.00046394362859278793, 'samples': 14139904, 'steps': 27616, 'loss/train': 1.2721601724624634} +03/04/2022 21:54:39 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/04/2022 21:54:42 - INFO - codeparrot_training - Step 27617: {'lr': 0.00046394088310574416, 'samples': 14140416, 'steps': 27617, 'loss/train': 1.6315211057662964} +03/04/2022 21:54:46 - INFO - codeparrot_training - Step 27618: {'lr': 0.000463938137522302, 'samples': 14140928, 'steps': 27618, 'loss/train': 1.812462329864502} +03/04/2022 21:54:48 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 21:54:51 - INFO - codeparrot_training - Step 27619: {'lr': 0.00046393539184246246, 'samples': 14141440, 'steps': 27619, 'loss/train': 1.689713716506958} +03/04/2022 21:54:54 - INFO - codeparrot_training - Step 27620: {'lr': 0.000463932646066227, 'samples': 14141952, 'steps': 27620, 'loss/train': 2.6587886810302734} +03/04/2022 21:54:56 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 21:54:59 - INFO - codeparrot_training - Step 27621: {'lr': 0.0004639299001935968, 'samples': 14142464, 'steps': 27621, 'loss/train': 2.1129837036132812} +03/04/2022 21:55:02 - INFO - codeparrot_training - Step 27622: {'lr': 0.0004639271542245731, 'samples': 14142976, 'steps': 27622, 'loss/train': 0.19581444561481476} +03/04/2022 21:55:04 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 21:55:08 - INFO - codeparrot_training - Step 27623: {'lr': 0.000463924408159157, 'samples': 14143488, 'steps': 27623, 'loss/train': 0.8398597240447998} +03/04/2022 21:55:11 - INFO - codeparrot_training - Step 27624: {'lr': 0.00046392166199735, 'samples': 14144000, 'steps': 27624, 'loss/train': 1.8493735790252686} +03/04/2022 21:55:13 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/04/2022 21:55:16 - INFO - codeparrot_training - Step 27625: {'lr': 0.00046391891573915325, 'samples': 14144512, 'steps': 27625, 'loss/train': 1.7245063781738281} +03/04/2022 21:55:19 - INFO - codeparrot_training - Step 27626: {'lr': 0.0004639161693845678, 'samples': 14145024, 'steps': 27626, 'loss/train': 2.0746915340423584} +03/04/2022 21:55:22 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/04/2022 21:55:25 - INFO - codeparrot_training - Step 27627: {'lr': 0.0004639134229335951, 'samples': 14145536, 'steps': 27627, 'loss/train': 1.8795573711395264} +03/04/2022 21:55:28 - INFO - codeparrot_training - Step 27628: {'lr': 0.0004639106763862363, 'samples': 14146048, 'steps': 27628, 'loss/train': 2.0054335594177246} +03/04/2022 21:55:30 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/04/2022 21:55:33 - INFO - codeparrot_training - Step 27629: {'lr': 0.00046390792974249263, 'samples': 14146560, 'steps': 27629, 'loss/train': 1.4535967111587524} +03/04/2022 21:55:36 - INFO - codeparrot_training - Step 27630: {'lr': 0.00046390518300236535, 'samples': 14147072, 'steps': 27630, 'loss/train': 1.7396965026855469} +03/04/2022 21:55:38 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/04/2022 21:55:41 - INFO - codeparrot_training - Step 27631: {'lr': 0.0004639024361658557, 'samples': 14147584, 'steps': 27631, 'loss/train': 2.5055084228515625} +03/04/2022 21:55:45 - INFO - codeparrot_training - Step 27632: {'lr': 0.00046389968923296496, 'samples': 14148096, 'steps': 27632, 'loss/train': 1.5569921731948853} +03/04/2022 21:55:47 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 21:55:50 - INFO - codeparrot_training - Step 27633: {'lr': 0.0004638969422036943, 'samples': 14148608, 'steps': 27633, 'loss/train': 1.5817103385925293} +03/04/2022 21:55:53 - INFO - codeparrot_training - Step 27634: {'lr': 0.00046389419507804493, 'samples': 14149120, 'steps': 27634, 'loss/train': 2.108802080154419} +03/04/2022 21:55:55 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 21:55:58 - INFO - codeparrot_training - Step 27635: {'lr': 0.00046389144785601813, 'samples': 14149632, 'steps': 27635, 'loss/train': 1.8186153173446655} +03/04/2022 21:56:02 - INFO - codeparrot_training - Step 27636: {'lr': 0.0004638887005376152, 'samples': 14150144, 'steps': 27636, 'loss/train': 1.2399070262908936} +03/04/2022 21:56:04 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/04/2022 21:56:07 - INFO - codeparrot_training - Step 27637: {'lr': 0.0004638859531228373, 'samples': 14150656, 'steps': 27637, 'loss/train': 1.5489846467971802} +03/04/2022 21:56:10 - INFO - codeparrot_training - Step 27638: {'lr': 0.00046388320561168567, 'samples': 14151168, 'steps': 27638, 'loss/train': 1.4304358959197998} +03/04/2022 21:56:13 - INFO - codeparrot_training - Step 27639: {'lr': 0.00046388045800416157, 'samples': 14151680, 'steps': 27639, 'loss/train': 0.5739402770996094} +03/04/2022 21:56:14 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 21:56:19 - INFO - codeparrot_training - Step 27640: {'lr': 0.00046387771030026627, 'samples': 14152192, 'steps': 27640, 'loss/train': 1.2853105068206787} +03/04/2022 21:56:22 - INFO - codeparrot_training - Step 27641: {'lr': 0.00046387496250000095, 'samples': 14152704, 'steps': 27641, 'loss/train': 1.532561182975769} +03/04/2022 21:56:22 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 21:56:27 - INFO - codeparrot_training - Step 27642: {'lr': 0.0004638722146033669, 'samples': 14153216, 'steps': 27642, 'loss/train': 1.7510236501693726} +03/04/2022 21:56:30 - INFO - codeparrot_training - Step 27643: {'lr': 0.0004638694666103653, 'samples': 14153728, 'steps': 27643, 'loss/train': 2.280945062637329} +03/04/2022 21:56:30 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 21:56:36 - INFO - codeparrot_training - Step 27644: {'lr': 0.00046386671852099743, 'samples': 14154240, 'steps': 27644, 'loss/train': 0.961483895778656} +03/04/2022 21:56:39 - INFO - codeparrot_training - Step 27645: {'lr': 0.0004638639703352645, 'samples': 14154752, 'steps': 27645, 'loss/train': 2.505633592605591} +03/04/2022 21:56:39 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/04/2022 21:56:44 - INFO - codeparrot_training - Step 27646: {'lr': 0.00046386122205316783, 'samples': 14155264, 'steps': 27646, 'loss/train': 1.683584213256836} +03/04/2022 21:56:47 - INFO - codeparrot_training - Step 27647: {'lr': 0.0004638584736747085, 'samples': 14155776, 'steps': 27647, 'loss/train': 1.8131953477859497} +03/04/2022 21:56:47 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 21:56:52 - INFO - codeparrot_training - Step 27648: {'lr': 0.00046385572519988793, 'samples': 14156288, 'steps': 27648, 'loss/train': 2.177790403366089} +03/04/2022 21:56:55 - INFO - codeparrot_training - Step 27649: {'lr': 0.00046385297662870716, 'samples': 14156800, 'steps': 27649, 'loss/train': 1.9873101711273193} +03/04/2022 21:56:55 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 21:57:01 - INFO - codeparrot_training - Step 27650: {'lr': 0.00046385022796116766, 'samples': 14157312, 'steps': 27650, 'loss/train': 1.2463878393173218} +03/04/2022 21:57:04 - INFO - codeparrot_training - Step 27651: {'lr': 0.0004638474791972705, 'samples': 14157824, 'steps': 27651, 'loss/train': 2.1014769077301025} +03/04/2022 21:57:04 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 21:57:09 - INFO - codeparrot_training - Step 27652: {'lr': 0.000463844730337017, 'samples': 14158336, 'steps': 27652, 'loss/train': 2.2625844478607178} +03/04/2022 21:57:12 - INFO - codeparrot_training - Step 27653: {'lr': 0.00046384198138040825, 'samples': 14158848, 'steps': 27653, 'loss/train': 1.9300283193588257} +03/04/2022 21:57:13 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 21:57:18 - INFO - codeparrot_training - Step 27654: {'lr': 0.00046383923232744565, 'samples': 14159360, 'steps': 27654, 'loss/train': 2.1567187309265137} +03/04/2022 21:57:21 - INFO - codeparrot_training - Step 27655: {'lr': 0.00046383648317813045, 'samples': 14159872, 'steps': 27655, 'loss/train': 1.30332350730896} +03/04/2022 21:57:21 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 21:57:26 - INFO - codeparrot_training - Step 27656: {'lr': 0.0004638337339324638, 'samples': 14160384, 'steps': 27656, 'loss/train': 2.142608404159546} +03/04/2022 21:57:30 - INFO - codeparrot_training - Step 27657: {'lr': 0.00046383098459044697, 'samples': 14160896, 'steps': 27657, 'loss/train': 1.143470287322998} +03/04/2022 21:57:31 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/04/2022 21:57:35 - INFO - codeparrot_training - Step 27658: {'lr': 0.0004638282351520812, 'samples': 14161408, 'steps': 27658, 'loss/train': 1.9976515769958496} +03/04/2022 21:57:38 - INFO - codeparrot_training - Step 27659: {'lr': 0.00046382548561736773, 'samples': 14161920, 'steps': 27659, 'loss/train': 1.8706058263778687} +03/04/2022 21:57:39 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 21:57:43 - INFO - codeparrot_training - Step 27660: {'lr': 0.0004638227359863078, 'samples': 14162432, 'steps': 27660, 'loss/train': 1.6258901357650757} +03/04/2022 21:57:46 - INFO - codeparrot_training - Step 27661: {'lr': 0.0004638199862589026, 'samples': 14162944, 'steps': 27661, 'loss/train': 1.7211793661117554} +03/04/2022 21:57:48 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 21:57:52 - INFO - codeparrot_training - Step 27662: {'lr': 0.0004638172364351535, 'samples': 14163456, 'steps': 27662, 'loss/train': 2.274756908416748} +03/04/2022 21:57:55 - INFO - codeparrot_training - Step 27663: {'lr': 0.00046381448651506153, 'samples': 14163968, 'steps': 27663, 'loss/train': 0.5946659445762634} +03/04/2022 21:57:56 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 21:58:00 - INFO - codeparrot_training - Step 27664: {'lr': 0.00046381173649862815, 'samples': 14164480, 'steps': 27664, 'loss/train': 1.467079997062683} +03/04/2022 21:58:03 - INFO - codeparrot_training - Step 27665: {'lr': 0.00046380898638585447, 'samples': 14164992, 'steps': 27665, 'loss/train': 2.2406606674194336} +03/04/2022 21:58:05 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 21:58:09 - INFO - codeparrot_training - Step 27666: {'lr': 0.0004638062361767418, 'samples': 14165504, 'steps': 27666, 'loss/train': 2.694225788116455} +03/04/2022 21:58:12 - INFO - codeparrot_training - Step 27667: {'lr': 0.00046380348587129127, 'samples': 14166016, 'steps': 27667, 'loss/train': 2.78141713142395} +03/04/2022 21:58:13 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/04/2022 21:58:17 - INFO - codeparrot_training - Step 27668: {'lr': 0.0004638007354695042, 'samples': 14166528, 'steps': 27668, 'loss/train': 2.0397236347198486} +03/04/2022 21:58:20 - INFO - codeparrot_training - Step 27669: {'lr': 0.0004637979849713818, 'samples': 14167040, 'steps': 27669, 'loss/train': 1.8724124431610107} +03/04/2022 21:58:22 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/04/2022 21:58:26 - INFO - codeparrot_training - Step 27670: {'lr': 0.0004637952343769254, 'samples': 14167552, 'steps': 27670, 'loss/train': 1.954559326171875} +03/04/2022 21:58:29 - INFO - codeparrot_training - Step 27671: {'lr': 0.00046379248368613615, 'samples': 14168064, 'steps': 27671, 'loss/train': 1.8471759557724} +03/04/2022 21:58:31 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 21:58:34 - INFO - codeparrot_training - Step 27672: {'lr': 0.0004637897328990153, 'samples': 14168576, 'steps': 27672, 'loss/train': 2.4220101833343506} +03/04/2022 21:58:37 - INFO - codeparrot_training - Step 27673: {'lr': 0.000463786982015564, 'samples': 14169088, 'steps': 27673, 'loss/train': 1.9231946468353271} +03/04/2022 21:58:39 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/04/2022 21:58:43 - INFO - codeparrot_training - Step 27674: {'lr': 0.00046378423103578373, 'samples': 14169600, 'steps': 27674, 'loss/train': 1.2739123106002808} +03/04/2022 21:58:46 - INFO - codeparrot_training - Step 27675: {'lr': 0.0004637814799596755, 'samples': 14170112, 'steps': 27675, 'loss/train': 2.087042808532715} +03/04/2022 21:58:49 - INFO - codeparrot_training - Step 27676: {'lr': 0.00046377872878724066, 'samples': 14170624, 'steps': 27676, 'loss/train': 0.645000159740448} +03/04/2022 21:58:49 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 21:58:55 - INFO - codeparrot_training - Step 27677: {'lr': 0.0004637759775184804, 'samples': 14171136, 'steps': 27677, 'loss/train': 2.3026845455169678} +03/04/2022 21:58:58 - INFO - codeparrot_training - Step 27678: {'lr': 0.000463773226153396, 'samples': 14171648, 'steps': 27678, 'loss/train': 1.6942437887191772} +03/04/2022 21:58:58 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/04/2022 21:59:03 - INFO - codeparrot_training - Step 27679: {'lr': 0.00046377047469198875, 'samples': 14172160, 'steps': 27679, 'loss/train': 1.4253997802734375} +03/04/2022 21:59:06 - INFO - codeparrot_training - Step 27680: {'lr': 0.00046376772313425974, 'samples': 14172672, 'steps': 27680, 'loss/train': 1.9372918605804443} +03/04/2022 21:59:07 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/04/2022 21:59:12 - INFO - codeparrot_training - Step 27681: {'lr': 0.0004637649714802102, 'samples': 14173184, 'steps': 27681, 'loss/train': 1.1158498525619507} +03/04/2022 21:59:15 - INFO - codeparrot_training - Step 27682: {'lr': 0.0004637622197298417, 'samples': 14173696, 'steps': 27682, 'loss/train': 1.6550767421722412} +03/04/2022 21:59:15 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 21:59:20 - INFO - codeparrot_training - Step 27683: {'lr': 0.000463759467883155, 'samples': 14174208, 'steps': 27683, 'loss/train': 3.8308522701263428} +03/04/2022 21:59:23 - INFO - codeparrot_training - Step 27684: {'lr': 0.0004637567159401518, 'samples': 14174720, 'steps': 27684, 'loss/train': 1.7069790363311768} +03/04/2022 21:59:24 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 21:59:28 - INFO - codeparrot_training - Step 27685: {'lr': 0.00046375396390083303, 'samples': 14175232, 'steps': 27685, 'loss/train': 1.4326366186141968} +03/04/2022 21:59:32 - INFO - codeparrot_training - Step 27686: {'lr': 0.0004637512117652, 'samples': 14175744, 'steps': 27686, 'loss/train': 1.5388849973678589} +03/04/2022 21:59:32 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/04/2022 21:59:37 - INFO - codeparrot_training - Step 27687: {'lr': 0.00046374845953325394, 'samples': 14176256, 'steps': 27687, 'loss/train': 2.2025234699249268} +03/04/2022 21:59:40 - INFO - codeparrot_training - Step 27688: {'lr': 0.0004637457072049962, 'samples': 14176768, 'steps': 27688, 'loss/train': 1.6369661092758179} +03/04/2022 21:59:41 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 21:59:45 - INFO - codeparrot_training - Step 27689: {'lr': 0.0004637429547804279, 'samples': 14177280, 'steps': 27689, 'loss/train': 1.3149855136871338} +03/04/2022 21:59:48 - INFO - codeparrot_training - Step 27690: {'lr': 0.0004637402022595503, 'samples': 14177792, 'steps': 27690, 'loss/train': 1.807578682899475} +03/04/2022 21:59:49 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 21:59:54 - INFO - codeparrot_training - Step 27691: {'lr': 0.0004637374496423647, 'samples': 14178304, 'steps': 27691, 'loss/train': 2.0890626907348633} +03/04/2022 21:59:57 - INFO - codeparrot_training - Step 27692: {'lr': 0.0004637346969288723, 'samples': 14178816, 'steps': 27692, 'loss/train': 1.640377163887024} +03/04/2022 21:59:57 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 22:00:03 - INFO - codeparrot_training - Step 27693: {'lr': 0.0004637319441190743, 'samples': 14179328, 'steps': 27693, 'loss/train': 1.8304755687713623} +03/04/2022 22:00:06 - INFO - codeparrot_training - Step 27694: {'lr': 0.00046372919121297207, 'samples': 14179840, 'steps': 27694, 'loss/train': 0.16078712046146393} +03/04/2022 22:00:07 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 22:00:11 - INFO - codeparrot_training - Step 27695: {'lr': 0.0004637264382105667, 'samples': 14180352, 'steps': 27695, 'loss/train': 1.9947274923324585} +03/04/2022 22:00:14 - INFO - codeparrot_training - Step 27696: {'lr': 0.00046372368511185953, 'samples': 14180864, 'steps': 27696, 'loss/train': 2.2045764923095703} +03/04/2022 22:00:16 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 22:00:20 - INFO - codeparrot_training - Step 27697: {'lr': 0.0004637209319168517, 'samples': 14181376, 'steps': 27697, 'loss/train': 1.5442280769348145} +03/04/2022 22:00:23 - INFO - codeparrot_training - Step 27698: {'lr': 0.0004637181786255446, 'samples': 14181888, 'steps': 27698, 'loss/train': 2.004926919937134} +03/04/2022 22:00:24 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 22:00:28 - INFO - codeparrot_training - Step 27699: {'lr': 0.0004637154252379394, 'samples': 14182400, 'steps': 27699, 'loss/train': 2.624953508377075} +03/04/2022 22:00:31 - INFO - codeparrot_training - Step 27700: {'lr': 0.00046371267175403724, 'samples': 14182912, 'steps': 27700, 'loss/train': 1.7273741960525513} +03/04/2022 22:00:33 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 22:00:37 - INFO - codeparrot_training - Step 27701: {'lr': 0.0004637099181738395, 'samples': 14183424, 'steps': 27701, 'loss/train': 2.193159341812134} +03/04/2022 22:00:40 - INFO - codeparrot_training - Step 27702: {'lr': 0.00046370716449734733, 'samples': 14183936, 'steps': 27702, 'loss/train': 1.833182454109192} +03/04/2022 22:00:41 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 22:00:45 - INFO - codeparrot_training - Step 27703: {'lr': 0.00046370441072456206, 'samples': 14184448, 'steps': 27703, 'loss/train': 2.585933208465576} +03/04/2022 22:00:48 - INFO - codeparrot_training - Step 27704: {'lr': 0.00046370165685548484, 'samples': 14184960, 'steps': 27704, 'loss/train': 2.016301155090332} +03/04/2022 22:00:51 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/04/2022 22:00:54 - INFO - codeparrot_training - Step 27705: {'lr': 0.00046369890289011696, 'samples': 14185472, 'steps': 27705, 'loss/train': 1.347489356994629} +03/04/2022 22:00:57 - INFO - codeparrot_training - Step 27706: {'lr': 0.0004636961488284597, 'samples': 14185984, 'steps': 27706, 'loss/train': 1.0110247135162354} +03/04/2022 22:00:59 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/04/2022 22:01:03 - INFO - codeparrot_training - Step 27707: {'lr': 0.0004636933946705142, 'samples': 14186496, 'steps': 27707, 'loss/train': 1.1653650999069214} +03/04/2022 22:01:06 - INFO - codeparrot_training - Step 27708: {'lr': 0.00046369064041628175, 'samples': 14187008, 'steps': 27708, 'loss/train': 2.1749603748321533} +03/04/2022 22:01:09 - INFO - codeparrot_training - Step 27709: {'lr': 0.00046368788606576363, 'samples': 14187520, 'steps': 27709, 'loss/train': 1.8301324844360352} +03/04/2022 22:01:11 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 22:01:15 - INFO - codeparrot_training - Step 27710: {'lr': 0.00046368513161896104, 'samples': 14188032, 'steps': 27710, 'loss/train': 1.616692066192627} +03/04/2022 22:01:18 - INFO - codeparrot_training - Step 27711: {'lr': 0.0004636823770758752, 'samples': 14188544, 'steps': 27711, 'loss/train': 2.0546274185180664} +03/04/2022 22:01:20 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 22:01:23 - INFO - codeparrot_training - Step 27712: {'lr': 0.0004636796224365074, 'samples': 14189056, 'steps': 27712, 'loss/train': 1.4505919218063354} +03/04/2022 22:01:26 - INFO - codeparrot_training - Step 27713: {'lr': 0.0004636768677008588, 'samples': 14189568, 'steps': 27713, 'loss/train': 1.7372467517852783} +03/04/2022 22:01:28 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/04/2022 22:01:31 - INFO - codeparrot_training - Step 27714: {'lr': 0.0004636741128689308, 'samples': 14190080, 'steps': 27714, 'loss/train': 1.6584452390670776} +03/04/2022 22:01:35 - INFO - codeparrot_training - Step 27715: {'lr': 0.00046367135794072445, 'samples': 14190592, 'steps': 27715, 'loss/train': 2.363029718399048} +03/04/2022 22:01:37 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 22:01:40 - INFO - codeparrot_training - Step 27716: {'lr': 0.0004636686029162411, 'samples': 14191104, 'steps': 27716, 'loss/train': 1.506821870803833} +03/04/2022 22:01:43 - INFO - codeparrot_training - Step 27717: {'lr': 0.000463665847795482, 'samples': 14191616, 'steps': 27717, 'loss/train': 1.650162696838379} +03/04/2022 22:01:45 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 22:01:48 - INFO - codeparrot_training - Step 27718: {'lr': 0.0004636630925784484, 'samples': 14192128, 'steps': 27718, 'loss/train': 0.442815899848938} +03/04/2022 22:01:51 - INFO - codeparrot_training - Step 27719: {'lr': 0.0004636603372651415, 'samples': 14192640, 'steps': 27719, 'loss/train': 2.1814231872558594} +03/04/2022 22:01:53 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 22:01:57 - INFO - codeparrot_training - Step 27720: {'lr': 0.0004636575818555625, 'samples': 14193152, 'steps': 27720, 'loss/train': 1.7144211530685425} +03/04/2022 22:02:00 - INFO - codeparrot_training - Step 27721: {'lr': 0.00046365482634971275, 'samples': 14193664, 'steps': 27721, 'loss/train': 2.4171338081359863} +03/04/2022 22:02:02 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 22:02:05 - INFO - codeparrot_training - Step 27722: {'lr': 0.00046365207074759344, 'samples': 14194176, 'steps': 27722, 'loss/train': 1.9996066093444824} +03/04/2022 22:02:08 - INFO - codeparrot_training - Step 27723: {'lr': 0.0004636493150492057, 'samples': 14194688, 'steps': 27723, 'loss/train': 1.48897123336792} +03/04/2022 22:02:10 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 22:02:14 - INFO - codeparrot_training - Step 27724: {'lr': 0.00046364655925455094, 'samples': 14195200, 'steps': 27724, 'loss/train': 2.320746898651123} +03/04/2022 22:02:17 - INFO - codeparrot_training - Step 27725: {'lr': 0.0004636438033636303, 'samples': 14195712, 'steps': 27725, 'loss/train': 2.3675310611724854} +03/04/2022 22:02:19 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 22:02:22 - INFO - codeparrot_training - Step 27726: {'lr': 0.00046364104737644515, 'samples': 14196224, 'steps': 27726, 'loss/train': 1.060471534729004} +03/04/2022 22:02:25 - INFO - codeparrot_training - Step 27727: {'lr': 0.00046363829129299655, 'samples': 14196736, 'steps': 27727, 'loss/train': 1.4515392780303955} +03/04/2022 22:02:27 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 22:02:30 - INFO - codeparrot_training - Step 27728: {'lr': 0.0004636355351132859, 'samples': 14197248, 'steps': 27728, 'loss/train': 1.8292841911315918} +03/04/2022 22:02:34 - INFO - codeparrot_training - Step 27729: {'lr': 0.00046363277883731437, 'samples': 14197760, 'steps': 27729, 'loss/train': 1.8513379096984863} +03/04/2022 22:02:36 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 22:02:39 - INFO - codeparrot_training - Step 27730: {'lr': 0.0004636300224650831, 'samples': 14198272, 'steps': 27730, 'loss/train': 1.7789907455444336} +03/04/2022 22:02:42 - INFO - codeparrot_training - Step 27731: {'lr': 0.00046362726599659355, 'samples': 14198784, 'steps': 27731, 'loss/train': 0.8972790241241455} +03/04/2022 22:02:45 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 22:02:47 - INFO - codeparrot_training - Step 27732: {'lr': 0.0004636245094318468, 'samples': 14199296, 'steps': 27732, 'loss/train': 3.09785532951355} +03/04/2022 22:02:51 - INFO - codeparrot_training - Step 27733: {'lr': 0.0004636217527708442, 'samples': 14199808, 'steps': 27733, 'loss/train': 1.6478772163391113} +03/04/2022 22:02:53 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 22:02:56 - INFO - codeparrot_training - Step 27734: {'lr': 0.0004636189960135869, 'samples': 14200320, 'steps': 27734, 'loss/train': 1.8681186437606812} +03/04/2022 22:02:59 - INFO - codeparrot_training - Step 27735: {'lr': 0.0004636162391600761, 'samples': 14200832, 'steps': 27735, 'loss/train': 2.36531662940979} +03/04/2022 22:03:01 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/04/2022 22:03:04 - INFO - codeparrot_training - Step 27736: {'lr': 0.00046361348221031316, 'samples': 14201344, 'steps': 27736, 'loss/train': 1.3374491930007935} +03/04/2022 22:03:07 - INFO - codeparrot_training - Step 27737: {'lr': 0.00046361072516429936, 'samples': 14201856, 'steps': 27737, 'loss/train': 1.3949568271636963} +03/04/2022 22:03:10 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/04/2022 22:03:13 - INFO - codeparrot_training - Step 27738: {'lr': 0.0004636079680220358, 'samples': 14202368, 'steps': 27738, 'loss/train': 2.6667675971984863} +03/04/2022 22:03:16 - INFO - codeparrot_training - Step 27739: {'lr': 0.0004636052107835238, 'samples': 14202880, 'steps': 27739, 'loss/train': 1.5909504890441895} +03/04/2022 22:03:18 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 22:03:21 - INFO - codeparrot_training - Step 27740: {'lr': 0.0004636024534487646, 'samples': 14203392, 'steps': 27740, 'loss/train': 1.7761894464492798} +03/04/2022 22:03:24 - INFO - codeparrot_training - Step 27741: {'lr': 0.0004635996960177594, 'samples': 14203904, 'steps': 27741, 'loss/train': 1.5329194068908691} +03/04/2022 22:03:27 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 22:03:29 - INFO - codeparrot_training - Step 27742: {'lr': 0.0004635969384905095, 'samples': 14204416, 'steps': 27742, 'loss/train': 2.229836940765381} +03/04/2022 22:03:33 - INFO - codeparrot_training - Step 27743: {'lr': 0.0004635941808670161, 'samples': 14204928, 'steps': 27743, 'loss/train': 1.1372333765029907} +03/04/2022 22:03:35 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 22:03:38 - INFO - codeparrot_training - Step 27744: {'lr': 0.00046359142314728047, 'samples': 14205440, 'steps': 27744, 'loss/train': 1.7281755208969116} +03/04/2022 22:03:41 - INFO - codeparrot_training - Step 27745: {'lr': 0.00046358866533130385, 'samples': 14205952, 'steps': 27745, 'loss/train': 1.8930840492248535} +03/04/2022 22:03:43 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/04/2022 22:03:46 - INFO - codeparrot_training - Step 27746: {'lr': 0.00046358590741908744, 'samples': 14206464, 'steps': 27746, 'loss/train': 1.3337597846984863} +03/04/2022 22:03:50 - INFO - codeparrot_training - Step 27747: {'lr': 0.0004635831494106325, 'samples': 14206976, 'steps': 27747, 'loss/train': 1.690032720565796} +03/04/2022 22:03:52 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/04/2022 22:03:55 - INFO - codeparrot_training - Step 27748: {'lr': 0.0004635803913059404, 'samples': 14207488, 'steps': 27748, 'loss/train': 1.4430631399154663} +03/04/2022 22:03:58 - INFO - codeparrot_training - Step 27749: {'lr': 0.00046357763310501216, 'samples': 14208000, 'steps': 27749, 'loss/train': 1.9462181329727173} +03/04/2022 22:04:01 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 22:04:04 - INFO - codeparrot_training - Step 27750: {'lr': 0.0004635748748078492, 'samples': 14208512, 'steps': 27750, 'loss/train': 1.523032307624817} +03/04/2022 22:04:07 - INFO - codeparrot_training - Step 27751: {'lr': 0.0004635721164144526, 'samples': 14209024, 'steps': 27751, 'loss/train': 0.42103275656700134} +03/04/2022 22:04:10 - INFO - codeparrot_training - Step 27752: {'lr': 0.0004635693579248238, 'samples': 14209536, 'steps': 27752, 'loss/train': 2.098022699356079} +03/04/2022 22:04:10 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 22:04:15 - INFO - codeparrot_training - Step 27753: {'lr': 0.00046356659933896393, 'samples': 14210048, 'steps': 27753, 'loss/train': 2.3072338104248047} +03/04/2022 22:04:18 - INFO - codeparrot_training - Step 27754: {'lr': 0.0004635638406568742, 'samples': 14210560, 'steps': 27754, 'loss/train': 2.0574228763580322} +03/04/2022 22:04:19 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 22:04:24 - INFO - codeparrot_training - Step 27755: {'lr': 0.00046356108187855594, 'samples': 14211072, 'steps': 27755, 'loss/train': 1.469580888748169} +03/04/2022 22:04:27 - INFO - codeparrot_training - Step 27756: {'lr': 0.00046355832300401035, 'samples': 14211584, 'steps': 27756, 'loss/train': 2.01407790184021} +03/04/2022 22:04:27 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/04/2022 22:04:32 - INFO - codeparrot_training - Step 27757: {'lr': 0.0004635555640332386, 'samples': 14212096, 'steps': 27757, 'loss/train': 2.295642614364624} +03/04/2022 22:04:35 - INFO - codeparrot_training - Step 27758: {'lr': 0.0004635528049662421, 'samples': 14212608, 'steps': 27758, 'loss/train': 1.5834499597549438} +03/04/2022 22:04:36 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 22:04:41 - INFO - codeparrot_training - Step 27759: {'lr': 0.000463550045803022, 'samples': 14213120, 'steps': 27759, 'loss/train': 2.163318634033203} +03/04/2022 22:04:44 - INFO - codeparrot_training - Step 27760: {'lr': 0.00046354728654357947, 'samples': 14213632, 'steps': 27760, 'loss/train': 0.6939167976379395} +03/04/2022 22:04:44 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 22:04:49 - INFO - codeparrot_training - Step 27761: {'lr': 0.00046354452718791586, 'samples': 14214144, 'steps': 27761, 'loss/train': 2.087846279144287} +03/04/2022 22:04:52 - INFO - codeparrot_training - Step 27762: {'lr': 0.0004635417677360324, 'samples': 14214656, 'steps': 27762, 'loss/train': 1.7547439336776733} +03/04/2022 22:04:53 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 22:04:57 - INFO - codeparrot_training - Step 27763: {'lr': 0.0004635390081879303, 'samples': 14215168, 'steps': 27763, 'loss/train': 1.6721590757369995} +03/04/2022 22:05:01 - INFO - codeparrot_training - Step 27764: {'lr': 0.0004635362485436109, 'samples': 14215680, 'steps': 27764, 'loss/train': 1.5610356330871582} +03/04/2022 22:05:01 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 22:05:06 - INFO - codeparrot_training - Step 27765: {'lr': 0.00046353348880307524, 'samples': 14216192, 'steps': 27765, 'loss/train': 2.0910394191741943} +03/04/2022 22:05:09 - INFO - codeparrot_training - Step 27766: {'lr': 0.0004635307289663248, 'samples': 14216704, 'steps': 27766, 'loss/train': 2.2547733783721924} +03/04/2022 22:05:10 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 22:05:15 - INFO - codeparrot_training - Step 27767: {'lr': 0.0004635279690333606, 'samples': 14217216, 'steps': 27767, 'loss/train': 1.8320846557617188} +03/04/2022 22:05:18 - INFO - codeparrot_training - Step 27768: {'lr': 0.00046352520900418403, 'samples': 14217728, 'steps': 27768, 'loss/train': 1.6263936758041382} +03/04/2022 22:05:18 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 22:05:23 - INFO - codeparrot_training - Step 27769: {'lr': 0.00046352244887879623, 'samples': 14218240, 'steps': 27769, 'loss/train': 2.0929505825042725} +03/04/2022 22:05:26 - INFO - codeparrot_training - Step 27770: {'lr': 0.0004635196886571986, 'samples': 14218752, 'steps': 27770, 'loss/train': 1.832887053489685} +03/04/2022 22:05:27 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/04/2022 22:05:31 - INFO - codeparrot_training - Step 27771: {'lr': 0.0004635169283393923, 'samples': 14219264, 'steps': 27771, 'loss/train': 1.312677264213562} +03/04/2022 22:05:35 - INFO - codeparrot_training - Step 27772: {'lr': 0.0004635141679253785, 'samples': 14219776, 'steps': 27772, 'loss/train': 1.8295867443084717} +03/04/2022 22:05:35 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 22:05:40 - INFO - codeparrot_training - Step 27773: {'lr': 0.0004635114074151586, 'samples': 14220288, 'steps': 27773, 'loss/train': 2.1418542861938477} +03/04/2022 22:05:43 - INFO - codeparrot_training - Step 27774: {'lr': 0.00046350864680873375, 'samples': 14220800, 'steps': 27774, 'loss/train': 1.6271815299987793} +03/04/2022 22:05:43 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/04/2022 22:05:48 - INFO - codeparrot_training - Step 27775: {'lr': 0.0004635058861061051, 'samples': 14221312, 'steps': 27775, 'loss/train': 2.012097120285034} +03/04/2022 22:05:52 - INFO - codeparrot_training - Step 27776: {'lr': 0.00046350312530727403, 'samples': 14221824, 'steps': 27776, 'loss/train': 0.13385742902755737} +03/04/2022 22:05:52 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 22:05:57 - INFO - codeparrot_training - Step 27777: {'lr': 0.00046350036441224175, 'samples': 14222336, 'steps': 27777, 'loss/train': 1.629122018814087} +03/04/2022 22:06:00 - INFO - codeparrot_training - Step 27778: {'lr': 0.00046349760342100955, 'samples': 14222848, 'steps': 27778, 'loss/train': 1.8312352895736694} +03/04/2022 22:06:00 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/04/2022 22:06:05 - INFO - codeparrot_training - Step 27779: {'lr': 0.00046349484233357854, 'samples': 14223360, 'steps': 27779, 'loss/train': 1.6179144382476807} +03/04/2022 22:06:08 - INFO - codeparrot_training - Step 27780: {'lr': 0.0004634920811499501, 'samples': 14223872, 'steps': 27780, 'loss/train': 1.5531561374664307} +03/04/2022 22:06:08 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/04/2022 22:06:14 - INFO - codeparrot_training - Step 27781: {'lr': 0.00046348931987012543, 'samples': 14224384, 'steps': 27781, 'loss/train': 1.8827059268951416} +03/04/2022 22:06:17 - INFO - codeparrot_training - Step 27782: {'lr': 0.00046348655849410577, 'samples': 14224896, 'steps': 27782, 'loss/train': 1.1866304874420166} +03/04/2022 22:06:18 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/04/2022 22:06:22 - INFO - codeparrot_training - Step 27783: {'lr': 0.0004634837970218924, 'samples': 14225408, 'steps': 27783, 'loss/train': 2.3055338859558105} +03/04/2022 22:06:25 - INFO - codeparrot_training - Step 27784: {'lr': 0.0004634810354534864, 'samples': 14225920, 'steps': 27784, 'loss/train': 1.9569162130355835} +03/04/2022 22:06:26 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/04/2022 22:06:31 - INFO - codeparrot_training - Step 27785: {'lr': 0.0004634782737888892, 'samples': 14226432, 'steps': 27785, 'loss/train': 1.464118242263794} +03/04/2022 22:06:34 - INFO - codeparrot_training - Step 27786: {'lr': 0.000463475512028102, 'samples': 14226944, 'steps': 27786, 'loss/train': 2.316864013671875} +03/04/2022 22:06:34 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 22:06:39 - INFO - codeparrot_training - Step 27787: {'lr': 0.000463472750171126, 'samples': 14227456, 'steps': 27787, 'loss/train': 1.9971297979354858} +03/04/2022 22:06:42 - INFO - codeparrot_training - Step 27788: {'lr': 0.0004634699882179625, 'samples': 14227968, 'steps': 27788, 'loss/train': 1.4182398319244385} +03/04/2022 22:06:43 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 22:06:48 - INFO - codeparrot_training - Step 27789: {'lr': 0.0004634672261686127, 'samples': 14228480, 'steps': 27789, 'loss/train': 1.9485887289047241} +03/04/2022 22:06:51 - INFO - codeparrot_training - Step 27790: {'lr': 0.0004634644640230779, 'samples': 14228992, 'steps': 27790, 'loss/train': 2.0548746585845947} +03/04/2022 22:06:51 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/04/2022 22:06:56 - INFO - codeparrot_training - Step 27791: {'lr': 0.0004634617017813593, 'samples': 14229504, 'steps': 27791, 'loss/train': 1.986729383468628} +03/04/2022 22:06:59 - INFO - codeparrot_training - Step 27792: {'lr': 0.00046345893944345806, 'samples': 14230016, 'steps': 27792, 'loss/train': 2.2010934352874756} +03/04/2022 22:06:59 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 22:07:04 - INFO - codeparrot_training - Step 27793: {'lr': 0.00046345617700937564, 'samples': 14230528, 'steps': 27793, 'loss/train': 0.4626636803150177} +03/04/2022 22:07:08 - INFO - codeparrot_training - Step 27794: {'lr': 0.0004634534144791131, 'samples': 14231040, 'steps': 27794, 'loss/train': 1.7331457138061523} +03/04/2022 22:07:08 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 22:07:13 - INFO - codeparrot_training - Step 27795: {'lr': 0.0004634506518526718, 'samples': 14231552, 'steps': 27795, 'loss/train': 1.4106076955795288} +03/04/2022 22:07:16 - INFO - codeparrot_training - Step 27796: {'lr': 0.00046344788913005286, 'samples': 14232064, 'steps': 27796, 'loss/train': 1.3276640176773071} +03/04/2022 22:07:16 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/04/2022 22:07:21 - INFO - codeparrot_training - Step 27797: {'lr': 0.00046344512631125756, 'samples': 14232576, 'steps': 27797, 'loss/train': 1.9046461582183838} +03/04/2022 22:07:24 - INFO - codeparrot_training - Step 27798: {'lr': 0.00046344236339628724, 'samples': 14233088, 'steps': 27798, 'loss/train': 1.5118050575256348} +03/04/2022 22:07:24 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/04/2022 22:07:30 - INFO - codeparrot_training - Step 27799: {'lr': 0.0004634396003851431, 'samples': 14233600, 'steps': 27799, 'loss/train': 1.7403936386108398} +03/04/2022 22:07:33 - INFO - codeparrot_training - Step 27800: {'lr': 0.00046343683727782635, 'samples': 14234112, 'steps': 27800, 'loss/train': 1.4251739978790283} +03/04/2022 22:07:33 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 22:07:38 - INFO - codeparrot_training - Step 27801: {'lr': 0.0004634340740743382, 'samples': 14234624, 'steps': 27801, 'loss/train': 2.0048484802246094} +03/04/2022 22:07:41 - INFO - codeparrot_training - Step 27802: {'lr': 0.00046343131077468, 'samples': 14235136, 'steps': 27802, 'loss/train': 1.9860423803329468} +03/04/2022 22:07:41 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/04/2022 22:07:47 - INFO - codeparrot_training - Step 27803: {'lr': 0.00046342854737885296, 'samples': 14235648, 'steps': 27803, 'loss/train': 2.3793885707855225} +03/04/2022 22:07:49 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 22:07:52 - INFO - codeparrot_training - Step 27804: {'lr': 0.00046342578388685837, 'samples': 14236160, 'steps': 27804, 'loss/train': 2.4320528507232666} +03/04/2022 22:07:55 - INFO - codeparrot_training - Step 27805: {'lr': 0.0004634230202986973, 'samples': 14236672, 'steps': 27805, 'loss/train': 1.7690705060958862} +03/04/2022 22:07:58 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 22:08:00 - INFO - codeparrot_training - Step 27806: {'lr': 0.0004634202566143712, 'samples': 14237184, 'steps': 27806, 'loss/train': 1.710745930671692} +03/04/2022 22:08:03 - INFO - codeparrot_training - Step 27807: {'lr': 0.00046341749283388117, 'samples': 14237696, 'steps': 27807, 'loss/train': 1.9012010097503662} +03/04/2022 22:08:06 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 22:08:09 - INFO - codeparrot_training - Step 27808: {'lr': 0.0004634147289572285, 'samples': 14238208, 'steps': 27808, 'loss/train': 1.891396164894104} +03/04/2022 22:08:12 - INFO - codeparrot_training - Step 27809: {'lr': 0.00046341196498441453, 'samples': 14238720, 'steps': 27809, 'loss/train': 1.6843780279159546} +03/04/2022 22:08:15 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/04/2022 22:08:17 - INFO - codeparrot_training - Step 27810: {'lr': 0.0004634092009154403, 'samples': 14239232, 'steps': 27810, 'loss/train': 1.9456217288970947} +03/04/2022 22:08:20 - INFO - codeparrot_training - Step 27811: {'lr': 0.0004634064367503072, 'samples': 14239744, 'steps': 27811, 'loss/train': 1.508108377456665} +03/04/2022 22:08:23 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 22:08:26 - INFO - codeparrot_training - Step 27812: {'lr': 0.00046340367248901655, 'samples': 14240256, 'steps': 27812, 'loss/train': 1.7578274011611938} +03/04/2022 22:08:29 - INFO - codeparrot_training - Step 27813: {'lr': 0.00046340090813156944, 'samples': 14240768, 'steps': 27813, 'loss/train': 0.723953366279602} +03/04/2022 22:08:32 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 22:08:34 - INFO - codeparrot_training - Step 27814: {'lr': 0.00046339814367796716, 'samples': 14241280, 'steps': 27814, 'loss/train': 2.3181283473968506} +03/04/2022 22:08:37 - INFO - codeparrot_training - Step 27815: {'lr': 0.00046339537912821094, 'samples': 14241792, 'steps': 27815, 'loss/train': 1.6481131315231323} +03/04/2022 22:08:40 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 22:08:43 - INFO - codeparrot_training - Step 27816: {'lr': 0.0004633926144823022, 'samples': 14242304, 'steps': 27816, 'loss/train': 2.20108699798584} +03/04/2022 22:08:46 - INFO - codeparrot_training - Step 27817: {'lr': 0.0004633898497402419, 'samples': 14242816, 'steps': 27817, 'loss/train': 1.3461902141571045} +03/04/2022 22:08:49 - INFO - codeparrot_training - Step 27818: {'lr': 0.0004633870849020314, 'samples': 14243328, 'steps': 27818, 'loss/train': 2.112186908721924} +03/04/2022 22:08:49 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 22:08:54 - INFO - codeparrot_training - Step 27819: {'lr': 0.00046338431996767205, 'samples': 14243840, 'steps': 27819, 'loss/train': 1.6549814939498901} +03/04/2022 22:08:57 - INFO - codeparrot_training - Step 27820: {'lr': 0.00046338155493716503, 'samples': 14244352, 'steps': 27820, 'loss/train': 0.6535369157791138} +03/04/2022 22:08:58 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/04/2022 22:09:03 - INFO - codeparrot_training - Step 27821: {'lr': 0.0004633787898105115, 'samples': 14244864, 'steps': 27821, 'loss/train': 1.411918044090271} +03/04/2022 22:09:06 - INFO - codeparrot_training - Step 27822: {'lr': 0.0004633760245877129, 'samples': 14245376, 'steps': 27822, 'loss/train': 1.8394396305084229} +03/04/2022 22:09:06 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/04/2022 22:09:11 - INFO - codeparrot_training - Step 27823: {'lr': 0.0004633732592687703, 'samples': 14245888, 'steps': 27823, 'loss/train': 2.455958604812622} +03/04/2022 22:09:15 - INFO - codeparrot_training - Step 27824: {'lr': 0.00046337049385368495, 'samples': 14246400, 'steps': 27824, 'loss/train': 0.4692844748497009} +03/04/2022 22:09:15 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 22:09:20 - INFO - codeparrot_training - Step 27825: {'lr': 0.00046336772834245824, 'samples': 14246912, 'steps': 27825, 'loss/train': 1.6152745485305786} +03/04/2022 22:09:23 - INFO - codeparrot_training - Step 27826: {'lr': 0.0004633649627350912, 'samples': 14247424, 'steps': 27826, 'loss/train': 2.0641887187957764} +03/04/2022 22:09:23 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/04/2022 22:09:28 - INFO - codeparrot_training - Step 27827: {'lr': 0.00046336219703158526, 'samples': 14247936, 'steps': 27827, 'loss/train': 1.6779061555862427} +03/04/2022 22:09:31 - INFO - codeparrot_training - Step 27828: {'lr': 0.00046335943123194164, 'samples': 14248448, 'steps': 27828, 'loss/train': 1.4329558610916138} +03/04/2022 22:09:31 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 22:09:37 - INFO - codeparrot_training - Step 27829: {'lr': 0.0004633566653361615, 'samples': 14248960, 'steps': 27829, 'loss/train': 3.0693910121917725} +03/04/2022 22:09:40 - INFO - codeparrot_training - Step 27830: {'lr': 0.0004633538993442462, 'samples': 14249472, 'steps': 27830, 'loss/train': 1.9775872230529785} +03/04/2022 22:09:40 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 22:09:45 - INFO - codeparrot_training - Step 27831: {'lr': 0.00046335113325619685, 'samples': 14249984, 'steps': 27831, 'loss/train': 2.1445913314819336} +03/04/2022 22:09:48 - INFO - codeparrot_training - Step 27832: {'lr': 0.00046334836707201486, 'samples': 14250496, 'steps': 27832, 'loss/train': 1.3260197639465332} +03/04/2022 22:09:48 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 22:09:53 - INFO - codeparrot_training - Step 27833: {'lr': 0.0004633456007917013, 'samples': 14251008, 'steps': 27833, 'loss/train': 1.877514123916626} +03/04/2022 22:09:57 - INFO - codeparrot_training - Step 27834: {'lr': 0.0004633428344152576, 'samples': 14251520, 'steps': 27834, 'loss/train': 1.5650569200515747} +03/04/2022 22:09:57 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/04/2022 22:10:02 - INFO - codeparrot_training - Step 27835: {'lr': 0.0004633400679426848, 'samples': 14252032, 'steps': 27835, 'loss/train': 1.3776929378509521} +03/04/2022 22:10:05 - INFO - codeparrot_training - Step 27836: {'lr': 0.00046333730137398433, 'samples': 14252544, 'steps': 27836, 'loss/train': 2.5411412715911865} +03/04/2022 22:10:05 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 22:10:10 - INFO - codeparrot_training - Step 27837: {'lr': 0.00046333453470915736, 'samples': 14253056, 'steps': 27837, 'loss/train': 2.2302629947662354} +03/04/2022 22:10:13 - INFO - codeparrot_training - Step 27838: {'lr': 0.0004633317679482051, 'samples': 14253568, 'steps': 27838, 'loss/train': 1.519649863243103} +03/04/2022 22:10:14 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/04/2022 22:10:19 - INFO - codeparrot_training - Step 27839: {'lr': 0.00046332900109112893, 'samples': 14254080, 'steps': 27839, 'loss/train': 1.549185037612915} +03/04/2022 22:10:22 - INFO - codeparrot_training - Step 27840: {'lr': 0.0004633262341379299, 'samples': 14254592, 'steps': 27840, 'loss/train': 0.859388530254364} +03/04/2022 22:10:22 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/04/2022 22:10:27 - INFO - codeparrot_training - Step 27841: {'lr': 0.0004633234670886094, 'samples': 14255104, 'steps': 27841, 'loss/train': 1.855017066001892} +03/04/2022 22:10:30 - INFO - codeparrot_training - Step 27842: {'lr': 0.0004633206999431686, 'samples': 14255616, 'steps': 27842, 'loss/train': 1.7508885860443115} +03/04/2022 22:10:31 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 22:10:36 - INFO - codeparrot_training - Step 27843: {'lr': 0.00046331793270160885, 'samples': 14256128, 'steps': 27843, 'loss/train': 2.497023820877075} +03/04/2022 22:10:39 - INFO - codeparrot_training - Step 27844: {'lr': 0.0004633151653639314, 'samples': 14256640, 'steps': 27844, 'loss/train': 0.954716682434082} +03/04/2022 22:10:39 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 22:10:44 - INFO - codeparrot_training - Step 27845: {'lr': 0.00046331239793013726, 'samples': 14257152, 'steps': 27845, 'loss/train': 1.5120946168899536} +03/04/2022 22:10:47 - INFO - codeparrot_training - Step 27846: {'lr': 0.0004633096304002279, 'samples': 14257664, 'steps': 27846, 'loss/train': 2.254528760910034} +03/04/2022 22:10:47 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 22:10:53 - INFO - codeparrot_training - Step 27847: {'lr': 0.00046330686277420454, 'samples': 14258176, 'steps': 27847, 'loss/train': 2.151296615600586} +03/04/2022 22:10:56 - INFO - codeparrot_training - Step 27848: {'lr': 0.00046330409505206837, 'samples': 14258688, 'steps': 27848, 'loss/train': 2.572255849838257} +03/04/2022 22:10:56 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 22:11:01 - INFO - codeparrot_training - Step 27849: {'lr': 0.00046330132723382066, 'samples': 14259200, 'steps': 27849, 'loss/train': 1.9765233993530273} +03/04/2022 22:11:04 - INFO - codeparrot_training - Step 27850: {'lr': 0.0004632985593194627, 'samples': 14259712, 'steps': 27850, 'loss/train': 1.3368562459945679} +03/04/2022 22:11:04 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 22:11:10 - INFO - codeparrot_training - Step 27851: {'lr': 0.00046329579130899567, 'samples': 14260224, 'steps': 27851, 'loss/train': 1.6849944591522217} +03/04/2022 22:11:13 - INFO - codeparrot_training - Step 27852: {'lr': 0.0004632930232024209, 'samples': 14260736, 'steps': 27852, 'loss/train': 1.436474323272705} +03/04/2022 22:11:13 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 22:11:18 - INFO - codeparrot_training - Step 27853: {'lr': 0.0004632902549997395, 'samples': 14261248, 'steps': 27853, 'loss/train': 1.2919046878814697} +03/04/2022 22:11:21 - INFO - codeparrot_training - Step 27854: {'lr': 0.00046328748670095287, 'samples': 14261760, 'steps': 27854, 'loss/train': 2.773169994354248} +03/04/2022 22:11:21 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 22:11:27 - INFO - codeparrot_training - Step 27855: {'lr': 0.0004632847183060622, 'samples': 14262272, 'steps': 27855, 'loss/train': 1.354175090789795} +03/04/2022 22:11:30 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 22:11:32 - INFO - codeparrot_training - Step 27856: {'lr': 0.0004632819498150688, 'samples': 14262784, 'steps': 27856, 'loss/train': 1.643376350402832} +03/04/2022 22:11:35 - INFO - codeparrot_training - Step 27857: {'lr': 0.00046327918122797363, 'samples': 14263296, 'steps': 27857, 'loss/train': 1.3152709007263184} +03/04/2022 22:11:38 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 22:11:40 - INFO - codeparrot_training - Step 27858: {'lr': 0.00046327641254477833, 'samples': 14263808, 'steps': 27858, 'loss/train': 1.742732286453247} +03/04/2022 22:11:44 - INFO - codeparrot_training - Step 27859: {'lr': 0.00046327364376548384, 'samples': 14264320, 'steps': 27859, 'loss/train': 1.619951605796814} +03/04/2022 22:11:46 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 22:11:49 - INFO - codeparrot_training - Step 27860: {'lr': 0.0004632708748900917, 'samples': 14264832, 'steps': 27860, 'loss/train': 2.0265564918518066} +03/04/2022 22:11:52 - INFO - codeparrot_training - Step 27861: {'lr': 0.00046326810591860285, 'samples': 14265344, 'steps': 27861, 'loss/train': 1.6696511507034302} +03/04/2022 22:11:55 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 22:11:57 - INFO - codeparrot_training - Step 27862: {'lr': 0.0004632653368510187, 'samples': 14265856, 'steps': 27862, 'loss/train': 1.8721580505371094} +03/04/2022 22:12:01 - INFO - codeparrot_training - Step 27863: {'lr': 0.00046326256768734053, 'samples': 14266368, 'steps': 27863, 'loss/train': 1.9082244634628296} +03/04/2022 22:12:04 - INFO - codeparrot_training - Step 27864: {'lr': 0.0004632597984275695, 'samples': 14266880, 'steps': 27864, 'loss/train': 1.9762349128723145} +03/04/2022 22:12:05 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 22:12:09 - INFO - codeparrot_training - Step 27865: {'lr': 0.00046325702907170697, 'samples': 14267392, 'steps': 27865, 'loss/train': 1.7194844484329224} +03/04/2022 22:12:12 - INFO - codeparrot_training - Step 27866: {'lr': 0.000463254259619754, 'samples': 14267904, 'steps': 27866, 'loss/train': 2.0421030521392822} +03/04/2022 22:12:13 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/04/2022 22:12:17 - INFO - codeparrot_training - Step 27867: {'lr': 0.000463251490071712, 'samples': 14268416, 'steps': 27867, 'loss/train': 1.0017389059066772} +03/04/2022 22:12:21 - INFO - codeparrot_training - Step 27868: {'lr': 0.0004632487204275822, 'samples': 14268928, 'steps': 27868, 'loss/train': 1.224450707435608} +03/04/2022 22:12:22 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/04/2022 22:12:26 - INFO - codeparrot_training - Step 27869: {'lr': 0.0004632459506873658, 'samples': 14269440, 'steps': 27869, 'loss/train': 1.614784836769104} +03/04/2022 22:12:29 - INFO - codeparrot_training - Step 27870: {'lr': 0.0004632431808510641, 'samples': 14269952, 'steps': 27870, 'loss/train': 2.2245163917541504} +03/04/2022 22:12:30 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/04/2022 22:12:34 - INFO - codeparrot_training - Step 27871: {'lr': 0.0004632404109186782, 'samples': 14270464, 'steps': 27871, 'loss/train': 2.0076591968536377} +03/04/2022 22:12:38 - INFO - codeparrot_training - Step 27872: {'lr': 0.0004632376408902096, 'samples': 14270976, 'steps': 27872, 'loss/train': 0.2839256525039673} +03/04/2022 22:12:39 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 22:12:43 - INFO - codeparrot_training - Step 27873: {'lr': 0.0004632348707656593, 'samples': 14271488, 'steps': 27873, 'loss/train': 1.963602066040039} +03/04/2022 22:12:46 - INFO - codeparrot_training - Step 27874: {'lr': 0.00046323210054502874, 'samples': 14272000, 'steps': 27874, 'loss/train': 1.855611801147461} +03/04/2022 22:12:47 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 22:12:51 - INFO - codeparrot_training - Step 27875: {'lr': 0.00046322933022831903, 'samples': 14272512, 'steps': 27875, 'loss/train': 1.9642767906188965} +03/04/2022 22:12:55 - INFO - codeparrot_training - Step 27876: {'lr': 0.0004632265598155315, 'samples': 14273024, 'steps': 27876, 'loss/train': 0.16419781744480133} +03/04/2022 22:12:56 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 22:13:00 - INFO - codeparrot_training - Step 27877: {'lr': 0.00046322378930666736, 'samples': 14273536, 'steps': 27877, 'loss/train': 2.0920801162719727} +03/04/2022 22:13:03 - INFO - codeparrot_training - Step 27878: {'lr': 0.0004632210187017278, 'samples': 14274048, 'steps': 27878, 'loss/train': 1.591761589050293} +03/04/2022 22:13:04 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/04/2022 22:13:08 - INFO - codeparrot_training - Step 27879: {'lr': 0.00046321824800071425, 'samples': 14274560, 'steps': 27879, 'loss/train': 1.734160304069519} +03/04/2022 22:13:11 - INFO - codeparrot_training - Step 27880: {'lr': 0.0004632154772036279, 'samples': 14275072, 'steps': 27880, 'loss/train': 0.7569888234138489} +03/04/2022 22:13:12 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 22:13:17 - INFO - codeparrot_training - Step 27881: {'lr': 0.0004632127063104698, 'samples': 14275584, 'steps': 27881, 'loss/train': 2.0740513801574707} +03/04/2022 22:13:20 - INFO - codeparrot_training - Step 27882: {'lr': 0.00046320993532124137, 'samples': 14276096, 'steps': 27882, 'loss/train': 1.583892583847046} +03/04/2022 22:13:21 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 22:13:25 - INFO - codeparrot_training - Step 27883: {'lr': 0.0004632071642359439, 'samples': 14276608, 'steps': 27883, 'loss/train': 0.7008077502250671} +03/04/2022 22:13:28 - INFO - codeparrot_training - Step 27884: {'lr': 0.0004632043930545785, 'samples': 14277120, 'steps': 27884, 'loss/train': 2.089836597442627} +03/04/2022 22:13:29 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 22:13:34 - INFO - codeparrot_training - Step 27885: {'lr': 0.00046320162177714653, 'samples': 14277632, 'steps': 27885, 'loss/train': 1.4336007833480835} +03/04/2022 22:13:37 - INFO - codeparrot_training - Step 27886: {'lr': 0.00046319885040364925, 'samples': 14278144, 'steps': 27886, 'loss/train': 1.7551071643829346} +03/04/2022 22:13:38 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 22:13:42 - INFO - codeparrot_training - Step 27887: {'lr': 0.00046319607893408776, 'samples': 14278656, 'steps': 27887, 'loss/train': 2.4879469871520996} +03/04/2022 22:13:46 - INFO - codeparrot_training - Step 27888: {'lr': 0.0004631933073684635, 'samples': 14279168, 'steps': 27888, 'loss/train': 0.281965434551239} +03/04/2022 22:13:48 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 22:13:51 - INFO - codeparrot_training - Step 27889: {'lr': 0.00046319053570677754, 'samples': 14279680, 'steps': 27889, 'loss/train': 1.8241602182388306} +03/04/2022 22:13:54 - INFO - codeparrot_training - Step 27890: {'lr': 0.0004631877639490313, 'samples': 14280192, 'steps': 27890, 'loss/train': 1.5852327346801758} +03/04/2022 22:13:56 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 22:13:59 - INFO - codeparrot_training - Step 27891: {'lr': 0.0004631849920952259, 'samples': 14280704, 'steps': 27891, 'loss/train': 1.0881717205047607} +03/04/2022 22:14:02 - INFO - codeparrot_training - Step 27892: {'lr': 0.0004631822201453626, 'samples': 14281216, 'steps': 27892, 'loss/train': 1.511894702911377} +03/04/2022 22:14:05 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 22:14:08 - INFO - codeparrot_training - Step 27893: {'lr': 0.0004631794480994427, 'samples': 14281728, 'steps': 27893, 'loss/train': 2.1161115169525146} +03/04/2022 22:14:11 - INFO - codeparrot_training - Step 27894: {'lr': 0.0004631766759574675, 'samples': 14282240, 'steps': 27894, 'loss/train': 1.7565356492996216} +03/04/2022 22:14:13 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 22:14:16 - INFO - codeparrot_training - Step 27895: {'lr': 0.0004631739037194381, 'samples': 14282752, 'steps': 27895, 'loss/train': 2.4350175857543945} +03/04/2022 22:14:19 - INFO - codeparrot_training - Step 27896: {'lr': 0.00046317113138535584, 'samples': 14283264, 'steps': 27896, 'loss/train': 1.3406541347503662} +03/04/2022 22:14:22 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 22:14:25 - INFO - codeparrot_training - Step 27897: {'lr': 0.0004631683589552219, 'samples': 14283776, 'steps': 27897, 'loss/train': 0.17141465842723846} +03/04/2022 22:14:28 - INFO - codeparrot_training - Step 27898: {'lr': 0.00046316558642903774, 'samples': 14284288, 'steps': 27898, 'loss/train': 1.971772313117981} +03/04/2022 22:14:31 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 22:14:33 - INFO - codeparrot_training - Step 27899: {'lr': 0.0004631628138068043, 'samples': 14284800, 'steps': 27899, 'loss/train': 0.5769757628440857} +03/04/2022 22:14:36 - INFO - codeparrot_training - Step 27900: {'lr': 0.00046316004108852305, 'samples': 14285312, 'steps': 27900, 'loss/train': 1.3511649370193481} +03/04/2022 22:14:39 - INFO - codeparrot_training - Step 27901: {'lr': 0.0004631572682741952, 'samples': 14285824, 'steps': 27901, 'loss/train': 1.7435011863708496} +03/04/2022 22:14:39 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 22:14:45 - INFO - codeparrot_training - Step 27902: {'lr': 0.0004631544953638219, 'samples': 14286336, 'steps': 27902, 'loss/train': 0.2282589226961136} +03/04/2022 22:14:48 - INFO - codeparrot_training - Step 27903: {'lr': 0.00046315172235740455, 'samples': 14286848, 'steps': 27903, 'loss/train': 1.8253365755081177} +03/04/2022 22:14:48 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 22:14:53 - INFO - codeparrot_training - Step 27904: {'lr': 0.0004631489492549443, 'samples': 14287360, 'steps': 27904, 'loss/train': 1.9457635879516602} +03/04/2022 22:14:57 - INFO - codeparrot_training - Step 27905: {'lr': 0.00046314617605644243, 'samples': 14287872, 'steps': 27905, 'loss/train': 1.4312101602554321} +03/04/2022 22:14:57 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/04/2022 22:15:02 - INFO - codeparrot_training - Step 27906: {'lr': 0.0004631434027619001, 'samples': 14288384, 'steps': 27906, 'loss/train': 1.775363564491272} +03/04/2022 22:15:05 - INFO - codeparrot_training - Step 27907: {'lr': 0.0004631406293713188, 'samples': 14288896, 'steps': 27907, 'loss/train': 2.2983882427215576} +03/04/2022 22:15:06 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/04/2022 22:15:10 - INFO - codeparrot_training - Step 27908: {'lr': 0.0004631378558846995, 'samples': 14289408, 'steps': 27908, 'loss/train': 1.6185795068740845} +03/04/2022 22:15:14 - INFO - codeparrot_training - Step 27909: {'lr': 0.00046313508230204364, 'samples': 14289920, 'steps': 27909, 'loss/train': 1.8852150440216064} +03/04/2022 22:15:14 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 22:15:19 - INFO - codeparrot_training - Step 27910: {'lr': 0.00046313230862335235, 'samples': 14290432, 'steps': 27910, 'loss/train': 1.721232533454895} +03/04/2022 22:15:22 - INFO - codeparrot_training - Step 27911: {'lr': 0.000463129534848627, 'samples': 14290944, 'steps': 27911, 'loss/train': 1.9536317586898804} +03/04/2022 22:15:24 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 22:15:28 - INFO - codeparrot_training - Step 27912: {'lr': 0.0004631267609778687, 'samples': 14291456, 'steps': 27912, 'loss/train': 1.6025091409683228} +03/04/2022 22:15:31 - INFO - codeparrot_training - Step 27913: {'lr': 0.0004631239870110788, 'samples': 14291968, 'steps': 27913, 'loss/train': 2.11519193649292} +03/04/2022 22:15:33 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 22:15:36 - INFO - codeparrot_training - Step 27914: {'lr': 0.00046312121294825846, 'samples': 14292480, 'steps': 27914, 'loss/train': 2.1453258991241455} +03/04/2022 22:15:40 - INFO - codeparrot_training - Step 27915: {'lr': 0.00046311843878940904, 'samples': 14292992, 'steps': 27915, 'loss/train': 2.2039995193481445} +03/04/2022 22:15:41 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/04/2022 22:15:45 - INFO - codeparrot_training - Step 27916: {'lr': 0.0004631156645345318, 'samples': 14293504, 'steps': 27916, 'loss/train': 1.5869899988174438} +03/04/2022 22:15:48 - INFO - codeparrot_training - Step 27917: {'lr': 0.0004631128901836278, 'samples': 14294016, 'steps': 27917, 'loss/train': 1.9840222597122192} +03/04/2022 22:15:50 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/04/2022 22:15:53 - INFO - codeparrot_training - Step 27918: {'lr': 0.0004631101157366985, 'samples': 14294528, 'steps': 27918, 'loss/train': 2.6739704608917236} +03/04/2022 22:15:56 - INFO - codeparrot_training - Step 27919: {'lr': 0.0004631073411937451, 'samples': 14295040, 'steps': 27919, 'loss/train': 1.8380647897720337} +03/04/2022 22:15:58 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/04/2022 22:16:02 - INFO - codeparrot_training - Step 27920: {'lr': 0.00046310456655476875, 'samples': 14295552, 'steps': 27920, 'loss/train': 1.934631586074829} +03/04/2022 22:16:05 - INFO - codeparrot_training - Step 27921: {'lr': 0.0004631017918197709, 'samples': 14296064, 'steps': 27921, 'loss/train': 2.0217432975769043} +03/04/2022 22:16:06 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/04/2022 22:16:10 - INFO - codeparrot_training - Step 27922: {'lr': 0.00046309901698875244, 'samples': 14296576, 'steps': 27922, 'loss/train': 1.850279688835144} +03/04/2022 22:16:13 - INFO - codeparrot_training - Step 27923: {'lr': 0.00046309624206171505, 'samples': 14297088, 'steps': 27923, 'loss/train': 0.8564593195915222} +03/04/2022 22:16:15 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/04/2022 22:16:19 - INFO - codeparrot_training - Step 27924: {'lr': 0.00046309346703865973, 'samples': 14297600, 'steps': 27924, 'loss/train': 1.8524950742721558} +03/04/2022 22:16:22 - INFO - codeparrot_training - Step 27925: {'lr': 0.00046309069191958775, 'samples': 14298112, 'steps': 27925, 'loss/train': 1.1815844774246216} +03/04/2022 22:16:23 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 22:16:27 - INFO - codeparrot_training - Step 27926: {'lr': 0.00046308791670450033, 'samples': 14298624, 'steps': 27926, 'loss/train': 1.0284316539764404} +03/04/2022 22:16:30 - INFO - codeparrot_training - Step 27927: {'lr': 0.00046308514139339896, 'samples': 14299136, 'steps': 27927, 'loss/train': 1.700566053390503} +03/04/2022 22:16:32 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 22:16:35 - INFO - codeparrot_training - Step 27928: {'lr': 0.0004630823659862846, 'samples': 14299648, 'steps': 27928, 'loss/train': 2.0303456783294678} +03/04/2022 22:16:39 - INFO - codeparrot_training - Step 27929: {'lr': 0.0004630795904831586, 'samples': 14300160, 'steps': 27929, 'loss/train': 1.8341206312179565} +03/04/2022 22:16:40 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 22:16:44 - INFO - codeparrot_training - Step 27930: {'lr': 0.0004630768148840223, 'samples': 14300672, 'steps': 27930, 'loss/train': 0.9762017130851746} +03/04/2022 22:16:47 - INFO - codeparrot_training - Step 27931: {'lr': 0.0004630740391888768, 'samples': 14301184, 'steps': 27931, 'loss/train': 1.5248593091964722} +03/04/2022 22:16:49 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 22:16:52 - INFO - codeparrot_training - Step 27932: {'lr': 0.0004630712633977234, 'samples': 14301696, 'steps': 27932, 'loss/train': 0.806500256061554} +03/04/2022 22:16:56 - INFO - codeparrot_training - Step 27933: {'lr': 0.00046306848751056346, 'samples': 14302208, 'steps': 27933, 'loss/train': 1.718628168106079} +03/04/2022 22:16:57 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 22:17:01 - INFO - codeparrot_training - Step 27934: {'lr': 0.0004630657115273981, 'samples': 14302720, 'steps': 27934, 'loss/train': 1.8251714706420898} +03/04/2022 22:17:04 - INFO - codeparrot_training - Step 27935: {'lr': 0.0004630629354482286, 'samples': 14303232, 'steps': 27935, 'loss/train': 2.0446701049804688} +03/04/2022 22:17:05 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 22:17:09 - INFO - codeparrot_training - Step 27936: {'lr': 0.00046306015927305633, 'samples': 14303744, 'steps': 27936, 'loss/train': 2.2135467529296875} +03/04/2022 22:17:12 - INFO - codeparrot_training - Step 27937: {'lr': 0.0004630573830018824, 'samples': 14304256, 'steps': 27937, 'loss/train': 1.7606581449508667} +03/04/2022 22:17:13 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 22:17:18 - INFO - codeparrot_training - Step 27938: {'lr': 0.00046305460663470803, 'samples': 14304768, 'steps': 27938, 'loss/train': 2.1651854515075684} +03/04/2022 22:17:21 - INFO - codeparrot_training - Step 27939: {'lr': 0.0004630518301715346, 'samples': 14305280, 'steps': 27939, 'loss/train': 1.3201555013656616} +03/04/2022 22:17:22 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 22:17:26 - INFO - codeparrot_training - Step 27940: {'lr': 0.00046304905361236335, 'samples': 14305792, 'steps': 27940, 'loss/train': 1.4048999547958374} +03/04/2022 22:17:29 - INFO - codeparrot_training - Step 27941: {'lr': 0.00046304627695719535, 'samples': 14306304, 'steps': 27941, 'loss/train': 2.0570197105407715} +03/04/2022 22:17:30 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 22:17:35 - INFO - codeparrot_training - Step 27942: {'lr': 0.0004630435002060321, 'samples': 14306816, 'steps': 27942, 'loss/train': 2.0686562061309814} +03/04/2022 22:17:38 - INFO - codeparrot_training - Step 27943: {'lr': 0.0004630407233588747, 'samples': 14307328, 'steps': 27943, 'loss/train': 1.516229271888733} +03/04/2022 22:17:39 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/04/2022 22:17:43 - INFO - codeparrot_training - Step 27944: {'lr': 0.00046303794641572445, 'samples': 14307840, 'steps': 27944, 'loss/train': 2.164670467376709} +03/04/2022 22:17:46 - INFO - codeparrot_training - Step 27945: {'lr': 0.0004630351693765825, 'samples': 14308352, 'steps': 27945, 'loss/train': 1.1335526704788208} +03/04/2022 22:17:47 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 22:17:52 - INFO - codeparrot_training - Step 27946: {'lr': 0.0004630323922414503, 'samples': 14308864, 'steps': 27946, 'loss/train': 1.6934776306152344} +03/04/2022 22:17:55 - INFO - codeparrot_training - Step 27947: {'lr': 0.00046302961501032896, 'samples': 14309376, 'steps': 27947, 'loss/train': 1.3476037979125977} +03/04/2022 22:17:55 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/04/2022 22:18:00 - INFO - codeparrot_training - Step 27948: {'lr': 0.00046302683768321973, 'samples': 14309888, 'steps': 27948, 'loss/train': 1.55656099319458} +03/04/2022 22:18:03 - INFO - codeparrot_training - Step 27949: {'lr': 0.00046302406026012396, 'samples': 14310400, 'steps': 27949, 'loss/train': 0.8655235767364502} +03/04/2022 22:18:04 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 22:18:09 - INFO - codeparrot_training - Step 27950: {'lr': 0.0004630212827410428, 'samples': 14310912, 'steps': 27950, 'loss/train': 1.631001353263855} +03/04/2022 22:18:12 - INFO - codeparrot_training - Step 27951: {'lr': 0.00046301850512597755, 'samples': 14311424, 'steps': 27951, 'loss/train': 2.3786988258361816} +03/04/2022 22:18:12 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 22:18:17 - INFO - codeparrot_training - Step 27952: {'lr': 0.0004630157274149294, 'samples': 14311936, 'steps': 27952, 'loss/train': 1.6180237531661987} +03/04/2022 22:18:20 - INFO - codeparrot_training - Step 27953: {'lr': 0.0004630129496078997, 'samples': 14312448, 'steps': 27953, 'loss/train': 1.5433504581451416} +03/04/2022 22:18:21 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 22:18:26 - INFO - codeparrot_training - Step 27954: {'lr': 0.00046301017170488965, 'samples': 14312960, 'steps': 27954, 'loss/train': 2.174212694168091} +03/04/2022 22:18:29 - INFO - codeparrot_training - Step 27955: {'lr': 0.0004630073937059005, 'samples': 14313472, 'steps': 27955, 'loss/train': 1.6526682376861572} +03/04/2022 22:18:29 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/04/2022 22:18:34 - INFO - codeparrot_training - Step 27956: {'lr': 0.0004630046156109334, 'samples': 14313984, 'steps': 27956, 'loss/train': 1.9537677764892578} +03/04/2022 22:18:37 - INFO - codeparrot_training - Step 27957: {'lr': 0.0004630018374199899, 'samples': 14314496, 'steps': 27957, 'loss/train': 1.1786617040634155} +03/04/2022 22:18:38 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 22:18:43 - INFO - codeparrot_training - Step 27958: {'lr': 0.00046299905913307096, 'samples': 14315008, 'steps': 27958, 'loss/train': 1.3849451541900635} +03/04/2022 22:18:46 - INFO - codeparrot_training - Step 27959: {'lr': 0.00046299628075017785, 'samples': 14315520, 'steps': 27959, 'loss/train': 2.0837199687957764} +03/04/2022 22:18:47 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/04/2022 22:18:51 - INFO - codeparrot_training - Step 27960: {'lr': 0.000462993502271312, 'samples': 14316032, 'steps': 27960, 'loss/train': 1.7922285795211792} +03/04/2022 22:18:54 - INFO - codeparrot_training - Step 27961: {'lr': 0.00046299072369647453, 'samples': 14316544, 'steps': 27961, 'loss/train': 2.1117124557495117} +03/04/2022 22:18:56 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/04/2022 22:19:00 - INFO - codeparrot_training - Step 27962: {'lr': 0.00046298794502566676, 'samples': 14317056, 'steps': 27962, 'loss/train': 1.7747284173965454} +03/04/2022 22:19:03 - INFO - codeparrot_training - Step 27963: {'lr': 0.0004629851662588899, 'samples': 14317568, 'steps': 27963, 'loss/train': 1.6693531274795532} +03/04/2022 22:19:04 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 22:19:08 - INFO - codeparrot_training - Step 27964: {'lr': 0.00046298238739614524, 'samples': 14318080, 'steps': 27964, 'loss/train': 0.9834689497947693} +03/04/2022 22:19:11 - INFO - codeparrot_training - Step 27965: {'lr': 0.0004629796084374339, 'samples': 14318592, 'steps': 27965, 'loss/train': 0.5830991268157959} +03/04/2022 22:19:13 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/04/2022 22:19:16 - INFO - codeparrot_training - Step 27966: {'lr': 0.00046297682938275733, 'samples': 14319104, 'steps': 27966, 'loss/train': 1.0919314622879028} +03/04/2022 22:19:20 - INFO - codeparrot_training - Step 27967: {'lr': 0.0004629740502321167, 'samples': 14319616, 'steps': 27967, 'loss/train': 3.1301751136779785} +03/04/2022 22:19:22 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 22:19:25 - INFO - codeparrot_training - Step 27968: {'lr': 0.00046297127098551317, 'samples': 14320128, 'steps': 27968, 'loss/train': 2.1403682231903076} +03/04/2022 22:19:28 - INFO - codeparrot_training - Step 27969: {'lr': 0.00046296849164294816, 'samples': 14320640, 'steps': 27969, 'loss/train': 1.8543730974197388} +03/04/2022 22:19:30 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 22:19:34 - INFO - codeparrot_training - Step 27970: {'lr': 0.00046296571220442274, 'samples': 14321152, 'steps': 27970, 'loss/train': 0.5319598317146301} +03/04/2022 22:19:37 - INFO - codeparrot_training - Step 27971: {'lr': 0.00046296293266993833, 'samples': 14321664, 'steps': 27971, 'loss/train': 2.1653618812561035} +03/04/2022 22:19:39 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 22:19:42 - INFO - codeparrot_training - Step 27972: {'lr': 0.00046296015303949606, 'samples': 14322176, 'steps': 27972, 'loss/train': 2.0075464248657227} +03/04/2022 22:19:45 - INFO - codeparrot_training - Step 27973: {'lr': 0.0004629573733130973, 'samples': 14322688, 'steps': 27973, 'loss/train': 2.2769131660461426} +03/04/2022 22:19:47 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 22:19:50 - INFO - codeparrot_training - Step 27974: {'lr': 0.00046295459349074316, 'samples': 14323200, 'steps': 27974, 'loss/train': 1.8948166370391846} +03/04/2022 22:19:54 - INFO - codeparrot_training - Step 27975: {'lr': 0.000462951813572435, 'samples': 14323712, 'steps': 27975, 'loss/train': 2.2822437286376953} +03/04/2022 22:19:55 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 22:19:59 - INFO - codeparrot_training - Step 27976: {'lr': 0.00046294903355817397, 'samples': 14324224, 'steps': 27976, 'loss/train': 0.23006807267665863} +03/04/2022 22:20:02 - INFO - codeparrot_training - Step 27977: {'lr': 0.0004629462534479615, 'samples': 14324736, 'steps': 27977, 'loss/train': 2.0448102951049805} +03/04/2022 22:20:04 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/04/2022 22:20:07 - INFO - codeparrot_training - Step 27978: {'lr': 0.0004629434732417986, 'samples': 14325248, 'steps': 27978, 'loss/train': 1.0648891925811768} +03/04/2022 22:20:11 - INFO - codeparrot_training - Step 27979: {'lr': 0.0004629406929396868, 'samples': 14325760, 'steps': 27979, 'loss/train': 2.147395610809326} +03/04/2022 22:20:12 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 22:20:17 - INFO - codeparrot_training - Step 27980: {'lr': 0.00046293791254162713, 'samples': 14326272, 'steps': 27980, 'loss/train': 1.997711181640625} +03/04/2022 22:20:20 - INFO - codeparrot_training - Step 27981: {'lr': 0.0004629351320476209, 'samples': 14326784, 'steps': 27981, 'loss/train': 1.9108020067214966} +03/04/2022 22:20:23 - INFO - codeparrot_training - Step 27982: {'lr': 0.00046293235145766955, 'samples': 14327296, 'steps': 27982, 'loss/train': 2.2130215167999268} +03/04/2022 22:20:25 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 22:20:29 - INFO - codeparrot_training - Step 27983: {'lr': 0.000462929570771774, 'samples': 14327808, 'steps': 27983, 'loss/train': 1.697169303894043} +03/04/2022 22:20:32 - INFO - codeparrot_training - Step 27984: {'lr': 0.0004629267899899358, 'samples': 14328320, 'steps': 27984, 'loss/train': 2.050527811050415} +03/04/2022 22:20:34 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 22:20:37 - INFO - codeparrot_training - Step 27985: {'lr': 0.00046292400911215594, 'samples': 14328832, 'steps': 27985, 'loss/train': 2.893350601196289} +03/04/2022 22:20:40 - INFO - codeparrot_training - Step 27986: {'lr': 0.00046292122813843586, 'samples': 14329344, 'steps': 27986, 'loss/train': 1.1114239692687988} +03/04/2022 22:20:43 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 22:20:46 - INFO - codeparrot_training - Step 27987: {'lr': 0.00046291844706877674, 'samples': 14329856, 'steps': 27987, 'loss/train': 1.4518760442733765} +03/04/2022 22:20:49 - INFO - codeparrot_training - Step 27988: {'lr': 0.0004629156659031799, 'samples': 14330368, 'steps': 27988, 'loss/train': 1.8703193664550781} +03/04/2022 22:20:52 - INFO - codeparrot_training - Step 27989: {'lr': 0.0004629128846416465, 'samples': 14330880, 'steps': 27989, 'loss/train': 0.9988330006599426} +03/04/2022 22:20:53 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 22:20:58 - INFO - codeparrot_training - Step 27990: {'lr': 0.00046291010328417784, 'samples': 14331392, 'steps': 27990, 'loss/train': 1.769507884979248} +03/04/2022 22:21:01 - INFO - codeparrot_training - Step 27991: {'lr': 0.0004629073218307752, 'samples': 14331904, 'steps': 27991, 'loss/train': 2.7357914447784424} +03/04/2022 22:21:01 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 22:21:06 - INFO - codeparrot_training - Step 27992: {'lr': 0.0004629045402814398, 'samples': 14332416, 'steps': 27992, 'loss/train': 1.180209755897522} +03/04/2022 22:21:09 - INFO - codeparrot_training - Step 27993: {'lr': 0.0004629017586361729, 'samples': 14332928, 'steps': 27993, 'loss/train': 1.7916556596755981} +03/04/2022 22:21:09 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/04/2022 22:21:14 - INFO - codeparrot_training - Step 27994: {'lr': 0.0004628989768949757, 'samples': 14333440, 'steps': 27994, 'loss/train': 1.5045077800750732} +03/04/2022 22:21:17 - INFO - codeparrot_training - Step 27995: {'lr': 0.0004628961950578496, 'samples': 14333952, 'steps': 27995, 'loss/train': 1.9115160703659058} +03/04/2022 22:21:18 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 22:21:23 - INFO - codeparrot_training - Step 27996: {'lr': 0.00046289341312479574, 'samples': 14334464, 'steps': 27996, 'loss/train': 1.8505536317825317} +03/04/2022 22:21:26 - INFO - codeparrot_training - Step 27997: {'lr': 0.0004628906310958153, 'samples': 14334976, 'steps': 27997, 'loss/train': 1.650133728981018} +03/04/2022 22:21:26 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 22:21:31 - INFO - codeparrot_training - Step 27998: {'lr': 0.00046288784897090973, 'samples': 14335488, 'steps': 27998, 'loss/train': 0.48331236839294434} +03/04/2022 22:21:34 - INFO - codeparrot_training - Step 27999: {'lr': 0.00046288506675008014, 'samples': 14336000, 'steps': 27999, 'loss/train': 1.9386268854141235} +03/04/2022 22:21:34 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 22:21:40 - INFO - codeparrot_training - Step 28000: {'lr': 0.0004628822844333278, 'samples': 14336512, 'steps': 28000, 'loss/train': 2.1001925468444824} +03/04/2022 22:21:43 - INFO - codeparrot_training - Step 28001: {'lr': 0.0004628795020206541, 'samples': 14337024, 'steps': 28001, 'loss/train': 2.069309949874878} +03/04/2022 22:21:43 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 22:21:48 - INFO - codeparrot_training - Step 28002: {'lr': 0.00046287671951206004, 'samples': 14337536, 'steps': 28002, 'loss/train': 1.4137552976608276} +03/04/2022 22:21:51 - INFO - codeparrot_training - Step 28003: {'lr': 0.0004628739369075471, 'samples': 14338048, 'steps': 28003, 'loss/train': 2.2543540000915527} +03/04/2022 22:21:51 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 22:21:57 - INFO - codeparrot_training - Step 28004: {'lr': 0.00046287115420711643, 'samples': 14338560, 'steps': 28004, 'loss/train': 1.3900833129882812} +03/04/2022 22:22:02 - INFO - codeparrot_training - Step 28005: {'lr': 0.00046286837141076934, 'samples': 14339072, 'steps': 28005, 'loss/train': 1.8759207725524902} +03/04/2022 22:22:05 - INFO - codeparrot_training - Step 28006: {'lr': 0.0004628655885185069, 'samples': 14339584, 'steps': 28006, 'loss/train': 1.685986042022705} +03/04/2022 22:22:08 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 22:22:10 - INFO - codeparrot_training - Step 28007: {'lr': 0.00046286280553033067, 'samples': 14340096, 'steps': 28007, 'loss/train': 2.1517112255096436} +03/04/2022 22:22:14 - INFO - codeparrot_training - Step 28008: {'lr': 0.0004628600224462417, 'samples': 14340608, 'steps': 28008, 'loss/train': 1.8084824085235596} +03/04/2022 22:22:17 - INFO - codeparrot_training - Step 28009: {'lr': 0.00046285723926624126, 'samples': 14341120, 'steps': 28009, 'loss/train': 2.0590226650238037} +03/04/2022 22:22:17 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 22:22:22 - INFO - codeparrot_training - Step 28010: {'lr': 0.00046285445599033063, 'samples': 14341632, 'steps': 28010, 'loss/train': 2.651611089706421} +03/04/2022 22:22:25 - INFO - codeparrot_training - Step 28011: {'lr': 0.00046285167261851114, 'samples': 14342144, 'steps': 28011, 'loss/train': 1.8674241304397583} +03/04/2022 22:22:25 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/04/2022 22:22:31 - INFO - codeparrot_training - Step 28012: {'lr': 0.00046284888915078384, 'samples': 14342656, 'steps': 28012, 'loss/train': 1.6005983352661133} +03/04/2022 22:22:34 - INFO - codeparrot_training - Step 28013: {'lr': 0.00046284610558715024, 'samples': 14343168, 'steps': 28013, 'loss/train': 1.6902517080307007} +03/04/2022 22:22:34 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 22:22:39 - INFO - codeparrot_training - Step 28014: {'lr': 0.00046284332192761136, 'samples': 14343680, 'steps': 28014, 'loss/train': 2.5007622241973877} +03/04/2022 22:22:42 - INFO - codeparrot_training - Step 28015: {'lr': 0.0004628405381721686, 'samples': 14344192, 'steps': 28015, 'loss/train': 1.9219526052474976} +03/04/2022 22:22:42 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 22:22:48 - INFO - codeparrot_training - Step 28016: {'lr': 0.00046283775432082327, 'samples': 14344704, 'steps': 28016, 'loss/train': 1.7110927104949951} +03/04/2022 22:22:51 - INFO - codeparrot_training - Step 28017: {'lr': 0.0004628349703735765, 'samples': 14345216, 'steps': 28017, 'loss/train': 2.3022358417510986} +03/04/2022 22:22:51 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 22:22:56 - INFO - codeparrot_training - Step 28018: {'lr': 0.0004628321863304295, 'samples': 14345728, 'steps': 28018, 'loss/train': 2.0060653686523438} +03/04/2022 22:22:59 - INFO - codeparrot_training - Step 28019: {'lr': 0.00046282940219138366, 'samples': 14346240, 'steps': 28019, 'loss/train': 1.5071872472763062} +03/04/2022 22:22:59 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/04/2022 22:23:05 - INFO - codeparrot_training - Step 28020: {'lr': 0.0004628266179564401, 'samples': 14346752, 'steps': 28020, 'loss/train': 0.5800417065620422} +03/04/2022 22:23:08 - INFO - codeparrot_training - Step 28021: {'lr': 0.0004628238336256002, 'samples': 14347264, 'steps': 28021, 'loss/train': 2.117781639099121} +03/04/2022 22:23:08 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 22:23:13 - INFO - codeparrot_training - Step 28022: {'lr': 0.0004628210491988652, 'samples': 14347776, 'steps': 28022, 'loss/train': 1.8395440578460693} +03/04/2022 22:23:16 - INFO - codeparrot_training - Step 28023: {'lr': 0.0004628182646762363, 'samples': 14348288, 'steps': 28023, 'loss/train': 2.2641303539276123} +03/04/2022 22:23:16 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/04/2022 22:23:22 - INFO - codeparrot_training - Step 28024: {'lr': 0.00046281548005771476, 'samples': 14348800, 'steps': 28024, 'loss/train': 1.0367774963378906} +03/04/2022 22:23:25 - INFO - codeparrot_training - Step 28025: {'lr': 0.0004628126953433018, 'samples': 14349312, 'steps': 28025, 'loss/train': 1.7554303407669067} +03/04/2022 22:23:25 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 22:23:30 - INFO - codeparrot_training - Step 28026: {'lr': 0.00046280991053299883, 'samples': 14349824, 'steps': 28026, 'loss/train': 1.0367465019226074} +03/04/2022 22:23:33 - INFO - codeparrot_training - Step 28027: {'lr': 0.00046280712562680695, 'samples': 14350336, 'steps': 28027, 'loss/train': 1.6584105491638184} +03/04/2022 22:23:33 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 22:23:39 - INFO - codeparrot_training - Step 28028: {'lr': 0.0004628043406247274, 'samples': 14350848, 'steps': 28028, 'loss/train': 1.6952285766601562} +03/04/2022 22:23:42 - INFO - codeparrot_training - Step 28029: {'lr': 0.0004628015555267616, 'samples': 14351360, 'steps': 28029, 'loss/train': 2.2824454307556152} +03/04/2022 22:23:42 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 22:23:47 - INFO - codeparrot_training - Step 28030: {'lr': 0.00046279877033291063, 'samples': 14351872, 'steps': 28030, 'loss/train': 1.4954257011413574} +03/04/2022 22:23:51 - INFO - codeparrot_training - Step 28031: {'lr': 0.0004627959850431759, 'samples': 14352384, 'steps': 28031, 'loss/train': 1.7569440603256226} +03/04/2022 22:23:51 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 22:23:56 - INFO - codeparrot_training - Step 28032: {'lr': 0.0004627931996575585, 'samples': 14352896, 'steps': 28032, 'loss/train': 1.4409431219100952} +03/04/2022 22:23:59 - INFO - codeparrot_training - Step 28033: {'lr': 0.0004627904141760598, 'samples': 14353408, 'steps': 28033, 'loss/train': 1.2438899278640747} +03/04/2022 22:24:00 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 22:24:05 - INFO - codeparrot_training - Step 28034: {'lr': 0.000462787628598681, 'samples': 14353920, 'steps': 28034, 'loss/train': 1.4846700429916382} +03/04/2022 22:24:08 - INFO - codeparrot_training - Step 28035: {'lr': 0.00046278484292542346, 'samples': 14354432, 'steps': 28035, 'loss/train': 0.6266364455223083} +03/04/2022 22:24:09 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/04/2022 22:24:13 - INFO - codeparrot_training - Step 28036: {'lr': 0.0004627820571562883, 'samples': 14354944, 'steps': 28036, 'loss/train': 2.7089147567749023} +03/04/2022 22:24:16 - INFO - codeparrot_training - Step 28037: {'lr': 0.0004627792712912768, 'samples': 14355456, 'steps': 28037, 'loss/train': 1.5414420366287231} +03/04/2022 22:24:17 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/04/2022 22:24:22 - INFO - codeparrot_training - Step 28038: {'lr': 0.0004627764853303902, 'samples': 14355968, 'steps': 28038, 'loss/train': 1.9628666639328003} +03/04/2022 22:24:25 - INFO - codeparrot_training - Step 28039: {'lr': 0.00046277369927362987, 'samples': 14356480, 'steps': 28039, 'loss/train': 1.6780688762664795} +03/04/2022 22:24:26 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 22:24:30 - INFO - codeparrot_training - Step 28040: {'lr': 0.00046277091312099704, 'samples': 14356992, 'steps': 28040, 'loss/train': 1.309260606765747} +03/04/2022 22:24:33 - INFO - codeparrot_training - Step 28041: {'lr': 0.00046276812687249283, 'samples': 14357504, 'steps': 28041, 'loss/train': 1.8374944925308228} +03/04/2022 22:24:34 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 22:24:39 - INFO - codeparrot_training - Step 28042: {'lr': 0.00046276534052811863, 'samples': 14358016, 'steps': 28042, 'loss/train': 2.0515449047088623} +03/04/2022 22:24:42 - INFO - codeparrot_training - Step 28043: {'lr': 0.00046276255408787565, 'samples': 14358528, 'steps': 28043, 'loss/train': 1.48404061794281} +03/04/2022 22:24:43 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 22:24:47 - INFO - codeparrot_training - Step 28044: {'lr': 0.0004627597675517652, 'samples': 14359040, 'steps': 28044, 'loss/train': 1.839157223701477} +03/04/2022 22:24:50 - INFO - codeparrot_training - Step 28045: {'lr': 0.00046275698091978836, 'samples': 14359552, 'steps': 28045, 'loss/train': 2.3646225929260254} +03/04/2022 22:24:51 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 22:24:56 - INFO - codeparrot_training - Step 28046: {'lr': 0.0004627541941919466, 'samples': 14360064, 'steps': 28046, 'loss/train': 2.2540459632873535} +03/04/2022 22:24:59 - INFO - codeparrot_training - Step 28047: {'lr': 0.00046275140736824104, 'samples': 14360576, 'steps': 28047, 'loss/train': 0.4593677222728729} +03/04/2022 22:25:00 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/04/2022 22:25:04 - INFO - codeparrot_training - Step 28048: {'lr': 0.000462748620448673, 'samples': 14361088, 'steps': 28048, 'loss/train': 1.189672589302063} +03/04/2022 22:25:07 - INFO - codeparrot_training - Step 28049: {'lr': 0.0004627458334332437, 'samples': 14361600, 'steps': 28049, 'loss/train': 1.681689739227295} +03/04/2022 22:25:08 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 22:25:12 - INFO - codeparrot_training - Step 28050: {'lr': 0.0004627430463219544, 'samples': 14362112, 'steps': 28050, 'loss/train': 1.5495625734329224} +03/04/2022 22:25:16 - INFO - codeparrot_training - Step 28051: {'lr': 0.0004627402591148064, 'samples': 14362624, 'steps': 28051, 'loss/train': 2.1977484226226807} +03/04/2022 22:25:17 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 22:25:21 - INFO - codeparrot_training - Step 28052: {'lr': 0.0004627374718118009, 'samples': 14363136, 'steps': 28052, 'loss/train': 3.241132974624634} +03/04/2022 22:25:24 - INFO - codeparrot_training - Step 28053: {'lr': 0.0004627346844129392, 'samples': 14363648, 'steps': 28053, 'loss/train': 1.9198987483978271} +03/04/2022 22:25:25 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 22:25:30 - INFO - codeparrot_training - Step 28054: {'lr': 0.0004627318969182225, 'samples': 14364160, 'steps': 28054, 'loss/train': 2.144991874694824} +03/04/2022 22:25:33 - INFO - codeparrot_training - Step 28055: {'lr': 0.0004627291093276521, 'samples': 14364672, 'steps': 28055, 'loss/train': 1.6902788877487183} +03/04/2022 22:25:34 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 22:25:38 - INFO - codeparrot_training - Step 28056: {'lr': 0.0004627263216412292, 'samples': 14365184, 'steps': 28056, 'loss/train': 1.9141831398010254} +03/04/2022 22:25:41 - INFO - codeparrot_training - Step 28057: {'lr': 0.00046272353385895515, 'samples': 14365696, 'steps': 28057, 'loss/train': 2.4049510955810547} +03/04/2022 22:25:42 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 22:25:46 - INFO - codeparrot_training - Step 28058: {'lr': 0.0004627207459808312, 'samples': 14366208, 'steps': 28058, 'loss/train': 1.802938461303711} +03/04/2022 22:25:50 - INFO - codeparrot_training - Step 28059: {'lr': 0.00046271795800685854, 'samples': 14366720, 'steps': 28059, 'loss/train': 1.8242777585983276} +03/04/2022 22:25:51 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/04/2022 22:25:55 - INFO - codeparrot_training - Step 28060: {'lr': 0.00046271516993703844, 'samples': 14367232, 'steps': 28060, 'loss/train': 1.6778475046157837} +03/04/2022 22:25:58 - INFO - codeparrot_training - Step 28061: {'lr': 0.00046271238177137216, 'samples': 14367744, 'steps': 28061, 'loss/train': 2.5240142345428467} +03/04/2022 22:25:59 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/04/2022 22:26:03 - INFO - codeparrot_training - Step 28062: {'lr': 0.00046270959350986095, 'samples': 14368256, 'steps': 28062, 'loss/train': 1.4100782871246338} +03/04/2022 22:26:06 - INFO - codeparrot_training - Step 28063: {'lr': 0.0004627068051525061, 'samples': 14368768, 'steps': 28063, 'loss/train': 0.25495561957359314} +03/04/2022 22:26:08 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 22:26:12 - INFO - codeparrot_training - Step 28064: {'lr': 0.00046270401669930885, 'samples': 14369280, 'steps': 28064, 'loss/train': 2.4806411266326904} +03/04/2022 22:26:15 - INFO - codeparrot_training - Step 28065: {'lr': 0.0004627012281502704, 'samples': 14369792, 'steps': 28065, 'loss/train': 1.7448320388793945} +03/04/2022 22:26:16 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 22:26:20 - INFO - codeparrot_training - Step 28066: {'lr': 0.00046269843950539214, 'samples': 14370304, 'steps': 28066, 'loss/train': 2.0208756923675537} +03/04/2022 22:26:23 - INFO - codeparrot_training - Step 28067: {'lr': 0.00046269565076467517, 'samples': 14370816, 'steps': 28067, 'loss/train': 1.8965331315994263} +03/04/2022 22:26:25 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/04/2022 22:26:29 - INFO - codeparrot_training - Step 28068: {'lr': 0.0004626928619281209, 'samples': 14371328, 'steps': 28068, 'loss/train': 1.8070625066757202} +03/04/2022 22:26:32 - INFO - codeparrot_training - Step 28069: {'lr': 0.0004626900729957305, 'samples': 14371840, 'steps': 28069, 'loss/train': 1.7898110151290894} +03/04/2022 22:26:33 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 22:26:37 - INFO - codeparrot_training - Step 28070: {'lr': 0.00046268728396750515, 'samples': 14372352, 'steps': 28070, 'loss/train': 2.071082830429077} +03/04/2022 22:26:40 - INFO - codeparrot_training - Step 28071: {'lr': 0.0004626844948434462, 'samples': 14372864, 'steps': 28071, 'loss/train': 1.9518851041793823} +03/04/2022 22:26:42 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 22:26:46 - INFO - codeparrot_training - Step 28072: {'lr': 0.00046268170562355497, 'samples': 14373376, 'steps': 28072, 'loss/train': 1.5169181823730469} +03/04/2022 22:26:49 - INFO - codeparrot_training - Step 28073: {'lr': 0.0004626789163078327, 'samples': 14373888, 'steps': 28073, 'loss/train': 2.2166080474853516} +03/04/2022 22:26:52 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 22:26:54 - INFO - codeparrot_training - Step 28074: {'lr': 0.00046267612689628046, 'samples': 14374400, 'steps': 28074, 'loss/train': 6.40658712387085} +03/04/2022 22:26:57 - INFO - codeparrot_training - Step 28075: {'lr': 0.00046267333738889973, 'samples': 14374912, 'steps': 28075, 'loss/train': 1.5569745302200317} +03/04/2022 22:27:01 - INFO - codeparrot_training - Step 28076: {'lr': 0.00046267054778569163, 'samples': 14375424, 'steps': 28076, 'loss/train': 2.055246353149414} +03/04/2022 22:27:01 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/04/2022 22:27:06 - INFO - codeparrot_training - Step 28077: {'lr': 0.0004626677580866574, 'samples': 14375936, 'steps': 28077, 'loss/train': 1.1890004873275757} +03/04/2022 22:27:09 - INFO - codeparrot_training - Step 28078: {'lr': 0.00046266496829179847, 'samples': 14376448, 'steps': 28078, 'loss/train': 1.3166096210479736} +03/04/2022 22:27:09 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/04/2022 22:27:14 - INFO - codeparrot_training - Step 28079: {'lr': 0.0004626621784011159, 'samples': 14376960, 'steps': 28079, 'loss/train': 2.110304832458496} +03/04/2022 22:27:18 - INFO - codeparrot_training - Step 28080: {'lr': 0.0004626593884146111, 'samples': 14377472, 'steps': 28080, 'loss/train': 2.522806167602539} +03/04/2022 22:27:18 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 22:27:23 - INFO - codeparrot_training - Step 28081: {'lr': 0.00046265659833228523, 'samples': 14377984, 'steps': 28081, 'loss/train': 1.8437256813049316} +03/04/2022 22:27:26 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/04/2022 22:27:28 - INFO - codeparrot_training - Step 28082: {'lr': 0.0004626538081541396, 'samples': 14378496, 'steps': 28082, 'loss/train': 1.475285530090332} +03/04/2022 22:27:31 - INFO - codeparrot_training - Step 28083: {'lr': 0.00046265101788017543, 'samples': 14379008, 'steps': 28083, 'loss/train': 2.596146583557129} +03/04/2022 22:27:34 - INFO - codeparrot_training - Step 28084: {'lr': 0.00046264822751039406, 'samples': 14379520, 'steps': 28084, 'loss/train': 2.2042605876922607} +03/04/2022 22:27:35 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 22:27:40 - INFO - codeparrot_training - Step 28085: {'lr': 0.00046264543704479654, 'samples': 14380032, 'steps': 28085, 'loss/train': 1.8914812803268433} +03/04/2022 22:27:43 - INFO - codeparrot_training - Step 28086: {'lr': 0.0004626426464833844, 'samples': 14380544, 'steps': 28086, 'loss/train': 2.158964157104492} +03/04/2022 22:27:43 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 22:27:48 - INFO - codeparrot_training - Step 28087: {'lr': 0.0004626398558261586, 'samples': 14381056, 'steps': 28087, 'loss/train': 1.6674350500106812} +03/04/2022 22:27:51 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 22:27:54 - INFO - codeparrot_training - Step 28088: {'lr': 0.00046263706507312073, 'samples': 14381568, 'steps': 28088, 'loss/train': 2.0252678394317627} +03/04/2022 22:27:57 - INFO - codeparrot_training - Step 28089: {'lr': 0.00046263427422427183, 'samples': 14382080, 'steps': 28089, 'loss/train': 1.9319151639938354} +03/04/2022 22:28:00 - INFO - codeparrot_training - Step 28090: {'lr': 0.00046263148327961324, 'samples': 14382592, 'steps': 28090, 'loss/train': 1.2417149543762207} +03/04/2022 22:28:01 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 22:28:05 - INFO - codeparrot_training - Step 28091: {'lr': 0.00046262869223914613, 'samples': 14383104, 'steps': 28091, 'loss/train': 1.5222437381744385} +03/04/2022 22:28:08 - INFO - codeparrot_training - Step 28092: {'lr': 0.00046262590110287183, 'samples': 14383616, 'steps': 28092, 'loss/train': 1.9832857847213745} +03/04/2022 22:28:09 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 22:28:14 - INFO - codeparrot_training - Step 28093: {'lr': 0.00046262310987079156, 'samples': 14384128, 'steps': 28093, 'loss/train': 2.2961349487304688} +03/04/2022 22:28:17 - INFO - codeparrot_training - Step 28094: {'lr': 0.0004626203185429066, 'samples': 14384640, 'steps': 28094, 'loss/train': 0.6584925651550293} +03/04/2022 22:28:17 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 22:28:22 - INFO - codeparrot_training - Step 28095: {'lr': 0.00046261752711921825, 'samples': 14385152, 'steps': 28095, 'loss/train': 2.265442132949829} +03/04/2022 22:28:25 - INFO - codeparrot_training - Step 28096: {'lr': 0.00046261473559972764, 'samples': 14385664, 'steps': 28096, 'loss/train': 1.9483779668807983} +03/04/2022 22:28:26 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 22:28:31 - INFO - codeparrot_training - Step 28097: {'lr': 0.00046261194398443617, 'samples': 14386176, 'steps': 28097, 'loss/train': 2.2661020755767822} +03/04/2022 22:28:34 - INFO - codeparrot_training - Step 28098: {'lr': 0.00046260915227334503, 'samples': 14386688, 'steps': 28098, 'loss/train': 1.6153074502944946} +03/04/2022 22:28:34 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 22:28:39 - INFO - codeparrot_training - Step 28099: {'lr': 0.0004626063604664555, 'samples': 14387200, 'steps': 28099, 'loss/train': 1.4579943418502808} +03/04/2022 22:28:42 - INFO - codeparrot_training - Step 28100: {'lr': 0.00046260356856376884, 'samples': 14387712, 'steps': 28100, 'loss/train': 2.0567591190338135} +03/04/2022 22:28:42 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 22:28:48 - INFO - codeparrot_training - Step 28101: {'lr': 0.0004626007765652862, 'samples': 14388224, 'steps': 28101, 'loss/train': 2.019024610519409} +03/04/2022 22:28:51 - INFO - codeparrot_training - Step 28102: {'lr': 0.00046259798447100903, 'samples': 14388736, 'steps': 28102, 'loss/train': 2.5934672355651855} +03/04/2022 22:28:51 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 22:28:56 - INFO - codeparrot_training - Step 28103: {'lr': 0.0004625951922809385, 'samples': 14389248, 'steps': 28103, 'loss/train': 1.6816918849945068} +03/04/2022 22:28:59 - INFO - codeparrot_training - Step 28104: {'lr': 0.0004625923999950758, 'samples': 14389760, 'steps': 28104, 'loss/train': 2.3240773677825928} +03/04/2022 22:28:59 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 22:29:05 - INFO - codeparrot_training - Step 28105: {'lr': 0.0004625896076134222, 'samples': 14390272, 'steps': 28105, 'loss/train': 1.3406025171279907} +03/04/2022 22:29:08 - INFO - codeparrot_training - Step 28106: {'lr': 0.00046258681513597913, 'samples': 14390784, 'steps': 28106, 'loss/train': 2.169384479522705} +03/04/2022 22:29:11 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 22:29:13 - INFO - codeparrot_training - Step 28107: {'lr': 0.0004625840225627476, 'samples': 14391296, 'steps': 28107, 'loss/train': 1.4953354597091675} +03/04/2022 22:29:17 - INFO - codeparrot_training - Step 28108: {'lr': 0.0004625812298937291, 'samples': 14391808, 'steps': 28108, 'loss/train': 2.611284017562866} +03/04/2022 22:29:19 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/04/2022 22:29:22 - INFO - codeparrot_training - Step 28109: {'lr': 0.0004625784371289247, 'samples': 14392320, 'steps': 28109, 'loss/train': 1.0776478052139282} +03/04/2022 22:29:25 - INFO - codeparrot_training - Step 28110: {'lr': 0.00046257564426833574, 'samples': 14392832, 'steps': 28110, 'loss/train': 1.6356031894683838} +03/04/2022 22:29:27 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/04/2022 22:29:30 - INFO - codeparrot_training - Step 28111: {'lr': 0.0004625728513119635, 'samples': 14393344, 'steps': 28111, 'loss/train': 1.6882630586624146} +03/04/2022 22:29:33 - INFO - codeparrot_training - Step 28112: {'lr': 0.0004625700582598092, 'samples': 14393856, 'steps': 28112, 'loss/train': 3.3646106719970703} +03/04/2022 22:29:36 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/04/2022 22:29:39 - INFO - codeparrot_training - Step 28113: {'lr': 0.00046256726511187407, 'samples': 14394368, 'steps': 28113, 'loss/train': 2.0795812606811523} +03/04/2022 22:29:42 - INFO - codeparrot_training - Step 28114: {'lr': 0.0004625644718681595, 'samples': 14394880, 'steps': 28114, 'loss/train': 2.007127523422241} +03/04/2022 22:29:44 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 22:29:47 - INFO - codeparrot_training - Step 28115: {'lr': 0.0004625616785286666, 'samples': 14395392, 'steps': 28115, 'loss/train': 1.6462960243225098} +03/04/2022 22:29:50 - INFO - codeparrot_training - Step 28116: {'lr': 0.0004625588850933967, 'samples': 14395904, 'steps': 28116, 'loss/train': 1.7275465726852417} +03/04/2022 22:29:53 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 22:29:56 - INFO - codeparrot_training - Step 28117: {'lr': 0.00046255609156235105, 'samples': 14396416, 'steps': 28117, 'loss/train': 1.7358933687210083} +03/04/2022 22:29:59 - INFO - codeparrot_training - Step 28118: {'lr': 0.0004625532979355309, 'samples': 14396928, 'steps': 28118, 'loss/train': 1.6556073427200317} +03/04/2022 22:30:01 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/04/2022 22:30:04 - INFO - codeparrot_training - Step 28119: {'lr': 0.00046255050421293756, 'samples': 14397440, 'steps': 28119, 'loss/train': 1.7418181896209717} +03/04/2022 22:30:07 - INFO - codeparrot_training - Step 28120: {'lr': 0.0004625477103945722, 'samples': 14397952, 'steps': 28120, 'loss/train': 1.591454267501831} +03/04/2022 22:30:09 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 22:30:13 - INFO - codeparrot_training - Step 28121: {'lr': 0.00046254491648043604, 'samples': 14398464, 'steps': 28121, 'loss/train': 2.4587764739990234} +03/04/2022 22:30:16 - INFO - codeparrot_training - Step 28122: {'lr': 0.00046254212247053055, 'samples': 14398976, 'steps': 28122, 'loss/train': 1.5030931234359741} +03/04/2022 22:30:18 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 22:30:21 - INFO - codeparrot_training - Step 28123: {'lr': 0.0004625393283648568, 'samples': 14399488, 'steps': 28123, 'loss/train': 2.268728733062744} +03/04/2022 22:30:24 - INFO - codeparrot_training - Step 28124: {'lr': 0.0004625365341634161, 'samples': 14400000, 'steps': 28124, 'loss/train': 2.88309645652771} +03/04/2022 22:30:26 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 22:30:29 - INFO - codeparrot_training - Step 28125: {'lr': 0.00046253373986620985, 'samples': 14400512, 'steps': 28125, 'loss/train': 1.7204844951629639} +03/04/2022 22:30:32 - INFO - codeparrot_training - Step 28126: {'lr': 0.00046253094547323904, 'samples': 14401024, 'steps': 28126, 'loss/train': 1.458883285522461} +03/04/2022 22:30:35 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 22:30:38 - INFO - codeparrot_training - Step 28127: {'lr': 0.0004625281509845051, 'samples': 14401536, 'steps': 28127, 'loss/train': 2.3355560302734375} +03/04/2022 22:30:41 - INFO - codeparrot_training - Step 28128: {'lr': 0.0004625253564000092, 'samples': 14402048, 'steps': 28128, 'loss/train': 2.2517402172088623} +03/04/2022 22:30:43 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 22:30:46 - INFO - codeparrot_training - Step 28129: {'lr': 0.00046252256171975273, 'samples': 14402560, 'steps': 28129, 'loss/train': 1.5643378496170044} +03/04/2022 22:30:49 - INFO - codeparrot_training - Step 28130: {'lr': 0.0004625197669437368, 'samples': 14403072, 'steps': 28130, 'loss/train': 1.9685091972351074} +03/04/2022 22:30:51 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 22:30:55 - INFO - codeparrot_training - Step 28131: {'lr': 0.0004625169720719628, 'samples': 14403584, 'steps': 28131, 'loss/train': 1.4467720985412598} +03/04/2022 22:30:58 - INFO - codeparrot_training - Step 28132: {'lr': 0.0004625141771044319, 'samples': 14404096, 'steps': 28132, 'loss/train': 1.6731187105178833} +03/04/2022 22:31:00 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/04/2022 22:31:03 - INFO - codeparrot_training - Step 28133: {'lr': 0.0004625113820411454, 'samples': 14404608, 'steps': 28133, 'loss/train': 1.5617496967315674} +03/04/2022 22:31:06 - INFO - codeparrot_training - Step 28134: {'lr': 0.0004625085868821046, 'samples': 14405120, 'steps': 28134, 'loss/train': 1.3026152849197388} +03/04/2022 22:31:08 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 22:31:12 - INFO - codeparrot_training - Step 28135: {'lr': 0.0004625057916273107, 'samples': 14405632, 'steps': 28135, 'loss/train': 1.1610740423202515} +03/04/2022 22:31:15 - INFO - codeparrot_training - Step 28136: {'lr': 0.00046250299627676486, 'samples': 14406144, 'steps': 28136, 'loss/train': 1.5719972848892212} +03/04/2022 22:31:17 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 22:31:20 - INFO - codeparrot_training - Step 28137: {'lr': 0.0004625002008304685, 'samples': 14406656, 'steps': 28137, 'loss/train': 1.1747713088989258} +03/04/2022 22:31:23 - INFO - codeparrot_training - Step 28138: {'lr': 0.00046249740528842286, 'samples': 14407168, 'steps': 28138, 'loss/train': 1.5609873533248901} +03/04/2022 22:31:25 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 22:31:28 - INFO - codeparrot_training - Step 28139: {'lr': 0.00046249460965062917, 'samples': 14407680, 'steps': 28139, 'loss/train': 1.4873164892196655} +03/04/2022 22:31:32 - INFO - codeparrot_training - Step 28140: {'lr': 0.0004624918139170887, 'samples': 14408192, 'steps': 28140, 'loss/train': 2.3407249450683594} +03/04/2022 22:31:34 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 22:31:37 - INFO - codeparrot_training - Step 28141: {'lr': 0.0004624890180878027, 'samples': 14408704, 'steps': 28141, 'loss/train': 1.74484121799469} +03/04/2022 22:31:40 - INFO - codeparrot_training - Step 28142: {'lr': 0.00046248622216277235, 'samples': 14409216, 'steps': 28142, 'loss/train': 1.6680779457092285} +03/04/2022 22:31:42 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/04/2022 22:31:45 - INFO - codeparrot_training - Step 28143: {'lr': 0.0004624834261419991, 'samples': 14409728, 'steps': 28143, 'loss/train': 2.0923094749450684} +03/04/2022 22:31:49 - INFO - codeparrot_training - Step 28144: {'lr': 0.000462480630025484, 'samples': 14410240, 'steps': 28144, 'loss/train': 1.4857609272003174} +03/04/2022 22:31:51 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 22:31:54 - INFO - codeparrot_training - Step 28145: {'lr': 0.0004624778338132285, 'samples': 14410752, 'steps': 28145, 'loss/train': 1.739702820777893} +03/04/2022 22:31:57 - INFO - codeparrot_training - Step 28146: {'lr': 0.0004624750375052337, 'samples': 14411264, 'steps': 28146, 'loss/train': 2.859133005142212} +03/04/2022 22:31:59 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 22:32:02 - INFO - codeparrot_training - Step 28147: {'lr': 0.0004624722411015009, 'samples': 14411776, 'steps': 28147, 'loss/train': 1.755147099494934} +03/04/2022 22:32:06 - INFO - codeparrot_training - Step 28148: {'lr': 0.0004624694446020314, 'samples': 14412288, 'steps': 28148, 'loss/train': 1.7164280414581299} +03/04/2022 22:32:08 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 22:32:11 - INFO - codeparrot_training - Step 28149: {'lr': 0.0004624666480068265, 'samples': 14412800, 'steps': 28149, 'loss/train': 0.9909658432006836} +03/04/2022 22:32:14 - INFO - codeparrot_training - Step 28150: {'lr': 0.0004624638513158874, 'samples': 14413312, 'steps': 28150, 'loss/train': 2.305788993835449} +03/04/2022 22:32:16 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 22:32:19 - INFO - codeparrot_training - Step 28151: {'lr': 0.0004624610545292154, 'samples': 14413824, 'steps': 28151, 'loss/train': 1.5843918323516846} +03/04/2022 22:32:22 - INFO - codeparrot_training - Step 28152: {'lr': 0.00046245825764681166, 'samples': 14414336, 'steps': 28152, 'loss/train': 1.8396695852279663} +03/04/2022 22:32:25 - INFO - codeparrot_training - Skipping example with length 524 (seq_length=1024) +03/04/2022 22:32:28 - INFO - codeparrot_training - Step 28153: {'lr': 0.0004624554606686775, 'samples': 14414848, 'steps': 28153, 'loss/train': 1.48122239112854} +03/04/2022 22:32:31 - INFO - codeparrot_training - Step 28154: {'lr': 0.0004624526635948142, 'samples': 14415360, 'steps': 28154, 'loss/train': 1.8700424432754517} +03/04/2022 22:32:33 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 22:32:36 - INFO - codeparrot_training - Step 28155: {'lr': 0.000462449866425223, 'samples': 14415872, 'steps': 28155, 'loss/train': 1.9450435638427734} +03/04/2022 22:32:39 - INFO - codeparrot_training - Step 28156: {'lr': 0.0004624470691599052, 'samples': 14416384, 'steps': 28156, 'loss/train': 0.6915452480316162} +03/04/2022 22:32:41 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/04/2022 22:32:45 - INFO - codeparrot_training - Step 28157: {'lr': 0.00046244427179886207, 'samples': 14416896, 'steps': 28157, 'loss/train': 1.5829966068267822} +03/04/2022 22:32:48 - INFO - codeparrot_training - Step 28158: {'lr': 0.0004624414743420947, 'samples': 14417408, 'steps': 28158, 'loss/train': 1.7616063356399536} +03/04/2022 22:32:50 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 22:32:53 - INFO - codeparrot_training - Step 28159: {'lr': 0.00046243867678960463, 'samples': 14417920, 'steps': 28159, 'loss/train': 1.8184945583343506} +03/04/2022 22:32:56 - INFO - codeparrot_training - Step 28160: {'lr': 0.00046243587914139285, 'samples': 14418432, 'steps': 28160, 'loss/train': 1.7058660984039307} +03/04/2022 22:32:58 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 22:33:01 - INFO - codeparrot_training - Step 28161: {'lr': 0.00046243308139746076, 'samples': 14418944, 'steps': 28161, 'loss/train': 2.9440455436706543} +03/04/2022 22:33:04 - INFO - codeparrot_training - Step 28162: {'lr': 0.00046243028355780967, 'samples': 14419456, 'steps': 28162, 'loss/train': 1.7923601865768433} +03/04/2022 22:33:06 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 22:33:10 - INFO - codeparrot_training - Step 28163: {'lr': 0.00046242748562244076, 'samples': 14419968, 'steps': 28163, 'loss/train': 1.6393907070159912} +03/04/2022 22:33:13 - INFO - codeparrot_training - Step 28164: {'lr': 0.00046242468759135523, 'samples': 14420480, 'steps': 28164, 'loss/train': 1.514647126197815} +03/04/2022 22:33:14 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 22:33:18 - INFO - codeparrot_training - Step 28165: {'lr': 0.00046242188946455444, 'samples': 14420992, 'steps': 28165, 'loss/train': 1.8699649572372437} +03/04/2022 22:33:21 - INFO - codeparrot_training - Step 28166: {'lr': 0.0004624190912420397, 'samples': 14421504, 'steps': 28166, 'loss/train': 2.012799024581909} +03/04/2022 22:33:23 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/04/2022 22:33:27 - INFO - codeparrot_training - Step 28167: {'lr': 0.0004624162929238121, 'samples': 14422016, 'steps': 28167, 'loss/train': 1.6737743616104126} +03/04/2022 22:33:30 - INFO - codeparrot_training - Step 28168: {'lr': 0.000462413494509873, 'samples': 14422528, 'steps': 28168, 'loss/train': 1.6216694116592407} +03/04/2022 22:33:31 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 22:33:35 - INFO - codeparrot_training - Step 28169: {'lr': 0.0004624106960002237, 'samples': 14423040, 'steps': 28169, 'loss/train': 2.1164615154266357} +03/04/2022 22:33:38 - INFO - codeparrot_training - Step 28170: {'lr': 0.0004624078973948654, 'samples': 14423552, 'steps': 28170, 'loss/train': 1.4005062580108643} +03/04/2022 22:33:40 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 22:33:43 - INFO - codeparrot_training - Step 28171: {'lr': 0.00046240509869379943, 'samples': 14424064, 'steps': 28171, 'loss/train': 2.344982862472534} +03/04/2022 22:33:47 - INFO - codeparrot_training - Step 28172: {'lr': 0.00046240229989702697, 'samples': 14424576, 'steps': 28172, 'loss/train': 1.9281339645385742} +03/04/2022 22:33:48 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 22:33:52 - INFO - codeparrot_training - Step 28173: {'lr': 0.0004623995010045493, 'samples': 14425088, 'steps': 28173, 'loss/train': 1.7192636728286743} +03/04/2022 22:33:55 - INFO - codeparrot_training - Step 28174: {'lr': 0.0004623967020163677, 'samples': 14425600, 'steps': 28174, 'loss/train': 1.8230856657028198} +03/04/2022 22:33:58 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 22:34:01 - INFO - codeparrot_training - Step 28175: {'lr': 0.0004623939029324834, 'samples': 14426112, 'steps': 28175, 'loss/train': 2.491098403930664} +03/04/2022 22:34:04 - INFO - codeparrot_training - Step 28176: {'lr': 0.0004623911037528977, 'samples': 14426624, 'steps': 28176, 'loss/train': 2.131561756134033} +03/04/2022 22:34:06 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/04/2022 22:34:09 - INFO - codeparrot_training - Step 28177: {'lr': 0.00046238830447761184, 'samples': 14427136, 'steps': 28177, 'loss/train': 1.4753724336624146} +03/04/2022 22:34:13 - INFO - codeparrot_training - Step 28178: {'lr': 0.0004623855051066271, 'samples': 14427648, 'steps': 28178, 'loss/train': 0.8971425890922546} +03/04/2022 22:34:16 - INFO - codeparrot_training - Step 28179: {'lr': 0.00046238270563994465, 'samples': 14428160, 'steps': 28179, 'loss/train': 2.506060838699341} +03/04/2022 22:34:17 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 22:34:21 - INFO - codeparrot_training - Step 28180: {'lr': 0.00046237990607756596, 'samples': 14428672, 'steps': 28180, 'loss/train': 1.8267039060592651} +03/04/2022 22:34:24 - INFO - codeparrot_training - Step 28181: {'lr': 0.0004623771064194921, 'samples': 14429184, 'steps': 28181, 'loss/train': 0.4928413927555084} +03/04/2022 22:34:25 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/04/2022 22:34:29 - INFO - codeparrot_training - Step 28182: {'lr': 0.0004623743066657244, 'samples': 14429696, 'steps': 28182, 'loss/train': 1.3066824674606323} +03/04/2022 22:34:33 - INFO - codeparrot_training - Step 28183: {'lr': 0.00046237150681626414, 'samples': 14430208, 'steps': 28183, 'loss/train': 2.079785108566284} +03/04/2022 22:34:33 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/04/2022 22:34:38 - INFO - codeparrot_training - Step 28184: {'lr': 0.00046236870687111254, 'samples': 14430720, 'steps': 28184, 'loss/train': 2.089653968811035} +03/04/2022 22:34:41 - INFO - codeparrot_training - Step 28185: {'lr': 0.0004623659068302708, 'samples': 14431232, 'steps': 28185, 'loss/train': 1.5375726222991943} +03/04/2022 22:34:42 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/04/2022 22:34:46 - INFO - codeparrot_training - Step 28186: {'lr': 0.00046236310669374035, 'samples': 14431744, 'steps': 28186, 'loss/train': 1.521285057067871} +03/04/2022 22:34:50 - INFO - codeparrot_training - Step 28187: {'lr': 0.0004623603064615223, 'samples': 14432256, 'steps': 28187, 'loss/train': 1.598927617073059} +03/04/2022 22:34:51 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 22:34:55 - INFO - codeparrot_training - Step 28188: {'lr': 0.000462357506133618, 'samples': 14432768, 'steps': 28188, 'loss/train': 1.5292284488677979} +03/04/2022 22:34:58 - INFO - codeparrot_training - Step 28189: {'lr': 0.00046235470571002877, 'samples': 14433280, 'steps': 28189, 'loss/train': 2.5605599880218506} +03/04/2022 22:35:00 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 22:35:03 - INFO - codeparrot_training - Step 28190: {'lr': 0.00046235190519075564, 'samples': 14433792, 'steps': 28190, 'loss/train': 1.7622976303100586} +03/04/2022 22:35:06 - INFO - codeparrot_training - Step 28191: {'lr': 0.00046234910457580014, 'samples': 14434304, 'steps': 28191, 'loss/train': 1.7767924070358276} +03/04/2022 22:35:08 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 22:35:12 - INFO - codeparrot_training - Step 28192: {'lr': 0.0004623463038651633, 'samples': 14434816, 'steps': 28192, 'loss/train': 2.2895407676696777} +03/04/2022 22:35:15 - INFO - codeparrot_training - Step 28193: {'lr': 0.0004623435030588466, 'samples': 14435328, 'steps': 28193, 'loss/train': 1.5818462371826172} +03/04/2022 22:35:16 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 22:35:20 - INFO - codeparrot_training - Step 28194: {'lr': 0.00046234070215685116, 'samples': 14435840, 'steps': 28194, 'loss/train': 2.3772470951080322} +03/04/2022 22:35:23 - INFO - codeparrot_training - Step 28195: {'lr': 0.0004623379011591782, 'samples': 14436352, 'steps': 28195, 'loss/train': 1.9827202558517456} +03/04/2022 22:35:25 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 22:35:29 - INFO - codeparrot_training - Step 28196: {'lr': 0.00046233510006582913, 'samples': 14436864, 'steps': 28196, 'loss/train': 2.052654504776001} +03/04/2022 22:35:32 - INFO - codeparrot_training - Step 28197: {'lr': 0.00046233229887680517, 'samples': 14437376, 'steps': 28197, 'loss/train': 2.301666021347046} +03/04/2022 22:35:33 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/04/2022 22:35:37 - INFO - codeparrot_training - Step 28198: {'lr': 0.00046232949759210753, 'samples': 14437888, 'steps': 28198, 'loss/train': 2.4367799758911133} +03/04/2022 22:35:40 - INFO - codeparrot_training - Step 28199: {'lr': 0.00046232669621173745, 'samples': 14438400, 'steps': 28199, 'loss/train': 1.5220845937728882} +03/04/2022 22:35:42 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 22:35:45 - INFO - codeparrot_training - Step 28200: {'lr': 0.00046232389473569623, 'samples': 14438912, 'steps': 28200, 'loss/train': 2.1719753742218018} +03/04/2022 22:35:49 - INFO - codeparrot_training - Step 28201: {'lr': 0.0004623210931639852, 'samples': 14439424, 'steps': 28201, 'loss/train': 2.156470775604248} +03/04/2022 22:35:50 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 22:35:54 - INFO - codeparrot_training - Step 28202: {'lr': 0.00046231829149660553, 'samples': 14439936, 'steps': 28202, 'loss/train': 1.8819589614868164} +03/04/2022 22:35:57 - INFO - codeparrot_training - Step 28203: {'lr': 0.00046231548973355854, 'samples': 14440448, 'steps': 28203, 'loss/train': 1.222532868385315} +03/04/2022 22:35:59 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/04/2022 22:36:02 - INFO - codeparrot_training - Step 28204: {'lr': 0.00046231268787484545, 'samples': 14440960, 'steps': 28204, 'loss/train': 1.9642658233642578} +03/04/2022 22:36:05 - INFO - codeparrot_training - Step 28205: {'lr': 0.0004623098859204675, 'samples': 14441472, 'steps': 28205, 'loss/train': 1.4017316102981567} +03/04/2022 22:36:07 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 22:36:11 - INFO - codeparrot_training - Step 28206: {'lr': 0.00046230708387042603, 'samples': 14441984, 'steps': 28206, 'loss/train': 1.466529369354248} +03/04/2022 22:36:14 - INFO - codeparrot_training - Step 28207: {'lr': 0.0004623042817247223, 'samples': 14442496, 'steps': 28207, 'loss/train': 2.9606869220733643} +03/04/2022 22:36:17 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 22:36:20 - INFO - codeparrot_training - Step 28208: {'lr': 0.00046230147948335746, 'samples': 14443008, 'steps': 28208, 'loss/train': 2.59894061088562} +03/04/2022 22:36:23 - INFO - codeparrot_training - Step 28209: {'lr': 0.0004622986771463329, 'samples': 14443520, 'steps': 28209, 'loss/train': 2.2940847873687744} +03/04/2022 22:36:25 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 22:36:28 - INFO - codeparrot_training - Step 28210: {'lr': 0.0004622958747136498, 'samples': 14444032, 'steps': 28210, 'loss/train': 2.223080635070801} +03/04/2022 22:36:32 - INFO - codeparrot_training - Step 28211: {'lr': 0.00046229307218530945, 'samples': 14444544, 'steps': 28211, 'loss/train': 1.079291820526123} +03/04/2022 22:36:34 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 22:36:37 - INFO - codeparrot_training - Step 28212: {'lr': 0.0004622902695613131, 'samples': 14445056, 'steps': 28212, 'loss/train': 1.9182860851287842} +03/04/2022 22:36:40 - INFO - codeparrot_training - Step 28213: {'lr': 0.00046228746684166214, 'samples': 14445568, 'steps': 28213, 'loss/train': 2.2384345531463623} +03/04/2022 22:36:42 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 22:36:45 - INFO - codeparrot_training - Step 28214: {'lr': 0.00046228466402635764, 'samples': 14446080, 'steps': 28214, 'loss/train': 2.224390745162964} +03/04/2022 22:36:49 - INFO - codeparrot_training - Step 28215: {'lr': 0.0004622818611154009, 'samples': 14446592, 'steps': 28215, 'loss/train': 1.4603514671325684} +03/04/2022 22:36:51 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 22:36:54 - INFO - codeparrot_training - Step 28216: {'lr': 0.00046227905810879334, 'samples': 14447104, 'steps': 28216, 'loss/train': 0.865271806716919} +03/04/2022 22:36:57 - INFO - codeparrot_training - Step 28217: {'lr': 0.0004622762550065361, 'samples': 14447616, 'steps': 28217, 'loss/train': 1.7706621885299683} +03/04/2022 22:36:59 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 22:37:02 - INFO - codeparrot_training - Step 28218: {'lr': 0.0004622734518086304, 'samples': 14448128, 'steps': 28218, 'loss/train': 1.6963046789169312} +03/04/2022 22:37:06 - INFO - codeparrot_training - Step 28219: {'lr': 0.0004622706485150776, 'samples': 14448640, 'steps': 28219, 'loss/train': 1.8714600801467896} +03/04/2022 22:37:08 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 22:37:11 - INFO - codeparrot_training - Step 28220: {'lr': 0.0004622678451258788, 'samples': 14449152, 'steps': 28220, 'loss/train': 1.5058749914169312} +03/04/2022 22:37:14 - INFO - codeparrot_training - Step 28221: {'lr': 0.00046226504164103557, 'samples': 14449664, 'steps': 28221, 'loss/train': 1.8998761177062988} +03/04/2022 22:37:16 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 22:37:19 - INFO - codeparrot_training - Step 28222: {'lr': 0.0004622622380605489, 'samples': 14450176, 'steps': 28222, 'loss/train': 1.2426837682724} +03/04/2022 22:37:22 - INFO - codeparrot_training - Step 28223: {'lr': 0.0004622594343844201, 'samples': 14450688, 'steps': 28223, 'loss/train': 1.9605756998062134} +03/04/2022 22:37:25 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 22:37:28 - INFO - codeparrot_training - Step 28224: {'lr': 0.00046225663061265056, 'samples': 14451200, 'steps': 28224, 'loss/train': 2.307708978652954} +03/04/2022 22:37:31 - INFO - codeparrot_training - Step 28225: {'lr': 0.0004622538267452414, 'samples': 14451712, 'steps': 28225, 'loss/train': 0.9833211302757263} +03/04/2022 22:37:34 - INFO - codeparrot_training - Step 28226: {'lr': 0.00046225102278219394, 'samples': 14452224, 'steps': 28226, 'loss/train': 2.096790075302124} +03/04/2022 22:37:35 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 22:37:40 - INFO - codeparrot_training - Step 28227: {'lr': 0.0004622482187235094, 'samples': 14452736, 'steps': 28227, 'loss/train': 0.6535780429840088} +03/04/2022 22:37:43 - INFO - codeparrot_training - Step 28228: {'lr': 0.00046224541456918916, 'samples': 14453248, 'steps': 28228, 'loss/train': 2.097245216369629} +03/04/2022 22:37:43 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 22:37:48 - INFO - codeparrot_training - Step 28229: {'lr': 0.0004622426103192344, 'samples': 14453760, 'steps': 28229, 'loss/train': 2.073657989501953} +03/04/2022 22:37:51 - INFO - codeparrot_training - Step 28230: {'lr': 0.00046223980597364647, 'samples': 14454272, 'steps': 28230, 'loss/train': 1.2474056482315063} +03/04/2022 22:37:52 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/04/2022 22:37:57 - INFO - codeparrot_training - Step 28231: {'lr': 0.0004622370015324264, 'samples': 14454784, 'steps': 28231, 'loss/train': 0.865554690361023} +03/04/2022 22:38:00 - INFO - codeparrot_training - Step 28232: {'lr': 0.0004622341969955757, 'samples': 14455296, 'steps': 28232, 'loss/train': 1.687196135520935} +03/04/2022 22:38:01 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 22:38:05 - INFO - codeparrot_training - Step 28233: {'lr': 0.00046223139236309553, 'samples': 14455808, 'steps': 28233, 'loss/train': 1.5283609628677368} +03/04/2022 22:38:08 - INFO - codeparrot_training - Step 28234: {'lr': 0.0004622285876349872, 'samples': 14456320, 'steps': 28234, 'loss/train': 1.8432928323745728} +03/04/2022 22:38:09 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 22:38:13 - INFO - codeparrot_training - Step 28235: {'lr': 0.00046222578281125194, 'samples': 14456832, 'steps': 28235, 'loss/train': 1.8542454242706299} +03/04/2022 22:38:17 - INFO - codeparrot_training - Step 28236: {'lr': 0.0004622229778918909, 'samples': 14457344, 'steps': 28236, 'loss/train': 2.1417524814605713} +03/04/2022 22:38:18 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 22:38:22 - INFO - codeparrot_training - Step 28237: {'lr': 0.00046222017287690566, 'samples': 14457856, 'steps': 28237, 'loss/train': 1.6115163564682007} +03/04/2022 22:38:25 - INFO - codeparrot_training - Step 28238: {'lr': 0.00046221736776629713, 'samples': 14458368, 'steps': 28238, 'loss/train': 2.828225612640381} +03/04/2022 22:38:30 - INFO - codeparrot_training - Step 28239: {'lr': 0.0004622145625600668, 'samples': 14458880, 'steps': 28239, 'loss/train': 1.8400185108184814} +03/04/2022 22:38:34 - INFO - codeparrot_training - Step 28240: {'lr': 0.00046221175725821585, 'samples': 14459392, 'steps': 28240, 'loss/train': 1.3655014038085938} +03/04/2022 22:38:35 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 22:38:39 - INFO - codeparrot_training - Step 28241: {'lr': 0.00046220895186074553, 'samples': 14459904, 'steps': 28241, 'loss/train': 2.592376470565796} +03/04/2022 22:38:42 - INFO - codeparrot_training - Step 28242: {'lr': 0.0004622061463676572, 'samples': 14460416, 'steps': 28242, 'loss/train': 1.7200971841812134} +03/04/2022 22:38:44 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/04/2022 22:38:47 - INFO - codeparrot_training - Step 28243: {'lr': 0.000462203340778952, 'samples': 14460928, 'steps': 28243, 'loss/train': 1.5662012100219727} +03/04/2022 22:38:51 - INFO - codeparrot_training - Step 28244: {'lr': 0.0004622005350946312, 'samples': 14461440, 'steps': 28244, 'loss/train': 1.8112459182739258} +03/04/2022 22:38:52 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 22:38:56 - INFO - codeparrot_training - Step 28245: {'lr': 0.00046219772931469617, 'samples': 14461952, 'steps': 28245, 'loss/train': 1.7569890022277832} +03/04/2022 22:38:59 - INFO - codeparrot_training - Step 28246: {'lr': 0.00046219492343914815, 'samples': 14462464, 'steps': 28246, 'loss/train': 1.9689233303070068} +03/04/2022 22:39:01 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/04/2022 22:39:04 - INFO - codeparrot_training - Step 28247: {'lr': 0.00046219211746798835, 'samples': 14462976, 'steps': 28247, 'loss/train': 2.222337245941162} +03/04/2022 22:39:08 - INFO - codeparrot_training - Step 28248: {'lr': 0.000462189311401218, 'samples': 14463488, 'steps': 28248, 'loss/train': 2.4476191997528076} +03/04/2022 22:39:09 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/04/2022 22:39:13 - INFO - codeparrot_training - Step 28249: {'lr': 0.0004621865052388385, 'samples': 14464000, 'steps': 28249, 'loss/train': 2.217796564102173} +03/04/2022 22:39:16 - INFO - codeparrot_training - Step 28250: {'lr': 0.00046218369898085097, 'samples': 14464512, 'steps': 28250, 'loss/train': 1.7386078834533691} +03/04/2022 22:39:17 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 22:39:21 - INFO - codeparrot_training - Step 28251: {'lr': 0.0004621808926272568, 'samples': 14465024, 'steps': 28251, 'loss/train': 1.7503645420074463} +03/04/2022 22:39:24 - INFO - codeparrot_training - Step 28252: {'lr': 0.0004621780861780572, 'samples': 14465536, 'steps': 28252, 'loss/train': 2.030625581741333} +03/04/2022 22:39:26 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 22:39:30 - INFO - codeparrot_training - Step 28253: {'lr': 0.00046217527963325335, 'samples': 14466048, 'steps': 28253, 'loss/train': 1.4750890731811523} +03/04/2022 22:39:33 - INFO - codeparrot_training - Step 28254: {'lr': 0.00046217247299284666, 'samples': 14466560, 'steps': 28254, 'loss/train': 2.7053892612457275} +03/04/2022 22:39:34 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 22:39:38 - INFO - codeparrot_training - Step 28255: {'lr': 0.00046216966625683834, 'samples': 14467072, 'steps': 28255, 'loss/train': 4.57110595703125} +03/04/2022 22:39:41 - INFO - codeparrot_training - Step 28256: {'lr': 0.00046216685942522957, 'samples': 14467584, 'steps': 28256, 'loss/train': 2.233008861541748} +03/04/2022 22:39:43 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/04/2022 22:39:47 - INFO - codeparrot_training - Step 28257: {'lr': 0.00046216405249802176, 'samples': 14468096, 'steps': 28257, 'loss/train': 1.7791632413864136} +03/04/2022 22:39:50 - INFO - codeparrot_training - Step 28258: {'lr': 0.000462161245475216, 'samples': 14468608, 'steps': 28258, 'loss/train': 1.3522037267684937} +03/04/2022 22:39:52 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/04/2022 22:39:55 - INFO - codeparrot_training - Step 28259: {'lr': 0.0004621584383568137, 'samples': 14469120, 'steps': 28259, 'loss/train': 1.8858468532562256} +03/04/2022 22:39:58 - INFO - codeparrot_training - Step 28260: {'lr': 0.00046215563114281613, 'samples': 14469632, 'steps': 28260, 'loss/train': 2.0129947662353516} +03/04/2022 22:40:00 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 22:40:04 - INFO - codeparrot_training - Step 28261: {'lr': 0.0004621528238332245, 'samples': 14470144, 'steps': 28261, 'loss/train': 2.110783100128174} +03/04/2022 22:40:07 - INFO - codeparrot_training - Step 28262: {'lr': 0.00046215001642804, 'samples': 14470656, 'steps': 28262, 'loss/train': 2.002697229385376} +03/04/2022 22:40:08 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 22:40:12 - INFO - codeparrot_training - Step 28263: {'lr': 0.0004621472089272641, 'samples': 14471168, 'steps': 28263, 'loss/train': 1.8643553256988525} +03/04/2022 22:40:15 - INFO - codeparrot_training - Step 28264: {'lr': 0.0004621444013308979, 'samples': 14471680, 'steps': 28264, 'loss/train': 1.1936086416244507} +03/04/2022 22:40:17 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/04/2022 22:40:21 - INFO - codeparrot_training - Step 28265: {'lr': 0.00046214159363894264, 'samples': 14472192, 'steps': 28265, 'loss/train': 2.3569729328155518} +03/04/2022 22:40:24 - INFO - codeparrot_training - Step 28266: {'lr': 0.0004621387858513997, 'samples': 14472704, 'steps': 28266, 'loss/train': 1.794743537902832} +03/04/2022 22:40:25 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/04/2022 22:40:29 - INFO - codeparrot_training - Step 28267: {'lr': 0.0004621359779682703, 'samples': 14473216, 'steps': 28267, 'loss/train': 1.413746953010559} +03/04/2022 22:40:32 - INFO - codeparrot_training - Step 28268: {'lr': 0.0004621331699895557, 'samples': 14473728, 'steps': 28268, 'loss/train': 2.150907039642334} +03/04/2022 22:40:34 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 22:40:37 - INFO - codeparrot_training - Step 28269: {'lr': 0.00046213036191525714, 'samples': 14474240, 'steps': 28269, 'loss/train': 1.8511475324630737} +03/04/2022 22:40:41 - INFO - codeparrot_training - Step 28270: {'lr': 0.00046212755374537594, 'samples': 14474752, 'steps': 28270, 'loss/train': 1.7172820568084717} +03/04/2022 22:40:42 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 22:40:46 - INFO - codeparrot_training - Step 28271: {'lr': 0.0004621247454799133, 'samples': 14475264, 'steps': 28271, 'loss/train': 2.6843080520629883} +03/04/2022 22:40:49 - INFO - codeparrot_training - Step 28272: {'lr': 0.0004621219371188706, 'samples': 14475776, 'steps': 28272, 'loss/train': 2.121633529663086} +03/04/2022 22:40:51 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 22:40:54 - INFO - codeparrot_training - Step 28273: {'lr': 0.0004621191286622489, 'samples': 14476288, 'steps': 28273, 'loss/train': 2.1590254306793213} +03/04/2022 22:40:57 - INFO - codeparrot_training - Step 28274: {'lr': 0.00046211632011004973, 'samples': 14476800, 'steps': 28274, 'loss/train': 1.413214921951294} +03/04/2022 22:40:59 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/04/2022 22:41:03 - INFO - codeparrot_training - Step 28275: {'lr': 0.0004621135114622742, 'samples': 14477312, 'steps': 28275, 'loss/train': 1.4337598085403442} +03/04/2022 22:41:06 - INFO - codeparrot_training - Step 28276: {'lr': 0.00046211070271892353, 'samples': 14477824, 'steps': 28276, 'loss/train': 1.1462377309799194} +03/04/2022 22:41:08 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 22:41:11 - INFO - codeparrot_training - Step 28277: {'lr': 0.00046210789387999906, 'samples': 14478336, 'steps': 28277, 'loss/train': 1.9296038150787354} +03/04/2022 22:41:14 - INFO - codeparrot_training - Step 28278: {'lr': 0.00046210508494550206, 'samples': 14478848, 'steps': 28278, 'loss/train': 2.719940423965454} +03/04/2022 22:41:16 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 22:41:20 - INFO - codeparrot_training - Step 28279: {'lr': 0.0004621022759154338, 'samples': 14479360, 'steps': 28279, 'loss/train': 2.1380927562713623} +03/04/2022 22:41:23 - INFO - codeparrot_training - Step 28280: {'lr': 0.0004620994667897955, 'samples': 14479872, 'steps': 28280, 'loss/train': 1.6783688068389893} +03/04/2022 22:41:25 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 22:41:28 - INFO - codeparrot_training - Step 28281: {'lr': 0.0004620966575685885, 'samples': 14480384, 'steps': 28281, 'loss/train': 1.8166375160217285} +03/04/2022 22:41:31 - INFO - codeparrot_training - Step 28282: {'lr': 0.000462093848251814, 'samples': 14480896, 'steps': 28282, 'loss/train': 2.255211114883423} +03/04/2022 22:41:34 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/04/2022 22:41:37 - INFO - codeparrot_training - Step 28283: {'lr': 0.00046209103883947323, 'samples': 14481408, 'steps': 28283, 'loss/train': 1.5178070068359375} +03/04/2022 22:41:40 - INFO - codeparrot_training - Step 28284: {'lr': 0.00046208822933156756, 'samples': 14481920, 'steps': 28284, 'loss/train': 1.7980749607086182} +03/04/2022 22:41:42 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 22:41:45 - INFO - codeparrot_training - Step 28285: {'lr': 0.00046208541972809824, 'samples': 14482432, 'steps': 28285, 'loss/train': 1.5211321115493774} +03/04/2022 22:41:48 - INFO - codeparrot_training - Step 28286: {'lr': 0.00046208261002906643, 'samples': 14482944, 'steps': 28286, 'loss/train': 0.6545057892799377} +03/04/2022 22:41:51 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/04/2022 22:41:54 - INFO - codeparrot_training - Step 28287: {'lr': 0.00046207980023447347, 'samples': 14483456, 'steps': 28287, 'loss/train': 1.665461540222168} +03/04/2022 22:41:57 - INFO - codeparrot_training - Step 28288: {'lr': 0.0004620769903443207, 'samples': 14483968, 'steps': 28288, 'loss/train': 6.616833686828613} +03/04/2022 22:42:00 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/04/2022 22:42:02 - INFO - codeparrot_training - Step 28289: {'lr': 0.00046207418035860927, 'samples': 14484480, 'steps': 28289, 'loss/train': 2.1301066875457764} +03/04/2022 22:42:05 - INFO - codeparrot_training - Step 28290: {'lr': 0.00046207137027734046, 'samples': 14484992, 'steps': 28290, 'loss/train': 2.370570182800293} +03/04/2022 22:42:08 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/04/2022 22:42:10 - INFO - codeparrot_training - Step 28291: {'lr': 0.00046206856010051555, 'samples': 14485504, 'steps': 28291, 'loss/train': 1.276041030883789} +03/04/2022 22:42:14 - INFO - codeparrot_training - Step 28292: {'lr': 0.0004620657498281359, 'samples': 14486016, 'steps': 28292, 'loss/train': 1.7905917167663574} +03/04/2022 22:42:16 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 22:42:19 - INFO - codeparrot_training - Step 28293: {'lr': 0.0004620629394602027, 'samples': 14486528, 'steps': 28293, 'loss/train': 2.072216033935547} +03/04/2022 22:42:22 - INFO - codeparrot_training - Step 28294: {'lr': 0.00046206012899671715, 'samples': 14487040, 'steps': 28294, 'loss/train': 1.9236313104629517} +03/04/2022 22:42:25 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 22:42:28 - INFO - codeparrot_training - Step 28295: {'lr': 0.00046205731843768056, 'samples': 14487552, 'steps': 28295, 'loss/train': 0.5986870527267456} +03/04/2022 22:42:31 - INFO - codeparrot_training - Step 28296: {'lr': 0.0004620545077830942, 'samples': 14488064, 'steps': 28296, 'loss/train': 1.1816462278366089} +03/04/2022 22:42:34 - INFO - codeparrot_training - Step 28297: {'lr': 0.00046205169703295945, 'samples': 14488576, 'steps': 28297, 'loss/train': 3.332437753677368} +03/04/2022 22:42:35 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 22:42:39 - INFO - codeparrot_training - Step 28298: {'lr': 0.00046204888618727743, 'samples': 14489088, 'steps': 28298, 'loss/train': 2.239515542984009} +03/04/2022 22:42:42 - INFO - codeparrot_training - Step 28299: {'lr': 0.00046204607524604944, 'samples': 14489600, 'steps': 28299, 'loss/train': 1.323746919631958} +03/04/2022 22:42:43 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 22:42:48 - INFO - codeparrot_training - Step 28300: {'lr': 0.0004620432642092768, 'samples': 14490112, 'steps': 28300, 'loss/train': 2.0938899517059326} +03/04/2022 22:42:51 - INFO - codeparrot_training - Step 28301: {'lr': 0.00046204045307696065, 'samples': 14490624, 'steps': 28301, 'loss/train': 0.5166608691215515} +03/04/2022 22:42:51 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 22:42:56 - INFO - codeparrot_training - Step 28302: {'lr': 0.0004620376418491024, 'samples': 14491136, 'steps': 28302, 'loss/train': 1.878035068511963} +03/04/2022 22:42:59 - INFO - codeparrot_training - Step 28303: {'lr': 0.0004620348305257033, 'samples': 14491648, 'steps': 28303, 'loss/train': 0.6501943469047546} +03/04/2022 22:43:00 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 22:43:05 - INFO - codeparrot_training - Step 28304: {'lr': 0.00046203201910676453, 'samples': 14492160, 'steps': 28304, 'loss/train': 1.6701397895812988} +03/04/2022 22:43:08 - INFO - codeparrot_training - Step 28305: {'lr': 0.0004620292075922874, 'samples': 14492672, 'steps': 28305, 'loss/train': 2.0548577308654785} +03/04/2022 22:43:08 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/04/2022 22:43:13 - INFO - codeparrot_training - Step 28306: {'lr': 0.0004620263959822732, 'samples': 14493184, 'steps': 28306, 'loss/train': 1.4954462051391602} +03/04/2022 22:43:16 - INFO - codeparrot_training - Step 28307: {'lr': 0.00046202358427672313, 'samples': 14493696, 'steps': 28307, 'loss/train': 1.8337366580963135} +03/04/2022 22:43:16 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 22:43:21 - INFO - codeparrot_training - Step 28308: {'lr': 0.0004620207724756386, 'samples': 14494208, 'steps': 28308, 'loss/train': 2.0196423530578613} +03/04/2022 22:43:25 - INFO - codeparrot_training - Step 28309: {'lr': 0.0004620179605790207, 'samples': 14494720, 'steps': 28309, 'loss/train': 1.3312158584594727} +03/04/2022 22:43:26 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/04/2022 22:43:26 - INFO - codeparrot_training - Dataset epoch: 1 +03/04/2022 22:43:30 - INFO - codeparrot_training - Step 28310: {'lr': 0.00046201514858687075, 'samples': 14495232, 'steps': 28310, 'loss/train': 2.1164300441741943} +03/04/2022 22:43:33 - INFO - codeparrot_training - Step 28311: {'lr': 0.00046201233649919015, 'samples': 14495744, 'steps': 28311, 'loss/train': 1.5374401807785034} +03/04/2022 22:43:34 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/04/2022 22:43:38 - INFO - codeparrot_training - Step 28312: {'lr': 0.00046200952431598, 'samples': 14496256, 'steps': 28312, 'loss/train': 1.9420329332351685} +03/04/2022 22:43:41 - INFO - codeparrot_training - Step 28313: {'lr': 0.00046200671203724166, 'samples': 14496768, 'steps': 28313, 'loss/train': 1.5617731809616089} +03/04/2022 22:43:42 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 22:43:47 - INFO - codeparrot_training - Step 28314: {'lr': 0.00046200389966297633, 'samples': 14497280, 'steps': 28314, 'loss/train': 2.074497699737549} +03/04/2022 22:43:50 - INFO - codeparrot_training - Step 28315: {'lr': 0.00046200108719318537, 'samples': 14497792, 'steps': 28315, 'loss/train': 1.768312931060791} +03/04/2022 22:43:51 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 22:43:55 - INFO - codeparrot_training - Step 28316: {'lr': 0.0004619982746278699, 'samples': 14498304, 'steps': 28316, 'loss/train': 1.2528194189071655} +03/04/2022 22:43:58 - INFO - codeparrot_training - Step 28317: {'lr': 0.00046199546196703134, 'samples': 14498816, 'steps': 28317, 'loss/train': 1.548931360244751} +03/04/2022 22:43:59 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 22:44:04 - INFO - codeparrot_training - Step 28318: {'lr': 0.0004619926492106709, 'samples': 14499328, 'steps': 28318, 'loss/train': 2.1215429306030273} +03/04/2022 22:44:07 - INFO - codeparrot_training - Step 28319: {'lr': 0.0004619898363587899, 'samples': 14499840, 'steps': 28319, 'loss/train': 1.8353849649429321} +03/04/2022 22:44:08 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 22:44:12 - INFO - codeparrot_training - Step 28320: {'lr': 0.00046198702341138944, 'samples': 14500352, 'steps': 28320, 'loss/train': 2.437762498855591} +03/04/2022 22:44:15 - INFO - codeparrot_training - Step 28321: {'lr': 0.00046198421036847093, 'samples': 14500864, 'steps': 28321, 'loss/train': 1.4762158393859863} +03/04/2022 22:44:16 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 22:44:20 - INFO - codeparrot_training - Step 28322: {'lr': 0.00046198139723003563, 'samples': 14501376, 'steps': 28322, 'loss/train': 2.559457540512085} +03/04/2022 22:44:24 - INFO - codeparrot_training - Step 28323: {'lr': 0.00046197858399608477, 'samples': 14501888, 'steps': 28323, 'loss/train': 0.9250370264053345} +03/04/2022 22:44:24 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 22:44:29 - INFO - codeparrot_training - Step 28324: {'lr': 0.00046197577066661965, 'samples': 14502400, 'steps': 28324, 'loss/train': 1.9162812232971191} +03/04/2022 22:44:32 - INFO - codeparrot_training - Step 28325: {'lr': 0.0004619729572416415, 'samples': 14502912, 'steps': 28325, 'loss/train': 2.0879809856414795} +03/04/2022 22:44:33 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/04/2022 22:44:37 - INFO - codeparrot_training - Step 28326: {'lr': 0.0004619701437211516, 'samples': 14503424, 'steps': 28326, 'loss/train': 1.8281383514404297} +03/04/2022 22:44:41 - INFO - codeparrot_training - Step 28327: {'lr': 0.00046196733010515125, 'samples': 14503936, 'steps': 28327, 'loss/train': 2.239240884780884} +03/04/2022 22:44:41 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 22:44:46 - INFO - codeparrot_training - Step 28328: {'lr': 0.0004619645163936417, 'samples': 14504448, 'steps': 28328, 'loss/train': 1.8546830415725708} +03/04/2022 22:44:49 - INFO - codeparrot_training - Step 28329: {'lr': 0.0004619617025866242, 'samples': 14504960, 'steps': 28329, 'loss/train': 2.183136224746704} +03/04/2022 22:44:50 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 22:44:54 - INFO - codeparrot_training - Step 28330: {'lr': 0.00046195888868409994, 'samples': 14505472, 'steps': 28330, 'loss/train': 1.3057067394256592} +03/04/2022 22:44:57 - INFO - codeparrot_training - Step 28331: {'lr': 0.0004619560746860704, 'samples': 14505984, 'steps': 28331, 'loss/train': 1.6381313800811768} +03/04/2022 22:44:58 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 22:45:03 - INFO - codeparrot_training - Step 28332: {'lr': 0.0004619532605925366, 'samples': 14506496, 'steps': 28332, 'loss/train': 1.5500290393829346} +03/04/2022 22:45:06 - INFO - codeparrot_training - Step 28333: {'lr': 0.00046195044640350003, 'samples': 14507008, 'steps': 28333, 'loss/train': 1.5433707237243652} +03/04/2022 22:45:06 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 22:45:11 - INFO - codeparrot_training - Step 28334: {'lr': 0.00046194763211896187, 'samples': 14507520, 'steps': 28334, 'loss/train': 0.9608316421508789} +03/04/2022 22:45:14 - INFO - codeparrot_training - Step 28335: {'lr': 0.0004619448177389233, 'samples': 14508032, 'steps': 28335, 'loss/train': 2.7697536945343018} +03/04/2022 22:45:14 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 22:45:19 - INFO - codeparrot_training - Step 28336: {'lr': 0.0004619420032633857, 'samples': 14508544, 'steps': 28336, 'loss/train': 1.6733633279800415} +03/04/2022 22:45:23 - INFO - codeparrot_training - Step 28337: {'lr': 0.0004619391886923503, 'samples': 14509056, 'steps': 28337, 'loss/train': 1.8333038091659546} +03/04/2022 22:45:23 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/04/2022 22:45:28 - INFO - codeparrot_training - Step 28338: {'lr': 0.0004619363740258184, 'samples': 14509568, 'steps': 28338, 'loss/train': 1.6225978136062622} +03/04/2022 22:45:31 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 22:45:33 - INFO - codeparrot_training - Step 28339: {'lr': 0.00046193355926379124, 'samples': 14510080, 'steps': 28339, 'loss/train': 1.8342550992965698} +03/04/2022 22:45:36 - INFO - codeparrot_training - Step 28340: {'lr': 0.00046193074440627, 'samples': 14510592, 'steps': 28340, 'loss/train': 0.17486226558685303} +03/04/2022 22:45:39 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 22:45:41 - INFO - codeparrot_training - Step 28341: {'lr': 0.0004619279294532561, 'samples': 14511104, 'steps': 28341, 'loss/train': 1.5536885261535645} +03/04/2022 22:45:45 - INFO - codeparrot_training - Step 28342: {'lr': 0.00046192511440475083, 'samples': 14511616, 'steps': 28342, 'loss/train': 1.8736741542816162} +03/04/2022 22:45:47 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 22:45:50 - INFO - codeparrot_training - Step 28343: {'lr': 0.00046192229926075526, 'samples': 14512128, 'steps': 28343, 'loss/train': 1.3841402530670166} +03/04/2022 22:45:53 - INFO - codeparrot_training - Step 28344: {'lr': 0.0004619194840212708, 'samples': 14512640, 'steps': 28344, 'loss/train': 1.4966727495193481} +03/04/2022 22:45:55 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/04/2022 22:45:58 - INFO - codeparrot_training - Step 28345: {'lr': 0.0004619166686862987, 'samples': 14513152, 'steps': 28345, 'loss/train': 1.6909773349761963} +03/04/2022 22:46:02 - INFO - codeparrot_training - Step 28346: {'lr': 0.0004619138532558402, 'samples': 14513664, 'steps': 28346, 'loss/train': 1.8510650396347046} +03/04/2022 22:46:04 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/04/2022 22:46:07 - INFO - codeparrot_training - Step 28347: {'lr': 0.00046191103772989664, 'samples': 14514176, 'steps': 28347, 'loss/train': 1.9116324186325073} +03/04/2022 22:46:10 - INFO - codeparrot_training - Step 28348: {'lr': 0.00046190822210846917, 'samples': 14514688, 'steps': 28348, 'loss/train': 2.1827337741851807} +03/04/2022 22:46:12 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 22:46:15 - INFO - codeparrot_training - Step 28349: {'lr': 0.0004619054063915592, 'samples': 14515200, 'steps': 28349, 'loss/train': 1.698214054107666} +03/04/2022 22:46:18 - INFO - codeparrot_training - Step 28350: {'lr': 0.00046190259057916786, 'samples': 14515712, 'steps': 28350, 'loss/train': 1.3486562967300415} +03/04/2022 22:46:20 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 22:46:24 - INFO - codeparrot_training - Step 28351: {'lr': 0.0004618997746712965, 'samples': 14516224, 'steps': 28351, 'loss/train': 2.1203606128692627} +03/04/2022 22:46:27 - INFO - codeparrot_training - Step 28352: {'lr': 0.00046189695866794635, 'samples': 14516736, 'steps': 28352, 'loss/train': 1.3322757482528687} +03/04/2022 22:46:28 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/04/2022 22:46:32 - INFO - codeparrot_training - Step 28353: {'lr': 0.00046189414256911875, 'samples': 14517248, 'steps': 28353, 'loss/train': 1.6637177467346191} +03/04/2022 22:46:35 - INFO - codeparrot_training - Step 28354: {'lr': 0.0004618913263748149, 'samples': 14517760, 'steps': 28354, 'loss/train': 2.0440452098846436} +03/04/2022 22:46:36 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 22:46:40 - INFO - codeparrot_training - Step 28355: {'lr': 0.0004618885100850361, 'samples': 14518272, 'steps': 28355, 'loss/train': 1.7147034406661987} +03/04/2022 22:46:44 - INFO - codeparrot_training - Step 28356: {'lr': 0.0004618856936997836, 'samples': 14518784, 'steps': 28356, 'loss/train': 2.6594667434692383} +03/04/2022 22:46:45 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/04/2022 22:46:49 - INFO - codeparrot_training - Step 28357: {'lr': 0.0004618828772190586, 'samples': 14519296, 'steps': 28357, 'loss/train': 1.209424376487732} +03/04/2022 22:46:52 - INFO - codeparrot_training - Step 28358: {'lr': 0.0004618800606428626, 'samples': 14519808, 'steps': 28358, 'loss/train': 2.7648284435272217} +03/04/2022 22:46:53 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/04/2022 22:46:57 - INFO - codeparrot_training - Step 28359: {'lr': 0.00046187724397119657, 'samples': 14520320, 'steps': 28359, 'loss/train': 1.7226858139038086} +03/04/2022 22:47:00 - INFO - codeparrot_training - Step 28360: {'lr': 0.000461874427204062, 'samples': 14520832, 'steps': 28360, 'loss/train': 2.4177238941192627} +03/04/2022 22:47:01 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 22:47:06 - INFO - codeparrot_training - Step 28361: {'lr': 0.00046187161034146, 'samples': 14521344, 'steps': 28361, 'loss/train': 2.15651535987854} +03/04/2022 22:47:09 - INFO - codeparrot_training - Step 28362: {'lr': 0.00046186879338339207, 'samples': 14521856, 'steps': 28362, 'loss/train': 2.2936675548553467} +03/04/2022 22:47:10 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/04/2022 22:47:14 - INFO - codeparrot_training - Step 28363: {'lr': 0.0004618659763298592, 'samples': 14522368, 'steps': 28363, 'loss/train': 2.2216598987579346} +03/04/2022 22:47:17 - INFO - codeparrot_training - Step 28364: {'lr': 0.00046186315918086285, 'samples': 14522880, 'steps': 28364, 'loss/train': 0.9725690484046936} +03/04/2022 22:47:18 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/04/2022 22:47:23 - INFO - codeparrot_training - Step 28365: {'lr': 0.0004618603419364042, 'samples': 14523392, 'steps': 28365, 'loss/train': 1.909342885017395} +03/04/2022 22:47:26 - INFO - codeparrot_training - Step 28366: {'lr': 0.00046185752459648456, 'samples': 14523904, 'steps': 28366, 'loss/train': 1.893879771232605} +03/04/2022 22:47:27 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/04/2022 22:47:31 - INFO - codeparrot_training - Step 28367: {'lr': 0.00046185470716110516, 'samples': 14524416, 'steps': 28367, 'loss/train': 1.3622266054153442} +03/04/2022 22:47:34 - INFO - codeparrot_training - Step 28368: {'lr': 0.00046185188963026734, 'samples': 14524928, 'steps': 28368, 'loss/train': 1.9037989377975464} +03/04/2022 22:47:35 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/04/2022 22:47:39 - INFO - codeparrot_training - Step 28369: {'lr': 0.0004618490720039723, 'samples': 14525440, 'steps': 28369, 'loss/train': 1.6392556428909302} +03/04/2022 22:47:43 - INFO - codeparrot_training - Step 28370: {'lr': 0.0004618462542822214, 'samples': 14525952, 'steps': 28370, 'loss/train': 2.110544204711914} +03/04/2022 22:47:43 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/04/2022 22:47:48 - INFO - codeparrot_training - Step 28371: {'lr': 0.0004618434364650158, 'samples': 14526464, 'steps': 28371, 'loss/train': 0.12755286693572998} +03/04/2022 22:47:51 - INFO - codeparrot_training - Step 28372: {'lr': 0.00046184061855235683, 'samples': 14526976, 'steps': 28372, 'loss/train': 1.712847113609314} +03/04/2022 22:47:51 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 22:47:56 - INFO - codeparrot_training - Step 28373: {'lr': 0.00046183780054424574, 'samples': 14527488, 'steps': 28373, 'loss/train': 0.7448714375495911} +03/04/2022 22:47:59 - INFO - codeparrot_training - Step 28374: {'lr': 0.00046183498244068376, 'samples': 14528000, 'steps': 28374, 'loss/train': 1.497391939163208} +03/04/2022 22:48:00 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 22:48:05 - INFO - codeparrot_training - Step 28375: {'lr': 0.00046183216424167226, 'samples': 14528512, 'steps': 28375, 'loss/train': 2.7550015449523926} +03/04/2022 22:48:08 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 22:48:10 - INFO - codeparrot_training - Step 28376: {'lr': 0.0004618293459472124, 'samples': 14529024, 'steps': 28376, 'loss/train': 1.7029379606246948} +03/04/2022 22:48:13 - INFO - codeparrot_training - Step 28377: {'lr': 0.0004618265275573056, 'samples': 14529536, 'steps': 28377, 'loss/train': 1.5451273918151855} +03/04/2022 22:48:16 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 22:48:18 - INFO - codeparrot_training - Step 28378: {'lr': 0.00046182370907195294, 'samples': 14530048, 'steps': 28378, 'loss/train': 1.0116934776306152} +03/04/2022 22:48:22 - INFO - codeparrot_training - Step 28379: {'lr': 0.00046182089049115585, 'samples': 14530560, 'steps': 28379, 'loss/train': 0.2080044150352478} +03/04/2022 22:48:24 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/04/2022 22:48:27 - INFO - codeparrot_training - Step 28380: {'lr': 0.0004618180718149155, 'samples': 14531072, 'steps': 28380, 'loss/train': 2.1246297359466553} +03/04/2022 22:48:30 - INFO - codeparrot_training - Step 28381: {'lr': 0.00046181525304323325, 'samples': 14531584, 'steps': 28381, 'loss/train': 2.2423717975616455} +03/04/2022 22:48:32 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/04/2022 22:48:35 - INFO - codeparrot_training - Step 28382: {'lr': 0.0004618124341761102, 'samples': 14532096, 'steps': 28382, 'loss/train': 2.007514476776123} +03/04/2022 22:48:38 - INFO - codeparrot_training - Step 28383: {'lr': 0.0004618096152135478, 'samples': 14532608, 'steps': 28383, 'loss/train': 0.834243893623352} +03/04/2022 22:48:41 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/04/2022 22:48:44 - INFO - codeparrot_training - Step 28384: {'lr': 0.00046180679615554735, 'samples': 14533120, 'steps': 28384, 'loss/train': 1.4318647384643555} +03/04/2022 22:48:47 - INFO - codeparrot_training - Step 28385: {'lr': 0.00046180397700210985, 'samples': 14533632, 'steps': 28385, 'loss/train': 0.20897673070430756} +03/04/2022 22:48:49 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 22:48:52 - INFO - codeparrot_training - Step 28386: {'lr': 0.0004618011577532368, 'samples': 14534144, 'steps': 28386, 'loss/train': 2.137322187423706} +03/04/2022 22:48:55 - INFO - codeparrot_training - Step 28387: {'lr': 0.0004617983384089295, 'samples': 14534656, 'steps': 28387, 'loss/train': 2.148057222366333} +03/04/2022 22:48:57 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/04/2022 22:49:00 - INFO - codeparrot_training - Step 28388: {'lr': 0.00046179551896918916, 'samples': 14535168, 'steps': 28388, 'loss/train': 1.7843319177627563} +03/04/2022 22:49:04 - INFO - codeparrot_training - Step 28389: {'lr': 0.00046179269943401693, 'samples': 14535680, 'steps': 28389, 'loss/train': 2.668386936187744} +03/04/2022 22:49:06 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 22:49:09 - INFO - codeparrot_training - Step 28390: {'lr': 0.00046178987980341414, 'samples': 14536192, 'steps': 28390, 'loss/train': 1.7163957357406616} +03/04/2022 22:49:12 - INFO - codeparrot_training - Step 28391: {'lr': 0.00046178706007738227, 'samples': 14536704, 'steps': 28391, 'loss/train': 2.1705682277679443} +03/04/2022 22:49:14 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 22:49:17 - INFO - codeparrot_training - Step 28392: {'lr': 0.0004617842402559223, 'samples': 14537216, 'steps': 28392, 'loss/train': 1.45749831199646} +03/04/2022 22:49:20 - INFO - codeparrot_training - Step 28393: {'lr': 0.0004617814203390356, 'samples': 14537728, 'steps': 28393, 'loss/train': 2.0853567123413086} +03/04/2022 22:49:22 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/04/2022 22:49:26 - INFO - codeparrot_training - Step 28394: {'lr': 0.0004617786003267235, 'samples': 14538240, 'steps': 28394, 'loss/train': 1.7724525928497314} +03/04/2022 22:49:29 - INFO - codeparrot_training - Step 28395: {'lr': 0.00046177578021898717, 'samples': 14538752, 'steps': 28395, 'loss/train': 2.6610617637634277} +03/04/2022 22:49:31 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 22:49:34 - INFO - codeparrot_training - Step 28396: {'lr': 0.000461772960015828, 'samples': 14539264, 'steps': 28396, 'loss/train': 0.30445003509521484} +03/04/2022 22:49:37 - INFO - codeparrot_training - Step 28397: {'lr': 0.00046177013971724723, 'samples': 14539776, 'steps': 28397, 'loss/train': 2.1253702640533447} +03/04/2022 22:49:40 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 22:49:43 - INFO - codeparrot_training - Step 28398: {'lr': 0.00046176731932324604, 'samples': 14540288, 'steps': 28398, 'loss/train': 2.1879992485046387} +03/04/2022 22:49:46 - INFO - codeparrot_training - Step 28399: {'lr': 0.0004617644988338258, 'samples': 14540800, 'steps': 28399, 'loss/train': 1.6910182237625122} +03/04/2022 22:49:48 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 22:49:51 - INFO - codeparrot_training - Step 28400: {'lr': 0.0004617616782489877, 'samples': 14541312, 'steps': 28400, 'loss/train': 1.452311635017395} +03/04/2022 22:49:54 - INFO - codeparrot_training - Step 28401: {'lr': 0.00046175885756873314, 'samples': 14541824, 'steps': 28401, 'loss/train': 1.4132397174835205} +03/04/2022 22:49:56 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 22:49:59 - INFO - codeparrot_training - Step 28402: {'lr': 0.00046175603679306324, 'samples': 14542336, 'steps': 28402, 'loss/train': 1.4782369136810303} +03/04/2022 22:50:03 - INFO - codeparrot_training - Step 28403: {'lr': 0.0004617532159219794, 'samples': 14542848, 'steps': 28403, 'loss/train': 1.411097764968872} +03/04/2022 22:50:04 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 22:50:08 - INFO - codeparrot_training - Step 28404: {'lr': 0.0004617503949554828, 'samples': 14543360, 'steps': 28404, 'loss/train': 1.7750134468078613} +03/04/2022 22:50:11 - INFO - codeparrot_training - Step 28405: {'lr': 0.0004617475738935747, 'samples': 14543872, 'steps': 28405, 'loss/train': 2.292668342590332} +03/04/2022 22:50:12 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/04/2022 22:50:16 - INFO - codeparrot_training - Step 28406: {'lr': 0.0004617447527362564, 'samples': 14544384, 'steps': 28406, 'loss/train': 1.2514328956604004} +03/04/2022 22:50:19 - INFO - codeparrot_training - Step 28407: {'lr': 0.00046174193148352914, 'samples': 14544896, 'steps': 28407, 'loss/train': 0.8554359078407288} +03/04/2022 22:50:21 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 22:50:25 - INFO - codeparrot_training - Step 28408: {'lr': 0.00046173911013539437, 'samples': 14545408, 'steps': 28408, 'loss/train': 0.1482871174812317} +03/04/2022 22:50:28 - INFO - codeparrot_training - Step 28409: {'lr': 0.0004617362886918531, 'samples': 14545920, 'steps': 28409, 'loss/train': 1.4405052661895752} +03/04/2022 22:50:29 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/04/2022 22:50:33 - INFO - codeparrot_training - Step 28410: {'lr': 0.0004617334671529069, 'samples': 14546432, 'steps': 28410, 'loss/train': 1.8034029006958008} +03/04/2022 22:50:36 - INFO - codeparrot_training - Step 28411: {'lr': 0.0004617306455185567, 'samples': 14546944, 'steps': 28411, 'loss/train': 1.5738331079483032} +03/04/2022 22:50:37 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 22:50:41 - INFO - codeparrot_training - Step 28412: {'lr': 0.00046172782378880404, 'samples': 14547456, 'steps': 28412, 'loss/train': 1.3957966566085815} +03/04/2022 22:50:45 - INFO - codeparrot_training - Step 28413: {'lr': 0.00046172500196364996, 'samples': 14547968, 'steps': 28413, 'loss/train': 1.7327498197555542} +03/04/2022 22:50:46 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/04/2022 22:50:50 - INFO - codeparrot_training - Step 28414: {'lr': 0.000461722180043096, 'samples': 14548480, 'steps': 28414, 'loss/train': 1.4061049222946167} +03/04/2022 22:50:53 - INFO - codeparrot_training - Step 28415: {'lr': 0.0004617193580271433, 'samples': 14548992, 'steps': 28415, 'loss/train': 1.9827089309692383} +03/04/2022 22:50:54 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 22:50:58 - INFO - codeparrot_training - Step 28416: {'lr': 0.000461716535915793, 'samples': 14549504, 'steps': 28416, 'loss/train': 2.591987133026123} +03/04/2022 22:51:01 - INFO - codeparrot_training - Step 28417: {'lr': 0.0004617137137090466, 'samples': 14550016, 'steps': 28417, 'loss/train': 1.0727282762527466} +03/04/2022 22:51:02 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 22:51:07 - INFO - codeparrot_training - Step 28418: {'lr': 0.0004617108914069052, 'samples': 14550528, 'steps': 28418, 'loss/train': 1.788729190826416} +03/04/2022 22:51:10 - INFO - codeparrot_training - Step 28419: {'lr': 0.0004617080690093701, 'samples': 14551040, 'steps': 28419, 'loss/train': 1.5807162523269653} +03/04/2022 22:51:10 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 22:51:15 - INFO - codeparrot_training - Step 28420: {'lr': 0.00046170524651644276, 'samples': 14551552, 'steps': 28420, 'loss/train': 2.2590601444244385} +03/04/2022 22:51:18 - INFO - codeparrot_training - Step 28421: {'lr': 0.00046170242392812425, 'samples': 14552064, 'steps': 28421, 'loss/train': 1.6716374158859253} +03/04/2022 22:51:18 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/04/2022 22:51:23 - INFO - codeparrot_training - Step 28422: {'lr': 0.0004616996012444158, 'samples': 14552576, 'steps': 28422, 'loss/train': 2.3578341007232666} +03/04/2022 22:51:27 - INFO - codeparrot_training - Step 28423: {'lr': 0.00046169677846531884, 'samples': 14553088, 'steps': 28423, 'loss/train': 2.3878164291381836} +03/04/2022 22:51:27 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 22:51:32 - INFO - codeparrot_training - Step 28424: {'lr': 0.0004616939555908346, 'samples': 14553600, 'steps': 28424, 'loss/train': 1.8753529787063599} +03/04/2022 22:51:35 - INFO - codeparrot_training - Step 28425: {'lr': 0.0004616911326209643, 'samples': 14554112, 'steps': 28425, 'loss/train': 2.1106035709381104} +03/04/2022 22:51:35 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/04/2022 22:51:40 - INFO - codeparrot_training - Step 28426: {'lr': 0.0004616883095557092, 'samples': 14554624, 'steps': 28426, 'loss/train': 2.177588701248169} +03/04/2022 22:51:43 - INFO - codeparrot_training - Step 28427: {'lr': 0.0004616854863950707, 'samples': 14555136, 'steps': 28427, 'loss/train': 1.2556391954421997} +03/04/2022 22:51:44 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 22:51:49 - INFO - codeparrot_training - Step 28428: {'lr': 0.00046168266313904995, 'samples': 14555648, 'steps': 28428, 'loss/train': 0.1952565759420395} +03/04/2022 22:51:52 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 22:51:54 - INFO - codeparrot_training - Step 28429: {'lr': 0.00046167983978764827, 'samples': 14556160, 'steps': 28429, 'loss/train': 1.4494187831878662} +03/04/2022 22:51:57 - INFO - codeparrot_training - Step 28430: {'lr': 0.0004616770163408669, 'samples': 14556672, 'steps': 28430, 'loss/train': 1.780761480331421} +03/04/2022 22:52:00 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/04/2022 22:52:02 - INFO - codeparrot_training - Step 28431: {'lr': 0.00046167419279870715, 'samples': 14557184, 'steps': 28431, 'loss/train': 1.9864543676376343} +03/04/2022 22:52:06 - INFO - codeparrot_training - Step 28432: {'lr': 0.00046167136916117025, 'samples': 14557696, 'steps': 28432, 'loss/train': 0.8525907397270203} +03/04/2022 22:52:08 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 22:52:11 - INFO - codeparrot_training - Step 28433: {'lr': 0.00046166854542825756, 'samples': 14558208, 'steps': 28433, 'loss/train': 1.578946828842163} +03/04/2022 22:52:14 - INFO - codeparrot_training - Step 28434: {'lr': 0.0004616657215999702, 'samples': 14558720, 'steps': 28434, 'loss/train': 1.2968952655792236} +03/04/2022 22:52:16 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/04/2022 22:52:19 - INFO - codeparrot_training - Step 28435: {'lr': 0.0004616628976763096, 'samples': 14559232, 'steps': 28435, 'loss/train': 1.6871817111968994} +03/04/2022 22:52:22 - INFO - codeparrot_training - Step 28436: {'lr': 0.0004616600736572769, 'samples': 14559744, 'steps': 28436, 'loss/train': 2.314330577850342} +03/04/2022 22:52:24 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/04/2022 22:52:28 - INFO - codeparrot_training - Step 28437: {'lr': 0.0004616572495428735, 'samples': 14560256, 'steps': 28437, 'loss/train': 1.055090308189392} +03/04/2022 22:52:31 - INFO - codeparrot_training - Step 28438: {'lr': 0.0004616544253331006, 'samples': 14560768, 'steps': 28438, 'loss/train': 1.1708316802978516} +03/04/2022 22:52:33 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 22:52:36 - INFO - codeparrot_training - Step 28439: {'lr': 0.00046165160102795943, 'samples': 14561280, 'steps': 28439, 'loss/train': 1.9060810804367065} +03/04/2022 22:52:39 - INFO - codeparrot_training - Step 28440: {'lr': 0.0004616487766274514, 'samples': 14561792, 'steps': 28440, 'loss/train': 1.8800501823425293} +03/04/2022 22:52:41 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 22:52:45 - INFO - codeparrot_training - Step 28441: {'lr': 0.0004616459521315777, 'samples': 14562304, 'steps': 28441, 'loss/train': 1.5542160272598267} +03/04/2022 22:52:48 - INFO - codeparrot_training - Step 28442: {'lr': 0.0004616431275403395, 'samples': 14562816, 'steps': 28442, 'loss/train': 0.7190385460853577} +03/04/2022 22:52:49 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/04/2022 22:52:53 - INFO - codeparrot_training - Step 28443: {'lr': 0.0004616403028537382, 'samples': 14563328, 'steps': 28443, 'loss/train': 1.2053087949752808} +03/04/2022 22:52:56 - INFO - codeparrot_training - Step 28444: {'lr': 0.0004616374780717751, 'samples': 14563840, 'steps': 28444, 'loss/train': 2.3801608085632324} +03/04/2022 22:52:58 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 22:53:01 - INFO - codeparrot_training - Step 28445: {'lr': 0.0004616346531944514, 'samples': 14564352, 'steps': 28445, 'loss/train': 1.797059178352356} +03/04/2022 22:53:05 - INFO - codeparrot_training - Step 28446: {'lr': 0.00046163182822176835, 'samples': 14564864, 'steps': 28446, 'loss/train': 1.571852445602417} +03/04/2022 22:53:06 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 22:53:10 - INFO - codeparrot_training - Step 28447: {'lr': 0.0004616290031537273, 'samples': 14565376, 'steps': 28447, 'loss/train': 1.5171570777893066} +03/04/2022 22:53:13 - INFO - codeparrot_training - Step 28448: {'lr': 0.0004616261779903295, 'samples': 14565888, 'steps': 28448, 'loss/train': 1.0717830657958984} +03/04/2022 22:53:14 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 22:53:18 - INFO - codeparrot_training - Step 28449: {'lr': 0.0004616233527315762, 'samples': 14566400, 'steps': 28449, 'loss/train': 1.2439640760421753} +03/04/2022 22:53:21 - INFO - codeparrot_training - Step 28450: {'lr': 0.0004616205273774686, 'samples': 14566912, 'steps': 28450, 'loss/train': 2.185582160949707} +03/04/2022 22:53:22 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 22:53:27 - INFO - codeparrot_training - Step 28451: {'lr': 0.00046161770192800817, 'samples': 14567424, 'steps': 28451, 'loss/train': 1.9288722276687622} +03/04/2022 22:53:30 - INFO - codeparrot_training - Step 28452: {'lr': 0.000461614876383196, 'samples': 14567936, 'steps': 28452, 'loss/train': 2.313890218734741} +03/04/2022 22:53:30 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 22:53:35 - INFO - codeparrot_training - Step 28453: {'lr': 0.0004616120507430335, 'samples': 14568448, 'steps': 28453, 'loss/train': 1.4159350395202637} +03/04/2022 22:53:38 - INFO - codeparrot_training - Step 28454: {'lr': 0.00046160922500752176, 'samples': 14568960, 'steps': 28454, 'loss/train': 1.677953839302063} +03/04/2022 22:53:38 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/04/2022 22:53:43 - INFO - codeparrot_training - Step 28455: {'lr': 0.0004616063991766623, 'samples': 14569472, 'steps': 28455, 'loss/train': 0.6012064218521118} +03/04/2022 22:53:47 - INFO - codeparrot_training - Step 28456: {'lr': 0.0004616035732504562, 'samples': 14569984, 'steps': 28456, 'loss/train': 2.0722954273223877} +03/04/2022 22:53:47 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 22:53:52 - INFO - codeparrot_training - Step 28457: {'lr': 0.0004616007472289048, 'samples': 14570496, 'steps': 28457, 'loss/train': 1.4919414520263672} +03/04/2022 22:53:55 - INFO - codeparrot_training - Step 28458: {'lr': 0.00046159792111200937, 'samples': 14571008, 'steps': 28458, 'loss/train': 1.0324054956436157} +03/04/2022 22:53:55 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/04/2022 22:54:00 - INFO - codeparrot_training - Step 28459: {'lr': 0.0004615950948997711, 'samples': 14571520, 'steps': 28459, 'loss/train': 2.5812346935272217} +03/04/2022 22:54:03 - INFO - codeparrot_training - Step 28460: {'lr': 0.0004615922685921915, 'samples': 14572032, 'steps': 28460, 'loss/train': 0.8905381560325623} +03/04/2022 22:54:03 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/04/2022 22:54:09 - INFO - codeparrot_training - Step 28461: {'lr': 0.0004615894421892716, 'samples': 14572544, 'steps': 28461, 'loss/train': 1.9900462627410889} +03/04/2022 22:54:12 - INFO - codeparrot_training - Step 28462: {'lr': 0.0004615866156910128, 'samples': 14573056, 'steps': 28462, 'loss/train': 1.9953563213348389} +03/04/2022 22:54:12 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 22:54:17 - INFO - codeparrot_training - Step 28463: {'lr': 0.00046158378909741626, 'samples': 14573568, 'steps': 28463, 'loss/train': 1.8494478464126587} +03/04/2022 22:54:20 - INFO - codeparrot_training - Step 28464: {'lr': 0.00046158096240848343, 'samples': 14574080, 'steps': 28464, 'loss/train': 0.9831640720367432} +03/04/2022 22:54:20 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 22:54:26 - INFO - codeparrot_training - Step 28465: {'lr': 0.00046157813562421545, 'samples': 14574592, 'steps': 28465, 'loss/train': 1.8662008047103882} +03/04/2022 22:54:29 - INFO - codeparrot_training - Step 28466: {'lr': 0.0004615753087446136, 'samples': 14575104, 'steps': 28466, 'loss/train': 2.0512847900390625} +03/04/2022 22:54:29 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 22:54:34 - INFO - codeparrot_training - Step 28467: {'lr': 0.00046157248176967915, 'samples': 14575616, 'steps': 28467, 'loss/train': 2.474247694015503} +03/04/2022 22:54:38 - INFO - codeparrot_training - Step 28468: {'lr': 0.0004615696546994135, 'samples': 14576128, 'steps': 28468, 'loss/train': 1.91470205783844} +03/04/2022 22:54:39 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 22:54:43 - INFO - codeparrot_training - Step 28469: {'lr': 0.00046156682753381774, 'samples': 14576640, 'steps': 28469, 'loss/train': 1.0590317249298096} +03/04/2022 22:54:46 - INFO - codeparrot_training - Step 28470: {'lr': 0.0004615640002728932, 'samples': 14577152, 'steps': 28470, 'loss/train': 1.599726915359497} +03/04/2022 22:54:47 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 22:54:51 - INFO - codeparrot_training - Step 28471: {'lr': 0.00046156117291664133, 'samples': 14577664, 'steps': 28471, 'loss/train': 1.7394338846206665} +03/04/2022 22:54:54 - INFO - codeparrot_training - Step 28472: {'lr': 0.0004615583454650632, 'samples': 14578176, 'steps': 28472, 'loss/train': 2.329432487487793} +03/04/2022 22:54:55 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 22:55:00 - INFO - codeparrot_training - Step 28473: {'lr': 0.00046155551791816007, 'samples': 14578688, 'steps': 28473, 'loss/train': 1.5264135599136353} +03/04/2022 22:55:03 - INFO - codeparrot_training - Step 28474: {'lr': 0.00046155269027593337, 'samples': 14579200, 'steps': 28474, 'loss/train': 2.007673740386963} +03/04/2022 22:55:03 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 22:55:08 - INFO - codeparrot_training - Step 28475: {'lr': 0.00046154986253838426, 'samples': 14579712, 'steps': 28475, 'loss/train': 1.7036206722259521} +03/04/2022 22:55:11 - INFO - codeparrot_training - Step 28476: {'lr': 0.00046154703470551405, 'samples': 14580224, 'steps': 28476, 'loss/train': 2.1684207916259766} +03/04/2022 22:55:12 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 22:55:17 - INFO - codeparrot_training - Step 28477: {'lr': 0.000461544206777324, 'samples': 14580736, 'steps': 28477, 'loss/train': 2.1722216606140137} +03/04/2022 22:55:20 - INFO - codeparrot_training - Step 28478: {'lr': 0.00046154137875381547, 'samples': 14581248, 'steps': 28478, 'loss/train': 1.5086965560913086} +03/04/2022 22:55:20 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 22:55:25 - INFO - codeparrot_training - Step 28479: {'lr': 0.00046153855063498964, 'samples': 14581760, 'steps': 28479, 'loss/train': 1.878096580505371} +03/04/2022 22:55:28 - INFO - codeparrot_training - Step 28480: {'lr': 0.00046153572242084776, 'samples': 14582272, 'steps': 28480, 'loss/train': 2.5475211143493652} +03/04/2022 22:55:28 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 22:55:33 - INFO - codeparrot_training - Step 28481: {'lr': 0.0004615328941113911, 'samples': 14582784, 'steps': 28481, 'loss/train': 1.6084961891174316} +03/04/2022 22:55:36 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 22:55:39 - INFO - codeparrot_training - Step 28482: {'lr': 0.00046153006570662106, 'samples': 14583296, 'steps': 28482, 'loss/train': 1.972991943359375} +03/04/2022 22:55:42 - INFO - codeparrot_training - Step 28483: {'lr': 0.0004615272372065388, 'samples': 14583808, 'steps': 28483, 'loss/train': 1.9568727016448975} +03/04/2022 22:55:44 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/04/2022 22:55:47 - INFO - codeparrot_training - Step 28484: {'lr': 0.0004615244086111456, 'samples': 14584320, 'steps': 28484, 'loss/train': 1.4594602584838867} +03/04/2022 22:55:50 - INFO - codeparrot_training - Step 28485: {'lr': 0.00046152157992044283, 'samples': 14584832, 'steps': 28485, 'loss/train': 2.522332191467285} +03/04/2022 22:55:53 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/04/2022 22:55:55 - INFO - codeparrot_training - Step 28486: {'lr': 0.0004615187511344316, 'samples': 14585344, 'steps': 28486, 'loss/train': 0.589413583278656} +03/04/2022 22:55:59 - INFO - codeparrot_training - Step 28487: {'lr': 0.00046151592225311347, 'samples': 14585856, 'steps': 28487, 'loss/train': 1.6642252206802368} +03/04/2022 22:56:01 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/04/2022 22:56:04 - INFO - codeparrot_training - Step 28488: {'lr': 0.0004615130932764894, 'samples': 14586368, 'steps': 28488, 'loss/train': 1.789724349975586} +03/04/2022 22:56:07 - INFO - codeparrot_training - Step 28489: {'lr': 0.0004615102642045608, 'samples': 14586880, 'steps': 28489, 'loss/train': 1.284641146659851} +03/04/2022 22:56:09 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 22:56:12 - INFO - codeparrot_training - Step 28490: {'lr': 0.00046150743503732897, 'samples': 14587392, 'steps': 28490, 'loss/train': 1.8705921173095703} +03/04/2022 22:56:15 - INFO - codeparrot_training - Step 28491: {'lr': 0.0004615046057747951, 'samples': 14587904, 'steps': 28491, 'loss/train': 2.8348071575164795} +03/04/2022 22:56:18 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 22:56:21 - INFO - codeparrot_training - Step 28492: {'lr': 0.0004615017764169606, 'samples': 14588416, 'steps': 28492, 'loss/train': 0.3164556622505188} +03/04/2022 22:56:24 - INFO - codeparrot_training - Step 28493: {'lr': 0.00046149894696382655, 'samples': 14588928, 'steps': 28493, 'loss/train': 1.3121024370193481} +03/04/2022 22:56:27 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 22:56:29 - INFO - codeparrot_training - Step 28494: {'lr': 0.00046149611741539445, 'samples': 14589440, 'steps': 28494, 'loss/train': 1.538524866104126} +03/04/2022 22:56:32 - INFO - codeparrot_training - Step 28495: {'lr': 0.00046149328777166543, 'samples': 14589952, 'steps': 28495, 'loss/train': 1.417362928390503} +03/04/2022 22:56:35 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/04/2022 22:56:38 - INFO - codeparrot_training - Step 28496: {'lr': 0.0004614904580326408, 'samples': 14590464, 'steps': 28496, 'loss/train': 1.5697916746139526} +03/04/2022 22:56:41 - INFO - codeparrot_training - Step 28497: {'lr': 0.0004614876281983218, 'samples': 14590976, 'steps': 28497, 'loss/train': 1.3986625671386719} +03/04/2022 22:56:43 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 22:56:46 - INFO - codeparrot_training - Step 28498: {'lr': 0.0004614847982687097, 'samples': 14591488, 'steps': 28498, 'loss/train': 1.5352305173873901} +03/04/2022 22:56:49 - INFO - codeparrot_training - Step 28499: {'lr': 0.0004614819682438059, 'samples': 14592000, 'steps': 28499, 'loss/train': 1.2939802408218384} +03/04/2022 22:56:51 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 22:56:54 - INFO - codeparrot_training - Step 28500: {'lr': 0.00046147913812361155, 'samples': 14592512, 'steps': 28500, 'loss/train': 1.5529202222824097} +03/04/2022 22:56:58 - INFO - codeparrot_training - Step 28501: {'lr': 0.000461476307908128, 'samples': 14593024, 'steps': 28501, 'loss/train': 1.1584223508834839} +03/04/2022 22:56:59 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 22:57:03 - INFO - codeparrot_training - Step 28502: {'lr': 0.00046147347759735647, 'samples': 14593536, 'steps': 28502, 'loss/train': 2.0944275856018066} +03/04/2022 22:57:06 - INFO - codeparrot_training - Step 28503: {'lr': 0.00046147064719129823, 'samples': 14594048, 'steps': 28503, 'loss/train': 2.059772491455078} +03/04/2022 22:57:08 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 22:57:11 - INFO - codeparrot_training - Step 28504: {'lr': 0.00046146781668995456, 'samples': 14594560, 'steps': 28504, 'loss/train': 1.6675336360931396} +03/04/2022 22:57:14 - INFO - codeparrot_training - Step 28505: {'lr': 0.0004614649860933268, 'samples': 14595072, 'steps': 28505, 'loss/train': 2.645833969116211} +03/04/2022 22:57:16 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/04/2022 22:57:20 - INFO - codeparrot_training - Step 28506: {'lr': 0.0004614621554014162, 'samples': 14595584, 'steps': 28506, 'loss/train': 0.8208023905754089} +03/04/2022 22:57:23 - INFO - codeparrot_training - Step 28507: {'lr': 0.00046145932461422396, 'samples': 14596096, 'steps': 28507, 'loss/train': 1.9489738941192627} +03/04/2022 22:57:25 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 22:57:28 - INFO - codeparrot_training - Step 28508: {'lr': 0.00046145649373175145, 'samples': 14596608, 'steps': 28508, 'loss/train': 1.492097020149231} +03/04/2022 22:57:31 - INFO - codeparrot_training - Step 28509: {'lr': 0.0004614536627539999, 'samples': 14597120, 'steps': 28509, 'loss/train': 1.5528852939605713} +03/04/2022 22:57:33 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 22:57:37 - INFO - codeparrot_training - Step 28510: {'lr': 0.0004614508316809706, 'samples': 14597632, 'steps': 28510, 'loss/train': 2.003201484680176} +03/04/2022 22:57:40 - INFO - codeparrot_training - Step 28511: {'lr': 0.00046144800051266477, 'samples': 14598144, 'steps': 28511, 'loss/train': 1.5394105911254883} +03/04/2022 22:57:41 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 22:57:45 - INFO - codeparrot_training - Step 28512: {'lr': 0.00046144516924908377, 'samples': 14598656, 'steps': 28512, 'loss/train': 1.510074257850647} +03/04/2022 22:57:48 - INFO - codeparrot_training - Step 28513: {'lr': 0.0004614423378902289, 'samples': 14599168, 'steps': 28513, 'loss/train': 1.4555315971374512} +03/04/2022 22:57:50 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 22:57:54 - INFO - codeparrot_training - Step 28514: {'lr': 0.0004614395064361013, 'samples': 14599680, 'steps': 28514, 'loss/train': 1.8934745788574219} +03/04/2022 22:57:57 - INFO - codeparrot_training - Step 28515: {'lr': 0.00046143667488670226, 'samples': 14600192, 'steps': 28515, 'loss/train': 2.277827739715576} +03/04/2022 22:57:59 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 22:58:02 - INFO - codeparrot_training - Step 28516: {'lr': 0.00046143384324203325, 'samples': 14600704, 'steps': 28516, 'loss/train': 2.4954216480255127} +03/04/2022 22:58:05 - INFO - codeparrot_training - Step 28517: {'lr': 0.00046143101150209533, 'samples': 14601216, 'steps': 28517, 'loss/train': 1.7485748529434204} +03/04/2022 22:58:07 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 22:58:11 - INFO - codeparrot_training - Step 28518: {'lr': 0.0004614281796668899, 'samples': 14601728, 'steps': 28518, 'loss/train': 1.8947330713272095} +03/04/2022 22:58:14 - INFO - codeparrot_training - Step 28519: {'lr': 0.0004614253477364182, 'samples': 14602240, 'steps': 28519, 'loss/train': 0.4458111524581909} +03/04/2022 22:58:15 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 22:58:19 - INFO - codeparrot_training - Step 28520: {'lr': 0.0004614225157106815, 'samples': 14602752, 'steps': 28520, 'loss/train': 2.479257106781006} +03/04/2022 22:58:22 - INFO - codeparrot_training - Step 28521: {'lr': 0.00046141968358968103, 'samples': 14603264, 'steps': 28521, 'loss/train': 1.9931385517120361} +03/04/2022 22:58:24 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 22:58:27 - INFO - codeparrot_training - Step 28522: {'lr': 0.00046141685137341814, 'samples': 14603776, 'steps': 28522, 'loss/train': 1.8274396657943726} +03/04/2022 22:58:30 - INFO - codeparrot_training - Step 28523: {'lr': 0.00046141401906189404, 'samples': 14604288, 'steps': 28523, 'loss/train': 1.5934008359909058} +03/04/2022 22:58:32 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/04/2022 22:58:36 - INFO - codeparrot_training - Step 28524: {'lr': 0.0004614111866551101, 'samples': 14604800, 'steps': 28524, 'loss/train': 1.8763766288757324} +03/04/2022 22:58:39 - INFO - codeparrot_training - Step 28525: {'lr': 0.0004614083541530675, 'samples': 14605312, 'steps': 28525, 'loss/train': 1.0894922018051147} +03/04/2022 22:58:40 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 22:58:44 - INFO - codeparrot_training - Step 28526: {'lr': 0.00046140552155576767, 'samples': 14605824, 'steps': 28526, 'loss/train': 2.3152801990509033} +03/04/2022 22:58:47 - INFO - codeparrot_training - Step 28527: {'lr': 0.0004614026888632116, 'samples': 14606336, 'steps': 28527, 'loss/train': 1.2995717525482178} +03/04/2022 22:58:48 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 22:58:52 - INFO - codeparrot_training - Step 28528: {'lr': 0.00046139985607540087, 'samples': 14606848, 'steps': 28528, 'loss/train': 1.8299880027770996} +03/04/2022 22:58:56 - INFO - codeparrot_training - Step 28529: {'lr': 0.00046139702319233656, 'samples': 14607360, 'steps': 28529, 'loss/train': 2.5086138248443604} +03/04/2022 22:58:56 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 22:59:01 - INFO - codeparrot_training - Step 28530: {'lr': 0.00046139419021402005, 'samples': 14607872, 'steps': 28530, 'loss/train': 1.9696965217590332} +03/04/2022 22:59:04 - INFO - codeparrot_training - Step 28531: {'lr': 0.00046139135714045253, 'samples': 14608384, 'steps': 28531, 'loss/train': 1.411405086517334} +03/04/2022 22:59:05 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/04/2022 22:59:09 - INFO - codeparrot_training - Step 28532: {'lr': 0.00046138852397163547, 'samples': 14608896, 'steps': 28532, 'loss/train': 1.364651083946228} +03/04/2022 22:59:12 - INFO - codeparrot_training - Step 28533: {'lr': 0.00046138569070756984, 'samples': 14609408, 'steps': 28533, 'loss/train': 1.2493674755096436} +03/04/2022 22:59:13 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/04/2022 22:59:18 - INFO - codeparrot_training - Step 28534: {'lr': 0.00046138285734825715, 'samples': 14609920, 'steps': 28534, 'loss/train': 1.67771315574646} +03/04/2022 22:59:21 - INFO - codeparrot_training - Step 28535: {'lr': 0.0004613800238936986, 'samples': 14610432, 'steps': 28535, 'loss/train': 1.2505425214767456} +03/04/2022 22:59:22 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/04/2022 22:59:26 - INFO - codeparrot_training - Step 28536: {'lr': 0.0004613771903438955, 'samples': 14610944, 'steps': 28536, 'loss/train': 2.1148691177368164} +03/04/2022 22:59:29 - INFO - codeparrot_training - Step 28537: {'lr': 0.00046137435669884897, 'samples': 14611456, 'steps': 28537, 'loss/train': 2.1968178749084473} +03/04/2022 22:59:30 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 22:59:35 - INFO - codeparrot_training - Step 28538: {'lr': 0.00046137152295856054, 'samples': 14611968, 'steps': 28538, 'loss/train': 1.8504951000213623} +03/04/2022 22:59:38 - INFO - codeparrot_training - Step 28539: {'lr': 0.0004613686891230313, 'samples': 14612480, 'steps': 28539, 'loss/train': 1.7645478248596191} +03/04/2022 22:59:38 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 22:59:43 - INFO - codeparrot_training - Step 28540: {'lr': 0.0004613658551922627, 'samples': 14612992, 'steps': 28540, 'loss/train': 1.5802167654037476} +03/04/2022 22:59:46 - INFO - codeparrot_training - Step 28541: {'lr': 0.0004613630211662558, 'samples': 14613504, 'steps': 28541, 'loss/train': 1.40403151512146} +03/04/2022 22:59:46 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/04/2022 22:59:52 - INFO - codeparrot_training - Step 28542: {'lr': 0.00046136018704501203, 'samples': 14614016, 'steps': 28542, 'loss/train': 1.731514573097229} +03/04/2022 22:59:55 - INFO - codeparrot_training - Step 28543: {'lr': 0.00046135735282853263, 'samples': 14614528, 'steps': 28543, 'loss/train': 1.297905445098877} +03/04/2022 22:59:56 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/04/2022 23:00:00 - INFO - codeparrot_training - Step 28544: {'lr': 0.0004613545185168188, 'samples': 14615040, 'steps': 28544, 'loss/train': 0.8428840041160583} +03/04/2022 23:00:04 - INFO - codeparrot_training - Step 28545: {'lr': 0.0004613516841098719, 'samples': 14615552, 'steps': 28545, 'loss/train': 2.143965244293213} +03/04/2022 23:00:05 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 23:00:09 - INFO - codeparrot_training - Step 28546: {'lr': 0.0004613488496076933, 'samples': 14616064, 'steps': 28546, 'loss/train': 2.10507869720459} +03/04/2022 23:00:12 - INFO - codeparrot_training - Step 28547: {'lr': 0.00046134601501028404, 'samples': 14616576, 'steps': 28547, 'loss/train': 1.8600928783416748} +03/04/2022 23:00:13 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 23:00:17 - INFO - codeparrot_training - Step 28548: {'lr': 0.0004613431803176456, 'samples': 14617088, 'steps': 28548, 'loss/train': 1.8657844066619873} +03/04/2022 23:00:21 - INFO - codeparrot_training - Step 28549: {'lr': 0.00046134034552977924, 'samples': 14617600, 'steps': 28549, 'loss/train': 0.9800437688827515} +03/04/2022 23:00:22 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 23:00:26 - INFO - codeparrot_training - Step 28550: {'lr': 0.00046133751064668605, 'samples': 14618112, 'steps': 28550, 'loss/train': 1.6841028928756714} +03/04/2022 23:00:29 - INFO - codeparrot_training - Step 28551: {'lr': 0.0004613346756683675, 'samples': 14618624, 'steps': 28551, 'loss/train': 3.004523754119873} +03/04/2022 23:00:30 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/04/2022 23:00:34 - INFO - codeparrot_training - Step 28552: {'lr': 0.0004613318405948248, 'samples': 14619136, 'steps': 28552, 'loss/train': 1.5826362371444702} +03/04/2022 23:00:37 - INFO - codeparrot_training - Step 28553: {'lr': 0.00046132900542605925, 'samples': 14619648, 'steps': 28553, 'loss/train': 1.356889009475708} +03/04/2022 23:00:39 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/04/2022 23:00:43 - INFO - codeparrot_training - Step 28554: {'lr': 0.0004613261701620721, 'samples': 14620160, 'steps': 28554, 'loss/train': 1.6440720558166504} +03/04/2022 23:00:46 - INFO - codeparrot_training - Step 28555: {'lr': 0.0004613233348028646, 'samples': 14620672, 'steps': 28555, 'loss/train': 1.8009424209594727} +03/04/2022 23:00:48 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 23:00:51 - INFO - codeparrot_training - Step 28556: {'lr': 0.0004613204993484381, 'samples': 14621184, 'steps': 28556, 'loss/train': 1.994805932044983} +03/04/2022 23:00:54 - INFO - codeparrot_training - Step 28557: {'lr': 0.00046131766379879386, 'samples': 14621696, 'steps': 28557, 'loss/train': 1.1962062120437622} +03/04/2022 23:00:56 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/04/2022 23:01:00 - INFO - codeparrot_training - Step 28558: {'lr': 0.0004613148281539331, 'samples': 14622208, 'steps': 28558, 'loss/train': 0.7661890387535095} +03/04/2022 23:01:03 - INFO - codeparrot_training - Step 28559: {'lr': 0.00046131199241385726, 'samples': 14622720, 'steps': 28559, 'loss/train': 1.4039686918258667} +03/04/2022 23:01:05 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 23:01:08 - INFO - codeparrot_training - Step 28560: {'lr': 0.0004613091565785673, 'samples': 14623232, 'steps': 28560, 'loss/train': 1.9979979991912842} +03/04/2022 23:01:11 - INFO - codeparrot_training - Step 28561: {'lr': 0.0004613063206480649, 'samples': 14623744, 'steps': 28561, 'loss/train': 2.9927990436553955} +03/04/2022 23:01:13 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 23:01:16 - INFO - codeparrot_training - Step 28562: {'lr': 0.000461303484622351, 'samples': 14624256, 'steps': 28562, 'loss/train': 1.378042459487915} +03/04/2022 23:01:19 - INFO - codeparrot_training - Step 28563: {'lr': 0.00046130064850142703, 'samples': 14624768, 'steps': 28563, 'loss/train': 1.649056077003479} +03/04/2022 23:01:21 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 23:01:25 - INFO - codeparrot_training - Step 28564: {'lr': 0.0004612978122852942, 'samples': 14625280, 'steps': 28564, 'loss/train': 2.349705696105957} +03/04/2022 23:01:28 - INFO - codeparrot_training - Step 28565: {'lr': 0.000461294975973954, 'samples': 14625792, 'steps': 28565, 'loss/train': 1.9318636655807495} +03/04/2022 23:01:29 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/04/2022 23:01:33 - INFO - codeparrot_training - Step 28566: {'lr': 0.0004612921395674074, 'samples': 14626304, 'steps': 28566, 'loss/train': 1.240950107574463} +03/04/2022 23:01:36 - INFO - codeparrot_training - Step 28567: {'lr': 0.0004612893030656559, 'samples': 14626816, 'steps': 28567, 'loss/train': 1.9817887544631958} +03/04/2022 23:01:38 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 23:01:42 - INFO - codeparrot_training - Step 28568: {'lr': 0.0004612864664687007, 'samples': 14627328, 'steps': 28568, 'loss/train': 1.3493638038635254} +03/04/2022 23:01:45 - INFO - codeparrot_training - Step 28569: {'lr': 0.0004612836297765429, 'samples': 14627840, 'steps': 28569, 'loss/train': 1.2660952806472778} +03/04/2022 23:01:46 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 23:01:50 - INFO - codeparrot_training - Step 28570: {'lr': 0.00046128079298918414, 'samples': 14628352, 'steps': 28570, 'loss/train': 1.0702208280563354} +03/04/2022 23:01:53 - INFO - codeparrot_training - Step 28571: {'lr': 0.00046127795610662547, 'samples': 14628864, 'steps': 28571, 'loss/train': 1.3752094507217407} +03/04/2022 23:01:54 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/04/2022 23:01:58 - INFO - codeparrot_training - Step 28572: {'lr': 0.0004612751191288682, 'samples': 14629376, 'steps': 28572, 'loss/train': 0.5533729791641235} +03/04/2022 23:02:01 - INFO - codeparrot_training - Step 28573: {'lr': 0.00046127228205591366, 'samples': 14629888, 'steps': 28573, 'loss/train': 2.1714673042297363} +03/04/2022 23:02:02 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/04/2022 23:02:07 - INFO - codeparrot_training - Step 28574: {'lr': 0.0004612694448877631, 'samples': 14630400, 'steps': 28574, 'loss/train': 2.225487232208252} +03/04/2022 23:02:10 - INFO - codeparrot_training - Step 28575: {'lr': 0.00046126660762441774, 'samples': 14630912, 'steps': 28575, 'loss/train': 1.105965256690979} +03/04/2022 23:02:11 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 23:02:15 - INFO - codeparrot_training - Step 28576: {'lr': 0.00046126377026587897, 'samples': 14631424, 'steps': 28576, 'loss/train': 1.7449263334274292} +03/04/2022 23:02:18 - INFO - codeparrot_training - Step 28577: {'lr': 0.0004612609328121479, 'samples': 14631936, 'steps': 28577, 'loss/train': 1.8084131479263306} +03/04/2022 23:02:19 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 23:02:23 - INFO - codeparrot_training - Step 28578: {'lr': 0.000461258095263226, 'samples': 14632448, 'steps': 28578, 'loss/train': 1.202929139137268} +03/04/2022 23:02:27 - INFO - codeparrot_training - Step 28579: {'lr': 0.00046125525761911445, 'samples': 14632960, 'steps': 28579, 'loss/train': 2.0499606132507324} +03/04/2022 23:02:27 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 23:02:32 - INFO - codeparrot_training - Step 28580: {'lr': 0.00046125241987981445, 'samples': 14633472, 'steps': 28580, 'loss/train': 2.564877510070801} +03/04/2022 23:02:35 - INFO - codeparrot_training - Step 28581: {'lr': 0.0004612495820453275, 'samples': 14633984, 'steps': 28581, 'loss/train': 1.6800137758255005} +03/04/2022 23:02:36 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/04/2022 23:02:40 - INFO - codeparrot_training - Step 28582: {'lr': 0.0004612467441156547, 'samples': 14634496, 'steps': 28582, 'loss/train': 2.3042914867401123} +03/04/2022 23:02:44 - INFO - codeparrot_training - Step 28583: {'lr': 0.00046124390609079735, 'samples': 14635008, 'steps': 28583, 'loss/train': 1.3925615549087524} +03/04/2022 23:02:44 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/04/2022 23:02:50 - INFO - codeparrot_training - Step 28584: {'lr': 0.00046124106797075683, 'samples': 14635520, 'steps': 28584, 'loss/train': 1.453824758529663} +03/04/2022 23:02:53 - INFO - codeparrot_training - Step 28585: {'lr': 0.00046123822975553425, 'samples': 14636032, 'steps': 28585, 'loss/train': 1.31509268283844} +03/04/2022 23:02:55 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 23:02:58 - INFO - codeparrot_training - Step 28586: {'lr': 0.00046123539144513103, 'samples': 14636544, 'steps': 28586, 'loss/train': 1.5003575086593628} +03/04/2022 23:03:01 - INFO - codeparrot_training - Step 28587: {'lr': 0.00046123255303954835, 'samples': 14637056, 'steps': 28587, 'loss/train': 1.305431842803955} +03/04/2022 23:03:04 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 23:03:06 - INFO - codeparrot_training - Step 28588: {'lr': 0.0004612297145387876, 'samples': 14637568, 'steps': 28588, 'loss/train': 2.311103343963623} +03/04/2022 23:03:10 - INFO - codeparrot_training - Step 28589: {'lr': 0.00046122687594285, 'samples': 14638080, 'steps': 28589, 'loss/train': 1.3941694498062134} +03/04/2022 23:03:12 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/04/2022 23:03:15 - INFO - codeparrot_training - Step 28590: {'lr': 0.0004612240372517368, 'samples': 14638592, 'steps': 28590, 'loss/train': 1.9809764623641968} +03/04/2022 23:03:18 - INFO - codeparrot_training - Step 28591: {'lr': 0.00046122119846544936, 'samples': 14639104, 'steps': 28591, 'loss/train': 1.8237049579620361} +03/04/2022 23:03:21 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 23:03:23 - INFO - codeparrot_training - Step 28592: {'lr': 0.00046121835958398883, 'samples': 14639616, 'steps': 28592, 'loss/train': 1.5348312854766846} +03/04/2022 23:03:26 - INFO - codeparrot_training - Step 28593: {'lr': 0.0004612155206073566, 'samples': 14640128, 'steps': 28593, 'loss/train': 2.1318936347961426} +03/04/2022 23:03:29 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 23:03:32 - INFO - codeparrot_training - Step 28594: {'lr': 0.000461212681535554, 'samples': 14640640, 'steps': 28594, 'loss/train': 1.4095181226730347} +03/04/2022 23:03:35 - INFO - codeparrot_training - Step 28595: {'lr': 0.0004612098423685821, 'samples': 14641152, 'steps': 28595, 'loss/train': 1.649467945098877} +03/04/2022 23:03:37 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/04/2022 23:03:40 - INFO - codeparrot_training - Step 28596: {'lr': 0.0004612070031064424, 'samples': 14641664, 'steps': 28596, 'loss/train': 1.4351922273635864} +03/04/2022 23:03:43 - INFO - codeparrot_training - Step 28597: {'lr': 0.000461204163749136, 'samples': 14642176, 'steps': 28597, 'loss/train': 2.1127779483795166} +03/04/2022 23:03:46 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 23:03:49 - INFO - codeparrot_training - Step 28598: {'lr': 0.0004612013242966643, 'samples': 14642688, 'steps': 28598, 'loss/train': 2.1396238803863525} +03/04/2022 23:03:52 - INFO - codeparrot_training - Step 28599: {'lr': 0.0004611984847490285, 'samples': 14643200, 'steps': 28599, 'loss/train': 1.9402045011520386} +03/04/2022 23:03:54 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/04/2022 23:03:57 - INFO - codeparrot_training - Step 28600: {'lr': 0.00046119564510623, 'samples': 14643712, 'steps': 28600, 'loss/train': 1.1657992601394653} +03/04/2022 23:04:00 - INFO - codeparrot_training - Step 28601: {'lr': 0.00046119280536827, 'samples': 14644224, 'steps': 28601, 'loss/train': 1.6281999349594116} +03/04/2022 23:04:02 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 23:04:05 - INFO - codeparrot_training - Step 28602: {'lr': 0.0004611899655351497, 'samples': 14644736, 'steps': 28602, 'loss/train': 0.1884302794933319} +03/04/2022 23:04:09 - INFO - codeparrot_training - Step 28603: {'lr': 0.0004611871256068705, 'samples': 14645248, 'steps': 28603, 'loss/train': 1.8855360746383667} +03/04/2022 23:04:11 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 23:04:14 - INFO - codeparrot_training - Step 28604: {'lr': 0.0004611842855834336, 'samples': 14645760, 'steps': 28604, 'loss/train': 2.1736245155334473} +03/04/2022 23:04:17 - INFO - codeparrot_training - Step 28605: {'lr': 0.00046118144546484043, 'samples': 14646272, 'steps': 28605, 'loss/train': 2.097539186477661} +03/04/2022 23:04:20 - INFO - codeparrot_training - Step 28606: {'lr': 0.0004611786052510921, 'samples': 14646784, 'steps': 28606, 'loss/train': 1.454010248184204} +03/04/2022 23:04:20 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 23:04:26 - INFO - codeparrot_training - Step 28607: {'lr': 0.0004611757649421899, 'samples': 14647296, 'steps': 28607, 'loss/train': 2.1387901306152344} +03/04/2022 23:04:28 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/04/2022 23:04:31 - INFO - codeparrot_training - Step 28608: {'lr': 0.0004611729245381352, 'samples': 14647808, 'steps': 28608, 'loss/train': 1.71255362033844} +03/04/2022 23:04:34 - INFO - codeparrot_training - Step 28609: {'lr': 0.00046117008403892925, 'samples': 14648320, 'steps': 28609, 'loss/train': 1.5402244329452515} +03/04/2022 23:04:36 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 23:04:39 - INFO - codeparrot_training - Step 28610: {'lr': 0.0004611672434445733, 'samples': 14648832, 'steps': 28610, 'loss/train': 2.221043348312378} +03/04/2022 23:04:42 - INFO - codeparrot_training - Step 28611: {'lr': 0.0004611644027550687, 'samples': 14649344, 'steps': 28611, 'loss/train': 1.771355152130127} +03/04/2022 23:04:45 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 23:04:48 - INFO - codeparrot_training - Step 28612: {'lr': 0.00046116156197041657, 'samples': 14649856, 'steps': 28612, 'loss/train': 2.7303073406219482} +03/04/2022 23:04:51 - INFO - codeparrot_training - Step 28613: {'lr': 0.0004611587210906184, 'samples': 14650368, 'steps': 28613, 'loss/train': 1.7049132585525513} +03/04/2022 23:04:53 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 23:04:56 - INFO - codeparrot_training - Step 28614: {'lr': 0.0004611558801156753, 'samples': 14650880, 'steps': 28614, 'loss/train': 1.4431854486465454} +03/04/2022 23:04:59 - INFO - codeparrot_training - Step 28615: {'lr': 0.0004611530390455887, 'samples': 14651392, 'steps': 28615, 'loss/train': 1.3910313844680786} +03/04/2022 23:05:01 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 23:05:04 - INFO - codeparrot_training - Step 28616: {'lr': 0.00046115019788035974, 'samples': 14651904, 'steps': 28616, 'loss/train': 1.5558934211730957} +03/04/2022 23:05:08 - INFO - codeparrot_training - Step 28617: {'lr': 0.00046114735661998975, 'samples': 14652416, 'steps': 28617, 'loss/train': 1.3270963430404663} +03/04/2022 23:05:10 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/04/2022 23:05:13 - INFO - codeparrot_training - Step 28618: {'lr': 0.0004611445152644801, 'samples': 14652928, 'steps': 28618, 'loss/train': 2.0974578857421875} +03/04/2022 23:05:16 - INFO - codeparrot_training - Step 28619: {'lr': 0.00046114167381383186, 'samples': 14653440, 'steps': 28619, 'loss/train': 1.9519639015197754} +03/04/2022 23:05:18 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/04/2022 23:05:21 - INFO - codeparrot_training - Step 28620: {'lr': 0.0004611388322680465, 'samples': 14653952, 'steps': 28620, 'loss/train': 1.351516842842102} +03/04/2022 23:05:24 - INFO - codeparrot_training - Step 28621: {'lr': 0.0004611359906271253, 'samples': 14654464, 'steps': 28621, 'loss/train': 2.1074767112731934} +03/04/2022 23:05:26 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/04/2022 23:05:30 - INFO - codeparrot_training - Step 28622: {'lr': 0.0004611331488910694, 'samples': 14654976, 'steps': 28622, 'loss/train': 1.977159023284912} +03/04/2022 23:05:33 - INFO - codeparrot_training - Step 28623: {'lr': 0.00046113030705988026, 'samples': 14655488, 'steps': 28623, 'loss/train': 0.16818027198314667} +03/04/2022 23:05:34 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 23:05:38 - INFO - codeparrot_training - Step 28624: {'lr': 0.000461127465133559, 'samples': 14656000, 'steps': 28624, 'loss/train': 1.1417585611343384} +03/04/2022 23:05:41 - INFO - codeparrot_training - Step 28625: {'lr': 0.0004611246231121069, 'samples': 14656512, 'steps': 28625, 'loss/train': 2.4288015365600586} +03/04/2022 23:05:42 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 23:05:47 - INFO - codeparrot_training - Step 28626: {'lr': 0.00046112178099552535, 'samples': 14657024, 'steps': 28626, 'loss/train': 1.7648500204086304} +03/04/2022 23:05:50 - INFO - codeparrot_training - Step 28627: {'lr': 0.0004611189387838156, 'samples': 14657536, 'steps': 28627, 'loss/train': 1.6399967670440674} +03/04/2022 23:05:52 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 23:05:55 - INFO - codeparrot_training - Step 28628: {'lr': 0.00046111609647697893, 'samples': 14658048, 'steps': 28628, 'loss/train': 1.6772977113723755} +03/04/2022 23:05:59 - INFO - codeparrot_training - Step 28629: {'lr': 0.0004611132540750166, 'samples': 14658560, 'steps': 28629, 'loss/train': 1.4163635969161987} +03/04/2022 23:06:01 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 23:06:04 - INFO - codeparrot_training - Step 28630: {'lr': 0.00046111041157792987, 'samples': 14659072, 'steps': 28630, 'loss/train': 1.9044311046600342} +03/04/2022 23:06:07 - INFO - codeparrot_training - Step 28631: {'lr': 0.00046110756898572, 'samples': 14659584, 'steps': 28631, 'loss/train': 1.6293809413909912} +03/04/2022 23:06:09 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 23:06:12 - INFO - codeparrot_training - Step 28632: {'lr': 0.0004611047262983884, 'samples': 14660096, 'steps': 28632, 'loss/train': 1.6695833206176758} +03/04/2022 23:06:15 - INFO - codeparrot_training - Step 28633: {'lr': 0.00046110188351593625, 'samples': 14660608, 'steps': 28633, 'loss/train': 1.1326595544815063} +03/04/2022 23:06:18 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/04/2022 23:06:21 - INFO - codeparrot_training - Step 28634: {'lr': 0.0004610990406383648, 'samples': 14661120, 'steps': 28634, 'loss/train': 1.068790078163147} +03/04/2022 23:06:24 - INFO - codeparrot_training - Step 28635: {'lr': 0.00046109619766567547, 'samples': 14661632, 'steps': 28635, 'loss/train': 1.6011168956756592} +03/04/2022 23:06:27 - INFO - codeparrot_training - Step 28636: {'lr': 0.0004610933545978694, 'samples': 14662144, 'steps': 28636, 'loss/train': 0.3230849504470825} +03/04/2022 23:06:27 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 23:06:32 - INFO - codeparrot_training - Step 28637: {'lr': 0.0004610905114349478, 'samples': 14662656, 'steps': 28637, 'loss/train': 1.9791392087936401} +03/04/2022 23:06:35 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 23:06:38 - INFO - codeparrot_training - Step 28638: {'lr': 0.0004610876681769123, 'samples': 14663168, 'steps': 28638, 'loss/train': 1.7662285566329956} +03/04/2022 23:06:41 - INFO - codeparrot_training - Step 28639: {'lr': 0.0004610848248237638, 'samples': 14663680, 'steps': 28639, 'loss/train': 1.9425069093704224} +03/04/2022 23:06:43 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 23:06:46 - INFO - codeparrot_training - Step 28640: {'lr': 0.00046108198137550377, 'samples': 14664192, 'steps': 28640, 'loss/train': 1.447983980178833} +03/04/2022 23:06:49 - INFO - codeparrot_training - Step 28641: {'lr': 0.0004610791378321335, 'samples': 14664704, 'steps': 28641, 'loss/train': 1.7699235677719116} +03/04/2022 23:06:52 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 23:06:55 - INFO - codeparrot_training - Step 28642: {'lr': 0.0004610762941936542, 'samples': 14665216, 'steps': 28642, 'loss/train': 2.1628410816192627} +03/04/2022 23:06:58 - INFO - codeparrot_training - Step 28643: {'lr': 0.0004610734504600671, 'samples': 14665728, 'steps': 28643, 'loss/train': 2.102304458618164} +03/04/2022 23:07:00 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/04/2022 23:07:03 - INFO - codeparrot_training - Step 28644: {'lr': 0.00046107060663137366, 'samples': 14666240, 'steps': 28644, 'loss/train': 1.6837979555130005} +03/04/2022 23:07:06 - INFO - codeparrot_training - Step 28645: {'lr': 0.00046106776270757506, 'samples': 14666752, 'steps': 28645, 'loss/train': 1.6544755697250366} +03/04/2022 23:07:08 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/04/2022 23:07:11 - INFO - codeparrot_training - Step 28646: {'lr': 0.0004610649186886725, 'samples': 14667264, 'steps': 28646, 'loss/train': 1.214952826499939} +03/04/2022 23:07:14 - INFO - codeparrot_training - Step 28647: {'lr': 0.00046106207457466744, 'samples': 14667776, 'steps': 28647, 'loss/train': 0.6320163607597351} +03/04/2022 23:07:16 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 23:07:20 - INFO - codeparrot_training - Step 28648: {'lr': 0.0004610592303655611, 'samples': 14668288, 'steps': 28648, 'loss/train': 1.8409264087677002} +03/04/2022 23:07:23 - INFO - codeparrot_training - Step 28649: {'lr': 0.0004610563860613546, 'samples': 14668800, 'steps': 28649, 'loss/train': 1.0237106084823608} +03/04/2022 23:07:25 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 23:07:28 - INFO - codeparrot_training - Step 28650: {'lr': 0.00046105354166204937, 'samples': 14669312, 'steps': 28650, 'loss/train': 1.8430724143981934} +03/04/2022 23:07:31 - INFO - codeparrot_training - Step 28651: {'lr': 0.00046105069716764676, 'samples': 14669824, 'steps': 28651, 'loss/train': 2.554124116897583} +03/04/2022 23:07:33 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 23:07:36 - INFO - codeparrot_training - Step 28652: {'lr': 0.00046104785257814786, 'samples': 14670336, 'steps': 28652, 'loss/train': 1.9458526372909546} +03/04/2022 23:07:40 - INFO - codeparrot_training - Step 28653: {'lr': 0.0004610450078935541, 'samples': 14670848, 'steps': 28653, 'loss/train': 1.4395183324813843} +03/04/2022 23:07:41 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/04/2022 23:07:45 - INFO - codeparrot_training - Step 28654: {'lr': 0.00046104216311386676, 'samples': 14671360, 'steps': 28654, 'loss/train': 1.8047356605529785} +03/04/2022 23:07:48 - INFO - codeparrot_training - Step 28655: {'lr': 0.000461039318239087, 'samples': 14671872, 'steps': 28655, 'loss/train': 0.11996857076883316} +03/04/2022 23:07:50 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 23:07:53 - INFO - codeparrot_training - Step 28656: {'lr': 0.00046103647326921625, 'samples': 14672384, 'steps': 28656, 'loss/train': 2.1457059383392334} +03/04/2022 23:07:56 - INFO - codeparrot_training - Step 28657: {'lr': 0.00046103362820425567, 'samples': 14672896, 'steps': 28657, 'loss/train': 1.3384227752685547} +03/04/2022 23:07:58 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/04/2022 23:08:02 - INFO - codeparrot_training - Step 28658: {'lr': 0.00046103078304420665, 'samples': 14673408, 'steps': 28658, 'loss/train': 1.3913062810897827} +03/04/2022 23:08:05 - INFO - codeparrot_training - Step 28659: {'lr': 0.0004610279377890704, 'samples': 14673920, 'steps': 28659, 'loss/train': 1.3581987619400024} +03/04/2022 23:08:06 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 23:08:10 - INFO - codeparrot_training - Step 28660: {'lr': 0.00046102509243884813, 'samples': 14674432, 'steps': 28660, 'loss/train': 2.2495434284210205} +03/04/2022 23:08:13 - INFO - codeparrot_training - Step 28661: {'lr': 0.0004610222469935413, 'samples': 14674944, 'steps': 28661, 'loss/train': 1.2739447355270386} +03/04/2022 23:08:14 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/04/2022 23:08:19 - INFO - codeparrot_training - Step 28662: {'lr': 0.000461019401453151, 'samples': 14675456, 'steps': 28662, 'loss/train': 1.962471842765808} +03/04/2022 23:08:22 - INFO - codeparrot_training - Step 28663: {'lr': 0.00046101655581767874, 'samples': 14675968, 'steps': 28663, 'loss/train': 0.7328404188156128} +03/04/2022 23:08:23 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/04/2022 23:08:27 - INFO - codeparrot_training - Step 28664: {'lr': 0.0004610137100871257, 'samples': 14676480, 'steps': 28664, 'loss/train': 1.6926188468933105} +03/04/2022 23:08:30 - INFO - codeparrot_training - Step 28665: {'lr': 0.00046101086426149297, 'samples': 14676992, 'steps': 28665, 'loss/train': 1.5700749158859253} +03/04/2022 23:08:31 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 23:08:35 - INFO - codeparrot_training - Step 28666: {'lr': 0.0004610080183407821, 'samples': 14677504, 'steps': 28666, 'loss/train': 1.2041345834732056} +03/04/2022 23:08:39 - INFO - codeparrot_training - Step 28667: {'lr': 0.0004610051723249943, 'samples': 14678016, 'steps': 28667, 'loss/train': 2.1835572719573975} +03/04/2022 23:08:39 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 23:08:44 - INFO - codeparrot_training - Step 28668: {'lr': 0.0004610023262141308, 'samples': 14678528, 'steps': 28668, 'loss/train': 2.7377028465270996} +03/04/2022 23:08:47 - INFO - codeparrot_training - Step 28669: {'lr': 0.00046099948000819294, 'samples': 14679040, 'steps': 28669, 'loss/train': 1.737138032913208} +03/04/2022 23:08:48 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/04/2022 23:08:52 - INFO - codeparrot_training - Step 28670: {'lr': 0.0004609966337071819, 'samples': 14679552, 'steps': 28670, 'loss/train': 2.2142462730407715} +03/04/2022 23:08:55 - INFO - codeparrot_training - Step 28671: {'lr': 0.00046099378731109906, 'samples': 14680064, 'steps': 28671, 'loss/train': 0.4541628360748291} +03/04/2022 23:08:56 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 23:09:01 - INFO - codeparrot_training - Step 28672: {'lr': 0.00046099094081994565, 'samples': 14680576, 'steps': 28672, 'loss/train': 1.4846153259277344} +03/04/2022 23:09:04 - INFO - codeparrot_training - Step 28673: {'lr': 0.000460988094233723, 'samples': 14681088, 'steps': 28673, 'loss/train': 1.8271634578704834} +03/04/2022 23:09:05 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 23:09:09 - INFO - codeparrot_training - Step 28674: {'lr': 0.00046098524755243246, 'samples': 14681600, 'steps': 28674, 'loss/train': 2.1662678718566895} +03/04/2022 23:09:12 - INFO - codeparrot_training - Step 28675: {'lr': 0.0004609824007760751, 'samples': 14682112, 'steps': 28675, 'loss/train': 1.6646757125854492} +03/04/2022 23:09:13 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/04/2022 23:09:17 - INFO - codeparrot_training - Step 28676: {'lr': 0.0004609795539046524, 'samples': 14682624, 'steps': 28676, 'loss/train': 0.9444484710693359} +03/04/2022 23:09:21 - INFO - codeparrot_training - Step 28677: {'lr': 0.0004609767069381655, 'samples': 14683136, 'steps': 28677, 'loss/train': 2.473944664001465} +03/04/2022 23:09:21 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/04/2022 23:09:26 - INFO - codeparrot_training - Step 28678: {'lr': 0.00046097385987661576, 'samples': 14683648, 'steps': 28678, 'loss/train': 1.2074100971221924} +03/04/2022 23:09:29 - INFO - codeparrot_training - Step 28679: {'lr': 0.00046097101272000454, 'samples': 14684160, 'steps': 28679, 'loss/train': 0.8830896019935608} +03/04/2022 23:09:30 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 23:09:34 - INFO - codeparrot_training - Step 28680: {'lr': 0.0004609681654683329, 'samples': 14684672, 'steps': 28680, 'loss/train': 2.2301034927368164} +03/04/2022 23:09:38 - INFO - codeparrot_training - Step 28681: {'lr': 0.0004609653181216024, 'samples': 14685184, 'steps': 28681, 'loss/train': 0.8800235986709595} +03/04/2022 23:09:38 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 23:09:43 - INFO - codeparrot_training - Step 28682: {'lr': 0.0004609624706798141, 'samples': 14685696, 'steps': 28682, 'loss/train': 1.996482014656067} +03/04/2022 23:09:46 - INFO - codeparrot_training - Step 28683: {'lr': 0.00046095962314296934, 'samples': 14686208, 'steps': 28683, 'loss/train': 1.3534278869628906} +03/04/2022 23:09:46 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 23:09:51 - INFO - codeparrot_training - Step 28684: {'lr': 0.00046095677551106953, 'samples': 14686720, 'steps': 28684, 'loss/train': 1.782173752784729} +03/04/2022 23:09:54 - INFO - codeparrot_training - Step 28685: {'lr': 0.00046095392778411576, 'samples': 14687232, 'steps': 28685, 'loss/train': 1.7090116739273071} +03/04/2022 23:09:55 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 23:10:00 - INFO - codeparrot_training - Step 28686: {'lr': 0.0004609510799621095, 'samples': 14687744, 'steps': 28686, 'loss/train': 1.865818738937378} +03/04/2022 23:10:03 - INFO - codeparrot_training - Step 28687: {'lr': 0.0004609482320450519, 'samples': 14688256, 'steps': 28687, 'loss/train': 1.9258365631103516} +03/04/2022 23:10:03 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/04/2022 23:10:08 - INFO - codeparrot_training - Step 28688: {'lr': 0.00046094538403294416, 'samples': 14688768, 'steps': 28688, 'loss/train': 2.0483596324920654} +03/04/2022 23:10:11 - INFO - codeparrot_training - Step 28689: {'lr': 0.00046094253592578784, 'samples': 14689280, 'steps': 28689, 'loss/train': 1.2860519886016846} +03/04/2022 23:10:11 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 23:10:16 - INFO - codeparrot_training - Step 28690: {'lr': 0.000460939687723584, 'samples': 14689792, 'steps': 28690, 'loss/train': 0.1934487223625183} +03/04/2022 23:10:19 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/04/2022 23:10:22 - INFO - codeparrot_training - Step 28691: {'lr': 0.000460936839426334, 'samples': 14690304, 'steps': 28691, 'loss/train': 1.7329027652740479} +03/04/2022 23:10:25 - INFO - codeparrot_training - Step 28692: {'lr': 0.00046093399103403913, 'samples': 14690816, 'steps': 28692, 'loss/train': 1.8477340936660767} +03/04/2022 23:10:27 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 23:10:30 - INFO - codeparrot_training - Step 28693: {'lr': 0.00046093114254670066, 'samples': 14691328, 'steps': 28693, 'loss/train': 1.7601550817489624} +03/04/2022 23:10:33 - INFO - codeparrot_training - Step 28694: {'lr': 0.0004609282939643199, 'samples': 14691840, 'steps': 28694, 'loss/train': 2.7514965534210205} +03/04/2022 23:10:36 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/04/2022 23:10:39 - INFO - codeparrot_training - Step 28695: {'lr': 0.00046092544528689806, 'samples': 14692352, 'steps': 28695, 'loss/train': 1.702709436416626} +03/04/2022 23:10:42 - INFO - codeparrot_training - Step 28696: {'lr': 0.0004609225965144365, 'samples': 14692864, 'steps': 28696, 'loss/train': 2.0398812294006348} +03/04/2022 23:10:44 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 23:10:47 - INFO - codeparrot_training - Step 28697: {'lr': 0.00046091974764693645, 'samples': 14693376, 'steps': 28697, 'loss/train': 1.0523011684417725} +03/04/2022 23:10:50 - INFO - codeparrot_training - Step 28698: {'lr': 0.0004609168986843992, 'samples': 14693888, 'steps': 28698, 'loss/train': 2.114043712615967} +03/04/2022 23:10:53 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 23:10:55 - INFO - codeparrot_training - Step 28699: {'lr': 0.000460914049626826, 'samples': 14694400, 'steps': 28699, 'loss/train': 0.6433330774307251} +03/04/2022 23:10:59 - INFO - codeparrot_training - Step 28700: {'lr': 0.0004609112004742183, 'samples': 14694912, 'steps': 28700, 'loss/train': 1.68015456199646} +03/04/2022 23:11:01 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 23:11:04 - INFO - codeparrot_training - Step 28701: {'lr': 0.0004609083512265773, 'samples': 14695424, 'steps': 28701, 'loss/train': 2.042311191558838} +03/04/2022 23:11:07 - INFO - codeparrot_training - Step 28702: {'lr': 0.0004609055018839041, 'samples': 14695936, 'steps': 28702, 'loss/train': 2.185877561569214} +03/04/2022 23:11:09 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 23:11:12 - INFO - codeparrot_training - Step 28703: {'lr': 0.0004609026524462002, 'samples': 14696448, 'steps': 28703, 'loss/train': 1.995223879814148} +03/04/2022 23:11:16 - INFO - codeparrot_training - Step 28704: {'lr': 0.00046089980291346685, 'samples': 14696960, 'steps': 28704, 'loss/train': 1.329836368560791} +03/04/2022 23:11:21 - INFO - codeparrot_training - Step 28705: {'lr': 0.00046089695328570523, 'samples': 14697472, 'steps': 28705, 'loss/train': 1.8551639318466187} +03/04/2022 23:11:24 - INFO - codeparrot_training - Step 28706: {'lr': 0.0004608941035629168, 'samples': 14697984, 'steps': 28706, 'loss/train': 1.5789488554000854} +03/04/2022 23:11:26 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/04/2022 23:11:29 - INFO - codeparrot_training - Step 28707: {'lr': 0.0004608912537451027, 'samples': 14698496, 'steps': 28707, 'loss/train': 1.8238446712493896} +03/04/2022 23:11:32 - INFO - codeparrot_training - Step 28708: {'lr': 0.0004608884038322642, 'samples': 14699008, 'steps': 28708, 'loss/train': 1.571372389793396} +03/04/2022 23:11:35 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 23:11:38 - INFO - codeparrot_training - Step 28709: {'lr': 0.00046088555382440275, 'samples': 14699520, 'steps': 28709, 'loss/train': 1.3904154300689697} +03/04/2022 23:11:41 - INFO - codeparrot_training - Step 28710: {'lr': 0.0004608827037215194, 'samples': 14700032, 'steps': 28710, 'loss/train': 1.2568423748016357} +03/04/2022 23:11:42 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 23:11:46 - INFO - codeparrot_training - Step 28711: {'lr': 0.0004608798535236156, 'samples': 14700544, 'steps': 28711, 'loss/train': 1.2774072885513306} +03/04/2022 23:11:49 - INFO - codeparrot_training - Step 28712: {'lr': 0.0004608770032306926, 'samples': 14701056, 'steps': 28712, 'loss/train': 2.4083003997802734} +03/04/2022 23:11:51 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 23:11:54 - INFO - codeparrot_training - Step 28713: {'lr': 0.0004608741528427517, 'samples': 14701568, 'steps': 28713, 'loss/train': 0.7753598093986511} +03/04/2022 23:11:58 - INFO - codeparrot_training - Step 28714: {'lr': 0.0004608713023597941, 'samples': 14702080, 'steps': 28714, 'loss/train': 1.733746886253357} +03/04/2022 23:11:59 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 23:12:03 - INFO - codeparrot_training - Step 28715: {'lr': 0.00046086845178182123, 'samples': 14702592, 'steps': 28715, 'loss/train': 1.154794454574585} +03/04/2022 23:12:06 - INFO - codeparrot_training - Step 28716: {'lr': 0.00046086560110883423, 'samples': 14703104, 'steps': 28716, 'loss/train': 2.2313344478607178} +03/04/2022 23:12:07 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 23:12:11 - INFO - codeparrot_training - Step 28717: {'lr': 0.00046086275034083453, 'samples': 14703616, 'steps': 28717, 'loss/train': 1.5891374349594116} +03/04/2022 23:12:14 - INFO - codeparrot_training - Step 28718: {'lr': 0.00046085989947782327, 'samples': 14704128, 'steps': 28718, 'loss/train': 0.9811397194862366} +03/04/2022 23:12:16 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/04/2022 23:12:20 - INFO - codeparrot_training - Step 28719: {'lr': 0.00046085704851980174, 'samples': 14704640, 'steps': 28719, 'loss/train': 2.756617784500122} +03/04/2022 23:12:23 - INFO - codeparrot_training - Step 28720: {'lr': 0.00046085419746677136, 'samples': 14705152, 'steps': 28720, 'loss/train': 1.3453677892684937} +03/04/2022 23:12:24 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 23:12:28 - INFO - codeparrot_training - Step 28721: {'lr': 0.00046085134631873326, 'samples': 14705664, 'steps': 28721, 'loss/train': 1.9104872941970825} +03/04/2022 23:12:31 - INFO - codeparrot_training - Step 28722: {'lr': 0.0004608484950756888, 'samples': 14706176, 'steps': 28722, 'loss/train': 1.799933671951294} +03/04/2022 23:12:33 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 23:12:37 - INFO - codeparrot_training - Step 28723: {'lr': 0.0004608456437376393, 'samples': 14706688, 'steps': 28723, 'loss/train': 2.1247329711914062} +03/04/2022 23:12:40 - INFO - codeparrot_training - Step 28724: {'lr': 0.000460842792304586, 'samples': 14707200, 'steps': 28724, 'loss/train': 1.854982614517212} +03/04/2022 23:12:41 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/04/2022 23:12:45 - INFO - codeparrot_training - Step 28725: {'lr': 0.00046083994077653024, 'samples': 14707712, 'steps': 28725, 'loss/train': 1.4383118152618408} +03/04/2022 23:12:48 - INFO - codeparrot_training - Step 28726: {'lr': 0.0004608370891534732, 'samples': 14708224, 'steps': 28726, 'loss/train': 1.284668207168579} +03/04/2022 23:12:49 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/04/2022 23:12:53 - INFO - codeparrot_training - Step 28727: {'lr': 0.0004608342374354162, 'samples': 14708736, 'steps': 28727, 'loss/train': 1.4197306632995605} +03/04/2022 23:12:56 - INFO - codeparrot_training - Step 28728: {'lr': 0.0004608313856223606, 'samples': 14709248, 'steps': 28728, 'loss/train': 2.655355453491211} +03/04/2022 23:12:57 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 23:13:02 - INFO - codeparrot_training - Step 28729: {'lr': 0.00046082853371430754, 'samples': 14709760, 'steps': 28729, 'loss/train': 1.5164272785186768} +03/04/2022 23:13:05 - INFO - codeparrot_training - Step 28730: {'lr': 0.0004608256817112585, 'samples': 14710272, 'steps': 28730, 'loss/train': 1.5647177696228027} +03/04/2022 23:13:06 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 23:13:10 - INFO - codeparrot_training - Step 28731: {'lr': 0.00046082282961321466, 'samples': 14710784, 'steps': 28731, 'loss/train': 2.1676509380340576} +03/04/2022 23:13:13 - INFO - codeparrot_training - Step 28732: {'lr': 0.00046081997742017725, 'samples': 14711296, 'steps': 28732, 'loss/train': 1.963984727859497} +03/04/2022 23:13:14 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 23:13:18 - INFO - codeparrot_training - Step 28733: {'lr': 0.00046081712513214757, 'samples': 14711808, 'steps': 28733, 'loss/train': 0.7657157182693481} +03/04/2022 23:13:22 - INFO - codeparrot_training - Step 28734: {'lr': 0.0004608142727491271, 'samples': 14712320, 'steps': 28734, 'loss/train': 2.29250431060791} +03/04/2022 23:13:22 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 23:13:27 - INFO - codeparrot_training - Step 28735: {'lr': 0.00046081142027111683, 'samples': 14712832, 'steps': 28735, 'loss/train': 1.8452332019805908} +03/04/2022 23:13:30 - INFO - codeparrot_training - Step 28736: {'lr': 0.0004608085676981182, 'samples': 14713344, 'steps': 28736, 'loss/train': 1.7282874584197998} +03/04/2022 23:13:32 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 23:13:36 - INFO - codeparrot_training - Step 28737: {'lr': 0.0004608057150301326, 'samples': 14713856, 'steps': 28737, 'loss/train': 2.009702205657959} +03/04/2022 23:13:39 - INFO - codeparrot_training - Step 28738: {'lr': 0.00046080286226716106, 'samples': 14714368, 'steps': 28738, 'loss/train': 0.5559707283973694} +03/04/2022 23:13:40 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/04/2022 23:13:44 - INFO - codeparrot_training - Step 28739: {'lr': 0.00046080000940920506, 'samples': 14714880, 'steps': 28739, 'loss/train': 1.6263608932495117} +03/04/2022 23:13:47 - INFO - codeparrot_training - Step 28740: {'lr': 0.00046079715645626584, 'samples': 14715392, 'steps': 28740, 'loss/train': 2.5383145809173584} +03/04/2022 23:13:49 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 23:13:53 - INFO - codeparrot_training - Step 28741: {'lr': 0.00046079430340834467, 'samples': 14715904, 'steps': 28741, 'loss/train': 1.1004078388214111} +03/04/2022 23:13:56 - INFO - codeparrot_training - Step 28742: {'lr': 0.00046079145026544277, 'samples': 14716416, 'steps': 28742, 'loss/train': 1.4663041830062866} +03/04/2022 23:13:57 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/04/2022 23:14:01 - INFO - codeparrot_training - Step 28743: {'lr': 0.0004607885970275616, 'samples': 14716928, 'steps': 28743, 'loss/train': 1.6839425563812256} +03/04/2022 23:14:04 - INFO - codeparrot_training - Step 28744: {'lr': 0.0004607857436947023, 'samples': 14717440, 'steps': 28744, 'loss/train': 1.1125373840332031} +03/04/2022 23:14:05 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/04/2022 23:14:09 - INFO - codeparrot_training - Step 28745: {'lr': 0.00046078289026686616, 'samples': 14717952, 'steps': 28745, 'loss/train': 1.4777553081512451} +03/04/2022 23:14:12 - INFO - codeparrot_training - Step 28746: {'lr': 0.00046078003674405457, 'samples': 14718464, 'steps': 28746, 'loss/train': 1.8516452312469482} +03/04/2022 23:14:13 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 23:14:18 - INFO - codeparrot_training - Step 28747: {'lr': 0.0004607771831262687, 'samples': 14718976, 'steps': 28747, 'loss/train': 2.0822913646698} +03/04/2022 23:14:21 - INFO - codeparrot_training - Step 28748: {'lr': 0.00046077432941350993, 'samples': 14719488, 'steps': 28748, 'loss/train': 1.5656276941299438} +03/04/2022 23:14:22 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/04/2022 23:14:26 - INFO - codeparrot_training - Step 28749: {'lr': 0.00046077147560577943, 'samples': 14720000, 'steps': 28749, 'loss/train': 1.6378145217895508} +03/04/2022 23:14:30 - INFO - codeparrot_training - Step 28750: {'lr': 0.0004607686217030786, 'samples': 14720512, 'steps': 28750, 'loss/train': 1.9615832567214966} +03/04/2022 23:14:31 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 23:14:35 - INFO - codeparrot_training - Step 28751: {'lr': 0.00046076576770540865, 'samples': 14721024, 'steps': 28751, 'loss/train': 2.1016452312469482} +03/04/2022 23:14:38 - INFO - codeparrot_training - Step 28752: {'lr': 0.00046076291361277097, 'samples': 14721536, 'steps': 28752, 'loss/train': 2.2397773265838623} +03/04/2022 23:14:39 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 23:14:43 - INFO - codeparrot_training - Step 28753: {'lr': 0.00046076005942516666, 'samples': 14722048, 'steps': 28753, 'loss/train': 1.5671579837799072} +03/04/2022 23:14:46 - INFO - codeparrot_training - Step 28754: {'lr': 0.0004607572051425972, 'samples': 14722560, 'steps': 28754, 'loss/train': 1.8208417892456055} +03/04/2022 23:14:47 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/04/2022 23:14:52 - INFO - codeparrot_training - Step 28755: {'lr': 0.00046075435076506376, 'samples': 14723072, 'steps': 28755, 'loss/train': 1.7257890701293945} +03/04/2022 23:14:55 - INFO - codeparrot_training - Step 28756: {'lr': 0.0004607514962925677, 'samples': 14723584, 'steps': 28756, 'loss/train': 1.4128739833831787} +03/04/2022 23:14:56 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 23:15:00 - INFO - codeparrot_training - Step 28757: {'lr': 0.00046074864172511025, 'samples': 14724096, 'steps': 28757, 'loss/train': 1.6530588865280151} +03/04/2022 23:15:03 - INFO - codeparrot_training - Step 28758: {'lr': 0.0004607457870626928, 'samples': 14724608, 'steps': 28758, 'loss/train': 2.0480055809020996} +03/04/2022 23:15:04 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 23:15:09 - INFO - codeparrot_training - Step 28759: {'lr': 0.0004607429323053164, 'samples': 14725120, 'steps': 28759, 'loss/train': 1.8267391920089722} +03/04/2022 23:15:12 - INFO - codeparrot_training - Step 28760: {'lr': 0.0004607400774529825, 'samples': 14725632, 'steps': 28760, 'loss/train': 1.6826145648956299} +03/04/2022 23:15:13 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/04/2022 23:15:17 - INFO - codeparrot_training - Step 28761: {'lr': 0.0004607372225056925, 'samples': 14726144, 'steps': 28761, 'loss/train': 1.1681545972824097} +03/04/2022 23:15:20 - INFO - codeparrot_training - Step 28762: {'lr': 0.00046073436746344744, 'samples': 14726656, 'steps': 28762, 'loss/train': 1.515450358390808} +03/04/2022 23:15:21 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 23:15:25 - INFO - codeparrot_training - Step 28763: {'lr': 0.0004607315123262488, 'samples': 14727168, 'steps': 28763, 'loss/train': 1.999114990234375} +03/04/2022 23:15:29 - INFO - codeparrot_training - Step 28764: {'lr': 0.0004607286570940977, 'samples': 14727680, 'steps': 28764, 'loss/train': 2.160698890686035} +03/04/2022 23:15:29 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 23:15:34 - INFO - codeparrot_training - Step 28765: {'lr': 0.0004607258017669956, 'samples': 14728192, 'steps': 28765, 'loss/train': 1.7898920774459839} +03/04/2022 23:15:37 - INFO - codeparrot_training - Step 28766: {'lr': 0.0004607229463449437, 'samples': 14728704, 'steps': 28766, 'loss/train': 1.7552697658538818} +03/04/2022 23:15:38 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 23:15:42 - INFO - codeparrot_training - Step 28767: {'lr': 0.00046072009082794333, 'samples': 14729216, 'steps': 28767, 'loss/train': 1.9466519355773926} +03/04/2022 23:15:45 - INFO - codeparrot_training - Step 28768: {'lr': 0.00046071723521599563, 'samples': 14729728, 'steps': 28768, 'loss/train': 0.09505000710487366} +03/04/2022 23:15:46 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 23:15:51 - INFO - codeparrot_training - Step 28769: {'lr': 0.000460714379509102, 'samples': 14730240, 'steps': 28769, 'loss/train': 2.1645305156707764} +03/04/2022 23:15:54 - INFO - codeparrot_training - Step 28770: {'lr': 0.0004607115237072638, 'samples': 14730752, 'steps': 28770, 'loss/train': 1.2728168964385986} +03/04/2022 23:15:54 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/04/2022 23:15:59 - INFO - codeparrot_training - Step 28771: {'lr': 0.00046070866781048225, 'samples': 14731264, 'steps': 28771, 'loss/train': 1.9268834590911865} +03/04/2022 23:16:02 - INFO - codeparrot_training - Step 28772: {'lr': 0.0004607058118187586, 'samples': 14731776, 'steps': 28772, 'loss/train': 1.425413966178894} +03/04/2022 23:16:02 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 23:16:08 - INFO - codeparrot_training - Step 28773: {'lr': 0.00046070295573209406, 'samples': 14732288, 'steps': 28773, 'loss/train': 1.2314364910125732} +03/04/2022 23:16:11 - INFO - codeparrot_training - Step 28774: {'lr': 0.00046070009955049017, 'samples': 14732800, 'steps': 28774, 'loss/train': 1.7822479009628296} +03/04/2022 23:16:11 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 23:16:16 - INFO - codeparrot_training - Step 28775: {'lr': 0.000460697243273948, 'samples': 14733312, 'steps': 28775, 'loss/train': 1.2611836194992065} +03/04/2022 23:16:19 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/04/2022 23:16:22 - INFO - codeparrot_training - Step 28776: {'lr': 0.0004606943869024689, 'samples': 14733824, 'steps': 28776, 'loss/train': 1.8676648139953613} +03/04/2022 23:16:25 - INFO - codeparrot_training - Step 28777: {'lr': 0.0004606915304360542, 'samples': 14734336, 'steps': 28777, 'loss/train': 1.5964415073394775} +03/04/2022 23:16:27 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 23:16:30 - INFO - codeparrot_training - Step 28778: {'lr': 0.00046068867387470507, 'samples': 14734848, 'steps': 28778, 'loss/train': 2.027876138687134} +03/04/2022 23:16:33 - INFO - codeparrot_training - Step 28779: {'lr': 0.00046068581721842294, 'samples': 14735360, 'steps': 28779, 'loss/train': 2.274444580078125} +03/04/2022 23:16:35 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 23:16:39 - INFO - codeparrot_training - Step 28780: {'lr': 0.00046068296046720904, 'samples': 14735872, 'steps': 28780, 'loss/train': 1.1855027675628662} +03/04/2022 23:16:42 - INFO - codeparrot_training - Step 28781: {'lr': 0.0004606801036210646, 'samples': 14736384, 'steps': 28781, 'loss/train': 6.455958366394043} +03/04/2022 23:16:45 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/04/2022 23:16:47 - INFO - codeparrot_training - Step 28782: {'lr': 0.000460677246679991, 'samples': 14736896, 'steps': 28782, 'loss/train': 3.410822629928589} +03/04/2022 23:16:50 - INFO - codeparrot_training - Step 28783: {'lr': 0.00046067438964398944, 'samples': 14737408, 'steps': 28783, 'loss/train': 2.206223249435425} +03/04/2022 23:16:53 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 23:16:56 - INFO - codeparrot_training - Step 28784: {'lr': 0.00046067153251306127, 'samples': 14737920, 'steps': 28784, 'loss/train': 1.7162508964538574} +03/04/2022 23:16:59 - INFO - codeparrot_training - Step 28785: {'lr': 0.0004606686752872078, 'samples': 14738432, 'steps': 28785, 'loss/train': 1.594719409942627} +03/04/2022 23:17:02 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 23:17:04 - INFO - codeparrot_training - Step 28786: {'lr': 0.0004606658179664302, 'samples': 14738944, 'steps': 28786, 'loss/train': 1.9938952922821045} +03/04/2022 23:17:07 - INFO - codeparrot_training - Step 28787: {'lr': 0.00046066296055072986, 'samples': 14739456, 'steps': 28787, 'loss/train': 1.5762356519699097} +03/04/2022 23:17:09 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 23:17:12 - INFO - codeparrot_training - Step 28788: {'lr': 0.0004606601030401081, 'samples': 14739968, 'steps': 28788, 'loss/train': 1.337221622467041} +03/04/2022 23:17:15 - INFO - codeparrot_training - Step 28789: {'lr': 0.0004606572454345661, 'samples': 14740480, 'steps': 28789, 'loss/train': 1.3102432489395142} +03/04/2022 23:17:18 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 23:17:21 - INFO - codeparrot_training - Step 28790: {'lr': 0.0004606543877341052, 'samples': 14740992, 'steps': 28790, 'loss/train': 0.8645076751708984} +03/04/2022 23:17:24 - INFO - codeparrot_training - Step 28791: {'lr': 0.00046065152993872665, 'samples': 14741504, 'steps': 28791, 'loss/train': 1.2594149112701416} +03/04/2022 23:17:27 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/04/2022 23:17:29 - INFO - codeparrot_training - Step 28792: {'lr': 0.0004606486720484318, 'samples': 14742016, 'steps': 28792, 'loss/train': 1.2640281915664673} +03/04/2022 23:17:32 - INFO - codeparrot_training - Step 28793: {'lr': 0.0004606458140632219, 'samples': 14742528, 'steps': 28793, 'loss/train': 1.5857021808624268} +03/04/2022 23:17:35 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 23:17:38 - INFO - codeparrot_training - Step 28794: {'lr': 0.0004606429559830982, 'samples': 14743040, 'steps': 28794, 'loss/train': 2.1024489402770996} +03/04/2022 23:17:41 - INFO - codeparrot_training - Step 28795: {'lr': 0.00046064009780806217, 'samples': 14743552, 'steps': 28795, 'loss/train': 1.847525715827942} +03/04/2022 23:17:44 - INFO - codeparrot_training - Step 28796: {'lr': 0.0004606372395381149, 'samples': 14744064, 'steps': 28796, 'loss/train': 2.113447666168213} +03/04/2022 23:17:44 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 23:17:50 - INFO - codeparrot_training - Step 28797: {'lr': 0.0004606343811732577, 'samples': 14744576, 'steps': 28797, 'loss/train': 1.7636909484863281} +03/04/2022 23:17:53 - INFO - codeparrot_training - Step 28798: {'lr': 0.0004606315227134919, 'samples': 14745088, 'steps': 28798, 'loss/train': 1.0039578676223755} +03/04/2022 23:17:53 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 23:17:58 - INFO - codeparrot_training - Step 28799: {'lr': 0.0004606286641588188, 'samples': 14745600, 'steps': 28799, 'loss/train': 1.8828938007354736} +03/04/2022 23:18:01 - INFO - codeparrot_training - Step 28800: {'lr': 0.0004606258055092397, 'samples': 14746112, 'steps': 28800, 'loss/train': 1.211090087890625} +03/04/2022 23:18:01 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/04/2022 23:18:07 - INFO - codeparrot_training - Step 28801: {'lr': 0.00046062294676475584, 'samples': 14746624, 'steps': 28801, 'loss/train': 1.0778968334197998} +03/04/2022 23:18:10 - INFO - codeparrot_training - Step 28802: {'lr': 0.0004606200879253685, 'samples': 14747136, 'steps': 28802, 'loss/train': 2.512801170349121} +03/04/2022 23:18:11 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/04/2022 23:18:15 - INFO - codeparrot_training - Step 28803: {'lr': 0.00046061722899107905, 'samples': 14747648, 'steps': 28803, 'loss/train': 1.892014503479004} +03/04/2022 23:18:18 - INFO - codeparrot_training - Step 28804: {'lr': 0.0004606143699618888, 'samples': 14748160, 'steps': 28804, 'loss/train': 1.9535975456237793} +03/04/2022 23:18:19 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/04/2022 23:18:23 - INFO - codeparrot_training - Step 28805: {'lr': 0.00046061151083779886, 'samples': 14748672, 'steps': 28805, 'loss/train': 1.4053068161010742} +03/04/2022 23:18:27 - INFO - codeparrot_training - Step 28806: {'lr': 0.0004606086516188106, 'samples': 14749184, 'steps': 28806, 'loss/train': 1.4122319221496582} +03/04/2022 23:18:28 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 23:18:32 - INFO - codeparrot_training - Step 28807: {'lr': 0.00046060579230492533, 'samples': 14749696, 'steps': 28807, 'loss/train': 1.8122142553329468} +03/04/2022 23:18:35 - INFO - codeparrot_training - Step 28808: {'lr': 0.0004606029328961444, 'samples': 14750208, 'steps': 28808, 'loss/train': 1.5441874265670776} +03/04/2022 23:18:36 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/04/2022 23:18:40 - INFO - codeparrot_training - Step 28809: {'lr': 0.000460600073392469, 'samples': 14750720, 'steps': 28809, 'loss/train': 1.4118648767471313} +03/04/2022 23:18:44 - INFO - codeparrot_training - Step 28810: {'lr': 0.00046059721379390053, 'samples': 14751232, 'steps': 28810, 'loss/train': 1.892203450202942} +03/04/2022 23:18:44 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 23:18:49 - INFO - codeparrot_training - Step 28811: {'lr': 0.0004605943541004401, 'samples': 14751744, 'steps': 28811, 'loss/train': 1.9477064609527588} +03/04/2022 23:18:52 - INFO - codeparrot_training - Step 28812: {'lr': 0.00046059149431208914, 'samples': 14752256, 'steps': 28812, 'loss/train': 1.6727160215377808} +03/04/2022 23:18:52 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/04/2022 23:18:57 - INFO - codeparrot_training - Step 28813: {'lr': 0.0004605886344288489, 'samples': 14752768, 'steps': 28813, 'loss/train': 1.6348408460617065} +03/04/2022 23:19:00 - INFO - codeparrot_training - Step 28814: {'lr': 0.0004605857744507207, 'samples': 14753280, 'steps': 28814, 'loss/train': 1.9410821199417114} +03/04/2022 23:19:01 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 23:19:05 - INFO - codeparrot_training - Step 28815: {'lr': 0.00046058291437770584, 'samples': 14753792, 'steps': 28815, 'loss/train': 1.7436407804489136} +03/04/2022 23:19:09 - INFO - codeparrot_training - Step 28816: {'lr': 0.0004605800542098054, 'samples': 14754304, 'steps': 28816, 'loss/train': 0.941482424736023} +03/04/2022 23:19:09 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 23:19:14 - INFO - codeparrot_training - Step 28817: {'lr': 0.00046057719394702103, 'samples': 14754816, 'steps': 28817, 'loss/train': 1.7056500911712646} +03/04/2022 23:19:17 - INFO - codeparrot_training - Step 28818: {'lr': 0.00046057433358935373, 'samples': 14755328, 'steps': 28818, 'loss/train': 2.2137181758880615} +03/04/2022 23:19:17 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 23:19:22 - INFO - codeparrot_training - Step 28819: {'lr': 0.0004605714731368049, 'samples': 14755840, 'steps': 28819, 'loss/train': 1.7460089921951294} +03/04/2022 23:19:25 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 23:19:28 - INFO - codeparrot_training - Step 28820: {'lr': 0.0004605686125893758, 'samples': 14756352, 'steps': 28820, 'loss/train': 2.0048539638519287} +03/04/2022 23:19:31 - INFO - codeparrot_training - Step 28821: {'lr': 0.00046056575194706773, 'samples': 14756864, 'steps': 28821, 'loss/train': 2.419079065322876} +03/04/2022 23:19:33 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 23:19:36 - INFO - codeparrot_training - Step 28822: {'lr': 0.000460562891209882, 'samples': 14757376, 'steps': 28822, 'loss/train': 2.207127332687378} +03/04/2022 23:19:39 - INFO - codeparrot_training - Step 28823: {'lr': 0.0004605600303778199, 'samples': 14757888, 'steps': 28823, 'loss/train': 1.8947594165802002} +03/04/2022 23:19:44 - INFO - codeparrot_training - Step 28824: {'lr': 0.0004605571694508827, 'samples': 14758400, 'steps': 28824, 'loss/train': 1.8431636095046997} +03/04/2022 23:19:48 - INFO - codeparrot_training - Step 28825: {'lr': 0.0004605543084290716, 'samples': 14758912, 'steps': 28825, 'loss/train': 1.9718064069747925} +03/04/2022 23:19:51 - INFO - codeparrot_training - Step 28826: {'lr': 0.00046055144731238805, 'samples': 14759424, 'steps': 28826, 'loss/train': 1.2570476531982422} +03/04/2022 23:19:51 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 23:19:56 - INFO - codeparrot_training - Step 28827: {'lr': 0.00046054858610083325, 'samples': 14759936, 'steps': 28827, 'loss/train': 1.9794161319732666} +03/04/2022 23:19:59 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 23:20:02 - INFO - codeparrot_training - Step 28828: {'lr': 0.0004605457247944086, 'samples': 14760448, 'steps': 28828, 'loss/train': 1.1934902667999268} +03/04/2022 23:20:05 - INFO - codeparrot_training - Step 28829: {'lr': 0.0004605428633931152, 'samples': 14760960, 'steps': 28829, 'loss/train': 1.9077078104019165} +03/04/2022 23:20:08 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 23:20:10 - INFO - codeparrot_training - Step 28830: {'lr': 0.00046054000189695444, 'samples': 14761472, 'steps': 28830, 'loss/train': 2.4174952507019043} +03/04/2022 23:20:13 - INFO - codeparrot_training - Step 28831: {'lr': 0.00046053714030592764, 'samples': 14761984, 'steps': 28831, 'loss/train': 1.5270220041275024} +03/04/2022 23:20:16 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/04/2022 23:20:18 - INFO - codeparrot_training - Step 28832: {'lr': 0.0004605342786200359, 'samples': 14762496, 'steps': 28832, 'loss/train': 0.8540604710578918} +03/04/2022 23:20:22 - INFO - codeparrot_training - Step 28833: {'lr': 0.0004605314168392809, 'samples': 14763008, 'steps': 28833, 'loss/train': 2.799633026123047} +03/04/2022 23:20:24 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 23:20:27 - INFO - codeparrot_training - Step 28834: {'lr': 0.00046052855496366354, 'samples': 14763520, 'steps': 28834, 'loss/train': 1.7184064388275146} +03/04/2022 23:20:30 - INFO - codeparrot_training - Step 28835: {'lr': 0.0004605256929931853, 'samples': 14764032, 'steps': 28835, 'loss/train': 1.7746673822402954} +03/04/2022 23:20:32 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 23:20:35 - INFO - codeparrot_training - Step 28836: {'lr': 0.0004605228309278474, 'samples': 14764544, 'steps': 28836, 'loss/train': 1.1874452829360962} +03/04/2022 23:20:39 - INFO - codeparrot_training - Step 28837: {'lr': 0.0004605199687676512, 'samples': 14765056, 'steps': 28837, 'loss/train': 1.7919214963912964} +03/04/2022 23:20:41 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 23:20:44 - INFO - codeparrot_training - Step 28838: {'lr': 0.00046051710651259797, 'samples': 14765568, 'steps': 28838, 'loss/train': 0.9254122376441956} +03/04/2022 23:20:47 - INFO - codeparrot_training - Step 28839: {'lr': 0.00046051424416268896, 'samples': 14766080, 'steps': 28839, 'loss/train': 1.9852148294448853} +03/04/2022 23:20:49 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 23:20:52 - INFO - codeparrot_training - Step 28840: {'lr': 0.0004605113817179255, 'samples': 14766592, 'steps': 28840, 'loss/train': 1.3051128387451172} +03/04/2022 23:20:56 - INFO - codeparrot_training - Step 28841: {'lr': 0.00046050851917830884, 'samples': 14767104, 'steps': 28841, 'loss/train': 2.314704656600952} +03/04/2022 23:20:57 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 23:21:01 - INFO - codeparrot_training - Step 28842: {'lr': 0.00046050565654384023, 'samples': 14767616, 'steps': 28842, 'loss/train': 1.473719596862793} +03/04/2022 23:21:04 - INFO - codeparrot_training - Step 28843: {'lr': 0.0004605027938145211, 'samples': 14768128, 'steps': 28843, 'loss/train': 1.9232053756713867} +03/04/2022 23:21:06 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 23:21:09 - INFO - codeparrot_training - Step 28844: {'lr': 0.0004604999309903526, 'samples': 14768640, 'steps': 28844, 'loss/train': 1.5329201221466064} +03/04/2022 23:21:12 - INFO - codeparrot_training - Step 28845: {'lr': 0.0004604970680713362, 'samples': 14769152, 'steps': 28845, 'loss/train': 1.8971480131149292} +03/04/2022 23:21:14 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 23:21:18 - INFO - codeparrot_training - Step 28846: {'lr': 0.00046049420505747294, 'samples': 14769664, 'steps': 28846, 'loss/train': 3.4711081981658936} +03/04/2022 23:21:21 - INFO - codeparrot_training - Step 28847: {'lr': 0.0004604913419487643, 'samples': 14770176, 'steps': 28847, 'loss/train': 0.8581728935241699} +03/04/2022 23:21:23 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 23:21:26 - INFO - codeparrot_training - Step 28848: {'lr': 0.00046048847874521144, 'samples': 14770688, 'steps': 28848, 'loss/train': 1.4948898553848267} +03/04/2022 23:21:29 - INFO - codeparrot_training - Step 28849: {'lr': 0.00046048561544681575, 'samples': 14771200, 'steps': 28849, 'loss/train': 1.423422932624817} +03/04/2022 23:21:31 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 23:21:35 - INFO - codeparrot_training - Step 28850: {'lr': 0.00046048275205357855, 'samples': 14771712, 'steps': 28850, 'loss/train': 1.3602997064590454} +03/04/2022 23:21:38 - INFO - codeparrot_training - Step 28851: {'lr': 0.00046047988856550104, 'samples': 14772224, 'steps': 28851, 'loss/train': 2.274324655532837} +03/04/2022 23:21:39 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/04/2022 23:21:43 - INFO - codeparrot_training - Step 28852: {'lr': 0.00046047702498258446, 'samples': 14772736, 'steps': 28852, 'loss/train': 1.4053928852081299} +03/04/2022 23:21:46 - INFO - codeparrot_training - Step 28853: {'lr': 0.00046047416130483033, 'samples': 14773248, 'steps': 28853, 'loss/train': 1.1108251810073853} +03/04/2022 23:21:48 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 23:21:52 - INFO - codeparrot_training - Step 28854: {'lr': 0.00046047129753223973, 'samples': 14773760, 'steps': 28854, 'loss/train': 1.1840301752090454} +03/04/2022 23:21:55 - INFO - codeparrot_training - Step 28855: {'lr': 0.0004604684336648139, 'samples': 14774272, 'steps': 28855, 'loss/train': 1.5065953731536865} +03/04/2022 23:21:56 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 23:22:00 - INFO - codeparrot_training - Step 28856: {'lr': 0.00046046556970255435, 'samples': 14774784, 'steps': 28856, 'loss/train': 1.762263298034668} +03/04/2022 23:22:03 - INFO - codeparrot_training - Step 28857: {'lr': 0.0004604627056454622, 'samples': 14775296, 'steps': 28857, 'loss/train': 1.4106866121292114} +03/04/2022 23:22:04 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 23:22:08 - INFO - codeparrot_training - Step 28858: {'lr': 0.00046045984149353894, 'samples': 14775808, 'steps': 28858, 'loss/train': 2.2027719020843506} +03/04/2022 23:22:11 - INFO - codeparrot_training - Step 28859: {'lr': 0.0004604569772467856, 'samples': 14776320, 'steps': 28859, 'loss/train': 2.154367208480835} +03/04/2022 23:22:12 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 23:22:17 - INFO - codeparrot_training - Step 28860: {'lr': 0.00046045411290520364, 'samples': 14776832, 'steps': 28860, 'loss/train': 2.029752492904663} +03/04/2022 23:22:20 - INFO - codeparrot_training - Step 28861: {'lr': 0.00046045124846879427, 'samples': 14777344, 'steps': 28861, 'loss/train': 1.7678821086883545} +03/04/2022 23:22:21 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/04/2022 23:22:25 - INFO - codeparrot_training - Step 28862: {'lr': 0.00046044838393755885, 'samples': 14777856, 'steps': 28862, 'loss/train': 2.0462210178375244} +03/04/2022 23:22:28 - INFO - codeparrot_training - Step 28863: {'lr': 0.00046044551931149856, 'samples': 14778368, 'steps': 28863, 'loss/train': 0.10074155777692795} +03/04/2022 23:22:29 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/04/2022 23:22:34 - INFO - codeparrot_training - Step 28864: {'lr': 0.0004604426545906149, 'samples': 14778880, 'steps': 28864, 'loss/train': 1.6035149097442627} +03/04/2022 23:22:37 - INFO - codeparrot_training - Step 28865: {'lr': 0.0004604397897749089, 'samples': 14779392, 'steps': 28865, 'loss/train': 0.8119407296180725} +03/04/2022 23:22:38 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 23:22:42 - INFO - codeparrot_training - Step 28866: {'lr': 0.00046043692486438207, 'samples': 14779904, 'steps': 28866, 'loss/train': 1.6479127407073975} +03/04/2022 23:22:45 - INFO - codeparrot_training - Step 28867: {'lr': 0.00046043405985903555, 'samples': 14780416, 'steps': 28867, 'loss/train': 1.6287841796875} +03/04/2022 23:22:46 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 23:22:51 - INFO - codeparrot_training - Step 28868: {'lr': 0.00046043119475887073, 'samples': 14780928, 'steps': 28868, 'loss/train': 2.0390870571136475} +03/04/2022 23:22:54 - INFO - codeparrot_training - Step 28869: {'lr': 0.0004604283295638888, 'samples': 14781440, 'steps': 28869, 'loss/train': 2.0804827213287354} +03/04/2022 23:22:55 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/04/2022 23:22:59 - INFO - codeparrot_training - Step 28870: {'lr': 0.00046042546427409116, 'samples': 14781952, 'steps': 28870, 'loss/train': 0.48635751008987427} +03/04/2022 23:23:02 - INFO - codeparrot_training - Step 28871: {'lr': 0.000460422598889479, 'samples': 14782464, 'steps': 28871, 'loss/train': 1.7114224433898926} +03/04/2022 23:23:03 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/04/2022 23:23:08 - INFO - codeparrot_training - Step 28872: {'lr': 0.0004604197334100537, 'samples': 14782976, 'steps': 28872, 'loss/train': 1.4371286630630493} +03/04/2022 23:23:11 - INFO - codeparrot_training - Step 28873: {'lr': 0.0004604168678358166, 'samples': 14783488, 'steps': 28873, 'loss/train': 1.9294750690460205} +03/04/2022 23:23:12 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 23:23:16 - INFO - codeparrot_training - Step 28874: {'lr': 0.00046041400216676874, 'samples': 14784000, 'steps': 28874, 'loss/train': 2.0808136463165283} +03/04/2022 23:23:19 - INFO - codeparrot_training - Step 28875: {'lr': 0.0004604111364029118, 'samples': 14784512, 'steps': 28875, 'loss/train': 1.9937310218811035} +03/04/2022 23:23:20 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 23:23:24 - INFO - codeparrot_training - Step 28876: {'lr': 0.0004604082705442466, 'samples': 14785024, 'steps': 28876, 'loss/train': 1.9463622570037842} +03/04/2022 23:23:28 - INFO - codeparrot_training - Step 28877: {'lr': 0.00046040540459077483, 'samples': 14785536, 'steps': 28877, 'loss/train': 2.304171323776245} +03/04/2022 23:23:28 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/04/2022 23:23:33 - INFO - codeparrot_training - Step 28878: {'lr': 0.0004604025385424976, 'samples': 14786048, 'steps': 28878, 'loss/train': 1.8226237297058105} +03/04/2022 23:23:36 - INFO - codeparrot_training - Step 28879: {'lr': 0.00046039967239941626, 'samples': 14786560, 'steps': 28879, 'loss/train': 1.9959421157836914} +03/04/2022 23:23:36 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/04/2022 23:23:41 - INFO - codeparrot_training - Step 28880: {'lr': 0.000460396806161532, 'samples': 14787072, 'steps': 28880, 'loss/train': 1.3863372802734375} +03/04/2022 23:23:44 - INFO - codeparrot_training - Step 28881: {'lr': 0.0004603939398288463, 'samples': 14787584, 'steps': 28881, 'loss/train': 1.9170218706130981} +03/04/2022 23:23:44 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 23:23:50 - INFO - codeparrot_training - Step 28882: {'lr': 0.00046039107340136023, 'samples': 14788096, 'steps': 28882, 'loss/train': 1.4970533847808838} +03/04/2022 23:23:53 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 23:23:55 - INFO - codeparrot_training - Step 28883: {'lr': 0.00046038820687907523, 'samples': 14788608, 'steps': 28883, 'loss/train': 1.1294101476669312} +03/04/2022 23:23:58 - INFO - codeparrot_training - Step 28884: {'lr': 0.0004603853402619925, 'samples': 14789120, 'steps': 28884, 'loss/train': 1.9906418323516846} +03/04/2022 23:24:01 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 23:24:03 - INFO - codeparrot_training - Step 28885: {'lr': 0.00046038247355011347, 'samples': 14789632, 'steps': 28885, 'loss/train': 1.4410990476608276} +03/04/2022 23:24:06 - INFO - codeparrot_training - Step 28886: {'lr': 0.00046037960674343925, 'samples': 14790144, 'steps': 28886, 'loss/train': 2.2478859424591064} +03/04/2022 23:24:09 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/04/2022 23:24:12 - INFO - codeparrot_training - Step 28887: {'lr': 0.0004603767398419713, 'samples': 14790656, 'steps': 28887, 'loss/train': 2.5621390342712402} +03/04/2022 23:24:15 - INFO - codeparrot_training - Step 28888: {'lr': 0.0004603738728457109, 'samples': 14791168, 'steps': 28888, 'loss/train': 2.2321207523345947} +03/04/2022 23:24:17 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/04/2022 23:24:20 - INFO - codeparrot_training - Step 28889: {'lr': 0.0004603710057546592, 'samples': 14791680, 'steps': 28889, 'loss/train': 1.6797950267791748} +03/04/2022 23:24:23 - INFO - codeparrot_training - Step 28890: {'lr': 0.0004603681385688175, 'samples': 14792192, 'steps': 28890, 'loss/train': 1.9049224853515625} +03/04/2022 23:24:25 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 23:24:29 - INFO - codeparrot_training - Step 28891: {'lr': 0.00046036527128818724, 'samples': 14792704, 'steps': 28891, 'loss/train': 2.001204490661621} +03/04/2022 23:24:32 - INFO - codeparrot_training - Step 28892: {'lr': 0.0004603624039127696, 'samples': 14793216, 'steps': 28892, 'loss/train': 2.7873036861419678} +03/04/2022 23:24:34 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 23:24:37 - INFO - codeparrot_training - Step 28893: {'lr': 0.00046035953644256596, 'samples': 14793728, 'steps': 28893, 'loss/train': 1.8191783428192139} +03/04/2022 23:24:40 - INFO - codeparrot_training - Step 28894: {'lr': 0.00046035666887757755, 'samples': 14794240, 'steps': 28894, 'loss/train': 2.2396652698516846} +03/04/2022 23:24:42 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 23:24:45 - INFO - codeparrot_training - Step 28895: {'lr': 0.00046035380121780563, 'samples': 14794752, 'steps': 28895, 'loss/train': 1.0408203601837158} +03/04/2022 23:24:48 - INFO - codeparrot_training - Step 28896: {'lr': 0.0004603509334632515, 'samples': 14795264, 'steps': 28896, 'loss/train': 1.150875210762024} +03/04/2022 23:24:51 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 23:24:54 - INFO - codeparrot_training - Step 28897: {'lr': 0.00046034806561391655, 'samples': 14795776, 'steps': 28897, 'loss/train': 1.836584210395813} +03/04/2022 23:24:57 - INFO - codeparrot_training - Step 28898: {'lr': 0.000460345197669802, 'samples': 14796288, 'steps': 28898, 'loss/train': 1.8709797859191895} +03/04/2022 23:24:59 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 23:25:02 - INFO - codeparrot_training - Step 28899: {'lr': 0.0004603423296309092, 'samples': 14796800, 'steps': 28899, 'loss/train': 0.956364095211029} +03/04/2022 23:25:05 - INFO - codeparrot_training - Step 28900: {'lr': 0.0004603394614972393, 'samples': 14797312, 'steps': 28900, 'loss/train': 1.3321460485458374} +03/04/2022 23:25:08 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 23:25:11 - INFO - codeparrot_training - Step 28901: {'lr': 0.00046033659326879373, 'samples': 14797824, 'steps': 28901, 'loss/train': 1.9117951393127441} +03/04/2022 23:25:14 - INFO - codeparrot_training - Step 28902: {'lr': 0.00046033372494557373, 'samples': 14798336, 'steps': 28902, 'loss/train': 2.2139878273010254} +03/04/2022 23:25:16 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 23:25:19 - INFO - codeparrot_training - Step 28903: {'lr': 0.00046033085652758053, 'samples': 14798848, 'steps': 28903, 'loss/train': 1.5712919235229492} +03/04/2022 23:25:22 - INFO - codeparrot_training - Step 28904: {'lr': 0.00046032798801481564, 'samples': 14799360, 'steps': 28904, 'loss/train': 2.484159469604492} +03/04/2022 23:25:24 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 23:25:27 - INFO - codeparrot_training - Step 28905: {'lr': 0.0004603251194072801, 'samples': 14799872, 'steps': 28905, 'loss/train': 1.4546531438827515} +03/04/2022 23:25:31 - INFO - codeparrot_training - Step 28906: {'lr': 0.0004603222507049754, 'samples': 14800384, 'steps': 28906, 'loss/train': 1.8969160318374634} +03/04/2022 23:25:33 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 23:25:36 - INFO - codeparrot_training - Step 28907: {'lr': 0.00046031938190790254, 'samples': 14800896, 'steps': 28907, 'loss/train': 1.6149530410766602} +03/04/2022 23:25:39 - INFO - codeparrot_training - Step 28908: {'lr': 0.0004603165130160633, 'samples': 14801408, 'steps': 28908, 'loss/train': 1.1668694019317627} +03/04/2022 23:25:41 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 23:25:44 - INFO - codeparrot_training - Step 28909: {'lr': 0.0004603136440294584, 'samples': 14801920, 'steps': 28909, 'loss/train': 0.6168772578239441} +03/04/2022 23:25:47 - INFO - codeparrot_training - Step 28910: {'lr': 0.0004603107749480896, 'samples': 14802432, 'steps': 28910, 'loss/train': 2.043896436691284} +03/04/2022 23:25:49 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/04/2022 23:25:53 - INFO - codeparrot_training - Step 28911: {'lr': 0.0004603079057719579, 'samples': 14802944, 'steps': 28911, 'loss/train': 2.1559605598449707} +03/04/2022 23:25:56 - INFO - codeparrot_training - Step 28912: {'lr': 0.0004603050365010648, 'samples': 14803456, 'steps': 28912, 'loss/train': 2.1699044704437256} +03/04/2022 23:25:57 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 23:26:01 - INFO - codeparrot_training - Step 28913: {'lr': 0.00046030216713541147, 'samples': 14803968, 'steps': 28913, 'loss/train': 2.1962292194366455} +03/04/2022 23:26:04 - INFO - codeparrot_training - Step 28914: {'lr': 0.00046029929767499924, 'samples': 14804480, 'steps': 28914, 'loss/train': 2.6712489128112793} +03/04/2022 23:26:06 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 23:26:10 - INFO - codeparrot_training - Step 28915: {'lr': 0.0004602964281198293, 'samples': 14804992, 'steps': 28915, 'loss/train': 1.5854524374008179} +03/04/2022 23:26:13 - INFO - codeparrot_training - Step 28916: {'lr': 0.0004602935584699031, 'samples': 14805504, 'steps': 28916, 'loss/train': 2.401256561279297} +03/04/2022 23:26:14 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 23:26:18 - INFO - codeparrot_training - Step 28917: {'lr': 0.00046029068872522185, 'samples': 14806016, 'steps': 28917, 'loss/train': 1.815535068511963} +03/04/2022 23:26:21 - INFO - codeparrot_training - Step 28918: {'lr': 0.0004602878188857869, 'samples': 14806528, 'steps': 28918, 'loss/train': 2.013997793197632} +03/04/2022 23:26:22 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 23:26:26 - INFO - codeparrot_training - Step 28919: {'lr': 0.0004602849489515995, 'samples': 14807040, 'steps': 28919, 'loss/train': 1.2687535285949707} +03/04/2022 23:26:30 - INFO - codeparrot_training - Step 28920: {'lr': 0.00046028207892266095, 'samples': 14807552, 'steps': 28920, 'loss/train': 2.077033519744873} +03/04/2022 23:26:31 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 23:26:35 - INFO - codeparrot_training - Step 28921: {'lr': 0.00046027920879897243, 'samples': 14808064, 'steps': 28921, 'loss/train': 1.744291067123413} +03/04/2022 23:26:38 - INFO - codeparrot_training - Step 28922: {'lr': 0.00046027633858053554, 'samples': 14808576, 'steps': 28922, 'loss/train': 1.5903853178024292} +03/04/2022 23:26:39 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 23:26:43 - INFO - codeparrot_training - Step 28923: {'lr': 0.0004602734682673512, 'samples': 14809088, 'steps': 28923, 'loss/train': 2.040329933166504} +03/04/2022 23:26:47 - INFO - codeparrot_training - Step 28924: {'lr': 0.0004602705978594209, 'samples': 14809600, 'steps': 28924, 'loss/train': 2.2472615242004395} +03/04/2022 23:26:48 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/04/2022 23:26:52 - INFO - codeparrot_training - Step 28925: {'lr': 0.00046026772735674606, 'samples': 14810112, 'steps': 28925, 'loss/train': 0.7313695549964905} +03/04/2022 23:26:56 - INFO - codeparrot_training - Step 28926: {'lr': 0.00046026485675932765, 'samples': 14810624, 'steps': 28926, 'loss/train': 1.7321163415908813} +03/04/2022 23:26:58 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 23:27:01 - INFO - codeparrot_training - Step 28927: {'lr': 0.0004602619860671672, 'samples': 14811136, 'steps': 28927, 'loss/train': 0.09058800339698792} +03/04/2022 23:27:04 - INFO - codeparrot_training - Step 28928: {'lr': 0.000460259115280266, 'samples': 14811648, 'steps': 28928, 'loss/train': 1.60804283618927} +03/04/2022 23:27:06 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 23:27:09 - INFO - codeparrot_training - Step 28929: {'lr': 0.00046025624439862523, 'samples': 14812160, 'steps': 28929, 'loss/train': 2.57155179977417} +03/04/2022 23:27:12 - INFO - codeparrot_training - Step 28930: {'lr': 0.0004602533734222463, 'samples': 14812672, 'steps': 28930, 'loss/train': 1.2357851266860962} +03/04/2022 23:27:14 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 23:27:18 - INFO - codeparrot_training - Step 28931: {'lr': 0.00046025050235113036, 'samples': 14813184, 'steps': 28931, 'loss/train': 1.4776639938354492} +03/04/2022 23:27:21 - INFO - codeparrot_training - Step 28932: {'lr': 0.00046024763118527885, 'samples': 14813696, 'steps': 28932, 'loss/train': 1.9436545372009277} +03/04/2022 23:27:23 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 23:27:26 - INFO - codeparrot_training - Step 28933: {'lr': 0.00046024475992469295, 'samples': 14814208, 'steps': 28933, 'loss/train': 1.1302376985549927} +03/04/2022 23:27:29 - INFO - codeparrot_training - Step 28934: {'lr': 0.0004602418885693741, 'samples': 14814720, 'steps': 28934, 'loss/train': 1.7303214073181152} +03/04/2022 23:27:31 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 23:27:35 - INFO - codeparrot_training - Step 28935: {'lr': 0.0004602390171193234, 'samples': 14815232, 'steps': 28935, 'loss/train': 1.2371916770935059} +03/04/2022 23:27:38 - INFO - codeparrot_training - Step 28936: {'lr': 0.0004602361455745423, 'samples': 14815744, 'steps': 28936, 'loss/train': 1.5670655965805054} +03/04/2022 23:27:39 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 23:27:43 - INFO - codeparrot_training - Step 28937: {'lr': 0.000460233273935032, 'samples': 14816256, 'steps': 28937, 'loss/train': 1.7954941987991333} +03/04/2022 23:27:46 - INFO - codeparrot_training - Step 28938: {'lr': 0.00046023040220079383, 'samples': 14816768, 'steps': 28938, 'loss/train': 1.509537696838379} +03/04/2022 23:27:48 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 23:27:51 - INFO - codeparrot_training - Step 28939: {'lr': 0.00046022753037182915, 'samples': 14817280, 'steps': 28939, 'loss/train': 0.5564448833465576} +03/04/2022 23:27:54 - INFO - codeparrot_training - Step 28940: {'lr': 0.0004602246584481391, 'samples': 14817792, 'steps': 28940, 'loss/train': 1.5839362144470215} +03/04/2022 23:27:56 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 23:28:00 - INFO - codeparrot_training - Step 28941: {'lr': 0.00046022178642972513, 'samples': 14818304, 'steps': 28941, 'loss/train': 0.9965249300003052} +03/04/2022 23:28:03 - INFO - codeparrot_training - Step 28942: {'lr': 0.00046021891431658845, 'samples': 14818816, 'steps': 28942, 'loss/train': 1.9373211860656738} +03/04/2022 23:28:04 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/04/2022 23:28:08 - INFO - codeparrot_training - Step 28943: {'lr': 0.00046021604210873035, 'samples': 14819328, 'steps': 28943, 'loss/train': 2.0858917236328125} +03/04/2022 23:28:11 - INFO - codeparrot_training - Step 28944: {'lr': 0.0004602131698061521, 'samples': 14819840, 'steps': 28944, 'loss/train': 1.8935599327087402} +03/04/2022 23:28:12 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/04/2022 23:28:16 - INFO - codeparrot_training - Step 28945: {'lr': 0.0004602102974088551, 'samples': 14820352, 'steps': 28945, 'loss/train': 1.888928771018982} +03/04/2022 23:28:20 - INFO - codeparrot_training - Step 28946: {'lr': 0.00046020742491684067, 'samples': 14820864, 'steps': 28946, 'loss/train': 1.401710033416748} +03/04/2022 23:28:21 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/04/2022 23:28:25 - INFO - codeparrot_training - Step 28947: {'lr': 0.0004602045523301099, 'samples': 14821376, 'steps': 28947, 'loss/train': 1.9961256980895996} +03/04/2022 23:28:28 - INFO - codeparrot_training - Step 28948: {'lr': 0.0004602016796486642, 'samples': 14821888, 'steps': 28948, 'loss/train': 1.5738950967788696} +03/04/2022 23:28:29 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 23:28:33 - INFO - codeparrot_training - Step 28949: {'lr': 0.00046019880687250494, 'samples': 14822400, 'steps': 28949, 'loss/train': 2.2601029872894287} +03/04/2022 23:28:37 - INFO - codeparrot_training - Step 28950: {'lr': 0.0004601959340016333, 'samples': 14822912, 'steps': 28950, 'loss/train': 2.1484766006469727} +03/04/2022 23:28:38 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 23:28:42 - INFO - codeparrot_training - Step 28951: {'lr': 0.0004601930610360506, 'samples': 14823424, 'steps': 28951, 'loss/train': 1.6607630252838135} +03/04/2022 23:28:45 - INFO - codeparrot_training - Step 28952: {'lr': 0.0004601901879757582, 'samples': 14823936, 'steps': 28952, 'loss/train': 0.8688652515411377} +03/04/2022 23:28:46 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/04/2022 23:28:50 - INFO - codeparrot_training - Step 28953: {'lr': 0.0004601873148207573, 'samples': 14824448, 'steps': 28953, 'loss/train': 1.5881937742233276} +03/04/2022 23:28:53 - INFO - codeparrot_training - Step 28954: {'lr': 0.00046018444157104924, 'samples': 14824960, 'steps': 28954, 'loss/train': 1.2198220491409302} +03/04/2022 23:28:54 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 23:28:59 - INFO - codeparrot_training - Step 28955: {'lr': 0.0004601815682266353, 'samples': 14825472, 'steps': 28955, 'loss/train': 2.2297163009643555} +03/04/2022 23:29:02 - INFO - codeparrot_training - Step 28956: {'lr': 0.00046017869478751685, 'samples': 14825984, 'steps': 28956, 'loss/train': 2.1559975147247314} +03/04/2022 23:29:02 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/04/2022 23:29:07 - INFO - codeparrot_training - Step 28957: {'lr': 0.00046017582125369505, 'samples': 14826496, 'steps': 28957, 'loss/train': 2.318465232849121} +03/04/2022 23:29:10 - INFO - codeparrot_training - Step 28958: {'lr': 0.00046017294762517127, 'samples': 14827008, 'steps': 28958, 'loss/train': 1.8619863986968994} +03/04/2022 23:29:10 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 23:29:15 - INFO - codeparrot_training - Step 28959: {'lr': 0.0004601700739019469, 'samples': 14827520, 'steps': 28959, 'loss/train': 2.1109824180603027} +03/04/2022 23:29:19 - INFO - codeparrot_training - Step 28960: {'lr': 0.000460167200084023, 'samples': 14828032, 'steps': 28960, 'loss/train': 1.3227741718292236} +03/04/2022 23:29:19 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/04/2022 23:29:24 - INFO - codeparrot_training - Step 28961: {'lr': 0.00046016432617140113, 'samples': 14828544, 'steps': 28961, 'loss/train': 0.16505742073059082} +03/04/2022 23:29:27 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 23:29:29 - INFO - codeparrot_training - Step 28962: {'lr': 0.0004601614521640824, 'samples': 14829056, 'steps': 28962, 'loss/train': 1.5241373777389526} +03/04/2022 23:29:32 - INFO - codeparrot_training - Step 28963: {'lr': 0.00046015857806206816, 'samples': 14829568, 'steps': 28963, 'loss/train': 1.9669229984283447} +03/04/2022 23:29:35 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 23:29:38 - INFO - codeparrot_training - Step 28964: {'lr': 0.0004601557038653597, 'samples': 14830080, 'steps': 28964, 'loss/train': 1.9989007711410522} +03/04/2022 23:29:41 - INFO - codeparrot_training - Step 28965: {'lr': 0.0004601528295739583, 'samples': 14830592, 'steps': 28965, 'loss/train': 2.0891523361206055} +03/04/2022 23:29:44 - INFO - codeparrot_training - Step 28966: {'lr': 0.00046014995518786536, 'samples': 14831104, 'steps': 28966, 'loss/train': 1.5208605527877808} +03/04/2022 23:29:45 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/04/2022 23:29:50 - INFO - codeparrot_training - Step 28967: {'lr': 0.000460147080707082, 'samples': 14831616, 'steps': 28967, 'loss/train': 1.9914668798446655} +03/04/2022 23:29:53 - INFO - codeparrot_training - Step 28968: {'lr': 0.00046014420613160967, 'samples': 14832128, 'steps': 28968, 'loss/train': 1.9894436597824097} +03/04/2022 23:29:54 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 23:29:58 - INFO - codeparrot_training - Step 28969: {'lr': 0.00046014133146144966, 'samples': 14832640, 'steps': 28969, 'loss/train': 1.305442452430725} +03/04/2022 23:30:01 - INFO - codeparrot_training - Step 28970: {'lr': 0.0004601384566966031, 'samples': 14833152, 'steps': 28970, 'loss/train': 0.08471759408712387} +03/04/2022 23:30:02 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 23:30:06 - INFO - codeparrot_training - Step 28971: {'lr': 0.0004601355818370714, 'samples': 14833664, 'steps': 28971, 'loss/train': 1.3182474374771118} +03/04/2022 23:30:10 - INFO - codeparrot_training - Step 28972: {'lr': 0.0004601327068828559, 'samples': 14834176, 'steps': 28972, 'loss/train': 1.8941353559494019} +03/04/2022 23:30:10 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/04/2022 23:30:15 - INFO - codeparrot_training - Step 28973: {'lr': 0.0004601298318339578, 'samples': 14834688, 'steps': 28973, 'loss/train': 0.8339391350746155} +03/04/2022 23:30:18 - INFO - codeparrot_training - Step 28974: {'lr': 0.0004601269566903785, 'samples': 14835200, 'steps': 28974, 'loss/train': 1.6451183557510376} +03/04/2022 23:30:19 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 23:30:23 - INFO - codeparrot_training - Step 28975: {'lr': 0.0004601240814521192, 'samples': 14835712, 'steps': 28975, 'loss/train': 6.576481342315674} +03/04/2022 23:30:27 - INFO - codeparrot_training - Step 28976: {'lr': 0.00046012120611918126, 'samples': 14836224, 'steps': 28976, 'loss/train': 0.4567897915840149} +03/04/2022 23:30:28 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/04/2022 23:30:32 - INFO - codeparrot_training - Step 28977: {'lr': 0.0004601183306915659, 'samples': 14836736, 'steps': 28977, 'loss/train': 1.4486446380615234} +03/04/2022 23:30:35 - INFO - codeparrot_training - Step 28978: {'lr': 0.0004601154551692745, 'samples': 14837248, 'steps': 28978, 'loss/train': 2.562648296356201} +03/04/2022 23:30:36 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 23:30:40 - INFO - codeparrot_training - Step 28979: {'lr': 0.00046011257955230826, 'samples': 14837760, 'steps': 28979, 'loss/train': 1.7655466794967651} +03/04/2022 23:30:43 - INFO - codeparrot_training - Step 28980: {'lr': 0.00046010970384066863, 'samples': 14838272, 'steps': 28980, 'loss/train': 2.069800615310669} +03/04/2022 23:30:44 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 23:30:49 - INFO - codeparrot_training - Step 28981: {'lr': 0.00046010682803435674, 'samples': 14838784, 'steps': 28981, 'loss/train': 2.7180607318878174} +03/04/2022 23:30:52 - INFO - codeparrot_training - Step 28982: {'lr': 0.000460103952133374, 'samples': 14839296, 'steps': 28982, 'loss/train': 1.030515432357788} +03/04/2022 23:30:53 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 23:30:57 - INFO - codeparrot_training - Step 28983: {'lr': 0.00046010107613772154, 'samples': 14839808, 'steps': 28983, 'loss/train': 2.0299386978149414} +03/04/2022 23:31:00 - INFO - codeparrot_training - Step 28984: {'lr': 0.0004600982000474009, 'samples': 14840320, 'steps': 28984, 'loss/train': 0.9816235303878784} +03/04/2022 23:31:01 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/04/2022 23:31:06 - INFO - codeparrot_training - Step 28985: {'lr': 0.0004600953238624133, 'samples': 14840832, 'steps': 28985, 'loss/train': 1.726502537727356} +03/04/2022 23:31:09 - INFO - codeparrot_training - Step 28986: {'lr': 0.00046009244758275986, 'samples': 14841344, 'steps': 28986, 'loss/train': 1.5209944248199463} +03/04/2022 23:31:10 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 23:31:14 - INFO - codeparrot_training - Step 28987: {'lr': 0.0004600895712084421, 'samples': 14841856, 'steps': 28987, 'loss/train': 1.5486699342727661} +03/04/2022 23:31:17 - INFO - codeparrot_training - Step 28988: {'lr': 0.0004600866947394611, 'samples': 14842368, 'steps': 28988, 'loss/train': 1.5373567342758179} +03/04/2022 23:31:18 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 23:31:23 - INFO - codeparrot_training - Step 28989: {'lr': 0.0004600838181758184, 'samples': 14842880, 'steps': 28989, 'loss/train': 1.8338567018508911} +03/04/2022 23:31:26 - INFO - codeparrot_training - Step 28990: {'lr': 0.00046008094151751513, 'samples': 14843392, 'steps': 28990, 'loss/train': 1.3882304430007935} +03/04/2022 23:31:26 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 23:31:31 - INFO - codeparrot_training - Step 28991: {'lr': 0.0004600780647645526, 'samples': 14843904, 'steps': 28991, 'loss/train': 2.097057342529297} +03/04/2022 23:31:34 - INFO - codeparrot_training - Step 28992: {'lr': 0.0004600751879169321, 'samples': 14844416, 'steps': 28992, 'loss/train': 1.3460090160369873} +03/04/2022 23:31:34 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 23:31:39 - INFO - codeparrot_training - Step 28993: {'lr': 0.00046007231097465505, 'samples': 14844928, 'steps': 28993, 'loss/train': 1.6487737894058228} +03/04/2022 23:31:42 - INFO - codeparrot_training - Step 28994: {'lr': 0.00046006943393772274, 'samples': 14845440, 'steps': 28994, 'loss/train': 1.7244102954864502} +03/04/2022 23:31:42 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/04/2022 23:31:48 - INFO - codeparrot_training - Step 28995: {'lr': 0.00046006655680613616, 'samples': 14845952, 'steps': 28995, 'loss/train': 1.002383828163147} +03/04/2022 23:31:51 - INFO - codeparrot_training - Step 28996: {'lr': 0.00046006367957989705, 'samples': 14846464, 'steps': 28996, 'loss/train': 1.5702049732208252} +03/04/2022 23:31:51 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 23:31:56 - INFO - codeparrot_training - Step 28997: {'lr': 0.0004600608022590064, 'samples': 14846976, 'steps': 28997, 'loss/train': 1.5067673921585083} +03/04/2022 23:31:59 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/04/2022 23:32:02 - INFO - codeparrot_training - Step 28998: {'lr': 0.0004600579248434655, 'samples': 14847488, 'steps': 28998, 'loss/train': 1.8597652912139893} +03/04/2022 23:32:05 - INFO - codeparrot_training - Step 28999: {'lr': 0.0004600550473332759, 'samples': 14848000, 'steps': 28999, 'loss/train': 1.0888842344284058} +03/04/2022 23:32:08 - INFO - codeparrot_training - Step 29000: {'lr': 0.0004600521697284386, 'samples': 14848512, 'steps': 29000, 'loss/train': 0.45745763182640076} +03/04/2022 23:32:08 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/04/2022 23:32:13 - INFO - codeparrot_training - Step 29001: {'lr': 0.0004600492920289551, 'samples': 14849024, 'steps': 29001, 'loss/train': 1.805698275566101} +03/04/2022 23:32:16 - INFO - codeparrot_training - Step 29002: {'lr': 0.00046004641423482665, 'samples': 14849536, 'steps': 29002, 'loss/train': 2.338632583618164} +03/04/2022 23:32:17 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 23:32:22 - INFO - codeparrot_training - Step 29003: {'lr': 0.00046004353634605447, 'samples': 14850048, 'steps': 29003, 'loss/train': 1.0572316646575928} +03/04/2022 23:32:25 - INFO - codeparrot_training - Step 29004: {'lr': 0.00046004065836263995, 'samples': 14850560, 'steps': 29004, 'loss/train': 1.833466649055481} +03/04/2022 23:32:25 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 23:32:30 - INFO - codeparrot_training - Step 29005: {'lr': 0.00046003778028458434, 'samples': 14851072, 'steps': 29005, 'loss/train': 1.740554928779602} +03/04/2022 23:32:33 - INFO - codeparrot_training - Step 29006: {'lr': 0.00046003490211188894, 'samples': 14851584, 'steps': 29006, 'loss/train': 0.7715404033660889} +03/04/2022 23:32:33 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/04/2022 23:32:38 - INFO - codeparrot_training - Step 29007: {'lr': 0.00046003202384455505, 'samples': 14852096, 'steps': 29007, 'loss/train': 1.6751095056533813} +03/04/2022 23:32:42 - INFO - codeparrot_training - Step 29008: {'lr': 0.000460029145482584, 'samples': 14852608, 'steps': 29008, 'loss/train': 2.3788061141967773} +03/04/2022 23:32:42 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 23:32:47 - INFO - codeparrot_training - Step 29009: {'lr': 0.00046002626702597706, 'samples': 14853120, 'steps': 29009, 'loss/train': 1.959648847579956} +03/04/2022 23:32:50 - INFO - codeparrot_training - Step 29010: {'lr': 0.00046002338847473545, 'samples': 14853632, 'steps': 29010, 'loss/train': 1.5642284154891968} +03/04/2022 23:32:50 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 23:32:55 - INFO - codeparrot_training - Step 29011: {'lr': 0.0004600205098288606, 'samples': 14854144, 'steps': 29011, 'loss/train': 2.116776704788208} +03/04/2022 23:32:58 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/04/2022 23:33:01 - INFO - codeparrot_training - Step 29012: {'lr': 0.00046001763108835384, 'samples': 14854656, 'steps': 29012, 'loss/train': 2.0821869373321533} +03/04/2022 23:33:04 - INFO - codeparrot_training - Step 29013: {'lr': 0.0004600147522532162, 'samples': 14855168, 'steps': 29013, 'loss/train': 1.330973744392395} +03/04/2022 23:33:06 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 23:33:09 - INFO - codeparrot_training - Step 29014: {'lr': 0.0004600118733234493, 'samples': 14855680, 'steps': 29014, 'loss/train': 2.320333957672119} +03/04/2022 23:33:12 - INFO - codeparrot_training - Step 29015: {'lr': 0.0004600089942990542, 'samples': 14856192, 'steps': 29015, 'loss/train': 1.6213715076446533} +03/04/2022 23:33:15 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 23:33:17 - INFO - codeparrot_training - Step 29016: {'lr': 0.00046000611518003234, 'samples': 14856704, 'steps': 29016, 'loss/train': 2.0605733394622803} +03/04/2022 23:33:20 - INFO - codeparrot_training - Step 29017: {'lr': 0.00046000323596638495, 'samples': 14857216, 'steps': 29017, 'loss/train': 1.866251826286316} +03/04/2022 23:33:23 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 23:33:26 - INFO - codeparrot_training - Step 29018: {'lr': 0.0004600003566581133, 'samples': 14857728, 'steps': 29018, 'loss/train': 2.195990562438965} +03/04/2022 23:33:29 - INFO - codeparrot_training - Step 29019: {'lr': 0.00045999747725521876, 'samples': 14858240, 'steps': 29019, 'loss/train': 1.3971036672592163} +03/04/2022 23:33:32 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/04/2022 23:33:34 - INFO - codeparrot_training - Step 29020: {'lr': 0.0004599945977577026, 'samples': 14858752, 'steps': 29020, 'loss/train': 0.46534863114356995} +03/04/2022 23:33:37 - INFO - codeparrot_training - Step 29021: {'lr': 0.0004599917181655661, 'samples': 14859264, 'steps': 29021, 'loss/train': 2.037235975265503} +03/04/2022 23:33:40 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 23:33:43 - INFO - codeparrot_training - Step 29022: {'lr': 0.00045998883847881057, 'samples': 14859776, 'steps': 29022, 'loss/train': 0.9228793382644653} +03/04/2022 23:33:46 - INFO - codeparrot_training - Step 29023: {'lr': 0.00045998595869743735, 'samples': 14860288, 'steps': 29023, 'loss/train': 2.0169968605041504} +03/04/2022 23:33:48 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/04/2022 23:33:51 - INFO - codeparrot_training - Step 29024: {'lr': 0.0004599830788214477, 'samples': 14860800, 'steps': 29024, 'loss/train': 1.5238827466964722} +03/04/2022 23:33:54 - INFO - codeparrot_training - Step 29025: {'lr': 0.0004599801988508429, 'samples': 14861312, 'steps': 29025, 'loss/train': 1.931234359741211} +03/04/2022 23:33:57 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 23:33:59 - INFO - codeparrot_training - Step 29026: {'lr': 0.00045997731878562423, 'samples': 14861824, 'steps': 29026, 'loss/train': 1.754596471786499} +03/04/2022 23:34:03 - INFO - codeparrot_training - Step 29027: {'lr': 0.000459974438625793, 'samples': 14862336, 'steps': 29027, 'loss/train': 1.6463311910629272} +03/04/2022 23:34:05 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 23:34:08 - INFO - codeparrot_training - Step 29028: {'lr': 0.0004599715583713506, 'samples': 14862848, 'steps': 29028, 'loss/train': 2.0498368740081787} +03/04/2022 23:34:11 - INFO - codeparrot_training - Step 29029: {'lr': 0.00045996867802229824, 'samples': 14863360, 'steps': 29029, 'loss/train': 1.9638956785202026} +03/04/2022 23:34:14 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/04/2022 23:34:17 - INFO - codeparrot_training - Step 29030: {'lr': 0.0004599657975786372, 'samples': 14863872, 'steps': 29030, 'loss/train': 1.0386602878570557} +03/04/2022 23:34:20 - INFO - codeparrot_training - Step 29031: {'lr': 0.00045996291704036884, 'samples': 14864384, 'steps': 29031, 'loss/train': 1.8849382400512695} +03/04/2022 23:34:23 - INFO - codeparrot_training - Step 29032: {'lr': 0.00045996003640749446, 'samples': 14864896, 'steps': 29032, 'loss/train': 2.1900837421417236} +03/04/2022 23:34:23 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 23:34:28 - INFO - codeparrot_training - Step 29033: {'lr': 0.0004599571556800153, 'samples': 14865408, 'steps': 29033, 'loss/train': 1.5591461658477783} +03/04/2022 23:34:31 - INFO - codeparrot_training - Step 29034: {'lr': 0.00045995427485793263, 'samples': 14865920, 'steps': 29034, 'loss/train': 1.9675546884536743} +03/04/2022 23:34:32 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 23:34:37 - INFO - codeparrot_training - Step 29035: {'lr': 0.00045995139394124784, 'samples': 14866432, 'steps': 29035, 'loss/train': 0.7928702235221863} +03/04/2022 23:34:40 - INFO - codeparrot_training - Step 29036: {'lr': 0.0004599485129299622, 'samples': 14866944, 'steps': 29036, 'loss/train': 1.9369280338287354} +03/04/2022 23:34:40 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 23:34:45 - INFO - codeparrot_training - Step 29037: {'lr': 0.000459945631824077, 'samples': 14867456, 'steps': 29037, 'loss/train': 2.1295254230499268} +03/04/2022 23:34:48 - INFO - codeparrot_training - Step 29038: {'lr': 0.0004599427506235936, 'samples': 14867968, 'steps': 29038, 'loss/train': 1.5909072160720825} +03/04/2022 23:34:48 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 23:34:54 - INFO - codeparrot_training - Step 29039: {'lr': 0.0004599398693285132, 'samples': 14868480, 'steps': 29039, 'loss/train': 1.8151884078979492} +03/04/2022 23:34:57 - INFO - codeparrot_training - Step 29040: {'lr': 0.0004599369879388371, 'samples': 14868992, 'steps': 29040, 'loss/train': 1.6150797605514526} +03/04/2022 23:34:57 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 23:35:02 - INFO - codeparrot_training - Step 29041: {'lr': 0.0004599341064545666, 'samples': 14869504, 'steps': 29041, 'loss/train': 1.6656608581542969} +03/04/2022 23:35:05 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 23:35:07 - INFO - codeparrot_training - Step 29042: {'lr': 0.00045993122487570303, 'samples': 14870016, 'steps': 29042, 'loss/train': 1.389997959136963} +03/04/2022 23:35:10 - INFO - codeparrot_training - Step 29043: {'lr': 0.00045992834320224773, 'samples': 14870528, 'steps': 29043, 'loss/train': 2.376666307449341} +03/04/2022 23:35:13 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 23:35:16 - INFO - codeparrot_training - Step 29044: {'lr': 0.000459925461434202, 'samples': 14871040, 'steps': 29044, 'loss/train': 2.1094560623168945} +03/04/2022 23:35:19 - INFO - codeparrot_training - Step 29045: {'lr': 0.00045992257957156704, 'samples': 14871552, 'steps': 29045, 'loss/train': 1.4378043413162231} +03/04/2022 23:35:22 - INFO - codeparrot_training - Step 29046: {'lr': 0.00045991969761434426, 'samples': 14872064, 'steps': 29046, 'loss/train': 1.391461730003357} +03/04/2022 23:35:22 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/04/2022 23:35:28 - INFO - codeparrot_training - Step 29047: {'lr': 0.0004599168155625348, 'samples': 14872576, 'steps': 29047, 'loss/train': 1.4817235469818115} +03/04/2022 23:35:31 - INFO - codeparrot_training - Step 29048: {'lr': 0.00045991393341614017, 'samples': 14873088, 'steps': 29048, 'loss/train': 1.5636444091796875} +03/04/2022 23:35:31 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 23:35:36 - INFO - codeparrot_training - Step 29049: {'lr': 0.0004599110511751615, 'samples': 14873600, 'steps': 29049, 'loss/train': 1.960412859916687} +03/04/2022 23:35:39 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 23:35:41 - INFO - codeparrot_training - Step 29050: {'lr': 0.0004599081688396002, 'samples': 14874112, 'steps': 29050, 'loss/train': 0.9628152847290039} +03/04/2022 23:35:44 - INFO - codeparrot_training - Step 29051: {'lr': 0.0004599052864094575, 'samples': 14874624, 'steps': 29051, 'loss/train': 1.7646903991699219} +03/04/2022 23:35:47 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 23:35:50 - INFO - codeparrot_training - Step 29052: {'lr': 0.0004599024038847347, 'samples': 14875136, 'steps': 29052, 'loss/train': 0.6668747067451477} +03/04/2022 23:35:53 - INFO - codeparrot_training - Step 29053: {'lr': 0.0004598995212654331, 'samples': 14875648, 'steps': 29053, 'loss/train': 0.4553457498550415} +03/04/2022 23:35:55 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 23:35:58 - INFO - codeparrot_training - Step 29054: {'lr': 0.0004598966385515541, 'samples': 14876160, 'steps': 29054, 'loss/train': 2.3059468269348145} +03/04/2022 23:36:01 - INFO - codeparrot_training - Step 29055: {'lr': 0.00045989375574309875, 'samples': 14876672, 'steps': 29055, 'loss/train': 1.6683145761489868} +03/04/2022 23:36:04 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 23:36:07 - INFO - codeparrot_training - Step 29056: {'lr': 0.00045989087284006863, 'samples': 14877184, 'steps': 29056, 'loss/train': 1.4355348348617554} +03/04/2022 23:36:10 - INFO - codeparrot_training - Step 29057: {'lr': 0.00045988798984246496, 'samples': 14877696, 'steps': 29057, 'loss/train': 1.6905529499053955} +03/04/2022 23:36:12 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 23:36:15 - INFO - codeparrot_training - Step 29058: {'lr': 0.0004598851067502889, 'samples': 14878208, 'steps': 29058, 'loss/train': 1.843261480331421} +03/04/2022 23:36:18 - INFO - codeparrot_training - Step 29059: {'lr': 0.00045988222356354186, 'samples': 14878720, 'steps': 29059, 'loss/train': 1.6904810667037964} +03/04/2022 23:36:21 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 23:36:24 - INFO - codeparrot_training - Step 29060: {'lr': 0.00045987934028222515, 'samples': 14879232, 'steps': 29060, 'loss/train': 2.957566976547241} +03/04/2022 23:36:27 - INFO - codeparrot_training - Step 29061: {'lr': 0.00045987645690634003, 'samples': 14879744, 'steps': 29061, 'loss/train': 0.5805132389068604} +03/04/2022 23:36:29 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 23:36:32 - INFO - codeparrot_training - Step 29062: {'lr': 0.0004598735734358879, 'samples': 14880256, 'steps': 29062, 'loss/train': 1.645241141319275} +03/04/2022 23:36:35 - INFO - codeparrot_training - Step 29063: {'lr': 0.0004598706898708699, 'samples': 14880768, 'steps': 29063, 'loss/train': 1.8921828269958496} +03/04/2022 23:36:38 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 23:36:40 - INFO - codeparrot_training - Step 29064: {'lr': 0.00045986780621128743, 'samples': 14881280, 'steps': 29064, 'loss/train': 2.7746834754943848} +03/04/2022 23:36:44 - INFO - codeparrot_training - Step 29065: {'lr': 0.00045986492245714175, 'samples': 14881792, 'steps': 29065, 'loss/train': 1.7760132551193237} +03/04/2022 23:36:46 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 23:36:49 - INFO - codeparrot_training - Step 29066: {'lr': 0.0004598620386084342, 'samples': 14882304, 'steps': 29066, 'loss/train': 1.8235278129577637} +03/04/2022 23:36:52 - INFO - codeparrot_training - Step 29067: {'lr': 0.00045985915466516605, 'samples': 14882816, 'steps': 29067, 'loss/train': 2.082853078842163} +03/04/2022 23:36:54 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 23:36:57 - INFO - codeparrot_training - Step 29068: {'lr': 0.0004598562706273386, 'samples': 14883328, 'steps': 29068, 'loss/train': 1.6717555522918701} +03/04/2022 23:37:00 - INFO - codeparrot_training - Step 29069: {'lr': 0.0004598533864949531, 'samples': 14883840, 'steps': 29069, 'loss/train': 1.9257400035858154} +03/04/2022 23:37:03 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 23:37:06 - INFO - codeparrot_training - Step 29070: {'lr': 0.00045985050226801097, 'samples': 14884352, 'steps': 29070, 'loss/train': 1.6174262762069702} +03/04/2022 23:37:09 - INFO - codeparrot_training - Step 29071: {'lr': 0.0004598476179465134, 'samples': 14884864, 'steps': 29071, 'loss/train': 2.1745996475219727} +03/04/2022 23:37:11 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 23:37:14 - INFO - codeparrot_training - Step 29072: {'lr': 0.00045984473353046174, 'samples': 14885376, 'steps': 29072, 'loss/train': 1.4382448196411133} +03/04/2022 23:37:17 - INFO - codeparrot_training - Step 29073: {'lr': 0.00045984184901985735, 'samples': 14885888, 'steps': 29073, 'loss/train': 2.2041072845458984} +03/04/2022 23:37:20 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 23:37:23 - INFO - codeparrot_training - Step 29074: {'lr': 0.00045983896441470143, 'samples': 14886400, 'steps': 29074, 'loss/train': 1.5206266641616821} +03/04/2022 23:37:26 - INFO - codeparrot_training - Step 29075: {'lr': 0.00045983607971499527, 'samples': 14886912, 'steps': 29075, 'loss/train': 2.209489583969116} +03/04/2022 23:37:28 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/04/2022 23:37:31 - INFO - codeparrot_training - Step 29076: {'lr': 0.0004598331949207402, 'samples': 14887424, 'steps': 29076, 'loss/train': 2.0450599193573} +03/04/2022 23:37:34 - INFO - codeparrot_training - Step 29077: {'lr': 0.00045983031003193756, 'samples': 14887936, 'steps': 29077, 'loss/train': 1.074481725692749} +03/04/2022 23:37:37 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/04/2022 23:37:40 - INFO - codeparrot_training - Step 29078: {'lr': 0.0004598274250485886, 'samples': 14888448, 'steps': 29078, 'loss/train': 1.2844581604003906} +03/04/2022 23:37:43 - INFO - codeparrot_training - Step 29079: {'lr': 0.00045982453997069463, 'samples': 14888960, 'steps': 29079, 'loss/train': 2.089682102203369} +03/04/2022 23:37:45 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 23:37:48 - INFO - codeparrot_training - Step 29080: {'lr': 0.00045982165479825697, 'samples': 14889472, 'steps': 29080, 'loss/train': 1.7453691959381104} +03/04/2022 23:37:51 - INFO - codeparrot_training - Step 29081: {'lr': 0.000459818769531277, 'samples': 14889984, 'steps': 29081, 'loss/train': 1.4937694072723389} +03/04/2022 23:37:53 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 23:37:57 - INFO - codeparrot_training - Step 29082: {'lr': 0.00045981588416975583, 'samples': 14890496, 'steps': 29082, 'loss/train': 0.6603583097457886} +03/04/2022 23:38:00 - INFO - codeparrot_training - Step 29083: {'lr': 0.00045981299871369484, 'samples': 14891008, 'steps': 29083, 'loss/train': 1.9662913084030151} +03/04/2022 23:38:02 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 23:38:05 - INFO - codeparrot_training - Step 29084: {'lr': 0.0004598101131630954, 'samples': 14891520, 'steps': 29084, 'loss/train': 2.0128085613250732} +03/04/2022 23:38:08 - INFO - codeparrot_training - Step 29085: {'lr': 0.0004598072275179588, 'samples': 14892032, 'steps': 29085, 'loss/train': 2.38871169090271} +03/04/2022 23:38:10 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/04/2022 23:38:14 - INFO - codeparrot_training - Step 29086: {'lr': 0.00045980434177828625, 'samples': 14892544, 'steps': 29086, 'loss/train': 1.7146490812301636} +03/04/2022 23:38:17 - INFO - codeparrot_training - Step 29087: {'lr': 0.00045980145594407907, 'samples': 14893056, 'steps': 29087, 'loss/train': 1.5957999229431152} +03/04/2022 23:38:18 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/04/2022 23:38:22 - INFO - codeparrot_training - Step 29088: {'lr': 0.00045979857001533867, 'samples': 14893568, 'steps': 29088, 'loss/train': 2.280099630355835} +03/04/2022 23:38:25 - INFO - codeparrot_training - Step 29089: {'lr': 0.0004597956839920662, 'samples': 14894080, 'steps': 29089, 'loss/train': 2.733269453048706} +03/04/2022 23:38:27 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 23:38:30 - INFO - codeparrot_training - Step 29090: {'lr': 0.00045979279787426307, 'samples': 14894592, 'steps': 29090, 'loss/train': 1.5046567916870117} +03/04/2022 23:38:34 - INFO - codeparrot_training - Step 29091: {'lr': 0.00045978991166193057, 'samples': 14895104, 'steps': 29091, 'loss/train': 1.3361395597457886} +03/04/2022 23:38:35 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/04/2022 23:38:39 - INFO - codeparrot_training - Step 29092: {'lr': 0.0004597870253550699, 'samples': 14895616, 'steps': 29092, 'loss/train': 2.0014395713806152} +03/04/2022 23:38:42 - INFO - codeparrot_training - Step 29093: {'lr': 0.0004597841389536825, 'samples': 14896128, 'steps': 29093, 'loss/train': 1.3102402687072754} +03/04/2022 23:38:44 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 23:38:47 - INFO - codeparrot_training - Step 29094: {'lr': 0.00045978125245776957, 'samples': 14896640, 'steps': 29094, 'loss/train': 1.355667233467102} +03/04/2022 23:38:51 - INFO - codeparrot_training - Step 29095: {'lr': 0.00045977836586733246, 'samples': 14897152, 'steps': 29095, 'loss/train': 1.6419482231140137} +03/04/2022 23:38:53 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 23:38:56 - INFO - codeparrot_training - Step 29096: {'lr': 0.00045977547918237243, 'samples': 14897664, 'steps': 29096, 'loss/train': 2.0282320976257324} +03/04/2022 23:38:59 - INFO - codeparrot_training - Step 29097: {'lr': 0.0004597725924028908, 'samples': 14898176, 'steps': 29097, 'loss/train': 0.617231547832489} +03/04/2022 23:39:01 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 23:39:04 - INFO - codeparrot_training - Step 29098: {'lr': 0.00045976970552888896, 'samples': 14898688, 'steps': 29098, 'loss/train': 1.2387304306030273} +03/04/2022 23:39:07 - INFO - codeparrot_training - Step 29099: {'lr': 0.00045976681856036805, 'samples': 14899200, 'steps': 29099, 'loss/train': 2.3988895416259766} +03/04/2022 23:39:09 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 23:39:13 - INFO - codeparrot_training - Step 29100: {'lr': 0.00045976393149732943, 'samples': 14899712, 'steps': 29100, 'loss/train': 1.775471806526184} +03/04/2022 23:39:16 - INFO - codeparrot_training - Step 29101: {'lr': 0.0004597610443397745, 'samples': 14900224, 'steps': 29101, 'loss/train': 2.2531089782714844} +03/04/2022 23:39:17 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 23:39:21 - INFO - codeparrot_training - Step 29102: {'lr': 0.0004597581570877044, 'samples': 14900736, 'steps': 29102, 'loss/train': 1.7624447345733643} +03/04/2022 23:39:24 - INFO - codeparrot_training - Step 29103: {'lr': 0.00045975526974112056, 'samples': 14901248, 'steps': 29103, 'loss/train': 2.484616279602051} +03/04/2022 23:39:26 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 23:39:29 - INFO - codeparrot_training - Step 29104: {'lr': 0.0004597523823000243, 'samples': 14901760, 'steps': 29104, 'loss/train': 2.11627459526062} +03/04/2022 23:39:33 - INFO - codeparrot_training - Step 29105: {'lr': 0.0004597494947644167, 'samples': 14902272, 'steps': 29105, 'loss/train': 1.7344225645065308} +03/04/2022 23:39:34 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 23:39:38 - INFO - codeparrot_training - Step 29106: {'lr': 0.0004597466071342993, 'samples': 14902784, 'steps': 29106, 'loss/train': 1.3737783432006836} +03/04/2022 23:39:41 - INFO - codeparrot_training - Step 29107: {'lr': 0.0004597437194096733, 'samples': 14903296, 'steps': 29107, 'loss/train': 0.9118130207061768} +03/04/2022 23:39:42 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 23:39:46 - INFO - codeparrot_training - Step 29108: {'lr': 0.00045974083159054, 'samples': 14903808, 'steps': 29108, 'loss/train': 0.8802109956741333} +03/04/2022 23:39:50 - INFO - codeparrot_training - Step 29109: {'lr': 0.0004597379436769008, 'samples': 14904320, 'steps': 29109, 'loss/train': 1.9168369770050049} +03/04/2022 23:39:50 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/04/2022 23:39:55 - INFO - codeparrot_training - Step 29110: {'lr': 0.00045973505566875684, 'samples': 14904832, 'steps': 29110, 'loss/train': 1.999769926071167} +03/04/2022 23:39:58 - INFO - codeparrot_training - Step 29111: {'lr': 0.00045973216756610945, 'samples': 14905344, 'steps': 29111, 'loss/train': 1.6698222160339355} +03/04/2022 23:39:59 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 23:40:03 - INFO - codeparrot_training - Step 29112: {'lr': 0.00045972927936896007, 'samples': 14905856, 'steps': 29112, 'loss/train': 1.5797452926635742} +03/04/2022 23:40:06 - INFO - codeparrot_training - Step 29113: {'lr': 0.0004597263910773099, 'samples': 14906368, 'steps': 29113, 'loss/train': 1.504439115524292} +03/04/2022 23:40:07 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/04/2022 23:40:12 - INFO - codeparrot_training - Step 29114: {'lr': 0.0004597235026911603, 'samples': 14906880, 'steps': 29114, 'loss/train': 1.7676739692687988} +03/04/2022 23:40:15 - INFO - codeparrot_training - Step 29115: {'lr': 0.0004597206142105124, 'samples': 14907392, 'steps': 29115, 'loss/train': 0.9439258575439453} +03/04/2022 23:40:15 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 23:40:20 - INFO - codeparrot_training - Step 29116: {'lr': 0.0004597177256353677, 'samples': 14907904, 'steps': 29116, 'loss/train': 1.6854532957077026} +03/04/2022 23:40:23 - INFO - codeparrot_training - Step 29117: {'lr': 0.0004597148369657275, 'samples': 14908416, 'steps': 29117, 'loss/train': 2.058504581451416} +03/04/2022 23:40:23 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 23:40:29 - INFO - codeparrot_training - Step 29118: {'lr': 0.0004597119482015929, 'samples': 14908928, 'steps': 29118, 'loss/train': 1.4069952964782715} +03/04/2022 23:40:32 - INFO - codeparrot_training - Step 29119: {'lr': 0.00045970905934296537, 'samples': 14909440, 'steps': 29119, 'loss/train': 2.0133395195007324} +03/04/2022 23:40:32 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 23:40:37 - INFO - codeparrot_training - Step 29120: {'lr': 0.0004597061703898462, 'samples': 14909952, 'steps': 29120, 'loss/train': 3.197479009628296} +03/04/2022 23:40:40 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/04/2022 23:40:43 - INFO - codeparrot_training - Step 29121: {'lr': 0.0004597032813422367, 'samples': 14910464, 'steps': 29121, 'loss/train': 1.5683201551437378} +03/04/2022 23:40:46 - INFO - codeparrot_training - Step 29122: {'lr': 0.00045970039220013804, 'samples': 14910976, 'steps': 29122, 'loss/train': 0.2565067410469055} +03/04/2022 23:40:48 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 23:40:51 - INFO - codeparrot_training - Step 29123: {'lr': 0.00045969750296355173, 'samples': 14911488, 'steps': 29123, 'loss/train': 2.474449634552002} +03/04/2022 23:40:54 - INFO - codeparrot_training - Step 29124: {'lr': 0.0004596946136324789, 'samples': 14912000, 'steps': 29124, 'loss/train': 2.0101943016052246} +03/04/2022 23:40:57 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 23:41:00 - INFO - codeparrot_training - Step 29125: {'lr': 0.0004596917242069209, 'samples': 14912512, 'steps': 29125, 'loss/train': 1.7664581537246704} +03/04/2022 23:41:03 - INFO - codeparrot_training - Step 29126: {'lr': 0.00045968883468687906, 'samples': 14913024, 'steps': 29126, 'loss/train': 1.4397746324539185} +03/04/2022 23:41:05 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 23:41:08 - INFO - codeparrot_training - Step 29127: {'lr': 0.00045968594507235467, 'samples': 14913536, 'steps': 29127, 'loss/train': 0.9504812359809875} +03/04/2022 23:41:11 - INFO - codeparrot_training - Step 29128: {'lr': 0.00045968305536334906, 'samples': 14914048, 'steps': 29128, 'loss/train': 2.012752056121826} +03/04/2022 23:41:14 - INFO - codeparrot_training - Step 29129: {'lr': 0.00045968016555986347, 'samples': 14914560, 'steps': 29129, 'loss/train': 1.8824918270111084} +03/04/2022 23:41:15 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 23:41:20 - INFO - codeparrot_training - Step 29130: {'lr': 0.0004596772756618992, 'samples': 14915072, 'steps': 29130, 'loss/train': 1.524677038192749} +03/04/2022 23:41:23 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 23:41:25 - INFO - codeparrot_training - Step 29131: {'lr': 0.0004596743856694576, 'samples': 14915584, 'steps': 29131, 'loss/train': 1.5124056339263916} +03/04/2022 23:41:28 - INFO - codeparrot_training - Step 29132: {'lr': 0.00045967149558254, 'samples': 14916096, 'steps': 29132, 'loss/train': 1.5541143417358398} +03/04/2022 23:41:31 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 23:41:34 - INFO - codeparrot_training - Step 29133: {'lr': 0.0004596686054011476, 'samples': 14916608, 'steps': 29133, 'loss/train': 2.2409560680389404} +03/04/2022 23:41:37 - INFO - codeparrot_training - Step 29134: {'lr': 0.0004596657151252819, 'samples': 14917120, 'steps': 29134, 'loss/train': 2.1312177181243896} +03/04/2022 23:41:39 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 23:41:42 - INFO - codeparrot_training - Step 29135: {'lr': 0.0004596628247549439, 'samples': 14917632, 'steps': 29135, 'loss/train': 1.264827847480774} +03/04/2022 23:41:45 - INFO - codeparrot_training - Step 29136: {'lr': 0.00045965993429013507, 'samples': 14918144, 'steps': 29136, 'loss/train': 1.620897889137268} +03/04/2022 23:41:47 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 23:41:50 - INFO - codeparrot_training - Step 29137: {'lr': 0.0004596570437308568, 'samples': 14918656, 'steps': 29137, 'loss/train': 1.871425747871399} +03/04/2022 23:41:53 - INFO - codeparrot_training - Step 29138: {'lr': 0.0004596541530771103, 'samples': 14919168, 'steps': 29138, 'loss/train': 1.6030455827713013} +03/04/2022 23:41:56 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 23:41:59 - INFO - codeparrot_training - Step 29139: {'lr': 0.0004596512623288969, 'samples': 14919680, 'steps': 29139, 'loss/train': 1.4921801090240479} +03/04/2022 23:42:02 - INFO - codeparrot_training - Step 29140: {'lr': 0.00045964837148621776, 'samples': 14920192, 'steps': 29140, 'loss/train': 1.1856757402420044} +03/04/2022 23:42:04 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 23:42:07 - INFO - codeparrot_training - Step 29141: {'lr': 0.00045964548054907434, 'samples': 14920704, 'steps': 29141, 'loss/train': 2.474169969558716} +03/04/2022 23:42:10 - INFO - codeparrot_training - Step 29142: {'lr': 0.00045964258951746795, 'samples': 14921216, 'steps': 29142, 'loss/train': 1.8467825651168823} +03/04/2022 23:42:12 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 23:42:16 - INFO - codeparrot_training - Step 29143: {'lr': 0.0004596396983913998, 'samples': 14921728, 'steps': 29143, 'loss/train': 0.1105784997344017} +03/04/2022 23:42:19 - INFO - codeparrot_training - Step 29144: {'lr': 0.00045963680717087124, 'samples': 14922240, 'steps': 29144, 'loss/train': 0.7168443202972412} +03/04/2022 23:42:21 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 23:42:24 - INFO - codeparrot_training - Step 29145: {'lr': 0.0004596339158558835, 'samples': 14922752, 'steps': 29145, 'loss/train': 0.9533576369285583} +03/04/2022 23:42:27 - INFO - codeparrot_training - Step 29146: {'lr': 0.0004596310244464381, 'samples': 14923264, 'steps': 29146, 'loss/train': 1.4064027070999146} +03/04/2022 23:42:29 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 23:42:32 - INFO - codeparrot_training - Step 29147: {'lr': 0.0004596281329425361, 'samples': 14923776, 'steps': 29147, 'loss/train': 3.5055644512176514} +03/04/2022 23:42:35 - INFO - codeparrot_training - Step 29148: {'lr': 0.0004596252413441789, 'samples': 14924288, 'steps': 29148, 'loss/train': 1.0013046264648438} +03/04/2022 23:42:37 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 23:42:41 - INFO - codeparrot_training - Step 29149: {'lr': 0.00045962234965136783, 'samples': 14924800, 'steps': 29149, 'loss/train': 2.6260547637939453} +03/04/2022 23:42:44 - INFO - codeparrot_training - Step 29150: {'lr': 0.0004596194578641042, 'samples': 14925312, 'steps': 29150, 'loss/train': 2.0666158199310303} +03/04/2022 23:42:45 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 23:42:49 - INFO - codeparrot_training - Step 29151: {'lr': 0.00045961656598238925, 'samples': 14925824, 'steps': 29151, 'loss/train': 0.7794939875602722} +03/04/2022 23:42:52 - INFO - codeparrot_training - Step 29152: {'lr': 0.00045961367400622436, 'samples': 14926336, 'steps': 29152, 'loss/train': 1.543593406677246} +03/04/2022 23:42:54 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 23:42:58 - INFO - codeparrot_training - Step 29153: {'lr': 0.00045961078193561066, 'samples': 14926848, 'steps': 29153, 'loss/train': 1.4555134773254395} +03/04/2022 23:43:01 - INFO - codeparrot_training - Step 29154: {'lr': 0.00045960788977054967, 'samples': 14927360, 'steps': 29154, 'loss/train': 2.217865467071533} +03/04/2022 23:43:02 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 23:43:06 - INFO - codeparrot_training - Step 29155: {'lr': 0.0004596049975110426, 'samples': 14927872, 'steps': 29155, 'loss/train': 1.438384771347046} +03/04/2022 23:43:09 - INFO - codeparrot_training - Step 29156: {'lr': 0.00045960210515709064, 'samples': 14928384, 'steps': 29156, 'loss/train': 2.174865484237671} +03/04/2022 23:43:12 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 23:43:15 - INFO - codeparrot_training - Step 29157: {'lr': 0.0004595992127086953, 'samples': 14928896, 'steps': 29157, 'loss/train': 2.69874906539917} +03/04/2022 23:43:18 - INFO - codeparrot_training - Step 29158: {'lr': 0.00045959632016585774, 'samples': 14929408, 'steps': 29158, 'loss/train': 1.6032224893569946} +03/04/2022 23:43:20 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 23:43:23 - INFO - codeparrot_training - Step 29159: {'lr': 0.0004595934275285794, 'samples': 14929920, 'steps': 29159, 'loss/train': 1.3191182613372803} +03/04/2022 23:43:26 - INFO - codeparrot_training - Step 29160: {'lr': 0.00045959053479686143, 'samples': 14930432, 'steps': 29160, 'loss/train': 2.0268399715423584} +03/04/2022 23:43:29 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/04/2022 23:43:32 - INFO - codeparrot_training - Step 29161: {'lr': 0.0004595876419707052, 'samples': 14930944, 'steps': 29161, 'loss/train': 1.4555333852767944} +03/04/2022 23:43:35 - INFO - codeparrot_training - Step 29162: {'lr': 0.00045958474905011205, 'samples': 14931456, 'steps': 29162, 'loss/train': 2.1187734603881836} +03/04/2022 23:43:37 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 23:43:40 - INFO - codeparrot_training - Step 29163: {'lr': 0.0004595818560350832, 'samples': 14931968, 'steps': 29163, 'loss/train': 6.549332141876221} +03/04/2022 23:43:43 - INFO - codeparrot_training - Step 29164: {'lr': 0.00045957896292562003, 'samples': 14932480, 'steps': 29164, 'loss/train': 2.3962900638580322} +03/04/2022 23:43:46 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 23:43:49 - INFO - codeparrot_training - Step 29165: {'lr': 0.0004595760697217238, 'samples': 14932992, 'steps': 29165, 'loss/train': 1.4033339023590088} +03/04/2022 23:43:52 - INFO - codeparrot_training - Step 29166: {'lr': 0.0004595731764233958, 'samples': 14933504, 'steps': 29166, 'loss/train': 1.8188443183898926} +03/04/2022 23:43:55 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 23:43:57 - INFO - codeparrot_training - Step 29167: {'lr': 0.0004595702830306374, 'samples': 14934016, 'steps': 29167, 'loss/train': 2.7042882442474365} +03/04/2022 23:44:00 - INFO - codeparrot_training - Step 29168: {'lr': 0.0004595673895434498, 'samples': 14934528, 'steps': 29168, 'loss/train': 6.605645179748535} +03/04/2022 23:44:04 - INFO - codeparrot_training - Step 29169: {'lr': 0.00045956449596183446, 'samples': 14935040, 'steps': 29169, 'loss/train': 1.5838942527770996} +03/04/2022 23:44:04 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 23:44:09 - INFO - codeparrot_training - Step 29170: {'lr': 0.00045956160228579257, 'samples': 14935552, 'steps': 29170, 'loss/train': 2.0275838375091553} +03/04/2022 23:44:12 - INFO - codeparrot_training - Step 29171: {'lr': 0.00045955870851532545, 'samples': 14936064, 'steps': 29171, 'loss/train': 2.3538730144500732} +03/04/2022 23:44:12 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/04/2022 23:44:17 - INFO - codeparrot_training - Step 29172: {'lr': 0.0004595558146504344, 'samples': 14936576, 'steps': 29172, 'loss/train': 1.5186928510665894} +03/04/2022 23:44:20 - INFO - codeparrot_training - Step 29173: {'lr': 0.0004595529206911207, 'samples': 14937088, 'steps': 29173, 'loss/train': 1.537463903427124} +03/04/2022 23:44:21 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 23:44:26 - INFO - codeparrot_training - Step 29174: {'lr': 0.00045955002663738574, 'samples': 14937600, 'steps': 29174, 'loss/train': 2.609567642211914} +03/04/2022 23:44:29 - INFO - codeparrot_training - Step 29175: {'lr': 0.0004595471324892307, 'samples': 14938112, 'steps': 29175, 'loss/train': 1.4798550605773926} +03/04/2022 23:44:29 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/04/2022 23:44:34 - INFO - codeparrot_training - Step 29176: {'lr': 0.00045954423824665704, 'samples': 14938624, 'steps': 29176, 'loss/train': 0.16768808662891388} +03/04/2022 23:44:37 - INFO - codeparrot_training - Step 29177: {'lr': 0.00045954134390966593, 'samples': 14939136, 'steps': 29177, 'loss/train': 1.3351296186447144} +03/04/2022 23:44:37 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 23:44:42 - INFO - codeparrot_training - Step 29178: {'lr': 0.00045953844947825876, 'samples': 14939648, 'steps': 29178, 'loss/train': 1.8945249319076538} +03/04/2022 23:44:45 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 23:44:48 - INFO - codeparrot_training - Step 29179: {'lr': 0.0004595355549524368, 'samples': 14940160, 'steps': 29179, 'loss/train': 2.2088303565979004} +03/04/2022 23:44:51 - INFO - codeparrot_training - Step 29180: {'lr': 0.0004595326603322013, 'samples': 14940672, 'steps': 29180, 'loss/train': 2.2044198513031006} +03/04/2022 23:44:53 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 23:44:56 - INFO - codeparrot_training - Step 29181: {'lr': 0.00045952976561755365, 'samples': 14941184, 'steps': 29181, 'loss/train': 2.193561553955078} +03/04/2022 23:44:59 - INFO - codeparrot_training - Step 29182: {'lr': 0.00045952687080849517, 'samples': 14941696, 'steps': 29182, 'loss/train': 1.7539474964141846} +03/04/2022 23:45:02 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 23:45:04 - INFO - codeparrot_training - Step 29183: {'lr': 0.000459523975905027, 'samples': 14942208, 'steps': 29183, 'loss/train': 1.1072354316711426} +03/04/2022 23:45:08 - INFO - codeparrot_training - Step 29184: {'lr': 0.0004595210809071506, 'samples': 14942720, 'steps': 29184, 'loss/train': 1.3498034477233887} +03/04/2022 23:45:10 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/04/2022 23:45:13 - INFO - codeparrot_training - Step 29185: {'lr': 0.0004595181858148673, 'samples': 14943232, 'steps': 29185, 'loss/train': 0.6411823034286499} +03/04/2022 23:45:16 - INFO - codeparrot_training - Step 29186: {'lr': 0.00045951529062817834, 'samples': 14943744, 'steps': 29186, 'loss/train': 1.7613376379013062} +03/04/2022 23:45:18 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 23:45:21 - INFO - codeparrot_training - Step 29187: {'lr': 0.00045951239534708496, 'samples': 14944256, 'steps': 29187, 'loss/train': 1.896260380744934} +03/04/2022 23:45:24 - INFO - codeparrot_training - Step 29188: {'lr': 0.0004595094999715885, 'samples': 14944768, 'steps': 29188, 'loss/train': 1.6110234260559082} +03/04/2022 23:45:27 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/04/2022 23:45:30 - INFO - codeparrot_training - Step 29189: {'lr': 0.00045950660450169034, 'samples': 14945280, 'steps': 29189, 'loss/train': 1.4957785606384277} +03/04/2022 23:45:33 - INFO - codeparrot_training - Step 29190: {'lr': 0.0004595037089373918, 'samples': 14945792, 'steps': 29190, 'loss/train': 1.7709990739822388} +03/04/2022 23:45:35 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 23:45:38 - INFO - codeparrot_training - Step 29191: {'lr': 0.000459500813278694, 'samples': 14946304, 'steps': 29191, 'loss/train': 1.6369274854660034} +03/04/2022 23:45:41 - INFO - codeparrot_training - Step 29192: {'lr': 0.0004594979175255984, 'samples': 14946816, 'steps': 29192, 'loss/train': 2.431550979614258} +03/04/2022 23:45:43 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 23:45:46 - INFO - codeparrot_training - Step 29193: {'lr': 0.0004594950216781063, 'samples': 14947328, 'steps': 29193, 'loss/train': 1.165001630783081} +03/04/2022 23:45:49 - INFO - codeparrot_training - Step 29194: {'lr': 0.000459492125736219, 'samples': 14947840, 'steps': 29194, 'loss/train': 1.9006632566452026} +03/04/2022 23:45:51 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 23:45:55 - INFO - codeparrot_training - Step 29195: {'lr': 0.00045948922969993777, 'samples': 14948352, 'steps': 29195, 'loss/train': 1.828600287437439} +03/04/2022 23:45:58 - INFO - codeparrot_training - Step 29196: {'lr': 0.0004594863335692639, 'samples': 14948864, 'steps': 29196, 'loss/train': 1.8421552181243896} +03/04/2022 23:46:00 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/04/2022 23:46:03 - INFO - codeparrot_training - Step 29197: {'lr': 0.00045948343734419873, 'samples': 14949376, 'steps': 29197, 'loss/train': 1.6671466827392578} +03/04/2022 23:46:06 - INFO - codeparrot_training - Step 29198: {'lr': 0.00045948054102474357, 'samples': 14949888, 'steps': 29198, 'loss/train': 3.118701934814453} +03/04/2022 23:46:08 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/04/2022 23:46:12 - INFO - codeparrot_training - Step 29199: {'lr': 0.00045947764461089967, 'samples': 14950400, 'steps': 29199, 'loss/train': 0.16215740144252777} +03/04/2022 23:46:15 - INFO - codeparrot_training - Step 29200: {'lr': 0.00045947474810266844, 'samples': 14950912, 'steps': 29200, 'loss/train': 1.2064462900161743} +03/04/2022 23:46:16 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/04/2022 23:46:20 - INFO - codeparrot_training - Step 29201: {'lr': 0.00045947185150005106, 'samples': 14951424, 'steps': 29201, 'loss/train': 2.1561901569366455} +03/04/2022 23:46:23 - INFO - codeparrot_training - Step 29202: {'lr': 0.0004594689548030489, 'samples': 14951936, 'steps': 29202, 'loss/train': 2.059141159057617} +03/04/2022 23:46:24 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/04/2022 23:46:28 - INFO - codeparrot_training - Step 29203: {'lr': 0.0004594660580116633, 'samples': 14952448, 'steps': 29203, 'loss/train': 1.4568690061569214} +03/04/2022 23:46:31 - INFO - codeparrot_training - Step 29204: {'lr': 0.00045946316112589546, 'samples': 14952960, 'steps': 29204, 'loss/train': 1.5101159811019897} +03/04/2022 23:46:32 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 23:46:37 - INFO - codeparrot_training - Step 29205: {'lr': 0.0004594602641457468, 'samples': 14953472, 'steps': 29205, 'loss/train': 1.8471393585205078} +03/04/2022 23:46:40 - INFO - codeparrot_training - Step 29206: {'lr': 0.0004594573670712186, 'samples': 14953984, 'steps': 29206, 'loss/train': 1.5102828741073608} +03/04/2022 23:46:41 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 23:46:45 - INFO - codeparrot_training - Step 29207: {'lr': 0.0004594544699023121, 'samples': 14954496, 'steps': 29207, 'loss/train': 0.9241095185279846} +03/04/2022 23:46:48 - INFO - codeparrot_training - Step 29208: {'lr': 0.0004594515726390287, 'samples': 14955008, 'steps': 29208, 'loss/train': 1.493963599205017} +03/04/2022 23:46:49 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 23:46:53 - INFO - codeparrot_training - Step 29209: {'lr': 0.00045944867528136956, 'samples': 14955520, 'steps': 29209, 'loss/train': 2.2859549522399902} +03/04/2022 23:46:57 - INFO - codeparrot_training - Step 29210: {'lr': 0.00045944577782933615, 'samples': 14956032, 'steps': 29210, 'loss/train': 1.8016430139541626} +03/04/2022 23:46:57 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 23:47:02 - INFO - codeparrot_training - Step 29211: {'lr': 0.0004594428802829297, 'samples': 14956544, 'steps': 29211, 'loss/train': 1.4314749240875244} +03/04/2022 23:47:05 - INFO - codeparrot_training - Step 29212: {'lr': 0.00045943998264215153, 'samples': 14957056, 'steps': 29212, 'loss/train': 1.7682195901870728} +03/04/2022 23:47:05 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 23:47:10 - INFO - codeparrot_training - Step 29213: {'lr': 0.0004594370849070029, 'samples': 14957568, 'steps': 29213, 'loss/train': 2.0264620780944824} +03/04/2022 23:47:13 - INFO - codeparrot_training - Step 29214: {'lr': 0.00045943418707748517, 'samples': 14958080, 'steps': 29214, 'loss/train': 1.371829628944397} +03/04/2022 23:47:13 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 23:47:19 - INFO - codeparrot_training - Step 29215: {'lr': 0.00045943128915359966, 'samples': 14958592, 'steps': 29215, 'loss/train': 0.9132925271987915} +03/04/2022 23:47:21 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 23:47:24 - INFO - codeparrot_training - Step 29216: {'lr': 0.0004594283911353476, 'samples': 14959104, 'steps': 29216, 'loss/train': 1.1706929206848145} +03/04/2022 23:47:27 - INFO - codeparrot_training - Step 29217: {'lr': 0.0004594254930227303, 'samples': 14959616, 'steps': 29217, 'loss/train': 1.4035017490386963} +03/04/2022 23:47:30 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/04/2022 23:47:32 - INFO - codeparrot_training - Step 29218: {'lr': 0.0004594225948157492, 'samples': 14960128, 'steps': 29218, 'loss/train': 1.3465173244476318} +03/04/2022 23:47:35 - INFO - codeparrot_training - Step 29219: {'lr': 0.0004594196965144054, 'samples': 14960640, 'steps': 29219, 'loss/train': 1.5257153511047363} +03/04/2022 23:47:38 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 23:47:41 - INFO - codeparrot_training - Step 29220: {'lr': 0.0004594167981187004, 'samples': 14961152, 'steps': 29220, 'loss/train': 2.235862970352173} +03/04/2022 23:47:44 - INFO - codeparrot_training - Step 29221: {'lr': 0.00045941389962863546, 'samples': 14961664, 'steps': 29221, 'loss/train': 1.472684621810913} +03/04/2022 23:47:46 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 23:47:49 - INFO - codeparrot_training - Step 29222: {'lr': 0.00045941100104421176, 'samples': 14962176, 'steps': 29222, 'loss/train': 2.257507085800171} +03/04/2022 23:47:52 - INFO - codeparrot_training - Step 29223: {'lr': 0.0004594081023654307, 'samples': 14962688, 'steps': 29223, 'loss/train': 1.8167643547058105} +03/04/2022 23:47:55 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 23:47:57 - INFO - codeparrot_training - Step 29224: {'lr': 0.00045940520359229366, 'samples': 14963200, 'steps': 29224, 'loss/train': 0.5609900951385498} +03/04/2022 23:48:01 - INFO - codeparrot_training - Step 29225: {'lr': 0.0004594023047248018, 'samples': 14963712, 'steps': 29225, 'loss/train': 1.807757019996643} +03/04/2022 23:48:03 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 23:48:06 - INFO - codeparrot_training - Step 29226: {'lr': 0.0004593994057629565, 'samples': 14964224, 'steps': 29226, 'loss/train': 1.5098109245300293} +03/04/2022 23:48:09 - INFO - codeparrot_training - Step 29227: {'lr': 0.000459396506706759, 'samples': 14964736, 'steps': 29227, 'loss/train': 1.2664109468460083} +03/04/2022 23:48:11 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 23:48:14 - INFO - codeparrot_training - Step 29228: {'lr': 0.00045939360755621074, 'samples': 14965248, 'steps': 29228, 'loss/train': 2.0569136142730713} +03/04/2022 23:48:18 - INFO - codeparrot_training - Step 29229: {'lr': 0.00045939070831131293, 'samples': 14965760, 'steps': 29229, 'loss/train': 1.0897387266159058} +03/04/2022 23:48:20 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 23:48:23 - INFO - codeparrot_training - Step 29230: {'lr': 0.00045938780897206686, 'samples': 14966272, 'steps': 29230, 'loss/train': 1.7062350511550903} +03/04/2022 23:48:26 - INFO - codeparrot_training - Step 29231: {'lr': 0.000459384909538474, 'samples': 14966784, 'steps': 29231, 'loss/train': 1.452660322189331} +03/04/2022 23:48:28 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 23:48:31 - INFO - codeparrot_training - Step 29232: {'lr': 0.00045938201001053546, 'samples': 14967296, 'steps': 29232, 'loss/train': 1.7859289646148682} +03/04/2022 23:48:34 - INFO - codeparrot_training - Step 29233: {'lr': 0.00045937911038825257, 'samples': 14967808, 'steps': 29233, 'loss/train': 2.056885004043579} +03/04/2022 23:48:36 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/04/2022 23:48:40 - INFO - codeparrot_training - Step 29234: {'lr': 0.00045937621067162674, 'samples': 14968320, 'steps': 29234, 'loss/train': 1.49907648563385} +03/04/2022 23:48:43 - INFO - codeparrot_training - Step 29235: {'lr': 0.0004593733108606592, 'samples': 14968832, 'steps': 29235, 'loss/train': 1.9306999444961548} +03/04/2022 23:48:45 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 23:48:48 - INFO - codeparrot_training - Step 29236: {'lr': 0.00045937041095535125, 'samples': 14969344, 'steps': 29236, 'loss/train': 3.049901247024536} +03/04/2022 23:48:51 - INFO - codeparrot_training - Step 29237: {'lr': 0.00045936751095570426, 'samples': 14969856, 'steps': 29237, 'loss/train': 1.6104816198349} +03/04/2022 23:48:53 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/04/2022 23:48:56 - INFO - codeparrot_training - Step 29238: {'lr': 0.0004593646108617195, 'samples': 14970368, 'steps': 29238, 'loss/train': 1.4078501462936401} +03/04/2022 23:48:59 - INFO - codeparrot_training - Step 29239: {'lr': 0.00045936171067339826, 'samples': 14970880, 'steps': 29239, 'loss/train': 1.0141301155090332} +03/04/2022 23:49:01 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 23:49:05 - INFO - codeparrot_training - Step 29240: {'lr': 0.0004593588103907419, 'samples': 14971392, 'steps': 29240, 'loss/train': 1.2019420862197876} +03/04/2022 23:49:08 - INFO - codeparrot_training - Step 29241: {'lr': 0.00045935591001375163, 'samples': 14971904, 'steps': 29241, 'loss/train': 1.571325659751892} +03/04/2022 23:49:09 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 23:49:13 - INFO - codeparrot_training - Step 29242: {'lr': 0.0004593530095424289, 'samples': 14972416, 'steps': 29242, 'loss/train': 1.2857160568237305} +03/04/2022 23:49:16 - INFO - codeparrot_training - Step 29243: {'lr': 0.0004593501089767749, 'samples': 14972928, 'steps': 29243, 'loss/train': 1.6300240755081177} +03/04/2022 23:49:22 - INFO - codeparrot_training - Step 29244: {'lr': 0.00045934720831679093, 'samples': 14973440, 'steps': 29244, 'loss/train': 1.77163827419281} +03/04/2022 23:49:25 - INFO - codeparrot_training - Step 29245: {'lr': 0.00045934430756247835, 'samples': 14973952, 'steps': 29245, 'loss/train': 1.7497230768203735} +03/04/2022 23:49:27 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/04/2022 23:49:30 - INFO - codeparrot_training - Step 29246: {'lr': 0.0004593414067138385, 'samples': 14974464, 'steps': 29246, 'loss/train': 1.3070402145385742} +03/04/2022 23:49:33 - INFO - codeparrot_training - Step 29247: {'lr': 0.0004593385057708726, 'samples': 14974976, 'steps': 29247, 'loss/train': 0.07284893095493317} +03/04/2022 23:49:35 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 23:49:39 - INFO - codeparrot_training - Step 29248: {'lr': 0.00045933560473358206, 'samples': 14975488, 'steps': 29248, 'loss/train': 1.9109457731246948} +03/04/2022 23:49:42 - INFO - codeparrot_training - Step 29249: {'lr': 0.00045933270360196804, 'samples': 14976000, 'steps': 29249, 'loss/train': 1.7871251106262207} +03/04/2022 23:49:43 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/04/2022 23:49:47 - INFO - codeparrot_training - Step 29250: {'lr': 0.00045932980237603196, 'samples': 14976512, 'steps': 29250, 'loss/train': 1.1027573347091675} +03/04/2022 23:49:50 - INFO - codeparrot_training - Step 29251: {'lr': 0.0004593269010557751, 'samples': 14977024, 'steps': 29251, 'loss/train': 1.710270881652832} +03/04/2022 23:49:52 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/04/2022 23:49:56 - INFO - codeparrot_training - Step 29252: {'lr': 0.00045932399964119884, 'samples': 14977536, 'steps': 29252, 'loss/train': 0.8067901730537415} +03/04/2022 23:49:59 - INFO - codeparrot_training - Step 29253: {'lr': 0.00045932109813230437, 'samples': 14978048, 'steps': 29253, 'loss/train': 1.3872336149215698} +03/04/2022 23:50:00 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 23:50:04 - INFO - codeparrot_training - Step 29254: {'lr': 0.00045931819652909303, 'samples': 14978560, 'steps': 29254, 'loss/train': 2.0938286781311035} +03/04/2022 23:50:07 - INFO - codeparrot_training - Step 29255: {'lr': 0.0004593152948315661, 'samples': 14979072, 'steps': 29255, 'loss/train': 1.9173353910446167} +03/04/2022 23:50:08 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 23:50:12 - INFO - codeparrot_training - Step 29256: {'lr': 0.000459312393039725, 'samples': 14979584, 'steps': 29256, 'loss/train': 1.884562373161316} +03/04/2022 23:50:15 - INFO - codeparrot_training - Step 29257: {'lr': 0.0004593094911535709, 'samples': 14980096, 'steps': 29257, 'loss/train': 1.7618381977081299} +03/04/2022 23:50:16 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 23:50:21 - INFO - codeparrot_training - Step 29258: {'lr': 0.00045930658917310525, 'samples': 14980608, 'steps': 29258, 'loss/train': 0.8617112040519714} +03/04/2022 23:50:24 - INFO - codeparrot_training - Step 29259: {'lr': 0.0004593036870983293, 'samples': 14981120, 'steps': 29259, 'loss/train': 1.3619742393493652} +03/04/2022 23:50:25 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/04/2022 23:50:29 - INFO - codeparrot_training - Step 29260: {'lr': 0.0004593007849292442, 'samples': 14981632, 'steps': 29260, 'loss/train': 1.5753803253173828} +03/04/2022 23:50:32 - INFO - codeparrot_training - Step 29261: {'lr': 0.0004592978826658515, 'samples': 14982144, 'steps': 29261, 'loss/train': 1.4469960927963257} +03/04/2022 23:50:33 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 23:50:38 - INFO - codeparrot_training - Step 29262: {'lr': 0.0004592949803081524, 'samples': 14982656, 'steps': 29262, 'loss/train': 1.370040774345398} +03/04/2022 23:50:41 - INFO - codeparrot_training - Step 29263: {'lr': 0.0004592920778561481, 'samples': 14983168, 'steps': 29263, 'loss/train': 1.4759734869003296} +03/04/2022 23:50:42 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 23:50:46 - INFO - codeparrot_training - Step 29264: {'lr': 0.00045928917530984014, 'samples': 14983680, 'steps': 29264, 'loss/train': 1.5567537546157837} +03/04/2022 23:50:49 - INFO - codeparrot_training - Step 29265: {'lr': 0.00045928627266922974, 'samples': 14984192, 'steps': 29265, 'loss/train': 1.8861668109893799} +03/04/2022 23:50:50 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 23:50:54 - INFO - codeparrot_training - Step 29266: {'lr': 0.0004592833699343181, 'samples': 14984704, 'steps': 29266, 'loss/train': 1.1019564867019653} +03/04/2022 23:50:58 - INFO - codeparrot_training - Step 29267: {'lr': 0.0004592804671051066, 'samples': 14985216, 'steps': 29267, 'loss/train': 1.5800999402999878} +03/04/2022 23:50:58 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 23:51:03 - INFO - codeparrot_training - Step 29268: {'lr': 0.0004592775641815966, 'samples': 14985728, 'steps': 29268, 'loss/train': 2.321160316467285} +03/04/2022 23:51:06 - INFO - codeparrot_training - Step 29269: {'lr': 0.0004592746611637893, 'samples': 14986240, 'steps': 29269, 'loss/train': 1.6887288093566895} +03/04/2022 23:51:07 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/04/2022 23:51:11 - INFO - codeparrot_training - Step 29270: {'lr': 0.00045927175805168607, 'samples': 14986752, 'steps': 29270, 'loss/train': 1.3715522289276123} +03/04/2022 23:51:14 - INFO - codeparrot_training - Step 29271: {'lr': 0.00045926885484528823, 'samples': 14987264, 'steps': 29271, 'loss/train': 1.1664009094238281} +03/04/2022 23:51:15 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 23:51:20 - INFO - codeparrot_training - Step 29272: {'lr': 0.0004592659515445971, 'samples': 14987776, 'steps': 29272, 'loss/train': 2.1006245613098145} +03/04/2022 23:51:23 - INFO - codeparrot_training - Step 29273: {'lr': 0.00045926304814961397, 'samples': 14988288, 'steps': 29273, 'loss/train': 1.6812574863433838} +03/04/2022 23:51:23 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 23:51:28 - INFO - codeparrot_training - Step 29274: {'lr': 0.00045926014466034004, 'samples': 14988800, 'steps': 29274, 'loss/train': 1.5413551330566406} +03/04/2022 23:51:31 - INFO - codeparrot_training - Step 29275: {'lr': 0.0004592572410767768, 'samples': 14989312, 'steps': 29275, 'loss/train': 1.401471734046936} +03/04/2022 23:51:32 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 23:51:37 - INFO - codeparrot_training - Step 29276: {'lr': 0.0004592543373989255, 'samples': 14989824, 'steps': 29276, 'loss/train': 2.292088747024536} +03/04/2022 23:51:40 - INFO - codeparrot_training - Step 29277: {'lr': 0.0004592514336267874, 'samples': 14990336, 'steps': 29277, 'loss/train': 1.968109369277954} +03/04/2022 23:51:40 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/04/2022 23:51:45 - INFO - codeparrot_training - Step 29278: {'lr': 0.0004592485297603638, 'samples': 14990848, 'steps': 29278, 'loss/train': 1.6951879262924194} +03/04/2022 23:51:49 - INFO - codeparrot_training - Step 29279: {'lr': 0.0004592456257996561, 'samples': 14991360, 'steps': 29279, 'loss/train': 6.557301044464111} +03/04/2022 23:51:50 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 23:51:54 - INFO - codeparrot_training - Step 29280: {'lr': 0.0004592427217446655, 'samples': 14991872, 'steps': 29280, 'loss/train': 1.8735361099243164} +03/04/2022 23:51:57 - INFO - codeparrot_training - Step 29281: {'lr': 0.00045923981759539336, 'samples': 14992384, 'steps': 29281, 'loss/train': 1.4063336849212646} +03/04/2022 23:51:58 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/04/2022 23:52:02 - INFO - codeparrot_training - Step 29282: {'lr': 0.000459236913351841, 'samples': 14992896, 'steps': 29282, 'loss/train': 0.7368330359458923} +03/04/2022 23:52:05 - INFO - codeparrot_training - Step 29283: {'lr': 0.0004592340090140097, 'samples': 14993408, 'steps': 29283, 'loss/train': 1.0861183404922485} +03/04/2022 23:52:07 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/04/2022 23:52:11 - INFO - codeparrot_training - Step 29284: {'lr': 0.0004592311045819008, 'samples': 14993920, 'steps': 29284, 'loss/train': 1.5231988430023193} +03/04/2022 23:52:14 - INFO - codeparrot_training - Step 29285: {'lr': 0.00045922820005551556, 'samples': 14994432, 'steps': 29285, 'loss/train': 0.7439308762550354} +03/04/2022 23:52:15 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 23:52:19 - INFO - codeparrot_training - Step 29286: {'lr': 0.0004592252954348554, 'samples': 14994944, 'steps': 29286, 'loss/train': 1.690280795097351} +03/04/2022 23:52:22 - INFO - codeparrot_training - Step 29287: {'lr': 0.0004592223907199215, 'samples': 14995456, 'steps': 29287, 'loss/train': 2.8310394287109375} +03/04/2022 23:52:23 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/04/2022 23:52:28 - INFO - codeparrot_training - Step 29288: {'lr': 0.0004592194859107153, 'samples': 14995968, 'steps': 29288, 'loss/train': 2.448380947113037} +03/04/2022 23:52:31 - INFO - codeparrot_training - Step 29289: {'lr': 0.0004592165810072379, 'samples': 14996480, 'steps': 29289, 'loss/train': 1.696124792098999} +03/04/2022 23:52:31 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/04/2022 23:52:36 - INFO - codeparrot_training - Step 29290: {'lr': 0.00045921367600949077, 'samples': 14996992, 'steps': 29290, 'loss/train': 1.7035648822784424} +03/04/2022 23:52:39 - INFO - codeparrot_training - Step 29291: {'lr': 0.0004592107709174752, 'samples': 14997504, 'steps': 29291, 'loss/train': 1.3913615942001343} +03/04/2022 23:52:40 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/04/2022 23:52:44 - INFO - codeparrot_training - Step 29292: {'lr': 0.0004592078657311925, 'samples': 14998016, 'steps': 29292, 'loss/train': 1.4456334114074707} +03/04/2022 23:52:48 - INFO - codeparrot_training - Step 29293: {'lr': 0.000459204960450644, 'samples': 14998528, 'steps': 29293, 'loss/train': 1.8641268014907837} +03/04/2022 23:52:48 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 23:52:53 - INFO - codeparrot_training - Step 29294: {'lr': 0.0004592020550758309, 'samples': 14999040, 'steps': 29294, 'loss/train': 1.8045189380645752} +03/04/2022 23:52:56 - INFO - codeparrot_training - Step 29295: {'lr': 0.0004591991496067546, 'samples': 14999552, 'steps': 29295, 'loss/train': 1.8933684825897217} +03/04/2022 23:52:56 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 23:53:01 - INFO - codeparrot_training - Step 29296: {'lr': 0.00045919624404341643, 'samples': 15000064, 'steps': 29296, 'loss/train': 1.6170521974563599} +03/04/2022 23:53:04 - INFO - codeparrot_training - Step 29297: {'lr': 0.00045919333838581757, 'samples': 15000576, 'steps': 29297, 'loss/train': 0.8565620183944702} +03/04/2022 23:53:04 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/04/2022 23:53:09 - INFO - codeparrot_training - Step 29298: {'lr': 0.00045919043263395953, 'samples': 15001088, 'steps': 29298, 'loss/train': 1.7696832418441772} +03/04/2022 23:53:13 - INFO - codeparrot_training - Step 29299: {'lr': 0.00045918752678784344, 'samples': 15001600, 'steps': 29299, 'loss/train': 2.1313703060150146} +03/04/2022 23:53:13 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 23:53:18 - INFO - codeparrot_training - Step 29300: {'lr': 0.0004591846208474707, 'samples': 15002112, 'steps': 29300, 'loss/train': 0.11841027438640594} +03/04/2022 23:53:21 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 23:53:23 - INFO - codeparrot_training - Step 29301: {'lr': 0.00045918171481284256, 'samples': 15002624, 'steps': 29301, 'loss/train': 2.164182424545288} +03/04/2022 23:53:26 - INFO - codeparrot_training - Step 29302: {'lr': 0.0004591788086839604, 'samples': 15003136, 'steps': 29302, 'loss/train': 2.7862143516540527} +03/04/2022 23:53:29 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 23:53:32 - INFO - codeparrot_training - Step 29303: {'lr': 0.0004591759024608255, 'samples': 15003648, 'steps': 29303, 'loss/train': 1.3453686237335205} +03/04/2022 23:53:35 - INFO - codeparrot_training - Step 29304: {'lr': 0.0004591729961434392, 'samples': 15004160, 'steps': 29304, 'loss/train': 1.5795297622680664} +03/04/2022 23:53:38 - INFO - codeparrot_training - Step 29305: {'lr': 0.00045917008973180273, 'samples': 15004672, 'steps': 29305, 'loss/train': 2.180284261703491} +03/04/2022 23:53:39 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 23:53:44 - INFO - codeparrot_training - Step 29306: {'lr': 0.0004591671832259174, 'samples': 15005184, 'steps': 29306, 'loss/train': 2.2017552852630615} +03/04/2022 23:53:47 - INFO - codeparrot_training - Step 29307: {'lr': 0.00045916427662578464, 'samples': 15005696, 'steps': 29307, 'loss/train': 1.3762558698654175} +03/04/2022 23:53:49 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 23:53:52 - INFO - codeparrot_training - Step 29308: {'lr': 0.00045916136993140574, 'samples': 15006208, 'steps': 29308, 'loss/train': 2.163473129272461} +03/04/2022 23:53:55 - INFO - codeparrot_training - Step 29309: {'lr': 0.00045915846314278187, 'samples': 15006720, 'steps': 29309, 'loss/train': 1.6123850345611572} +03/04/2022 23:53:57 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 23:54:01 - INFO - codeparrot_training - Step 29310: {'lr': 0.0004591555562599144, 'samples': 15007232, 'steps': 29310, 'loss/train': 1.4353289604187012} +03/04/2022 23:54:04 - INFO - codeparrot_training - Step 29311: {'lr': 0.00045915264928280476, 'samples': 15007744, 'steps': 29311, 'loss/train': 2.267923593521118} +03/04/2022 23:54:05 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/04/2022 23:54:09 - INFO - codeparrot_training - Step 29312: {'lr': 0.00045914974221145403, 'samples': 15008256, 'steps': 29312, 'loss/train': 1.6344242095947266} +03/04/2022 23:54:12 - INFO - codeparrot_training - Step 29313: {'lr': 0.00045914683504586374, 'samples': 15008768, 'steps': 29313, 'loss/train': 1.2414194345474243} +03/04/2022 23:54:13 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 23:54:17 - INFO - codeparrot_training - Step 29314: {'lr': 0.0004591439277860351, 'samples': 15009280, 'steps': 29314, 'loss/train': 1.7932771444320679} +03/04/2022 23:54:21 - INFO - codeparrot_training - Step 29315: {'lr': 0.00045914102043196947, 'samples': 15009792, 'steps': 29315, 'loss/train': 2.549272298812866} +03/04/2022 23:54:22 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 23:54:26 - INFO - codeparrot_training - Step 29316: {'lr': 0.00045913811298366804, 'samples': 15010304, 'steps': 29316, 'loss/train': 1.155446171760559} +03/04/2022 23:54:29 - INFO - codeparrot_training - Step 29317: {'lr': 0.0004591352054411323, 'samples': 15010816, 'steps': 29317, 'loss/train': 1.878786325454712} +03/04/2022 23:54:30 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 23:54:34 - INFO - codeparrot_training - Step 29318: {'lr': 0.00045913229780436337, 'samples': 15011328, 'steps': 29318, 'loss/train': 1.4083280563354492} +03/04/2022 23:54:38 - INFO - codeparrot_training - Step 29319: {'lr': 0.00045912939007336273, 'samples': 15011840, 'steps': 29319, 'loss/train': 1.5641683340072632} +03/04/2022 23:54:38 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 23:54:43 - INFO - codeparrot_training - Step 29320: {'lr': 0.0004591264822481316, 'samples': 15012352, 'steps': 29320, 'loss/train': 1.866489052772522} +03/04/2022 23:54:46 - INFO - codeparrot_training - Step 29321: {'lr': 0.00045912357432867124, 'samples': 15012864, 'steps': 29321, 'loss/train': 0.9689801931381226} +03/04/2022 23:54:47 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 23:54:51 - INFO - codeparrot_training - Step 29322: {'lr': 0.00045912066631498304, 'samples': 15013376, 'steps': 29322, 'loss/train': 1.7738481760025024} +03/04/2022 23:54:54 - INFO - codeparrot_training - Step 29323: {'lr': 0.00045911775820706835, 'samples': 15013888, 'steps': 29323, 'loss/train': 1.4786698818206787} +03/04/2022 23:54:55 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 23:55:00 - INFO - codeparrot_training - Step 29324: {'lr': 0.0004591148500049284, 'samples': 15014400, 'steps': 29324, 'loss/train': 2.230304479598999} +03/04/2022 23:55:03 - INFO - codeparrot_training - Step 29325: {'lr': 0.00045911194170856454, 'samples': 15014912, 'steps': 29325, 'loss/train': 1.7666839361190796} +03/04/2022 23:55:04 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/04/2022 23:55:08 - INFO - codeparrot_training - Step 29326: {'lr': 0.00045910903331797807, 'samples': 15015424, 'steps': 29326, 'loss/train': 2.009965658187866} +03/04/2022 23:55:11 - INFO - codeparrot_training - Step 29327: {'lr': 0.00045910612483317025, 'samples': 15015936, 'steps': 29327, 'loss/train': 1.6263728141784668} +03/04/2022 23:55:12 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 23:55:17 - INFO - codeparrot_training - Step 29328: {'lr': 0.00045910321625414245, 'samples': 15016448, 'steps': 29328, 'loss/train': 1.7851859331130981} +03/04/2022 23:55:20 - INFO - codeparrot_training - Step 29329: {'lr': 0.00045910030758089597, 'samples': 15016960, 'steps': 29329, 'loss/train': 1.5645885467529297} +03/04/2022 23:55:21 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 23:55:25 - INFO - codeparrot_training - Step 29330: {'lr': 0.00045909739881343215, 'samples': 15017472, 'steps': 29330, 'loss/train': 1.4436148405075073} +03/04/2022 23:55:28 - INFO - codeparrot_training - Step 29331: {'lr': 0.00045909448995175224, 'samples': 15017984, 'steps': 29331, 'loss/train': 2.3456761837005615} +03/04/2022 23:55:29 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/04/2022 23:55:33 - INFO - codeparrot_training - Step 29332: {'lr': 0.00045909158099585756, 'samples': 15018496, 'steps': 29332, 'loss/train': 2.136915445327759} +03/04/2022 23:55:37 - INFO - codeparrot_training - Step 29333: {'lr': 0.00045908867194574955, 'samples': 15019008, 'steps': 29333, 'loss/train': 2.6090152263641357} +03/04/2022 23:55:37 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/04/2022 23:55:42 - INFO - codeparrot_training - Step 29334: {'lr': 0.00045908576280142925, 'samples': 15019520, 'steps': 29334, 'loss/train': 1.0786669254302979} +03/04/2022 23:55:45 - INFO - codeparrot_training - Step 29335: {'lr': 0.00045908285356289824, 'samples': 15020032, 'steps': 29335, 'loss/train': 1.4498578310012817} +03/04/2022 23:55:46 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 23:55:50 - INFO - codeparrot_training - Step 29336: {'lr': 0.0004590799442301577, 'samples': 15020544, 'steps': 29336, 'loss/train': 1.2504308223724365} +03/04/2022 23:55:53 - INFO - codeparrot_training - Step 29337: {'lr': 0.00045907703480320894, 'samples': 15021056, 'steps': 29337, 'loss/train': 2.438580274581909} +03/04/2022 23:55:54 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 23:55:59 - INFO - codeparrot_training - Step 29338: {'lr': 0.0004590741252820533, 'samples': 15021568, 'steps': 29338, 'loss/train': 2.110753059387207} +03/04/2022 23:56:02 - INFO - codeparrot_training - Step 29339: {'lr': 0.00045907121566669216, 'samples': 15022080, 'steps': 29339, 'loss/train': 1.3871588706970215} +03/04/2022 23:56:02 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 23:56:07 - INFO - codeparrot_training - Step 29340: {'lr': 0.0004590683059571267, 'samples': 15022592, 'steps': 29340, 'loss/train': 1.143405795097351} +03/04/2022 23:56:10 - INFO - codeparrot_training - Step 29341: {'lr': 0.0004590653961533582, 'samples': 15023104, 'steps': 29341, 'loss/train': 1.4088921546936035} +03/04/2022 23:56:11 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 23:56:15 - INFO - codeparrot_training - Step 29342: {'lr': 0.00045906248625538816, 'samples': 15023616, 'steps': 29342, 'loss/train': 0.20147539675235748} +03/04/2022 23:56:19 - INFO - codeparrot_training - Step 29343: {'lr': 0.00045905957626321775, 'samples': 15024128, 'steps': 29343, 'loss/train': 2.1137566566467285} +03/04/2022 23:56:19 - INFO - codeparrot_training - Skipping example with length 255 (seq_length=1024) +03/04/2022 23:56:24 - INFO - codeparrot_training - Step 29344: {'lr': 0.0004590566661768484, 'samples': 15024640, 'steps': 29344, 'loss/train': 0.6321955323219299} +03/04/2022 23:56:27 - INFO - codeparrot_training - Step 29345: {'lr': 0.00045905375599628127, 'samples': 15025152, 'steps': 29345, 'loss/train': 1.2742096185684204} +03/04/2022 23:56:27 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/04/2022 23:56:32 - INFO - codeparrot_training - Step 29346: {'lr': 0.00045905084572151774, 'samples': 15025664, 'steps': 29346, 'loss/train': 1.6061334609985352} +03/04/2022 23:56:36 - INFO - codeparrot_training - Step 29347: {'lr': 0.0004590479353525591, 'samples': 15026176, 'steps': 29347, 'loss/train': 1.5192416906356812} +03/04/2022 23:56:36 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 23:56:41 - INFO - codeparrot_training - Step 29348: {'lr': 0.00045904502488940677, 'samples': 15026688, 'steps': 29348, 'loss/train': 1.9727452993392944} +03/04/2022 23:56:44 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/04/2022 23:56:46 - INFO - codeparrot_training - Step 29349: {'lr': 0.0004590421143320619, 'samples': 15027200, 'steps': 29349, 'loss/train': 1.8296093940734863} +03/04/2022 23:56:49 - INFO - codeparrot_training - Step 29350: {'lr': 0.0004590392036805259, 'samples': 15027712, 'steps': 29350, 'loss/train': 1.5805566310882568} +03/04/2022 23:56:52 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/04/2022 23:56:54 - INFO - codeparrot_training - Step 29351: {'lr': 0.0004590362929348001, 'samples': 15028224, 'steps': 29351, 'loss/train': 1.9039947986602783} +03/04/2022 23:56:57 - INFO - codeparrot_training - Step 29352: {'lr': 0.00045903338209488575, 'samples': 15028736, 'steps': 29352, 'loss/train': 1.7140283584594727} +03/04/2022 23:57:00 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 23:57:03 - INFO - codeparrot_training - Step 29353: {'lr': 0.0004590304711607842, 'samples': 15029248, 'steps': 29353, 'loss/train': 2.059628963470459} +03/04/2022 23:57:06 - INFO - codeparrot_training - Step 29354: {'lr': 0.0004590275601324967, 'samples': 15029760, 'steps': 29354, 'loss/train': 2.090101957321167} +03/04/2022 23:57:08 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 23:57:11 - INFO - codeparrot_training - Step 29355: {'lr': 0.0004590246490100246, 'samples': 15030272, 'steps': 29355, 'loss/train': 1.8837968111038208} +03/04/2022 23:57:14 - INFO - codeparrot_training - Step 29356: {'lr': 0.00045902173779336925, 'samples': 15030784, 'steps': 29356, 'loss/train': 1.6638803482055664} +03/04/2022 23:57:17 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 23:57:20 - INFO - codeparrot_training - Step 29357: {'lr': 0.0004590188264825319, 'samples': 15031296, 'steps': 29357, 'loss/train': 1.9209269285202026} +03/04/2022 23:57:23 - INFO - codeparrot_training - Step 29358: {'lr': 0.00045901591507751393, 'samples': 15031808, 'steps': 29358, 'loss/train': 0.8897549510002136} +03/04/2022 23:57:25 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 23:57:28 - INFO - codeparrot_training - Step 29359: {'lr': 0.00045901300357831666, 'samples': 15032320, 'steps': 29359, 'loss/train': 1.6359342336654663} +03/04/2022 23:57:31 - INFO - codeparrot_training - Step 29360: {'lr': 0.00045901009198494124, 'samples': 15032832, 'steps': 29360, 'loss/train': 2.4698593616485596} +03/04/2022 23:57:34 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/04/2022 23:57:37 - INFO - codeparrot_training - Step 29361: {'lr': 0.0004590071802973892, 'samples': 15033344, 'steps': 29361, 'loss/train': 2.366023302078247} +03/04/2022 23:57:40 - INFO - codeparrot_training - Step 29362: {'lr': 0.0004590042685156617, 'samples': 15033856, 'steps': 29362, 'loss/train': 2.842386484146118} +03/04/2022 23:57:42 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 23:57:45 - INFO - codeparrot_training - Step 29363: {'lr': 0.0004590013566397601, 'samples': 15034368, 'steps': 29363, 'loss/train': 2.3161425590515137} +03/04/2022 23:57:48 - INFO - codeparrot_training - Step 29364: {'lr': 0.00045899844466968574, 'samples': 15034880, 'steps': 29364, 'loss/train': 1.6863075494766235} +03/04/2022 23:57:51 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 23:57:54 - INFO - codeparrot_training - Step 29365: {'lr': 0.00045899553260543986, 'samples': 15035392, 'steps': 29365, 'loss/train': 0.3066198527812958} +03/04/2022 23:57:57 - INFO - codeparrot_training - Step 29366: {'lr': 0.0004589926204470238, 'samples': 15035904, 'steps': 29366, 'loss/train': 1.0797796249389648} +03/04/2022 23:57:59 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 23:58:02 - INFO - codeparrot_training - Step 29367: {'lr': 0.000458989708194439, 'samples': 15036416, 'steps': 29367, 'loss/train': 1.6987740993499756} +03/04/2022 23:58:05 - INFO - codeparrot_training - Step 29368: {'lr': 0.0004589867958476866, 'samples': 15036928, 'steps': 29368, 'loss/train': 1.0507696866989136} +03/04/2022 23:58:07 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 23:58:11 - INFO - codeparrot_training - Step 29369: {'lr': 0.000458983883406768, 'samples': 15037440, 'steps': 29369, 'loss/train': 1.1876485347747803} +03/04/2022 23:58:14 - INFO - codeparrot_training - Step 29370: {'lr': 0.0004589809708716844, 'samples': 15037952, 'steps': 29370, 'loss/train': 0.9224165081977844} +03/04/2022 23:58:16 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/04/2022 23:58:19 - INFO - codeparrot_training - Step 29371: {'lr': 0.0004589780582424373, 'samples': 15038464, 'steps': 29371, 'loss/train': 1.6911582946777344} +03/04/2022 23:58:22 - INFO - codeparrot_training - Step 29372: {'lr': 0.00045897514551902785, 'samples': 15038976, 'steps': 29372, 'loss/train': 1.165657877922058} +03/04/2022 23:58:24 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 23:58:27 - INFO - codeparrot_training - Step 29373: {'lr': 0.0004589722327014575, 'samples': 15039488, 'steps': 29373, 'loss/train': 0.9894626140594482} +03/04/2022 23:58:30 - INFO - codeparrot_training - Step 29374: {'lr': 0.0004589693197897274, 'samples': 15040000, 'steps': 29374, 'loss/train': 1.0873037576675415} +03/04/2022 23:58:32 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 23:58:36 - INFO - codeparrot_training - Step 29375: {'lr': 0.0004589664067838389, 'samples': 15040512, 'steps': 29375, 'loss/train': 1.1943727731704712} +03/04/2022 23:58:39 - INFO - codeparrot_training - Step 29376: {'lr': 0.00045896349368379356, 'samples': 15041024, 'steps': 29376, 'loss/train': 1.3523303270339966} +03/04/2022 23:58:40 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 23:58:44 - INFO - codeparrot_training - Step 29377: {'lr': 0.00045896058048959233, 'samples': 15041536, 'steps': 29377, 'loss/train': 1.1251169443130493} +03/04/2022 23:58:47 - INFO - codeparrot_training - Step 29378: {'lr': 0.00045895766720123677, 'samples': 15042048, 'steps': 29378, 'loss/train': 1.760446310043335} +03/04/2022 23:58:49 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 23:58:53 - INFO - codeparrot_training - Step 29379: {'lr': 0.0004589547538187281, 'samples': 15042560, 'steps': 29379, 'loss/train': 1.7541284561157227} +03/04/2022 23:58:56 - INFO - codeparrot_training - Step 29380: {'lr': 0.0004589518403420676, 'samples': 15043072, 'steps': 29380, 'loss/train': 1.3408077955245972} +03/04/2022 23:58:57 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 23:59:01 - INFO - codeparrot_training - Step 29381: {'lr': 0.00045894892677125667, 'samples': 15043584, 'steps': 29381, 'loss/train': 1.3243436813354492} +03/04/2022 23:59:04 - INFO - codeparrot_training - Step 29382: {'lr': 0.0004589460131062965, 'samples': 15044096, 'steps': 29382, 'loss/train': 1.48696768283844} +03/04/2022 23:59:05 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 23:59:10 - INFO - codeparrot_training - Step 29383: {'lr': 0.00045894309934718853, 'samples': 15044608, 'steps': 29383, 'loss/train': 1.6326838731765747} +03/04/2022 23:59:13 - INFO - codeparrot_training - Step 29384: {'lr': 0.00045894018549393404, 'samples': 15045120, 'steps': 29384, 'loss/train': 1.7004551887512207} +03/04/2022 23:59:14 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/04/2022 23:59:18 - INFO - codeparrot_training - Step 29385: {'lr': 0.0004589372715465343, 'samples': 15045632, 'steps': 29385, 'loss/train': 1.7546541690826416} +03/04/2022 23:59:21 - INFO - codeparrot_training - Step 29386: {'lr': 0.0004589343575049907, 'samples': 15046144, 'steps': 29386, 'loss/train': 1.845996379852295} +03/04/2022 23:59:23 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 23:59:27 - INFO - codeparrot_training - Step 29387: {'lr': 0.0004589314433693044, 'samples': 15046656, 'steps': 29387, 'loss/train': 1.858646035194397} +03/04/2022 23:59:30 - INFO - codeparrot_training - Step 29388: {'lr': 0.0004589285291394769, 'samples': 15047168, 'steps': 29388, 'loss/train': 2.1043648719787598} +03/04/2022 23:59:31 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 23:59:35 - INFO - codeparrot_training - Step 29389: {'lr': 0.00045892561481550943, 'samples': 15047680, 'steps': 29389, 'loss/train': 2.2668938636779785} +03/04/2022 23:59:38 - INFO - codeparrot_training - Step 29390: {'lr': 0.0004589227003974032, 'samples': 15048192, 'steps': 29390, 'loss/train': 0.5943100452423096} +03/04/2022 23:59:39 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 23:59:43 - INFO - codeparrot_training - Step 29391: {'lr': 0.00045891978588515975, 'samples': 15048704, 'steps': 29391, 'loss/train': 1.4961278438568115} +03/04/2022 23:59:47 - INFO - codeparrot_training - Step 29392: {'lr': 0.0004589168712787802, 'samples': 15049216, 'steps': 29392, 'loss/train': 1.9302963018417358} +03/04/2022 23:59:48 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 23:59:52 - INFO - codeparrot_training - Step 29393: {'lr': 0.00045891395657826595, 'samples': 15049728, 'steps': 29393, 'loss/train': 1.2685033082962036} +03/04/2022 23:59:55 - INFO - codeparrot_training - Step 29394: {'lr': 0.0004589110417836183, 'samples': 15050240, 'steps': 29394, 'loss/train': 2.0484282970428467} +03/04/2022 23:59:56 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 00:00:00 - INFO - codeparrot_training - Step 29395: {'lr': 0.0004589081268948386, 'samples': 15050752, 'steps': 29395, 'loss/train': 1.7589514255523682} +03/05/2022 00:00:04 - INFO - codeparrot_training - Step 29396: {'lr': 0.00045890521191192807, 'samples': 15051264, 'steps': 29396, 'loss/train': 2.22849440574646} +03/05/2022 00:00:05 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/05/2022 00:00:09 - INFO - codeparrot_training - Step 29397: {'lr': 0.0004589022968348881, 'samples': 15051776, 'steps': 29397, 'loss/train': 1.3926750421524048} +03/05/2022 00:00:12 - INFO - codeparrot_training - Step 29398: {'lr': 0.0004588993816637199, 'samples': 15052288, 'steps': 29398, 'loss/train': 2.7406697273254395} +03/05/2022 00:00:14 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 00:00:18 - INFO - codeparrot_training - Step 29399: {'lr': 0.00045889646639842496, 'samples': 15052800, 'steps': 29399, 'loss/train': 0.07882201671600342} +03/05/2022 00:00:21 - INFO - codeparrot_training - Step 29400: {'lr': 0.0004588935510390045, 'samples': 15053312, 'steps': 29400, 'loss/train': 1.705450177192688} +03/05/2022 00:00:22 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 00:00:26 - INFO - codeparrot_training - Step 29401: {'lr': 0.00045889063558545974, 'samples': 15053824, 'steps': 29401, 'loss/train': 2.498893976211548} +03/05/2022 00:00:29 - INFO - codeparrot_training - Step 29402: {'lr': 0.0004588877200377921, 'samples': 15054336, 'steps': 29402, 'loss/train': 1.8964338302612305} +03/05/2022 00:00:30 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/05/2022 00:00:35 - INFO - codeparrot_training - Step 29403: {'lr': 0.000458884804396003, 'samples': 15054848, 'steps': 29403, 'loss/train': 2.4789299964904785} +03/05/2022 00:00:38 - INFO - codeparrot_training - Step 29404: {'lr': 0.0004588818886600935, 'samples': 15055360, 'steps': 29404, 'loss/train': 2.4294638633728027} +03/05/2022 00:00:39 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 00:00:43 - INFO - codeparrot_training - Step 29405: {'lr': 0.00045887897283006506, 'samples': 15055872, 'steps': 29405, 'loss/train': 1.401419997215271} +03/05/2022 00:00:46 - INFO - codeparrot_training - Step 29406: {'lr': 0.00045887605690591904, 'samples': 15056384, 'steps': 29406, 'loss/train': 1.2634106874465942} +03/05/2022 00:00:48 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/05/2022 00:00:51 - INFO - codeparrot_training - Step 29407: {'lr': 0.0004588731408876566, 'samples': 15056896, 'steps': 29407, 'loss/train': 2.0198323726654053} +03/05/2022 00:00:55 - INFO - codeparrot_training - Step 29408: {'lr': 0.00045887022477527923, 'samples': 15057408, 'steps': 29408, 'loss/train': 1.6822476387023926} +03/05/2022 00:00:56 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/05/2022 00:01:00 - INFO - codeparrot_training - Step 29409: {'lr': 0.0004588673085687881, 'samples': 15057920, 'steps': 29409, 'loss/train': 1.9611732959747314} +03/05/2022 00:01:03 - INFO - codeparrot_training - Step 29410: {'lr': 0.00045886439226818464, 'samples': 15058432, 'steps': 29410, 'loss/train': 1.950826644897461} +03/05/2022 00:01:04 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/05/2022 00:01:08 - INFO - codeparrot_training - Step 29411: {'lr': 0.0004588614758734701, 'samples': 15058944, 'steps': 29411, 'loss/train': 2.281588315963745} +03/05/2022 00:01:11 - INFO - codeparrot_training - Step 29412: {'lr': 0.0004588585593846458, 'samples': 15059456, 'steps': 29412, 'loss/train': 1.2941282987594604} +03/05/2022 00:01:13 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 00:01:17 - INFO - codeparrot_training - Step 29413: {'lr': 0.000458855642801713, 'samples': 15059968, 'steps': 29413, 'loss/train': 1.6963658332824707} +03/05/2022 00:01:20 - INFO - codeparrot_training - Step 29414: {'lr': 0.00045885272612467313, 'samples': 15060480, 'steps': 29414, 'loss/train': 2.2268877029418945} +03/05/2022 00:01:21 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/05/2022 00:01:25 - INFO - codeparrot_training - Step 29415: {'lr': 0.0004588498093535274, 'samples': 15060992, 'steps': 29415, 'loss/train': 2.108954906463623} +03/05/2022 00:01:28 - INFO - codeparrot_training - Step 29416: {'lr': 0.0004588468924882772, 'samples': 15061504, 'steps': 29416, 'loss/train': 1.09861159324646} +03/05/2022 00:01:29 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/05/2022 00:01:34 - INFO - codeparrot_training - Step 29417: {'lr': 0.0004588439755289238, 'samples': 15062016, 'steps': 29417, 'loss/train': 1.7276639938354492} +03/05/2022 00:01:37 - INFO - codeparrot_training - Step 29418: {'lr': 0.00045884105847546853, 'samples': 15062528, 'steps': 29418, 'loss/train': 1.8615974187850952} +03/05/2022 00:01:37 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/05/2022 00:01:42 - INFO - codeparrot_training - Step 29419: {'lr': 0.00045883814132791274, 'samples': 15063040, 'steps': 29419, 'loss/train': 1.5378037691116333} +03/05/2022 00:01:45 - INFO - codeparrot_training - Step 29420: {'lr': 0.0004588352240862577, 'samples': 15063552, 'steps': 29420, 'loss/train': 1.5039952993392944} +03/05/2022 00:01:46 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/05/2022 00:01:50 - INFO - codeparrot_training - Step 29421: {'lr': 0.0004588323067505047, 'samples': 15064064, 'steps': 29421, 'loss/train': 1.62093186378479} +03/05/2022 00:01:53 - INFO - codeparrot_training - Step 29422: {'lr': 0.00045882938932065504, 'samples': 15064576, 'steps': 29422, 'loss/train': 1.1943272352218628} +03/05/2022 00:01:54 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/05/2022 00:01:59 - INFO - codeparrot_training - Step 29423: {'lr': 0.0004588264717967101, 'samples': 15065088, 'steps': 29423, 'loss/train': 1.4881778955459595} +03/05/2022 00:02:02 - INFO - codeparrot_training - Step 29424: {'lr': 0.00045882355417867124, 'samples': 15065600, 'steps': 29424, 'loss/train': 1.2962485551834106} +03/05/2022 00:02:02 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/05/2022 00:02:07 - INFO - codeparrot_training - Step 29425: {'lr': 0.00045882063646653966, 'samples': 15066112, 'steps': 29425, 'loss/train': 1.5608347654342651} +03/05/2022 00:02:10 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/05/2022 00:02:12 - INFO - codeparrot_training - Step 29426: {'lr': 0.00045881771866031673, 'samples': 15066624, 'steps': 29426, 'loss/train': 1.1243586540222168} +03/05/2022 00:02:16 - INFO - codeparrot_training - Step 29427: {'lr': 0.00045881480076000376, 'samples': 15067136, 'steps': 29427, 'loss/train': 2.1950507164001465} +03/05/2022 00:02:18 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 00:02:21 - INFO - codeparrot_training - Step 29428: {'lr': 0.00045881188276560204, 'samples': 15067648, 'steps': 29428, 'loss/train': 1.659379243850708} +03/05/2022 00:02:24 - INFO - codeparrot_training - Step 29429: {'lr': 0.000458808964677113, 'samples': 15068160, 'steps': 29429, 'loss/train': 2.2346880435943604} +03/05/2022 00:02:26 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 00:02:29 - INFO - codeparrot_training - Step 29430: {'lr': 0.00045880604649453774, 'samples': 15068672, 'steps': 29430, 'loss/train': 1.9851168394088745} +03/05/2022 00:02:32 - INFO - codeparrot_training - Step 29431: {'lr': 0.00045880312821787775, 'samples': 15069184, 'steps': 29431, 'loss/train': 1.529115080833435} +03/05/2022 00:02:34 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 00:02:37 - INFO - codeparrot_training - Step 29432: {'lr': 0.00045880020984713434, 'samples': 15069696, 'steps': 29432, 'loss/train': 1.0846222639083862} +03/05/2022 00:02:41 - INFO - codeparrot_training - Step 29433: {'lr': 0.0004587972913823087, 'samples': 15070208, 'steps': 29433, 'loss/train': 1.2217762470245361} +03/05/2022 00:02:43 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/05/2022 00:02:46 - INFO - codeparrot_training - Step 29434: {'lr': 0.00045879437282340225, 'samples': 15070720, 'steps': 29434, 'loss/train': 3.139949083328247} +03/05/2022 00:02:49 - INFO - codeparrot_training - Step 29435: {'lr': 0.00045879145417041623, 'samples': 15071232, 'steps': 29435, 'loss/train': 1.553557276725769} +03/05/2022 00:02:51 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 00:02:54 - INFO - codeparrot_training - Step 29436: {'lr': 0.0004587885354233521, 'samples': 15071744, 'steps': 29436, 'loss/train': 2.011582374572754} +03/05/2022 00:02:58 - INFO - codeparrot_training - Step 29437: {'lr': 0.0004587856165822111, 'samples': 15072256, 'steps': 29437, 'loss/train': 1.313247799873352} +03/05/2022 00:02:59 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/05/2022 00:03:03 - INFO - codeparrot_training - Step 29438: {'lr': 0.0004587826976469944, 'samples': 15072768, 'steps': 29438, 'loss/train': 1.4834846258163452} +03/05/2022 00:03:06 - INFO - codeparrot_training - Step 29439: {'lr': 0.0004587797786177035, 'samples': 15073280, 'steps': 29439, 'loss/train': 1.4459228515625} +03/05/2022 00:03:08 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/05/2022 00:03:11 - INFO - codeparrot_training - Step 29440: {'lr': 0.0004587768594943396, 'samples': 15073792, 'steps': 29440, 'loss/train': 1.311254620552063} +03/05/2022 00:03:14 - INFO - codeparrot_training - Step 29441: {'lr': 0.00045877394027690413, 'samples': 15074304, 'steps': 29441, 'loss/train': 2.522144317626953} +03/05/2022 00:03:16 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 00:03:20 - INFO - codeparrot_training - Step 29442: {'lr': 0.0004587710209653984, 'samples': 15074816, 'steps': 29442, 'loss/train': 1.4157636165618896} +03/05/2022 00:03:23 - INFO - codeparrot_training - Step 29443: {'lr': 0.0004587681015598235, 'samples': 15075328, 'steps': 29443, 'loss/train': 1.665614366531372} +03/05/2022 00:03:24 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/05/2022 00:03:28 - INFO - codeparrot_training - Step 29444: {'lr': 0.00045876518206018103, 'samples': 15075840, 'steps': 29444, 'loss/train': 0.6438286304473877} +03/05/2022 00:03:31 - INFO - codeparrot_training - Step 29445: {'lr': 0.00045876226246647226, 'samples': 15076352, 'steps': 29445, 'loss/train': 1.809487223625183} +03/05/2022 00:03:33 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 00:03:37 - INFO - codeparrot_training - Step 29446: {'lr': 0.0004587593427786983, 'samples': 15076864, 'steps': 29446, 'loss/train': 1.5561292171478271} +03/05/2022 00:03:40 - INFO - codeparrot_training - Step 29447: {'lr': 0.0004587564229968606, 'samples': 15077376, 'steps': 29447, 'loss/train': 1.9052890539169312} +03/05/2022 00:03:41 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/05/2022 00:03:45 - INFO - codeparrot_training - Step 29448: {'lr': 0.00045875350312096053, 'samples': 15077888, 'steps': 29448, 'loss/train': 0.11432677507400513} +03/05/2022 00:03:48 - INFO - codeparrot_training - Step 29449: {'lr': 0.0004587505831509994, 'samples': 15078400, 'steps': 29449, 'loss/train': 1.7650479078292847} +03/05/2022 00:03:49 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 00:03:53 - INFO - codeparrot_training - Step 29450: {'lr': 0.0004587476630869784, 'samples': 15078912, 'steps': 29450, 'loss/train': 2.2848503589630127} +03/05/2022 00:03:57 - INFO - codeparrot_training - Step 29451: {'lr': 0.000458744742928899, 'samples': 15079424, 'steps': 29451, 'loss/train': 0.7896596789360046} +03/05/2022 00:03:57 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/05/2022 00:04:02 - INFO - codeparrot_training - Step 29452: {'lr': 0.00045874182267676236, 'samples': 15079936, 'steps': 29452, 'loss/train': 1.7087624073028564} +03/05/2022 00:04:05 - INFO - codeparrot_training - Step 29453: {'lr': 0.0004587389023305699, 'samples': 15080448, 'steps': 29453, 'loss/train': 0.23812542855739594} +03/05/2022 00:04:06 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 00:04:10 - INFO - codeparrot_training - Step 29454: {'lr': 0.00045873598189032295, 'samples': 15080960, 'steps': 29454, 'loss/train': 1.3694045543670654} +03/05/2022 00:04:13 - INFO - codeparrot_training - Step 29455: {'lr': 0.00045873306135602276, 'samples': 15081472, 'steps': 29455, 'loss/train': 1.7394646406173706} +03/05/2022 00:04:14 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 00:04:19 - INFO - codeparrot_training - Step 29456: {'lr': 0.00045873014072767064, 'samples': 15081984, 'steps': 29456, 'loss/train': 1.5809504985809326} +03/05/2022 00:04:22 - INFO - codeparrot_training - Step 29457: {'lr': 0.000458727220005268, 'samples': 15082496, 'steps': 29457, 'loss/train': 1.938810110092163} +03/05/2022 00:04:23 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 00:04:27 - INFO - codeparrot_training - Step 29458: {'lr': 0.00045872429918881606, 'samples': 15083008, 'steps': 29458, 'loss/train': 0.20404241979122162} +03/05/2022 00:04:30 - INFO - codeparrot_training - Step 29459: {'lr': 0.00045872137827831616, 'samples': 15083520, 'steps': 29459, 'loss/train': 1.9023499488830566} +03/05/2022 00:04:31 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/05/2022 00:04:36 - INFO - codeparrot_training - Step 29460: {'lr': 0.00045871845727376973, 'samples': 15084032, 'steps': 29460, 'loss/train': 1.2742539644241333} +03/05/2022 00:04:39 - INFO - codeparrot_training - Step 29461: {'lr': 0.0004587155361751778, 'samples': 15084544, 'steps': 29461, 'loss/train': 1.5251022577285767} +03/05/2022 00:04:41 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 00:04:44 - INFO - codeparrot_training - Step 29462: {'lr': 0.000458712614982542, 'samples': 15085056, 'steps': 29462, 'loss/train': 1.9927170276641846} +03/05/2022 00:04:47 - INFO - codeparrot_training - Step 29463: {'lr': 0.00045870969369586346, 'samples': 15085568, 'steps': 29463, 'loss/train': 1.6972904205322266} +03/05/2022 00:04:49 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 00:04:53 - INFO - codeparrot_training - Step 29464: {'lr': 0.00045870677231514356, 'samples': 15086080, 'steps': 29464, 'loss/train': 1.598105549812317} +03/05/2022 00:04:56 - INFO - codeparrot_training - Step 29465: {'lr': 0.0004587038508403837, 'samples': 15086592, 'steps': 29465, 'loss/train': 1.0332462787628174} +03/05/2022 00:04:57 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/05/2022 00:05:01 - INFO - codeparrot_training - Step 29466: {'lr': 0.000458700929271585, 'samples': 15087104, 'steps': 29466, 'loss/train': 0.1613921970129013} +03/05/2022 00:05:05 - INFO - codeparrot_training - Step 29467: {'lr': 0.0004586980076087489, 'samples': 15087616, 'steps': 29467, 'loss/train': 1.0321905612945557} +03/05/2022 00:05:06 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/05/2022 00:05:10 - INFO - codeparrot_training - Step 29468: {'lr': 0.0004586950858518767, 'samples': 15088128, 'steps': 29468, 'loss/train': 1.592992901802063} +03/05/2022 00:05:13 - INFO - codeparrot_training - Step 29469: {'lr': 0.0004586921640009697, 'samples': 15088640, 'steps': 29469, 'loss/train': 1.910004734992981} +03/05/2022 00:05:15 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 00:05:18 - INFO - codeparrot_training - Step 29470: {'lr': 0.0004586892420560294, 'samples': 15089152, 'steps': 29470, 'loss/train': 1.3640133142471313} +03/05/2022 00:05:21 - INFO - codeparrot_training - Step 29471: {'lr': 0.0004586863200170567, 'samples': 15089664, 'steps': 29471, 'loss/train': 1.0251942873001099} +03/05/2022 00:05:24 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 00:05:27 - INFO - codeparrot_training - Step 29472: {'lr': 0.00045868339788405333, 'samples': 15090176, 'steps': 29472, 'loss/train': 1.586605429649353} +03/05/2022 00:05:30 - INFO - codeparrot_training - Step 29473: {'lr': 0.0004586804756570204, 'samples': 15090688, 'steps': 29473, 'loss/train': 1.9300031661987305} +03/05/2022 00:05:32 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/05/2022 00:05:35 - INFO - codeparrot_training - Step 29474: {'lr': 0.0004586775533359592, 'samples': 15091200, 'steps': 29474, 'loss/train': 2.6338372230529785} +03/05/2022 00:05:39 - INFO - codeparrot_training - Step 29475: {'lr': 0.00045867463092087116, 'samples': 15091712, 'steps': 29475, 'loss/train': 1.684784173965454} +03/05/2022 00:05:41 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/05/2022 00:05:44 - INFO - codeparrot_training - Step 29476: {'lr': 0.00045867170841175755, 'samples': 15092224, 'steps': 29476, 'loss/train': 1.5876818895339966} +03/05/2022 00:05:47 - INFO - codeparrot_training - Step 29477: {'lr': 0.0004586687858086197, 'samples': 15092736, 'steps': 29477, 'loss/train': 1.8086566925048828} +03/05/2022 00:05:49 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/05/2022 00:05:52 - INFO - codeparrot_training - Step 29478: {'lr': 0.0004586658631114589, 'samples': 15093248, 'steps': 29478, 'loss/train': 2.555069923400879} +03/05/2022 00:05:55 - INFO - codeparrot_training - Step 29479: {'lr': 0.0004586629403202765, 'samples': 15093760, 'steps': 29479, 'loss/train': 1.3479344844818115} +03/05/2022 00:05:58 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/05/2022 00:06:01 - INFO - codeparrot_training - Step 29480: {'lr': 0.0004586600174350738, 'samples': 15094272, 'steps': 29480, 'loss/train': 1.6688258647918701} +03/05/2022 00:06:04 - INFO - codeparrot_training - Step 29481: {'lr': 0.0004586570944558521, 'samples': 15094784, 'steps': 29481, 'loss/train': 2.1818978786468506} +03/05/2022 00:06:06 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 00:06:09 - INFO - codeparrot_training - Step 29482: {'lr': 0.00045865417138261276, 'samples': 15095296, 'steps': 29482, 'loss/train': 2.1142513751983643} +03/05/2022 00:06:12 - INFO - codeparrot_training - Step 29483: {'lr': 0.00045865124821535704, 'samples': 15095808, 'steps': 29483, 'loss/train': 2.4780969619750977} +03/05/2022 00:06:14 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 00:06:18 - INFO - codeparrot_training - Step 29484: {'lr': 0.00045864832495408624, 'samples': 15096320, 'steps': 29484, 'loss/train': 2.1094555854797363} +03/05/2022 00:06:21 - INFO - codeparrot_training - Step 29485: {'lr': 0.0004586454015988019, 'samples': 15096832, 'steps': 29485, 'loss/train': 1.250625491142273} +03/05/2022 00:06:22 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 00:06:26 - INFO - codeparrot_training - Step 29486: {'lr': 0.000458642478149505, 'samples': 15097344, 'steps': 29486, 'loss/train': 1.978638768196106} +03/05/2022 00:06:29 - INFO - codeparrot_training - Step 29487: {'lr': 0.00045863955460619707, 'samples': 15097856, 'steps': 29487, 'loss/train': 1.8101561069488525} +03/05/2022 00:06:31 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 00:06:34 - INFO - codeparrot_training - Step 29488: {'lr': 0.0004586366309688793, 'samples': 15098368, 'steps': 29488, 'loss/train': 0.8289637565612793} +03/05/2022 00:06:38 - INFO - codeparrot_training - Step 29489: {'lr': 0.00045863370723755315, 'samples': 15098880, 'steps': 29489, 'loss/train': 1.0590749979019165} +03/05/2022 00:06:39 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/05/2022 00:06:43 - INFO - codeparrot_training - Step 29490: {'lr': 0.00045863078341221993, 'samples': 15099392, 'steps': 29490, 'loss/train': 0.19803790748119354} +03/05/2022 00:06:46 - INFO - codeparrot_training - Step 29491: {'lr': 0.0004586278594928808, 'samples': 15099904, 'steps': 29491, 'loss/train': 1.5104310512542725} +03/05/2022 00:06:48 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 00:06:51 - INFO - codeparrot_training - Step 29492: {'lr': 0.0004586249354795372, 'samples': 15100416, 'steps': 29492, 'loss/train': 1.437464714050293} +03/05/2022 00:06:54 - INFO - codeparrot_training - Step 29493: {'lr': 0.0004586220113721905, 'samples': 15100928, 'steps': 29493, 'loss/train': 2.1306979656219482} +03/05/2022 00:06:56 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 00:07:00 - INFO - codeparrot_training - Step 29494: {'lr': 0.0004586190871708419, 'samples': 15101440, 'steps': 29494, 'loss/train': 1.0925543308258057} +03/05/2022 00:07:03 - INFO - codeparrot_training - Step 29495: {'lr': 0.0004586161628754927, 'samples': 15101952, 'steps': 29495, 'loss/train': 1.2560229301452637} +03/05/2022 00:07:04 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 00:07:08 - INFO - codeparrot_training - Step 29496: {'lr': 0.0004586132384861443, 'samples': 15102464, 'steps': 29496, 'loss/train': 1.743109941482544} +03/05/2022 00:07:11 - INFO - codeparrot_training - Step 29497: {'lr': 0.000458610314002798, 'samples': 15102976, 'steps': 29497, 'loss/train': 2.0666310787200928} +03/05/2022 00:07:12 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 00:07:16 - INFO - codeparrot_training - Step 29498: {'lr': 0.0004586073894254551, 'samples': 15103488, 'steps': 29498, 'loss/train': 1.0570305585861206} +03/05/2022 00:07:20 - INFO - codeparrot_training - Step 29499: {'lr': 0.000458604464754117, 'samples': 15104000, 'steps': 29499, 'loss/train': 1.6065893173217773} +03/05/2022 00:07:21 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/05/2022 00:07:25 - INFO - codeparrot_training - Step 29500: {'lr': 0.0004586015399887849, 'samples': 15104512, 'steps': 29500, 'loss/train': 1.0707597732543945} +03/05/2022 00:07:28 - INFO - codeparrot_training - Step 29501: {'lr': 0.0004585986151294602, 'samples': 15105024, 'steps': 29501, 'loss/train': 1.6413307189941406} +03/05/2022 00:07:29 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/05/2022 00:07:33 - INFO - codeparrot_training - Step 29502: {'lr': 0.0004585956901761441, 'samples': 15105536, 'steps': 29502, 'loss/train': 1.4918277263641357} +03/05/2022 00:07:36 - INFO - codeparrot_training - Step 29503: {'lr': 0.00045859276512883807, 'samples': 15106048, 'steps': 29503, 'loss/train': 0.06658778339624405} +03/05/2022 00:07:37 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/05/2022 00:07:42 - INFO - codeparrot_training - Step 29504: {'lr': 0.00045858983998754336, 'samples': 15106560, 'steps': 29504, 'loss/train': 1.9468345642089844} +03/05/2022 00:07:45 - INFO - codeparrot_training - Step 29505: {'lr': 0.0004585869147522612, 'samples': 15107072, 'steps': 29505, 'loss/train': 1.9459869861602783} +03/05/2022 00:07:46 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 00:07:50 - INFO - codeparrot_training - Step 29506: {'lr': 0.00045858398942299306, 'samples': 15107584, 'steps': 29506, 'loss/train': 1.6417884826660156} +03/05/2022 00:07:53 - INFO - codeparrot_training - Step 29507: {'lr': 0.0004585810639997402, 'samples': 15108096, 'steps': 29507, 'loss/train': 0.7079473733901978} +03/05/2022 00:07:54 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 00:07:59 - INFO - codeparrot_training - Step 29508: {'lr': 0.0004585781384825039, 'samples': 15108608, 'steps': 29508, 'loss/train': 2.4221251010894775} +03/05/2022 00:08:02 - INFO - codeparrot_training - Step 29509: {'lr': 0.00045857521287128556, 'samples': 15109120, 'steps': 29509, 'loss/train': 1.5184063911437988} +03/05/2022 00:08:02 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 00:08:07 - INFO - codeparrot_training - Step 29510: {'lr': 0.0004585722871660864, 'samples': 15109632, 'steps': 29510, 'loss/train': 1.5314116477966309} +03/05/2022 00:08:10 - INFO - codeparrot_training - Step 29511: {'lr': 0.0004585693613669078, 'samples': 15110144, 'steps': 29511, 'loss/train': 1.9491839408874512} +03/05/2022 00:08:11 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 00:08:15 - INFO - codeparrot_training - Step 29512: {'lr': 0.0004585664354737511, 'samples': 15110656, 'steps': 29512, 'loss/train': 1.3117051124572754} +03/05/2022 00:08:19 - INFO - codeparrot_training - Step 29513: {'lr': 0.0004585635094866175, 'samples': 15111168, 'steps': 29513, 'loss/train': 1.8874579668045044} +03/05/2022 00:08:19 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 00:08:24 - INFO - codeparrot_training - Step 29514: {'lr': 0.0004585605834055084, 'samples': 15111680, 'steps': 29514, 'loss/train': 1.1464364528656006} +03/05/2022 00:08:27 - INFO - codeparrot_training - Step 29515: {'lr': 0.00045855765723042526, 'samples': 15112192, 'steps': 29515, 'loss/train': 2.029849052429199} +03/05/2022 00:08:28 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/05/2022 00:08:32 - INFO - codeparrot_training - Step 29516: {'lr': 0.00045855473096136914, 'samples': 15112704, 'steps': 29516, 'loss/train': 0.6989114284515381} +03/05/2022 00:08:35 - INFO - codeparrot_training - Step 29517: {'lr': 0.00045855180459834153, 'samples': 15113216, 'steps': 29517, 'loss/train': 1.8639788627624512} +03/05/2022 00:08:36 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/05/2022 00:08:41 - INFO - codeparrot_training - Step 29518: {'lr': 0.0004585488781413437, 'samples': 15113728, 'steps': 29518, 'loss/train': 1.591664433479309} +03/05/2022 00:08:44 - INFO - codeparrot_training - Step 29519: {'lr': 0.00045854595159037695, 'samples': 15114240, 'steps': 29519, 'loss/train': 1.4009218215942383} +03/05/2022 00:08:44 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/05/2022 00:08:49 - INFO - codeparrot_training - Step 29520: {'lr': 0.0004585430249454425, 'samples': 15114752, 'steps': 29520, 'loss/train': 1.8163731098175049} +03/05/2022 00:08:52 - INFO - codeparrot_training - Step 29521: {'lr': 0.000458540098206542, 'samples': 15115264, 'steps': 29521, 'loss/train': 1.7235661745071411} +03/05/2022 00:08:53 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 00:08:58 - INFO - codeparrot_training - Step 29522: {'lr': 0.00045853717137367634, 'samples': 15115776, 'steps': 29522, 'loss/train': 1.4032801389694214} +03/05/2022 00:09:01 - INFO - codeparrot_training - Step 29523: {'lr': 0.0004585342444468471, 'samples': 15116288, 'steps': 29523, 'loss/train': 1.9199674129486084} +03/05/2022 00:09:01 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 00:09:06 - INFO - codeparrot_training - Step 29524: {'lr': 0.00045853131742605563, 'samples': 15116800, 'steps': 29524, 'loss/train': 0.11558540165424347} +03/05/2022 00:09:09 - INFO - codeparrot_training - Step 29525: {'lr': 0.0004585283903113031, 'samples': 15117312, 'steps': 29525, 'loss/train': 2.0342044830322266} +03/05/2022 00:09:14 - INFO - codeparrot_training - Step 29526: {'lr': 0.00045852546310259093, 'samples': 15117824, 'steps': 29526, 'loss/train': 1.684152364730835} +03/05/2022 00:09:18 - INFO - codeparrot_training - Step 29527: {'lr': 0.00045852253579992043, 'samples': 15118336, 'steps': 29527, 'loss/train': 0.712755024433136} +03/05/2022 00:09:18 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/05/2022 00:09:23 - INFO - codeparrot_training - Step 29528: {'lr': 0.0004585196084032928, 'samples': 15118848, 'steps': 29528, 'loss/train': 2.1710665225982666} +03/05/2022 00:09:26 - INFO - codeparrot_training - Step 29529: {'lr': 0.0004585166809127095, 'samples': 15119360, 'steps': 29529, 'loss/train': 1.670620322227478} +03/05/2022 00:09:26 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/05/2022 00:09:31 - INFO - codeparrot_training - Step 29530: {'lr': 0.0004585137533281718, 'samples': 15119872, 'steps': 29530, 'loss/train': 0.8960245251655579} +03/05/2022 00:09:34 - INFO - codeparrot_training - Step 29531: {'lr': 0.00045851082564968103, 'samples': 15120384, 'steps': 29531, 'loss/train': 1.4882266521453857} +03/05/2022 00:09:34 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/05/2022 00:09:40 - INFO - codeparrot_training - Step 29532: {'lr': 0.0004585078978772385, 'samples': 15120896, 'steps': 29532, 'loss/train': 1.1559669971466064} +03/05/2022 00:09:43 - INFO - codeparrot_training - Step 29533: {'lr': 0.0004585049700108455, 'samples': 15121408, 'steps': 29533, 'loss/train': 2.140377998352051} +03/05/2022 00:09:43 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 00:09:48 - INFO - codeparrot_training - Step 29534: {'lr': 0.00045850204205050344, 'samples': 15121920, 'steps': 29534, 'loss/train': 2.095122814178467} +03/05/2022 00:09:51 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 00:09:54 - INFO - codeparrot_training - Step 29535: {'lr': 0.0004584991139962135, 'samples': 15122432, 'steps': 29535, 'loss/train': 1.1537307500839233} +03/05/2022 00:09:57 - INFO - codeparrot_training - Step 29536: {'lr': 0.00045849618584797717, 'samples': 15122944, 'steps': 29536, 'loss/train': 1.6044560670852661} +03/05/2022 00:10:00 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 00:10:02 - INFO - codeparrot_training - Step 29537: {'lr': 0.0004584932576057956, 'samples': 15123456, 'steps': 29537, 'loss/train': 2.026948928833008} +03/05/2022 00:10:05 - INFO - codeparrot_training - Step 29538: {'lr': 0.00045849032926967016, 'samples': 15123968, 'steps': 29538, 'loss/train': 1.543022871017456} +03/05/2022 00:10:08 - INFO - codeparrot_training - Step 29539: {'lr': 0.0004584874008396023, 'samples': 15124480, 'steps': 29539, 'loss/train': 0.10876262187957764} +03/05/2022 00:10:08 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/05/2022 00:10:14 - INFO - codeparrot_training - Step 29540: {'lr': 0.00045848447231559315, 'samples': 15124992, 'steps': 29540, 'loss/train': 4.418543815612793} +03/05/2022 00:10:17 - INFO - codeparrot_training - Step 29541: {'lr': 0.00045848154369764415, 'samples': 15125504, 'steps': 29541, 'loss/train': 1.3366106748580933} +03/05/2022 00:10:17 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/05/2022 00:10:22 - INFO - codeparrot_training - Step 29542: {'lr': 0.0004584786149857566, 'samples': 15126016, 'steps': 29542, 'loss/train': 2.273329734802246} +03/05/2022 00:10:25 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 00:10:27 - INFO - codeparrot_training - Step 29543: {'lr': 0.00045847568617993174, 'samples': 15126528, 'steps': 29543, 'loss/train': 0.7684382796287537} +03/05/2022 00:10:31 - INFO - codeparrot_training - Step 29544: {'lr': 0.000458472757280171, 'samples': 15127040, 'steps': 29544, 'loss/train': 2.2687673568725586} +03/05/2022 00:10:33 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/05/2022 00:10:36 - INFO - codeparrot_training - Step 29545: {'lr': 0.0004584698282864757, 'samples': 15127552, 'steps': 29545, 'loss/train': 1.3599854707717896} +03/05/2022 00:10:39 - INFO - codeparrot_training - Step 29546: {'lr': 0.000458466899198847, 'samples': 15128064, 'steps': 29546, 'loss/train': 1.8748090267181396} +03/05/2022 00:10:43 - INFO - codeparrot_training - Step 29547: {'lr': 0.0004584639700172863, 'samples': 15128576, 'steps': 29547, 'loss/train': 2.043598175048828} +03/05/2022 00:10:44 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/05/2022 00:10:48 - INFO - codeparrot_training - Step 29548: {'lr': 0.00045846104074179504, 'samples': 15129088, 'steps': 29548, 'loss/train': 2.306469678878784} +03/05/2022 00:10:51 - INFO - codeparrot_training - Step 29549: {'lr': 0.00045845811137237445, 'samples': 15129600, 'steps': 29549, 'loss/train': 0.9882426261901855} +03/05/2022 00:10:52 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 00:10:56 - INFO - codeparrot_training - Step 29550: {'lr': 0.0004584551819090259, 'samples': 15130112, 'steps': 29550, 'loss/train': 1.8231916427612305} +03/05/2022 00:10:59 - INFO - codeparrot_training - Step 29551: {'lr': 0.0004584522523517506, 'samples': 15130624, 'steps': 29551, 'loss/train': 1.5368438959121704} +03/05/2022 00:11:00 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/05/2022 00:11:04 - INFO - codeparrot_training - Step 29552: {'lr': 0.00045844932270054997, 'samples': 15131136, 'steps': 29552, 'loss/train': 1.6573116779327393} +03/05/2022 00:11:08 - INFO - codeparrot_training - Step 29553: {'lr': 0.00045844639295542525, 'samples': 15131648, 'steps': 29553, 'loss/train': 1.160616159439087} +03/05/2022 00:11:09 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 00:11:13 - INFO - codeparrot_training - Step 29554: {'lr': 0.0004584434631163779, 'samples': 15132160, 'steps': 29554, 'loss/train': 1.9815462827682495} +03/05/2022 00:11:16 - INFO - codeparrot_training - Step 29555: {'lr': 0.000458440533183409, 'samples': 15132672, 'steps': 29555, 'loss/train': 2.0560553073883057} +03/05/2022 00:11:17 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/05/2022 00:11:22 - INFO - codeparrot_training - Step 29556: {'lr': 0.0004584376031565201, 'samples': 15133184, 'steps': 29556, 'loss/train': 1.6338471174240112} +03/05/2022 00:11:25 - INFO - codeparrot_training - Step 29557: {'lr': 0.0004584346730357124, 'samples': 15133696, 'steps': 29557, 'loss/train': 0.8693658709526062} +03/05/2022 00:11:26 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/05/2022 00:11:30 - INFO - codeparrot_training - Step 29558: {'lr': 0.0004584317428209872, 'samples': 15134208, 'steps': 29558, 'loss/train': 1.6076276302337646} +03/05/2022 00:11:33 - INFO - codeparrot_training - Step 29559: {'lr': 0.0004584288125123459, 'samples': 15134720, 'steps': 29559, 'loss/train': 1.7829666137695312} +03/05/2022 00:11:34 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/05/2022 00:11:38 - INFO - codeparrot_training - Step 29560: {'lr': 0.0004584258821097899, 'samples': 15135232, 'steps': 29560, 'loss/train': 1.1429840326309204} +03/05/2022 00:11:42 - INFO - codeparrot_training - Step 29561: {'lr': 0.0004584229516133203, 'samples': 15135744, 'steps': 29561, 'loss/train': 1.9897711277008057} +03/05/2022 00:11:43 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/05/2022 00:11:47 - INFO - codeparrot_training - Step 29562: {'lr': 0.00045842002102293856, 'samples': 15136256, 'steps': 29562, 'loss/train': 0.9868795871734619} +03/05/2022 00:11:50 - INFO - codeparrot_training - Step 29563: {'lr': 0.000458417090338646, 'samples': 15136768, 'steps': 29563, 'loss/train': 2.8524515628814697} +03/05/2022 00:11:51 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 00:11:55 - INFO - codeparrot_training - Step 29564: {'lr': 0.00045841415956044394, 'samples': 15137280, 'steps': 29564, 'loss/train': 1.781243085861206} +03/05/2022 00:11:59 - INFO - codeparrot_training - Step 29565: {'lr': 0.0004584112286883336, 'samples': 15137792, 'steps': 29565, 'loss/train': 1.0656503438949585} +03/05/2022 00:11:59 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/05/2022 00:12:04 - INFO - codeparrot_training - Step 29566: {'lr': 0.0004584082977223164, 'samples': 15138304, 'steps': 29566, 'loss/train': 1.3876230716705322} +03/05/2022 00:12:07 - INFO - codeparrot_training - Step 29567: {'lr': 0.0004584053666623937, 'samples': 15138816, 'steps': 29567, 'loss/train': 1.4978874921798706} +03/05/2022 00:12:08 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 00:12:12 - INFO - codeparrot_training - Step 29568: {'lr': 0.00045840243550856666, 'samples': 15139328, 'steps': 29568, 'loss/train': 1.9485430717468262} +03/05/2022 00:12:15 - INFO - codeparrot_training - Step 29569: {'lr': 0.00045839950426083677, 'samples': 15139840, 'steps': 29569, 'loss/train': 2.9539616107940674} +03/05/2022 00:12:16 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 00:12:21 - INFO - codeparrot_training - Step 29570: {'lr': 0.0004583965729192052, 'samples': 15140352, 'steps': 29570, 'loss/train': 1.068267822265625} +03/05/2022 00:12:24 - INFO - codeparrot_training - Step 29571: {'lr': 0.00045839364148367345, 'samples': 15140864, 'steps': 29571, 'loss/train': 1.5929888486862183} +03/05/2022 00:12:25 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/05/2022 00:12:29 - INFO - codeparrot_training - Step 29572: {'lr': 0.00045839070995424273, 'samples': 15141376, 'steps': 29572, 'loss/train': 0.6068469285964966} +03/05/2022 00:12:32 - INFO - codeparrot_training - Step 29573: {'lr': 0.00045838777833091425, 'samples': 15141888, 'steps': 29573, 'loss/train': 2.285215377807617} +03/05/2022 00:12:33 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/05/2022 00:12:37 - INFO - codeparrot_training - Step 29574: {'lr': 0.00045838484661368963, 'samples': 15142400, 'steps': 29574, 'loss/train': 1.169742465019226} +03/05/2022 00:12:41 - INFO - codeparrot_training - Step 29575: {'lr': 0.00045838191480256985, 'samples': 15142912, 'steps': 29575, 'loss/train': 2.101759910583496} +03/05/2022 00:12:42 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 00:12:46 - INFO - codeparrot_training - Step 29576: {'lr': 0.00045837898289755654, 'samples': 15143424, 'steps': 29576, 'loss/train': 1.7290599346160889} +03/05/2022 00:12:49 - INFO - codeparrot_training - Step 29577: {'lr': 0.0004583760508986508, 'samples': 15143936, 'steps': 29577, 'loss/train': 2.44104266166687} +03/05/2022 00:12:50 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/05/2022 00:12:54 - INFO - codeparrot_training - Step 29578: {'lr': 0.000458373118805854, 'samples': 15144448, 'steps': 29578, 'loss/train': 2.2922070026397705} +03/05/2022 00:12:58 - INFO - codeparrot_training - Step 29579: {'lr': 0.00045837018661916754, 'samples': 15144960, 'steps': 29579, 'loss/train': 2.013897657394409} +03/05/2022 00:12:58 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/05/2022 00:13:03 - INFO - codeparrot_training - Step 29580: {'lr': 0.00045836725433859266, 'samples': 15145472, 'steps': 29580, 'loss/train': 0.7683368921279907} +03/05/2022 00:13:06 - INFO - codeparrot_training - Step 29581: {'lr': 0.0004583643219641307, 'samples': 15145984, 'steps': 29581, 'loss/train': 1.45555579662323} +03/05/2022 00:13:06 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/05/2022 00:13:12 - INFO - codeparrot_training - Step 29582: {'lr': 0.00045836138949578297, 'samples': 15146496, 'steps': 29582, 'loss/train': 1.0350154638290405} +03/05/2022 00:13:15 - INFO - codeparrot_training - Step 29583: {'lr': 0.00045835845693355096, 'samples': 15147008, 'steps': 29583, 'loss/train': 1.3677806854248047} +03/05/2022 00:13:16 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/05/2022 00:13:20 - INFO - codeparrot_training - Step 29584: {'lr': 0.00045835552427743567, 'samples': 15147520, 'steps': 29584, 'loss/train': 1.9622410535812378} +03/05/2022 00:13:23 - INFO - codeparrot_training - Step 29585: {'lr': 0.00045835259152743866, 'samples': 15148032, 'steps': 29585, 'loss/train': 0.5172232389450073} +03/05/2022 00:13:25 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 00:13:28 - INFO - codeparrot_training - Step 29586: {'lr': 0.0004583496586835612, 'samples': 15148544, 'steps': 29586, 'loss/train': 1.5638020038604736} +03/05/2022 00:13:32 - INFO - codeparrot_training - Step 29587: {'lr': 0.0004583467257458046, 'samples': 15149056, 'steps': 29587, 'loss/train': 2.2973859310150146} +03/05/2022 00:13:33 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/05/2022 00:13:37 - INFO - codeparrot_training - Step 29588: {'lr': 0.00045834379271417013, 'samples': 15149568, 'steps': 29588, 'loss/train': 1.674185872077942} +03/05/2022 00:13:40 - INFO - codeparrot_training - Step 29589: {'lr': 0.0004583408595886592, 'samples': 15150080, 'steps': 29589, 'loss/train': 2.2917025089263916} +03/05/2022 00:13:42 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/05/2022 00:13:45 - INFO - codeparrot_training - Step 29590: {'lr': 0.0004583379263692732, 'samples': 15150592, 'steps': 29590, 'loss/train': 1.8480969667434692} +03/05/2022 00:13:48 - INFO - codeparrot_training - Step 29591: {'lr': 0.0004583349930560132, 'samples': 15151104, 'steps': 29591, 'loss/train': 1.8870465755462646} +03/05/2022 00:13:50 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 00:13:54 - INFO - codeparrot_training - Step 29592: {'lr': 0.0004583320596488807, 'samples': 15151616, 'steps': 29592, 'loss/train': 1.9500160217285156} +03/05/2022 00:13:57 - INFO - codeparrot_training - Step 29593: {'lr': 0.000458329126147877, 'samples': 15152128, 'steps': 29593, 'loss/train': 1.9467270374298096} +03/05/2022 00:14:00 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/05/2022 00:14:03 - INFO - codeparrot_training - Step 29594: {'lr': 0.00045832619255300344, 'samples': 15152640, 'steps': 29594, 'loss/train': 0.7993493676185608} +03/05/2022 00:14:06 - INFO - codeparrot_training - Step 29595: {'lr': 0.00045832325886426125, 'samples': 15153152, 'steps': 29595, 'loss/train': 1.691361427307129} +03/05/2022 00:14:08 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/05/2022 00:14:11 - INFO - codeparrot_training - Step 29596: {'lr': 0.0004583203250816518, 'samples': 15153664, 'steps': 29596, 'loss/train': 2.2305729389190674} +03/05/2022 00:14:14 - INFO - codeparrot_training - Step 29597: {'lr': 0.0004583173912051765, 'samples': 15154176, 'steps': 29597, 'loss/train': 2.181438684463501} +03/05/2022 00:14:16 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/05/2022 00:14:19 - INFO - codeparrot_training - Step 29598: {'lr': 0.00045831445723483656, 'samples': 15154688, 'steps': 29598, 'loss/train': 1.5685683488845825} +03/05/2022 00:14:23 - INFO - codeparrot_training - Step 29599: {'lr': 0.0004583115231706334, 'samples': 15155200, 'steps': 29599, 'loss/train': 1.7535970211029053} +03/05/2022 00:14:25 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/05/2022 00:14:28 - INFO - codeparrot_training - Step 29600: {'lr': 0.0004583085890125682, 'samples': 15155712, 'steps': 29600, 'loss/train': 2.4371070861816406} +03/05/2022 00:14:31 - INFO - codeparrot_training - Step 29601: {'lr': 0.0004583056547606424, 'samples': 15156224, 'steps': 29601, 'loss/train': 1.4144538640975952} +03/05/2022 00:14:33 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/05/2022 00:14:36 - INFO - codeparrot_training - Step 29602: {'lr': 0.0004583027204148573, 'samples': 15156736, 'steps': 29602, 'loss/train': 1.7700753211975098} +03/05/2022 00:14:40 - INFO - codeparrot_training - Step 29603: {'lr': 0.0004582997859752142, 'samples': 15157248, 'steps': 29603, 'loss/train': 2.3405330181121826} +03/05/2022 00:14:41 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/05/2022 00:14:45 - INFO - codeparrot_training - Step 29604: {'lr': 0.0004582968514417144, 'samples': 15157760, 'steps': 29604, 'loss/train': 1.399165391921997} +03/05/2022 00:14:48 - INFO - codeparrot_training - Step 29605: {'lr': 0.00045829391681435926, 'samples': 15158272, 'steps': 29605, 'loss/train': 0.10402455180883408} +03/05/2022 00:14:50 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/05/2022 00:14:53 - INFO - codeparrot_training - Step 29606: {'lr': 0.0004582909820931501, 'samples': 15158784, 'steps': 29606, 'loss/train': 2.2843778133392334} +03/05/2022 00:14:56 - INFO - codeparrot_training - Step 29607: {'lr': 0.00045828804727808824, 'samples': 15159296, 'steps': 29607, 'loss/train': 1.6677777767181396} +03/05/2022 00:14:58 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/05/2022 00:15:02 - INFO - codeparrot_training - Step 29608: {'lr': 0.000458285112369175, 'samples': 15159808, 'steps': 29608, 'loss/train': 1.1638938188552856} +03/05/2022 00:15:05 - INFO - codeparrot_training - Step 29609: {'lr': 0.0004582821773664118, 'samples': 15160320, 'steps': 29609, 'loss/train': 1.8593134880065918} +03/05/2022 00:15:06 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/05/2022 00:15:10 - INFO - codeparrot_training - Step 29610: {'lr': 0.0004582792422697997, 'samples': 15160832, 'steps': 29610, 'loss/train': 1.438664197921753} +03/05/2022 00:15:13 - INFO - codeparrot_training - Step 29611: {'lr': 0.0004582763070793403, 'samples': 15161344, 'steps': 29611, 'loss/train': 1.5748016834259033} +03/05/2022 00:15:15 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 00:15:18 - INFO - codeparrot_training - Step 29612: {'lr': 0.0004582733717950347, 'samples': 15161856, 'steps': 29612, 'loss/train': 1.8385947942733765} +03/05/2022 00:15:22 - INFO - codeparrot_training - Step 29613: {'lr': 0.00045827043641688444, 'samples': 15162368, 'steps': 29613, 'loss/train': 2.0978434085845947} +03/05/2022 00:15:23 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/05/2022 00:15:27 - INFO - codeparrot_training - Step 29614: {'lr': 0.00045826750094489065, 'samples': 15162880, 'steps': 29614, 'loss/train': 1.619504451751709} +03/05/2022 00:15:30 - INFO - codeparrot_training - Step 29615: {'lr': 0.00045826456537905483, 'samples': 15163392, 'steps': 29615, 'loss/train': 1.7345117330551147} +03/05/2022 00:15:31 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/05/2022 00:15:35 - INFO - codeparrot_training - Step 29616: {'lr': 0.0004582616297193781, 'samples': 15163904, 'steps': 29616, 'loss/train': 2.030402183532715} +03/05/2022 00:15:39 - INFO - codeparrot_training - Step 29617: {'lr': 0.000458258693965862, 'samples': 15164416, 'steps': 29617, 'loss/train': 2.590390920639038} +03/05/2022 00:15:40 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/05/2022 00:15:44 - INFO - codeparrot_training - Step 29618: {'lr': 0.0004582557581185077, 'samples': 15164928, 'steps': 29618, 'loss/train': 1.591430425643921} +03/05/2022 00:15:47 - INFO - codeparrot_training - Step 29619: {'lr': 0.00045825282217731655, 'samples': 15165440, 'steps': 29619, 'loss/train': 2.272411346435547} +03/05/2022 00:15:48 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/05/2022 00:15:52 - INFO - codeparrot_training - Step 29620: {'lr': 0.00045824988614228995, 'samples': 15165952, 'steps': 29620, 'loss/train': 1.7040380239486694} +03/05/2022 00:15:55 - INFO - codeparrot_training - Step 29621: {'lr': 0.0004582469500134292, 'samples': 15166464, 'steps': 29621, 'loss/train': 1.5744106769561768} +03/05/2022 00:15:56 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 00:16:01 - INFO - codeparrot_training - Step 29622: {'lr': 0.00045824401379073544, 'samples': 15166976, 'steps': 29622, 'loss/train': 1.8472628593444824} +03/05/2022 00:16:04 - INFO - codeparrot_training - Step 29623: {'lr': 0.0004582410774742103, 'samples': 15167488, 'steps': 29623, 'loss/train': 1.8156287670135498} +03/05/2022 00:16:04 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/05/2022 00:16:09 - INFO - codeparrot_training - Step 29624: {'lr': 0.00045823814106385485, 'samples': 15168000, 'steps': 29624, 'loss/train': 1.7630374431610107} +03/05/2022 00:16:12 - INFO - codeparrot_training - Step 29625: {'lr': 0.0004582352045596705, 'samples': 15168512, 'steps': 29625, 'loss/train': 2.4423153400421143} +03/05/2022 00:16:13 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/05/2022 00:16:18 - INFO - codeparrot_training - Step 29626: {'lr': 0.0004582322679616586, 'samples': 15169024, 'steps': 29626, 'loss/train': 1.4555785655975342} +03/05/2022 00:16:21 - INFO - codeparrot_training - Step 29627: {'lr': 0.0004582293312698205, 'samples': 15169536, 'steps': 29627, 'loss/train': 1.271009922027588} +03/05/2022 00:16:22 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 00:16:26 - INFO - codeparrot_training - Step 29628: {'lr': 0.00045822639448415736, 'samples': 15170048, 'steps': 29628, 'loss/train': 1.444993495941162} +03/05/2022 00:16:30 - INFO - codeparrot_training - Step 29629: {'lr': 0.0004582234576046707, 'samples': 15170560, 'steps': 29629, 'loss/train': 1.9633160829544067} +03/05/2022 00:16:31 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 00:16:35 - INFO - codeparrot_training - Step 29630: {'lr': 0.00045822052063136177, 'samples': 15171072, 'steps': 29630, 'loss/train': 2.0363192558288574} +03/05/2022 00:16:38 - INFO - codeparrot_training - Step 29631: {'lr': 0.0004582175835642319, 'samples': 15171584, 'steps': 29631, 'loss/train': 2.1065759658813477} +03/05/2022 00:16:39 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 00:16:43 - INFO - codeparrot_training - Step 29632: {'lr': 0.0004582146464032824, 'samples': 15172096, 'steps': 29632, 'loss/train': 1.8479074239730835} +03/05/2022 00:16:46 - INFO - codeparrot_training - Step 29633: {'lr': 0.0004582117091485145, 'samples': 15172608, 'steps': 29633, 'loss/train': 0.0593196377158165} +03/05/2022 00:16:48 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/05/2022 00:16:52 - INFO - codeparrot_training - Step 29634: {'lr': 0.0004582087717999297, 'samples': 15173120, 'steps': 29634, 'loss/train': 1.6204078197479248} +03/05/2022 00:16:55 - INFO - codeparrot_training - Step 29635: {'lr': 0.0004582058343575292, 'samples': 15173632, 'steps': 29635, 'loss/train': 2.0541532039642334} +03/05/2022 00:16:56 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/05/2022 00:17:00 - INFO - codeparrot_training - Step 29636: {'lr': 0.00045820289682131437, 'samples': 15174144, 'steps': 29636, 'loss/train': 0.6851502060890198} +03/05/2022 00:17:03 - INFO - codeparrot_training - Step 29637: {'lr': 0.0004581999591912865, 'samples': 15174656, 'steps': 29637, 'loss/train': 1.3127148151397705} +03/05/2022 00:17:05 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 00:17:09 - INFO - codeparrot_training - Step 29638: {'lr': 0.000458197021467447, 'samples': 15175168, 'steps': 29638, 'loss/train': 1.9174312353134155} +03/05/2022 00:17:12 - INFO - codeparrot_training - Step 29639: {'lr': 0.00045819408364979714, 'samples': 15175680, 'steps': 29639, 'loss/train': 0.9182990193367004} +03/05/2022 00:17:13 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 00:17:17 - INFO - codeparrot_training - Step 29640: {'lr': 0.0004581911457383382, 'samples': 15176192, 'steps': 29640, 'loss/train': 1.9126458168029785} +03/05/2022 00:17:21 - INFO - codeparrot_training - Step 29641: {'lr': 0.0004581882077330716, 'samples': 15176704, 'steps': 29641, 'loss/train': 1.6636160612106323} +03/05/2022 00:17:23 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/05/2022 00:17:26 - INFO - codeparrot_training - Step 29642: {'lr': 0.0004581852696339985, 'samples': 15177216, 'steps': 29642, 'loss/train': 1.5770938396453857} +03/05/2022 00:17:29 - INFO - codeparrot_training - Step 29643: {'lr': 0.00045818233144112044, 'samples': 15177728, 'steps': 29643, 'loss/train': 2.2921130657196045} +03/05/2022 00:17:31 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 00:17:34 - INFO - codeparrot_training - Step 29644: {'lr': 0.00045817939315443855, 'samples': 15178240, 'steps': 29644, 'loss/train': 1.8131349086761475} +03/05/2022 00:17:37 - INFO - codeparrot_training - Step 29645: {'lr': 0.0004581764547739543, 'samples': 15178752, 'steps': 29645, 'loss/train': 1.7691534757614136} +03/05/2022 00:17:39 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 00:17:43 - INFO - codeparrot_training - Step 29646: {'lr': 0.00045817351629966896, 'samples': 15179264, 'steps': 29646, 'loss/train': 1.7306549549102783} +03/05/2022 00:17:46 - INFO - codeparrot_training - Step 29647: {'lr': 0.00045817057773158375, 'samples': 15179776, 'steps': 29647, 'loss/train': 1.89664626121521} +03/05/2022 00:17:48 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 00:17:51 - INFO - codeparrot_training - Step 29648: {'lr': 0.0004581676390697002, 'samples': 15180288, 'steps': 29648, 'loss/train': 1.81739342212677} +03/05/2022 00:17:54 - INFO - codeparrot_training - Step 29649: {'lr': 0.00045816470031401945, 'samples': 15180800, 'steps': 29649, 'loss/train': 1.505497694015503} +03/05/2022 00:17:56 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/05/2022 00:17:59 - INFO - codeparrot_training - Step 29650: {'lr': 0.00045816176146454296, 'samples': 15181312, 'steps': 29650, 'loss/train': 1.2665773630142212} +03/05/2022 00:18:03 - INFO - codeparrot_training - Step 29651: {'lr': 0.00045815882252127197, 'samples': 15181824, 'steps': 29651, 'loss/train': 1.7291022539138794} +03/05/2022 00:18:04 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 00:18:08 - INFO - codeparrot_training - Step 29652: {'lr': 0.0004581558834842078, 'samples': 15182336, 'steps': 29652, 'loss/train': 1.5999352931976318} +03/05/2022 00:18:11 - INFO - codeparrot_training - Step 29653: {'lr': 0.00045815294435335184, 'samples': 15182848, 'steps': 29653, 'loss/train': 2.4165852069854736} +03/05/2022 00:18:12 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/05/2022 00:18:16 - INFO - codeparrot_training - Step 29654: {'lr': 0.0004581500051287053, 'samples': 15183360, 'steps': 29654, 'loss/train': 2.910472869873047} +03/05/2022 00:18:20 - INFO - codeparrot_training - Step 29655: {'lr': 0.00045814706581026967, 'samples': 15183872, 'steps': 29655, 'loss/train': 1.8675283193588257} +03/05/2022 00:18:21 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 00:18:25 - INFO - codeparrot_training - Step 29656: {'lr': 0.0004581441263980461, 'samples': 15184384, 'steps': 29656, 'loss/train': 2.431382417678833} +03/05/2022 00:18:28 - INFO - codeparrot_training - Step 29657: {'lr': 0.0004581411868920361, 'samples': 15184896, 'steps': 29657, 'loss/train': 1.1192803382873535} +03/05/2022 00:18:29 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 00:18:33 - INFO - codeparrot_training - Step 29658: {'lr': 0.00045813824729224085, 'samples': 15185408, 'steps': 29658, 'loss/train': 0.33740270137786865} +03/05/2022 00:18:36 - INFO - codeparrot_training - Step 29659: {'lr': 0.0004581353075986617, 'samples': 15185920, 'steps': 29659, 'loss/train': 1.6104646921157837} +03/05/2022 00:18:38 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 00:18:42 - INFO - codeparrot_training - Step 29660: {'lr': 0.00045813236781129996, 'samples': 15186432, 'steps': 29660, 'loss/train': 2.2894287109375} +03/05/2022 00:18:45 - INFO - codeparrot_training - Step 29661: {'lr': 0.00045812942793015707, 'samples': 15186944, 'steps': 29661, 'loss/train': 1.7148869037628174} +03/05/2022 00:18:46 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/05/2022 00:18:50 - INFO - codeparrot_training - Step 29662: {'lr': 0.0004581264879552342, 'samples': 15187456, 'steps': 29662, 'loss/train': 0.7447697520256042} +03/05/2022 00:18:53 - INFO - codeparrot_training - Step 29663: {'lr': 0.00045812354788653275, 'samples': 15187968, 'steps': 29663, 'loss/train': 1.8524298667907715} +03/05/2022 00:18:54 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 00:18:59 - INFO - codeparrot_training - Step 29664: {'lr': 0.00045812060772405403, 'samples': 15188480, 'steps': 29664, 'loss/train': 1.5619217157363892} +03/05/2022 00:19:02 - INFO - codeparrot_training - Step 29665: {'lr': 0.0004581176674677995, 'samples': 15188992, 'steps': 29665, 'loss/train': 0.7549598813056946} +03/05/2022 00:19:03 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/05/2022 00:19:07 - INFO - codeparrot_training - Step 29666: {'lr': 0.00045811472711777026, 'samples': 15189504, 'steps': 29666, 'loss/train': 0.8397639989852905} +03/05/2022 00:19:10 - INFO - codeparrot_training - Step 29667: {'lr': 0.0004581117866739677, 'samples': 15190016, 'steps': 29667, 'loss/train': 2.2766034603118896} +03/05/2022 00:19:11 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/05/2022 00:19:15 - INFO - codeparrot_training - Step 29668: {'lr': 0.00045810884613639325, 'samples': 15190528, 'steps': 29668, 'loss/train': 1.3779386281967163} +03/05/2022 00:19:19 - INFO - codeparrot_training - Step 29669: {'lr': 0.00045810590550504816, 'samples': 15191040, 'steps': 29669, 'loss/train': 1.3402960300445557} +03/05/2022 00:19:20 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/05/2022 00:19:24 - INFO - codeparrot_training - Step 29670: {'lr': 0.0004581029647799337, 'samples': 15191552, 'steps': 29670, 'loss/train': 1.1934853792190552} +03/05/2022 00:19:27 - INFO - codeparrot_training - Step 29671: {'lr': 0.0004581000239610513, 'samples': 15192064, 'steps': 29671, 'loss/train': 1.5362460613250732} +03/05/2022 00:19:28 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 00:19:32 - INFO - codeparrot_training - Step 29672: {'lr': 0.0004580970830484023, 'samples': 15192576, 'steps': 29672, 'loss/train': 1.6032607555389404} +03/05/2022 00:19:35 - INFO - codeparrot_training - Step 29673: {'lr': 0.00045809414204198785, 'samples': 15193088, 'steps': 29673, 'loss/train': 1.8287689685821533} +03/05/2022 00:19:37 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/05/2022 00:19:41 - INFO - codeparrot_training - Step 29674: {'lr': 0.00045809120094180946, 'samples': 15193600, 'steps': 29674, 'loss/train': 1.8490982055664062} +03/05/2022 00:19:44 - INFO - codeparrot_training - Step 29675: {'lr': 0.00045808825974786834, 'samples': 15194112, 'steps': 29675, 'loss/train': 2.54978609085083} +03/05/2022 00:19:45 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/05/2022 00:19:49 - INFO - codeparrot_training - Step 29676: {'lr': 0.0004580853184601659, 'samples': 15194624, 'steps': 29676, 'loss/train': 2.3288164138793945} +03/05/2022 00:19:52 - INFO - codeparrot_training - Step 29677: {'lr': 0.0004580823770787034, 'samples': 15195136, 'steps': 29677, 'loss/train': 1.4746782779693604} +03/05/2022 00:19:53 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/05/2022 00:19:58 - INFO - codeparrot_training - Step 29678: {'lr': 0.0004580794356034822, 'samples': 15195648, 'steps': 29678, 'loss/train': 1.5826754570007324} +03/05/2022 00:20:01 - INFO - codeparrot_training - Step 29679: {'lr': 0.0004580764940345036, 'samples': 15196160, 'steps': 29679, 'loss/train': 1.5396908521652222} +03/05/2022 00:20:01 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/05/2022 00:20:06 - INFO - codeparrot_training - Step 29680: {'lr': 0.00045807355237176896, 'samples': 15196672, 'steps': 29680, 'loss/train': 1.6554358005523682} +03/05/2022 00:20:09 - INFO - codeparrot_training - Step 29681: {'lr': 0.0004580706106152796, 'samples': 15197184, 'steps': 29681, 'loss/train': 1.8935580253601074} +03/05/2022 00:20:10 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 00:20:14 - INFO - codeparrot_training - Step 29682: {'lr': 0.00045806766876503683, 'samples': 15197696, 'steps': 29682, 'loss/train': 2.466559648513794} +03/05/2022 00:20:18 - INFO - codeparrot_training - Step 29683: {'lr': 0.000458064726821042, 'samples': 15198208, 'steps': 29683, 'loss/train': 0.12548203766345978} +03/05/2022 00:20:18 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/05/2022 00:20:23 - INFO - codeparrot_training - Step 29684: {'lr': 0.0004580617847832964, 'samples': 15198720, 'steps': 29684, 'loss/train': 1.2880687713623047} +03/05/2022 00:20:26 - INFO - codeparrot_training - Step 29685: {'lr': 0.0004580588426518013, 'samples': 15199232, 'steps': 29685, 'loss/train': 1.0839146375656128} +03/05/2022 00:20:27 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/05/2022 00:20:31 - INFO - codeparrot_training - Step 29686: {'lr': 0.0004580559004265582, 'samples': 15199744, 'steps': 29686, 'loss/train': 0.9003174901008606} +03/05/2022 00:20:35 - INFO - codeparrot_training - Step 29687: {'lr': 0.0004580529581075683, 'samples': 15200256, 'steps': 29687, 'loss/train': 1.7114344835281372} +03/05/2022 00:20:35 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 00:20:40 - INFO - codeparrot_training - Step 29688: {'lr': 0.0004580500156948329, 'samples': 15200768, 'steps': 29688, 'loss/train': 1.1175161600112915} +03/05/2022 00:20:43 - INFO - codeparrot_training - Step 29689: {'lr': 0.0004580470731883534, 'samples': 15201280, 'steps': 29689, 'loss/train': 1.391488790512085} +03/05/2022 00:20:45 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/05/2022 00:20:48 - INFO - codeparrot_training - Step 29690: {'lr': 0.0004580441305881311, 'samples': 15201792, 'steps': 29690, 'loss/train': 3.170132637023926} +03/05/2022 00:20:51 - INFO - codeparrot_training - Step 29691: {'lr': 0.0004580411878941673, 'samples': 15202304, 'steps': 29691, 'loss/train': 1.9590134620666504} +03/05/2022 00:20:53 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/05/2022 00:20:57 - INFO - codeparrot_training - Step 29692: {'lr': 0.0004580382451064634, 'samples': 15202816, 'steps': 29692, 'loss/train': 2.138234853744507} +03/05/2022 00:21:00 - INFO - codeparrot_training - Step 29693: {'lr': 0.00045803530222502065, 'samples': 15203328, 'steps': 29693, 'loss/train': 1.8251312971115112} +03/05/2022 00:21:02 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/05/2022 00:21:05 - INFO - codeparrot_training - Step 29694: {'lr': 0.0004580323592498404, 'samples': 15203840, 'steps': 29694, 'loss/train': 2.222379446029663} +03/05/2022 00:21:08 - INFO - codeparrot_training - Step 29695: {'lr': 0.00045802941618092397, 'samples': 15204352, 'steps': 29695, 'loss/train': 0.7423340082168579} +03/05/2022 00:21:10 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 00:21:14 - INFO - codeparrot_training - Step 29696: {'lr': 0.0004580264730182727, 'samples': 15204864, 'steps': 29696, 'loss/train': 1.9618735313415527} +03/05/2022 00:21:17 - INFO - codeparrot_training - Step 29697: {'lr': 0.000458023529761888, 'samples': 15205376, 'steps': 29697, 'loss/train': 2.4216809272766113} +03/05/2022 00:21:18 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 00:21:22 - INFO - codeparrot_training - Step 29698: {'lr': 0.00045802058641177104, 'samples': 15205888, 'steps': 29698, 'loss/train': 1.8496674299240112} +03/05/2022 00:21:25 - INFO - codeparrot_training - Step 29699: {'lr': 0.00045801764296792317, 'samples': 15206400, 'steps': 29699, 'loss/train': 1.9643638134002686} +03/05/2022 00:21:27 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 00:21:30 - INFO - codeparrot_training - Step 29700: {'lr': 0.0004580146994303458, 'samples': 15206912, 'steps': 29700, 'loss/train': 2.1131811141967773} +03/05/2022 00:21:34 - INFO - codeparrot_training - Step 29701: {'lr': 0.0004580117557990402, 'samples': 15207424, 'steps': 29701, 'loss/train': 2.038806200027466} +03/05/2022 00:21:35 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 00:21:39 - INFO - codeparrot_training - Step 29702: {'lr': 0.0004580088120740077, 'samples': 15207936, 'steps': 29702, 'loss/train': 2.4480178356170654} +03/05/2022 00:21:42 - INFO - codeparrot_training - Step 29703: {'lr': 0.0004580058682552497, 'samples': 15208448, 'steps': 29703, 'loss/train': 1.6460272073745728} +03/05/2022 00:21:43 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 00:21:48 - INFO - codeparrot_training - Step 29704: {'lr': 0.00045800292434276736, 'samples': 15208960, 'steps': 29704, 'loss/train': 1.217252492904663} +03/05/2022 00:21:51 - INFO - codeparrot_training - Step 29705: {'lr': 0.0004579999803365622, 'samples': 15209472, 'steps': 29705, 'loss/train': 1.1021175384521484} +03/05/2022 00:21:52 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/05/2022 00:21:56 - INFO - codeparrot_training - Step 29706: {'lr': 0.00045799703623663546, 'samples': 15209984, 'steps': 29706, 'loss/train': 1.5363919734954834} +03/05/2022 00:21:59 - INFO - codeparrot_training - Step 29707: {'lr': 0.00045799409204298844, 'samples': 15210496, 'steps': 29707, 'loss/train': 1.032495379447937} +03/05/2022 00:22:01 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/05/2022 00:22:04 - INFO - codeparrot_training - Step 29708: {'lr': 0.00045799114775562245, 'samples': 15211008, 'steps': 29708, 'loss/train': 1.1356053352355957} +03/05/2022 00:22:07 - INFO - codeparrot_training - Step 29709: {'lr': 0.00045798820337453894, 'samples': 15211520, 'steps': 29709, 'loss/train': 1.7650761604309082} +03/05/2022 00:22:09 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 00:22:13 - INFO - codeparrot_training - Step 29710: {'lr': 0.00045798525889973905, 'samples': 15212032, 'steps': 29710, 'loss/train': 1.5482856035232544} +03/05/2022 00:22:16 - INFO - codeparrot_training - Step 29711: {'lr': 0.00045798231433122436, 'samples': 15212544, 'steps': 29711, 'loss/train': 2.1354830265045166} +03/05/2022 00:22:17 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/05/2022 00:22:21 - INFO - codeparrot_training - Step 29712: {'lr': 0.00045797936966899595, 'samples': 15213056, 'steps': 29712, 'loss/train': 1.8377057313919067} +03/05/2022 00:22:24 - INFO - codeparrot_training - Step 29713: {'lr': 0.00045797642491305523, 'samples': 15213568, 'steps': 29713, 'loss/train': 2.0352139472961426} +03/05/2022 00:22:25 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/05/2022 00:22:30 - INFO - codeparrot_training - Step 29714: {'lr': 0.0004579734800634036, 'samples': 15214080, 'steps': 29714, 'loss/train': 1.6556828022003174} +03/05/2022 00:22:33 - INFO - codeparrot_training - Step 29715: {'lr': 0.0004579705351200423, 'samples': 15214592, 'steps': 29715, 'loss/train': 2.6244359016418457} +03/05/2022 00:22:34 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/05/2022 00:22:38 - INFO - codeparrot_training - Step 29716: {'lr': 0.0004579675900829727, 'samples': 15215104, 'steps': 29716, 'loss/train': 1.44682776927948} +03/05/2022 00:22:41 - INFO - codeparrot_training - Step 29717: {'lr': 0.00045796464495219614, 'samples': 15215616, 'steps': 29717, 'loss/train': 1.3237671852111816} +03/05/2022 00:22:42 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/05/2022 00:22:46 - INFO - codeparrot_training - Step 29718: {'lr': 0.00045796169972771387, 'samples': 15216128, 'steps': 29718, 'loss/train': 0.8220032453536987} +03/05/2022 00:22:49 - INFO - codeparrot_training - Step 29719: {'lr': 0.00045795875440952726, 'samples': 15216640, 'steps': 29719, 'loss/train': 1.8861268758773804} +03/05/2022 00:22:50 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 00:22:55 - INFO - codeparrot_training - Step 29720: {'lr': 0.00045795580899763767, 'samples': 15217152, 'steps': 29720, 'loss/train': 1.9192782640457153} +03/05/2022 00:22:58 - INFO - codeparrot_training - Step 29721: {'lr': 0.00045795286349204633, 'samples': 15217664, 'steps': 29721, 'loss/train': 1.7380495071411133} +03/05/2022 00:22:58 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/05/2022 00:23:03 - INFO - codeparrot_training - Step 29722: {'lr': 0.0004579499178927547, 'samples': 15218176, 'steps': 29722, 'loss/train': 1.0124503374099731} +03/05/2022 00:23:06 - INFO - codeparrot_training - Step 29723: {'lr': 0.0004579469721997641, 'samples': 15218688, 'steps': 29723, 'loss/train': 1.9146580696105957} +03/05/2022 00:23:07 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 00:23:12 - INFO - codeparrot_training - Step 29724: {'lr': 0.0004579440264130758, 'samples': 15219200, 'steps': 29724, 'loss/train': 1.272321343421936} +03/05/2022 00:23:15 - INFO - codeparrot_training - Step 29725: {'lr': 0.000457941080532691, 'samples': 15219712, 'steps': 29725, 'loss/train': 0.6490805149078369} +03/05/2022 00:23:15 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/05/2022 00:23:20 - INFO - codeparrot_training - Step 29726: {'lr': 0.0004579381345586113, 'samples': 15220224, 'steps': 29726, 'loss/train': 1.2653623819351196} +03/05/2022 00:23:23 - INFO - codeparrot_training - Step 29727: {'lr': 0.0004579351884908378, 'samples': 15220736, 'steps': 29727, 'loss/train': 2.292475938796997} +03/05/2022 00:23:25 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/05/2022 00:23:29 - INFO - codeparrot_training - Step 29728: {'lr': 0.00045793224232937193, 'samples': 15221248, 'steps': 29728, 'loss/train': 1.8938950300216675} +03/05/2022 00:23:32 - INFO - codeparrot_training - Step 29729: {'lr': 0.0004579292960742151, 'samples': 15221760, 'steps': 29729, 'loss/train': 1.7529852390289307} +03/05/2022 00:23:33 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/05/2022 00:23:37 - INFO - codeparrot_training - Step 29730: {'lr': 0.0004579263497253684, 'samples': 15222272, 'steps': 29730, 'loss/train': 1.7478187084197998} +03/05/2022 00:23:40 - INFO - codeparrot_training - Step 29731: {'lr': 0.00045792340328283334, 'samples': 15222784, 'steps': 29731, 'loss/train': 1.9145957231521606} +03/05/2022 00:23:42 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 00:23:46 - INFO - codeparrot_training - Step 29732: {'lr': 0.0004579204567466112, 'samples': 15223296, 'steps': 29732, 'loss/train': 2.0138182640075684} +03/05/2022 00:23:49 - INFO - codeparrot_training - Step 29733: {'lr': 0.0004579175101167033, 'samples': 15223808, 'steps': 29733, 'loss/train': 1.0153049230575562} +03/05/2022 00:23:50 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/05/2022 00:23:54 - INFO - codeparrot_training - Step 29734: {'lr': 0.000457914563393111, 'samples': 15224320, 'steps': 29734, 'loss/train': 1.529606580734253} +03/05/2022 00:23:57 - INFO - codeparrot_training - Step 29735: {'lr': 0.00045791161657583555, 'samples': 15224832, 'steps': 29735, 'loss/train': 1.5589978694915771} +03/05/2022 00:23:58 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 00:24:02 - INFO - codeparrot_training - Step 29736: {'lr': 0.00045790866966487843, 'samples': 15225344, 'steps': 29736, 'loss/train': 1.4167641401290894} +03/05/2022 00:24:06 - INFO - codeparrot_training - Step 29737: {'lr': 0.0004579057226602408, 'samples': 15225856, 'steps': 29737, 'loss/train': 2.1128108501434326} +03/05/2022 00:24:07 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 00:24:11 - INFO - codeparrot_training - Step 29738: {'lr': 0.00045790277556192414, 'samples': 15226368, 'steps': 29738, 'loss/train': 1.8649201393127441} +03/05/2022 00:24:14 - INFO - codeparrot_training - Step 29739: {'lr': 0.0004578998283699296, 'samples': 15226880, 'steps': 29739, 'loss/train': 1.8969552516937256} +03/05/2022 00:24:15 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/05/2022 00:24:19 - INFO - codeparrot_training - Step 29740: {'lr': 0.0004578968810842586, 'samples': 15227392, 'steps': 29740, 'loss/train': 1.7157353162765503} +03/05/2022 00:24:22 - INFO - codeparrot_training - Step 29741: {'lr': 0.0004578939337049126, 'samples': 15227904, 'steps': 29741, 'loss/train': 1.7864376306533813} +03/05/2022 00:24:23 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/05/2022 00:24:28 - INFO - codeparrot_training - Step 29742: {'lr': 0.0004578909862318927, 'samples': 15228416, 'steps': 29742, 'loss/train': 1.288945198059082} +03/05/2022 00:24:31 - INFO - codeparrot_training - Step 29743: {'lr': 0.00045788803866520037, 'samples': 15228928, 'steps': 29743, 'loss/train': 0.7327784895896912} +03/05/2022 00:24:32 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 00:24:36 - INFO - codeparrot_training - Step 29744: {'lr': 0.0004578850910048369, 'samples': 15229440, 'steps': 29744, 'loss/train': 1.9166655540466309} +03/05/2022 00:24:39 - INFO - codeparrot_training - Step 29745: {'lr': 0.0004578821432508036, 'samples': 15229952, 'steps': 29745, 'loss/train': 2.2561233043670654} +03/05/2022 00:24:40 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 00:24:45 - INFO - codeparrot_training - Step 29746: {'lr': 0.00045787919540310175, 'samples': 15230464, 'steps': 29746, 'loss/train': 0.46884867548942566} +03/05/2022 00:24:48 - INFO - codeparrot_training - Step 29747: {'lr': 0.0004578762474617328, 'samples': 15230976, 'steps': 29747, 'loss/train': 0.8854328989982605} +03/05/2022 00:24:53 - INFO - codeparrot_training - Step 29748: {'lr': 0.00045787329942669803, 'samples': 15231488, 'steps': 29748, 'loss/train': 1.5276596546173096} +03/05/2022 00:24:56 - INFO - codeparrot_training - Step 29749: {'lr': 0.0004578703512979988, 'samples': 15232000, 'steps': 29749, 'loss/train': 2.0027172565460205} +03/05/2022 00:24:57 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/05/2022 00:25:01 - INFO - codeparrot_training - Step 29750: {'lr': 0.00045786740307563633, 'samples': 15232512, 'steps': 29750, 'loss/train': 1.809160828590393} +03/05/2022 00:25:05 - INFO - codeparrot_training - Step 29751: {'lr': 0.000457864454759612, 'samples': 15233024, 'steps': 29751, 'loss/train': 2.1877565383911133} +03/05/2022 00:25:05 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 00:25:10 - INFO - codeparrot_training - Step 29752: {'lr': 0.00045786150634992716, 'samples': 15233536, 'steps': 29752, 'loss/train': 1.768035650253296} +03/05/2022 00:25:13 - INFO - codeparrot_training - Step 29753: {'lr': 0.0004578585578465833, 'samples': 15234048, 'steps': 29753, 'loss/train': 2.0426881313323975} +03/05/2022 00:25:14 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/05/2022 00:25:18 - INFO - codeparrot_training - Step 29754: {'lr': 0.00045785560924958135, 'samples': 15234560, 'steps': 29754, 'loss/train': 1.62642240524292} +03/05/2022 00:25:22 - INFO - codeparrot_training - Step 29755: {'lr': 0.00045785266055892296, 'samples': 15235072, 'steps': 29755, 'loss/train': 1.9782161712646484} +03/05/2022 00:25:22 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/05/2022 00:25:27 - INFO - codeparrot_training - Step 29756: {'lr': 0.0004578497117746094, 'samples': 15235584, 'steps': 29756, 'loss/train': 0.8306182622909546} +03/05/2022 00:25:30 - INFO - codeparrot_training - Step 29757: {'lr': 0.00045784676289664194, 'samples': 15236096, 'steps': 29757, 'loss/train': 0.8733512163162231} +03/05/2022 00:25:31 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 00:25:35 - INFO - codeparrot_training - Step 29758: {'lr': 0.00045784381392502193, 'samples': 15236608, 'steps': 29758, 'loss/train': 0.11854469776153564} +03/05/2022 00:25:39 - INFO - codeparrot_training - Step 29759: {'lr': 0.00045784086485975076, 'samples': 15237120, 'steps': 29759, 'loss/train': 1.9815386533737183} +03/05/2022 00:25:39 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 00:25:44 - INFO - codeparrot_training - Step 29760: {'lr': 0.00045783791570082956, 'samples': 15237632, 'steps': 29760, 'loss/train': 2.071347236633301} +03/05/2022 00:25:47 - INFO - codeparrot_training - Step 29761: {'lr': 0.00045783496644825997, 'samples': 15238144, 'steps': 29761, 'loss/train': 1.8988083600997925} +03/05/2022 00:25:47 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/05/2022 00:25:52 - INFO - codeparrot_training - Step 29762: {'lr': 0.000457832017102043, 'samples': 15238656, 'steps': 29762, 'loss/train': 1.7896339893341064} +03/05/2022 00:25:55 - INFO - codeparrot_training - Step 29763: {'lr': 0.00045782906766218026, 'samples': 15239168, 'steps': 29763, 'loss/train': 2.400224447250366} +03/05/2022 00:25:56 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 00:26:01 - INFO - codeparrot_training - Step 29764: {'lr': 0.00045782611812867285, 'samples': 15239680, 'steps': 29764, 'loss/train': 1.7666404247283936} +03/05/2022 00:26:04 - INFO - codeparrot_training - Step 29765: {'lr': 0.0004578231685015223, 'samples': 15240192, 'steps': 29765, 'loss/train': 1.6612834930419922} +03/05/2022 00:26:04 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 00:26:09 - INFO - codeparrot_training - Step 29766: {'lr': 0.00045782021878072976, 'samples': 15240704, 'steps': 29766, 'loss/train': 0.5175473093986511} +03/05/2022 00:26:12 - INFO - codeparrot_training - Step 29767: {'lr': 0.0004578172689662967, 'samples': 15241216, 'steps': 29767, 'loss/train': 1.9802935123443604} +03/05/2022 00:26:12 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/05/2022 00:26:17 - INFO - codeparrot_training - Step 29768: {'lr': 0.0004578143190582243, 'samples': 15241728, 'steps': 29768, 'loss/train': 1.9796173572540283} +03/05/2022 00:26:21 - INFO - codeparrot_training - Step 29769: {'lr': 0.000457811369056514, 'samples': 15242240, 'steps': 29769, 'loss/train': 1.405814290046692} +03/05/2022 00:26:21 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 00:26:26 - INFO - codeparrot_training - Step 29770: {'lr': 0.0004578084189611671, 'samples': 15242752, 'steps': 29770, 'loss/train': 1.1862342357635498} +03/05/2022 00:26:29 - INFO - codeparrot_training - Step 29771: {'lr': 0.000457805468772185, 'samples': 15243264, 'steps': 29771, 'loss/train': 1.620605707168579} +03/05/2022 00:26:29 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/05/2022 00:26:34 - INFO - codeparrot_training - Step 29772: {'lr': 0.00045780251848956887, 'samples': 15243776, 'steps': 29772, 'loss/train': 1.3739683628082275} +03/05/2022 00:26:37 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/05/2022 00:26:40 - INFO - codeparrot_training - Step 29773: {'lr': 0.0004577995681133202, 'samples': 15244288, 'steps': 29773, 'loss/train': 1.7113828659057617} +03/05/2022 00:26:43 - INFO - codeparrot_training - Step 29774: {'lr': 0.00045779661764344025, 'samples': 15244800, 'steps': 29774, 'loss/train': 1.5478070974349976} +03/05/2022 00:26:45 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 00:26:48 - INFO - codeparrot_training - Step 29775: {'lr': 0.0004577936670799303, 'samples': 15245312, 'steps': 29775, 'loss/train': 1.6475026607513428} +03/05/2022 00:26:51 - INFO - codeparrot_training - Step 29776: {'lr': 0.00045779071642279177, 'samples': 15245824, 'steps': 29776, 'loss/train': 1.8982794284820557} +03/05/2022 00:26:53 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/05/2022 00:26:56 - INFO - codeparrot_training - Step 29777: {'lr': 0.00045778776567202597, 'samples': 15246336, 'steps': 29777, 'loss/train': 1.674333095550537} +03/05/2022 00:26:59 - INFO - codeparrot_training - Step 29778: {'lr': 0.0004577848148276341, 'samples': 15246848, 'steps': 29778, 'loss/train': 2.3123972415924072} +03/05/2022 00:27:02 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/05/2022 00:27:05 - INFO - codeparrot_training - Step 29779: {'lr': 0.00045778186388961776, 'samples': 15247360, 'steps': 29779, 'loss/train': 1.2153793573379517} +03/05/2022 00:27:08 - INFO - codeparrot_training - Step 29780: {'lr': 0.000457778912857978, 'samples': 15247872, 'steps': 29780, 'loss/train': 1.726761817932129} +03/05/2022 00:27:10 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 00:27:13 - INFO - codeparrot_training - Step 29781: {'lr': 0.0004577759617327163, 'samples': 15248384, 'steps': 29781, 'loss/train': 1.901078462600708} +03/05/2022 00:27:16 - INFO - codeparrot_training - Step 29782: {'lr': 0.000457773010513834, 'samples': 15248896, 'steps': 29782, 'loss/train': 1.9935625791549683} +03/05/2022 00:27:18 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/05/2022 00:27:22 - INFO - codeparrot_training - Step 29783: {'lr': 0.0004577700592013323, 'samples': 15249408, 'steps': 29783, 'loss/train': 1.8599544763565063} +03/05/2022 00:27:25 - INFO - codeparrot_training - Step 29784: {'lr': 0.0004577671077952127, 'samples': 15249920, 'steps': 29784, 'loss/train': 2.3417301177978516} +03/05/2022 00:27:27 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/05/2022 00:27:30 - INFO - codeparrot_training - Step 29785: {'lr': 0.0004577641562954764, 'samples': 15250432, 'steps': 29785, 'loss/train': 1.7938324213027954} +03/05/2022 00:27:33 - INFO - codeparrot_training - Step 29786: {'lr': 0.00045776120470212477, 'samples': 15250944, 'steps': 29786, 'loss/train': 1.9551998376846313} +03/05/2022 00:27:35 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/05/2022 00:27:38 - INFO - codeparrot_training - Step 29787: {'lr': 0.00045775825301515923, 'samples': 15251456, 'steps': 29787, 'loss/train': 1.5274182558059692} +03/05/2022 00:27:42 - INFO - codeparrot_training - Step 29788: {'lr': 0.00045775530123458096, 'samples': 15251968, 'steps': 29788, 'loss/train': 2.371561288833618} +03/05/2022 00:27:43 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/05/2022 00:27:47 - INFO - codeparrot_training - Step 29789: {'lr': 0.00045775234936039133, 'samples': 15252480, 'steps': 29789, 'loss/train': 1.8421707153320312} +03/05/2022 00:27:50 - INFO - codeparrot_training - Step 29790: {'lr': 0.00045774939739259173, 'samples': 15252992, 'steps': 29790, 'loss/train': 2.2099366188049316} +03/05/2022 00:27:52 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 00:27:55 - INFO - codeparrot_training - Step 29791: {'lr': 0.0004577464453311835, 'samples': 15253504, 'steps': 29791, 'loss/train': 1.3780889511108398} +03/05/2022 00:27:58 - INFO - codeparrot_training - Step 29792: {'lr': 0.00045774349317616786, 'samples': 15254016, 'steps': 29792, 'loss/train': 1.6950688362121582} +03/05/2022 00:28:00 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 00:28:04 - INFO - codeparrot_training - Step 29793: {'lr': 0.00045774054092754624, 'samples': 15254528, 'steps': 29793, 'loss/train': 2.1432509422302246} +03/05/2022 00:28:07 - INFO - codeparrot_training - Step 29794: {'lr': 0.00045773758858531997, 'samples': 15255040, 'steps': 29794, 'loss/train': 1.6225379705429077} +03/05/2022 00:28:08 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/05/2022 00:28:12 - INFO - codeparrot_training - Step 29795: {'lr': 0.0004577346361494903, 'samples': 15255552, 'steps': 29795, 'loss/train': 1.5391451120376587} +03/05/2022 00:28:15 - INFO - codeparrot_training - Step 29796: {'lr': 0.0004577316836200586, 'samples': 15256064, 'steps': 29796, 'loss/train': 1.2449394464492798} +03/05/2022 00:28:16 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/05/2022 00:28:21 - INFO - codeparrot_training - Step 29797: {'lr': 0.0004577287309970262, 'samples': 15256576, 'steps': 29797, 'loss/train': 1.245635986328125} +03/05/2022 00:28:24 - INFO - codeparrot_training - Step 29798: {'lr': 0.0004577257782803945, 'samples': 15257088, 'steps': 29798, 'loss/train': 1.5356526374816895} +03/05/2022 00:28:24 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 00:28:29 - INFO - codeparrot_training - Step 29799: {'lr': 0.00045772282547016475, 'samples': 15257600, 'steps': 29799, 'loss/train': 1.12308931350708} +03/05/2022 00:28:32 - INFO - codeparrot_training - Step 29800: {'lr': 0.0004577198725663383, 'samples': 15258112, 'steps': 29800, 'loss/train': 1.3153727054595947} +03/05/2022 00:28:33 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/05/2022 00:28:37 - INFO - codeparrot_training - Step 29801: {'lr': 0.00045771691956891645, 'samples': 15258624, 'steps': 29801, 'loss/train': 1.8563051223754883} +03/05/2022 00:28:40 - INFO - codeparrot_training - Step 29802: {'lr': 0.00045771396647790053, 'samples': 15259136, 'steps': 29802, 'loss/train': 0.5884143710136414} +03/05/2022 00:28:41 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 00:28:46 - INFO - codeparrot_training - Step 29803: {'lr': 0.00045771101329329195, 'samples': 15259648, 'steps': 29803, 'loss/train': 1.2342315912246704} +03/05/2022 00:28:49 - INFO - codeparrot_training - Step 29804: {'lr': 0.00045770806001509205, 'samples': 15260160, 'steps': 29804, 'loss/train': 2.640705108642578} +03/05/2022 00:28:50 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/05/2022 00:28:54 - INFO - codeparrot_training - Step 29805: {'lr': 0.00045770510664330203, 'samples': 15260672, 'steps': 29805, 'loss/train': 2.3832898139953613} +03/05/2022 00:28:57 - INFO - codeparrot_training - Step 29806: {'lr': 0.0004577021531779233, 'samples': 15261184, 'steps': 29806, 'loss/train': 1.4057444334030151} +03/05/2022 00:28:58 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 00:29:03 - INFO - codeparrot_training - Step 29807: {'lr': 0.00045769919961895716, 'samples': 15261696, 'steps': 29807, 'loss/train': 0.47574424743652344} +03/05/2022 00:29:06 - INFO - codeparrot_training - Step 29808: {'lr': 0.000457696245966405, 'samples': 15262208, 'steps': 29808, 'loss/train': 1.0762819051742554} +03/05/2022 00:29:07 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/05/2022 00:29:11 - INFO - codeparrot_training - Step 29809: {'lr': 0.0004576932922202681, 'samples': 15262720, 'steps': 29809, 'loss/train': 1.8180761337280273} +03/05/2022 00:29:14 - INFO - codeparrot_training - Step 29810: {'lr': 0.00045769033838054783, 'samples': 15263232, 'steps': 29810, 'loss/train': 1.8638055324554443} +03/05/2022 00:29:15 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 00:29:20 - INFO - codeparrot_training - Step 29811: {'lr': 0.0004576873844472455, 'samples': 15263744, 'steps': 29811, 'loss/train': 1.3484022617340088} +03/05/2022 00:29:23 - INFO - codeparrot_training - Step 29812: {'lr': 0.00045768443042036247, 'samples': 15264256, 'steps': 29812, 'loss/train': 1.748068928718567} +03/05/2022 00:29:23 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/05/2022 00:29:28 - INFO - codeparrot_training - Step 29813: {'lr': 0.0004576814762999, 'samples': 15264768, 'steps': 29813, 'loss/train': 1.978164792060852} +03/05/2022 00:29:31 - INFO - codeparrot_training - Step 29814: {'lr': 0.00045767852208585945, 'samples': 15265280, 'steps': 29814, 'loss/train': 1.5703387260437012} +03/05/2022 00:29:31 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/05/2022 00:29:36 - INFO - codeparrot_training - Step 29815: {'lr': 0.00045767556777824217, 'samples': 15265792, 'steps': 29815, 'loss/train': 1.2389801740646362} +03/05/2022 00:29:40 - INFO - codeparrot_training - Step 29816: {'lr': 0.00045767261337704946, 'samples': 15266304, 'steps': 29816, 'loss/train': 1.7846262454986572} +03/05/2022 00:29:40 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/05/2022 00:29:45 - INFO - codeparrot_training - Step 29817: {'lr': 0.00045766965888228273, 'samples': 15266816, 'steps': 29817, 'loss/train': 0.7802075743675232} +03/05/2022 00:29:48 - INFO - codeparrot_training - Step 29818: {'lr': 0.00045766670429394317, 'samples': 15267328, 'steps': 29818, 'loss/train': 1.8447935581207275} +03/05/2022 00:29:48 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/05/2022 00:29:54 - INFO - codeparrot_training - Step 29819: {'lr': 0.00045766374961203236, 'samples': 15267840, 'steps': 29819, 'loss/train': 1.563899040222168} +03/05/2022 00:29:57 - INFO - codeparrot_training - Step 29820: {'lr': 0.0004576607948365513, 'samples': 15268352, 'steps': 29820, 'loss/train': 0.20916035771369934} +03/05/2022 00:29:57 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/05/2022 00:30:02 - INFO - codeparrot_training - Step 29821: {'lr': 0.0004576578399675015, 'samples': 15268864, 'steps': 29821, 'loss/train': 0.733149528503418} +03/05/2022 00:30:05 - INFO - codeparrot_training - Step 29822: {'lr': 0.00045765488500488437, 'samples': 15269376, 'steps': 29822, 'loss/train': 1.7109016180038452} +03/05/2022 00:30:06 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 00:30:10 - INFO - codeparrot_training - Step 29823: {'lr': 0.0004576519299487012, 'samples': 15269888, 'steps': 29823, 'loss/train': 1.7575656175613403} +03/05/2022 00:30:14 - INFO - codeparrot_training - Step 29824: {'lr': 0.00045764897479895315, 'samples': 15270400, 'steps': 29824, 'loss/train': 1.2565714120864868} +03/05/2022 00:30:14 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/05/2022 00:30:19 - INFO - codeparrot_training - Step 29825: {'lr': 0.0004576460195556418, 'samples': 15270912, 'steps': 29825, 'loss/train': 1.1597185134887695} +03/05/2022 00:30:22 - INFO - codeparrot_training - Step 29826: {'lr': 0.0004576430642187682, 'samples': 15271424, 'steps': 29826, 'loss/train': 1.9400850534439087} +03/05/2022 00:30:22 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 00:30:27 - INFO - codeparrot_training - Step 29827: {'lr': 0.00045764010878833396, 'samples': 15271936, 'steps': 29827, 'loss/train': 2.006314992904663} +03/05/2022 00:30:30 - INFO - codeparrot_training - Step 29828: {'lr': 0.00045763715326434023, 'samples': 15272448, 'steps': 29828, 'loss/train': 1.271968126296997} +03/05/2022 00:30:30 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/05/2022 00:30:36 - INFO - codeparrot_training - Step 29829: {'lr': 0.0004576341976467884, 'samples': 15272960, 'steps': 29829, 'loss/train': 2.4425759315490723} +03/05/2022 00:30:38 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 00:30:41 - INFO - codeparrot_training - Step 29830: {'lr': 0.00045763124193567983, 'samples': 15273472, 'steps': 29830, 'loss/train': 1.6264653205871582} +03/05/2022 00:30:44 - INFO - codeparrot_training - Step 29831: {'lr': 0.0004576282861310158, 'samples': 15273984, 'steps': 29831, 'loss/train': 1.4065762758255005} +03/05/2022 00:30:47 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 00:30:49 - INFO - codeparrot_training - Step 29832: {'lr': 0.00045762533023279773, 'samples': 15274496, 'steps': 29832, 'loss/train': 1.361911416053772} +03/05/2022 00:30:52 - INFO - codeparrot_training - Step 29833: {'lr': 0.00045762237424102687, 'samples': 15275008, 'steps': 29833, 'loss/train': 1.993345022201538} +03/05/2022 00:30:55 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/05/2022 00:30:58 - INFO - codeparrot_training - Step 29834: {'lr': 0.0004576194181557045, 'samples': 15275520, 'steps': 29834, 'loss/train': 1.3203306198120117} +03/05/2022 00:31:01 - INFO - codeparrot_training - Step 29835: {'lr': 0.00045761646197683216, 'samples': 15276032, 'steps': 29835, 'loss/train': 1.8211629390716553} +03/05/2022 00:31:04 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 00:31:07 - INFO - codeparrot_training - Step 29836: {'lr': 0.00045761350570441096, 'samples': 15276544, 'steps': 29836, 'loss/train': 1.8432252407073975} +03/05/2022 00:31:10 - INFO - codeparrot_training - Step 29837: {'lr': 0.0004576105493384423, 'samples': 15277056, 'steps': 29837, 'loss/train': 2.1293351650238037} +03/05/2022 00:31:13 - INFO - codeparrot_training - Step 29838: {'lr': 0.00045760759287892755, 'samples': 15277568, 'steps': 29838, 'loss/train': 1.832953691482544} +03/05/2022 00:31:14 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 00:31:18 - INFO - codeparrot_training - Step 29839: {'lr': 0.000457604636325868, 'samples': 15278080, 'steps': 29839, 'loss/train': 1.5511794090270996} +03/05/2022 00:31:21 - INFO - codeparrot_training - Step 29840: {'lr': 0.00045760167967926504, 'samples': 15278592, 'steps': 29840, 'loss/train': 1.9861503839492798} +03/05/2022 00:31:22 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 00:31:27 - INFO - codeparrot_training - Step 29841: {'lr': 0.00045759872293911995, 'samples': 15279104, 'steps': 29841, 'loss/train': 1.2327396869659424} +03/05/2022 00:31:30 - INFO - codeparrot_training - Step 29842: {'lr': 0.00045759576610543407, 'samples': 15279616, 'steps': 29842, 'loss/train': 1.9080917835235596} +03/05/2022 00:31:30 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/05/2022 00:31:35 - INFO - codeparrot_training - Step 29843: {'lr': 0.0004575928091782088, 'samples': 15280128, 'steps': 29843, 'loss/train': 1.903252363204956} +03/05/2022 00:31:38 - INFO - codeparrot_training - Step 29844: {'lr': 0.00045758985215744536, 'samples': 15280640, 'steps': 29844, 'loss/train': 1.5287150144577026} +03/05/2022 00:31:38 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/05/2022 00:31:44 - INFO - codeparrot_training - Step 29845: {'lr': 0.0004575868950431452, 'samples': 15281152, 'steps': 29845, 'loss/train': 7.017806529998779} +03/05/2022 00:31:47 - INFO - codeparrot_training - Step 29846: {'lr': 0.0004575839378353095, 'samples': 15281664, 'steps': 29846, 'loss/train': 0.9437267184257507} +03/05/2022 00:31:48 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/05/2022 00:31:52 - INFO - codeparrot_training - Step 29847: {'lr': 0.0004575809805339397, 'samples': 15282176, 'steps': 29847, 'loss/train': 6.641016483306885} +03/05/2022 00:31:55 - INFO - codeparrot_training - Step 29848: {'lr': 0.0004575780231390371, 'samples': 15282688, 'steps': 29848, 'loss/train': 1.6209219694137573} +03/05/2022 00:31:57 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 00:32:00 - INFO - codeparrot_training - Step 29849: {'lr': 0.0004575750656506031, 'samples': 15283200, 'steps': 29849, 'loss/train': 0.634583592414856} +03/05/2022 00:32:03 - INFO - codeparrot_training - Step 29850: {'lr': 0.00045757210806863895, 'samples': 15283712, 'steps': 29850, 'loss/train': 1.173998236656189} +03/05/2022 00:32:05 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/05/2022 00:32:09 - INFO - codeparrot_training - Step 29851: {'lr': 0.0004575691503931461, 'samples': 15284224, 'steps': 29851, 'loss/train': 1.6008409261703491} +03/05/2022 00:32:12 - INFO - codeparrot_training - Step 29852: {'lr': 0.00045756619262412565, 'samples': 15284736, 'steps': 29852, 'loss/train': 1.976826548576355} +03/05/2022 00:32:13 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/05/2022 00:32:17 - INFO - codeparrot_training - Step 29853: {'lr': 0.0004575632347615791, 'samples': 15285248, 'steps': 29853, 'loss/train': 0.8037336468696594} +03/05/2022 00:32:20 - INFO - codeparrot_training - Step 29854: {'lr': 0.0004575602768055078, 'samples': 15285760, 'steps': 29854, 'loss/train': 1.617974042892456} +03/05/2022 00:32:21 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/05/2022 00:32:26 - INFO - codeparrot_training - Step 29855: {'lr': 0.00045755731875591303, 'samples': 15286272, 'steps': 29855, 'loss/train': 1.691144347190857} +03/05/2022 00:32:29 - INFO - codeparrot_training - Step 29856: {'lr': 0.0004575543606127961, 'samples': 15286784, 'steps': 29856, 'loss/train': 0.07484892010688782} +03/05/2022 00:32:30 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/05/2022 00:32:34 - INFO - codeparrot_training - Step 29857: {'lr': 0.0004575514023761585, 'samples': 15287296, 'steps': 29857, 'loss/train': 1.8722554445266724} +03/05/2022 00:32:37 - INFO - codeparrot_training - Step 29858: {'lr': 0.00045754844404600136, 'samples': 15287808, 'steps': 29858, 'loss/train': 1.8896632194519043} +03/05/2022 00:32:38 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 00:32:42 - INFO - codeparrot_training - Step 29859: {'lr': 0.00045754548562232605, 'samples': 15288320, 'steps': 29859, 'loss/train': 2.0749549865722656} +03/05/2022 00:32:46 - INFO - codeparrot_training - Step 29860: {'lr': 0.00045754252710513397, 'samples': 15288832, 'steps': 29860, 'loss/train': 1.7416914701461792} +03/05/2022 00:32:46 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 00:32:51 - INFO - codeparrot_training - Step 29861: {'lr': 0.00045753956849442647, 'samples': 15289344, 'steps': 29861, 'loss/train': 0.9600059390068054} +03/05/2022 00:32:54 - INFO - codeparrot_training - Step 29862: {'lr': 0.00045753660979020485, 'samples': 15289856, 'steps': 29862, 'loss/train': 2.124492645263672} +03/05/2022 00:32:55 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/05/2022 00:32:59 - INFO - codeparrot_training - Step 29863: {'lr': 0.0004575336509924704, 'samples': 15290368, 'steps': 29863, 'loss/train': 2.2291338443756104} +03/05/2022 00:33:02 - INFO - codeparrot_training - Step 29864: {'lr': 0.0004575306921012245, 'samples': 15290880, 'steps': 29864, 'loss/train': 3.1992719173431396} +03/05/2022 00:33:03 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 00:33:08 - INFO - codeparrot_training - Step 29865: {'lr': 0.00045752773311646846, 'samples': 15291392, 'steps': 29865, 'loss/train': 1.3580504655838013} +03/05/2022 00:33:11 - INFO - codeparrot_training - Step 29866: {'lr': 0.0004575247740382037, 'samples': 15291904, 'steps': 29866, 'loss/train': 1.603963851928711} +03/05/2022 00:33:11 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/05/2022 00:33:16 - INFO - codeparrot_training - Step 29867: {'lr': 0.0004575218148664314, 'samples': 15292416, 'steps': 29867, 'loss/train': 1.3678820133209229} +03/05/2022 00:33:19 - INFO - codeparrot_training - Step 29868: {'lr': 0.00045751885560115294, 'samples': 15292928, 'steps': 29868, 'loss/train': 1.4411547183990479} +03/05/2022 00:33:20 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/05/2022 00:33:25 - INFO - codeparrot_training - Step 29869: {'lr': 0.0004575158962423698, 'samples': 15293440, 'steps': 29869, 'loss/train': 1.8885822296142578} +03/05/2022 00:33:28 - INFO - codeparrot_training - Step 29870: {'lr': 0.0004575129367900831, 'samples': 15293952, 'steps': 29870, 'loss/train': 7.112231254577637} +03/05/2022 00:33:28 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/05/2022 00:33:33 - INFO - codeparrot_training - Step 29871: {'lr': 0.0004575099772442943, 'samples': 15294464, 'steps': 29871, 'loss/train': 1.7824209928512573} +03/05/2022 00:33:36 - INFO - codeparrot_training - Step 29872: {'lr': 0.0004575070176050047, 'samples': 15294976, 'steps': 29872, 'loss/train': 2.213797092437744} +03/05/2022 00:33:37 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 00:33:42 - INFO - codeparrot_training - Step 29873: {'lr': 0.00045750405787221566, 'samples': 15295488, 'steps': 29873, 'loss/train': 1.5352163314819336} +03/05/2022 00:33:45 - INFO - codeparrot_training - Step 29874: {'lr': 0.0004575010980459285, 'samples': 15296000, 'steps': 29874, 'loss/train': 1.5212042331695557} +03/05/2022 00:33:45 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/05/2022 00:33:50 - INFO - codeparrot_training - Step 29875: {'lr': 0.0004574981381261445, 'samples': 15296512, 'steps': 29875, 'loss/train': 1.6703989505767822} +03/05/2022 00:33:53 - INFO - codeparrot_training - Step 29876: {'lr': 0.0004574951781128651, 'samples': 15297024, 'steps': 29876, 'loss/train': 1.5002238750457764} +03/05/2022 00:33:53 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/05/2022 00:33:58 - INFO - codeparrot_training - Step 29877: {'lr': 0.0004574922180060915, 'samples': 15297536, 'steps': 29877, 'loss/train': 2.000081777572632} +03/05/2022 00:34:02 - INFO - codeparrot_training - Step 29878: {'lr': 0.0004574892578058252, 'samples': 15298048, 'steps': 29878, 'loss/train': 1.456671953201294} +03/05/2022 00:34:02 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/05/2022 00:34:07 - INFO - codeparrot_training - Step 29879: {'lr': 0.0004574862975120674, 'samples': 15298560, 'steps': 29879, 'loss/train': 1.570813536643982} +03/05/2022 00:34:10 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 00:34:12 - INFO - codeparrot_training - Step 29880: {'lr': 0.0004574833371248195, 'samples': 15299072, 'steps': 29880, 'loss/train': 1.0843474864959717} +03/05/2022 00:34:15 - INFO - codeparrot_training - Step 29881: {'lr': 0.00045748037664408275, 'samples': 15299584, 'steps': 29881, 'loss/train': 1.7602812051773071} +03/05/2022 00:34:18 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/05/2022 00:34:20 - INFO - codeparrot_training - Step 29882: {'lr': 0.0004574774160698586, 'samples': 15300096, 'steps': 29882, 'loss/train': 2.031615734100342} +03/05/2022 00:34:24 - INFO - codeparrot_training - Step 29883: {'lr': 0.00045747445540214826, 'samples': 15300608, 'steps': 29883, 'loss/train': 1.8771326541900635} +03/05/2022 00:34:26 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 00:34:29 - INFO - codeparrot_training - Step 29884: {'lr': 0.00045747149464095324, 'samples': 15301120, 'steps': 29884, 'loss/train': 1.2074363231658936} +03/05/2022 00:34:32 - INFO - codeparrot_training - Step 29885: {'lr': 0.00045746853378627467, 'samples': 15301632, 'steps': 29885, 'loss/train': 2.0074334144592285} +03/05/2022 00:34:35 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/05/2022 00:34:37 - INFO - codeparrot_training - Step 29886: {'lr': 0.000457465572838114, 'samples': 15302144, 'steps': 29886, 'loss/train': 1.3116018772125244} +03/05/2022 00:34:41 - INFO - codeparrot_training - Step 29887: {'lr': 0.0004574626117964726, 'samples': 15302656, 'steps': 29887, 'loss/train': 1.064660906791687} +03/05/2022 00:34:44 - INFO - codeparrot_training - Step 29888: {'lr': 0.00045745965066135163, 'samples': 15303168, 'steps': 29888, 'loss/train': 1.8852994441986084} +03/05/2022 00:34:44 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/05/2022 00:34:49 - INFO - codeparrot_training - Step 29889: {'lr': 0.00045745668943275266, 'samples': 15303680, 'steps': 29889, 'loss/train': 1.6285513639450073} +03/05/2022 00:34:52 - INFO - codeparrot_training - Step 29890: {'lr': 0.00045745372811067687, 'samples': 15304192, 'steps': 29890, 'loss/train': 1.9510889053344727} +03/05/2022 00:34:52 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/05/2022 00:34:58 - INFO - codeparrot_training - Step 29891: {'lr': 0.00045745076669512566, 'samples': 15304704, 'steps': 29891, 'loss/train': 2.5489838123321533} +03/05/2022 00:35:01 - INFO - codeparrot_training - Step 29892: {'lr': 0.0004574478051861003, 'samples': 15305216, 'steps': 29892, 'loss/train': 1.6912596225738525} +03/05/2022 00:35:01 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 00:35:06 - INFO - codeparrot_training - Step 29893: {'lr': 0.00045744484358360216, 'samples': 15305728, 'steps': 29893, 'loss/train': 1.9176524877548218} +03/05/2022 00:35:09 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/05/2022 00:35:11 - INFO - codeparrot_training - Step 29894: {'lr': 0.0004574418818876326, 'samples': 15306240, 'steps': 29894, 'loss/train': 1.7842119932174683} +03/05/2022 00:35:14 - INFO - codeparrot_training - Step 29895: {'lr': 0.0004574389200981929, 'samples': 15306752, 'steps': 29895, 'loss/train': 2.377946615219116} +03/05/2022 00:35:17 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 00:35:20 - INFO - codeparrot_training - Step 29896: {'lr': 0.00045743595821528437, 'samples': 15307264, 'steps': 29896, 'loss/train': 1.564409852027893} +03/05/2022 00:35:23 - INFO - codeparrot_training - Step 29897: {'lr': 0.0004574329962389085, 'samples': 15307776, 'steps': 29897, 'loss/train': 1.9296013116836548} +03/05/2022 00:35:26 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/05/2022 00:35:28 - INFO - codeparrot_training - Step 29898: {'lr': 0.0004574300341690665, 'samples': 15308288, 'steps': 29898, 'loss/train': 0.28194689750671387} +03/05/2022 00:35:31 - INFO - codeparrot_training - Step 29899: {'lr': 0.00045742707200575975, 'samples': 15308800, 'steps': 29899, 'loss/train': 2.3497543334960938} +03/05/2022 00:35:34 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/05/2022 00:35:37 - INFO - codeparrot_training - Step 29900: {'lr': 0.00045742410974898947, 'samples': 15309312, 'steps': 29900, 'loss/train': 1.952285885810852} +03/05/2022 00:35:40 - INFO - codeparrot_training - Step 29901: {'lr': 0.0004574211473987571, 'samples': 15309824, 'steps': 29901, 'loss/train': 1.614179015159607} +03/05/2022 00:35:42 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/05/2022 00:35:45 - INFO - codeparrot_training - Step 29902: {'lr': 0.00045741818495506403, 'samples': 15310336, 'steps': 29902, 'loss/train': 1.7428537607192993} +03/05/2022 00:35:48 - INFO - codeparrot_training - Step 29903: {'lr': 0.0004574152224179115, 'samples': 15310848, 'steps': 29903, 'loss/train': 1.290940284729004} +03/05/2022 00:35:50 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/05/2022 00:35:54 - INFO - codeparrot_training - Step 29904: {'lr': 0.0004574122597873008, 'samples': 15311360, 'steps': 29904, 'loss/train': 1.9227781295776367} +03/05/2022 00:35:57 - INFO - codeparrot_training - Step 29905: {'lr': 0.0004574092970632335, 'samples': 15311872, 'steps': 29905, 'loss/train': 1.6412632465362549} +03/05/2022 00:35:59 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 00:36:02 - INFO - codeparrot_training - Step 29906: {'lr': 0.00045740633424571064, 'samples': 15312384, 'steps': 29906, 'loss/train': 1.2250454425811768} +03/05/2022 00:36:05 - INFO - codeparrot_training - Step 29907: {'lr': 0.00045740337133473374, 'samples': 15312896, 'steps': 29907, 'loss/train': 1.7881981134414673} +03/05/2022 00:36:07 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 00:36:11 - INFO - codeparrot_training - Step 29908: {'lr': 0.00045740040833030404, 'samples': 15313408, 'steps': 29908, 'loss/train': 1.8580331802368164} +03/05/2022 00:36:14 - INFO - codeparrot_training - Step 29909: {'lr': 0.00045739744523242294, 'samples': 15313920, 'steps': 29909, 'loss/train': 2.1727869510650635} +03/05/2022 00:36:17 - INFO - codeparrot_training - Step 29910: {'lr': 0.0004573944820410918, 'samples': 15314432, 'steps': 29910, 'loss/train': 1.52242910861969} +03/05/2022 00:36:17 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 00:36:22 - INFO - codeparrot_training - Step 29911: {'lr': 0.0004573915187563118, 'samples': 15314944, 'steps': 29911, 'loss/train': 1.4623748064041138} +03/05/2022 00:36:26 - INFO - codeparrot_training - Step 29912: {'lr': 0.00045738855537808443, 'samples': 15315456, 'steps': 29912, 'loss/train': 0.7852261066436768} +03/05/2022 00:36:26 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/05/2022 00:36:31 - INFO - codeparrot_training - Step 29913: {'lr': 0.000457385591906411, 'samples': 15315968, 'steps': 29913, 'loss/train': 2.221421480178833} +03/05/2022 00:36:34 - INFO - codeparrot_training - Step 29914: {'lr': 0.00045738262834129283, 'samples': 15316480, 'steps': 29914, 'loss/train': 2.536991596221924} +03/05/2022 00:36:34 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 00:36:39 - INFO - codeparrot_training - Step 29915: {'lr': 0.0004573796646827312, 'samples': 15316992, 'steps': 29915, 'loss/train': 1.4360783100128174} +03/05/2022 00:36:42 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/05/2022 00:36:45 - INFO - codeparrot_training - Step 29916: {'lr': 0.0004573767009307276, 'samples': 15317504, 'steps': 29916, 'loss/train': 1.3504279851913452} +03/05/2022 00:36:48 - INFO - codeparrot_training - Step 29917: {'lr': 0.0004573737370852831, 'samples': 15318016, 'steps': 29917, 'loss/train': 1.5515646934509277} +03/05/2022 00:36:51 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 00:36:53 - INFO - codeparrot_training - Step 29918: {'lr': 0.0004573707731463993, 'samples': 15318528, 'steps': 29918, 'loss/train': 2.1219840049743652} +03/05/2022 00:36:57 - INFO - codeparrot_training - Step 29919: {'lr': 0.00045736780911407736, 'samples': 15319040, 'steps': 29919, 'loss/train': 6.804988861083984} +03/05/2022 00:37:00 - INFO - codeparrot_training - Step 29920: {'lr': 0.00045736484498831877, 'samples': 15319552, 'steps': 29920, 'loss/train': 1.5250482559204102} +03/05/2022 00:37:00 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/05/2022 00:37:05 - INFO - codeparrot_training - Step 29921: {'lr': 0.0004573618807691248, 'samples': 15320064, 'steps': 29921, 'loss/train': 0.03833547234535217} +03/05/2022 00:37:09 - INFO - codeparrot_training - Step 29922: {'lr': 0.0004573589164564966, 'samples': 15320576, 'steps': 29922, 'loss/train': 2.3265116214752197} +03/05/2022 00:37:10 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 00:37:14 - INFO - codeparrot_training - Step 29923: {'lr': 0.00045735595205043583, 'samples': 15321088, 'steps': 29923, 'loss/train': 1.54251229763031} +03/05/2022 00:37:17 - INFO - codeparrot_training - Step 29924: {'lr': 0.00045735298755094364, 'samples': 15321600, 'steps': 29924, 'loss/train': 1.8605268001556396} +03/05/2022 00:37:19 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 00:37:22 - INFO - codeparrot_training - Step 29925: {'lr': 0.00045735002295802137, 'samples': 15322112, 'steps': 29925, 'loss/train': 1.5954961776733398} +03/05/2022 00:37:25 - INFO - codeparrot_training - Step 29926: {'lr': 0.00045734705827167035, 'samples': 15322624, 'steps': 29926, 'loss/train': 1.425561547279358} +03/05/2022 00:37:27 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 00:37:31 - INFO - codeparrot_training - Step 29927: {'lr': 0.000457344093491892, 'samples': 15323136, 'steps': 29927, 'loss/train': 2.0718324184417725} +03/05/2022 00:37:34 - INFO - codeparrot_training - Step 29928: {'lr': 0.00045734112861868753, 'samples': 15323648, 'steps': 29928, 'loss/train': 1.6861531734466553} +03/05/2022 00:37:35 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/05/2022 00:37:39 - INFO - codeparrot_training - Step 29929: {'lr': 0.0004573381636520584, 'samples': 15324160, 'steps': 29929, 'loss/train': 1.9465038776397705} +03/05/2022 00:37:42 - INFO - codeparrot_training - Step 29930: {'lr': 0.0004573351985920059, 'samples': 15324672, 'steps': 29930, 'loss/train': 1.902191400527954} +03/05/2022 00:37:43 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 00:37:48 - INFO - codeparrot_training - Step 29931: {'lr': 0.0004573322334385314, 'samples': 15325184, 'steps': 29931, 'loss/train': 1.6553391218185425} +03/05/2022 00:37:51 - INFO - codeparrot_training - Step 29932: {'lr': 0.0004573292681916361, 'samples': 15325696, 'steps': 29932, 'loss/train': 1.0357544422149658} +03/05/2022 00:37:52 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/05/2022 00:37:56 - INFO - codeparrot_training - Step 29933: {'lr': 0.0004573263028513214, 'samples': 15326208, 'steps': 29933, 'loss/train': 2.2017669677734375} +03/05/2022 00:37:59 - INFO - codeparrot_training - Step 29934: {'lr': 0.0004573233374175888, 'samples': 15326720, 'steps': 29934, 'loss/train': 1.7318389415740967} +03/05/2022 00:38:00 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 00:38:04 - INFO - codeparrot_training - Step 29935: {'lr': 0.0004573203718904394, 'samples': 15327232, 'steps': 29935, 'loss/train': 1.3272916078567505} +03/05/2022 00:38:08 - INFO - codeparrot_training - Step 29936: {'lr': 0.00045731740626987473, 'samples': 15327744, 'steps': 29936, 'loss/train': 1.4265285730361938} +03/05/2022 00:38:08 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/05/2022 00:38:13 - INFO - codeparrot_training - Step 29937: {'lr': 0.00045731444055589597, 'samples': 15328256, 'steps': 29937, 'loss/train': 0.8252424001693726} +03/05/2022 00:38:16 - INFO - codeparrot_training - Step 29938: {'lr': 0.0004573114747485045, 'samples': 15328768, 'steps': 29938, 'loss/train': 1.7402069568634033} +03/05/2022 00:38:17 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 00:38:21 - INFO - codeparrot_training - Step 29939: {'lr': 0.0004573085088477017, 'samples': 15329280, 'steps': 29939, 'loss/train': 1.616119384765625} +03/05/2022 00:38:24 - INFO - codeparrot_training - Step 29940: {'lr': 0.0004573055428534889, 'samples': 15329792, 'steps': 29940, 'loss/train': 1.3272995948791504} +03/05/2022 00:38:25 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/05/2022 00:38:30 - INFO - codeparrot_training - Step 29941: {'lr': 0.00045730257676586747, 'samples': 15330304, 'steps': 29941, 'loss/train': 1.352555274963379} +03/05/2022 00:38:33 - INFO - codeparrot_training - Step 29942: {'lr': 0.0004572996105848386, 'samples': 15330816, 'steps': 29942, 'loss/train': 1.4182147979736328} +03/05/2022 00:38:34 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/05/2022 00:38:38 - INFO - codeparrot_training - Step 29943: {'lr': 0.0004572966443104038, 'samples': 15331328, 'steps': 29943, 'loss/train': 1.0883547067642212} +03/05/2022 00:38:41 - INFO - codeparrot_training - Step 29944: {'lr': 0.00045729367794256434, 'samples': 15331840, 'steps': 29944, 'loss/train': 1.725461483001709} +03/05/2022 00:38:42 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 00:38:47 - INFO - codeparrot_training - Step 29945: {'lr': 0.0004572907114813215, 'samples': 15332352, 'steps': 29945, 'loss/train': 2.173579216003418} +03/05/2022 00:38:50 - INFO - codeparrot_training - Step 29946: {'lr': 0.0004572877449266767, 'samples': 15332864, 'steps': 29946, 'loss/train': 1.618887186050415} +03/05/2022 00:38:50 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 00:38:55 - INFO - codeparrot_training - Step 29947: {'lr': 0.0004572847782786312, 'samples': 15333376, 'steps': 29947, 'loss/train': 1.1793477535247803} +03/05/2022 00:38:58 - INFO - codeparrot_training - Step 29948: {'lr': 0.0004572818115371864, 'samples': 15333888, 'steps': 29948, 'loss/train': 1.466715693473816} +03/05/2022 00:38:58 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 00:39:03 - INFO - codeparrot_training - Step 29949: {'lr': 0.0004572788447023436, 'samples': 15334400, 'steps': 29949, 'loss/train': 1.5073846578598022} +03/05/2022 00:39:06 - INFO - codeparrot_training - Step 29950: {'lr': 0.00045727587777410415, 'samples': 15334912, 'steps': 29950, 'loss/train': 1.3496739864349365} +03/05/2022 00:39:07 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/05/2022 00:39:12 - INFO - codeparrot_training - Step 29951: {'lr': 0.00045727291075246937, 'samples': 15335424, 'steps': 29951, 'loss/train': 1.7845113277435303} +03/05/2022 00:39:15 - INFO - codeparrot_training - Step 29952: {'lr': 0.0004572699436374407, 'samples': 15335936, 'steps': 29952, 'loss/train': 1.0496718883514404} +03/05/2022 00:39:15 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/05/2022 00:39:20 - INFO - codeparrot_training - Step 29953: {'lr': 0.00045726697642901925, 'samples': 15336448, 'steps': 29953, 'loss/train': 2.0062108039855957} +03/05/2022 00:39:23 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 00:39:25 - INFO - codeparrot_training - Step 29954: {'lr': 0.0004572640091272066, 'samples': 15336960, 'steps': 29954, 'loss/train': 1.34402334690094} +03/05/2022 00:39:28 - INFO - codeparrot_training - Step 29955: {'lr': 0.000457261041732004, 'samples': 15337472, 'steps': 29955, 'loss/train': 2.5742132663726807} +03/05/2022 00:39:31 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/05/2022 00:39:34 - INFO - codeparrot_training - Step 29956: {'lr': 0.0004572580742434127, 'samples': 15337984, 'steps': 29956, 'loss/train': 2.3087236881256104} +03/05/2022 00:39:37 - INFO - codeparrot_training - Step 29957: {'lr': 0.00045725510666143424, 'samples': 15338496, 'steps': 29957, 'loss/train': 0.8209245800971985} +03/05/2022 00:39:40 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/05/2022 00:39:42 - INFO - codeparrot_training - Step 29958: {'lr': 0.0004572521389860697, 'samples': 15339008, 'steps': 29958, 'loss/train': 1.744937539100647} +03/05/2022 00:39:45 - INFO - codeparrot_training - Step 29959: {'lr': 0.00045724917121732055, 'samples': 15339520, 'steps': 29959, 'loss/train': 1.4647159576416016} +03/05/2022 00:39:48 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 00:39:51 - INFO - codeparrot_training - Step 29960: {'lr': 0.0004572462033551882, 'samples': 15340032, 'steps': 29960, 'loss/train': 1.565503716468811} +03/05/2022 00:39:54 - INFO - codeparrot_training - Step 29961: {'lr': 0.00045724323539967385, 'samples': 15340544, 'steps': 29961, 'loss/train': 1.8964900970458984} +03/05/2022 00:39:56 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/05/2022 00:39:59 - INFO - codeparrot_training - Step 29962: {'lr': 0.00045724026735077886, 'samples': 15341056, 'steps': 29962, 'loss/train': 2.7797446250915527} +03/05/2022 00:40:02 - INFO - codeparrot_training - Step 29963: {'lr': 0.00045723729920850464, 'samples': 15341568, 'steps': 29963, 'loss/train': 0.4381738603115082} +03/05/2022 00:40:05 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/05/2022 00:40:08 - INFO - codeparrot_training - Step 29964: {'lr': 0.00045723433097285247, 'samples': 15342080, 'steps': 29964, 'loss/train': 1.5689224004745483} +03/05/2022 00:40:11 - INFO - codeparrot_training - Step 29965: {'lr': 0.0004572313626438238, 'samples': 15342592, 'steps': 29965, 'loss/train': 1.2798312902450562} +03/05/2022 00:40:14 - INFO - codeparrot_training - Step 29966: {'lr': 0.00045722839422141984, 'samples': 15343104, 'steps': 29966, 'loss/train': 2.160632610321045} +03/05/2022 00:40:14 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/05/2022 00:40:20 - INFO - codeparrot_training - Step 29967: {'lr': 0.000457225425705642, 'samples': 15343616, 'steps': 29967, 'loss/train': 1.5215989351272583} +03/05/2022 00:40:23 - INFO - codeparrot_training - Step 29968: {'lr': 0.0004572224570964915, 'samples': 15344128, 'steps': 29968, 'loss/train': 1.860026240348816} +03/05/2022 00:40:23 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/05/2022 00:40:28 - INFO - codeparrot_training - Step 29969: {'lr': 0.0004572194883939697, 'samples': 15344640, 'steps': 29969, 'loss/train': 1.383028507232666} +03/05/2022 00:40:31 - INFO - codeparrot_training - Step 29970: {'lr': 0.0004572165195980781, 'samples': 15345152, 'steps': 29970, 'loss/train': 1.851492166519165} +03/05/2022 00:40:31 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 00:40:36 - INFO - codeparrot_training - Step 29971: {'lr': 0.0004572135507088179, 'samples': 15345664, 'steps': 29971, 'loss/train': 1.6599832773208618} +03/05/2022 00:40:40 - INFO - codeparrot_training - Step 29972: {'lr': 0.00045721058172619043, 'samples': 15346176, 'steps': 29972, 'loss/train': 1.6131421327590942} +03/05/2022 00:40:40 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/05/2022 00:40:45 - INFO - codeparrot_training - Step 29973: {'lr': 0.0004572076126501972, 'samples': 15346688, 'steps': 29973, 'loss/train': 1.864082932472229} +03/05/2022 00:40:48 - INFO - codeparrot_training - Step 29974: {'lr': 0.00045720464348083937, 'samples': 15347200, 'steps': 29974, 'loss/train': 2.0404343605041504} +03/05/2022 00:40:49 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/05/2022 00:40:53 - INFO - codeparrot_training - Step 29975: {'lr': 0.0004572016742181182, 'samples': 15347712, 'steps': 29975, 'loss/train': 1.6090868711471558} +03/05/2022 00:40:57 - INFO - codeparrot_training - Step 29976: {'lr': 0.0004571987048620353, 'samples': 15348224, 'steps': 29976, 'loss/train': 1.8193855285644531} +03/05/2022 00:40:58 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 00:41:02 - INFO - codeparrot_training - Step 29977: {'lr': 0.0004571957354125918, 'samples': 15348736, 'steps': 29977, 'loss/train': 3.0422651767730713} +03/05/2022 00:41:05 - INFO - codeparrot_training - Step 29978: {'lr': 0.00045719276586978907, 'samples': 15349248, 'steps': 29978, 'loss/train': 2.032134532928467} +03/05/2022 00:41:06 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/05/2022 00:41:10 - INFO - codeparrot_training - Step 29979: {'lr': 0.00045718979623362855, 'samples': 15349760, 'steps': 29979, 'loss/train': 2.4302265644073486} +03/05/2022 00:41:14 - INFO - codeparrot_training - Step 29980: {'lr': 0.00045718682650411146, 'samples': 15350272, 'steps': 29980, 'loss/train': 0.4171745181083679} +03/05/2022 00:41:14 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/05/2022 00:41:19 - INFO - codeparrot_training - Step 29981: {'lr': 0.0004571838566812392, 'samples': 15350784, 'steps': 29981, 'loss/train': 1.4371411800384521} +03/05/2022 00:41:22 - INFO - codeparrot_training - Step 29982: {'lr': 0.00045718088676501305, 'samples': 15351296, 'steps': 29982, 'loss/train': 1.8016290664672852} +03/05/2022 00:41:23 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/05/2022 00:41:27 - INFO - codeparrot_training - Step 29983: {'lr': 0.0004571779167554344, 'samples': 15351808, 'steps': 29983, 'loss/train': 0.9044046998023987} +03/05/2022 00:41:30 - INFO - codeparrot_training - Step 29984: {'lr': 0.0004571749466525046, 'samples': 15352320, 'steps': 29984, 'loss/train': 1.9681452512741089} +03/05/2022 00:41:31 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 00:41:36 - INFO - codeparrot_training - Step 29985: {'lr': 0.000457171976456225, 'samples': 15352832, 'steps': 29985, 'loss/train': 1.8412591218948364} +03/05/2022 00:41:39 - INFO - codeparrot_training - Step 29986: {'lr': 0.00045716900616659686, 'samples': 15353344, 'steps': 29986, 'loss/train': 1.3607157468795776} +03/05/2022 00:41:39 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 00:41:44 - INFO - codeparrot_training - Step 29987: {'lr': 0.00045716603578362157, 'samples': 15353856, 'steps': 29987, 'loss/train': 1.7665804624557495} +03/05/2022 00:41:47 - INFO - codeparrot_training - Step 29988: {'lr': 0.00045716306530730043, 'samples': 15354368, 'steps': 29988, 'loss/train': 1.1285563707351685} +03/05/2022 00:41:48 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/05/2022 00:41:53 - INFO - codeparrot_training - Step 29989: {'lr': 0.00045716009473763486, 'samples': 15354880, 'steps': 29989, 'loss/train': 2.236178159713745} +03/05/2022 00:41:56 - INFO - codeparrot_training - Step 29990: {'lr': 0.0004571571240746262, 'samples': 15355392, 'steps': 29990, 'loss/train': 2.2003471851348877} +03/05/2022 00:41:56 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/05/2022 00:42:01 - INFO - codeparrot_training - Step 29991: {'lr': 0.00045715415331827564, 'samples': 15355904, 'steps': 29991, 'loss/train': 1.7204599380493164} +03/05/2022 00:42:04 - INFO - codeparrot_training - Step 29992: {'lr': 0.00045715118246858466, 'samples': 15356416, 'steps': 29992, 'loss/train': 2.0565106868743896} +03/05/2022 00:42:05 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 00:42:09 - INFO - codeparrot_training - Step 29993: {'lr': 0.0004571482115255545, 'samples': 15356928, 'steps': 29993, 'loss/train': 1.5984420776367188} +03/05/2022 00:42:13 - INFO - codeparrot_training - Step 29994: {'lr': 0.0004571452404891866, 'samples': 15357440, 'steps': 29994, 'loss/train': 1.843674659729004} +03/05/2022 00:42:13 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 00:42:18 - INFO - codeparrot_training - Step 29995: {'lr': 0.0004571422693594822, 'samples': 15357952, 'steps': 29995, 'loss/train': 1.3999497890472412} +03/05/2022 00:42:21 - INFO - codeparrot_training - Step 29996: {'lr': 0.00045713929813644274, 'samples': 15358464, 'steps': 29996, 'loss/train': 1.836114525794983} +03/05/2022 00:42:22 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/05/2022 00:42:26 - INFO - codeparrot_training - Step 29997: {'lr': 0.0004571363268200695, 'samples': 15358976, 'steps': 29997, 'loss/train': 1.7641074657440186} +03/05/2022 00:42:30 - INFO - codeparrot_training - Step 29998: {'lr': 0.0004571333554103638, 'samples': 15359488, 'steps': 29998, 'loss/train': 1.455445408821106} +03/05/2022 00:42:30 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/05/2022 00:42:35 - INFO - codeparrot_training - Step 29999: {'lr': 0.0004571303839073271, 'samples': 15360000, 'steps': 29999, 'loss/train': 1.5493794679641724} +03/05/2022 00:42:35 - INFO - codeparrot_training - Evaluating and saving model checkpoint