diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -30468,3 +30468,7453 @@ Use FP16 precision: False 03/04/2022 12:56:50 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) 03/04/2022 12:56:55 - INFO - codeparrot_training - Step 19999: {'lr': 0.00048197423542587143, 'samples': 10240000, 'steps': 19999, 'loss/train': 1.2787634134292603} 03/04/2022 12:56:55 - INFO - codeparrot_training - Evaluating and saving model checkpoint +03/04/2022 12:57:09 - WARNING - huggingface_hub.repository - Several commits (4) will be pushed upstream. +03/04/2022 12:57:09 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +03/04/2022 12:57:32 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/cm_code_clippy + 7d4fba8..3033721 glowing-puddle-3 -> glowing-puddle-3 + +03/04/2022 12:57:36 - INFO - codeparrot_training - Step 20000: {'lr': 0.0004819722568241274, 'samples': 10240512, 'steps': 20000, 'loss/train': 1.6575286388397217} +03/04/2022 12:57:36 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/04/2022 12:57:41 - INFO - codeparrot_training - Step 20001: {'lr': 0.0004819702781178601, 'samples': 10241024, 'steps': 20001, 'loss/train': 2.603853702545166} +03/04/2022 12:57:44 - INFO - codeparrot_training - Step 20002: {'lr': 0.00048196829930707066, 'samples': 10241536, 'steps': 20002, 'loss/train': 1.8656319379806519} +03/04/2022 12:57:45 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 12:57:50 - INFO - codeparrot_training - Step 20003: {'lr': 0.0004819663203917599, 'samples': 10242048, 'steps': 20003, 'loss/train': 1.3059179782867432} +03/04/2022 12:57:53 - INFO - codeparrot_training - Step 20004: {'lr': 0.0004819643413719287, 'samples': 10242560, 'steps': 20004, 'loss/train': 1.2890772819519043} +03/04/2022 12:57:55 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/04/2022 12:57:59 - INFO - codeparrot_training - Step 20005: {'lr': 0.0004819623622475779, 'samples': 10243072, 'steps': 20005, 'loss/train': 2.073068380355835} +03/04/2022 12:58:02 - INFO - codeparrot_training - Step 20006: {'lr': 0.00048196038301870847, 'samples': 10243584, 'steps': 20006, 'loss/train': 2.0313401222229004} +03/04/2022 12:58:04 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 12:58:07 - INFO - codeparrot_training - Step 20007: {'lr': 0.0004819584036853212, 'samples': 10244096, 'steps': 20007, 'loss/train': 2.538348913192749} +03/04/2022 12:58:10 - INFO - codeparrot_training - Step 20008: {'lr': 0.00048195642424741716, 'samples': 10244608, 'steps': 20008, 'loss/train': 2.657820224761963} +03/04/2022 12:58:13 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 12:58:16 - INFO - codeparrot_training - Step 20009: {'lr': 0.00048195444470499704, 'samples': 10245120, 'steps': 20009, 'loss/train': 2.204522132873535} +03/04/2022 12:58:19 - INFO - codeparrot_training - Step 20010: {'lr': 0.0004819524650580619, 'samples': 10245632, 'steps': 20010, 'loss/train': 2.578894853591919} +03/04/2022 12:58:21 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 12:58:24 - INFO - codeparrot_training - Step 20011: {'lr': 0.0004819504853066126, 'samples': 10246144, 'steps': 20011, 'loss/train': 1.9453705549240112} +03/04/2022 12:58:27 - INFO - codeparrot_training - Step 20012: {'lr': 0.0004819485054506498, 'samples': 10246656, 'steps': 20012, 'loss/train': 2.286206007003784} +03/04/2022 12:58:30 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 12:58:33 - INFO - codeparrot_training - Step 20013: {'lr': 0.00048194652549017484, 'samples': 10247168, 'steps': 20013, 'loss/train': 2.0102620124816895} +03/04/2022 12:58:36 - INFO - codeparrot_training - Step 20014: {'lr': 0.0004819445454251882, 'samples': 10247680, 'steps': 20014, 'loss/train': 2.317781925201416} +03/04/2022 12:58:39 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 12:58:41 - INFO - codeparrot_training - Step 20015: {'lr': 0.0004819425652556909, 'samples': 10248192, 'steps': 20015, 'loss/train': 1.2648102045059204} +03/04/2022 12:58:44 - INFO - codeparrot_training - Step 20016: {'lr': 0.0004819405849816839, 'samples': 10248704, 'steps': 20016, 'loss/train': 1.5415961742401123} +03/04/2022 12:58:47 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 12:58:50 - INFO - codeparrot_training - Step 20017: {'lr': 0.00048193860460316805, 'samples': 10249216, 'steps': 20017, 'loss/train': 2.311629295349121} +03/04/2022 12:58:53 - INFO - codeparrot_training - Step 20018: {'lr': 0.00048193662412014427, 'samples': 10249728, 'steps': 20018, 'loss/train': 1.4635286331176758} +03/04/2022 12:58:56 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/04/2022 12:58:58 - INFO - codeparrot_training - Step 20019: {'lr': 0.0004819346435326134, 'samples': 10250240, 'steps': 20019, 'loss/train': 0.2795967161655426} +03/04/2022 12:59:01 - INFO - codeparrot_training - Step 20020: {'lr': 0.00048193266284057634, 'samples': 10250752, 'steps': 20020, 'loss/train': 1.7154669761657715} +03/04/2022 12:59:05 - INFO - codeparrot_training - Step 20021: {'lr': 0.0004819306820440341, 'samples': 10251264, 'steps': 20021, 'loss/train': 1.126009464263916} +03/04/2022 12:59:05 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 12:59:10 - INFO - codeparrot_training - Step 20022: {'lr': 0.0004819287011429874, 'samples': 10251776, 'steps': 20022, 'loss/train': 0.5634230375289917} +03/04/2022 12:59:13 - INFO - codeparrot_training - Step 20023: {'lr': 0.0004819267201374372, 'samples': 10252288, 'steps': 20023, 'loss/train': 2.269754648208618} +03/04/2022 12:59:14 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 12:59:18 - INFO - codeparrot_training - Step 20024: {'lr': 0.0004819247390273844, 'samples': 10252800, 'steps': 20024, 'loss/train': 2.016040086746216} +03/04/2022 12:59:21 - INFO - codeparrot_training - Step 20025: {'lr': 0.00048192275781282993, 'samples': 10253312, 'steps': 20025, 'loss/train': 1.7799144983291626} +03/04/2022 12:59:22 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 12:59:27 - INFO - codeparrot_training - Step 20026: {'lr': 0.00048192077649377455, 'samples': 10253824, 'steps': 20026, 'loss/train': 1.4315651655197144} +03/04/2022 12:59:30 - INFO - codeparrot_training - Step 20027: {'lr': 0.0004819187950702193, 'samples': 10254336, 'steps': 20027, 'loss/train': 1.9015066623687744} +03/04/2022 12:59:31 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 12:59:35 - INFO - codeparrot_training - Step 20028: {'lr': 0.00048191681354216504, 'samples': 10254848, 'steps': 20028, 'loss/train': 1.982316255569458} +03/04/2022 12:59:38 - INFO - codeparrot_training - Step 20029: {'lr': 0.0004819148319096126, 'samples': 10255360, 'steps': 20029, 'loss/train': 1.954868197441101} +03/04/2022 12:59:39 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 12:59:44 - INFO - codeparrot_training - Step 20030: {'lr': 0.00048191285017256297, 'samples': 10255872, 'steps': 20030, 'loss/train': 1.918177604675293} +03/04/2022 12:59:47 - INFO - codeparrot_training - Step 20031: {'lr': 0.00048191086833101695, 'samples': 10256384, 'steps': 20031, 'loss/train': 2.029825448989868} +03/04/2022 12:59:48 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 12:59:52 - INFO - codeparrot_training - Step 20032: {'lr': 0.00048190888638497553, 'samples': 10256896, 'steps': 20032, 'loss/train': 2.1189045906066895} +03/04/2022 12:59:55 - INFO - codeparrot_training - Step 20033: {'lr': 0.00048190690433443946, 'samples': 10257408, 'steps': 20033, 'loss/train': 2.132322311401367} +03/04/2022 12:59:56 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/04/2022 13:00:01 - INFO - codeparrot_training - Step 20034: {'lr': 0.0004819049221794097, 'samples': 10257920, 'steps': 20034, 'loss/train': 1.5557727813720703} +03/04/2022 13:00:04 - INFO - codeparrot_training - Step 20035: {'lr': 0.0004819029399198873, 'samples': 10258432, 'steps': 20035, 'loss/train': 2.551872968673706} +03/04/2022 13:00:05 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 13:00:09 - INFO - codeparrot_training - Step 20036: {'lr': 0.0004819009575558729, 'samples': 10258944, 'steps': 20036, 'loss/train': 1.3711940050125122} +03/04/2022 13:00:12 - INFO - codeparrot_training - Step 20037: {'lr': 0.0004818989750873676, 'samples': 10259456, 'steps': 20037, 'loss/train': 0.147262305021286} +03/04/2022 13:00:13 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/04/2022 13:00:17 - INFO - codeparrot_training - Step 20038: {'lr': 0.00048189699251437206, 'samples': 10259968, 'steps': 20038, 'loss/train': 1.6891417503356934} +03/04/2022 13:00:21 - INFO - codeparrot_training - Step 20039: {'lr': 0.0004818950098368874, 'samples': 10260480, 'steps': 20039, 'loss/train': 2.598771095275879} +03/04/2022 13:00:22 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/04/2022 13:00:26 - INFO - codeparrot_training - Step 20040: {'lr': 0.00048189302705491446, 'samples': 10260992, 'steps': 20040, 'loss/train': 2.1570889949798584} +03/04/2022 13:00:29 - INFO - codeparrot_training - Step 20041: {'lr': 0.000481891044168454, 'samples': 10261504, 'steps': 20041, 'loss/train': 3.5457839965820312} +03/04/2022 13:00:30 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 13:00:35 - INFO - codeparrot_training - Step 20042: {'lr': 0.00048188906117750706, 'samples': 10262016, 'steps': 20042, 'loss/train': 1.9421305656433105} +03/04/2022 13:00:38 - INFO - codeparrot_training - Step 20043: {'lr': 0.00048188707808207457, 'samples': 10262528, 'steps': 20043, 'loss/train': 1.1323516368865967} +03/04/2022 13:00:39 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 13:00:43 - INFO - codeparrot_training - Step 20044: {'lr': 0.00048188509488215724, 'samples': 10263040, 'steps': 20044, 'loss/train': 1.6568485498428345} +03/04/2022 13:00:46 - INFO - codeparrot_training - Step 20045: {'lr': 0.0004818831115777561, 'samples': 10263552, 'steps': 20045, 'loss/train': 2.068376302719116} +03/04/2022 13:00:47 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 13:00:52 - INFO - codeparrot_training - Step 20046: {'lr': 0.00048188112816887203, 'samples': 10264064, 'steps': 20046, 'loss/train': 2.6807892322540283} +03/04/2022 13:00:55 - INFO - codeparrot_training - Step 20047: {'lr': 0.0004818791446555059, 'samples': 10264576, 'steps': 20047, 'loss/train': 2.1022512912750244} +03/04/2022 13:00:56 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 13:01:00 - INFO - codeparrot_training - Step 20048: {'lr': 0.00048187716103765854, 'samples': 10265088, 'steps': 20048, 'loss/train': 1.0668904781341553} +03/04/2022 13:01:04 - INFO - codeparrot_training - Step 20049: {'lr': 0.0004818751773153309, 'samples': 10265600, 'steps': 20049, 'loss/train': 1.9260876178741455} +03/04/2022 13:01:06 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/04/2022 13:01:09 - INFO - codeparrot_training - Step 20050: {'lr': 0.000481873193488524, 'samples': 10266112, 'steps': 20050, 'loss/train': 1.8349353075027466} +03/04/2022 13:01:12 - INFO - codeparrot_training - Step 20051: {'lr': 0.0004818712095572385, 'samples': 10266624, 'steps': 20051, 'loss/train': 1.828360915184021} +03/04/2022 13:01:14 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/04/2022 13:01:17 - INFO - codeparrot_training - Step 20052: {'lr': 0.0004818692255214755, 'samples': 10267136, 'steps': 20052, 'loss/train': 0.5336731672286987} +03/04/2022 13:01:21 - INFO - codeparrot_training - Step 20053: {'lr': 0.00048186724138123577, 'samples': 10267648, 'steps': 20053, 'loss/train': 3.193540334701538} +03/04/2022 13:01:22 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 13:01:26 - INFO - codeparrot_training - Step 20054: {'lr': 0.00048186525713652024, 'samples': 10268160, 'steps': 20054, 'loss/train': 2.0249836444854736} +03/04/2022 13:01:29 - INFO - codeparrot_training - Step 20055: {'lr': 0.0004818632727873298, 'samples': 10268672, 'steps': 20055, 'loss/train': 0.8536461591720581} +03/04/2022 13:01:30 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 13:01:34 - INFO - codeparrot_training - Step 20056: {'lr': 0.00048186128833366536, 'samples': 10269184, 'steps': 20056, 'loss/train': 1.551547646522522} +03/04/2022 13:01:37 - INFO - codeparrot_training - Step 20057: {'lr': 0.0004818593037755278, 'samples': 10269696, 'steps': 20057, 'loss/train': 1.8232673406600952} +03/04/2022 13:01:39 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 13:01:43 - INFO - codeparrot_training - Step 20058: {'lr': 0.000481857319112918, 'samples': 10270208, 'steps': 20058, 'loss/train': 1.820905089378357} +03/04/2022 13:01:46 - INFO - codeparrot_training - Step 20059: {'lr': 0.0004818553343458368, 'samples': 10270720, 'steps': 20059, 'loss/train': 2.261099338531494} +03/04/2022 13:01:47 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/04/2022 13:01:51 - INFO - codeparrot_training - Step 20060: {'lr': 0.00048185334947428525, 'samples': 10271232, 'steps': 20060, 'loss/train': 1.95997154712677} +03/04/2022 13:01:54 - INFO - codeparrot_training - Step 20061: {'lr': 0.0004818513644982642, 'samples': 10271744, 'steps': 20061, 'loss/train': 1.8718122243881226} +03/04/2022 13:01:56 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 13:02:00 - INFO - codeparrot_training - Step 20062: {'lr': 0.0004818493794177744, 'samples': 10272256, 'steps': 20062, 'loss/train': 1.9540126323699951} +03/04/2022 13:02:03 - INFO - codeparrot_training - Step 20063: {'lr': 0.00048184739423281695, 'samples': 10272768, 'steps': 20063, 'loss/train': 1.9549976587295532} +03/04/2022 13:02:04 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 13:02:08 - INFO - codeparrot_training - Step 20064: {'lr': 0.00048184540894339256, 'samples': 10273280, 'steps': 20064, 'loss/train': 2.454415798187256} +03/04/2022 13:02:11 - INFO - codeparrot_training - Step 20065: {'lr': 0.00048184342354950225, 'samples': 10273792, 'steps': 20065, 'loss/train': 1.5750740766525269} +03/04/2022 13:02:13 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 13:02:17 - INFO - codeparrot_training - Step 20066: {'lr': 0.00048184143805114684, 'samples': 10274304, 'steps': 20066, 'loss/train': 1.4278597831726074} +03/04/2022 13:02:20 - INFO - codeparrot_training - Step 20067: {'lr': 0.00048183945244832725, 'samples': 10274816, 'steps': 20067, 'loss/train': 1.3187370300292969} +03/04/2022 13:02:22 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 13:02:25 - INFO - codeparrot_training - Step 20068: {'lr': 0.00048183746674104446, 'samples': 10275328, 'steps': 20068, 'loss/train': 2.2554614543914795} +03/04/2022 13:02:28 - INFO - codeparrot_training - Step 20069: {'lr': 0.00048183548092929916, 'samples': 10275840, 'steps': 20069, 'loss/train': 1.1259300708770752} +03/04/2022 13:02:30 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/04/2022 13:02:34 - INFO - codeparrot_training - Step 20070: {'lr': 0.0004818334950130925, 'samples': 10276352, 'steps': 20070, 'loss/train': 1.6540038585662842} +03/04/2022 13:02:37 - INFO - codeparrot_training - Step 20071: {'lr': 0.00048183150899242514, 'samples': 10276864, 'steps': 20071, 'loss/train': 1.6608667373657227} +03/04/2022 13:02:39 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 13:02:42 - INFO - codeparrot_training - Step 20072: {'lr': 0.0004818295228672981, 'samples': 10277376, 'steps': 20072, 'loss/train': 0.9173825979232788} +03/04/2022 13:02:45 - INFO - codeparrot_training - Step 20073: {'lr': 0.0004818275366377123, 'samples': 10277888, 'steps': 20073, 'loss/train': 2.0447678565979004} +03/04/2022 13:02:47 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/04/2022 13:02:51 - INFO - codeparrot_training - Step 20074: {'lr': 0.00048182555030366854, 'samples': 10278400, 'steps': 20074, 'loss/train': 1.6849154233932495} +03/04/2022 13:02:54 - INFO - codeparrot_training - Step 20075: {'lr': 0.0004818235638651678, 'samples': 10278912, 'steps': 20075, 'loss/train': 0.7425598502159119} +03/04/2022 13:02:56 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 13:02:59 - INFO - codeparrot_training - Step 20076: {'lr': 0.0004818215773222109, 'samples': 10279424, 'steps': 20076, 'loss/train': 1.851235032081604} +03/04/2022 13:03:02 - INFO - codeparrot_training - Step 20077: {'lr': 0.0004818195906747988, 'samples': 10279936, 'steps': 20077, 'loss/train': 2.7639236450195312} +03/04/2022 13:03:04 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 13:03:08 - INFO - codeparrot_training - Step 20078: {'lr': 0.0004818176039229324, 'samples': 10280448, 'steps': 20078, 'loss/train': 1.697530746459961} +03/04/2022 13:03:11 - INFO - codeparrot_training - Step 20079: {'lr': 0.0004818156170666125, 'samples': 10280960, 'steps': 20079, 'loss/train': 2.0468196868896484} +03/04/2022 13:03:13 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 13:03:16 - INFO - codeparrot_training - Step 20080: {'lr': 0.0004818136301058401, 'samples': 10281472, 'steps': 20080, 'loss/train': 1.2681143283843994} +03/04/2022 13:03:19 - INFO - codeparrot_training - Step 20081: {'lr': 0.0004818116430406161, 'samples': 10281984, 'steps': 20081, 'loss/train': 2.1994969844818115} +03/04/2022 13:03:21 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 13:03:25 - INFO - codeparrot_training - Step 20082: {'lr': 0.00048180965587094125, 'samples': 10282496, 'steps': 20082, 'loss/train': 1.341614007949829} +03/04/2022 13:03:28 - INFO - codeparrot_training - Step 20083: {'lr': 0.00048180766859681664, 'samples': 10283008, 'steps': 20083, 'loss/train': 1.024436354637146} +03/04/2022 13:03:31 - INFO - codeparrot_training - Step 20084: {'lr': 0.000481805681218243, 'samples': 10283520, 'steps': 20084, 'loss/train': 1.46431565284729} +03/04/2022 13:03:31 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/04/2022 13:03:36 - INFO - codeparrot_training - Step 20085: {'lr': 0.0004818036937352214, 'samples': 10284032, 'steps': 20085, 'loss/train': 1.140002965927124} +03/04/2022 13:03:40 - INFO - codeparrot_training - Step 20086: {'lr': 0.0004818017061477525, 'samples': 10284544, 'steps': 20086, 'loss/train': 2.3306374549865723} +03/04/2022 13:03:40 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 13:03:45 - INFO - codeparrot_training - Step 20087: {'lr': 0.00048179971845583734, 'samples': 10285056, 'steps': 20087, 'loss/train': 1.966194987297058} +03/04/2022 13:03:48 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 13:03:50 - INFO - codeparrot_training - Step 20088: {'lr': 0.00048179773065947683, 'samples': 10285568, 'steps': 20088, 'loss/train': 2.0812532901763916} +03/04/2022 13:03:53 - INFO - codeparrot_training - Step 20089: {'lr': 0.0004817957427586719, 'samples': 10286080, 'steps': 20089, 'loss/train': 2.00014591217041} +03/04/2022 13:03:56 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/04/2022 13:03:58 - INFO - codeparrot_training - Step 20090: {'lr': 0.00048179375475342333, 'samples': 10286592, 'steps': 20090, 'loss/train': 1.510907769203186} +03/04/2022 13:04:02 - INFO - codeparrot_training - Step 20091: {'lr': 0.00048179176664373214, 'samples': 10287104, 'steps': 20091, 'loss/train': 2.672574758529663} +03/04/2022 13:04:04 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 13:04:07 - INFO - codeparrot_training - Step 20092: {'lr': 0.0004817897784295991, 'samples': 10287616, 'steps': 20092, 'loss/train': 1.299666404724121} +03/04/2022 13:04:10 - INFO - codeparrot_training - Step 20093: {'lr': 0.0004817877901110251, 'samples': 10288128, 'steps': 20093, 'loss/train': 1.459531545639038} +03/04/2022 13:04:13 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 13:04:15 - INFO - codeparrot_training - Step 20094: {'lr': 0.0004817858016880112, 'samples': 10288640, 'steps': 20094, 'loss/train': 2.0033960342407227} +03/04/2022 13:04:18 - INFO - codeparrot_training - Step 20095: {'lr': 0.0004817838131605582, 'samples': 10289152, 'steps': 20095, 'loss/train': 1.1471773386001587} +03/04/2022 13:04:21 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/04/2022 13:04:24 - INFO - codeparrot_training - Step 20096: {'lr': 0.00048178182452866694, 'samples': 10289664, 'steps': 20096, 'loss/train': 1.3958827257156372} +03/04/2022 13:04:27 - INFO - codeparrot_training - Step 20097: {'lr': 0.0004817798357923384, 'samples': 10290176, 'steps': 20097, 'loss/train': 1.8693748712539673} +03/04/2022 13:04:30 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/04/2022 13:04:32 - INFO - codeparrot_training - Step 20098: {'lr': 0.00048177784695157335, 'samples': 10290688, 'steps': 20098, 'loss/train': 1.9797894954681396} +03/04/2022 13:04:35 - INFO - codeparrot_training - Step 20099: {'lr': 0.00048177585800637286, 'samples': 10291200, 'steps': 20099, 'loss/train': 1.9770817756652832} +03/04/2022 13:04:38 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 13:04:40 - INFO - codeparrot_training - Step 20100: {'lr': 0.00048177386895673774, 'samples': 10291712, 'steps': 20100, 'loss/train': 1.1254260540008545} +03/04/2022 13:04:44 - INFO - codeparrot_training - Step 20101: {'lr': 0.0004817718798026689, 'samples': 10292224, 'steps': 20101, 'loss/train': 2.1793744564056396} +03/04/2022 13:04:46 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 13:04:49 - INFO - codeparrot_training - Step 20102: {'lr': 0.0004817698905441672, 'samples': 10292736, 'steps': 20102, 'loss/train': 2.199984550476074} +03/04/2022 13:04:52 - INFO - codeparrot_training - Step 20103: {'lr': 0.0004817679011812336, 'samples': 10293248, 'steps': 20103, 'loss/train': 1.8419699668884277} +03/04/2022 13:04:55 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/04/2022 13:04:57 - INFO - codeparrot_training - Step 20104: {'lr': 0.00048176591171386884, 'samples': 10293760, 'steps': 20104, 'loss/train': 1.6900770664215088} +03/04/2022 13:05:00 - INFO - codeparrot_training - Step 20105: {'lr': 0.0004817639221420741, 'samples': 10294272, 'steps': 20105, 'loss/train': 1.9167472124099731} +03/04/2022 13:05:03 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 13:05:06 - INFO - codeparrot_training - Step 20106: {'lr': 0.00048176193246585, 'samples': 10294784, 'steps': 20106, 'loss/train': 2.16170072555542} +03/04/2022 13:05:09 - INFO - codeparrot_training - Step 20107: {'lr': 0.00048175994268519765, 'samples': 10295296, 'steps': 20107, 'loss/train': 1.8333518505096436} +03/04/2022 13:05:12 - INFO - codeparrot_training - Step 20108: {'lr': 0.00048175795280011775, 'samples': 10295808, 'steps': 20108, 'loss/train': 1.4512406587600708} +03/04/2022 13:05:12 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 13:05:18 - INFO - codeparrot_training - Step 20109: {'lr': 0.00048175596281061135, 'samples': 10296320, 'steps': 20109, 'loss/train': 2.8230700492858887} +03/04/2022 13:05:21 - INFO - codeparrot_training - Step 20110: {'lr': 0.00048175397271667925, 'samples': 10296832, 'steps': 20110, 'loss/train': 1.7583922147750854} +03/04/2022 13:05:21 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 13:05:26 - INFO - codeparrot_training - Step 20111: {'lr': 0.00048175198251832244, 'samples': 10297344, 'steps': 20111, 'loss/train': 1.7755942344665527} +03/04/2022 13:05:29 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/04/2022 13:05:31 - INFO - codeparrot_training - Step 20112: {'lr': 0.00048174999221554173, 'samples': 10297856, 'steps': 20112, 'loss/train': 2.415696859359741} +03/04/2022 13:05:35 - INFO - codeparrot_training - Step 20113: {'lr': 0.000481748001808338, 'samples': 10298368, 'steps': 20113, 'loss/train': 2.4323534965515137} +03/04/2022 13:05:37 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 13:05:40 - INFO - codeparrot_training - Step 20114: {'lr': 0.00048174601129671223, 'samples': 10298880, 'steps': 20114, 'loss/train': 1.8854634761810303} +03/04/2022 13:05:43 - INFO - codeparrot_training - Step 20115: {'lr': 0.00048174402068066534, 'samples': 10299392, 'steps': 20115, 'loss/train': 0.9546204209327698} +03/04/2022 13:05:46 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 13:05:48 - INFO - codeparrot_training - Step 20116: {'lr': 0.0004817420299601981, 'samples': 10299904, 'steps': 20116, 'loss/train': 0.9483279585838318} +03/04/2022 13:05:51 - INFO - codeparrot_training - Step 20117: {'lr': 0.0004817400391353115, 'samples': 10300416, 'steps': 20117, 'loss/train': 0.5516338348388672} +03/04/2022 13:05:54 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/04/2022 13:05:57 - INFO - codeparrot_training - Step 20118: {'lr': 0.00048173804820600646, 'samples': 10300928, 'steps': 20118, 'loss/train': 1.8890305757522583} +03/04/2022 13:06:00 - INFO - codeparrot_training - Step 20119: {'lr': 0.0004817360571722838, 'samples': 10301440, 'steps': 20119, 'loss/train': 2.0130345821380615} +03/04/2022 13:06:02 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/04/2022 13:06:05 - INFO - codeparrot_training - Step 20120: {'lr': 0.00048173406603414445, 'samples': 10301952, 'steps': 20120, 'loss/train': 1.9464830160140991} +03/04/2022 13:06:08 - INFO - codeparrot_training - Step 20121: {'lr': 0.00048173207479158933, 'samples': 10302464, 'steps': 20121, 'loss/train': 1.805264949798584} +03/04/2022 13:06:11 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 13:06:13 - INFO - codeparrot_training - Step 20122: {'lr': 0.0004817300834446192, 'samples': 10302976, 'steps': 20122, 'loss/train': 2.198972225189209} +03/04/2022 13:06:17 - INFO - codeparrot_training - Step 20123: {'lr': 0.0004817280919932352, 'samples': 10303488, 'steps': 20123, 'loss/train': 2.301971197128296} +03/04/2022 13:06:19 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 13:06:22 - INFO - codeparrot_training - Step 20124: {'lr': 0.000481726100437438, 'samples': 10304000, 'steps': 20124, 'loss/train': 1.30559241771698} +03/04/2022 13:06:25 - INFO - codeparrot_training - Step 20125: {'lr': 0.00048172410877722865, 'samples': 10304512, 'steps': 20125, 'loss/train': 1.7633129358291626} +03/04/2022 13:06:27 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 13:06:30 - INFO - codeparrot_training - Step 20126: {'lr': 0.00048172211701260807, 'samples': 10305024, 'steps': 20126, 'loss/train': 1.035833716392517} +03/04/2022 13:06:33 - INFO - codeparrot_training - Step 20127: {'lr': 0.0004817201251435769, 'samples': 10305536, 'steps': 20127, 'loss/train': 1.7589365243911743} +03/04/2022 13:06:36 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 13:06:39 - INFO - codeparrot_training - Step 20128: {'lr': 0.00048171813317013633, 'samples': 10306048, 'steps': 20128, 'loss/train': 1.0727430582046509} +03/04/2022 13:06:42 - INFO - codeparrot_training - Step 20129: {'lr': 0.00048171614109228714, 'samples': 10306560, 'steps': 20129, 'loss/train': 2.10495662689209} +03/04/2022 13:06:45 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 13:06:47 - INFO - codeparrot_training - Step 20130: {'lr': 0.0004817141489100302, 'samples': 10307072, 'steps': 20130, 'loss/train': 2.4750287532806396} +03/04/2022 13:06:50 - INFO - codeparrot_training - Step 20131: {'lr': 0.0004817121566233665, 'samples': 10307584, 'steps': 20131, 'loss/train': 1.5406067371368408} +03/04/2022 13:06:53 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 13:06:56 - INFO - codeparrot_training - Step 20132: {'lr': 0.0004817101642322968, 'samples': 10308096, 'steps': 20132, 'loss/train': 1.4651095867156982} +03/04/2022 13:06:59 - INFO - codeparrot_training - Step 20133: {'lr': 0.00048170817173682215, 'samples': 10308608, 'steps': 20133, 'loss/train': 1.4553554058074951} +03/04/2022 13:07:02 - INFO - codeparrot_training - Step 20134: {'lr': 0.00048170617913694333, 'samples': 10309120, 'steps': 20134, 'loss/train': 2.036808490753174} +03/04/2022 13:07:03 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 13:07:07 - INFO - codeparrot_training - Step 20135: {'lr': 0.00048170418643266125, 'samples': 10309632, 'steps': 20135, 'loss/train': 2.0080301761627197} +03/04/2022 13:07:10 - INFO - codeparrot_training - Step 20136: {'lr': 0.00048170219362397685, 'samples': 10310144, 'steps': 20136, 'loss/train': 1.347101092338562} +03/04/2022 13:07:11 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 13:07:16 - INFO - codeparrot_training - Step 20137: {'lr': 0.00048170020071089105, 'samples': 10310656, 'steps': 20137, 'loss/train': 4.991723537445068} +03/04/2022 13:07:19 - INFO - codeparrot_training - Step 20138: {'lr': 0.00048169820769340476, 'samples': 10311168, 'steps': 20138, 'loss/train': 1.162432312965393} +03/04/2022 13:07:20 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 13:07:24 - INFO - codeparrot_training - Step 20139: {'lr': 0.0004816962145715188, 'samples': 10311680, 'steps': 20139, 'loss/train': 1.8139748573303223} +03/04/2022 13:07:28 - INFO - codeparrot_training - Step 20140: {'lr': 0.00048169422134523404, 'samples': 10312192, 'steps': 20140, 'loss/train': 2.475980520248413} +03/04/2022 13:07:28 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/04/2022 13:07:33 - INFO - codeparrot_training - Step 20141: {'lr': 0.0004816922280145515, 'samples': 10312704, 'steps': 20141, 'loss/train': 1.9039881229400635} +03/04/2022 13:07:36 - INFO - codeparrot_training - Step 20142: {'lr': 0.00048169023457947195, 'samples': 10313216, 'steps': 20142, 'loss/train': 1.8672627210617065} +03/04/2022 13:07:37 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 13:07:41 - INFO - codeparrot_training - Step 20143: {'lr': 0.0004816882410399964, 'samples': 10313728, 'steps': 20143, 'loss/train': 2.564636707305908} +03/04/2022 13:07:44 - INFO - codeparrot_training - Step 20144: {'lr': 0.00048168624739612577, 'samples': 10314240, 'steps': 20144, 'loss/train': 1.9907383918762207} +03/04/2022 13:07:45 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/04/2022 13:07:50 - INFO - codeparrot_training - Step 20145: {'lr': 0.0004816842536478608, 'samples': 10314752, 'steps': 20145, 'loss/train': 1.6476013660430908} +03/04/2022 13:07:53 - INFO - codeparrot_training - Step 20146: {'lr': 0.00048168225979520254, 'samples': 10315264, 'steps': 20146, 'loss/train': 1.472593069076538} +03/04/2022 13:07:54 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 13:07:58 - INFO - codeparrot_training - Step 20147: {'lr': 0.0004816802658381518, 'samples': 10315776, 'steps': 20147, 'loss/train': 1.2078913450241089} +03/04/2022 13:08:01 - INFO - codeparrot_training - Step 20148: {'lr': 0.00048167827177670946, 'samples': 10316288, 'steps': 20148, 'loss/train': 1.2442843914031982} +03/04/2022 13:08:02 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 13:08:07 - INFO - codeparrot_training - Step 20149: {'lr': 0.0004816762776108765, 'samples': 10316800, 'steps': 20149, 'loss/train': 2.3771984577178955} +03/04/2022 13:08:10 - INFO - codeparrot_training - Step 20150: {'lr': 0.0004816742833406538, 'samples': 10317312, 'steps': 20150, 'loss/train': 1.1970820426940918} +03/04/2022 13:08:11 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/04/2022 13:08:15 - INFO - codeparrot_training - Step 20151: {'lr': 0.0004816722889660423, 'samples': 10317824, 'steps': 20151, 'loss/train': 2.3721420764923096} +03/04/2022 13:08:18 - INFO - codeparrot_training - Step 20152: {'lr': 0.00048167029448704273, 'samples': 10318336, 'steps': 20152, 'loss/train': 1.976988434791565} +03/04/2022 13:08:19 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 13:08:24 - INFO - codeparrot_training - Step 20153: {'lr': 0.00048166829990365615, 'samples': 10318848, 'steps': 20153, 'loss/train': 2.734611988067627} +03/04/2022 13:08:27 - INFO - codeparrot_training - Step 20154: {'lr': 0.0004816663052158834, 'samples': 10319360, 'steps': 20154, 'loss/train': 1.5984156131744385} +03/04/2022 13:08:27 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 13:08:32 - INFO - codeparrot_training - Step 20155: {'lr': 0.0004816643104237254, 'samples': 10319872, 'steps': 20155, 'loss/train': 1.739317774772644} +03/04/2022 13:08:35 - INFO - codeparrot_training - Step 20156: {'lr': 0.00048166231552718305, 'samples': 10320384, 'steps': 20156, 'loss/train': 3.2433419227600098} +03/04/2022 13:08:36 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 13:08:40 - INFO - codeparrot_training - Step 20157: {'lr': 0.0004816603205262572, 'samples': 10320896, 'steps': 20157, 'loss/train': 2.416246175765991} +03/04/2022 13:08:44 - INFO - codeparrot_training - Step 20158: {'lr': 0.0004816583254209488, 'samples': 10321408, 'steps': 20158, 'loss/train': 2.883939266204834} +03/04/2022 13:08:45 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 13:08:49 - INFO - codeparrot_training - Step 20159: {'lr': 0.00048165633021125874, 'samples': 10321920, 'steps': 20159, 'loss/train': 1.9856821298599243} +03/04/2022 13:08:52 - INFO - codeparrot_training - Step 20160: {'lr': 0.0004816543348971879, 'samples': 10322432, 'steps': 20160, 'loss/train': 0.317841500043869} +03/04/2022 13:08:54 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 13:08:58 - INFO - codeparrot_training - Step 20161: {'lr': 0.0004816523394787372, 'samples': 10322944, 'steps': 20161, 'loss/train': 1.9499415159225464} +03/04/2022 13:09:01 - INFO - codeparrot_training - Step 20162: {'lr': 0.00048165034395590756, 'samples': 10323456, 'steps': 20162, 'loss/train': 1.7060134410858154} +03/04/2022 13:09:02 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 13:09:06 - INFO - codeparrot_training - Step 20163: {'lr': 0.0004816483483286998, 'samples': 10323968, 'steps': 20163, 'loss/train': 2.481736421585083} +03/04/2022 13:09:09 - INFO - codeparrot_training - Step 20164: {'lr': 0.0004816463525971149, 'samples': 10324480, 'steps': 20164, 'loss/train': 1.8282256126403809} +03/04/2022 13:09:11 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 13:09:15 - INFO - codeparrot_training - Step 20165: {'lr': 0.0004816443567611537, 'samples': 10324992, 'steps': 20165, 'loss/train': 2.4921884536743164} +03/04/2022 13:09:18 - INFO - codeparrot_training - Step 20166: {'lr': 0.00048164236082081713, 'samples': 10325504, 'steps': 20166, 'loss/train': 1.69943368434906} +03/04/2022 13:09:19 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 13:09:23 - INFO - codeparrot_training - Step 20167: {'lr': 0.00048164036477610616, 'samples': 10326016, 'steps': 20167, 'loss/train': 1.9492504596710205} +03/04/2022 13:09:26 - INFO - codeparrot_training - Step 20168: {'lr': 0.00048163836862702154, 'samples': 10326528, 'steps': 20168, 'loss/train': 0.9991345405578613} +03/04/2022 13:09:28 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 13:09:32 - INFO - codeparrot_training - Step 20169: {'lr': 0.0004816363723735643, 'samples': 10327040, 'steps': 20169, 'loss/train': 2.475334405899048} +03/04/2022 13:09:35 - INFO - codeparrot_training - Step 20170: {'lr': 0.00048163437601573525, 'samples': 10327552, 'steps': 20170, 'loss/train': 1.964302897453308} +03/04/2022 13:09:36 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/04/2022 13:09:40 - INFO - codeparrot_training - Step 20171: {'lr': 0.00048163237955353526, 'samples': 10328064, 'steps': 20171, 'loss/train': 1.1419132947921753} +03/04/2022 13:09:43 - INFO - codeparrot_training - Step 20172: {'lr': 0.00048163038298696537, 'samples': 10328576, 'steps': 20172, 'loss/train': 2.510044813156128} +03/04/2022 13:09:44 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 13:09:48 - INFO - codeparrot_training - Step 20173: {'lr': 0.00048162838631602643, 'samples': 10329088, 'steps': 20173, 'loss/train': 1.101884365081787} +03/04/2022 13:09:52 - INFO - codeparrot_training - Step 20174: {'lr': 0.00048162638954071926, 'samples': 10329600, 'steps': 20174, 'loss/train': 1.9567729234695435} +03/04/2022 13:09:53 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 13:09:57 - INFO - codeparrot_training - Step 20175: {'lr': 0.0004816243926610448, 'samples': 10330112, 'steps': 20175, 'loss/train': 1.6369774341583252} +03/04/2022 13:10:00 - INFO - codeparrot_training - Step 20176: {'lr': 0.000481622395677004, 'samples': 10330624, 'steps': 20176, 'loss/train': 1.3960374593734741} +03/04/2022 13:10:01 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 13:10:05 - INFO - codeparrot_training - Step 20177: {'lr': 0.0004816203985885977, 'samples': 10331136, 'steps': 20177, 'loss/train': 1.8416271209716797} +03/04/2022 13:10:08 - INFO - codeparrot_training - Step 20178: {'lr': 0.0004816184013958268, 'samples': 10331648, 'steps': 20178, 'loss/train': 2.1183910369873047} +03/04/2022 13:10:09 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 13:10:14 - INFO - codeparrot_training - Step 20179: {'lr': 0.0004816164040986923, 'samples': 10332160, 'steps': 20179, 'loss/train': 2.0789103507995605} +03/04/2022 13:10:17 - INFO - codeparrot_training - Step 20180: {'lr': 0.00048161440669719496, 'samples': 10332672, 'steps': 20180, 'loss/train': 1.6821813583374023} +03/04/2022 13:10:17 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/04/2022 13:10:22 - INFO - codeparrot_training - Step 20181: {'lr': 0.00048161240919133573, 'samples': 10333184, 'steps': 20181, 'loss/train': 2.166088819503784} +03/04/2022 13:10:25 - INFO - codeparrot_training - Step 20182: {'lr': 0.00048161041158111564, 'samples': 10333696, 'steps': 20182, 'loss/train': 1.8097742795944214} +03/04/2022 13:10:26 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 13:10:30 - INFO - codeparrot_training - Step 20183: {'lr': 0.0004816084138665353, 'samples': 10334208, 'steps': 20183, 'loss/train': 1.6145159006118774} +03/04/2022 13:10:34 - INFO - codeparrot_training - Step 20184: {'lr': 0.00048160641604759593, 'samples': 10334720, 'steps': 20184, 'loss/train': 2.049403190612793} +03/04/2022 13:10:34 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/04/2022 13:10:39 - INFO - codeparrot_training - Step 20185: {'lr': 0.0004816044181242982, 'samples': 10335232, 'steps': 20185, 'loss/train': 1.927978754043579} +03/04/2022 13:10:42 - INFO - codeparrot_training - Step 20186: {'lr': 0.0004816024200966431, 'samples': 10335744, 'steps': 20186, 'loss/train': 1.9577816724777222} +03/04/2022 13:10:43 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/04/2022 13:10:47 - INFO - codeparrot_training - Step 20187: {'lr': 0.00048160042196463153, 'samples': 10336256, 'steps': 20187, 'loss/train': 1.8856642246246338} +03/04/2022 13:10:50 - INFO - codeparrot_training - Step 20188: {'lr': 0.00048159842372826446, 'samples': 10336768, 'steps': 20188, 'loss/train': 1.9664798974990845} +03/04/2022 13:10:51 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/04/2022 13:10:56 - INFO - codeparrot_training - Step 20189: {'lr': 0.0004815964253875426, 'samples': 10337280, 'steps': 20189, 'loss/train': 2.6603853702545166} +03/04/2022 13:10:59 - INFO - codeparrot_training - Step 20190: {'lr': 0.000481594426942467, 'samples': 10337792, 'steps': 20190, 'loss/train': 2.2537975311279297} +03/04/2022 13:11:00 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 13:11:04 - INFO - codeparrot_training - Step 20191: {'lr': 0.0004815924283930385, 'samples': 10338304, 'steps': 20191, 'loss/train': 2.1698126792907715} +03/04/2022 13:11:07 - INFO - codeparrot_training - Step 20192: {'lr': 0.0004815904297392582, 'samples': 10338816, 'steps': 20192, 'loss/train': 1.97845458984375} +03/04/2022 13:11:08 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 13:11:13 - INFO - codeparrot_training - Step 20193: {'lr': 0.00048158843098112657, 'samples': 10339328, 'steps': 20193, 'loss/train': 1.9734463691711426} +03/04/2022 13:11:16 - INFO - codeparrot_training - Step 20194: {'lr': 0.00048158643211864495, 'samples': 10339840, 'steps': 20194, 'loss/train': 2.654714822769165} +03/04/2022 13:11:18 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 13:11:21 - INFO - codeparrot_training - Step 20195: {'lr': 0.000481584433151814, 'samples': 10340352, 'steps': 20195, 'loss/train': 1.906168818473816} +03/04/2022 13:11:25 - INFO - codeparrot_training - Step 20196: {'lr': 0.00048158243408063465, 'samples': 10340864, 'steps': 20196, 'loss/train': 2.7094783782958984} +03/04/2022 13:11:26 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 13:11:30 - INFO - codeparrot_training - Step 20197: {'lr': 0.0004815804349051078, 'samples': 10341376, 'steps': 20197, 'loss/train': 0.5238750576972961} +03/04/2022 13:11:33 - INFO - codeparrot_training - Step 20198: {'lr': 0.0004815784356252344, 'samples': 10341888, 'steps': 20198, 'loss/train': 1.9689972400665283} +03/04/2022 13:11:35 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 13:11:38 - INFO - codeparrot_training - Step 20199: {'lr': 0.0004815764362410154, 'samples': 10342400, 'steps': 20199, 'loss/train': 0.46228301525115967} +03/04/2022 13:11:41 - INFO - codeparrot_training - Step 20200: {'lr': 0.0004815744367524516, 'samples': 10342912, 'steps': 20200, 'loss/train': 2.4215142726898193} +03/04/2022 13:11:43 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 13:11:47 - INFO - codeparrot_training - Step 20201: {'lr': 0.0004815724371595439, 'samples': 10343424, 'steps': 20201, 'loss/train': 1.6345268487930298} +03/04/2022 13:11:50 - INFO - codeparrot_training - Step 20202: {'lr': 0.00048157043746229324, 'samples': 10343936, 'steps': 20202, 'loss/train': 1.781322956085205} +03/04/2022 13:11:51 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/04/2022 13:11:55 - INFO - codeparrot_training - Step 20203: {'lr': 0.0004815684376607006, 'samples': 10344448, 'steps': 20203, 'loss/train': 2.2415480613708496} +03/04/2022 13:11:58 - INFO - codeparrot_training - Step 20204: {'lr': 0.0004815664377547667, 'samples': 10344960, 'steps': 20204, 'loss/train': 1.5976799726486206} +03/04/2022 13:12:00 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/04/2022 13:12:04 - INFO - codeparrot_training - Step 20205: {'lr': 0.00048156443774449254, 'samples': 10345472, 'steps': 20205, 'loss/train': 1.0402053594589233} +03/04/2022 13:12:07 - INFO - codeparrot_training - Step 20206: {'lr': 0.00048156243762987905, 'samples': 10345984, 'steps': 20206, 'loss/train': 0.26645708084106445} +03/04/2022 13:12:08 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 13:12:12 - INFO - codeparrot_training - Step 20207: {'lr': 0.00048156043741092705, 'samples': 10346496, 'steps': 20207, 'loss/train': 0.5832125544548035} +03/04/2022 13:12:15 - INFO - codeparrot_training - Step 20208: {'lr': 0.00048155843708763755, 'samples': 10347008, 'steps': 20208, 'loss/train': 1.3667802810668945} +03/04/2022 13:12:17 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 13:12:20 - INFO - codeparrot_training - Step 20209: {'lr': 0.0004815564366600114, 'samples': 10347520, 'steps': 20209, 'loss/train': 1.5324790477752686} +03/04/2022 13:12:24 - INFO - codeparrot_training - Step 20210: {'lr': 0.0004815544361280494, 'samples': 10348032, 'steps': 20210, 'loss/train': 1.0052947998046875} +03/04/2022 13:12:26 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 13:12:29 - INFO - codeparrot_training - Step 20211: {'lr': 0.00048155243549175263, 'samples': 10348544, 'steps': 20211, 'loss/train': 1.9849969148635864} +03/04/2022 13:12:32 - INFO - codeparrot_training - Step 20212: {'lr': 0.00048155043475112184, 'samples': 10349056, 'steps': 20212, 'loss/train': 2.4145913124084473} +03/04/2022 13:12:34 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 13:12:37 - INFO - codeparrot_training - Step 20213: {'lr': 0.0004815484339061581, 'samples': 10349568, 'steps': 20213, 'loss/train': 1.6418592929840088} +03/04/2022 13:12:40 - INFO - codeparrot_training - Step 20214: {'lr': 0.0004815464329568621, 'samples': 10350080, 'steps': 20214, 'loss/train': 2.285188913345337} +03/04/2022 13:12:43 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 13:12:46 - INFO - codeparrot_training - Step 20215: {'lr': 0.00048154443190323495, 'samples': 10350592, 'steps': 20215, 'loss/train': 1.8057971000671387} +03/04/2022 13:12:49 - INFO - codeparrot_training - Step 20216: {'lr': 0.0004815424307452774, 'samples': 10351104, 'steps': 20216, 'loss/train': 2.2119946479797363} +03/04/2022 13:12:51 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 13:12:54 - INFO - codeparrot_training - Step 20217: {'lr': 0.0004815404294829904, 'samples': 10351616, 'steps': 20217, 'loss/train': 1.617721676826477} +03/04/2022 13:12:57 - INFO - codeparrot_training - Step 20218: {'lr': 0.0004815384281163748, 'samples': 10352128, 'steps': 20218, 'loss/train': 1.3031798601150513} +03/04/2022 13:13:00 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 13:13:03 - INFO - codeparrot_training - Step 20219: {'lr': 0.0004815364266454316, 'samples': 10352640, 'steps': 20219, 'loss/train': 1.5227128267288208} +03/04/2022 13:13:06 - INFO - codeparrot_training - Step 20220: {'lr': 0.00048153442507016173, 'samples': 10353152, 'steps': 20220, 'loss/train': 1.3807380199432373} +03/04/2022 13:13:08 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/04/2022 13:13:11 - INFO - codeparrot_training - Step 20221: {'lr': 0.00048153242339056594, 'samples': 10353664, 'steps': 20221, 'loss/train': 2.0320663452148438} +03/04/2022 13:13:14 - INFO - codeparrot_training - Step 20222: {'lr': 0.0004815304216066453, 'samples': 10354176, 'steps': 20222, 'loss/train': 1.5758334398269653} +03/04/2022 13:13:16 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 13:13:19 - INFO - codeparrot_training - Step 20223: {'lr': 0.0004815284197184005, 'samples': 10354688, 'steps': 20223, 'loss/train': 2.03945255279541} +03/04/2022 13:13:23 - INFO - codeparrot_training - Step 20224: {'lr': 0.0004815264177258326, 'samples': 10355200, 'steps': 20224, 'loss/train': 1.8572531938552856} +03/04/2022 13:13:25 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/04/2022 13:13:28 - INFO - codeparrot_training - Step 20225: {'lr': 0.00048152441562894255, 'samples': 10355712, 'steps': 20225, 'loss/train': 2.2414445877075195} +03/04/2022 13:13:31 - INFO - codeparrot_training - Step 20226: {'lr': 0.0004815224134277311, 'samples': 10356224, 'steps': 20226, 'loss/train': 1.9176788330078125} +03/04/2022 13:13:33 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/04/2022 13:13:36 - INFO - codeparrot_training - Step 20227: {'lr': 0.00048152041112219926, 'samples': 10356736, 'steps': 20227, 'loss/train': 2.175896406173706} +03/04/2022 13:13:39 - INFO - codeparrot_training - Step 20228: {'lr': 0.0004815184087123479, 'samples': 10357248, 'steps': 20228, 'loss/train': 2.489959716796875} +03/04/2022 13:13:41 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 13:13:45 - INFO - codeparrot_training - Step 20229: {'lr': 0.0004815164061981778, 'samples': 10357760, 'steps': 20229, 'loss/train': 2.494072675704956} +03/04/2022 13:13:48 - INFO - codeparrot_training - Step 20230: {'lr': 0.0004815144035796901, 'samples': 10358272, 'steps': 20230, 'loss/train': 1.5134727954864502} +03/04/2022 13:13:50 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 13:13:53 - INFO - codeparrot_training - Step 20231: {'lr': 0.0004815124008568856, 'samples': 10358784, 'steps': 20231, 'loss/train': 1.9387024641036987} +03/04/2022 13:13:56 - INFO - codeparrot_training - Step 20232: {'lr': 0.00048151039802976517, 'samples': 10359296, 'steps': 20232, 'loss/train': 3.3582944869995117} +03/04/2022 13:13:59 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/04/2022 13:14:02 - INFO - codeparrot_training - Step 20233: {'lr': 0.00048150839509832966, 'samples': 10359808, 'steps': 20233, 'loss/train': 1.329757809638977} +03/04/2022 13:14:05 - INFO - codeparrot_training - Step 20234: {'lr': 0.0004815063920625801, 'samples': 10360320, 'steps': 20234, 'loss/train': 1.709287405014038} +03/04/2022 13:14:07 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/04/2022 13:14:10 - INFO - codeparrot_training - Step 20235: {'lr': 0.00048150438892251724, 'samples': 10360832, 'steps': 20235, 'loss/train': 1.7714455127716064} +03/04/2022 13:14:13 - INFO - codeparrot_training - Step 20236: {'lr': 0.00048150238567814217, 'samples': 10361344, 'steps': 20236, 'loss/train': 1.9611821174621582} +03/04/2022 13:14:16 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/04/2022 13:14:19 - INFO - codeparrot_training - Step 20237: {'lr': 0.0004815003823294557, 'samples': 10361856, 'steps': 20237, 'loss/train': 2.4422056674957275} +03/04/2022 13:14:22 - INFO - codeparrot_training - Step 20238: {'lr': 0.0004814983788764587, 'samples': 10362368, 'steps': 20238, 'loss/train': 2.015700578689575} +03/04/2022 13:14:24 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 13:14:27 - INFO - codeparrot_training - Step 20239: {'lr': 0.00048149637531915215, 'samples': 10362880, 'steps': 20239, 'loss/train': 2.3615810871124268} +03/04/2022 13:14:30 - INFO - codeparrot_training - Step 20240: {'lr': 0.00048149437165753684, 'samples': 10363392, 'steps': 20240, 'loss/train': 0.887686550617218} +03/04/2022 13:14:33 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 13:14:36 - INFO - codeparrot_training - Step 20241: {'lr': 0.00048149236789161374, 'samples': 10363904, 'steps': 20241, 'loss/train': 2.2266316413879395} +03/04/2022 13:14:39 - INFO - codeparrot_training - Step 20242: {'lr': 0.0004814903640213838, 'samples': 10364416, 'steps': 20242, 'loss/train': 2.1513102054595947} +03/04/2022 13:14:41 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 13:14:44 - INFO - codeparrot_training - Step 20243: {'lr': 0.0004814883600468478, 'samples': 10364928, 'steps': 20243, 'loss/train': 1.7661263942718506} +03/04/2022 13:14:47 - INFO - codeparrot_training - Step 20244: {'lr': 0.0004814863559680068, 'samples': 10365440, 'steps': 20244, 'loss/train': 2.161482095718384} +03/04/2022 13:14:52 - INFO - codeparrot_training - Step 20245: {'lr': 0.00048148435178486156, 'samples': 10365952, 'steps': 20245, 'loss/train': 2.3683910369873047} +03/04/2022 13:14:56 - INFO - codeparrot_training - Step 20246: {'lr': 0.00048148234749741304, 'samples': 10366464, 'steps': 20246, 'loss/train': 1.5506311655044556} +03/04/2022 13:14:57 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 13:15:01 - INFO - codeparrot_training - Step 20247: {'lr': 0.0004814803431056622, 'samples': 10366976, 'steps': 20247, 'loss/train': 1.8024710416793823} +03/04/2022 13:15:04 - INFO - codeparrot_training - Step 20248: {'lr': 0.0004814783386096099, 'samples': 10367488, 'steps': 20248, 'loss/train': 1.8314342498779297} +03/04/2022 13:15:06 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/04/2022 13:15:09 - INFO - codeparrot_training - Step 20249: {'lr': 0.00048147633400925693, 'samples': 10368000, 'steps': 20249, 'loss/train': 1.8675981760025024} +03/04/2022 13:15:12 - INFO - codeparrot_training - Step 20250: {'lr': 0.00048147432930460433, 'samples': 10368512, 'steps': 20250, 'loss/train': 2.258758068084717} +03/04/2022 13:15:14 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 13:15:18 - INFO - codeparrot_training - Step 20251: {'lr': 0.00048147232449565305, 'samples': 10369024, 'steps': 20251, 'loss/train': 1.3598523139953613} +03/04/2022 13:15:21 - INFO - codeparrot_training - Step 20252: {'lr': 0.00048147031958240384, 'samples': 10369536, 'steps': 20252, 'loss/train': 1.931631326675415} +03/04/2022 13:15:23 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/04/2022 13:15:26 - INFO - codeparrot_training - Step 20253: {'lr': 0.00048146831456485776, 'samples': 10370048, 'steps': 20253, 'loss/train': 1.5764318704605103} +03/04/2022 13:15:29 - INFO - codeparrot_training - Step 20254: {'lr': 0.0004814663094430155, 'samples': 10370560, 'steps': 20254, 'loss/train': 1.3625842332839966} +03/04/2022 13:15:31 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 13:15:34 - INFO - codeparrot_training - Step 20255: {'lr': 0.00048146430421687817, 'samples': 10371072, 'steps': 20255, 'loss/train': 1.9887365102767944} +03/04/2022 13:15:38 - INFO - codeparrot_training - Step 20256: {'lr': 0.00048146229888644656, 'samples': 10371584, 'steps': 20256, 'loss/train': 0.7683753371238708} +03/04/2022 13:15:40 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/04/2022 13:15:43 - INFO - codeparrot_training - Step 20257: {'lr': 0.00048146029345172165, 'samples': 10372096, 'steps': 20257, 'loss/train': 2.2820956707000732} +03/04/2022 13:15:46 - INFO - codeparrot_training - Step 20258: {'lr': 0.0004814582879127043, 'samples': 10372608, 'steps': 20258, 'loss/train': 1.757788896560669} +03/04/2022 13:15:48 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/04/2022 13:15:51 - INFO - codeparrot_training - Step 20259: {'lr': 0.0004814562822693954, 'samples': 10373120, 'steps': 20259, 'loss/train': 1.8060390949249268} +03/04/2022 13:15:54 - INFO - codeparrot_training - Step 20260: {'lr': 0.00048145427652179583, 'samples': 10373632, 'steps': 20260, 'loss/train': 1.931718349456787} +03/04/2022 13:15:56 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 13:16:00 - INFO - codeparrot_training - Step 20261: {'lr': 0.0004814522706699066, 'samples': 10374144, 'steps': 20261, 'loss/train': 1.9194152355194092} +03/04/2022 13:16:03 - INFO - codeparrot_training - Step 20262: {'lr': 0.00048145026471372855, 'samples': 10374656, 'steps': 20262, 'loss/train': 1.2826192378997803} +03/04/2022 13:16:04 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 13:16:08 - INFO - codeparrot_training - Step 20263: {'lr': 0.0004814482586532626, 'samples': 10375168, 'steps': 20263, 'loss/train': 1.9188547134399414} +03/04/2022 13:16:11 - INFO - codeparrot_training - Step 20264: {'lr': 0.00048144625248850955, 'samples': 10375680, 'steps': 20264, 'loss/train': 2.2196075916290283} +03/04/2022 13:16:13 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 13:16:17 - INFO - codeparrot_training - Step 20265: {'lr': 0.0004814442462194704, 'samples': 10376192, 'steps': 20265, 'loss/train': 1.2020035982131958} +03/04/2022 13:16:20 - INFO - codeparrot_training - Step 20266: {'lr': 0.0004814422398461461, 'samples': 10376704, 'steps': 20266, 'loss/train': 2.0894978046417236} +03/04/2022 13:16:21 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/04/2022 13:16:25 - INFO - codeparrot_training - Step 20267: {'lr': 0.00048144023336853746, 'samples': 10377216, 'steps': 20267, 'loss/train': 2.225355625152588} +03/04/2022 13:16:28 - INFO - codeparrot_training - Step 20268: {'lr': 0.00048143822678664545, 'samples': 10377728, 'steps': 20268, 'loss/train': 1.3462374210357666} +03/04/2022 13:16:30 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 13:16:33 - INFO - codeparrot_training - Step 20269: {'lr': 0.00048143622010047096, 'samples': 10378240, 'steps': 20269, 'loss/train': 2.0127334594726562} +03/04/2022 13:16:37 - INFO - codeparrot_training - Step 20270: {'lr': 0.0004814342133100149, 'samples': 10378752, 'steps': 20270, 'loss/train': 1.4516279697418213} +03/04/2022 13:16:38 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 13:16:42 - INFO - codeparrot_training - Step 20271: {'lr': 0.00048143220641527805, 'samples': 10379264, 'steps': 20271, 'loss/train': 1.6147431135177612} +03/04/2022 13:16:45 - INFO - codeparrot_training - Step 20272: {'lr': 0.0004814301994162615, 'samples': 10379776, 'steps': 20272, 'loss/train': 1.3817028999328613} +03/04/2022 13:16:47 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 13:16:50 - INFO - codeparrot_training - Step 20273: {'lr': 0.000481428192312966, 'samples': 10380288, 'steps': 20273, 'loss/train': 2.0180304050445557} +03/04/2022 13:16:53 - INFO - codeparrot_training - Step 20274: {'lr': 0.0004814261851053926, 'samples': 10380800, 'steps': 20274, 'loss/train': 1.9820739030838013} +03/04/2022 13:16:55 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/04/2022 13:16:59 - INFO - codeparrot_training - Step 20275: {'lr': 0.00048142417779354214, 'samples': 10381312, 'steps': 20275, 'loss/train': 1.9524707794189453} +03/04/2022 13:17:02 - INFO - codeparrot_training - Step 20276: {'lr': 0.0004814221703774155, 'samples': 10381824, 'steps': 20276, 'loss/train': 1.286349892616272} +03/04/2022 13:17:04 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/04/2022 13:17:07 - INFO - codeparrot_training - Step 20277: {'lr': 0.00048142016285701356, 'samples': 10382336, 'steps': 20277, 'loss/train': 1.608291745185852} +03/04/2022 13:17:10 - INFO - codeparrot_training - Step 20278: {'lr': 0.00048141815523233735, 'samples': 10382848, 'steps': 20278, 'loss/train': 2.0682904720306396} +03/04/2022 13:17:12 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 13:17:16 - INFO - codeparrot_training - Step 20279: {'lr': 0.00048141614750338757, 'samples': 10383360, 'steps': 20279, 'loss/train': 1.3807785511016846} +03/04/2022 13:17:19 - INFO - codeparrot_training - Step 20280: {'lr': 0.00048141413967016535, 'samples': 10383872, 'steps': 20280, 'loss/train': 1.4437352418899536} +03/04/2022 13:17:20 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 13:17:24 - INFO - codeparrot_training - Step 20281: {'lr': 0.00048141213173267145, 'samples': 10384384, 'steps': 20281, 'loss/train': 0.9472389221191406} +03/04/2022 13:17:27 - INFO - codeparrot_training - Step 20282: {'lr': 0.0004814101236909068, 'samples': 10384896, 'steps': 20282, 'loss/train': 2.4436330795288086} +03/04/2022 13:17:29 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/04/2022 13:17:33 - INFO - codeparrot_training - Step 20283: {'lr': 0.00048140811554487234, 'samples': 10385408, 'steps': 20283, 'loss/train': 1.425564169883728} +03/04/2022 13:17:36 - INFO - codeparrot_training - Step 20284: {'lr': 0.000481406107294569, 'samples': 10385920, 'steps': 20284, 'loss/train': 1.5306607484817505} +03/04/2022 13:17:37 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 13:17:41 - INFO - codeparrot_training - Step 20285: {'lr': 0.0004814040989399975, 'samples': 10386432, 'steps': 20285, 'loss/train': 1.5772647857666016} +03/04/2022 13:17:44 - INFO - codeparrot_training - Step 20286: {'lr': 0.000481402090481159, 'samples': 10386944, 'steps': 20286, 'loss/train': 1.8681546449661255} +03/04/2022 13:17:46 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 13:17:50 - INFO - codeparrot_training - Step 20287: {'lr': 0.0004814000819180543, 'samples': 10387456, 'steps': 20287, 'loss/train': 2.0079705715179443} +03/04/2022 13:17:53 - INFO - codeparrot_training - Step 20288: {'lr': 0.00048139807325068423, 'samples': 10387968, 'steps': 20288, 'loss/train': 1.4021387100219727} +03/04/2022 13:17:55 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 13:17:58 - INFO - codeparrot_training - Step 20289: {'lr': 0.0004813960644790498, 'samples': 10388480, 'steps': 20289, 'loss/train': 2.0971012115478516} +03/04/2022 13:18:01 - INFO - codeparrot_training - Step 20290: {'lr': 0.00048139405560315186, 'samples': 10388992, 'steps': 20290, 'loss/train': 1.857638955116272} +03/04/2022 13:18:03 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/04/2022 13:18:07 - INFO - codeparrot_training - Step 20291: {'lr': 0.0004813920466229913, 'samples': 10389504, 'steps': 20291, 'loss/train': 2.142422676086426} +03/04/2022 13:18:10 - INFO - codeparrot_training - Step 20292: {'lr': 0.0004813900375385691, 'samples': 10390016, 'steps': 20292, 'loss/train': 2.0671212673187256} +03/04/2022 13:18:12 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 13:18:15 - INFO - codeparrot_training - Step 20293: {'lr': 0.0004813880283498861, 'samples': 10390528, 'steps': 20293, 'loss/train': 1.4597253799438477} +03/04/2022 13:18:18 - INFO - codeparrot_training - Step 20294: {'lr': 0.00048138601905694324, 'samples': 10391040, 'steps': 20294, 'loss/train': 1.7426204681396484} +03/04/2022 13:18:20 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 13:18:23 - INFO - codeparrot_training - Step 20295: {'lr': 0.0004813840096597414, 'samples': 10391552, 'steps': 20295, 'loss/train': 1.7160520553588867} +03/04/2022 13:18:27 - INFO - codeparrot_training - Step 20296: {'lr': 0.00048138200015828146, 'samples': 10392064, 'steps': 20296, 'loss/train': 1.929692268371582} +03/04/2022 13:18:28 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/04/2022 13:18:32 - INFO - codeparrot_training - Step 20297: {'lr': 0.00048137999055256444, 'samples': 10392576, 'steps': 20297, 'loss/train': 1.9815778732299805} +03/04/2022 13:18:35 - INFO - codeparrot_training - Step 20298: {'lr': 0.0004813779808425911, 'samples': 10393088, 'steps': 20298, 'loss/train': 1.6334166526794434} +03/04/2022 13:18:37 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 13:18:40 - INFO - codeparrot_training - Step 20299: {'lr': 0.0004813759710283624, 'samples': 10393600, 'steps': 20299, 'loss/train': 2.034120798110962} +03/04/2022 13:18:43 - INFO - codeparrot_training - Step 20300: {'lr': 0.0004813739611098793, 'samples': 10394112, 'steps': 20300, 'loss/train': 1.7253496646881104} +03/04/2022 13:18:45 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 13:18:49 - INFO - codeparrot_training - Step 20301: {'lr': 0.00048137195108714266, 'samples': 10394624, 'steps': 20301, 'loss/train': 1.3769193887710571} +03/04/2022 13:18:52 - INFO - codeparrot_training - Step 20302: {'lr': 0.00048136994096015343, 'samples': 10395136, 'steps': 20302, 'loss/train': 1.826712965965271} +03/04/2022 13:18:53 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 13:18:57 - INFO - codeparrot_training - Step 20303: {'lr': 0.00048136793072891236, 'samples': 10395648, 'steps': 20303, 'loss/train': 2.2832858562469482} +03/04/2022 13:19:00 - INFO - codeparrot_training - Step 20304: {'lr': 0.00048136592039342053, 'samples': 10396160, 'steps': 20304, 'loss/train': 2.2106282711029053} +03/04/2022 13:19:01 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 13:19:05 - INFO - codeparrot_training - Step 20305: {'lr': 0.0004813639099536789, 'samples': 10396672, 'steps': 20305, 'loss/train': 1.3505048751831055} +03/04/2022 13:19:09 - INFO - codeparrot_training - Step 20306: {'lr': 0.0004813618994096881, 'samples': 10397184, 'steps': 20306, 'loss/train': 2.0112452507019043} +03/04/2022 13:19:09 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 13:19:14 - INFO - codeparrot_training - Step 20307: {'lr': 0.0004813598887614492, 'samples': 10397696, 'steps': 20307, 'loss/train': 1.1415553092956543} +03/04/2022 13:19:17 - INFO - codeparrot_training - Step 20308: {'lr': 0.0004813578780089632, 'samples': 10398208, 'steps': 20308, 'loss/train': 2.2053706645965576} +03/04/2022 13:19:18 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/04/2022 13:19:22 - INFO - codeparrot_training - Step 20309: {'lr': 0.00048135586715223087, 'samples': 10398720, 'steps': 20309, 'loss/train': 1.2573555707931519} +03/04/2022 13:19:26 - INFO - codeparrot_training - Step 20310: {'lr': 0.00048135385619125316, 'samples': 10399232, 'steps': 20310, 'loss/train': 1.9854685068130493} +03/04/2022 13:19:27 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 13:19:31 - INFO - codeparrot_training - Step 20311: {'lr': 0.00048135184512603093, 'samples': 10399744, 'steps': 20311, 'loss/train': 2.006193161010742} +03/04/2022 13:19:34 - INFO - codeparrot_training - Step 20312: {'lr': 0.00048134983395656516, 'samples': 10400256, 'steps': 20312, 'loss/train': 1.9392521381378174} +03/04/2022 13:19:35 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 13:19:40 - INFO - codeparrot_training - Step 20313: {'lr': 0.00048134782268285676, 'samples': 10400768, 'steps': 20313, 'loss/train': 0.6946647763252258} +03/04/2022 13:19:43 - INFO - codeparrot_training - Step 20314: {'lr': 0.00048134581130490655, 'samples': 10401280, 'steps': 20314, 'loss/train': 0.8104281425476074} +03/04/2022 13:19:45 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/04/2022 13:19:48 - INFO - codeparrot_training - Step 20315: {'lr': 0.0004813437998227155, 'samples': 10401792, 'steps': 20315, 'loss/train': 1.896400809288025} +03/04/2022 13:19:51 - INFO - codeparrot_training - Step 20316: {'lr': 0.00048134178823628455, 'samples': 10402304, 'steps': 20316, 'loss/train': 1.3297690153121948} +03/04/2022 13:19:54 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 13:19:57 - INFO - codeparrot_training - Step 20317: {'lr': 0.0004813397765456145, 'samples': 10402816, 'steps': 20317, 'loss/train': 1.544603943824768} +03/04/2022 13:20:00 - INFO - codeparrot_training - Step 20318: {'lr': 0.00048133776475070637, 'samples': 10403328, 'steps': 20318, 'loss/train': 1.4077571630477905} +03/04/2022 13:20:02 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 13:20:05 - INFO - codeparrot_training - Step 20319: {'lr': 0.00048133575285156093, 'samples': 10403840, 'steps': 20319, 'loss/train': 1.7472107410430908} +03/04/2022 13:20:08 - INFO - codeparrot_training - Step 20320: {'lr': 0.00048133374084817927, 'samples': 10404352, 'steps': 20320, 'loss/train': 1.233176589012146} +03/04/2022 13:20:13 - INFO - codeparrot_training - Step 20321: {'lr': 0.00048133172874056213, 'samples': 10404864, 'steps': 20321, 'loss/train': 2.1606802940368652} +03/04/2022 13:20:17 - INFO - codeparrot_training - Step 20322: {'lr': 0.0004813297165287105, 'samples': 10405376, 'steps': 20322, 'loss/train': 0.23842552304267883} +03/04/2022 13:20:19 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 13:20:22 - INFO - codeparrot_training - Step 20323: {'lr': 0.00048132770421262526, 'samples': 10405888, 'steps': 20323, 'loss/train': 2.1917285919189453} +03/04/2022 13:20:25 - INFO - codeparrot_training - Step 20324: {'lr': 0.00048132569179230736, 'samples': 10406400, 'steps': 20324, 'loss/train': 2.2419137954711914} +03/04/2022 13:20:27 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 13:20:30 - INFO - codeparrot_training - Step 20325: {'lr': 0.0004813236792677577, 'samples': 10406912, 'steps': 20325, 'loss/train': 1.8667510747909546} +03/04/2022 13:20:33 - INFO - codeparrot_training - Step 20326: {'lr': 0.00048132166663897703, 'samples': 10407424, 'steps': 20326, 'loss/train': 1.754474401473999} +03/04/2022 13:20:35 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 13:20:39 - INFO - codeparrot_training - Step 20327: {'lr': 0.0004813196539059665, 'samples': 10407936, 'steps': 20327, 'loss/train': 2.130167007446289} +03/04/2022 13:20:42 - INFO - codeparrot_training - Step 20328: {'lr': 0.0004813176410687269, 'samples': 10408448, 'steps': 20328, 'loss/train': 1.6143391132354736} +03/04/2022 13:20:43 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 13:20:47 - INFO - codeparrot_training - Step 20329: {'lr': 0.00048131562812725904, 'samples': 10408960, 'steps': 20329, 'loss/train': 1.4584846496582031} +03/04/2022 13:20:50 - INFO - codeparrot_training - Step 20330: {'lr': 0.000481313615081564, 'samples': 10409472, 'steps': 20330, 'loss/train': 0.5400277376174927} +03/04/2022 13:20:52 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/04/2022 13:20:56 - INFO - codeparrot_training - Step 20331: {'lr': 0.00048131160193164266, 'samples': 10409984, 'steps': 20331, 'loss/train': 1.5004860162734985} +03/04/2022 13:20:59 - INFO - codeparrot_training - Step 20332: {'lr': 0.0004813095886774958, 'samples': 10410496, 'steps': 20332, 'loss/train': 1.6282845735549927} +03/04/2022 13:21:00 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/04/2022 13:21:04 - INFO - codeparrot_training - Step 20333: {'lr': 0.00048130757531912447, 'samples': 10411008, 'steps': 20333, 'loss/train': 1.8343230485916138} +03/04/2022 13:21:07 - INFO - codeparrot_training - Step 20334: {'lr': 0.00048130556185652947, 'samples': 10411520, 'steps': 20334, 'loss/train': 1.8938406705856323} +03/04/2022 13:21:09 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 13:21:13 - INFO - codeparrot_training - Step 20335: {'lr': 0.0004813035482897118, 'samples': 10412032, 'steps': 20335, 'loss/train': 1.6977053880691528} +03/04/2022 13:21:16 - INFO - codeparrot_training - Step 20336: {'lr': 0.00048130153461867225, 'samples': 10412544, 'steps': 20336, 'loss/train': 1.8562641143798828} +03/04/2022 13:21:17 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 13:21:21 - INFO - codeparrot_training - Step 20337: {'lr': 0.0004812995208434119, 'samples': 10413056, 'steps': 20337, 'loss/train': 2.538099765777588} +03/04/2022 13:21:24 - INFO - codeparrot_training - Step 20338: {'lr': 0.00048129750696393144, 'samples': 10413568, 'steps': 20338, 'loss/train': 1.3659586906433105} +03/04/2022 13:21:25 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 13:21:29 - INFO - codeparrot_training - Step 20339: {'lr': 0.00048129549298023196, 'samples': 10414080, 'steps': 20339, 'loss/train': 2.215716600418091} +03/04/2022 13:21:33 - INFO - codeparrot_training - Step 20340: {'lr': 0.0004812934788923143, 'samples': 10414592, 'steps': 20340, 'loss/train': 1.971356749534607} +03/04/2022 13:21:34 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 13:21:38 - INFO - codeparrot_training - Step 20341: {'lr': 0.00048129146470017933, 'samples': 10415104, 'steps': 20341, 'loss/train': 1.595149040222168} +03/04/2022 13:21:41 - INFO - codeparrot_training - Step 20342: {'lr': 0.000481289450403828, 'samples': 10415616, 'steps': 20342, 'loss/train': 1.8534880876541138} +03/04/2022 13:21:42 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 13:21:46 - INFO - codeparrot_training - Step 20343: {'lr': 0.0004812874360032613, 'samples': 10416128, 'steps': 20343, 'loss/train': 2.1731081008911133} +03/04/2022 13:21:50 - INFO - codeparrot_training - Step 20344: {'lr': 0.0004812854214984799, 'samples': 10416640, 'steps': 20344, 'loss/train': 1.5320478677749634} +03/04/2022 13:21:50 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 13:21:55 - INFO - codeparrot_training - Step 20345: {'lr': 0.000481283406889485, 'samples': 10417152, 'steps': 20345, 'loss/train': 2.062438488006592} +03/04/2022 13:21:58 - INFO - codeparrot_training - Step 20346: {'lr': 0.00048128139217627725, 'samples': 10417664, 'steps': 20346, 'loss/train': 2.098503351211548} +03/04/2022 13:21:59 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/04/2022 13:22:03 - INFO - codeparrot_training - Step 20347: {'lr': 0.00048127937735885774, 'samples': 10418176, 'steps': 20347, 'loss/train': 2.301358222961426} +03/04/2022 13:22:06 - INFO - codeparrot_training - Step 20348: {'lr': 0.0004812773624372273, 'samples': 10418688, 'steps': 20348, 'loss/train': 1.6554186344146729} +03/04/2022 13:22:07 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/04/2022 13:22:12 - INFO - codeparrot_training - Step 20349: {'lr': 0.0004812753474113869, 'samples': 10419200, 'steps': 20349, 'loss/train': 1.3251705169677734} +03/04/2022 13:22:15 - INFO - codeparrot_training - Step 20350: {'lr': 0.0004812733322813373, 'samples': 10419712, 'steps': 20350, 'loss/train': 1.3182694911956787} +03/04/2022 13:22:18 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 13:22:21 - INFO - codeparrot_training - Step 20351: {'lr': 0.00048127131704707953, 'samples': 10420224, 'steps': 20351, 'loss/train': 1.0903029441833496} +03/04/2022 13:22:24 - INFO - codeparrot_training - Step 20352: {'lr': 0.0004812693017086145, 'samples': 10420736, 'steps': 20352, 'loss/train': 1.7895545959472656} +03/04/2022 13:22:26 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 13:22:29 - INFO - codeparrot_training - Step 20353: {'lr': 0.00048126728626594315, 'samples': 10421248, 'steps': 20353, 'loss/train': 1.431334137916565} +03/04/2022 13:22:32 - INFO - codeparrot_training - Step 20354: {'lr': 0.00048126527071906623, 'samples': 10421760, 'steps': 20354, 'loss/train': 1.5782909393310547} +03/04/2022 13:22:35 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 13:22:37 - INFO - codeparrot_training - Step 20355: {'lr': 0.0004812632550679848, 'samples': 10422272, 'steps': 20355, 'loss/train': 2.2521138191223145} +03/04/2022 13:22:41 - INFO - codeparrot_training - Step 20356: {'lr': 0.00048126123931269973, 'samples': 10422784, 'steps': 20356, 'loss/train': 1.773417353630066} +03/04/2022 13:22:43 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/04/2022 13:22:46 - INFO - codeparrot_training - Step 20357: {'lr': 0.0004812592234532118, 'samples': 10423296, 'steps': 20357, 'loss/train': 1.5069940090179443} +03/04/2022 13:22:49 - INFO - codeparrot_training - Step 20358: {'lr': 0.00048125720748952216, 'samples': 10423808, 'steps': 20358, 'loss/train': 1.9643890857696533} +03/04/2022 13:22:52 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 13:22:54 - INFO - codeparrot_training - Step 20359: {'lr': 0.00048125519142163157, 'samples': 10424320, 'steps': 20359, 'loss/train': 2.557032823562622} +03/04/2022 13:22:58 - INFO - codeparrot_training - Step 20360: {'lr': 0.0004812531752495409, 'samples': 10424832, 'steps': 20360, 'loss/train': 2.056480646133423} +03/04/2022 13:23:00 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 13:23:03 - INFO - codeparrot_training - Step 20361: {'lr': 0.00048125115897325115, 'samples': 10425344, 'steps': 20361, 'loss/train': 1.844163179397583} +03/04/2022 13:23:06 - INFO - codeparrot_training - Step 20362: {'lr': 0.0004812491425927632, 'samples': 10425856, 'steps': 20362, 'loss/train': 1.5912896394729614} +03/04/2022 13:23:10 - INFO - codeparrot_training - Step 20363: {'lr': 0.000481247126108078, 'samples': 10426368, 'steps': 20363, 'loss/train': 1.7340617179870605} +03/04/2022 13:23:10 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 13:23:15 - INFO - codeparrot_training - Step 20364: {'lr': 0.00048124510951919633, 'samples': 10426880, 'steps': 20364, 'loss/train': 1.1696362495422363} +03/04/2022 13:23:18 - INFO - codeparrot_training - Step 20365: {'lr': 0.0004812430928261192, 'samples': 10427392, 'steps': 20365, 'loss/train': 0.907446563243866} +03/04/2022 13:23:19 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 13:23:23 - INFO - codeparrot_training - Step 20366: {'lr': 0.00048124107602884753, 'samples': 10427904, 'steps': 20366, 'loss/train': 2.48476243019104} +03/04/2022 13:23:26 - INFO - codeparrot_training - Step 20367: {'lr': 0.0004812390591273822, 'samples': 10428416, 'steps': 20367, 'loss/train': 1.7469379901885986} +03/04/2022 13:23:28 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 13:23:32 - INFO - codeparrot_training - Step 20368: {'lr': 0.00048123704212172416, 'samples': 10428928, 'steps': 20368, 'loss/train': 1.1882168054580688} +03/04/2022 13:23:35 - INFO - codeparrot_training - Step 20369: {'lr': 0.0004812350250118742, 'samples': 10429440, 'steps': 20369, 'loss/train': 1.5872604846954346} +03/04/2022 13:23:36 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 13:23:40 - INFO - codeparrot_training - Step 20370: {'lr': 0.0004812330077978333, 'samples': 10429952, 'steps': 20370, 'loss/train': 2.2186641693115234} +03/04/2022 13:23:43 - INFO - codeparrot_training - Step 20371: {'lr': 0.0004812309904796024, 'samples': 10430464, 'steps': 20371, 'loss/train': 2.340975284576416} +03/04/2022 13:23:45 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 13:23:49 - INFO - codeparrot_training - Step 20372: {'lr': 0.0004812289730571824, 'samples': 10430976, 'steps': 20372, 'loss/train': 2.283179759979248} +03/04/2022 13:23:52 - INFO - codeparrot_training - Step 20373: {'lr': 0.00048122695553057417, 'samples': 10431488, 'steps': 20373, 'loss/train': 1.290582299232483} +03/04/2022 13:23:54 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 13:23:57 - INFO - codeparrot_training - Step 20374: {'lr': 0.00048122493789977866, 'samples': 10432000, 'steps': 20374, 'loss/train': 1.8281712532043457} +03/04/2022 13:24:00 - INFO - codeparrot_training - Step 20375: {'lr': 0.00048122292016479674, 'samples': 10432512, 'steps': 20375, 'loss/train': 1.3553353548049927} +03/04/2022 13:24:02 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/04/2022 13:24:05 - INFO - codeparrot_training - Step 20376: {'lr': 0.0004812209023256294, 'samples': 10433024, 'steps': 20376, 'loss/train': 1.724345326423645} +03/04/2022 13:24:09 - INFO - codeparrot_training - Step 20377: {'lr': 0.0004812188843822775, 'samples': 10433536, 'steps': 20377, 'loss/train': 2.3454768657684326} +03/04/2022 13:24:11 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 13:24:14 - INFO - codeparrot_training - Step 20378: {'lr': 0.0004812168663347418, 'samples': 10434048, 'steps': 20378, 'loss/train': 2.4093613624572754} +03/04/2022 13:24:17 - INFO - codeparrot_training - Step 20379: {'lr': 0.00048121484818302343, 'samples': 10434560, 'steps': 20379, 'loss/train': 1.271134376525879} +03/04/2022 13:24:19 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 13:24:22 - INFO - codeparrot_training - Step 20380: {'lr': 0.00048121282992712324, 'samples': 10435072, 'steps': 20380, 'loss/train': 1.9415888786315918} +03/04/2022 13:24:25 - INFO - codeparrot_training - Step 20381: {'lr': 0.00048121081156704207, 'samples': 10435584, 'steps': 20381, 'loss/train': 2.448842763900757} +03/04/2022 13:24:27 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 13:24:31 - INFO - codeparrot_training - Step 20382: {'lr': 0.00048120879310278094, 'samples': 10436096, 'steps': 20382, 'loss/train': 2.6376023292541504} +03/04/2022 13:24:34 - INFO - codeparrot_training - Step 20383: {'lr': 0.00048120677453434066, 'samples': 10436608, 'steps': 20383, 'loss/train': 0.865773618221283} +03/04/2022 13:24:36 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/04/2022 13:24:39 - INFO - codeparrot_training - Step 20384: {'lr': 0.00048120475586172217, 'samples': 10437120, 'steps': 20384, 'loss/train': 2.056737184524536} +03/04/2022 13:24:42 - INFO - codeparrot_training - Step 20385: {'lr': 0.00048120273708492637, 'samples': 10437632, 'steps': 20385, 'loss/train': 1.8882478475570679} +03/04/2022 13:24:44 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 13:24:48 - INFO - codeparrot_training - Step 20386: {'lr': 0.0004812007182039542, 'samples': 10438144, 'steps': 20386, 'loss/train': 1.3399277925491333} +03/04/2022 13:24:51 - INFO - codeparrot_training - Step 20387: {'lr': 0.00048119869921880656, 'samples': 10438656, 'steps': 20387, 'loss/train': 1.551895022392273} +03/04/2022 13:24:53 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 13:24:56 - INFO - codeparrot_training - Step 20388: {'lr': 0.00048119668012948434, 'samples': 10439168, 'steps': 20388, 'loss/train': 2.479062557220459} +03/04/2022 13:24:59 - INFO - codeparrot_training - Step 20389: {'lr': 0.0004811946609359885, 'samples': 10439680, 'steps': 20389, 'loss/train': 1.934212327003479} +03/04/2022 13:25:01 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 13:25:05 - INFO - codeparrot_training - Step 20390: {'lr': 0.00048119264163831987, 'samples': 10440192, 'steps': 20390, 'loss/train': 2.335300922393799} +03/04/2022 13:25:08 - INFO - codeparrot_training - Step 20391: {'lr': 0.0004811906222364794, 'samples': 10440704, 'steps': 20391, 'loss/train': 2.26424241065979} +03/04/2022 13:25:10 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/04/2022 13:25:13 - INFO - codeparrot_training - Step 20392: {'lr': 0.00048118860273046804, 'samples': 10441216, 'steps': 20392, 'loss/train': 1.7915430068969727} +03/04/2022 13:25:16 - INFO - codeparrot_training - Step 20393: {'lr': 0.00048118658312028663, 'samples': 10441728, 'steps': 20393, 'loss/train': 0.38916534185409546} +03/04/2022 13:25:18 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 13:25:21 - INFO - codeparrot_training - Step 20394: {'lr': 0.0004811845634059361, 'samples': 10442240, 'steps': 20394, 'loss/train': 2.617605447769165} +03/04/2022 13:25:25 - INFO - codeparrot_training - Step 20395: {'lr': 0.0004811825435874174, 'samples': 10442752, 'steps': 20395, 'loss/train': 1.5396442413330078} +03/04/2022 13:25:26 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 13:25:30 - INFO - codeparrot_training - Step 20396: {'lr': 0.0004811805236647314, 'samples': 10443264, 'steps': 20396, 'loss/train': 1.2303355932235718} +03/04/2022 13:25:33 - INFO - codeparrot_training - Step 20397: {'lr': 0.0004811785036378791, 'samples': 10443776, 'steps': 20397, 'loss/train': 1.3081837892532349} +03/04/2022 13:25:35 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/04/2022 13:25:38 - INFO - codeparrot_training - Step 20398: {'lr': 0.0004811764835068613, 'samples': 10444288, 'steps': 20398, 'loss/train': 1.8741294145584106} +03/04/2022 13:25:41 - INFO - codeparrot_training - Step 20399: {'lr': 0.0004811744632716789, 'samples': 10444800, 'steps': 20399, 'loss/train': 2.0554354190826416} +03/04/2022 13:25:43 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 13:25:47 - INFO - codeparrot_training - Step 20400: {'lr': 0.0004811724429323329, 'samples': 10445312, 'steps': 20400, 'loss/train': 1.965099811553955} +03/04/2022 13:25:50 - INFO - codeparrot_training - Step 20401: {'lr': 0.0004811704224888241, 'samples': 10445824, 'steps': 20401, 'loss/train': 2.2221293449401855} +03/04/2022 13:25:52 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 13:25:55 - INFO - codeparrot_training - Step 20402: {'lr': 0.0004811684019411535, 'samples': 10446336, 'steps': 20402, 'loss/train': 2.4441635608673096} +03/04/2022 13:25:59 - INFO - codeparrot_training - Step 20403: {'lr': 0.000481166381289322, 'samples': 10446848, 'steps': 20403, 'loss/train': 1.9303005933761597} +03/04/2022 13:26:01 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/04/2022 13:26:04 - INFO - codeparrot_training - Step 20404: {'lr': 0.0004811643605333305, 'samples': 10447360, 'steps': 20404, 'loss/train': 2.419502019882202} +03/04/2022 13:26:07 - INFO - codeparrot_training - Step 20405: {'lr': 0.0004811623396731799, 'samples': 10447872, 'steps': 20405, 'loss/train': 0.5135295391082764} +03/04/2022 13:26:09 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 13:26:12 - INFO - codeparrot_training - Step 20406: {'lr': 0.0004811603187088711, 'samples': 10448384, 'steps': 20406, 'loss/train': 2.0092034339904785} +03/04/2022 13:26:15 - INFO - codeparrot_training - Step 20407: {'lr': 0.00048115829764040503, 'samples': 10448896, 'steps': 20407, 'loss/train': 1.9149476289749146} +03/04/2022 13:26:18 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 13:26:21 - INFO - codeparrot_training - Step 20408: {'lr': 0.0004811562764677826, 'samples': 10449408, 'steps': 20408, 'loss/train': 1.9891232252120972} +03/04/2022 13:26:24 - INFO - codeparrot_training - Step 20409: {'lr': 0.00048115425519100474, 'samples': 10449920, 'steps': 20409, 'loss/train': 1.968295693397522} +03/04/2022 13:26:26 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 13:26:29 - INFO - codeparrot_training - Step 20410: {'lr': 0.0004811522338100723, 'samples': 10450432, 'steps': 20410, 'loss/train': 1.7027602195739746} +03/04/2022 13:26:32 - INFO - codeparrot_training - Step 20411: {'lr': 0.0004811502123249862, 'samples': 10450944, 'steps': 20411, 'loss/train': 2.7670481204986572} +03/04/2022 13:26:34 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 13:26:38 - INFO - codeparrot_training - Step 20412: {'lr': 0.0004811481907357475, 'samples': 10451456, 'steps': 20412, 'loss/train': 2.065373420715332} +03/04/2022 13:26:41 - INFO - codeparrot_training - Step 20413: {'lr': 0.000481146169042357, 'samples': 10451968, 'steps': 20413, 'loss/train': 2.3624441623687744} +03/04/2022 13:26:45 - INFO - codeparrot_training - Step 20414: {'lr': 0.0004811441472448155, 'samples': 10452480, 'steps': 20414, 'loss/train': 1.972161889076233} +03/04/2022 13:26:47 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/04/2022 13:26:50 - INFO - codeparrot_training - Step 20415: {'lr': 0.000481142125343124, 'samples': 10452992, 'steps': 20415, 'loss/train': 1.8398648500442505} +03/04/2022 13:26:53 - INFO - codeparrot_training - Step 20416: {'lr': 0.0004811401033372835, 'samples': 10453504, 'steps': 20416, 'loss/train': 1.709134817123413} +03/04/2022 13:26:55 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/04/2022 13:26:58 - INFO - codeparrot_training - Step 20417: {'lr': 0.0004811380812272948, 'samples': 10454016, 'steps': 20417, 'loss/train': 2.085507392883301} +03/04/2022 13:27:02 - INFO - codeparrot_training - Step 20418: {'lr': 0.0004811360590131589, 'samples': 10454528, 'steps': 20418, 'loss/train': 1.4669723510742188} +03/04/2022 13:27:03 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/04/2022 13:27:07 - INFO - codeparrot_training - Step 20419: {'lr': 0.00048113403669487655, 'samples': 10455040, 'steps': 20419, 'loss/train': 1.52060067653656} +03/04/2022 13:27:10 - INFO - codeparrot_training - Step 20420: {'lr': 0.0004811320142724489, 'samples': 10455552, 'steps': 20420, 'loss/train': 2.3218374252319336} +03/04/2022 13:27:12 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 13:27:15 - INFO - codeparrot_training - Step 20421: {'lr': 0.0004811299917458766, 'samples': 10456064, 'steps': 20421, 'loss/train': 2.3772382736206055} +03/04/2022 13:27:18 - INFO - codeparrot_training - Step 20422: {'lr': 0.00048112796911516076, 'samples': 10456576, 'steps': 20422, 'loss/train': 1.7651783227920532} +03/04/2022 13:27:20 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 13:27:24 - INFO - codeparrot_training - Step 20423: {'lr': 0.00048112594638030225, 'samples': 10457088, 'steps': 20423, 'loss/train': 2.3160226345062256} +03/04/2022 13:27:27 - INFO - codeparrot_training - Step 20424: {'lr': 0.00048112392354130194, 'samples': 10457600, 'steps': 20424, 'loss/train': 1.6645604372024536} +03/04/2022 13:27:28 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/04/2022 13:27:32 - INFO - codeparrot_training - Step 20425: {'lr': 0.00048112190059816076, 'samples': 10458112, 'steps': 20425, 'loss/train': 1.8664242029190063} +03/04/2022 13:27:35 - INFO - codeparrot_training - Step 20426: {'lr': 0.0004811198775508796, 'samples': 10458624, 'steps': 20426, 'loss/train': 1.6594371795654297} +03/04/2022 13:27:37 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 13:27:40 - INFO - codeparrot_training - Step 20427: {'lr': 0.0004811178543994593, 'samples': 10459136, 'steps': 20427, 'loss/train': 0.7872492074966431} +03/04/2022 13:27:44 - INFO - codeparrot_training - Step 20428: {'lr': 0.000481115831143901, 'samples': 10459648, 'steps': 20428, 'loss/train': 1.242393970489502} +03/04/2022 13:27:45 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 13:27:49 - INFO - codeparrot_training - Step 20429: {'lr': 0.00048111380778420544, 'samples': 10460160, 'steps': 20429, 'loss/train': 1.8225417137145996} +03/04/2022 13:27:52 - INFO - codeparrot_training - Step 20430: {'lr': 0.0004811117843203735, 'samples': 10460672, 'steps': 20430, 'loss/train': 1.223367691040039} +03/04/2022 13:27:53 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/04/2022 13:27:57 - INFO - codeparrot_training - Step 20431: {'lr': 0.00048110976075240624, 'samples': 10461184, 'steps': 20431, 'loss/train': 3.027543067932129} +03/04/2022 13:28:00 - INFO - codeparrot_training - Step 20432: {'lr': 0.00048110773708030444, 'samples': 10461696, 'steps': 20432, 'loss/train': 1.7364612817764282} +03/04/2022 13:28:02 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 13:28:06 - INFO - codeparrot_training - Step 20433: {'lr': 0.00048110571330406903, 'samples': 10462208, 'steps': 20433, 'loss/train': 2.1509816646575928} +03/04/2022 13:28:09 - INFO - codeparrot_training - Step 20434: {'lr': 0.0004811036894237011, 'samples': 10462720, 'steps': 20434, 'loss/train': 1.8670737743377686} +03/04/2022 13:28:10 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 13:28:14 - INFO - codeparrot_training - Step 20435: {'lr': 0.00048110166543920125, 'samples': 10463232, 'steps': 20435, 'loss/train': 0.5064175128936768} +03/04/2022 13:28:17 - INFO - codeparrot_training - Step 20436: {'lr': 0.0004810996413505706, 'samples': 10463744, 'steps': 20436, 'loss/train': 2.10916805267334} +03/04/2022 13:28:18 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/04/2022 13:28:23 - INFO - codeparrot_training - Step 20437: {'lr': 0.0004810976171578101, 'samples': 10464256, 'steps': 20437, 'loss/train': 2.328972101211548} +03/04/2022 13:28:26 - INFO - codeparrot_training - Step 20438: {'lr': 0.00048109559286092047, 'samples': 10464768, 'steps': 20438, 'loss/train': 2.05403208732605} +03/04/2022 13:28:27 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 13:28:31 - INFO - codeparrot_training - Step 20439: {'lr': 0.0004810935684599028, 'samples': 10465280, 'steps': 20439, 'loss/train': 1.608642339706421} +03/04/2022 13:28:34 - INFO - codeparrot_training - Step 20440: {'lr': 0.00048109154395475787, 'samples': 10465792, 'steps': 20440, 'loss/train': 2.510542631149292} +03/04/2022 13:28:35 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 13:28:40 - INFO - codeparrot_training - Step 20441: {'lr': 0.00048108951934548673, 'samples': 10466304, 'steps': 20441, 'loss/train': 1.219976782798767} +03/04/2022 13:28:43 - INFO - codeparrot_training - Step 20442: {'lr': 0.0004810874946320901, 'samples': 10466816, 'steps': 20442, 'loss/train': 2.348198175430298} +03/04/2022 13:28:44 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/04/2022 13:28:48 - INFO - codeparrot_training - Step 20443: {'lr': 0.00048108546981456916, 'samples': 10467328, 'steps': 20443, 'loss/train': 1.7358585596084595} +03/04/2022 13:28:51 - INFO - codeparrot_training - Step 20444: {'lr': 0.0004810834448929246, 'samples': 10467840, 'steps': 20444, 'loss/train': 2.3606083393096924} +03/04/2022 13:28:52 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/04/2022 13:28:57 - INFO - codeparrot_training - Step 20445: {'lr': 0.0004810814198671574, 'samples': 10468352, 'steps': 20445, 'loss/train': 1.7099816799163818} +03/04/2022 13:29:00 - INFO - codeparrot_training - Step 20446: {'lr': 0.00048107939473726846, 'samples': 10468864, 'steps': 20446, 'loss/train': 2.0231876373291016} +03/04/2022 13:29:01 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/04/2022 13:29:05 - INFO - codeparrot_training - Step 20447: {'lr': 0.0004810773695032588, 'samples': 10469376, 'steps': 20447, 'loss/train': 1.808205246925354} +03/04/2022 13:29:08 - INFO - codeparrot_training - Step 20448: {'lr': 0.00048107534416512915, 'samples': 10469888, 'steps': 20448, 'loss/train': 2.2882440090179443} +03/04/2022 13:29:09 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 13:29:13 - INFO - codeparrot_training - Step 20449: {'lr': 0.00048107331872288055, 'samples': 10470400, 'steps': 20449, 'loss/train': 1.2700802087783813} +03/04/2022 13:29:17 - INFO - codeparrot_training - Step 20450: {'lr': 0.0004810712931765139, 'samples': 10470912, 'steps': 20450, 'loss/train': 2.8365399837493896} +03/04/2022 13:29:18 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 13:29:22 - INFO - codeparrot_training - Step 20451: {'lr': 0.00048106926752603007, 'samples': 10471424, 'steps': 20451, 'loss/train': 1.5664759874343872} +03/04/2022 13:29:25 - INFO - codeparrot_training - Step 20452: {'lr': 0.00048106724177143, 'samples': 10471936, 'steps': 20452, 'loss/train': 2.0549042224884033} +03/04/2022 13:29:26 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/04/2022 13:29:30 - INFO - codeparrot_training - Step 20453: {'lr': 0.00048106521591271455, 'samples': 10472448, 'steps': 20453, 'loss/train': 2.18337345123291} +03/04/2022 13:29:33 - INFO - codeparrot_training - Step 20454: {'lr': 0.00048106318994988476, 'samples': 10472960, 'steps': 20454, 'loss/train': 1.731911540031433} +03/04/2022 13:29:35 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/04/2022 13:29:39 - INFO - codeparrot_training - Step 20455: {'lr': 0.0004810611638829414, 'samples': 10473472, 'steps': 20455, 'loss/train': 1.9947426319122314} +03/04/2022 13:29:42 - INFO - codeparrot_training - Step 20456: {'lr': 0.00048105913771188545, 'samples': 10473984, 'steps': 20456, 'loss/train': 2.0211124420166016} +03/04/2022 13:29:43 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 13:29:48 - INFO - codeparrot_training - Step 20457: {'lr': 0.00048105711143671783, 'samples': 10474496, 'steps': 20457, 'loss/train': 2.29929518699646} +03/04/2022 13:29:51 - INFO - codeparrot_training - Step 20458: {'lr': 0.0004810550850574394, 'samples': 10475008, 'steps': 20458, 'loss/train': 0.413383424282074} +03/04/2022 13:29:53 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 13:29:56 - INFO - codeparrot_training - Step 20459: {'lr': 0.0004810530585740512, 'samples': 10475520, 'steps': 20459, 'loss/train': 1.200756311416626} +03/04/2022 13:29:59 - INFO - codeparrot_training - Step 20460: {'lr': 0.00048105103198655406, 'samples': 10476032, 'steps': 20460, 'loss/train': 1.3011879920959473} +03/04/2022 13:30:02 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 13:30:04 - INFO - codeparrot_training - Step 20461: {'lr': 0.0004810490052949488, 'samples': 10476544, 'steps': 20461, 'loss/train': 1.2738149166107178} +03/04/2022 13:30:08 - INFO - codeparrot_training - Step 20462: {'lr': 0.0004810469784992365, 'samples': 10477056, 'steps': 20462, 'loss/train': 2.066554546356201} +03/04/2022 13:30:10 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 13:30:13 - INFO - codeparrot_training - Step 20463: {'lr': 0.00048104495159941794, 'samples': 10477568, 'steps': 20463, 'loss/train': 2.2703185081481934} +03/04/2022 13:30:16 - INFO - codeparrot_training - Step 20464: {'lr': 0.00048104292459549413, 'samples': 10478080, 'steps': 20464, 'loss/train': 0.5864220261573792} +03/04/2022 13:30:19 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/04/2022 13:30:21 - INFO - codeparrot_training - Step 20465: {'lr': 0.0004810408974874659, 'samples': 10478592, 'steps': 20465, 'loss/train': 1.6697965860366821} +03/04/2022 13:30:24 - INFO - codeparrot_training - Step 20466: {'lr': 0.0004810388702753342, 'samples': 10479104, 'steps': 20466, 'loss/train': 2.1550307273864746} +03/04/2022 13:30:27 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 13:30:30 - INFO - codeparrot_training - Step 20467: {'lr': 0.0004810368429591, 'samples': 10479616, 'steps': 20467, 'loss/train': 0.8951159715652466} +03/04/2022 13:30:33 - INFO - codeparrot_training - Step 20468: {'lr': 0.00048103481553876415, 'samples': 10480128, 'steps': 20468, 'loss/train': 2.172673225402832} +03/04/2022 13:30:35 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/04/2022 13:30:38 - INFO - codeparrot_training - Step 20469: {'lr': 0.0004810327880143276, 'samples': 10480640, 'steps': 20469, 'loss/train': 0.3059053122997284} +03/04/2022 13:30:41 - INFO - codeparrot_training - Step 20470: {'lr': 0.00048103076038579125, 'samples': 10481152, 'steps': 20470, 'loss/train': 1.6274641752243042} +03/04/2022 13:30:44 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 13:30:46 - INFO - codeparrot_training - Step 20471: {'lr': 0.00048102873265315596, 'samples': 10481664, 'steps': 20471, 'loss/train': 1.9880660772323608} +03/04/2022 13:30:50 - INFO - codeparrot_training - Step 20472: {'lr': 0.0004810267048164227, 'samples': 10482176, 'steps': 20472, 'loss/train': 1.835856318473816} +03/04/2022 13:30:52 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 13:30:55 - INFO - codeparrot_training - Step 20473: {'lr': 0.0004810246768755924, 'samples': 10482688, 'steps': 20473, 'loss/train': 1.7808772325515747} +03/04/2022 13:30:58 - INFO - codeparrot_training - Step 20474: {'lr': 0.0004810226488306659, 'samples': 10483200, 'steps': 20474, 'loss/train': 1.7930465936660767} +03/04/2022 13:31:01 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 13:31:03 - INFO - codeparrot_training - Step 20475: {'lr': 0.00048102062068164413, 'samples': 10483712, 'steps': 20475, 'loss/train': 1.8961561918258667} +03/04/2022 13:31:07 - INFO - codeparrot_training - Step 20476: {'lr': 0.0004810185924285281, 'samples': 10484224, 'steps': 20476, 'loss/train': 1.7032997608184814} +03/04/2022 13:31:09 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 13:31:12 - INFO - codeparrot_training - Step 20477: {'lr': 0.00048101656407131864, 'samples': 10484736, 'steps': 20477, 'loss/train': 1.1949142217636108} +03/04/2022 13:31:15 - INFO - codeparrot_training - Step 20478: {'lr': 0.00048101453561001667, 'samples': 10485248, 'steps': 20478, 'loss/train': 1.038216471672058} +03/04/2022 13:31:17 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 13:31:20 - INFO - codeparrot_training - Step 20479: {'lr': 0.00048101250704462315, 'samples': 10485760, 'steps': 20479, 'loss/train': 2.075403928756714} +03/04/2022 13:31:24 - INFO - codeparrot_training - Step 20480: {'lr': 0.0004810104783751389, 'samples': 10486272, 'steps': 20480, 'loss/train': 1.7467021942138672} +03/04/2022 13:31:26 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 13:31:29 - INFO - codeparrot_training - Step 20481: {'lr': 0.00048100844960156496, 'samples': 10486784, 'steps': 20481, 'loss/train': 1.327009677886963} +03/04/2022 13:31:32 - INFO - codeparrot_training - Step 20482: {'lr': 0.0004810064207239021, 'samples': 10487296, 'steps': 20482, 'loss/train': 1.840355396270752} +03/04/2022 13:31:35 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 13:31:37 - INFO - codeparrot_training - Step 20483: {'lr': 0.0004810043917421514, 'samples': 10487808, 'steps': 20483, 'loss/train': 1.8505570888519287} +03/04/2022 13:31:41 - INFO - codeparrot_training - Step 20484: {'lr': 0.0004810023626563136, 'samples': 10488320, 'steps': 20484, 'loss/train': 3.536642074584961} +03/04/2022 13:31:43 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 13:31:46 - INFO - codeparrot_training - Step 20485: {'lr': 0.0004810003334663898, 'samples': 10488832, 'steps': 20485, 'loss/train': 2.4519662857055664} +03/04/2022 13:31:49 - INFO - codeparrot_training - Step 20486: {'lr': 0.0004809983041723807, 'samples': 10489344, 'steps': 20486, 'loss/train': 2.0070531368255615} +03/04/2022 13:31:52 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/04/2022 13:31:54 - INFO - codeparrot_training - Step 20487: {'lr': 0.00048099627477428744, 'samples': 10489856, 'steps': 20487, 'loss/train': 2.567838430404663} +03/04/2022 13:31:57 - INFO - codeparrot_training - Step 20488: {'lr': 0.0004809942452721107, 'samples': 10490368, 'steps': 20488, 'loss/train': 1.092480182647705} +03/04/2022 13:32:00 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 13:32:03 - INFO - codeparrot_training - Step 20489: {'lr': 0.0004809922156658516, 'samples': 10490880, 'steps': 20489, 'loss/train': 1.9522677659988403} +03/04/2022 13:32:06 - INFO - codeparrot_training - Step 20490: {'lr': 0.00048099018595551096, 'samples': 10491392, 'steps': 20490, 'loss/train': 1.6156349182128906} +03/04/2022 13:32:08 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 13:32:11 - INFO - codeparrot_training - Step 20491: {'lr': 0.0004809881561410897, 'samples': 10491904, 'steps': 20491, 'loss/train': 1.7115347385406494} +03/04/2022 13:32:14 - INFO - codeparrot_training - Step 20492: {'lr': 0.00048098612622258873, 'samples': 10492416, 'steps': 20492, 'loss/train': 2.4155454635620117} +03/04/2022 13:32:16 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 13:32:19 - INFO - codeparrot_training - Step 20493: {'lr': 0.00048098409620000906, 'samples': 10492928, 'steps': 20493, 'loss/train': 2.2255070209503174} +03/04/2022 13:32:23 - INFO - codeparrot_training - Step 20494: {'lr': 0.00048098206607335135, 'samples': 10493440, 'steps': 20494, 'loss/train': 2.27629017829895} +03/04/2022 13:32:25 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/04/2022 13:32:28 - INFO - codeparrot_training - Step 20495: {'lr': 0.00048098003584261684, 'samples': 10493952, 'steps': 20495, 'loss/train': 2.08027982711792} +03/04/2022 13:32:31 - INFO - codeparrot_training - Step 20496: {'lr': 0.00048097800550780625, 'samples': 10494464, 'steps': 20496, 'loss/train': 2.0890092849731445} +03/04/2022 13:32:34 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 13:32:36 - INFO - codeparrot_training - Step 20497: {'lr': 0.0004809759750689205, 'samples': 10494976, 'steps': 20497, 'loss/train': 0.8619827628135681} +03/04/2022 13:32:39 - INFO - codeparrot_training - Step 20498: {'lr': 0.00048097394452596053, 'samples': 10495488, 'steps': 20498, 'loss/train': 1.5897575616836548} +03/04/2022 13:32:42 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 13:32:45 - INFO - codeparrot_training - Step 20499: {'lr': 0.0004809719138789273, 'samples': 10496000, 'steps': 20499, 'loss/train': 0.8328710794448853} +03/04/2022 13:32:48 - INFO - codeparrot_training - Step 20500: {'lr': 0.0004809698831278217, 'samples': 10496512, 'steps': 20500, 'loss/train': 2.0871291160583496} +03/04/2022 13:32:50 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 13:32:53 - INFO - codeparrot_training - Step 20501: {'lr': 0.0004809678522726446, 'samples': 10497024, 'steps': 20501, 'loss/train': 1.9859899282455444} +03/04/2022 13:32:56 - INFO - codeparrot_training - Step 20502: {'lr': 0.000480965821313397, 'samples': 10497536, 'steps': 20502, 'loss/train': 2.541849136352539} +03/04/2022 13:32:59 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 13:33:01 - INFO - codeparrot_training - Step 20503: {'lr': 0.0004809637902500797, 'samples': 10498048, 'steps': 20503, 'loss/train': 1.9376754760742188} +03/04/2022 13:33:05 - INFO - codeparrot_training - Step 20504: {'lr': 0.00048096175908269375, 'samples': 10498560, 'steps': 20504, 'loss/train': 2.102771043777466} +03/04/2022 13:33:07 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 13:33:10 - INFO - codeparrot_training - Step 20505: {'lr': 0.00048095972781124, 'samples': 10499072, 'steps': 20505, 'loss/train': 0.9444851279258728} +03/04/2022 13:33:13 - INFO - codeparrot_training - Step 20506: {'lr': 0.00048095769643571927, 'samples': 10499584, 'steps': 20506, 'loss/train': 2.1419150829315186} +03/04/2022 13:33:15 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 13:33:18 - INFO - codeparrot_training - Step 20507: {'lr': 0.0004809556649561326, 'samples': 10500096, 'steps': 20507, 'loss/train': 1.7455748319625854} +03/04/2022 13:33:21 - INFO - codeparrot_training - Step 20508: {'lr': 0.0004809536333724809, 'samples': 10500608, 'steps': 20508, 'loss/train': 2.4186573028564453} +03/04/2022 13:33:24 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 13:33:27 - INFO - codeparrot_training - Step 20509: {'lr': 0.000480951601684765, 'samples': 10501120, 'steps': 20509, 'loss/train': 1.6323879957199097} +03/04/2022 13:33:30 - INFO - codeparrot_training - Step 20510: {'lr': 0.00048094956989298593, 'samples': 10501632, 'steps': 20510, 'loss/train': 1.8355625867843628} +03/04/2022 13:33:33 - INFO - codeparrot_training - Step 20511: {'lr': 0.0004809475379971445, 'samples': 10502144, 'steps': 20511, 'loss/train': 0.627000093460083} +03/04/2022 13:33:33 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/04/2022 13:33:39 - INFO - codeparrot_training - Step 20512: {'lr': 0.00048094550599724176, 'samples': 10502656, 'steps': 20512, 'loss/train': 1.5604933500289917} +03/04/2022 13:33:42 - INFO - codeparrot_training - Step 20513: {'lr': 0.0004809434738932785, 'samples': 10503168, 'steps': 20513, 'loss/train': 1.6675570011138916} +03/04/2022 13:33:42 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 13:33:47 - INFO - codeparrot_training - Step 20514: {'lr': 0.0004809414416852557, 'samples': 10503680, 'steps': 20514, 'loss/train': 0.2800006568431854} +03/04/2022 13:33:50 - INFO - codeparrot_training - Step 20515: {'lr': 0.00048093940937317414, 'samples': 10504192, 'steps': 20515, 'loss/train': 2.060588836669922} +03/04/2022 13:33:50 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 13:33:56 - INFO - codeparrot_training - Step 20516: {'lr': 0.00048093737695703494, 'samples': 10504704, 'steps': 20516, 'loss/train': 1.6464698314666748} +03/04/2022 13:33:59 - INFO - codeparrot_training - Step 20517: {'lr': 0.0004809353444368389, 'samples': 10505216, 'steps': 20517, 'loss/train': 2.219045400619507} +03/04/2022 13:33:59 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 13:34:05 - INFO - codeparrot_training - Step 20518: {'lr': 0.00048093331181258694, 'samples': 10505728, 'steps': 20518, 'loss/train': 2.6132962703704834} +03/04/2022 13:34:08 - INFO - codeparrot_training - Step 20519: {'lr': 0.00048093127908428, 'samples': 10506240, 'steps': 20519, 'loss/train': 2.3432576656341553} +03/04/2022 13:34:10 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 13:34:13 - INFO - codeparrot_training - Step 20520: {'lr': 0.00048092924625191903, 'samples': 10506752, 'steps': 20520, 'loss/train': 2.1681833267211914} +03/04/2022 13:34:16 - INFO - codeparrot_training - Step 20521: {'lr': 0.0004809272133155048, 'samples': 10507264, 'steps': 20521, 'loss/train': 2.10150408744812} +03/04/2022 13:34:19 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 13:34:22 - INFO - codeparrot_training - Step 20522: {'lr': 0.00048092518027503844, 'samples': 10507776, 'steps': 20522, 'loss/train': 1.8934534788131714} +03/04/2022 13:34:25 - INFO - codeparrot_training - Step 20523: {'lr': 0.0004809231471305208, 'samples': 10508288, 'steps': 20523, 'loss/train': 1.7568614482879639} +03/04/2022 13:34:27 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 13:34:30 - INFO - codeparrot_training - Step 20524: {'lr': 0.0004809211138819526, 'samples': 10508800, 'steps': 20524, 'loss/train': 2.171069383621216} +03/04/2022 13:34:33 - INFO - codeparrot_training - Step 20525: {'lr': 0.000480919080529335, 'samples': 10509312, 'steps': 20525, 'loss/train': 1.9185090065002441} +03/04/2022 13:34:35 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 13:34:38 - INFO - codeparrot_training - Step 20526: {'lr': 0.0004809170470726688, 'samples': 10509824, 'steps': 20526, 'loss/train': 1.6862729787826538} +03/04/2022 13:34:42 - INFO - codeparrot_training - Step 20527: {'lr': 0.00048091501351195495, 'samples': 10510336, 'steps': 20527, 'loss/train': 2.5053324699401855} +03/04/2022 13:34:44 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 13:34:47 - INFO - codeparrot_training - Step 20528: {'lr': 0.00048091297984719433, 'samples': 10510848, 'steps': 20528, 'loss/train': 1.1084288358688354} +03/04/2022 13:34:50 - INFO - codeparrot_training - Step 20529: {'lr': 0.0004809109460783879, 'samples': 10511360, 'steps': 20529, 'loss/train': 2.0203561782836914} +03/04/2022 13:34:52 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 13:34:55 - INFO - codeparrot_training - Step 20530: {'lr': 0.0004809089122055366, 'samples': 10511872, 'steps': 20530, 'loss/train': 2.093722105026245} +03/04/2022 13:34:58 - INFO - codeparrot_training - Step 20531: {'lr': 0.00048090687822864125, 'samples': 10512384, 'steps': 20531, 'loss/train': 1.423715353012085} +03/04/2022 13:35:00 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 13:35:04 - INFO - codeparrot_training - Step 20532: {'lr': 0.00048090484414770284, 'samples': 10512896, 'steps': 20532, 'loss/train': 1.8944523334503174} +03/04/2022 13:35:07 - INFO - codeparrot_training - Step 20533: {'lr': 0.00048090280996272234, 'samples': 10513408, 'steps': 20533, 'loss/train': 1.0160870552062988} +03/04/2022 13:35:09 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/04/2022 13:35:12 - INFO - codeparrot_training - Step 20534: {'lr': 0.0004809007756737005, 'samples': 10513920, 'steps': 20534, 'loss/train': 1.5946910381317139} +03/04/2022 13:35:15 - INFO - codeparrot_training - Step 20535: {'lr': 0.0004808987412806384, 'samples': 10514432, 'steps': 20535, 'loss/train': 1.1321042776107788} +03/04/2022 13:35:17 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 13:35:21 - INFO - codeparrot_training - Step 20536: {'lr': 0.0004808967067835369, 'samples': 10514944, 'steps': 20536, 'loss/train': 1.8620120286941528} +03/04/2022 13:35:24 - INFO - codeparrot_training - Step 20537: {'lr': 0.00048089467218239687, 'samples': 10515456, 'steps': 20537, 'loss/train': 2.162780284881592} +03/04/2022 13:35:25 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/04/2022 13:35:29 - INFO - codeparrot_training - Step 20538: {'lr': 0.00048089263747721925, 'samples': 10515968, 'steps': 20538, 'loss/train': 1.5353078842163086} +03/04/2022 13:35:32 - INFO - codeparrot_training - Step 20539: {'lr': 0.000480890602668005, 'samples': 10516480, 'steps': 20539, 'loss/train': 1.1610027551651} +03/04/2022 13:35:37 - INFO - codeparrot_training - Step 20540: {'lr': 0.000480888567754755, 'samples': 10516992, 'steps': 20540, 'loss/train': 4.48935604095459} +03/04/2022 13:35:41 - INFO - codeparrot_training - Step 20541: {'lr': 0.0004808865327374701, 'samples': 10517504, 'steps': 20541, 'loss/train': 2.20967960357666} +03/04/2022 13:35:42 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 13:35:46 - INFO - codeparrot_training - Step 20542: {'lr': 0.0004808844976161514, 'samples': 10518016, 'steps': 20542, 'loss/train': 2.2568769454956055} +03/04/2022 13:35:49 - INFO - codeparrot_training - Step 20543: {'lr': 0.0004808824623907997, 'samples': 10518528, 'steps': 20543, 'loss/train': 1.5520856380462646} +03/04/2022 13:35:51 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/04/2022 13:35:54 - INFO - codeparrot_training - Step 20544: {'lr': 0.0004808804270614159, 'samples': 10519040, 'steps': 20544, 'loss/train': 2.4500033855438232} +03/04/2022 13:35:58 - INFO - codeparrot_training - Step 20545: {'lr': 0.0004808783916280008, 'samples': 10519552, 'steps': 20545, 'loss/train': 1.1142815351486206} +03/04/2022 13:35:59 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/04/2022 13:36:03 - INFO - codeparrot_training - Step 20546: {'lr': 0.0004808763560905557, 'samples': 10520064, 'steps': 20546, 'loss/train': 1.5092437267303467} +03/04/2022 13:36:06 - INFO - codeparrot_training - Step 20547: {'lr': 0.0004808743204490811, 'samples': 10520576, 'steps': 20547, 'loss/train': 1.8915859460830688} +03/04/2022 13:36:08 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 13:36:11 - INFO - codeparrot_training - Step 20548: {'lr': 0.00048087228470357823, 'samples': 10521088, 'steps': 20548, 'loss/train': 1.743141770362854} +03/04/2022 13:36:14 - INFO - codeparrot_training - Step 20549: {'lr': 0.00048087024885404777, 'samples': 10521600, 'steps': 20549, 'loss/train': 1.4230200052261353} +03/04/2022 13:36:16 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/04/2022 13:36:20 - INFO - codeparrot_training - Step 20550: {'lr': 0.00048086821290049077, 'samples': 10522112, 'steps': 20550, 'loss/train': 2.04496431350708} +03/04/2022 13:36:23 - INFO - codeparrot_training - Step 20551: {'lr': 0.00048086617684290814, 'samples': 10522624, 'steps': 20551, 'loss/train': 2.203918933868408} +03/04/2022 13:36:24 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/04/2022 13:36:28 - INFO - codeparrot_training - Step 20552: {'lr': 0.00048086414068130077, 'samples': 10523136, 'steps': 20552, 'loss/train': 0.7275916934013367} +03/04/2022 13:36:31 - INFO - codeparrot_training - Step 20553: {'lr': 0.00048086210441566956, 'samples': 10523648, 'steps': 20553, 'loss/train': 1.4173226356506348} +03/04/2022 13:36:33 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/04/2022 13:36:37 - INFO - codeparrot_training - Step 20554: {'lr': 0.00048086006804601544, 'samples': 10524160, 'steps': 20554, 'loss/train': 2.5101277828216553} +03/04/2022 13:36:40 - INFO - codeparrot_training - Step 20555: {'lr': 0.00048085803157233933, 'samples': 10524672, 'steps': 20555, 'loss/train': 1.5636099576950073} +03/04/2022 13:36:41 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 13:36:45 - INFO - codeparrot_training - Step 20556: {'lr': 0.00048085599499464216, 'samples': 10525184, 'steps': 20556, 'loss/train': 2.111664295196533} +03/04/2022 13:36:48 - INFO - codeparrot_training - Step 20557: {'lr': 0.0004808539583129249, 'samples': 10525696, 'steps': 20557, 'loss/train': 2.3063805103302} +03/04/2022 13:36:50 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/04/2022 13:36:53 - INFO - codeparrot_training - Step 20558: {'lr': 0.0004808519215271884, 'samples': 10526208, 'steps': 20558, 'loss/train': 2.0706403255462646} +03/04/2022 13:36:57 - INFO - codeparrot_training - Step 20559: {'lr': 0.0004808498846374335, 'samples': 10526720, 'steps': 20559, 'loss/train': 2.0973684787750244} +03/04/2022 13:36:58 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 13:37:02 - INFO - codeparrot_training - Step 20560: {'lr': 0.0004808478476436612, 'samples': 10527232, 'steps': 20560, 'loss/train': 1.6198869943618774} +03/04/2022 13:37:05 - INFO - codeparrot_training - Step 20561: {'lr': 0.00048084581054587253, 'samples': 10527744, 'steps': 20561, 'loss/train': 1.901327133178711} +03/04/2022 13:37:08 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 13:37:10 - INFO - codeparrot_training - Step 20562: {'lr': 0.0004808437733440682, 'samples': 10528256, 'steps': 20562, 'loss/train': 1.534468412399292} +03/04/2022 13:37:14 - INFO - codeparrot_training - Step 20563: {'lr': 0.0004808417360382493, 'samples': 10528768, 'steps': 20563, 'loss/train': 1.8267016410827637} +03/04/2022 13:37:16 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/04/2022 13:37:19 - INFO - codeparrot_training - Step 20564: {'lr': 0.00048083969862841667, 'samples': 10529280, 'steps': 20564, 'loss/train': 1.7290340662002563} +03/04/2022 13:37:22 - INFO - codeparrot_training - Step 20565: {'lr': 0.00048083766111457115, 'samples': 10529792, 'steps': 20565, 'loss/train': 2.2309768199920654} +03/04/2022 13:37:26 - INFO - codeparrot_training - Step 20566: {'lr': 0.0004808356234967138, 'samples': 10530304, 'steps': 20566, 'loss/train': 1.1230095624923706} +03/04/2022 13:37:26 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/04/2022 13:37:31 - INFO - codeparrot_training - Step 20567: {'lr': 0.00048083358577484547, 'samples': 10530816, 'steps': 20567, 'loss/train': 2.190361976623535} +03/04/2022 13:37:34 - INFO - codeparrot_training - Step 20568: {'lr': 0.0004808315479489671, 'samples': 10531328, 'steps': 20568, 'loss/train': 0.7066911458969116} +03/04/2022 13:37:34 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 13:37:39 - INFO - codeparrot_training - Step 20569: {'lr': 0.00048082951001907965, 'samples': 10531840, 'steps': 20569, 'loss/train': 2.0183420181274414} +03/04/2022 13:37:43 - INFO - codeparrot_training - Step 20570: {'lr': 0.0004808274719851839, 'samples': 10532352, 'steps': 20570, 'loss/train': 1.7740540504455566} +03/04/2022 13:37:43 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 13:37:48 - INFO - codeparrot_training - Step 20571: {'lr': 0.0004808254338472809, 'samples': 10532864, 'steps': 20571, 'loss/train': 2.0523171424865723} +03/04/2022 13:37:51 - INFO - codeparrot_training - Step 20572: {'lr': 0.00048082339560537145, 'samples': 10533376, 'steps': 20572, 'loss/train': 2.1971435546875} +03/04/2022 13:37:51 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/04/2022 13:37:56 - INFO - codeparrot_training - Step 20573: {'lr': 0.00048082135725945665, 'samples': 10533888, 'steps': 20573, 'loss/train': 1.814721941947937} +03/04/2022 13:37:59 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 13:38:02 - INFO - codeparrot_training - Step 20574: {'lr': 0.0004808193188095372, 'samples': 10534400, 'steps': 20574, 'loss/train': 0.9366166591644287} +03/04/2022 13:38:05 - INFO - codeparrot_training - Step 20575: {'lr': 0.0004808172802556142, 'samples': 10534912, 'steps': 20575, 'loss/train': 0.9939374923706055} +03/04/2022 13:38:08 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/04/2022 13:38:10 - INFO - codeparrot_training - Step 20576: {'lr': 0.0004808152415976885, 'samples': 10535424, 'steps': 20576, 'loss/train': 1.3819323778152466} +03/04/2022 13:38:13 - INFO - codeparrot_training - Step 20577: {'lr': 0.000480813202835761, 'samples': 10535936, 'steps': 20577, 'loss/train': 1.6481845378875732} +03/04/2022 13:38:16 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 13:38:18 - INFO - codeparrot_training - Step 20578: {'lr': 0.0004808111639698326, 'samples': 10536448, 'steps': 20578, 'loss/train': 2.095339059829712} +03/04/2022 13:38:22 - INFO - codeparrot_training - Step 20579: {'lr': 0.0004808091249999043, 'samples': 10536960, 'steps': 20579, 'loss/train': 2.475553035736084} +03/04/2022 13:38:24 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 13:38:27 - INFO - codeparrot_training - Step 20580: {'lr': 0.0004808070859259769, 'samples': 10537472, 'steps': 20580, 'loss/train': 1.9107547998428345} +03/04/2022 13:38:30 - INFO - codeparrot_training - Step 20581: {'lr': 0.0004808050467480515, 'samples': 10537984, 'steps': 20581, 'loss/train': 0.558265209197998} +03/04/2022 13:38:33 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/04/2022 13:38:35 - INFO - codeparrot_training - Step 20582: {'lr': 0.0004808030074661288, 'samples': 10538496, 'steps': 20582, 'loss/train': 2.1978917121887207} +03/04/2022 13:38:38 - INFO - codeparrot_training - Step 20583: {'lr': 0.0004808009680802099, 'samples': 10539008, 'steps': 20583, 'loss/train': 2.066957473754883} +03/04/2022 13:38:41 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/04/2022 13:38:44 - INFO - codeparrot_training - Step 20584: {'lr': 0.00048079892859029564, 'samples': 10539520, 'steps': 20584, 'loss/train': 1.6681959629058838} +03/04/2022 13:38:47 - INFO - codeparrot_training - Step 20585: {'lr': 0.00048079688899638684, 'samples': 10540032, 'steps': 20585, 'loss/train': 1.9815449714660645} +03/04/2022 13:38:49 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 13:38:52 - INFO - codeparrot_training - Step 20586: {'lr': 0.0004807948492984846, 'samples': 10540544, 'steps': 20586, 'loss/train': 1.190662145614624} +03/04/2022 13:38:55 - INFO - codeparrot_training - Step 20587: {'lr': 0.0004807928094965898, 'samples': 10541056, 'steps': 20587, 'loss/train': 2.0260086059570312} +03/04/2022 13:38:58 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 13:39:01 - INFO - codeparrot_training - Step 20588: {'lr': 0.0004807907695907032, 'samples': 10541568, 'steps': 20588, 'loss/train': 1.2616569995880127} +03/04/2022 13:39:04 - INFO - codeparrot_training - Step 20589: {'lr': 0.000480788729580826, 'samples': 10542080, 'steps': 20589, 'loss/train': 2.016632318496704} +03/04/2022 13:39:07 - INFO - codeparrot_training - Step 20590: {'lr': 0.00048078668946695887, 'samples': 10542592, 'steps': 20590, 'loss/train': 1.2489697933197021} +03/04/2022 13:39:07 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 13:39:12 - INFO - codeparrot_training - Step 20591: {'lr': 0.0004807846492491028, 'samples': 10543104, 'steps': 20591, 'loss/train': 2.332156181335449} +03/04/2022 13:39:15 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 13:39:18 - INFO - codeparrot_training - Step 20592: {'lr': 0.0004807826089272588, 'samples': 10543616, 'steps': 20592, 'loss/train': 1.9255601167678833} +03/04/2022 13:39:21 - INFO - codeparrot_training - Step 20593: {'lr': 0.0004807805685014277, 'samples': 10544128, 'steps': 20593, 'loss/train': 2.1923904418945312} +03/04/2022 13:39:24 - INFO - codeparrot_training - Step 20594: {'lr': 0.00048077852797161034, 'samples': 10544640, 'steps': 20594, 'loss/train': 0.2019602209329605} +03/04/2022 13:39:24 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/04/2022 13:39:29 - INFO - codeparrot_training - Step 20595: {'lr': 0.0004807764873378079, 'samples': 10545152, 'steps': 20595, 'loss/train': 0.9719663262367249} +03/04/2022 13:39:32 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 13:39:35 - INFO - codeparrot_training - Step 20596: {'lr': 0.000480774446600021, 'samples': 10545664, 'steps': 20596, 'loss/train': 2.263751983642578} +03/04/2022 13:39:38 - INFO - codeparrot_training - Step 20597: {'lr': 0.00048077240575825075, 'samples': 10546176, 'steps': 20597, 'loss/train': 1.2643367052078247} +03/04/2022 13:39:41 - INFO - codeparrot_training - Step 20598: {'lr': 0.000480770364812498, 'samples': 10546688, 'steps': 20598, 'loss/train': 2.0041215419769287} +03/04/2022 13:39:41 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 13:39:46 - INFO - codeparrot_training - Step 20599: {'lr': 0.0004807683237627637, 'samples': 10547200, 'steps': 20599, 'loss/train': 1.9147244691848755} +03/04/2022 13:39:49 - INFO - codeparrot_training - Step 20600: {'lr': 0.0004807662826090488, 'samples': 10547712, 'steps': 20600, 'loss/train': 1.4070805311203003} +03/04/2022 13:39:50 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 13:39:55 - INFO - codeparrot_training - Step 20601: {'lr': 0.00048076424135135406, 'samples': 10548224, 'steps': 20601, 'loss/train': 1.6716095209121704} +03/04/2022 13:39:58 - INFO - codeparrot_training - Step 20602: {'lr': 0.00048076219998968055, 'samples': 10548736, 'steps': 20602, 'loss/train': 0.5408016443252563} +03/04/2022 13:39:58 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 13:40:03 - INFO - codeparrot_training - Step 20603: {'lr': 0.0004807601585240292, 'samples': 10549248, 'steps': 20603, 'loss/train': 1.7388633489608765} +03/04/2022 13:40:06 - INFO - codeparrot_training - Step 20604: {'lr': 0.0004807581169544009, 'samples': 10549760, 'steps': 20604, 'loss/train': 2.4022042751312256} +03/04/2022 13:40:06 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 13:40:12 - INFO - codeparrot_training - Step 20605: {'lr': 0.00048075607528079645, 'samples': 10550272, 'steps': 20605, 'loss/train': 1.3515058755874634} +03/04/2022 13:40:15 - INFO - codeparrot_training - Step 20606: {'lr': 0.0004807540335032169, 'samples': 10550784, 'steps': 20606, 'loss/train': 1.7544512748718262} +03/04/2022 13:40:15 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 13:40:20 - INFO - codeparrot_training - Step 20607: {'lr': 0.0004807519916216633, 'samples': 10551296, 'steps': 20607, 'loss/train': 2.1516003608703613} +03/04/2022 13:40:23 - INFO - codeparrot_training - Step 20608: {'lr': 0.0004807499496361362, 'samples': 10551808, 'steps': 20608, 'loss/train': 2.469893455505371} +03/04/2022 13:40:23 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/04/2022 13:40:29 - INFO - codeparrot_training - Step 20609: {'lr': 0.00048074790754663686, 'samples': 10552320, 'steps': 20609, 'loss/train': 2.3112213611602783} +03/04/2022 13:40:31 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/04/2022 13:40:34 - INFO - codeparrot_training - Step 20610: {'lr': 0.000480745865353166, 'samples': 10552832, 'steps': 20610, 'loss/train': 1.4391309022903442} +03/04/2022 13:40:37 - INFO - codeparrot_training - Step 20611: {'lr': 0.0004807438230557247, 'samples': 10553344, 'steps': 20611, 'loss/train': 0.9567596316337585} +03/04/2022 13:40:40 - INFO - codeparrot_training - Step 20612: {'lr': 0.00048074178065431373, 'samples': 10553856, 'steps': 20612, 'loss/train': 2.9121127128601074} +03/04/2022 13:40:40 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 13:40:46 - INFO - codeparrot_training - Step 20613: {'lr': 0.0004807397381489341, 'samples': 10554368, 'steps': 20613, 'loss/train': 1.4604493379592896} +03/04/2022 13:40:49 - INFO - codeparrot_training - Step 20614: {'lr': 0.00048073769553958666, 'samples': 10554880, 'steps': 20614, 'loss/train': 2.5874922275543213} +03/04/2022 13:40:49 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/04/2022 13:40:54 - INFO - codeparrot_training - Step 20615: {'lr': 0.00048073565282627246, 'samples': 10555392, 'steps': 20615, 'loss/train': 2.803088903427124} +03/04/2022 13:40:57 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/04/2022 13:40:59 - INFO - codeparrot_training - Step 20616: {'lr': 0.0004807336100089923, 'samples': 10555904, 'steps': 20616, 'loss/train': 2.074979543685913} +03/04/2022 13:41:03 - INFO - codeparrot_training - Step 20617: {'lr': 0.0004807315670877471, 'samples': 10556416, 'steps': 20617, 'loss/train': 1.9806272983551025} +03/04/2022 13:41:05 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 13:41:08 - INFO - codeparrot_training - Step 20618: {'lr': 0.00048072952406253783, 'samples': 10556928, 'steps': 20618, 'loss/train': 0.21339386701583862} +03/04/2022 13:41:11 - INFO - codeparrot_training - Step 20619: {'lr': 0.00048072748093336536, 'samples': 10557440, 'steps': 20619, 'loss/train': 1.7451608180999756} +03/04/2022 13:41:14 - INFO - codeparrot_training - Step 20620: {'lr': 0.00048072543770023076, 'samples': 10557952, 'steps': 20620, 'loss/train': 1.8885148763656616} +03/04/2022 13:41:14 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/04/2022 13:41:20 - INFO - codeparrot_training - Step 20621: {'lr': 0.0004807233943631347, 'samples': 10558464, 'steps': 20621, 'loss/train': 2.109591007232666} +03/04/2022 13:41:23 - INFO - codeparrot_training - Step 20622: {'lr': 0.0004807213509220784, 'samples': 10558976, 'steps': 20622, 'loss/train': 1.2343413829803467} +03/04/2022 13:41:23 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/04/2022 13:41:28 - INFO - codeparrot_training - Step 20623: {'lr': 0.0004807193073770625, 'samples': 10559488, 'steps': 20623, 'loss/train': 2.5142805576324463} +03/04/2022 13:41:31 - INFO - codeparrot_training - Step 20624: {'lr': 0.0004807172637280881, 'samples': 10560000, 'steps': 20624, 'loss/train': 2.1950204372406006} +03/04/2022 13:41:32 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 13:41:37 - INFO - codeparrot_training - Step 20625: {'lr': 0.000480715219975156, 'samples': 10560512, 'steps': 20625, 'loss/train': 2.2035341262817383} +03/04/2022 13:41:40 - INFO - codeparrot_training - Step 20626: {'lr': 0.0004807131761182672, 'samples': 10561024, 'steps': 20626, 'loss/train': 2.5084140300750732} +03/04/2022 13:41:40 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/04/2022 13:41:45 - INFO - codeparrot_training - Step 20627: {'lr': 0.00048071113215742263, 'samples': 10561536, 'steps': 20627, 'loss/train': 1.960379958152771} +03/04/2022 13:41:48 - INFO - codeparrot_training - Step 20628: {'lr': 0.00048070908809262316, 'samples': 10562048, 'steps': 20628, 'loss/train': 2.1590895652770996} +03/04/2022 13:41:48 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 13:41:53 - INFO - codeparrot_training - Step 20629: {'lr': 0.0004807070439238698, 'samples': 10562560, 'steps': 20629, 'loss/train': 2.1765875816345215} +03/04/2022 13:41:57 - INFO - codeparrot_training - Step 20630: {'lr': 0.0004807049996511633, 'samples': 10563072, 'steps': 20630, 'loss/train': 1.2814838886260986} +03/04/2022 13:41:57 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 13:42:02 - INFO - codeparrot_training - Step 20631: {'lr': 0.00048070295527450474, 'samples': 10563584, 'steps': 20631, 'loss/train': 2.005636692047119} +03/04/2022 13:42:05 - INFO - codeparrot_training - Step 20632: {'lr': 0.000480700910793895, 'samples': 10564096, 'steps': 20632, 'loss/train': 2.1985225677490234} +03/04/2022 13:42:05 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/04/2022 13:42:10 - INFO - codeparrot_training - Step 20633: {'lr': 0.000480698866209335, 'samples': 10564608, 'steps': 20633, 'loss/train': 1.8133372068405151} +03/04/2022 13:42:13 - INFO - codeparrot_training - Step 20634: {'lr': 0.0004806968215208256, 'samples': 10565120, 'steps': 20634, 'loss/train': 1.233330488204956} +03/04/2022 13:42:14 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 13:42:19 - INFO - codeparrot_training - Step 20635: {'lr': 0.0004806947767283678, 'samples': 10565632, 'steps': 20635, 'loss/train': 1.4435844421386719} +03/04/2022 13:42:22 - INFO - codeparrot_training - Step 20636: {'lr': 0.0004806927318319625, 'samples': 10566144, 'steps': 20636, 'loss/train': 2.1718313694000244} +03/04/2022 13:42:22 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/04/2022 13:42:27 - INFO - codeparrot_training - Step 20637: {'lr': 0.0004806906868316106, 'samples': 10566656, 'steps': 20637, 'loss/train': 2.0529532432556152} +03/04/2022 13:42:30 - INFO - codeparrot_training - Step 20638: {'lr': 0.000480688641727313, 'samples': 10567168, 'steps': 20638, 'loss/train': 2.0600876808166504} +03/04/2022 13:42:30 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/04/2022 13:42:36 - INFO - codeparrot_training - Step 20639: {'lr': 0.00048068659651907076, 'samples': 10567680, 'steps': 20639, 'loss/train': 2.0734565258026123} +03/04/2022 13:42:39 - INFO - codeparrot_training - Step 20640: {'lr': 0.0004806845512068846, 'samples': 10568192, 'steps': 20640, 'loss/train': 2.5211849212646484} +03/04/2022 13:42:39 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 13:42:44 - INFO - codeparrot_training - Step 20641: {'lr': 0.00048068250579075554, 'samples': 10568704, 'steps': 20641, 'loss/train': 2.2971339225769043} +03/04/2022 13:42:47 - INFO - codeparrot_training - Step 20642: {'lr': 0.00048068046027068456, 'samples': 10569216, 'steps': 20642, 'loss/train': 2.349273443222046} +03/04/2022 13:42:47 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/04/2022 13:42:53 - INFO - codeparrot_training - Step 20643: {'lr': 0.0004806784146466726, 'samples': 10569728, 'steps': 20643, 'loss/train': 2.259838819503784} +03/04/2022 13:42:56 - INFO - codeparrot_training - Step 20644: {'lr': 0.00048067636891872036, 'samples': 10570240, 'steps': 20644, 'loss/train': 1.8413264751434326} +03/04/2022 13:42:56 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/04/2022 13:43:01 - INFO - codeparrot_training - Step 20645: {'lr': 0.00048067432308682894, 'samples': 10570752, 'steps': 20645, 'loss/train': 2.1669483184814453} +03/04/2022 13:43:04 - INFO - codeparrot_training - Step 20646: {'lr': 0.0004806722771509993, 'samples': 10571264, 'steps': 20646, 'loss/train': 1.2995960712432861} +03/04/2022 13:43:04 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/04/2022 13:43:10 - INFO - codeparrot_training - Step 20647: {'lr': 0.0004806702311112322, 'samples': 10571776, 'steps': 20647, 'loss/train': 1.0408862829208374} +03/04/2022 13:43:13 - INFO - codeparrot_training - Step 20648: {'lr': 0.0004806681849675287, 'samples': 10572288, 'steps': 20648, 'loss/train': 2.52351713180542} +03/04/2022 13:43:13 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/04/2022 13:43:18 - INFO - codeparrot_training - Step 20649: {'lr': 0.00048066613871988967, 'samples': 10572800, 'steps': 20649, 'loss/train': 2.007094383239746} +03/04/2022 13:43:21 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 13:43:23 - INFO - codeparrot_training - Step 20650: {'lr': 0.00048066409236831607, 'samples': 10573312, 'steps': 20650, 'loss/train': 1.3885326385498047} +03/04/2022 13:43:27 - INFO - codeparrot_training - Step 20651: {'lr': 0.0004806620459128087, 'samples': 10573824, 'steps': 20651, 'loss/train': 1.963650107383728} +03/04/2022 13:43:29 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 13:43:32 - INFO - codeparrot_training - Step 20652: {'lr': 0.0004806599993533687, 'samples': 10574336, 'steps': 20652, 'loss/train': 2.060060977935791} +03/04/2022 13:43:35 - INFO - codeparrot_training - Step 20653: {'lr': 0.00048065795268999677, 'samples': 10574848, 'steps': 20653, 'loss/train': 2.368366241455078} +03/04/2022 13:43:38 - INFO - codeparrot_training - Step 20654: {'lr': 0.00048065590592269393, 'samples': 10575360, 'steps': 20654, 'loss/train': 1.8393296003341675} +03/04/2022 13:43:38 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 13:43:43 - INFO - codeparrot_training - Step 20655: {'lr': 0.00048065385905146114, 'samples': 10575872, 'steps': 20655, 'loss/train': 1.888199806213379} +03/04/2022 13:43:47 - INFO - codeparrot_training - Step 20656: {'lr': 0.0004806518120762993, 'samples': 10576384, 'steps': 20656, 'loss/train': 2.014191150665283} +03/04/2022 13:43:47 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 13:43:52 - INFO - codeparrot_training - Step 20657: {'lr': 0.00048064976499720923, 'samples': 10576896, 'steps': 20657, 'loss/train': 1.9318780899047852} +03/04/2022 13:43:55 - INFO - codeparrot_training - Step 20658: {'lr': 0.000480647717814192, 'samples': 10577408, 'steps': 20658, 'loss/train': 2.656240463256836} +03/04/2022 13:43:55 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 13:44:00 - INFO - codeparrot_training - Step 20659: {'lr': 0.0004806456705272484, 'samples': 10577920, 'steps': 20659, 'loss/train': 2.0877795219421387} +03/04/2022 13:44:04 - INFO - codeparrot_training - Step 20660: {'lr': 0.0004806436231363795, 'samples': 10578432, 'steps': 20660, 'loss/train': 1.1328251361846924} +03/04/2022 13:44:04 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 13:44:09 - INFO - codeparrot_training - Step 20661: {'lr': 0.00048064157564158607, 'samples': 10578944, 'steps': 20661, 'loss/train': 1.7436243295669556} +03/04/2022 13:44:12 - INFO - codeparrot_training - Step 20662: {'lr': 0.00048063952804286913, 'samples': 10579456, 'steps': 20662, 'loss/train': 1.046575903892517} +03/04/2022 13:44:12 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 13:44:17 - INFO - codeparrot_training - Step 20663: {'lr': 0.0004806374803402296, 'samples': 10579968, 'steps': 20663, 'loss/train': 1.5387860536575317} +03/04/2022 13:44:21 - INFO - codeparrot_training - Step 20664: {'lr': 0.00048063543253366837, 'samples': 10580480, 'steps': 20664, 'loss/train': 1.6794464588165283} +03/04/2022 13:44:21 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 13:44:26 - INFO - codeparrot_training - Step 20665: {'lr': 0.0004806333846231864, 'samples': 10580992, 'steps': 20665, 'loss/train': 1.3881405591964722} +03/04/2022 13:44:29 - INFO - codeparrot_training - Step 20666: {'lr': 0.00048063133660878455, 'samples': 10581504, 'steps': 20666, 'loss/train': 2.3521902561187744} +03/04/2022 13:44:29 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 13:44:34 - INFO - codeparrot_training - Step 20667: {'lr': 0.00048062928849046377, 'samples': 10582016, 'steps': 20667, 'loss/train': 2.146852970123291} +03/04/2022 13:44:37 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 13:44:40 - INFO - codeparrot_training - Step 20668: {'lr': 0.00048062724026822504, 'samples': 10582528, 'steps': 20668, 'loss/train': 1.692043662071228} +03/04/2022 13:44:43 - INFO - codeparrot_training - Step 20669: {'lr': 0.00048062519194206916, 'samples': 10583040, 'steps': 20669, 'loss/train': 1.7595391273498535} +03/04/2022 13:44:46 - INFO - codeparrot_training - Step 20670: {'lr': 0.0004806231435119972, 'samples': 10583552, 'steps': 20670, 'loss/train': 1.5545532703399658} +03/04/2022 13:44:46 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 13:44:51 - INFO - codeparrot_training - Step 20671: {'lr': 0.00048062109497800997, 'samples': 10584064, 'steps': 20671, 'loss/train': 2.72775936126709} +03/04/2022 13:44:55 - INFO - codeparrot_training - Step 20672: {'lr': 0.00048061904634010845, 'samples': 10584576, 'steps': 20672, 'loss/train': 0.38718119263648987} +03/04/2022 13:44:55 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/04/2022 13:45:00 - INFO - codeparrot_training - Step 20673: {'lr': 0.0004806169975982935, 'samples': 10585088, 'steps': 20673, 'loss/train': 1.6511955261230469} +03/04/2022 13:45:03 - INFO - codeparrot_training - Step 20674: {'lr': 0.0004806149487525662, 'samples': 10585600, 'steps': 20674, 'loss/train': 0.8807926177978516} +03/04/2022 13:45:03 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 13:45:08 - INFO - codeparrot_training - Step 20675: {'lr': 0.0004806128998029272, 'samples': 10586112, 'steps': 20675, 'loss/train': 1.7558990716934204} +03/04/2022 13:45:11 - INFO - codeparrot_training - Step 20676: {'lr': 0.0004806108507493777, 'samples': 10586624, 'steps': 20676, 'loss/train': 1.5095884799957275} +03/04/2022 13:45:11 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/04/2022 13:45:17 - INFO - codeparrot_training - Step 20677: {'lr': 0.0004806088015919185, 'samples': 10587136, 'steps': 20677, 'loss/train': 2.048875093460083} +03/04/2022 13:45:20 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 13:45:22 - INFO - codeparrot_training - Step 20678: {'lr': 0.0004806067523305505, 'samples': 10587648, 'steps': 20678, 'loss/train': 0.9130741357803345} +03/04/2022 13:45:25 - INFO - codeparrot_training - Step 20679: {'lr': 0.0004806047029652747, 'samples': 10588160, 'steps': 20679, 'loss/train': 1.9362869262695312} +03/04/2022 13:45:28 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 13:45:30 - INFO - codeparrot_training - Step 20680: {'lr': 0.00048060265349609193, 'samples': 10588672, 'steps': 20680, 'loss/train': 2.0841214656829834} +03/04/2022 13:45:34 - INFO - codeparrot_training - Step 20681: {'lr': 0.0004806006039230032, 'samples': 10589184, 'steps': 20681, 'loss/train': 2.533137321472168} +03/04/2022 13:45:37 - INFO - codeparrot_training - Step 20682: {'lr': 0.0004805985542460094, 'samples': 10589696, 'steps': 20682, 'loss/train': 5.91558313369751} +03/04/2022 13:45:37 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 13:45:42 - INFO - codeparrot_training - Step 20683: {'lr': 0.00048059650446511136, 'samples': 10590208, 'steps': 20683, 'loss/train': 2.247016191482544} +03/04/2022 13:45:45 - INFO - codeparrot_training - Step 20684: {'lr': 0.00048059445458031023, 'samples': 10590720, 'steps': 20684, 'loss/train': 1.9033803939819336} +03/04/2022 13:45:45 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/04/2022 13:45:50 - INFO - codeparrot_training - Step 20685: {'lr': 0.0004805924045916067, 'samples': 10591232, 'steps': 20685, 'loss/train': 2.1728031635284424} +03/04/2022 13:45:53 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 13:45:56 - INFO - codeparrot_training - Step 20686: {'lr': 0.00048059035449900185, 'samples': 10591744, 'steps': 20686, 'loss/train': 1.8516231775283813} +03/04/2022 13:45:59 - INFO - codeparrot_training - Step 20687: {'lr': 0.0004805883043024965, 'samples': 10592256, 'steps': 20687, 'loss/train': 1.5646461248397827} +03/04/2022 13:46:02 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 13:46:04 - INFO - codeparrot_training - Step 20688: {'lr': 0.0004805862540020917, 'samples': 10592768, 'steps': 20688, 'loss/train': 1.2542964220046997} +03/04/2022 13:46:07 - INFO - codeparrot_training - Step 20689: {'lr': 0.0004805842035977882, 'samples': 10593280, 'steps': 20689, 'loss/train': 2.107980966567993} +03/04/2022 13:46:10 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 13:46:13 - INFO - codeparrot_training - Step 20690: {'lr': 0.00048058215308958703, 'samples': 10593792, 'steps': 20690, 'loss/train': 1.6637312173843384} +03/04/2022 13:46:16 - INFO - codeparrot_training - Step 20691: {'lr': 0.00048058010247748904, 'samples': 10594304, 'steps': 20691, 'loss/train': 1.996462345123291} +03/04/2022 13:46:19 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 13:46:21 - INFO - codeparrot_training - Step 20692: {'lr': 0.0004805780517614954, 'samples': 10594816, 'steps': 20692, 'loss/train': 1.6531703472137451} +03/04/2022 13:46:24 - INFO - codeparrot_training - Step 20693: {'lr': 0.0004805760009416067, 'samples': 10595328, 'steps': 20693, 'loss/train': 1.759236454963684} +03/04/2022 13:46:27 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 13:46:29 - INFO - codeparrot_training - Step 20694: {'lr': 0.000480573950017824, 'samples': 10595840, 'steps': 20694, 'loss/train': 1.5111132860183716} +03/04/2022 13:46:33 - INFO - codeparrot_training - Step 20695: {'lr': 0.0004805718989901483, 'samples': 10596352, 'steps': 20695, 'loss/train': 0.814595103263855} +03/04/2022 13:46:35 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/04/2022 13:46:38 - INFO - codeparrot_training - Step 20696: {'lr': 0.00048056984785858046, 'samples': 10596864, 'steps': 20696, 'loss/train': 1.1707097291946411} +03/04/2022 13:46:41 - INFO - codeparrot_training - Step 20697: {'lr': 0.0004805677966231214, 'samples': 10597376, 'steps': 20697, 'loss/train': 1.855120062828064} +03/04/2022 13:46:43 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 13:46:46 - INFO - codeparrot_training - Step 20698: {'lr': 0.00048056574528377205, 'samples': 10597888, 'steps': 20698, 'loss/train': 1.75473952293396} +03/04/2022 13:46:49 - INFO - codeparrot_training - Step 20699: {'lr': 0.00048056369384053335, 'samples': 10598400, 'steps': 20699, 'loss/train': 1.9385000467300415} +03/04/2022 13:46:52 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/04/2022 13:46:55 - INFO - codeparrot_training - Step 20700: {'lr': 0.00048056164229340613, 'samples': 10598912, 'steps': 20700, 'loss/train': 1.8183764219284058} +03/04/2022 13:46:58 - INFO - codeparrot_training - Step 20701: {'lr': 0.0004805595906423914, 'samples': 10599424, 'steps': 20701, 'loss/train': 2.7235965728759766} +03/04/2022 13:47:00 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/04/2022 13:47:03 - INFO - codeparrot_training - Step 20702: {'lr': 0.00048055753888749013, 'samples': 10599936, 'steps': 20702, 'loss/train': 2.039680004119873} +03/04/2022 13:47:06 - INFO - codeparrot_training - Step 20703: {'lr': 0.0004805554870287032, 'samples': 10600448, 'steps': 20703, 'loss/train': 1.6946420669555664} +03/04/2022 13:47:09 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 13:47:11 - INFO - codeparrot_training - Step 20704: {'lr': 0.0004805534350660315, 'samples': 10600960, 'steps': 20704, 'loss/train': 2.3469293117523193} +03/04/2022 13:47:15 - INFO - codeparrot_training - Step 20705: {'lr': 0.000480551382999476, 'samples': 10601472, 'steps': 20705, 'loss/train': 1.5779931545257568} +03/04/2022 13:47:17 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 13:47:20 - INFO - codeparrot_training - Step 20706: {'lr': 0.00048054933082903754, 'samples': 10601984, 'steps': 20706, 'loss/train': 2.0609006881713867} +03/04/2022 13:47:23 - INFO - codeparrot_training - Step 20707: {'lr': 0.00048054727855471717, 'samples': 10602496, 'steps': 20707, 'loss/train': 1.4554849863052368} +03/04/2022 13:47:26 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/04/2022 13:47:29 - INFO - codeparrot_training - Step 20708: {'lr': 0.00048054522617651575, 'samples': 10603008, 'steps': 20708, 'loss/train': 1.3971600532531738} +03/04/2022 13:47:32 - INFO - codeparrot_training - Step 20709: {'lr': 0.0004805431736944342, 'samples': 10603520, 'steps': 20709, 'loss/train': 0.2454409897327423} +03/04/2022 13:47:34 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/04/2022 13:47:37 - INFO - codeparrot_training - Step 20710: {'lr': 0.0004805411211084735, 'samples': 10604032, 'steps': 20710, 'loss/train': 1.6719807386398315} +03/04/2022 13:47:40 - INFO - codeparrot_training - Step 20711: {'lr': 0.0004805390684186344, 'samples': 10604544, 'steps': 20711, 'loss/train': 1.3089810609817505} +03/04/2022 13:47:44 - INFO - codeparrot_training - Step 20712: {'lr': 0.00048053701562491804, 'samples': 10605056, 'steps': 20712, 'loss/train': 1.367372751235962} +03/04/2022 13:47:44 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 13:47:49 - INFO - codeparrot_training - Step 20713: {'lr': 0.0004805349627273253, 'samples': 10605568, 'steps': 20713, 'loss/train': 2.0853347778320312} +03/04/2022 13:47:52 - INFO - codeparrot_training - Step 20714: {'lr': 0.00048053290972585697, 'samples': 10606080, 'steps': 20714, 'loss/train': 1.9316208362579346} +03/04/2022 13:47:52 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 13:47:58 - INFO - codeparrot_training - Step 20715: {'lr': 0.0004805308566205141, 'samples': 10606592, 'steps': 20715, 'loss/train': 1.5982234477996826} +03/04/2022 13:48:01 - INFO - codeparrot_training - Step 20716: {'lr': 0.00048052880341129764, 'samples': 10607104, 'steps': 20716, 'loss/train': 6.703266620635986} +03/04/2022 13:48:02 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/04/2022 13:48:06 - INFO - codeparrot_training - Step 20717: {'lr': 0.00048052675009820837, 'samples': 10607616, 'steps': 20717, 'loss/train': 0.8411498069763184} +03/04/2022 13:48:09 - INFO - codeparrot_training - Step 20718: {'lr': 0.0004805246966812474, 'samples': 10608128, 'steps': 20718, 'loss/train': 1.5483967065811157} +03/04/2022 13:48:11 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 13:48:15 - INFO - codeparrot_training - Step 20719: {'lr': 0.0004805226431604155, 'samples': 10608640, 'steps': 20719, 'loss/train': 1.997488260269165} +03/04/2022 13:48:18 - INFO - codeparrot_training - Step 20720: {'lr': 0.00048052058953571366, 'samples': 10609152, 'steps': 20720, 'loss/train': 2.891796588897705} +03/04/2022 13:48:19 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 13:48:23 - INFO - codeparrot_training - Step 20721: {'lr': 0.0004805185358071428, 'samples': 10609664, 'steps': 20721, 'loss/train': 1.9587011337280273} +03/04/2022 13:48:26 - INFO - codeparrot_training - Step 20722: {'lr': 0.0004805164819747038, 'samples': 10610176, 'steps': 20722, 'loss/train': 1.9166762828826904} +03/04/2022 13:48:27 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/04/2022 13:48:31 - INFO - codeparrot_training - Step 20723: {'lr': 0.0004805144280383977, 'samples': 10610688, 'steps': 20723, 'loss/train': 2.055347204208374} +03/04/2022 13:48:35 - INFO - codeparrot_training - Step 20724: {'lr': 0.00048051237399822534, 'samples': 10611200, 'steps': 20724, 'loss/train': 2.1195297241210938} +03/04/2022 13:48:35 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 13:48:40 - INFO - codeparrot_training - Step 20725: {'lr': 0.00048051031985418764, 'samples': 10611712, 'steps': 20725, 'loss/train': 2.290299654006958} +03/04/2022 13:48:43 - INFO - codeparrot_training - Step 20726: {'lr': 0.0004805082656062856, 'samples': 10612224, 'steps': 20726, 'loss/train': 1.9501078128814697} +03/04/2022 13:48:44 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 13:48:48 - INFO - codeparrot_training - Step 20727: {'lr': 0.00048050621125451996, 'samples': 10612736, 'steps': 20727, 'loss/train': 0.9164448976516724} +03/04/2022 13:48:52 - INFO - codeparrot_training - Step 20728: {'lr': 0.00048050415679889194, 'samples': 10613248, 'steps': 20728, 'loss/train': 1.0623432397842407} +03/04/2022 13:48:53 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/04/2022 13:48:57 - INFO - codeparrot_training - Step 20729: {'lr': 0.0004805021022394022, 'samples': 10613760, 'steps': 20729, 'loss/train': 1.7323359251022339} +03/04/2022 13:49:00 - INFO - codeparrot_training - Step 20730: {'lr': 0.0004805000475760518, 'samples': 10614272, 'steps': 20730, 'loss/train': 1.662230372428894} +03/04/2022 13:49:01 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 13:49:05 - INFO - codeparrot_training - Step 20731: {'lr': 0.0004804979928088417, 'samples': 10614784, 'steps': 20731, 'loss/train': 0.5864414572715759} +03/04/2022 13:49:08 - INFO - codeparrot_training - Step 20732: {'lr': 0.0004804959379377727, 'samples': 10615296, 'steps': 20732, 'loss/train': 2.540903091430664} +03/04/2022 13:49:10 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/04/2022 13:49:14 - INFO - codeparrot_training - Step 20733: {'lr': 0.00048049388296284576, 'samples': 10615808, 'steps': 20733, 'loss/train': 1.2453272342681885} +03/04/2022 13:49:17 - INFO - codeparrot_training - Step 20734: {'lr': 0.00048049182788406186, 'samples': 10616320, 'steps': 20734, 'loss/train': 1.6099131107330322} +03/04/2022 13:49:18 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 13:49:22 - INFO - codeparrot_training - Step 20735: {'lr': 0.0004804897727014219, 'samples': 10616832, 'steps': 20735, 'loss/train': 1.4992687702178955} +03/04/2022 13:49:25 - INFO - codeparrot_training - Step 20736: {'lr': 0.0004804877174149268, 'samples': 10617344, 'steps': 20736, 'loss/train': 1.464448094367981} +03/04/2022 13:49:27 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 13:49:31 - INFO - codeparrot_training - Step 20737: {'lr': 0.00048048566202457747, 'samples': 10617856, 'steps': 20737, 'loss/train': 1.8131709098815918} +03/04/2022 13:49:34 - INFO - codeparrot_training - Step 20738: {'lr': 0.00048048360653037494, 'samples': 10618368, 'steps': 20738, 'loss/train': 0.7531116008758545} +03/04/2022 13:49:35 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/04/2022 13:49:39 - INFO - codeparrot_training - Step 20739: {'lr': 0.00048048155093231994, 'samples': 10618880, 'steps': 20739, 'loss/train': 1.7284878492355347} +03/04/2022 13:49:42 - INFO - codeparrot_training - Step 20740: {'lr': 0.00048047949523041355, 'samples': 10619392, 'steps': 20740, 'loss/train': 1.8991143703460693} +03/04/2022 13:49:44 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 13:49:48 - INFO - codeparrot_training - Step 20741: {'lr': 0.0004804774394246567, 'samples': 10619904, 'steps': 20741, 'loss/train': 1.7342756986618042} +03/04/2022 13:49:51 - INFO - codeparrot_training - Step 20742: {'lr': 0.0004804753835150503, 'samples': 10620416, 'steps': 20742, 'loss/train': 1.453991174697876} +03/04/2022 13:49:52 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 13:49:56 - INFO - codeparrot_training - Step 20743: {'lr': 0.0004804733275015951, 'samples': 10620928, 'steps': 20743, 'loss/train': 1.5944546461105347} +03/04/2022 13:49:59 - INFO - codeparrot_training - Step 20744: {'lr': 0.0004804712713842923, 'samples': 10621440, 'steps': 20744, 'loss/train': 1.490140438079834} +03/04/2022 13:50:01 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/04/2022 13:50:04 - INFO - codeparrot_training - Step 20745: {'lr': 0.0004804692151631427, 'samples': 10621952, 'steps': 20745, 'loss/train': 1.663615107536316} +03/04/2022 13:50:08 - INFO - codeparrot_training - Step 20746: {'lr': 0.00048046715883814716, 'samples': 10622464, 'steps': 20746, 'loss/train': 2.4552431106567383} +03/04/2022 13:50:09 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/04/2022 13:50:13 - INFO - codeparrot_training - Step 20747: {'lr': 0.00048046510240930674, 'samples': 10622976, 'steps': 20747, 'loss/train': 2.5529441833496094} +03/04/2022 13:50:16 - INFO - codeparrot_training - Step 20748: {'lr': 0.00048046304587662225, 'samples': 10623488, 'steps': 20748, 'loss/train': 1.8041871786117554} +03/04/2022 13:50:18 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/04/2022 13:50:21 - INFO - codeparrot_training - Step 20749: {'lr': 0.00048046098924009467, 'samples': 10624000, 'steps': 20749, 'loss/train': 1.9448494911193848} +03/04/2022 13:50:25 - INFO - codeparrot_training - Step 20750: {'lr': 0.00048045893249972497, 'samples': 10624512, 'steps': 20750, 'loss/train': 1.6445696353912354} +03/04/2022 13:50:26 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 13:50:30 - INFO - codeparrot_training - Step 20751: {'lr': 0.000480456875655514, 'samples': 10625024, 'steps': 20751, 'loss/train': 1.8324373960494995} +03/04/2022 13:50:33 - INFO - codeparrot_training - Step 20752: {'lr': 0.0004804548187074628, 'samples': 10625536, 'steps': 20752, 'loss/train': 0.8228806257247925} +03/04/2022 13:50:35 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 13:50:38 - INFO - codeparrot_training - Step 20753: {'lr': 0.0004804527616555721, 'samples': 10626048, 'steps': 20753, 'loss/train': 1.895346999168396} +03/04/2022 13:50:41 - INFO - codeparrot_training - Step 20754: {'lr': 0.00048045070449984295, 'samples': 10626560, 'steps': 20754, 'loss/train': 3.001166582107544} +03/04/2022 13:50:43 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/04/2022 13:50:47 - INFO - codeparrot_training - Step 20755: {'lr': 0.0004804486472402763, 'samples': 10627072, 'steps': 20755, 'loss/train': 2.32140851020813} +03/04/2022 13:50:50 - INFO - codeparrot_training - Step 20756: {'lr': 0.0004804465898768731, 'samples': 10627584, 'steps': 20756, 'loss/train': 2.0942177772521973} +03/04/2022 13:50:52 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/04/2022 13:50:56 - INFO - codeparrot_training - Step 20757: {'lr': 0.00048044453240963413, 'samples': 10628096, 'steps': 20757, 'loss/train': 1.6772987842559814} +03/04/2022 13:50:59 - INFO - codeparrot_training - Step 20758: {'lr': 0.00048044247483856043, 'samples': 10628608, 'steps': 20758, 'loss/train': 1.9403187036514282} +03/04/2022 13:51:02 - INFO - codeparrot_training - Step 20759: {'lr': 0.00048044041716365296, 'samples': 10629120, 'steps': 20759, 'loss/train': 1.6408607959747314} +03/04/2022 13:51:03 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/04/2022 13:51:07 - INFO - codeparrot_training - Step 20760: {'lr': 0.00048043835938491253, 'samples': 10629632, 'steps': 20760, 'loss/train': 1.9574334621429443} +03/04/2022 13:51:10 - INFO - codeparrot_training - Step 20761: {'lr': 0.0004804363015023402, 'samples': 10630144, 'steps': 20761, 'loss/train': 1.4089950323104858} +03/04/2022 13:51:11 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/04/2022 13:51:16 - INFO - codeparrot_training - Step 20762: {'lr': 0.00048043424351593676, 'samples': 10630656, 'steps': 20762, 'loss/train': 1.580742359161377} +03/04/2022 13:51:19 - INFO - codeparrot_training - Step 20763: {'lr': 0.0004804321854257032, 'samples': 10631168, 'steps': 20763, 'loss/train': 1.594428300857544} +03/04/2022 13:51:19 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 13:51:24 - INFO - codeparrot_training - Step 20764: {'lr': 0.0004804301272316405, 'samples': 10631680, 'steps': 20764, 'loss/train': 1.2975013256072998} +03/04/2022 13:51:27 - INFO - codeparrot_training - Step 20765: {'lr': 0.0004804280689337496, 'samples': 10632192, 'steps': 20765, 'loss/train': 1.986747145652771} +03/04/2022 13:51:28 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 13:51:33 - INFO - codeparrot_training - Step 20766: {'lr': 0.00048042601053203125, 'samples': 10632704, 'steps': 20766, 'loss/train': 2.9567766189575195} +03/04/2022 13:51:36 - INFO - codeparrot_training - Step 20767: {'lr': 0.00048042395202648646, 'samples': 10633216, 'steps': 20767, 'loss/train': 2.460460662841797} +03/04/2022 13:51:37 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 13:51:41 - INFO - codeparrot_training - Step 20768: {'lr': 0.00048042189341711636, 'samples': 10633728, 'steps': 20768, 'loss/train': 1.9340392351150513} +03/04/2022 13:51:44 - INFO - codeparrot_training - Step 20769: {'lr': 0.0004804198347039216, 'samples': 10634240, 'steps': 20769, 'loss/train': 1.8332546949386597} +03/04/2022 13:51:45 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 13:51:49 - INFO - codeparrot_training - Step 20770: {'lr': 0.0004804177758869032, 'samples': 10634752, 'steps': 20770, 'loss/train': 2.072420835494995} +03/04/2022 13:51:53 - INFO - codeparrot_training - Step 20771: {'lr': 0.0004804157169660622, 'samples': 10635264, 'steps': 20771, 'loss/train': 0.5164312124252319} +03/04/2022 13:51:54 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 13:51:58 - INFO - codeparrot_training - Step 20772: {'lr': 0.00048041365794139934, 'samples': 10635776, 'steps': 20772, 'loss/train': 1.6460829973220825} +03/04/2022 13:52:01 - INFO - codeparrot_training - Step 20773: {'lr': 0.00048041159881291574, 'samples': 10636288, 'steps': 20773, 'loss/train': 0.6652989387512207} +03/04/2022 13:52:03 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/04/2022 13:52:06 - INFO - codeparrot_training - Step 20774: {'lr': 0.0004804095395806122, 'samples': 10636800, 'steps': 20774, 'loss/train': 1.890141487121582} +03/04/2022 13:52:09 - INFO - codeparrot_training - Step 20775: {'lr': 0.00048040748024448954, 'samples': 10637312, 'steps': 20775, 'loss/train': 2.6718902587890625} +03/04/2022 13:52:11 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/04/2022 13:52:15 - INFO - codeparrot_training - Step 20776: {'lr': 0.00048040542080454897, 'samples': 10637824, 'steps': 20776, 'loss/train': 2.159229278564453} +03/04/2022 13:52:18 - INFO - codeparrot_training - Step 20777: {'lr': 0.0004804033612607912, 'samples': 10638336, 'steps': 20777, 'loss/train': 1.537187099456787} +03/04/2022 13:52:19 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 13:52:23 - INFO - codeparrot_training - Step 20778: {'lr': 0.00048040130161321724, 'samples': 10638848, 'steps': 20778, 'loss/train': 1.7471376657485962} +03/04/2022 13:52:26 - INFO - codeparrot_training - Step 20779: {'lr': 0.0004803992418618281, 'samples': 10639360, 'steps': 20779, 'loss/train': 2.4992740154266357} +03/04/2022 13:52:27 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/04/2022 13:52:32 - INFO - codeparrot_training - Step 20780: {'lr': 0.00048039718200662454, 'samples': 10639872, 'steps': 20780, 'loss/train': 1.9102097749710083} +03/04/2022 13:52:35 - INFO - codeparrot_training - Step 20781: {'lr': 0.0004803951220476076, 'samples': 10640384, 'steps': 20781, 'loss/train': 1.8250401020050049} +03/04/2022 13:52:36 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/04/2022 13:52:40 - INFO - codeparrot_training - Step 20782: {'lr': 0.00048039306198477817, 'samples': 10640896, 'steps': 20782, 'loss/train': 2.0065274238586426} +03/04/2022 13:52:43 - INFO - codeparrot_training - Step 20783: {'lr': 0.0004803910018181371, 'samples': 10641408, 'steps': 20783, 'loss/train': 1.005293369293213} +03/04/2022 13:52:44 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 13:52:48 - INFO - codeparrot_training - Step 20784: {'lr': 0.0004803889415476855, 'samples': 10641920, 'steps': 20784, 'loss/train': 2.4946234226226807} +03/04/2022 13:52:52 - INFO - codeparrot_training - Step 20785: {'lr': 0.0004803868811734242, 'samples': 10642432, 'steps': 20785, 'loss/train': 1.9197194576263428} +03/04/2022 13:52:52 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 13:52:57 - INFO - codeparrot_training - Step 20786: {'lr': 0.00048038482069535406, 'samples': 10642944, 'steps': 20786, 'loss/train': 2.383737802505493} +03/04/2022 13:53:00 - INFO - codeparrot_training - Step 20787: {'lr': 0.000480382760113476, 'samples': 10643456, 'steps': 20787, 'loss/train': 2.4900193214416504} +03/04/2022 13:53:01 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/04/2022 13:53:05 - INFO - codeparrot_training - Step 20788: {'lr': 0.00048038069942779116, 'samples': 10643968, 'steps': 20788, 'loss/train': 2.7334108352661133} +03/04/2022 13:53:09 - INFO - codeparrot_training - Step 20789: {'lr': 0.00048037863863830034, 'samples': 10644480, 'steps': 20789, 'loss/train': 1.751271367073059} +03/04/2022 13:53:09 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 13:53:14 - INFO - codeparrot_training - Step 20790: {'lr': 0.0004803765777450044, 'samples': 10644992, 'steps': 20790, 'loss/train': 2.4003334045410156} +03/04/2022 13:53:17 - INFO - codeparrot_training - Step 20791: {'lr': 0.00048037451674790433, 'samples': 10645504, 'steps': 20791, 'loss/train': 1.570690393447876} +03/04/2022 13:53:17 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/04/2022 13:53:22 - INFO - codeparrot_training - Step 20792: {'lr': 0.0004803724556470011, 'samples': 10646016, 'steps': 20792, 'loss/train': 2.028837203979492} +03/04/2022 13:53:25 - INFO - codeparrot_training - Step 20793: {'lr': 0.0004803703944422956, 'samples': 10646528, 'steps': 20793, 'loss/train': 2.036442279815674} +03/04/2022 13:53:26 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 13:53:31 - INFO - codeparrot_training - Step 20794: {'lr': 0.0004803683331337887, 'samples': 10647040, 'steps': 20794, 'loss/train': 1.6753735542297363} +03/04/2022 13:53:34 - INFO - codeparrot_training - Step 20795: {'lr': 0.0004803662717214814, 'samples': 10647552, 'steps': 20795, 'loss/train': 1.06248939037323} +03/04/2022 13:53:34 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/04/2022 13:53:39 - INFO - codeparrot_training - Step 20796: {'lr': 0.00048036421020537464, 'samples': 10648064, 'steps': 20796, 'loss/train': 1.664320707321167} +03/04/2022 13:53:42 - INFO - codeparrot_training - Step 20797: {'lr': 0.0004803621485854693, 'samples': 10648576, 'steps': 20797, 'loss/train': 1.7644977569580078} +03/04/2022 13:53:43 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 13:53:48 - INFO - codeparrot_training - Step 20798: {'lr': 0.00048036008686176636, 'samples': 10649088, 'steps': 20798, 'loss/train': 1.125112771987915} +03/04/2022 13:53:51 - INFO - codeparrot_training - Step 20799: {'lr': 0.0004803580250342666, 'samples': 10649600, 'steps': 20799, 'loss/train': 1.0924681425094604} +03/04/2022 13:53:51 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/04/2022 13:53:56 - INFO - codeparrot_training - Step 20800: {'lr': 0.00048035596310297125, 'samples': 10650112, 'steps': 20800, 'loss/train': 1.4679067134857178} +03/04/2022 13:53:59 - INFO - codeparrot_training - Step 20801: {'lr': 0.0004803539010678809, 'samples': 10650624, 'steps': 20801, 'loss/train': 2.2058913707733154} +03/04/2022 13:53:59 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 13:54:04 - INFO - codeparrot_training - Step 20802: {'lr': 0.00048035183892899676, 'samples': 10651136, 'steps': 20802, 'loss/train': 2.0208399295806885} +03/04/2022 13:54:08 - INFO - codeparrot_training - Step 20803: {'lr': 0.0004803497766863195, 'samples': 10651648, 'steps': 20803, 'loss/train': 2.344757318496704} +03/04/2022 13:54:08 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 13:54:13 - INFO - codeparrot_training - Step 20804: {'lr': 0.00048034771433985035, 'samples': 10652160, 'steps': 20804, 'loss/train': 1.1309911012649536} +03/04/2022 13:54:16 - INFO - codeparrot_training - Step 20805: {'lr': 0.00048034565188959, 'samples': 10652672, 'steps': 20805, 'loss/train': 1.8621139526367188} +03/04/2022 13:54:17 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 13:54:22 - INFO - codeparrot_training - Step 20806: {'lr': 0.0004803435893355394, 'samples': 10653184, 'steps': 20806, 'loss/train': 2.0247139930725098} +03/04/2022 13:54:25 - INFO - codeparrot_training - Step 20807: {'lr': 0.00048034152667769957, 'samples': 10653696, 'steps': 20807, 'loss/train': 1.5568569898605347} +03/04/2022 13:54:26 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 13:54:30 - INFO - codeparrot_training - Step 20808: {'lr': 0.0004803394639160714, 'samples': 10654208, 'steps': 20808, 'loss/train': 1.249288558959961} +03/04/2022 13:54:33 - INFO - codeparrot_training - Step 20809: {'lr': 0.00048033740105065585, 'samples': 10654720, 'steps': 20809, 'loss/train': 2.435887098312378} +03/04/2022 13:54:35 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 13:54:39 - INFO - codeparrot_training - Step 20810: {'lr': 0.0004803353380814538, 'samples': 10655232, 'steps': 20810, 'loss/train': 1.5099842548370361} +03/04/2022 13:54:42 - INFO - codeparrot_training - Step 20811: {'lr': 0.00048033327500846625, 'samples': 10655744, 'steps': 20811, 'loss/train': 1.4758402109146118} +03/04/2022 13:54:43 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 13:54:47 - INFO - codeparrot_training - Step 20812: {'lr': 0.000480331211831694, 'samples': 10656256, 'steps': 20812, 'loss/train': 2.047667980194092} +03/04/2022 13:54:50 - INFO - codeparrot_training - Step 20813: {'lr': 0.00048032914855113807, 'samples': 10656768, 'steps': 20813, 'loss/train': 2.0801894664764404} +03/04/2022 13:54:52 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 13:54:56 - INFO - codeparrot_training - Step 20814: {'lr': 0.00048032708516679946, 'samples': 10657280, 'steps': 20814, 'loss/train': 2.168907642364502} +03/04/2022 13:54:59 - INFO - codeparrot_training - Step 20815: {'lr': 0.00048032502167867896, 'samples': 10657792, 'steps': 20815, 'loss/train': 2.041142225265503} +03/04/2022 13:55:00 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 13:55:04 - INFO - codeparrot_training - Step 20816: {'lr': 0.0004803229580867775, 'samples': 10658304, 'steps': 20816, 'loss/train': 1.5593611001968384} +03/04/2022 13:55:07 - INFO - codeparrot_training - Step 20817: {'lr': 0.0004803208943910962, 'samples': 10658816, 'steps': 20817, 'loss/train': 1.5333776473999023} +03/04/2022 13:55:08 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 13:55:13 - INFO - codeparrot_training - Step 20818: {'lr': 0.00048031883059163576, 'samples': 10659328, 'steps': 20818, 'loss/train': 1.8670252561569214} +03/04/2022 13:55:16 - INFO - codeparrot_training - Step 20819: {'lr': 0.00048031676668839723, 'samples': 10659840, 'steps': 20819, 'loss/train': 1.8951810598373413} +03/04/2022 13:55:17 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 13:55:21 - INFO - codeparrot_training - Step 20820: {'lr': 0.00048031470268138153, 'samples': 10660352, 'steps': 20820, 'loss/train': 0.6324357390403748} +03/04/2022 13:55:24 - INFO - codeparrot_training - Step 20821: {'lr': 0.00048031263857058957, 'samples': 10660864, 'steps': 20821, 'loss/train': 1.783376693725586} +03/04/2022 13:55:26 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 13:55:30 - INFO - codeparrot_training - Step 20822: {'lr': 0.00048031057435602234, 'samples': 10661376, 'steps': 20822, 'loss/train': 2.5321507453918457} +03/04/2022 13:55:33 - INFO - codeparrot_training - Step 20823: {'lr': 0.0004803085100376807, 'samples': 10661888, 'steps': 20823, 'loss/train': 1.523669719696045} +03/04/2022 13:55:35 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 13:55:38 - INFO - codeparrot_training - Step 20824: {'lr': 0.00048030644561556556, 'samples': 10662400, 'steps': 20824, 'loss/train': 2.407277822494507} +03/04/2022 13:55:42 - INFO - codeparrot_training - Step 20825: {'lr': 0.0004803043810896779, 'samples': 10662912, 'steps': 20825, 'loss/train': 1.562023401260376} +03/04/2022 13:55:44 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 13:55:47 - INFO - codeparrot_training - Step 20826: {'lr': 0.00048030231646001867, 'samples': 10663424, 'steps': 20826, 'loss/train': 2.7319130897521973} +03/04/2022 13:55:50 - INFO - codeparrot_training - Step 20827: {'lr': 0.0004803002517265887, 'samples': 10663936, 'steps': 20827, 'loss/train': 1.715059757232666} +03/04/2022 13:55:53 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 13:55:55 - INFO - codeparrot_training - Step 20828: {'lr': 0.0004802981868893891, 'samples': 10664448, 'steps': 20828, 'loss/train': 2.1793973445892334} +03/04/2022 13:55:59 - INFO - codeparrot_training - Step 20829: {'lr': 0.00048029612194842056, 'samples': 10664960, 'steps': 20829, 'loss/train': 1.8685741424560547} +03/04/2022 13:56:01 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 13:56:04 - INFO - codeparrot_training - Step 20830: {'lr': 0.0004802940569036842, 'samples': 10665472, 'steps': 20830, 'loss/train': 1.1490752696990967} +03/04/2022 13:56:07 - INFO - codeparrot_training - Step 20831: {'lr': 0.0004802919917551809, 'samples': 10665984, 'steps': 20831, 'loss/train': 1.8238575458526611} +03/04/2022 13:56:09 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 13:56:12 - INFO - codeparrot_training - Step 20832: {'lr': 0.00048028992650291156, 'samples': 10666496, 'steps': 20832, 'loss/train': 1.3150571584701538} +03/04/2022 13:56:15 - INFO - codeparrot_training - Step 20833: {'lr': 0.00048028786114687715, 'samples': 10667008, 'steps': 20833, 'loss/train': 1.9175732135772705} +03/04/2022 13:56:18 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/04/2022 13:56:21 - INFO - codeparrot_training - Step 20834: {'lr': 0.0004802857956870786, 'samples': 10667520, 'steps': 20834, 'loss/train': 2.3021647930145264} +03/04/2022 13:56:24 - INFO - codeparrot_training - Step 20835: {'lr': 0.00048028373012351684, 'samples': 10668032, 'steps': 20835, 'loss/train': 2.487605333328247} +03/04/2022 13:56:26 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/04/2022 13:56:29 - INFO - codeparrot_training - Step 20836: {'lr': 0.00048028166445619275, 'samples': 10668544, 'steps': 20836, 'loss/train': 0.32284048199653625} +03/04/2022 13:56:33 - INFO - codeparrot_training - Step 20837: {'lr': 0.0004802795986851073, 'samples': 10669056, 'steps': 20837, 'loss/train': 1.2048430442810059} +03/04/2022 13:56:35 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 13:56:38 - INFO - codeparrot_training - Step 20838: {'lr': 0.00048027753281026144, 'samples': 10669568, 'steps': 20838, 'loss/train': 2.0701801776885986} +03/04/2022 13:56:41 - INFO - codeparrot_training - Step 20839: {'lr': 0.000480275466831656, 'samples': 10670080, 'steps': 20839, 'loss/train': 1.565854787826538} +03/04/2022 13:56:44 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 13:56:46 - INFO - codeparrot_training - Step 20840: {'lr': 0.00048027340074929207, 'samples': 10670592, 'steps': 20840, 'loss/train': 1.6101205348968506} +03/04/2022 13:56:50 - INFO - codeparrot_training - Step 20841: {'lr': 0.0004802713345631705, 'samples': 10671104, 'steps': 20841, 'loss/train': 1.9119445085525513} +03/04/2022 13:56:52 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/04/2022 13:56:55 - INFO - codeparrot_training - Step 20842: {'lr': 0.0004802692682732922, 'samples': 10671616, 'steps': 20842, 'loss/train': 2.7444100379943848} +03/04/2022 13:56:58 - INFO - codeparrot_training - Step 20843: {'lr': 0.0004802672018796581, 'samples': 10672128, 'steps': 20843, 'loss/train': 1.5290002822875977} +03/04/2022 13:57:01 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 13:57:03 - INFO - codeparrot_training - Step 20844: {'lr': 0.0004802651353822691, 'samples': 10672640, 'steps': 20844, 'loss/train': 1.1908310651779175} +03/04/2022 13:57:06 - INFO - codeparrot_training - Step 20845: {'lr': 0.0004802630687811263, 'samples': 10673152, 'steps': 20845, 'loss/train': 1.260087251663208} +03/04/2022 13:57:09 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 13:57:12 - INFO - codeparrot_training - Step 20846: {'lr': 0.00048026100207623047, 'samples': 10673664, 'steps': 20846, 'loss/train': 1.8149884939193726} +03/04/2022 13:57:15 - INFO - codeparrot_training - Step 20847: {'lr': 0.0004802589352675826, 'samples': 10674176, 'steps': 20847, 'loss/train': 1.40677011013031} +03/04/2022 13:57:17 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/04/2022 13:57:20 - INFO - codeparrot_training - Step 20848: {'lr': 0.0004802568683551836, 'samples': 10674688, 'steps': 20848, 'loss/train': 1.8028876781463623} +03/04/2022 13:57:23 - INFO - codeparrot_training - Step 20849: {'lr': 0.0004802548013390343, 'samples': 10675200, 'steps': 20849, 'loss/train': 2.1015944480895996} +03/04/2022 13:57:26 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 13:57:29 - INFO - codeparrot_training - Step 20850: {'lr': 0.00048025273421913587, 'samples': 10675712, 'steps': 20850, 'loss/train': 1.793839693069458} +03/04/2022 13:57:32 - INFO - codeparrot_training - Step 20851: {'lr': 0.0004802506669954891, 'samples': 10676224, 'steps': 20851, 'loss/train': 1.029511570930481} +03/04/2022 13:57:34 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 13:57:37 - INFO - codeparrot_training - Step 20852: {'lr': 0.00048024859966809487, 'samples': 10676736, 'steps': 20852, 'loss/train': 1.8482638597488403} +03/04/2022 13:57:40 - INFO - codeparrot_training - Step 20853: {'lr': 0.00048024653223695425, 'samples': 10677248, 'steps': 20853, 'loss/train': 2.040074348449707} +03/04/2022 13:57:43 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 13:57:45 - INFO - codeparrot_training - Step 20854: {'lr': 0.00048024446470206806, 'samples': 10677760, 'steps': 20854, 'loss/train': 2.1193761825561523} +03/04/2022 13:57:49 - INFO - codeparrot_training - Step 20855: {'lr': 0.0004802423970634373, 'samples': 10678272, 'steps': 20855, 'loss/train': 1.9989287853240967} +03/04/2022 13:57:51 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 13:57:54 - INFO - codeparrot_training - Step 20856: {'lr': 0.00048024032932106277, 'samples': 10678784, 'steps': 20856, 'loss/train': 2.5009138584136963} +03/04/2022 13:57:57 - INFO - codeparrot_training - Step 20857: {'lr': 0.00048023826147494556, 'samples': 10679296, 'steps': 20857, 'loss/train': 1.629153847694397} +03/04/2022 13:58:00 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 13:58:02 - INFO - codeparrot_training - Step 20858: {'lr': 0.0004802361935250865, 'samples': 10679808, 'steps': 20858, 'loss/train': 1.8697221279144287} +03/04/2022 13:58:06 - INFO - codeparrot_training - Step 20859: {'lr': 0.0004802341254714867, 'samples': 10680320, 'steps': 20859, 'loss/train': 1.629812240600586} +03/04/2022 13:58:08 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 13:58:11 - INFO - codeparrot_training - Step 20860: {'lr': 0.00048023205731414684, 'samples': 10680832, 'steps': 20860, 'loss/train': 2.1021032333374023} +03/04/2022 13:58:14 - INFO - codeparrot_training - Step 20861: {'lr': 0.00048022998905306795, 'samples': 10681344, 'steps': 20861, 'loss/train': 1.8104851245880127} +03/04/2022 13:58:17 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 13:58:19 - INFO - codeparrot_training - Step 20862: {'lr': 0.00048022792068825107, 'samples': 10681856, 'steps': 20862, 'loss/train': 1.7847647666931152} +03/04/2022 13:58:23 - INFO - codeparrot_training - Step 20863: {'lr': 0.00048022585221969697, 'samples': 10682368, 'steps': 20863, 'loss/train': 2.2858006954193115} +03/04/2022 13:58:25 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/04/2022 13:58:28 - INFO - codeparrot_training - Step 20864: {'lr': 0.00048022378364740673, 'samples': 10682880, 'steps': 20864, 'loss/train': 1.3208547830581665} +03/04/2022 13:58:31 - INFO - codeparrot_training - Step 20865: {'lr': 0.0004802217149713811, 'samples': 10683392, 'steps': 20865, 'loss/train': 1.2520369291305542} +03/04/2022 13:58:33 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 13:58:36 - INFO - codeparrot_training - Step 20866: {'lr': 0.0004802196461916212, 'samples': 10683904, 'steps': 20866, 'loss/train': 2.5959959030151367} +03/04/2022 13:58:40 - INFO - codeparrot_training - Step 20867: {'lr': 0.0004802175773081278, 'samples': 10684416, 'steps': 20867, 'loss/train': 1.8089581727981567} +03/04/2022 13:58:42 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 13:58:45 - INFO - codeparrot_training - Step 20868: {'lr': 0.000480215508320902, 'samples': 10684928, 'steps': 20868, 'loss/train': 2.142228603363037} +03/04/2022 13:58:48 - INFO - codeparrot_training - Step 20869: {'lr': 0.0004802134392299446, 'samples': 10685440, 'steps': 20869, 'loss/train': 2.1460585594177246} +03/04/2022 13:58:50 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/04/2022 13:58:53 - INFO - codeparrot_training - Step 20870: {'lr': 0.0004802113700352566, 'samples': 10685952, 'steps': 20870, 'loss/train': 1.9857639074325562} +03/04/2022 13:58:57 - INFO - codeparrot_training - Step 20871: {'lr': 0.00048020930073683886, 'samples': 10686464, 'steps': 20871, 'loss/train': 2.2877047061920166} +03/04/2022 13:58:59 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 13:59:02 - INFO - codeparrot_training - Step 20872: {'lr': 0.0004802072313346924, 'samples': 10686976, 'steps': 20872, 'loss/train': 1.6700838804244995} +03/04/2022 13:59:05 - INFO - codeparrot_training - Step 20873: {'lr': 0.00048020516182881813, 'samples': 10687488, 'steps': 20873, 'loss/train': 2.167400598526001} +03/04/2022 13:59:07 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 13:59:10 - INFO - codeparrot_training - Step 20874: {'lr': 0.00048020309221921686, 'samples': 10688000, 'steps': 20874, 'loss/train': 2.214726686477661} +03/04/2022 13:59:13 - INFO - codeparrot_training - Step 20875: {'lr': 0.00048020102250588976, 'samples': 10688512, 'steps': 20875, 'loss/train': 1.4992328882217407} +03/04/2022 13:59:15 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/04/2022 13:59:19 - INFO - codeparrot_training - Step 20876: {'lr': 0.00048019895268883764, 'samples': 10689024, 'steps': 20876, 'loss/train': 1.7187817096710205} +03/04/2022 13:59:22 - INFO - codeparrot_training - Step 20877: {'lr': 0.0004801968827680613, 'samples': 10689536, 'steps': 20877, 'loss/train': 2.057805061340332} +03/04/2022 13:59:24 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/04/2022 13:59:27 - INFO - codeparrot_training - Step 20878: {'lr': 0.00048019481274356194, 'samples': 10690048, 'steps': 20878, 'loss/train': 1.156369686126709} +03/04/2022 13:59:30 - INFO - codeparrot_training - Step 20879: {'lr': 0.0004801927426153402, 'samples': 10690560, 'steps': 20879, 'loss/train': 2.293339490890503} +03/04/2022 13:59:32 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 13:59:36 - INFO - codeparrot_training - Step 20880: {'lr': 0.00048019067238339725, 'samples': 10691072, 'steps': 20880, 'loss/train': 2.0923233032226562} +03/04/2022 13:59:39 - INFO - codeparrot_training - Step 20881: {'lr': 0.000480188602047734, 'samples': 10691584, 'steps': 20881, 'loss/train': 1.6031556129455566} +03/04/2022 13:59:44 - INFO - codeparrot_training - Step 20882: {'lr': 0.0004801865316083512, 'samples': 10692096, 'steps': 20882, 'loss/train': 2.1488945484161377} +03/04/2022 13:59:47 - INFO - codeparrot_training - Step 20883: {'lr': 0.0004801844610652499, 'samples': 10692608, 'steps': 20883, 'loss/train': 1.5937058925628662} +03/04/2022 13:59:49 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 13:59:53 - INFO - codeparrot_training - Step 20884: {'lr': 0.0004801823904184311, 'samples': 10693120, 'steps': 20884, 'loss/train': 2.144099235534668} +03/04/2022 13:59:56 - INFO - codeparrot_training - Step 20885: {'lr': 0.00048018031966789564, 'samples': 10693632, 'steps': 20885, 'loss/train': 2.1497116088867188} +03/04/2022 13:59:58 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 14:00:02 - INFO - codeparrot_training - Step 20886: {'lr': 0.0004801782488136445, 'samples': 10694144, 'steps': 20886, 'loss/train': 1.049652099609375} +03/04/2022 14:00:05 - INFO - codeparrot_training - Step 20887: {'lr': 0.00048017617785567855, 'samples': 10694656, 'steps': 20887, 'loss/train': 1.0194826126098633} +03/04/2022 14:00:08 - INFO - codeparrot_training - Step 20888: {'lr': 0.00048017410679399876, 'samples': 10695168, 'steps': 20888, 'loss/train': 0.33174365758895874} +03/04/2022 14:00:08 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 14:00:13 - INFO - codeparrot_training - Step 20889: {'lr': 0.00048017203562860614, 'samples': 10695680, 'steps': 20889, 'loss/train': 0.7849172949790955} +03/04/2022 14:00:16 - INFO - codeparrot_training - Step 20890: {'lr': 0.0004801699643595015, 'samples': 10696192, 'steps': 20890, 'loss/train': 2.1856367588043213} +03/04/2022 14:00:17 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/04/2022 14:00:22 - INFO - codeparrot_training - Step 20891: {'lr': 0.00048016789298668583, 'samples': 10696704, 'steps': 20891, 'loss/train': 0.7068802714347839} +03/04/2022 14:00:25 - INFO - codeparrot_training - Step 20892: {'lr': 0.0004801658215101601, 'samples': 10697216, 'steps': 20892, 'loss/train': 2.197996139526367} +03/04/2022 14:00:25 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/04/2022 14:00:30 - INFO - codeparrot_training - Step 20893: {'lr': 0.00048016374992992516, 'samples': 10697728, 'steps': 20893, 'loss/train': 1.6651238203048706} +03/04/2022 14:00:33 - INFO - codeparrot_training - Step 20894: {'lr': 0.000480161678245982, 'samples': 10698240, 'steps': 20894, 'loss/train': 1.8614627122879028} +03/04/2022 14:00:34 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/04/2022 14:00:39 - INFO - codeparrot_training - Step 20895: {'lr': 0.0004801596064583315, 'samples': 10698752, 'steps': 20895, 'loss/train': 1.5763322114944458} +03/04/2022 14:00:42 - INFO - codeparrot_training - Step 20896: {'lr': 0.00048015753456697466, 'samples': 10699264, 'steps': 20896, 'loss/train': 1.7777482271194458} +03/04/2022 14:00:42 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 14:00:47 - INFO - codeparrot_training - Step 20897: {'lr': 0.00048015546257191243, 'samples': 10699776, 'steps': 20897, 'loss/train': 2.374885320663452} +03/04/2022 14:00:50 - INFO - codeparrot_training - Step 20898: {'lr': 0.00048015339047314566, 'samples': 10700288, 'steps': 20898, 'loss/train': 0.5673134326934814} +03/04/2022 14:00:51 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/04/2022 14:00:56 - INFO - codeparrot_training - Step 20899: {'lr': 0.00048015131827067534, 'samples': 10700800, 'steps': 20899, 'loss/train': 2.348464012145996} +03/04/2022 14:00:59 - INFO - codeparrot_training - Step 20900: {'lr': 0.0004801492459645024, 'samples': 10701312, 'steps': 20900, 'loss/train': 1.106835961341858} +03/04/2022 14:01:00 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 14:01:04 - INFO - codeparrot_training - Step 20901: {'lr': 0.0004801471735546277, 'samples': 10701824, 'steps': 20901, 'loss/train': 1.5124635696411133} +03/04/2022 14:01:07 - INFO - codeparrot_training - Step 20902: {'lr': 0.0004801451010410522, 'samples': 10702336, 'steps': 20902, 'loss/train': 1.7611114978790283} +03/04/2022 14:01:08 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 14:01:13 - INFO - codeparrot_training - Step 20903: {'lr': 0.000480143028423777, 'samples': 10702848, 'steps': 20903, 'loss/train': 1.1506478786468506} +03/04/2022 14:01:16 - INFO - codeparrot_training - Step 20904: {'lr': 0.0004801409557028028, 'samples': 10703360, 'steps': 20904, 'loss/train': 1.01083242893219} +03/04/2022 14:01:17 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 14:01:21 - INFO - codeparrot_training - Step 20905: {'lr': 0.0004801388828781307, 'samples': 10703872, 'steps': 20905, 'loss/train': 2.435297727584839} +03/04/2022 14:01:24 - INFO - codeparrot_training - Step 20906: {'lr': 0.00048013680994976154, 'samples': 10704384, 'steps': 20906, 'loss/train': 1.0091973543167114} +03/04/2022 14:01:25 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 14:01:30 - INFO - codeparrot_training - Step 20907: {'lr': 0.0004801347369176963, 'samples': 10704896, 'steps': 20907, 'loss/train': 0.6503301858901978} +03/04/2022 14:01:33 - INFO - codeparrot_training - Step 20908: {'lr': 0.00048013266378193586, 'samples': 10705408, 'steps': 20908, 'loss/train': 2.2979490756988525} +03/04/2022 14:01:34 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 14:01:38 - INFO - codeparrot_training - Step 20909: {'lr': 0.00048013059054248134, 'samples': 10705920, 'steps': 20909, 'loss/train': 1.460168719291687} +03/04/2022 14:01:41 - INFO - codeparrot_training - Step 20910: {'lr': 0.00048012851719933335, 'samples': 10706432, 'steps': 20910, 'loss/train': 2.3421051502227783} +03/04/2022 14:01:42 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/04/2022 14:01:47 - INFO - codeparrot_training - Step 20911: {'lr': 0.000480126443752493, 'samples': 10706944, 'steps': 20911, 'loss/train': 2.89636492729187} +03/04/2022 14:01:50 - INFO - codeparrot_training - Step 20912: {'lr': 0.0004801243702019614, 'samples': 10707456, 'steps': 20912, 'loss/train': 1.5734823942184448} +03/04/2022 14:01:50 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 14:01:55 - INFO - codeparrot_training - Step 20913: {'lr': 0.00048012229654773915, 'samples': 10707968, 'steps': 20913, 'loss/train': 1.6774368286132812} +03/04/2022 14:01:58 - INFO - codeparrot_training - Step 20914: {'lr': 0.0004801202227898274, 'samples': 10708480, 'steps': 20914, 'loss/train': 2.2911112308502197} +03/04/2022 14:01:59 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/04/2022 14:02:04 - INFO - codeparrot_training - Step 20915: {'lr': 0.00048011814892822704, 'samples': 10708992, 'steps': 20915, 'loss/train': 1.8135896921157837} +03/04/2022 14:02:07 - INFO - codeparrot_training - Step 20916: {'lr': 0.00048011607496293896, 'samples': 10709504, 'steps': 20916, 'loss/train': 6.612919330596924} +03/04/2022 14:02:08 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 14:02:12 - INFO - codeparrot_training - Step 20917: {'lr': 0.0004801140008939642, 'samples': 10710016, 'steps': 20917, 'loss/train': 2.074500560760498} +03/04/2022 14:02:15 - INFO - codeparrot_training - Step 20918: {'lr': 0.00048011192672130356, 'samples': 10710528, 'steps': 20918, 'loss/train': 2.1148486137390137} +03/04/2022 14:02:18 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 14:02:21 - INFO - codeparrot_training - Step 20919: {'lr': 0.000480109852444958, 'samples': 10711040, 'steps': 20919, 'loss/train': 2.084437370300293} +03/04/2022 14:02:24 - INFO - codeparrot_training - Step 20920: {'lr': 0.0004801077780649286, 'samples': 10711552, 'steps': 20920, 'loss/train': 1.447923183441162} +03/04/2022 14:02:26 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 14:02:29 - INFO - codeparrot_training - Step 20921: {'lr': 0.00048010570358121606, 'samples': 10712064, 'steps': 20921, 'loss/train': 0.7491136789321899} +03/04/2022 14:02:32 - INFO - codeparrot_training - Step 20922: {'lr': 0.0004801036289938215, 'samples': 10712576, 'steps': 20922, 'loss/train': 2.1448090076446533} +03/04/2022 14:02:35 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/04/2022 14:02:38 - INFO - codeparrot_training - Step 20923: {'lr': 0.0004801015543027458, 'samples': 10713088, 'steps': 20923, 'loss/train': 1.3634004592895508} +03/04/2022 14:02:41 - INFO - codeparrot_training - Step 20924: {'lr': 0.0004800994795079899, 'samples': 10713600, 'steps': 20924, 'loss/train': 1.8497120141983032} +03/04/2022 14:02:44 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/04/2022 14:02:46 - INFO - codeparrot_training - Step 20925: {'lr': 0.00048009740460955465, 'samples': 10714112, 'steps': 20925, 'loss/train': 4.047117710113525} +03/04/2022 14:02:49 - INFO - codeparrot_training - Step 20926: {'lr': 0.00048009532960744116, 'samples': 10714624, 'steps': 20926, 'loss/train': 1.832513689994812} +03/04/2022 14:02:52 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 14:02:55 - INFO - codeparrot_training - Step 20927: {'lr': 0.0004800932545016502, 'samples': 10715136, 'steps': 20927, 'loss/train': 1.8530898094177246} +03/04/2022 14:02:58 - INFO - codeparrot_training - Step 20928: {'lr': 0.0004800911792921828, 'samples': 10715648, 'steps': 20928, 'loss/train': 1.5409419536590576} +03/04/2022 14:03:00 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 14:03:03 - INFO - codeparrot_training - Step 20929: {'lr': 0.0004800891039790399, 'samples': 10716160, 'steps': 20929, 'loss/train': 1.2101333141326904} +03/04/2022 14:03:06 - INFO - codeparrot_training - Step 20930: {'lr': 0.00048008702856222233, 'samples': 10716672, 'steps': 20930, 'loss/train': 1.9573794603347778} +03/04/2022 14:03:09 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/04/2022 14:03:12 - INFO - codeparrot_training - Step 20931: {'lr': 0.0004800849530417312, 'samples': 10717184, 'steps': 20931, 'loss/train': 2.1005876064300537} +03/04/2022 14:03:15 - INFO - codeparrot_training - Step 20932: {'lr': 0.00048008287741756715, 'samples': 10717696, 'steps': 20932, 'loss/train': 2.5953757762908936} +03/04/2022 14:03:18 - INFO - codeparrot_training - Step 20933: {'lr': 0.00048008080168973144, 'samples': 10718208, 'steps': 20933, 'loss/train': 1.108125925064087} +03/04/2022 14:03:18 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/04/2022 14:03:23 - INFO - codeparrot_training - Step 20934: {'lr': 0.00048007872585822486, 'samples': 10718720, 'steps': 20934, 'loss/train': 6.591165065765381} +03/04/2022 14:03:27 - INFO - codeparrot_training - Step 20935: {'lr': 0.00048007664992304834, 'samples': 10719232, 'steps': 20935, 'loss/train': 2.307241678237915} +03/04/2022 14:03:27 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 14:03:32 - INFO - codeparrot_training - Step 20936: {'lr': 0.0004800745738842029, 'samples': 10719744, 'steps': 20936, 'loss/train': 2.2226650714874268} +03/04/2022 14:03:35 - INFO - codeparrot_training - Step 20937: {'lr': 0.0004800724977416894, 'samples': 10720256, 'steps': 20937, 'loss/train': 1.2777947187423706} +03/04/2022 14:03:35 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/04/2022 14:03:40 - INFO - codeparrot_training - Step 20938: {'lr': 0.00048007042149550866, 'samples': 10720768, 'steps': 20938, 'loss/train': 1.9590764045715332} +03/04/2022 14:03:44 - INFO - codeparrot_training - Step 20939: {'lr': 0.00048006834514566183, 'samples': 10721280, 'steps': 20939, 'loss/train': 2.402327299118042} +03/04/2022 14:03:44 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 14:03:49 - INFO - codeparrot_training - Step 20940: {'lr': 0.00048006626869214977, 'samples': 10721792, 'steps': 20940, 'loss/train': 2.055049419403076} +03/04/2022 14:03:52 - INFO - codeparrot_training - Step 20941: {'lr': 0.00048006419213497334, 'samples': 10722304, 'steps': 20941, 'loss/train': 1.448603868484497} +03/04/2022 14:03:52 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/04/2022 14:03:57 - INFO - codeparrot_training - Step 20942: {'lr': 0.0004800621154741335, 'samples': 10722816, 'steps': 20942, 'loss/train': 2.7676401138305664} +03/04/2022 14:04:00 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 14:04:03 - INFO - codeparrot_training - Step 20943: {'lr': 0.00048006003870963135, 'samples': 10723328, 'steps': 20943, 'loss/train': 2.422480344772339} +03/04/2022 14:04:06 - INFO - codeparrot_training - Step 20944: {'lr': 0.0004800579618414676, 'samples': 10723840, 'steps': 20944, 'loss/train': 2.5437510013580322} +03/04/2022 14:04:09 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 14:04:11 - INFO - codeparrot_training - Step 20945: {'lr': 0.0004800558848696433, 'samples': 10724352, 'steps': 20945, 'loss/train': 1.9272394180297852} +03/04/2022 14:04:14 - INFO - codeparrot_training - Step 20946: {'lr': 0.0004800538077941594, 'samples': 10724864, 'steps': 20946, 'loss/train': 3.2978649139404297} +03/04/2022 14:04:17 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 14:04:20 - INFO - codeparrot_training - Step 20947: {'lr': 0.00048005173061501673, 'samples': 10725376, 'steps': 20947, 'loss/train': 2.176875352859497} +03/04/2022 14:04:23 - INFO - codeparrot_training - Step 20948: {'lr': 0.0004800496533322164, 'samples': 10725888, 'steps': 20948, 'loss/train': 1.7226618528366089} +03/04/2022 14:04:26 - INFO - codeparrot_training - Step 20949: {'lr': 0.00048004757594575923, 'samples': 10726400, 'steps': 20949, 'loss/train': 2.182947874069214} +03/04/2022 14:04:26 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 14:04:32 - INFO - codeparrot_training - Step 20950: {'lr': 0.0004800454984556461, 'samples': 10726912, 'steps': 20950, 'loss/train': 2.4146082401275635} +03/04/2022 14:04:35 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/04/2022 14:04:37 - INFO - codeparrot_training - Step 20951: {'lr': 0.00048004342086187805, 'samples': 10727424, 'steps': 20951, 'loss/train': 0.6538257598876953} +03/04/2022 14:04:41 - INFO - codeparrot_training - Step 20952: {'lr': 0.000480041343164456, 'samples': 10727936, 'steps': 20952, 'loss/train': 1.0694661140441895} +03/04/2022 14:04:43 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/04/2022 14:04:46 - INFO - codeparrot_training - Step 20953: {'lr': 0.0004800392653633808, 'samples': 10728448, 'steps': 20953, 'loss/train': 1.3556169271469116} +03/04/2022 14:04:50 - INFO - codeparrot_training - Step 20954: {'lr': 0.0004800371874586535, 'samples': 10728960, 'steps': 20954, 'loss/train': 1.6540600061416626} +03/04/2022 14:04:52 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/04/2022 14:04:55 - INFO - codeparrot_training - Step 20955: {'lr': 0.0004800351094502751, 'samples': 10729472, 'steps': 20955, 'loss/train': 0.1891086995601654} +03/04/2022 14:04:58 - INFO - codeparrot_training - Step 20956: {'lr': 0.00048003303133824633, 'samples': 10729984, 'steps': 20956, 'loss/train': 2.4375791549682617} +03/04/2022 14:05:01 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 14:05:03 - INFO - codeparrot_training - Step 20957: {'lr': 0.0004800309531225683, 'samples': 10730496, 'steps': 20957, 'loss/train': 2.1612603664398193} +03/04/2022 14:05:06 - INFO - codeparrot_training - Step 20958: {'lr': 0.00048002887480324175, 'samples': 10731008, 'steps': 20958, 'loss/train': 1.142179012298584} +03/04/2022 14:05:09 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 14:05:12 - INFO - codeparrot_training - Step 20959: {'lr': 0.0004800267963802678, 'samples': 10731520, 'steps': 20959, 'loss/train': 2.0623273849487305} +03/04/2022 14:05:15 - INFO - codeparrot_training - Step 20960: {'lr': 0.0004800247178536473, 'samples': 10732032, 'steps': 20960, 'loss/train': 1.951095700263977} +03/04/2022 14:05:17 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/04/2022 14:05:20 - INFO - codeparrot_training - Step 20961: {'lr': 0.0004800226392233813, 'samples': 10732544, 'steps': 20961, 'loss/train': 1.7979859113693237} +03/04/2022 14:05:23 - INFO - codeparrot_training - Step 20962: {'lr': 0.00048002056048947054, 'samples': 10733056, 'steps': 20962, 'loss/train': 2.0531952381134033} +03/04/2022 14:05:26 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 14:05:29 - INFO - codeparrot_training - Step 20963: {'lr': 0.0004800184816519161, 'samples': 10733568, 'steps': 20963, 'loss/train': 1.2084144353866577} +03/04/2022 14:05:32 - INFO - codeparrot_training - Step 20964: {'lr': 0.0004800164027107189, 'samples': 10734080, 'steps': 20964, 'loss/train': 0.9265140891075134} +03/04/2022 14:05:34 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/04/2022 14:05:37 - INFO - codeparrot_training - Step 20965: {'lr': 0.0004800143236658798, 'samples': 10734592, 'steps': 20965, 'loss/train': 2.211191177368164} +03/04/2022 14:05:40 - INFO - codeparrot_training - Step 20966: {'lr': 0.0004800122445173999, 'samples': 10735104, 'steps': 20966, 'loss/train': 1.792712688446045} +03/04/2022 14:05:42 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 14:05:45 - INFO - codeparrot_training - Step 20967: {'lr': 0.00048001016526528, 'samples': 10735616, 'steps': 20967, 'loss/train': 0.8221688270568848} +03/04/2022 14:05:49 - INFO - codeparrot_training - Step 20968: {'lr': 0.00048000808590952106, 'samples': 10736128, 'steps': 20968, 'loss/train': 2.807900905609131} +03/04/2022 14:05:50 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 14:05:54 - INFO - codeparrot_training - Step 20969: {'lr': 0.0004800060064501239, 'samples': 10736640, 'steps': 20969, 'loss/train': 1.5611026287078857} +03/04/2022 14:05:57 - INFO - codeparrot_training - Step 20970: {'lr': 0.00048000392688708976, 'samples': 10737152, 'steps': 20970, 'loss/train': 1.8889790773391724} +03/04/2022 14:05:59 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 14:06:02 - INFO - codeparrot_training - Step 20971: {'lr': 0.00048000184722041934, 'samples': 10737664, 'steps': 20971, 'loss/train': 2.887594699859619} +03/04/2022 14:06:06 - INFO - codeparrot_training - Step 20972: {'lr': 0.00047999976745011366, 'samples': 10738176, 'steps': 20972, 'loss/train': 1.2868293523788452} +03/04/2022 14:06:08 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/04/2022 14:06:11 - INFO - codeparrot_training - Step 20973: {'lr': 0.0004799976875761736, 'samples': 10738688, 'steps': 20973, 'loss/train': 2.1198692321777344} +03/04/2022 14:06:14 - INFO - codeparrot_training - Step 20974: {'lr': 0.00047999560759860006, 'samples': 10739200, 'steps': 20974, 'loss/train': 1.9696040153503418} +03/04/2022 14:06:17 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 14:06:19 - INFO - codeparrot_training - Step 20975: {'lr': 0.00047999352751739414, 'samples': 10739712, 'steps': 20975, 'loss/train': 6.751706123352051} +03/04/2022 14:06:23 - INFO - codeparrot_training - Step 20976: {'lr': 0.0004799914473325567, 'samples': 10740224, 'steps': 20976, 'loss/train': 2.803312063217163} +03/04/2022 14:06:25 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 14:06:28 - INFO - codeparrot_training - Step 20977: {'lr': 0.00047998936704408865, 'samples': 10740736, 'steps': 20977, 'loss/train': 1.6607009172439575} +03/04/2022 14:06:31 - INFO - codeparrot_training - Step 20978: {'lr': 0.00047998728665199085, 'samples': 10741248, 'steps': 20978, 'loss/train': 1.9760867357254028} +03/04/2022 14:06:35 - INFO - codeparrot_training - Step 20979: {'lr': 0.00047998520615626447, 'samples': 10741760, 'steps': 20979, 'loss/train': 2.697526454925537} +03/04/2022 14:06:35 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 14:06:40 - INFO - codeparrot_training - Step 20980: {'lr': 0.0004799831255569102, 'samples': 10742272, 'steps': 20980, 'loss/train': 2.7061853408813477} +03/04/2022 14:06:43 - INFO - codeparrot_training - Step 20981: {'lr': 0.00047998104485392915, 'samples': 10742784, 'steps': 20981, 'loss/train': 1.7697581052780151} +03/04/2022 14:06:43 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 14:06:48 - INFO - codeparrot_training - Step 20982: {'lr': 0.0004799789640473221, 'samples': 10743296, 'steps': 20982, 'loss/train': 2.2667737007141113} +03/04/2022 14:06:51 - INFO - codeparrot_training - Step 20983: {'lr': 0.0004799768831370902, 'samples': 10743808, 'steps': 20983, 'loss/train': 2.551400661468506} +03/04/2022 14:06:52 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 14:06:57 - INFO - codeparrot_training - Step 20984: {'lr': 0.0004799748021232342, 'samples': 10744320, 'steps': 20984, 'loss/train': 2.3130695819854736} +03/04/2022 14:07:00 - INFO - codeparrot_training - Step 20985: {'lr': 0.00047997272100575505, 'samples': 10744832, 'steps': 20985, 'loss/train': 2.0080254077911377} +03/04/2022 14:07:00 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 14:07:05 - INFO - codeparrot_training - Step 20986: {'lr': 0.00047997063978465383, 'samples': 10745344, 'steps': 20986, 'loss/train': 2.0402441024780273} +03/04/2022 14:07:08 - INFO - codeparrot_training - Step 20987: {'lr': 0.0004799685584599313, 'samples': 10745856, 'steps': 20987, 'loss/train': 1.6427807807922363} +03/04/2022 14:07:09 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 14:07:14 - INFO - codeparrot_training - Step 20988: {'lr': 0.00047996647703158857, 'samples': 10746368, 'steps': 20988, 'loss/train': 2.0794150829315186} +03/04/2022 14:07:17 - INFO - codeparrot_training - Step 20989: {'lr': 0.00047996439549962647, 'samples': 10746880, 'steps': 20989, 'loss/train': 2.5017802715301514} +03/04/2022 14:07:18 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 14:07:22 - INFO - codeparrot_training - Step 20990: {'lr': 0.00047996231386404593, 'samples': 10747392, 'steps': 20990, 'loss/train': 1.7806341648101807} +03/04/2022 14:07:25 - INFO - codeparrot_training - Step 20991: {'lr': 0.00047996023212484797, 'samples': 10747904, 'steps': 20991, 'loss/train': 1.590509295463562} +03/04/2022 14:07:26 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 14:07:31 - INFO - codeparrot_training - Step 20992: {'lr': 0.00047995815028203346, 'samples': 10748416, 'steps': 20992, 'loss/train': 2.24640154838562} +03/04/2022 14:07:34 - INFO - codeparrot_training - Step 20993: {'lr': 0.00047995606833560337, 'samples': 10748928, 'steps': 20993, 'loss/train': 1.8359594345092773} +03/04/2022 14:07:35 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 14:07:39 - INFO - codeparrot_training - Step 20994: {'lr': 0.0004799539862855585, 'samples': 10749440, 'steps': 20994, 'loss/train': 1.5833895206451416} +03/04/2022 14:07:42 - INFO - codeparrot_training - Step 20995: {'lr': 0.00047995190413190004, 'samples': 10749952, 'steps': 20995, 'loss/train': 2.503422737121582} +03/04/2022 14:07:43 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 14:07:48 - INFO - codeparrot_training - Step 20996: {'lr': 0.00047994982187462876, 'samples': 10750464, 'steps': 20996, 'loss/train': 2.1112051010131836} +03/04/2022 14:07:51 - INFO - codeparrot_training - Step 20997: {'lr': 0.0004799477395137457, 'samples': 10750976, 'steps': 20997, 'loss/train': 1.7586954832077026} +03/04/2022 14:07:52 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/04/2022 14:07:56 - INFO - codeparrot_training - Step 20998: {'lr': 0.00047994565704925166, 'samples': 10751488, 'steps': 20998, 'loss/train': 1.296667218208313} +03/04/2022 14:07:59 - INFO - codeparrot_training - Step 20999: {'lr': 0.0004799435744811477, 'samples': 10752000, 'steps': 20999, 'loss/train': 1.3900771141052246} +03/04/2022 14:08:00 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/04/2022 14:08:05 - INFO - codeparrot_training - Step 21000: {'lr': 0.0004799414918094347, 'samples': 10752512, 'steps': 21000, 'loss/train': 1.9416128396987915} +03/04/2022 14:08:08 - INFO - codeparrot_training - Step 21001: {'lr': 0.0004799394090341136, 'samples': 10753024, 'steps': 21001, 'loss/train': 1.7562849521636963} +03/04/2022 14:08:09 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 14:08:13 - INFO - codeparrot_training - Step 21002: {'lr': 0.0004799373261551854, 'samples': 10753536, 'steps': 21002, 'loss/train': 1.7214233875274658} +03/04/2022 14:08:17 - INFO - codeparrot_training - Step 21003: {'lr': 0.0004799352431726509, 'samples': 10754048, 'steps': 21003, 'loss/train': 1.6033939123153687} +03/04/2022 14:08:17 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/04/2022 14:08:22 - INFO - codeparrot_training - Step 21004: {'lr': 0.0004799331600865112, 'samples': 10754560, 'steps': 21004, 'loss/train': 1.4792697429656982} +03/04/2022 14:08:25 - INFO - codeparrot_training - Step 21005: {'lr': 0.0004799310768967671, 'samples': 10755072, 'steps': 21005, 'loss/train': 1.6898143291473389} +03/04/2022 14:08:26 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 14:08:30 - INFO - codeparrot_training - Step 21006: {'lr': 0.00047992899360341966, 'samples': 10755584, 'steps': 21006, 'loss/train': 1.9339125156402588} +03/04/2022 14:08:34 - INFO - codeparrot_training - Step 21007: {'lr': 0.0004799269102064698, 'samples': 10756096, 'steps': 21007, 'loss/train': 1.8577698469161987} +03/04/2022 14:08:35 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 14:08:39 - INFO - codeparrot_training - Step 21008: {'lr': 0.0004799248267059183, 'samples': 10756608, 'steps': 21008, 'loss/train': 3.099095106124878} +03/04/2022 14:08:42 - INFO - codeparrot_training - Step 21009: {'lr': 0.0004799227431017663, 'samples': 10757120, 'steps': 21009, 'loss/train': 1.3929773569107056} +03/04/2022 14:08:43 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 14:08:47 - INFO - codeparrot_training - Step 21010: {'lr': 0.0004799206593940147, 'samples': 10757632, 'steps': 21010, 'loss/train': 1.6412585973739624} +03/04/2022 14:08:51 - INFO - codeparrot_training - Step 21011: {'lr': 0.0004799185755826644, 'samples': 10758144, 'steps': 21011, 'loss/train': 2.2008609771728516} +03/04/2022 14:08:52 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 14:08:56 - INFO - codeparrot_training - Step 21012: {'lr': 0.00047991649166771624, 'samples': 10758656, 'steps': 21012, 'loss/train': 1.2843642234802246} +03/04/2022 14:08:59 - INFO - codeparrot_training - Step 21013: {'lr': 0.00047991440764917127, 'samples': 10759168, 'steps': 21013, 'loss/train': 2.390031099319458} +03/04/2022 14:09:00 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 14:09:04 - INFO - codeparrot_training - Step 21014: {'lr': 0.0004799123235270305, 'samples': 10759680, 'steps': 21014, 'loss/train': 1.5562835931777954} +03/04/2022 14:09:07 - INFO - codeparrot_training - Step 21015: {'lr': 0.0004799102393012947, 'samples': 10760192, 'steps': 21015, 'loss/train': 2.2148594856262207} +03/04/2022 14:09:09 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 14:09:13 - INFO - codeparrot_training - Step 21016: {'lr': 0.0004799081549719649, 'samples': 10760704, 'steps': 21016, 'loss/train': 2.123008966445923} +03/04/2022 14:09:17 - INFO - codeparrot_training - Step 21017: {'lr': 0.0004799060705390421, 'samples': 10761216, 'steps': 21017, 'loss/train': 1.887379765510559} +03/04/2022 14:09:20 - INFO - codeparrot_training - Step 21018: {'lr': 0.00047990398600252713, 'samples': 10761728, 'steps': 21018, 'loss/train': 2.049764633178711} +03/04/2022 14:09:21 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 14:09:25 - INFO - codeparrot_training - Step 21019: {'lr': 0.00047990190136242103, 'samples': 10762240, 'steps': 21019, 'loss/train': 2.232881546020508} +03/04/2022 14:09:28 - INFO - codeparrot_training - Step 21020: {'lr': 0.0004798998166187246, 'samples': 10762752, 'steps': 21020, 'loss/train': 2.300814151763916} +03/04/2022 14:09:29 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 14:09:33 - INFO - codeparrot_training - Step 21021: {'lr': 0.0004798977317714389, 'samples': 10763264, 'steps': 21021, 'loss/train': 2.5423784255981445} +03/04/2022 14:09:37 - INFO - codeparrot_training - Step 21022: {'lr': 0.00047989564682056487, 'samples': 10763776, 'steps': 21022, 'loss/train': 2.376940965652466} +03/04/2022 14:09:38 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 14:09:42 - INFO - codeparrot_training - Step 21023: {'lr': 0.0004798935617661033, 'samples': 10764288, 'steps': 21023, 'loss/train': 1.8303455114364624} +03/04/2022 14:09:45 - INFO - codeparrot_training - Step 21024: {'lr': 0.0004798914766080553, 'samples': 10764800, 'steps': 21024, 'loss/train': 1.8974179029464722} +03/04/2022 14:09:46 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/04/2022 14:09:50 - INFO - codeparrot_training - Step 21025: {'lr': 0.00047988939134642174, 'samples': 10765312, 'steps': 21025, 'loss/train': 3.2753186225891113} +03/04/2022 14:09:54 - INFO - codeparrot_training - Step 21026: {'lr': 0.00047988730598120356, 'samples': 10765824, 'steps': 21026, 'loss/train': 1.611180305480957} +03/04/2022 14:09:55 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 14:09:59 - INFO - codeparrot_training - Step 21027: {'lr': 0.00047988522051240173, 'samples': 10766336, 'steps': 21027, 'loss/train': 1.8409048318862915} +03/04/2022 14:10:02 - INFO - codeparrot_training - Step 21028: {'lr': 0.0004798831349400172, 'samples': 10766848, 'steps': 21028, 'loss/train': 2.9157004356384277} +03/04/2022 14:10:03 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 14:10:07 - INFO - codeparrot_training - Step 21029: {'lr': 0.0004798810492640508, 'samples': 10767360, 'steps': 21029, 'loss/train': 0.5771483182907104} +03/04/2022 14:10:10 - INFO - codeparrot_training - Step 21030: {'lr': 0.00047987896348450354, 'samples': 10767872, 'steps': 21030, 'loss/train': 1.7507086992263794} +03/04/2022 14:10:12 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/04/2022 14:10:16 - INFO - codeparrot_training - Step 21031: {'lr': 0.00047987687760137646, 'samples': 10768384, 'steps': 21031, 'loss/train': 2.245497465133667} +03/04/2022 14:10:19 - INFO - codeparrot_training - Step 21032: {'lr': 0.00047987479161467033, 'samples': 10768896, 'steps': 21032, 'loss/train': 2.177969217300415} +03/04/2022 14:10:20 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 14:10:24 - INFO - codeparrot_training - Step 21033: {'lr': 0.0004798727055243862, 'samples': 10769408, 'steps': 21033, 'loss/train': 1.567217469215393} +03/04/2022 14:10:27 - INFO - codeparrot_training - Step 21034: {'lr': 0.000479870619330525, 'samples': 10769920, 'steps': 21034, 'loss/train': 2.0709125995635986} +03/04/2022 14:10:29 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 14:10:33 - INFO - codeparrot_training - Step 21035: {'lr': 0.0004798685330330876, 'samples': 10770432, 'steps': 21035, 'loss/train': 1.8080439567565918} +03/04/2022 14:10:36 - INFO - codeparrot_training - Step 21036: {'lr': 0.000479866446632075, 'samples': 10770944, 'steps': 21036, 'loss/train': 2.0826783180236816} +03/04/2022 14:10:37 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 14:10:41 - INFO - codeparrot_training - Step 21037: {'lr': 0.00047986436012748815, 'samples': 10771456, 'steps': 21037, 'loss/train': 1.887762188911438} +03/04/2022 14:10:44 - INFO - codeparrot_training - Step 21038: {'lr': 0.00047986227351932785, 'samples': 10771968, 'steps': 21038, 'loss/train': 1.950585126876831} +03/04/2022 14:10:45 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 14:10:50 - INFO - codeparrot_training - Step 21039: {'lr': 0.00047986018680759525, 'samples': 10772480, 'steps': 21039, 'loss/train': 1.5664708614349365} +03/04/2022 14:10:53 - INFO - codeparrot_training - Step 21040: {'lr': 0.00047985809999229125, 'samples': 10772992, 'steps': 21040, 'loss/train': 2.358828544616699} +03/04/2022 14:10:55 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 14:10:58 - INFO - codeparrot_training - Step 21041: {'lr': 0.00047985601307341667, 'samples': 10773504, 'steps': 21041, 'loss/train': 2.6842236518859863} +03/04/2022 14:11:01 - INFO - codeparrot_training - Step 21042: {'lr': 0.0004798539260509725, 'samples': 10774016, 'steps': 21042, 'loss/train': 1.2840571403503418} +03/04/2022 14:11:04 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 14:11:07 - INFO - codeparrot_training - Step 21043: {'lr': 0.00047985183892495977, 'samples': 10774528, 'steps': 21043, 'loss/train': 2.5035223960876465} +03/04/2022 14:11:10 - INFO - codeparrot_training - Step 21044: {'lr': 0.00047984975169537925, 'samples': 10775040, 'steps': 21044, 'loss/train': 2.1038990020751953} +03/04/2022 14:11:12 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/04/2022 14:11:15 - INFO - codeparrot_training - Step 21045: {'lr': 0.00047984766436223205, 'samples': 10775552, 'steps': 21045, 'loss/train': 1.960594654083252} +03/04/2022 14:11:18 - INFO - codeparrot_training - Step 21046: {'lr': 0.000479845576925519, 'samples': 10776064, 'steps': 21046, 'loss/train': 1.6092848777770996} +03/04/2022 14:11:21 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/04/2022 14:11:24 - INFO - codeparrot_training - Step 21047: {'lr': 0.00047984348938524113, 'samples': 10776576, 'steps': 21047, 'loss/train': 1.8981611728668213} +03/04/2022 14:11:27 - INFO - codeparrot_training - Step 21048: {'lr': 0.00047984140174139926, 'samples': 10777088, 'steps': 21048, 'loss/train': 1.8008337020874023} +03/04/2022 14:11:29 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 14:11:32 - INFO - codeparrot_training - Step 21049: {'lr': 0.0004798393139939945, 'samples': 10777600, 'steps': 21049, 'loss/train': 1.901609182357788} +03/04/2022 14:11:35 - INFO - codeparrot_training - Step 21050: {'lr': 0.0004798372261430276, 'samples': 10778112, 'steps': 21050, 'loss/train': 1.91522216796875} +03/04/2022 14:11:37 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 14:11:41 - INFO - codeparrot_training - Step 21051: {'lr': 0.00047983513818849967, 'samples': 10778624, 'steps': 21051, 'loss/train': 2.1081037521362305} +03/04/2022 14:11:44 - INFO - codeparrot_training - Step 21052: {'lr': 0.0004798330501304115, 'samples': 10779136, 'steps': 21052, 'loss/train': 1.9694583415985107} +03/04/2022 14:11:46 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/04/2022 14:11:49 - INFO - codeparrot_training - Step 21053: {'lr': 0.00047983096196876413, 'samples': 10779648, 'steps': 21053, 'loss/train': 1.954062581062317} +03/04/2022 14:11:52 - INFO - codeparrot_training - Step 21054: {'lr': 0.00047982887370355846, 'samples': 10780160, 'steps': 21054, 'loss/train': 2.002750873565674} +03/04/2022 14:11:55 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 14:11:58 - INFO - codeparrot_training - Step 21055: {'lr': 0.0004798267853347955, 'samples': 10780672, 'steps': 21055, 'loss/train': 1.5530085563659668} +03/04/2022 14:12:01 - INFO - codeparrot_training - Step 21056: {'lr': 0.0004798246968624761, 'samples': 10781184, 'steps': 21056, 'loss/train': 1.9373832941055298} +03/04/2022 14:12:03 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 14:12:06 - INFO - codeparrot_training - Step 21057: {'lr': 0.00047982260828660124, 'samples': 10781696, 'steps': 21057, 'loss/train': 1.8972980976104736} +03/04/2022 14:12:09 - INFO - codeparrot_training - Step 21058: {'lr': 0.0004798205196071719, 'samples': 10782208, 'steps': 21058, 'loss/train': 2.7910962104797363} +03/04/2022 14:12:12 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/04/2022 14:12:15 - INFO - codeparrot_training - Step 21059: {'lr': 0.00047981843082418884, 'samples': 10782720, 'steps': 21059, 'loss/train': 2.2382314205169678} +03/04/2022 14:12:18 - INFO - codeparrot_training - Step 21060: {'lr': 0.0004798163419376533, 'samples': 10783232, 'steps': 21060, 'loss/train': 2.063704490661621} +03/04/2022 14:12:20 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 14:12:23 - INFO - codeparrot_training - Step 21061: {'lr': 0.00047981425294756595, 'samples': 10783744, 'steps': 21061, 'loss/train': 1.33449387550354} +03/04/2022 14:12:26 - INFO - codeparrot_training - Step 21062: {'lr': 0.00047981216385392796, 'samples': 10784256, 'steps': 21062, 'loss/train': 2.142117500305176} +03/04/2022 14:12:28 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/04/2022 14:12:32 - INFO - codeparrot_training - Step 21063: {'lr': 0.0004798100746567401, 'samples': 10784768, 'steps': 21063, 'loss/train': 2.1613659858703613} +03/04/2022 14:12:35 - INFO - codeparrot_training - Step 21064: {'lr': 0.00047980798535600334, 'samples': 10785280, 'steps': 21064, 'loss/train': 2.1160268783569336} +03/04/2022 14:12:37 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 14:12:40 - INFO - codeparrot_training - Step 21065: {'lr': 0.00047980589595171866, 'samples': 10785792, 'steps': 21065, 'loss/train': 1.4669512510299683} +03/04/2022 14:12:43 - INFO - codeparrot_training - Step 21066: {'lr': 0.000479803806443887, 'samples': 10786304, 'steps': 21066, 'loss/train': 2.2270255088806152} +03/04/2022 14:12:45 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 14:12:48 - INFO - codeparrot_training - Step 21067: {'lr': 0.0004798017168325093, 'samples': 10786816, 'steps': 21067, 'loss/train': 2.0607972145080566} +03/04/2022 14:12:51 - INFO - codeparrot_training - Step 21068: {'lr': 0.0004797996271175865, 'samples': 10787328, 'steps': 21068, 'loss/train': 1.4589486122131348} +03/04/2022 14:12:53 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 14:12:57 - INFO - codeparrot_training - Step 21069: {'lr': 0.00047979753729911944, 'samples': 10787840, 'steps': 21069, 'loss/train': 2.265150785446167} +03/04/2022 14:13:00 - INFO - codeparrot_training - Step 21070: {'lr': 0.00047979544737710925, 'samples': 10788352, 'steps': 21070, 'loss/train': 1.6598445177078247} +03/04/2022 14:13:01 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 14:13:05 - INFO - codeparrot_training - Step 21071: {'lr': 0.00047979335735155677, 'samples': 10788864, 'steps': 21071, 'loss/train': 1.4378305673599243} +03/04/2022 14:13:09 - INFO - codeparrot_training - Step 21072: {'lr': 0.00047979126722246294, 'samples': 10789376, 'steps': 21072, 'loss/train': 2.14701771736145} +03/04/2022 14:13:11 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 14:13:14 - INFO - codeparrot_training - Step 21073: {'lr': 0.0004797891769898287, 'samples': 10789888, 'steps': 21073, 'loss/train': 2.206808567047119} +03/04/2022 14:13:17 - INFO - codeparrot_training - Step 21074: {'lr': 0.00047978708665365503, 'samples': 10790400, 'steps': 21074, 'loss/train': 1.7821307182312012} +03/04/2022 14:13:20 - INFO - codeparrot_training - Step 21075: {'lr': 0.0004797849962139428, 'samples': 10790912, 'steps': 21075, 'loss/train': 2.60322642326355} +03/04/2022 14:13:20 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 14:13:26 - INFO - codeparrot_training - Step 21076: {'lr': 0.00047978290567069306, 'samples': 10791424, 'steps': 21076, 'loss/train': 1.3353028297424316} +03/04/2022 14:13:29 - INFO - codeparrot_training - Step 21077: {'lr': 0.00047978081502390656, 'samples': 10791936, 'steps': 21077, 'loss/train': 2.302077531814575} +03/04/2022 14:13:29 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 14:13:34 - INFO - codeparrot_training - Step 21078: {'lr': 0.0004797787242735845, 'samples': 10792448, 'steps': 21078, 'loss/train': 1.8832178115844727} +03/04/2022 14:13:37 - INFO - codeparrot_training - Step 21079: {'lr': 0.00047977663341972765, 'samples': 10792960, 'steps': 21079, 'loss/train': 1.789940595626831} +03/04/2022 14:13:37 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/04/2022 14:13:43 - INFO - codeparrot_training - Step 21080: {'lr': 0.00047977454246233696, 'samples': 10793472, 'steps': 21080, 'loss/train': 1.8167808055877686} +03/04/2022 14:13:46 - INFO - codeparrot_training - Step 21081: {'lr': 0.00047977245140141354, 'samples': 10793984, 'steps': 21081, 'loss/train': 2.1022427082061768} +03/04/2022 14:13:46 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/04/2022 14:13:51 - INFO - codeparrot_training - Step 21082: {'lr': 0.00047977036023695807, 'samples': 10794496, 'steps': 21082, 'loss/train': 2.226292848587036} +03/04/2022 14:13:54 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 14:13:56 - INFO - codeparrot_training - Step 21083: {'lr': 0.00047976826896897165, 'samples': 10795008, 'steps': 21083, 'loss/train': 1.9705091714859009} +03/04/2022 14:13:59 - INFO - codeparrot_training - Step 21084: {'lr': 0.0004797661775974552, 'samples': 10795520, 'steps': 21084, 'loss/train': 2.0154731273651123} +03/04/2022 14:14:02 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/04/2022 14:14:05 - INFO - codeparrot_training - Step 21085: {'lr': 0.00047976408612240964, 'samples': 10796032, 'steps': 21085, 'loss/train': 1.8506687879562378} +03/04/2022 14:14:08 - INFO - codeparrot_training - Step 21086: {'lr': 0.00047976199454383595, 'samples': 10796544, 'steps': 21086, 'loss/train': 1.7181332111358643} +03/04/2022 14:14:11 - INFO - codeparrot_training - Step 21087: {'lr': 0.00047975990286173504, 'samples': 10797056, 'steps': 21087, 'loss/train': 0.6872444748878479} +03/04/2022 14:14:11 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 14:14:17 - INFO - codeparrot_training - Step 21088: {'lr': 0.00047975781107610784, 'samples': 10797568, 'steps': 21088, 'loss/train': 1.6296441555023193} +03/04/2022 14:14:20 - INFO - codeparrot_training - Step 21089: {'lr': 0.0004797557191869554, 'samples': 10798080, 'steps': 21089, 'loss/train': 2.564734697341919} +03/04/2022 14:14:20 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 14:14:25 - INFO - codeparrot_training - Step 21090: {'lr': 0.0004797536271942785, 'samples': 10798592, 'steps': 21090, 'loss/train': 2.3377904891967773} +03/04/2022 14:14:28 - INFO - codeparrot_training - Step 21091: {'lr': 0.00047975153509807815, 'samples': 10799104, 'steps': 21091, 'loss/train': 1.4904173612594604} +03/04/2022 14:14:29 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 14:14:33 - INFO - codeparrot_training - Step 21092: {'lr': 0.0004797494428983553, 'samples': 10799616, 'steps': 21092, 'loss/train': 1.7046700716018677} +03/04/2022 14:14:37 - INFO - codeparrot_training - Step 21093: {'lr': 0.000479747350595111, 'samples': 10800128, 'steps': 21093, 'loss/train': 1.6265757083892822} +03/04/2022 14:14:37 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/04/2022 14:14:42 - INFO - codeparrot_training - Step 21094: {'lr': 0.00047974525818834604, 'samples': 10800640, 'steps': 21094, 'loss/train': 1.2710318565368652} +03/04/2022 14:14:45 - INFO - codeparrot_training - Step 21095: {'lr': 0.0004797431656780613, 'samples': 10801152, 'steps': 21095, 'loss/train': 0.3854876756668091} +03/04/2022 14:14:45 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 14:14:50 - INFO - codeparrot_training - Step 21096: {'lr': 0.000479741073064258, 'samples': 10801664, 'steps': 21096, 'loss/train': 2.340409517288208} +03/04/2022 14:14:54 - INFO - codeparrot_training - Step 21097: {'lr': 0.0004797389803469369, 'samples': 10802176, 'steps': 21097, 'loss/train': 2.3350300788879395} +03/04/2022 14:14:54 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/04/2022 14:14:59 - INFO - codeparrot_training - Step 21098: {'lr': 0.0004797368875260988, 'samples': 10802688, 'steps': 21098, 'loss/train': 0.8781698942184448} +03/04/2022 14:15:02 - INFO - codeparrot_training - Step 21099: {'lr': 0.00047973479460174497, 'samples': 10803200, 'steps': 21099, 'loss/train': 1.9037513732910156} +03/04/2022 14:15:02 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 14:15:07 - INFO - codeparrot_training - Step 21100: {'lr': 0.00047973270157387605, 'samples': 10803712, 'steps': 21100, 'loss/train': 1.9518972635269165} +03/04/2022 14:15:10 - INFO - codeparrot_training - Step 21101: {'lr': 0.0004797306084424932, 'samples': 10804224, 'steps': 21101, 'loss/train': 1.909010887145996} +03/04/2022 14:15:11 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 14:15:16 - INFO - codeparrot_training - Step 21102: {'lr': 0.0004797285152075973, 'samples': 10804736, 'steps': 21102, 'loss/train': 2.565692663192749} +03/04/2022 14:15:19 - INFO - codeparrot_training - Step 21103: {'lr': 0.00047972642186918925, 'samples': 10805248, 'steps': 21103, 'loss/train': 1.776336669921875} +03/04/2022 14:15:19 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 14:15:24 - INFO - codeparrot_training - Step 21104: {'lr': 0.00047972432842727003, 'samples': 10805760, 'steps': 21104, 'loss/train': 1.6379461288452148} +03/04/2022 14:15:28 - INFO - codeparrot_training - Step 21105: {'lr': 0.0004797222348818405, 'samples': 10806272, 'steps': 21105, 'loss/train': 1.8836085796356201} +03/04/2022 14:15:28 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/04/2022 14:15:33 - INFO - codeparrot_training - Step 21106: {'lr': 0.00047972014123290183, 'samples': 10806784, 'steps': 21106, 'loss/train': 1.0955758094787598} +03/04/2022 14:15:36 - INFO - codeparrot_training - Step 21107: {'lr': 0.00047971804748045464, 'samples': 10807296, 'steps': 21107, 'loss/train': 2.4363014698028564} +03/04/2022 14:15:36 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 14:15:41 - INFO - codeparrot_training - Step 21108: {'lr': 0.00047971595362450014, 'samples': 10807808, 'steps': 21108, 'loss/train': 1.5124515295028687} +03/04/2022 14:15:44 - INFO - codeparrot_training - Step 21109: {'lr': 0.00047971385966503923, 'samples': 10808320, 'steps': 21109, 'loss/train': 1.6728283166885376} +03/04/2022 14:15:44 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/04/2022 14:15:50 - INFO - codeparrot_training - Step 21110: {'lr': 0.0004797117656020727, 'samples': 10808832, 'steps': 21110, 'loss/train': 2.0742056369781494} +03/04/2022 14:15:53 - INFO - codeparrot_training - Step 21111: {'lr': 0.0004797096714356016, 'samples': 10809344, 'steps': 21111, 'loss/train': 0.3206772804260254} +03/04/2022 14:15:53 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 14:15:59 - INFO - codeparrot_training - Step 21112: {'lr': 0.0004797075771656269, 'samples': 10809856, 'steps': 21112, 'loss/train': 0.6569735407829285} +03/04/2022 14:16:02 - INFO - codeparrot_training - Step 21113: {'lr': 0.0004797054827921495, 'samples': 10810368, 'steps': 21113, 'loss/train': 2.222818613052368} +03/04/2022 14:16:06 - INFO - codeparrot_training - Step 21114: {'lr': 0.0004797033883151703, 'samples': 10810880, 'steps': 21114, 'loss/train': 1.4898802042007446} +03/04/2022 14:16:07 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 14:16:11 - INFO - codeparrot_training - Step 21115: {'lr': 0.0004797012937346904, 'samples': 10811392, 'steps': 21115, 'loss/train': 2.8644816875457764} +03/04/2022 14:16:14 - INFO - codeparrot_training - Step 21116: {'lr': 0.0004796991990507106, 'samples': 10811904, 'steps': 21116, 'loss/train': 2.0687501430511475} +03/04/2022 14:16:16 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/04/2022 14:16:19 - INFO - codeparrot_training - Step 21117: {'lr': 0.00047969710426323185, 'samples': 10812416, 'steps': 21117, 'loss/train': 1.5901048183441162} +03/04/2022 14:16:22 - INFO - codeparrot_training - Step 21118: {'lr': 0.0004796950093722552, 'samples': 10812928, 'steps': 21118, 'loss/train': 1.8892055749893188} +03/04/2022 14:16:24 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/04/2022 14:16:28 - INFO - codeparrot_training - Step 21119: {'lr': 0.00047969291437778143, 'samples': 10813440, 'steps': 21119, 'loss/train': 1.3112480640411377} +03/04/2022 14:16:31 - INFO - codeparrot_training - Step 21120: {'lr': 0.00047969081927981165, 'samples': 10813952, 'steps': 21120, 'loss/train': 1.636164903640747} +03/04/2022 14:16:33 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/04/2022 14:16:36 - INFO - codeparrot_training - Step 21121: {'lr': 0.0004796887240783467, 'samples': 10814464, 'steps': 21121, 'loss/train': 1.861754059791565} +03/04/2022 14:16:39 - INFO - codeparrot_training - Step 21122: {'lr': 0.0004796866287733875, 'samples': 10814976, 'steps': 21122, 'loss/train': 1.8809359073638916} +03/04/2022 14:16:41 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 14:16:45 - INFO - codeparrot_training - Step 21123: {'lr': 0.0004796845333649352, 'samples': 10815488, 'steps': 21123, 'loss/train': 1.9858554601669312} +03/04/2022 14:16:48 - INFO - codeparrot_training - Step 21124: {'lr': 0.00047968243785299046, 'samples': 10816000, 'steps': 21124, 'loss/train': 2.4395761489868164} +03/04/2022 14:16:50 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 14:16:53 - INFO - codeparrot_training - Step 21125: {'lr': 0.0004796803422375544, 'samples': 10816512, 'steps': 21125, 'loss/train': 2.148000717163086} +03/04/2022 14:16:56 - INFO - codeparrot_training - Step 21126: {'lr': 0.0004796782465186279, 'samples': 10817024, 'steps': 21126, 'loss/train': 2.086805582046509} +03/04/2022 14:16:58 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/04/2022 14:17:02 - INFO - codeparrot_training - Step 21127: {'lr': 0.00047967615069621197, 'samples': 10817536, 'steps': 21127, 'loss/train': 2.1219265460968018} +03/04/2022 14:17:05 - INFO - codeparrot_training - Step 21128: {'lr': 0.0004796740547703075, 'samples': 10818048, 'steps': 21128, 'loss/train': 1.184230923652649} +03/04/2022 14:17:06 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 14:17:10 - INFO - codeparrot_training - Step 21129: {'lr': 0.00047967195874091547, 'samples': 10818560, 'steps': 21129, 'loss/train': 3.0659241676330566} +03/04/2022 14:17:13 - INFO - codeparrot_training - Step 21130: {'lr': 0.00047966986260803676, 'samples': 10819072, 'steps': 21130, 'loss/train': 0.58502596616745} +03/04/2022 14:17:15 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/04/2022 14:17:18 - INFO - codeparrot_training - Step 21131: {'lr': 0.0004796677663716723, 'samples': 10819584, 'steps': 21131, 'loss/train': 1.8203253746032715} +03/04/2022 14:17:21 - INFO - codeparrot_training - Step 21132: {'lr': 0.00047966567003182315, 'samples': 10820096, 'steps': 21132, 'loss/train': 1.6295959949493408} +03/04/2022 14:17:23 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/04/2022 14:17:27 - INFO - codeparrot_training - Step 21133: {'lr': 0.0004796635735884902, 'samples': 10820608, 'steps': 21133, 'loss/train': 1.7200300693511963} +03/04/2022 14:17:30 - INFO - codeparrot_training - Step 21134: {'lr': 0.0004796614770416744, 'samples': 10821120, 'steps': 21134, 'loss/train': 2.2141950130462646} +03/04/2022 14:17:31 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 14:17:35 - INFO - codeparrot_training - Step 21135: {'lr': 0.00047965938039137666, 'samples': 10821632, 'steps': 21135, 'loss/train': 1.9960881471633911} +03/04/2022 14:17:39 - INFO - codeparrot_training - Step 21136: {'lr': 0.000479657283637598, 'samples': 10822144, 'steps': 21136, 'loss/train': 1.7144848108291626} +03/04/2022 14:17:40 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 14:17:44 - INFO - codeparrot_training - Step 21137: {'lr': 0.00047965518678033924, 'samples': 10822656, 'steps': 21137, 'loss/train': 1.7283679246902466} +03/04/2022 14:17:47 - INFO - codeparrot_training - Step 21138: {'lr': 0.00047965308981960143, 'samples': 10823168, 'steps': 21138, 'loss/train': 0.9877696633338928} +03/04/2022 14:17:48 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/04/2022 14:17:52 - INFO - codeparrot_training - Step 21139: {'lr': 0.0004796509927553854, 'samples': 10823680, 'steps': 21139, 'loss/train': 1.753754734992981} +03/04/2022 14:17:55 - INFO - codeparrot_training - Step 21140: {'lr': 0.00047964889558769233, 'samples': 10824192, 'steps': 21140, 'loss/train': 1.286018967628479} +03/04/2022 14:17:57 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 14:18:01 - INFO - codeparrot_training - Step 21141: {'lr': 0.00047964679831652294, 'samples': 10824704, 'steps': 21141, 'loss/train': 1.8315149545669556} +03/04/2022 14:18:04 - INFO - codeparrot_training - Step 21142: {'lr': 0.00047964470094187815, 'samples': 10825216, 'steps': 21142, 'loss/train': 2.1816277503967285} +03/04/2022 14:18:05 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/04/2022 14:18:09 - INFO - codeparrot_training - Step 21143: {'lr': 0.0004796426034637591, 'samples': 10825728, 'steps': 21143, 'loss/train': 2.2653775215148926} +03/04/2022 14:18:12 - INFO - codeparrot_training - Step 21144: {'lr': 0.0004796405058821666, 'samples': 10826240, 'steps': 21144, 'loss/train': 1.5483577251434326} +03/04/2022 14:18:14 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/04/2022 14:18:17 - INFO - codeparrot_training - Step 21145: {'lr': 0.0004796384081971017, 'samples': 10826752, 'steps': 21145, 'loss/train': 2.0517871379852295} +03/04/2022 14:18:21 - INFO - codeparrot_training - Step 21146: {'lr': 0.0004796363104085652, 'samples': 10827264, 'steps': 21146, 'loss/train': 0.9529291391372681} +03/04/2022 14:18:22 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 14:18:26 - INFO - codeparrot_training - Step 21147: {'lr': 0.00047963421251655817, 'samples': 10827776, 'steps': 21147, 'loss/train': 1.492002248764038} +03/04/2022 14:18:29 - INFO - codeparrot_training - Step 21148: {'lr': 0.00047963211452108144, 'samples': 10828288, 'steps': 21148, 'loss/train': 2.3942465782165527} +03/04/2022 14:18:31 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 14:18:34 - INFO - codeparrot_training - Step 21149: {'lr': 0.0004796300164221361, 'samples': 10828800, 'steps': 21149, 'loss/train': 2.0873513221740723} +03/04/2022 14:18:38 - INFO - codeparrot_training - Step 21150: {'lr': 0.00047962791821972296, 'samples': 10829312, 'steps': 21150, 'loss/train': 1.69476318359375} +03/04/2022 14:18:40 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/04/2022 14:18:43 - INFO - codeparrot_training - Step 21151: {'lr': 0.00047962581991384305, 'samples': 10829824, 'steps': 21151, 'loss/train': 1.4095745086669922} +03/04/2022 14:18:46 - INFO - codeparrot_training - Step 21152: {'lr': 0.0004796237215044973, 'samples': 10830336, 'steps': 21152, 'loss/train': 2.3897125720977783} +03/04/2022 14:18:50 - INFO - codeparrot_training - Step 21153: {'lr': 0.0004796216229916867, 'samples': 10830848, 'steps': 21153, 'loss/train': 1.0487030744552612} +03/04/2022 14:18:50 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 14:18:55 - INFO - codeparrot_training - Step 21154: {'lr': 0.000479619524375412, 'samples': 10831360, 'steps': 21154, 'loss/train': 2.096177101135254} +03/04/2022 14:18:58 - INFO - codeparrot_training - Step 21155: {'lr': 0.0004796174256556744, 'samples': 10831872, 'steps': 21155, 'loss/train': 1.7782570123672485} +03/04/2022 14:18:58 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 14:19:03 - INFO - codeparrot_training - Step 21156: {'lr': 0.0004796153268324747, 'samples': 10832384, 'steps': 21156, 'loss/train': 2.2128190994262695} +03/04/2022 14:19:07 - INFO - codeparrot_training - Step 21157: {'lr': 0.00047961322790581384, 'samples': 10832896, 'steps': 21157, 'loss/train': 1.3381930589675903} +03/04/2022 14:19:07 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/04/2022 14:19:12 - INFO - codeparrot_training - Step 21158: {'lr': 0.00047961112887569285, 'samples': 10833408, 'steps': 21158, 'loss/train': 2.3194620609283447} +03/04/2022 14:19:15 - INFO - codeparrot_training - Step 21159: {'lr': 0.0004796090297421126, 'samples': 10833920, 'steps': 21159, 'loss/train': 1.6068179607391357} +03/04/2022 14:19:15 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 14:19:20 - INFO - codeparrot_training - Step 21160: {'lr': 0.0004796069305050741, 'samples': 10834432, 'steps': 21160, 'loss/train': 2.635826826095581} +03/04/2022 14:19:24 - INFO - codeparrot_training - Step 21161: {'lr': 0.0004796048311645782, 'samples': 10834944, 'steps': 21161, 'loss/train': 1.6798481941223145} +03/04/2022 14:19:25 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 14:19:29 - INFO - codeparrot_training - Step 21162: {'lr': 0.00047960273172062596, 'samples': 10835456, 'steps': 21162, 'loss/train': 2.228679895401001} +03/04/2022 14:19:32 - INFO - codeparrot_training - Step 21163: {'lr': 0.00047960063217321824, 'samples': 10835968, 'steps': 21163, 'loss/train': 1.3766992092132568} +03/04/2022 14:19:33 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 14:19:37 - INFO - codeparrot_training - Step 21164: {'lr': 0.0004795985325223561, 'samples': 10836480, 'steps': 21164, 'loss/train': 1.5830128192901611} +03/04/2022 14:19:40 - INFO - codeparrot_training - Step 21165: {'lr': 0.00047959643276804026, 'samples': 10836992, 'steps': 21165, 'loss/train': 0.8572365641593933} +03/04/2022 14:19:41 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/04/2022 14:19:46 - INFO - codeparrot_training - Step 21166: {'lr': 0.0004795943329102719, 'samples': 10837504, 'steps': 21166, 'loss/train': 1.6668130159378052} +03/04/2022 14:19:49 - INFO - codeparrot_training - Step 21167: {'lr': 0.00047959223294905185, 'samples': 10838016, 'steps': 21167, 'loss/train': 2.3324332237243652} +03/04/2022 14:19:50 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/04/2022 14:19:54 - INFO - codeparrot_training - Step 21168: {'lr': 0.00047959013288438113, 'samples': 10838528, 'steps': 21168, 'loss/train': 1.7264528274536133} +03/04/2022 14:19:57 - INFO - codeparrot_training - Step 21169: {'lr': 0.0004795880327162606, 'samples': 10839040, 'steps': 21169, 'loss/train': 2.182647466659546} +03/04/2022 14:19:58 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/04/2022 14:20:03 - INFO - codeparrot_training - Step 21170: {'lr': 0.0004795859324446912, 'samples': 10839552, 'steps': 21170, 'loss/train': 1.8980892896652222} +03/04/2022 14:20:06 - INFO - codeparrot_training - Step 21171: {'lr': 0.000479583832069674, 'samples': 10840064, 'steps': 21171, 'loss/train': 0.9141941070556641} +03/04/2022 14:20:07 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 14:20:11 - INFO - codeparrot_training - Step 21172: {'lr': 0.00047958173159120984, 'samples': 10840576, 'steps': 21172, 'loss/train': 2.176936626434326} +03/04/2022 14:20:14 - INFO - codeparrot_training - Step 21173: {'lr': 0.0004795796310092997, 'samples': 10841088, 'steps': 21173, 'loss/train': 0.3896430432796478} +03/04/2022 14:20:16 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/04/2022 14:20:19 - INFO - codeparrot_training - Step 21174: {'lr': 0.00047957753032394445, 'samples': 10841600, 'steps': 21174, 'loss/train': 1.7506471872329712} +03/04/2022 14:20:23 - INFO - codeparrot_training - Step 21175: {'lr': 0.00047957542953514523, 'samples': 10842112, 'steps': 21175, 'loss/train': 1.6608985662460327} +03/04/2022 14:20:24 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 14:20:28 - INFO - codeparrot_training - Step 21176: {'lr': 0.00047957332864290283, 'samples': 10842624, 'steps': 21176, 'loss/train': 1.8469350337982178} +03/04/2022 14:20:31 - INFO - codeparrot_training - Step 21177: {'lr': 0.00047957122764721817, 'samples': 10843136, 'steps': 21177, 'loss/train': 2.1154544353485107} +03/04/2022 14:20:32 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 14:20:36 - INFO - codeparrot_training - Step 21178: {'lr': 0.00047956912654809227, 'samples': 10843648, 'steps': 21178, 'loss/train': 1.2120853662490845} +03/04/2022 14:20:40 - INFO - codeparrot_training - Step 21179: {'lr': 0.0004795670253455261, 'samples': 10844160, 'steps': 21179, 'loss/train': 1.3024139404296875} +03/04/2022 14:20:41 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 14:20:45 - INFO - codeparrot_training - Step 21180: {'lr': 0.00047956492403952055, 'samples': 10844672, 'steps': 21180, 'loss/train': 1.7748509645462036} +03/04/2022 14:20:48 - INFO - codeparrot_training - Step 21181: {'lr': 0.00047956282263007663, 'samples': 10845184, 'steps': 21181, 'loss/train': 1.1602504253387451} +03/04/2022 14:20:49 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/04/2022 14:20:53 - INFO - codeparrot_training - Step 21182: {'lr': 0.00047956072111719517, 'samples': 10845696, 'steps': 21182, 'loss/train': 1.948672890663147} +03/04/2022 14:20:57 - INFO - codeparrot_training - Step 21183: {'lr': 0.00047955861950087724, 'samples': 10846208, 'steps': 21183, 'loss/train': 1.7772477865219116} +03/04/2022 14:20:57 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/04/2022 14:21:02 - INFO - codeparrot_training - Step 21184: {'lr': 0.00047955651778112376, 'samples': 10846720, 'steps': 21184, 'loss/train': 2.0294008255004883} +03/04/2022 14:21:05 - INFO - codeparrot_training - Step 21185: {'lr': 0.00047955441595793556, 'samples': 10847232, 'steps': 21185, 'loss/train': 2.0014960765838623} +03/04/2022 14:21:06 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/04/2022 14:21:10 - INFO - codeparrot_training - Step 21186: {'lr': 0.0004795523140313138, 'samples': 10847744, 'steps': 21186, 'loss/train': 2.378399610519409} +03/04/2022 14:21:13 - INFO - codeparrot_training - Step 21187: {'lr': 0.00047955021200125924, 'samples': 10848256, 'steps': 21187, 'loss/train': 1.92316472530365} +03/04/2022 14:21:14 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 14:21:19 - INFO - codeparrot_training - Step 21188: {'lr': 0.0004795481098677729, 'samples': 10848768, 'steps': 21188, 'loss/train': 1.6132185459136963} +03/04/2022 14:21:22 - INFO - codeparrot_training - Step 21189: {'lr': 0.00047954600763085577, 'samples': 10849280, 'steps': 21189, 'loss/train': 1.5404436588287354} +03/04/2022 14:21:23 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/04/2022 14:21:28 - INFO - codeparrot_training - Step 21190: {'lr': 0.0004795439052905087, 'samples': 10849792, 'steps': 21190, 'loss/train': 2.15837025642395} +03/04/2022 14:21:31 - INFO - codeparrot_training - Step 21191: {'lr': 0.0004795418028467327, 'samples': 10850304, 'steps': 21191, 'loss/train': 1.824462890625} +03/04/2022 14:21:32 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 14:21:36 - INFO - codeparrot_training - Step 21192: {'lr': 0.0004795397002995288, 'samples': 10850816, 'steps': 21192, 'loss/train': 1.597153663635254} +03/04/2022 14:21:39 - INFO - codeparrot_training - Step 21193: {'lr': 0.0004795375976488977, 'samples': 10851328, 'steps': 21193, 'loss/train': 0.21810907125473022} +03/04/2022 14:21:41 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 14:21:44 - INFO - codeparrot_training - Step 21194: {'lr': 0.00047953549489484056, 'samples': 10851840, 'steps': 21194, 'loss/train': 1.9583873748779297} +03/04/2022 14:21:47 - INFO - codeparrot_training - Step 21195: {'lr': 0.0004795333920373583, 'samples': 10852352, 'steps': 21195, 'loss/train': 1.7765870094299316} +03/04/2022 14:21:49 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 14:21:53 - INFO - codeparrot_training - Step 21196: {'lr': 0.00047953128907645185, 'samples': 10852864, 'steps': 21196, 'loss/train': 1.6495625972747803} +03/04/2022 14:21:56 - INFO - codeparrot_training - Step 21197: {'lr': 0.000479529186012122, 'samples': 10853376, 'steps': 21197, 'loss/train': 2.202082395553589} +03/04/2022 14:21:57 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/04/2022 14:22:01 - INFO - codeparrot_training - Step 21198: {'lr': 0.00047952708284437, 'samples': 10853888, 'steps': 21198, 'loss/train': 1.6591230630874634} +03/04/2022 14:22:04 - INFO - codeparrot_training - Step 21199: {'lr': 0.0004795249795731966, 'samples': 10854400, 'steps': 21199, 'loss/train': 2.1426596641540527} +03/04/2022 14:22:06 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 14:22:10 - INFO - codeparrot_training - Step 21200: {'lr': 0.00047952287619860273, 'samples': 10854912, 'steps': 21200, 'loss/train': 2.395808219909668} +03/04/2022 14:22:13 - INFO - codeparrot_training - Step 21201: {'lr': 0.0004795207727205895, 'samples': 10855424, 'steps': 21201, 'loss/train': 1.162631869316101} +03/04/2022 14:22:14 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 14:22:18 - INFO - codeparrot_training - Step 21202: {'lr': 0.00047951866913915767, 'samples': 10855936, 'steps': 21202, 'loss/train': 2.3184118270874023} +03/04/2022 14:22:21 - INFO - codeparrot_training - Step 21203: {'lr': 0.0004795165654543082, 'samples': 10856448, 'steps': 21203, 'loss/train': 1.7647569179534912} +03/04/2022 14:22:23 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 14:22:26 - INFO - codeparrot_training - Step 21204: {'lr': 0.0004795144616660422, 'samples': 10856960, 'steps': 21204, 'loss/train': 1.7691354751586914} +03/04/2022 14:22:30 - INFO - codeparrot_training - Step 21205: {'lr': 0.0004795123577743605, 'samples': 10857472, 'steps': 21205, 'loss/train': 1.7189066410064697} +03/04/2022 14:22:31 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 14:22:35 - INFO - codeparrot_training - Step 21206: {'lr': 0.0004795102537792641, 'samples': 10857984, 'steps': 21206, 'loss/train': 2.5308282375335693} +03/04/2022 14:22:38 - INFO - codeparrot_training - Step 21207: {'lr': 0.000479508149680754, 'samples': 10858496, 'steps': 21207, 'loss/train': 1.822077989578247} +03/04/2022 14:22:40 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/04/2022 14:22:43 - INFO - codeparrot_training - Step 21208: {'lr': 0.0004795060454788309, 'samples': 10859008, 'steps': 21208, 'loss/train': 1.976842999458313} +03/04/2022 14:22:47 - INFO - codeparrot_training - Step 21209: {'lr': 0.000479503941173496, 'samples': 10859520, 'steps': 21209, 'loss/train': 1.593087077140808} +03/04/2022 14:22:48 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/04/2022 14:22:52 - INFO - codeparrot_training - Step 21210: {'lr': 0.0004795018367647501, 'samples': 10860032, 'steps': 21210, 'loss/train': 1.2500641345977783} +03/04/2022 14:22:55 - INFO - codeparrot_training - Step 21211: {'lr': 0.0004794997322525944, 'samples': 10860544, 'steps': 21211, 'loss/train': 0.2709581255912781} +03/04/2022 14:22:57 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 14:23:00 - INFO - codeparrot_training - Step 21212: {'lr': 0.0004794976276370295, 'samples': 10861056, 'steps': 21212, 'loss/train': 1.6405266523361206} +03/04/2022 14:23:04 - INFO - codeparrot_training - Step 21213: {'lr': 0.00047949552291805654, 'samples': 10861568, 'steps': 21213, 'loss/train': 2.248438596725464} +03/04/2022 14:23:05 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 14:23:09 - INFO - codeparrot_training - Step 21214: {'lr': 0.0004794934180956764, 'samples': 10862080, 'steps': 21214, 'loss/train': 2.4185264110565186} +03/04/2022 14:23:12 - INFO - codeparrot_training - Step 21215: {'lr': 0.00047949131316989016, 'samples': 10862592, 'steps': 21215, 'loss/train': 2.5255978107452393} +03/04/2022 14:23:13 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 14:23:17 - INFO - codeparrot_training - Step 21216: {'lr': 0.0004794892081406986, 'samples': 10863104, 'steps': 21216, 'loss/train': 0.6957576274871826} +03/04/2022 14:23:20 - INFO - codeparrot_training - Step 21217: {'lr': 0.00047948710300810276, 'samples': 10863616, 'steps': 21217, 'loss/train': 2.071634531021118} +03/04/2022 14:23:22 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 14:23:26 - INFO - codeparrot_training - Step 21218: {'lr': 0.0004794849977721036, 'samples': 10864128, 'steps': 21218, 'loss/train': 2.4744577407836914} +03/04/2022 14:23:29 - INFO - codeparrot_training - Step 21219: {'lr': 0.00047948289243270205, 'samples': 10864640, 'steps': 21219, 'loss/train': 2.1670923233032227} +03/04/2022 14:23:30 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/04/2022 14:23:34 - INFO - codeparrot_training - Step 21220: {'lr': 0.000479480786989899, 'samples': 10865152, 'steps': 21220, 'loss/train': 1.4368562698364258} +03/04/2022 14:23:38 - INFO - codeparrot_training - Step 21221: {'lr': 0.0004794786814436955, 'samples': 10865664, 'steps': 21221, 'loss/train': 2.2024271488189697} +03/04/2022 14:23:40 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/04/2022 14:23:43 - INFO - codeparrot_training - Step 21222: {'lr': 0.0004794765757940924, 'samples': 10866176, 'steps': 21222, 'loss/train': 1.7404894828796387} +03/04/2022 14:23:46 - INFO - codeparrot_training - Step 21223: {'lr': 0.00047947447004109066, 'samples': 10866688, 'steps': 21223, 'loss/train': 1.8579998016357422} +03/04/2022 14:23:48 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 14:23:51 - INFO - codeparrot_training - Step 21224: {'lr': 0.0004794723641846914, 'samples': 10867200, 'steps': 21224, 'loss/train': 1.075830101966858} +03/04/2022 14:23:54 - INFO - codeparrot_training - Step 21225: {'lr': 0.0004794702582248953, 'samples': 10867712, 'steps': 21225, 'loss/train': 2.50860333442688} +03/04/2022 14:23:57 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/04/2022 14:24:00 - INFO - codeparrot_training - Step 21226: {'lr': 0.0004794681521617035, 'samples': 10868224, 'steps': 21226, 'loss/train': 0.9017037153244019} +03/04/2022 14:24:03 - INFO - codeparrot_training - Step 21227: {'lr': 0.0004794660459951169, 'samples': 10868736, 'steps': 21227, 'loss/train': 1.7271755933761597} +03/04/2022 14:24:05 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/04/2022 14:24:08 - INFO - codeparrot_training - Step 21228: {'lr': 0.0004794639397251365, 'samples': 10869248, 'steps': 21228, 'loss/train': 2.3620362281799316} +03/04/2022 14:24:11 - INFO - codeparrot_training - Step 21229: {'lr': 0.00047946183335176307, 'samples': 10869760, 'steps': 21229, 'loss/train': 1.7966151237487793} +03/04/2022 14:24:13 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/04/2022 14:24:16 - INFO - codeparrot_training - Step 21230: {'lr': 0.00047945972687499775, 'samples': 10870272, 'steps': 21230, 'loss/train': 2.0047144889831543} +03/04/2022 14:24:19 - INFO - codeparrot_training - Step 21231: {'lr': 0.0004794576202948414, 'samples': 10870784, 'steps': 21231, 'loss/train': 2.7041053771972656} +03/04/2022 14:24:22 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/04/2022 14:24:25 - INFO - codeparrot_training - Step 21232: {'lr': 0.000479455513611295, 'samples': 10871296, 'steps': 21232, 'loss/train': 1.4387954473495483} +03/04/2022 14:24:28 - INFO - codeparrot_training - Step 21233: {'lr': 0.00047945340682435943, 'samples': 10871808, 'steps': 21233, 'loss/train': 0.8335886001586914} +03/04/2022 14:24:30 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 14:24:33 - INFO - codeparrot_training - Step 21234: {'lr': 0.00047945129993403577, 'samples': 10872320, 'steps': 21234, 'loss/train': 1.9933964014053345} +03/04/2022 14:24:36 - INFO - codeparrot_training - Step 21235: {'lr': 0.00047944919294032486, 'samples': 10872832, 'steps': 21235, 'loss/train': 1.8389767408370972} +03/04/2022 14:24:38 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 14:24:42 - INFO - codeparrot_training - Step 21236: {'lr': 0.00047944708584322763, 'samples': 10873344, 'steps': 21236, 'loss/train': 1.8486281633377075} +03/04/2022 14:24:45 - INFO - codeparrot_training - Step 21237: {'lr': 0.00047944497864274517, 'samples': 10873856, 'steps': 21237, 'loss/train': 4.02432107925415} +03/04/2022 14:24:47 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 14:24:50 - INFO - codeparrot_training - Step 21238: {'lr': 0.00047944287133887834, 'samples': 10874368, 'steps': 21238, 'loss/train': 1.546904444694519} +03/04/2022 14:24:53 - INFO - codeparrot_training - Step 21239: {'lr': 0.00047944076393162806, 'samples': 10874880, 'steps': 21239, 'loss/train': 1.6473089456558228} +03/04/2022 14:24:55 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 14:24:59 - INFO - codeparrot_training - Step 21240: {'lr': 0.00047943865642099525, 'samples': 10875392, 'steps': 21240, 'loss/train': 1.1174792051315308} +03/04/2022 14:25:02 - INFO - codeparrot_training - Step 21241: {'lr': 0.00047943654880698106, 'samples': 10875904, 'steps': 21241, 'loss/train': 1.893358588218689} +03/04/2022 14:25:04 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 14:25:07 - INFO - codeparrot_training - Step 21242: {'lr': 0.00047943444108958623, 'samples': 10876416, 'steps': 21242, 'loss/train': 2.127570390701294} +03/04/2022 14:25:10 - INFO - codeparrot_training - Step 21243: {'lr': 0.00047943233326881176, 'samples': 10876928, 'steps': 21243, 'loss/train': 1.6648515462875366} +03/04/2022 14:25:13 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 14:25:16 - INFO - codeparrot_training - Step 21244: {'lr': 0.00047943022534465866, 'samples': 10877440, 'steps': 21244, 'loss/train': 2.2198119163513184} +03/04/2022 14:25:19 - INFO - codeparrot_training - Step 21245: {'lr': 0.00047942811731712775, 'samples': 10877952, 'steps': 21245, 'loss/train': 1.7648465633392334} +03/04/2022 14:25:21 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 14:25:24 - INFO - codeparrot_training - Step 21246: {'lr': 0.0004794260091862202, 'samples': 10878464, 'steps': 21246, 'loss/train': 1.7027069330215454} +03/04/2022 14:25:27 - INFO - codeparrot_training - Step 21247: {'lr': 0.0004794239009519368, 'samples': 10878976, 'steps': 21247, 'loss/train': 1.7320854663848877} +03/04/2022 14:25:30 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 14:25:33 - INFO - codeparrot_training - Step 21248: {'lr': 0.00047942179261427847, 'samples': 10879488, 'steps': 21248, 'loss/train': 1.9551740884780884} +03/04/2022 14:25:36 - INFO - codeparrot_training - Step 21249: {'lr': 0.0004794196841732463, 'samples': 10880000, 'steps': 21249, 'loss/train': 1.5515176057815552} +03/04/2022 14:25:39 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/04/2022 14:25:41 - INFO - codeparrot_training - Step 21250: {'lr': 0.0004794175756288411, 'samples': 10880512, 'steps': 21250, 'loss/train': 1.80269193649292} +03/04/2022 14:25:44 - INFO - codeparrot_training - Step 21251: {'lr': 0.00047941546698106386, 'samples': 10881024, 'steps': 21251, 'loss/train': 1.7902920246124268} +03/04/2022 14:25:47 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 14:25:50 - INFO - codeparrot_training - Step 21252: {'lr': 0.0004794133582299156, 'samples': 10881536, 'steps': 21252, 'loss/train': 1.7715116739273071} +03/04/2022 14:25:53 - INFO - codeparrot_training - Step 21253: {'lr': 0.0004794112493753972, 'samples': 10882048, 'steps': 21253, 'loss/train': 1.9789659976959229} +03/04/2022 14:25:55 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 14:25:58 - INFO - codeparrot_training - Step 21254: {'lr': 0.0004794091404175097, 'samples': 10882560, 'steps': 21254, 'loss/train': 1.9713318347930908} +03/04/2022 14:26:01 - INFO - codeparrot_training - Step 21255: {'lr': 0.00047940703135625386, 'samples': 10883072, 'steps': 21255, 'loss/train': 2.0809836387634277} +03/04/2022 14:26:04 - INFO - codeparrot_training - Step 21256: {'lr': 0.0004794049221916308, 'samples': 10883584, 'steps': 21256, 'loss/train': 2.098627805709839} +03/04/2022 14:26:04 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/04/2022 14:26:10 - INFO - codeparrot_training - Step 21257: {'lr': 0.00047940281292364146, 'samples': 10884096, 'steps': 21257, 'loss/train': 3.2147161960601807} +03/04/2022 14:26:13 - INFO - codeparrot_training - Step 21258: {'lr': 0.0004794007035522867, 'samples': 10884608, 'steps': 21258, 'loss/train': 2.747690200805664} +03/04/2022 14:26:13 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 14:26:18 - INFO - codeparrot_training - Step 21259: {'lr': 0.0004793985940775676, 'samples': 10885120, 'steps': 21259, 'loss/train': 2.3952293395996094} +03/04/2022 14:26:21 - INFO - codeparrot_training - Step 21260: {'lr': 0.0004793964844994849, 'samples': 10885632, 'steps': 21260, 'loss/train': 2.247006416320801} +03/04/2022 14:26:21 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 14:26:27 - INFO - codeparrot_training - Step 21261: {'lr': 0.00047939437481803984, 'samples': 10886144, 'steps': 21261, 'loss/train': 2.2030515670776367} +03/04/2022 14:26:30 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 14:26:32 - INFO - codeparrot_training - Step 21262: {'lr': 0.00047939226503323313, 'samples': 10886656, 'steps': 21262, 'loss/train': 1.8542122840881348} +03/04/2022 14:26:35 - INFO - codeparrot_training - Step 21263: {'lr': 0.0004793901551450658, 'samples': 10887168, 'steps': 21263, 'loss/train': 0.8855220079421997} +03/04/2022 14:26:38 - INFO - codeparrot_training - Step 21264: {'lr': 0.00047938804515353887, 'samples': 10887680, 'steps': 21264, 'loss/train': 2.002821207046509} +03/04/2022 14:26:38 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/04/2022 14:26:44 - INFO - codeparrot_training - Step 21265: {'lr': 0.00047938593505865315, 'samples': 10888192, 'steps': 21265, 'loss/train': 2.742032527923584} +03/04/2022 14:26:47 - INFO - codeparrot_training - Step 21266: {'lr': 0.00047938382486040963, 'samples': 10888704, 'steps': 21266, 'loss/train': 1.1937470436096191} +03/04/2022 14:26:47 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 14:26:52 - INFO - codeparrot_training - Step 21267: {'lr': 0.0004793817145588094, 'samples': 10889216, 'steps': 21267, 'loss/train': 1.0652046203613281} +03/04/2022 14:26:55 - INFO - codeparrot_training - Step 21268: {'lr': 0.0004793796041538533, 'samples': 10889728, 'steps': 21268, 'loss/train': 1.5225547552108765} +03/04/2022 14:26:56 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 14:27:00 - INFO - codeparrot_training - Step 21269: {'lr': 0.00047937749364554226, 'samples': 10890240, 'steps': 21269, 'loss/train': 1.6688915491104126} +03/04/2022 14:27:04 - INFO - codeparrot_training - Step 21270: {'lr': 0.0004793753830338773, 'samples': 10890752, 'steps': 21270, 'loss/train': 2.0986592769622803} +03/04/2022 14:27:04 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 14:27:09 - INFO - codeparrot_training - Step 21271: {'lr': 0.00047937327231885925, 'samples': 10891264, 'steps': 21271, 'loss/train': 1.4415392875671387} +03/04/2022 14:27:12 - INFO - codeparrot_training - Step 21272: {'lr': 0.0004793711615004892, 'samples': 10891776, 'steps': 21272, 'loss/train': 2.268324375152588} +03/04/2022 14:27:13 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/04/2022 14:27:17 - INFO - codeparrot_training - Step 21273: {'lr': 0.000479369050578768, 'samples': 10892288, 'steps': 21273, 'loss/train': 1.6223810911178589} +03/04/2022 14:27:21 - INFO - codeparrot_training - Step 21274: {'lr': 0.0004793669395536967, 'samples': 10892800, 'steps': 21274, 'loss/train': 2.4806551933288574} +03/04/2022 14:27:21 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 14:27:26 - INFO - codeparrot_training - Step 21275: {'lr': 0.00047936482842527616, 'samples': 10893312, 'steps': 21275, 'loss/train': 1.9370464086532593} +03/04/2022 14:27:29 - INFO - codeparrot_training - Step 21276: {'lr': 0.00047936271719350743, 'samples': 10893824, 'steps': 21276, 'loss/train': 0.3720245361328125} +03/04/2022 14:27:30 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/04/2022 14:27:34 - INFO - codeparrot_training - Step 21277: {'lr': 0.0004793606058583913, 'samples': 10894336, 'steps': 21277, 'loss/train': 1.8277949094772339} +03/04/2022 14:27:37 - INFO - codeparrot_training - Step 21278: {'lr': 0.00047935849441992887, 'samples': 10894848, 'steps': 21278, 'loss/train': 2.3039402961730957} +03/04/2022 14:27:38 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 14:27:43 - INFO - codeparrot_training - Step 21279: {'lr': 0.00047935638287812104, 'samples': 10895360, 'steps': 21279, 'loss/train': 2.679325580596924} +03/04/2022 14:27:46 - INFO - codeparrot_training - Step 21280: {'lr': 0.00047935427123296884, 'samples': 10895872, 'steps': 21280, 'loss/train': 2.481029987335205} +03/04/2022 14:27:46 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/04/2022 14:27:51 - INFO - codeparrot_training - Step 21281: {'lr': 0.000479352159484473, 'samples': 10896384, 'steps': 21281, 'loss/train': 1.6225268840789795} +03/04/2022 14:27:54 - INFO - codeparrot_training - Step 21282: {'lr': 0.0004793500476326347, 'samples': 10896896, 'steps': 21282, 'loss/train': 1.9672857522964478} +03/04/2022 14:27:54 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/04/2022 14:28:00 - INFO - codeparrot_training - Step 21283: {'lr': 0.0004793479356774548, 'samples': 10897408, 'steps': 21283, 'loss/train': 1.6339694261550903} +03/04/2022 14:28:03 - INFO - codeparrot_training - Step 21284: {'lr': 0.00047934582361893423, 'samples': 10897920, 'steps': 21284, 'loss/train': 1.7789896726608276} +03/04/2022 14:28:03 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 14:28:08 - INFO - codeparrot_training - Step 21285: {'lr': 0.000479343711457074, 'samples': 10898432, 'steps': 21285, 'loss/train': 2.081801414489746} +03/04/2022 14:28:11 - INFO - codeparrot_training - Step 21286: {'lr': 0.00047934159919187504, 'samples': 10898944, 'steps': 21286, 'loss/train': 1.4552674293518066} +03/04/2022 14:28:11 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/04/2022 14:28:16 - INFO - codeparrot_training - Step 21287: {'lr': 0.0004793394868233383, 'samples': 10899456, 'steps': 21287, 'loss/train': 0.5746604800224304} +03/04/2022 14:28:20 - INFO - codeparrot_training - Step 21288: {'lr': 0.0004793373743514647, 'samples': 10899968, 'steps': 21288, 'loss/train': 3.265566110610962} +03/04/2022 14:28:20 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 14:28:25 - INFO - codeparrot_training - Step 21289: {'lr': 0.0004793352617762552, 'samples': 10900480, 'steps': 21289, 'loss/train': 1.5769035816192627} +03/04/2022 14:28:28 - INFO - codeparrot_training - Step 21290: {'lr': 0.0004793331490977108, 'samples': 10900992, 'steps': 21290, 'loss/train': 1.8881736993789673} +03/04/2022 14:28:28 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 14:28:34 - INFO - codeparrot_training - Step 21291: {'lr': 0.0004793310363158324, 'samples': 10901504, 'steps': 21291, 'loss/train': 2.6699297428131104} +03/04/2022 14:28:37 - INFO - codeparrot_training - Step 21292: {'lr': 0.00047932892343062103, 'samples': 10902016, 'steps': 21292, 'loss/train': 1.404732584953308} +03/04/2022 14:28:37 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 14:28:42 - INFO - codeparrot_training - Step 21293: {'lr': 0.00047932681044207757, 'samples': 10902528, 'steps': 21293, 'loss/train': 1.9775080680847168} +03/04/2022 14:28:45 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/04/2022 14:28:48 - INFO - codeparrot_training - Step 21294: {'lr': 0.0004793246973502029, 'samples': 10903040, 'steps': 21294, 'loss/train': 1.6194936037063599} +03/04/2022 14:28:51 - INFO - codeparrot_training - Step 21295: {'lr': 0.0004793225841549982, 'samples': 10903552, 'steps': 21295, 'loss/train': 0.8015040755271912} +03/04/2022 14:28:53 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 14:28:56 - INFO - codeparrot_training - Step 21296: {'lr': 0.00047932047085646416, 'samples': 10904064, 'steps': 21296, 'loss/train': 1.8374627828598022} +03/04/2022 14:28:59 - INFO - codeparrot_training - Step 21297: {'lr': 0.0004793183574546019, 'samples': 10904576, 'steps': 21297, 'loss/train': 0.8128302097320557} +03/04/2022 14:29:02 - INFO - codeparrot_training - Step 21298: {'lr': 0.0004793162439494123, 'samples': 10905088, 'steps': 21298, 'loss/train': 0.6968725919723511} +03/04/2022 14:29:03 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 14:29:08 - INFO - codeparrot_training - Step 21299: {'lr': 0.00047931413034089644, 'samples': 10905600, 'steps': 21299, 'loss/train': 1.9320310354232788} +03/04/2022 14:29:11 - INFO - codeparrot_training - Step 21300: {'lr': 0.00047931201662905503, 'samples': 10906112, 'steps': 21300, 'loss/train': 1.0564392805099487} +03/04/2022 14:29:11 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 14:29:16 - INFO - codeparrot_training - Step 21301: {'lr': 0.00047930990281388927, 'samples': 10906624, 'steps': 21301, 'loss/train': 1.7168023586273193} +03/04/2022 14:29:19 - INFO - codeparrot_training - Step 21302: {'lr': 0.00047930778889539996, 'samples': 10907136, 'steps': 21302, 'loss/train': 2.5230023860931396} +03/04/2022 14:29:19 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/04/2022 14:29:25 - INFO - codeparrot_training - Step 21303: {'lr': 0.00047930567487358813, 'samples': 10907648, 'steps': 21303, 'loss/train': 2.559621572494507} +03/04/2022 14:29:28 - INFO - codeparrot_training - Step 21304: {'lr': 0.00047930356074845466, 'samples': 10908160, 'steps': 21304, 'loss/train': 1.103300929069519} +03/04/2022 14:29:29 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 14:29:33 - INFO - codeparrot_training - Step 21305: {'lr': 0.0004793014465200005, 'samples': 10908672, 'steps': 21305, 'loss/train': 1.5795378684997559} +03/04/2022 14:29:36 - INFO - codeparrot_training - Step 21306: {'lr': 0.0004792993321882267, 'samples': 10909184, 'steps': 21306, 'loss/train': 0.7476072311401367} +03/04/2022 14:29:37 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 14:29:42 - INFO - codeparrot_training - Step 21307: {'lr': 0.0004792972177531342, 'samples': 10909696, 'steps': 21307, 'loss/train': 3.5414369106292725} +03/04/2022 14:29:45 - INFO - codeparrot_training - Step 21308: {'lr': 0.0004792951032147239, 'samples': 10910208, 'steps': 21308, 'loss/train': 2.594623327255249} +03/04/2022 14:29:45 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 14:29:50 - INFO - codeparrot_training - Step 21309: {'lr': 0.00047929298857299677, 'samples': 10910720, 'steps': 21309, 'loss/train': 2.068553924560547} +03/04/2022 14:29:53 - INFO - codeparrot_training - Step 21310: {'lr': 0.00047929087382795374, 'samples': 10911232, 'steps': 21310, 'loss/train': 1.318542242050171} +03/04/2022 14:29:54 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/04/2022 14:29:59 - INFO - codeparrot_training - Step 21311: {'lr': 0.0004792887589795957, 'samples': 10911744, 'steps': 21311, 'loss/train': 1.2751250267028809} +03/04/2022 14:30:02 - INFO - codeparrot_training - Step 21312: {'lr': 0.00047928664402792376, 'samples': 10912256, 'steps': 21312, 'loss/train': 1.4075323343276978} +03/04/2022 14:30:02 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 14:30:07 - INFO - codeparrot_training - Step 21313: {'lr': 0.0004792845289729388, 'samples': 10912768, 'steps': 21313, 'loss/train': 2.446117877960205} +03/04/2022 14:30:10 - INFO - codeparrot_training - Step 21314: {'lr': 0.00047928241381464177, 'samples': 10913280, 'steps': 21314, 'loss/train': 2.0164198875427246} +03/04/2022 14:30:11 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 14:30:16 - INFO - codeparrot_training - Step 21315: {'lr': 0.0004792802985530337, 'samples': 10913792, 'steps': 21315, 'loss/train': 1.168752670288086} +03/04/2022 14:30:19 - INFO - codeparrot_training - Step 21316: {'lr': 0.0004792781831881153, 'samples': 10914304, 'steps': 21316, 'loss/train': 1.9491513967514038} +03/04/2022 14:30:19 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 14:30:24 - INFO - codeparrot_training - Step 21317: {'lr': 0.0004792760677198878, 'samples': 10914816, 'steps': 21317, 'loss/train': 0.6838359236717224} +03/04/2022 14:30:27 - INFO - codeparrot_training - Step 21318: {'lr': 0.00047927395214835203, 'samples': 10915328, 'steps': 21318, 'loss/train': 1.6185662746429443} +03/04/2022 14:30:27 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 14:30:33 - INFO - codeparrot_training - Step 21319: {'lr': 0.0004792718364735089, 'samples': 10915840, 'steps': 21319, 'loss/train': 1.8563828468322754} +03/04/2022 14:30:36 - INFO - codeparrot_training - Step 21320: {'lr': 0.00047926972069535945, 'samples': 10916352, 'steps': 21320, 'loss/train': 2.3466405868530273} +03/04/2022 14:30:37 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 14:30:41 - INFO - codeparrot_training - Step 21321: {'lr': 0.00047926760481390465, 'samples': 10916864, 'steps': 21321, 'loss/train': 2.049877882003784} +03/04/2022 14:30:44 - INFO - codeparrot_training - Step 21322: {'lr': 0.00047926548882914533, 'samples': 10917376, 'steps': 21322, 'loss/train': 2.4038209915161133} +03/04/2022 14:30:45 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 14:30:49 - INFO - codeparrot_training - Step 21323: {'lr': 0.0004792633727410826, 'samples': 10917888, 'steps': 21323, 'loss/train': 1.2239258289337158} +03/04/2022 14:30:53 - INFO - codeparrot_training - Step 21324: {'lr': 0.0004792612565497172, 'samples': 10918400, 'steps': 21324, 'loss/train': 1.2301756143569946} +03/04/2022 14:30:53 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/04/2022 14:30:58 - INFO - codeparrot_training - Step 21325: {'lr': 0.00047925914025505036, 'samples': 10918912, 'steps': 21325, 'loss/train': 2.311999797821045} +03/04/2022 14:31:01 - INFO - codeparrot_training - Step 21326: {'lr': 0.0004792570238570828, 'samples': 10919424, 'steps': 21326, 'loss/train': 2.101301431655884} +03/04/2022 14:31:02 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 14:31:06 - INFO - codeparrot_training - Step 21327: {'lr': 0.00047925490735581557, 'samples': 10919936, 'steps': 21327, 'loss/train': 2.495593309402466} +03/04/2022 14:31:10 - INFO - codeparrot_training - Step 21328: {'lr': 0.00047925279075124963, 'samples': 10920448, 'steps': 21328, 'loss/train': 1.4839733839035034} +03/04/2022 14:31:10 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/04/2022 14:31:15 - INFO - codeparrot_training - Step 21329: {'lr': 0.00047925067404338596, 'samples': 10920960, 'steps': 21329, 'loss/train': 1.6059410572052002} +03/04/2022 14:31:18 - INFO - codeparrot_training - Step 21330: {'lr': 0.00047924855723222536, 'samples': 10921472, 'steps': 21330, 'loss/train': 1.5514646768569946} +03/04/2022 14:31:19 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 14:31:23 - INFO - codeparrot_training - Step 21331: {'lr': 0.000479246440317769, 'samples': 10921984, 'steps': 21331, 'loss/train': 1.4081480503082275} +03/04/2022 14:31:26 - INFO - codeparrot_training - Step 21332: {'lr': 0.00047924432330001776, 'samples': 10922496, 'steps': 21332, 'loss/train': 1.3725852966308594} +03/04/2022 14:31:27 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/04/2022 14:31:32 - INFO - codeparrot_training - Step 21333: {'lr': 0.0004792422061789725, 'samples': 10923008, 'steps': 21333, 'loss/train': 1.1545569896697998} +03/04/2022 14:31:35 - INFO - codeparrot_training - Step 21334: {'lr': 0.0004792400889546342, 'samples': 10923520, 'steps': 21334, 'loss/train': 2.1848702430725098} +03/04/2022 14:31:35 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 14:31:40 - INFO - codeparrot_training - Step 21335: {'lr': 0.00047923797162700393, 'samples': 10924032, 'steps': 21335, 'loss/train': 2.1090304851531982} +03/04/2022 14:31:43 - INFO - codeparrot_training - Step 21336: {'lr': 0.0004792358541960826, 'samples': 10924544, 'steps': 21336, 'loss/train': 1.8369355201721191} +03/04/2022 14:31:44 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 14:31:49 - INFO - codeparrot_training - Step 21337: {'lr': 0.000479233736661871, 'samples': 10925056, 'steps': 21337, 'loss/train': 1.6815305948257446} +03/04/2022 14:31:52 - INFO - codeparrot_training - Step 21338: {'lr': 0.0004792316190243703, 'samples': 10925568, 'steps': 21338, 'loss/train': 1.9934661388397217} +03/04/2022 14:31:52 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 14:31:57 - INFO - codeparrot_training - Step 21339: {'lr': 0.0004792295012835814, 'samples': 10926080, 'steps': 21339, 'loss/train': 2.0236146450042725} +03/04/2022 14:32:00 - INFO - codeparrot_training - Step 21340: {'lr': 0.0004792273834395052, 'samples': 10926592, 'steps': 21340, 'loss/train': 2.7067954540252686} +03/04/2022 14:32:01 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/04/2022 14:32:05 - INFO - codeparrot_training - Step 21341: {'lr': 0.0004792252654921426, 'samples': 10927104, 'steps': 21341, 'loss/train': 2.5637238025665283} +03/04/2022 14:32:08 - INFO - codeparrot_training - Step 21342: {'lr': 0.00047922314744149475, 'samples': 10927616, 'steps': 21342, 'loss/train': 0.33674442768096924} +03/04/2022 14:32:09 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 14:32:14 - INFO - codeparrot_training - Step 21343: {'lr': 0.0004792210292875624, 'samples': 10928128, 'steps': 21343, 'loss/train': 2.657834529876709} +03/04/2022 14:32:17 - INFO - codeparrot_training - Step 21344: {'lr': 0.00047921891103034665, 'samples': 10928640, 'steps': 21344, 'loss/train': 1.8752540349960327} +03/04/2022 14:32:17 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/04/2022 14:32:22 - INFO - codeparrot_training - Step 21345: {'lr': 0.0004792167926698483, 'samples': 10929152, 'steps': 21345, 'loss/train': 2.227583885192871} +03/04/2022 14:32:25 - INFO - codeparrot_training - Step 21346: {'lr': 0.0004792146742060685, 'samples': 10929664, 'steps': 21346, 'loss/train': 2.0447771549224854} +03/04/2022 14:32:26 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/04/2022 14:32:31 - INFO - codeparrot_training - Step 21347: {'lr': 0.00047921255563900813, 'samples': 10930176, 'steps': 21347, 'loss/train': 1.8645390272140503} +03/04/2022 14:32:34 - INFO - codeparrot_training - Step 21348: {'lr': 0.000479210436968668, 'samples': 10930688, 'steps': 21348, 'loss/train': 2.2230801582336426} +03/04/2022 14:32:34 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/04/2022 14:32:39 - INFO - codeparrot_training - Step 21349: {'lr': 0.0004792083181950493, 'samples': 10931200, 'steps': 21349, 'loss/train': 2.3117294311523438} +03/04/2022 14:32:42 - INFO - codeparrot_training - Step 21350: {'lr': 0.0004792061993181528, 'samples': 10931712, 'steps': 21350, 'loss/train': 1.3179750442504883} +03/04/2022 14:32:43 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 14:32:47 - INFO - codeparrot_training - Step 21351: {'lr': 0.00047920408033797954, 'samples': 10932224, 'steps': 21351, 'loss/train': 1.6069706678390503} +03/04/2022 14:32:50 - INFO - codeparrot_training - Step 21352: {'lr': 0.0004792019612545304, 'samples': 10932736, 'steps': 21352, 'loss/train': 1.1786447763442993} +03/04/2022 14:32:51 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 14:32:56 - INFO - codeparrot_training - Step 21353: {'lr': 0.00047919984206780647, 'samples': 10933248, 'steps': 21353, 'loss/train': 2.8706212043762207} +03/04/2022 14:32:59 - INFO - codeparrot_training - Step 21354: {'lr': 0.0004791977227778086, 'samples': 10933760, 'steps': 21354, 'loss/train': 1.6270660161972046} +03/04/2022 14:33:00 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 14:33:04 - INFO - codeparrot_training - Step 21355: {'lr': 0.00047919560338453783, 'samples': 10934272, 'steps': 21355, 'loss/train': 1.8051972389221191} +03/04/2022 14:33:07 - INFO - codeparrot_training - Step 21356: {'lr': 0.000479193483887995, 'samples': 10934784, 'steps': 21356, 'loss/train': 1.9108983278274536} +03/04/2022 14:33:08 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 14:33:13 - INFO - codeparrot_training - Step 21357: {'lr': 0.0004791913642881811, 'samples': 10935296, 'steps': 21357, 'loss/train': 1.7846661806106567} +03/04/2022 14:33:16 - INFO - codeparrot_training - Step 21358: {'lr': 0.00047918924458509717, 'samples': 10935808, 'steps': 21358, 'loss/train': 1.5067073106765747} +03/04/2022 14:33:16 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 14:33:21 - INFO - codeparrot_training - Step 21359: {'lr': 0.00047918712477874404, 'samples': 10936320, 'steps': 21359, 'loss/train': 1.8304567337036133} +03/04/2022 14:33:24 - INFO - codeparrot_training - Step 21360: {'lr': 0.00047918500486912276, 'samples': 10936832, 'steps': 21360, 'loss/train': 1.9056445360183716} +03/04/2022 14:33:25 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 14:33:29 - INFO - codeparrot_training - Step 21361: {'lr': 0.00047918288485623427, 'samples': 10937344, 'steps': 21361, 'loss/train': 1.833178162574768} +03/04/2022 14:33:33 - INFO - codeparrot_training - Step 21362: {'lr': 0.0004791807647400795, 'samples': 10937856, 'steps': 21362, 'loss/train': 1.2298716306686401} +03/04/2022 14:33:33 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/04/2022 14:33:38 - INFO - codeparrot_training - Step 21363: {'lr': 0.0004791786445206594, 'samples': 10938368, 'steps': 21363, 'loss/train': 1.4132949113845825} +03/04/2022 14:33:41 - INFO - codeparrot_training - Step 21364: {'lr': 0.00047917652419797495, 'samples': 10938880, 'steps': 21364, 'loss/train': 1.4455417394638062} +03/04/2022 14:33:41 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/04/2022 14:33:46 - INFO - codeparrot_training - Step 21365: {'lr': 0.0004791744037720271, 'samples': 10939392, 'steps': 21365, 'loss/train': 6.877840518951416} +03/04/2022 14:33:50 - INFO - codeparrot_training - Step 21366: {'lr': 0.00047917228324281683, 'samples': 10939904, 'steps': 21366, 'loss/train': 2.338094711303711} +03/04/2022 14:33:50 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 14:33:55 - INFO - codeparrot_training - Step 21367: {'lr': 0.00047917016261034496, 'samples': 10940416, 'steps': 21367, 'loss/train': 0.27147966623306274} +03/04/2022 14:33:58 - INFO - codeparrot_training - Step 21368: {'lr': 0.0004791680418746126, 'samples': 10940928, 'steps': 21368, 'loss/train': 1.8768056631088257} +03/04/2022 14:33:59 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 14:34:03 - INFO - codeparrot_training - Step 21369: {'lr': 0.00047916592103562075, 'samples': 10941440, 'steps': 21369, 'loss/train': 1.8415457010269165} +03/04/2022 14:34:06 - INFO - codeparrot_training - Step 21370: {'lr': 0.00047916380009337014, 'samples': 10941952, 'steps': 21370, 'loss/train': 1.7826203107833862} +03/04/2022 14:34:07 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/04/2022 14:34:12 - INFO - codeparrot_training - Step 21371: {'lr': 0.0004791616790478619, 'samples': 10942464, 'steps': 21371, 'loss/train': 1.73987877368927} +03/04/2022 14:34:15 - INFO - codeparrot_training - Step 21372: {'lr': 0.000479159557899097, 'samples': 10942976, 'steps': 21372, 'loss/train': 2.4192821979522705} +03/04/2022 14:34:17 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 14:34:21 - INFO - codeparrot_training - Step 21373: {'lr': 0.00047915743664707626, 'samples': 10943488, 'steps': 21373, 'loss/train': 6.676584243774414} +03/04/2022 14:34:24 - INFO - codeparrot_training - Step 21374: {'lr': 0.0004791553152918008, 'samples': 10944000, 'steps': 21374, 'loss/train': 2.3663370609283447} +03/04/2022 14:34:26 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 14:34:29 - INFO - codeparrot_training - Step 21375: {'lr': 0.0004791531938332714, 'samples': 10944512, 'steps': 21375, 'loss/train': 2.0791358947753906} +03/04/2022 14:34:32 - INFO - codeparrot_training - Step 21376: {'lr': 0.0004791510722714891, 'samples': 10945024, 'steps': 21376, 'loss/train': 1.7046338319778442} +03/04/2022 14:34:35 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 14:34:38 - INFO - codeparrot_training - Step 21377: {'lr': 0.000479148950606455, 'samples': 10945536, 'steps': 21377, 'loss/train': 1.1815587282180786} +03/04/2022 14:34:41 - INFO - codeparrot_training - Step 21378: {'lr': 0.00047914682883816977, 'samples': 10946048, 'steps': 21378, 'loss/train': 2.4111080169677734} +03/04/2022 14:34:43 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 14:34:46 - INFO - codeparrot_training - Step 21379: {'lr': 0.00047914470696663457, 'samples': 10946560, 'steps': 21379, 'loss/train': 1.0533455610275269} +03/04/2022 14:34:49 - INFO - codeparrot_training - Step 21380: {'lr': 0.00047914258499185037, 'samples': 10947072, 'steps': 21380, 'loss/train': 1.893118143081665} +03/04/2022 14:34:52 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 14:34:54 - INFO - codeparrot_training - Step 21381: {'lr': 0.000479140462913818, 'samples': 10947584, 'steps': 21381, 'loss/train': 1.881816029548645} +03/04/2022 14:34:57 - INFO - codeparrot_training - Step 21382: {'lr': 0.0004791383407325384, 'samples': 10948096, 'steps': 21382, 'loss/train': 1.0542922019958496} +03/04/2022 14:35:00 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 14:35:03 - INFO - codeparrot_training - Step 21383: {'lr': 0.0004791362184480127, 'samples': 10948608, 'steps': 21383, 'loss/train': 2.705415964126587} +03/04/2022 14:35:06 - INFO - codeparrot_training - Step 21384: {'lr': 0.0004791340960602417, 'samples': 10949120, 'steps': 21384, 'loss/train': 1.5238046646118164} +03/04/2022 14:35:09 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 14:35:12 - INFO - codeparrot_training - Step 21385: {'lr': 0.0004791319735692264, 'samples': 10949632, 'steps': 21385, 'loss/train': 1.2581837177276611} +03/04/2022 14:35:15 - INFO - codeparrot_training - Step 21386: {'lr': 0.00047912985097496786, 'samples': 10950144, 'steps': 21386, 'loss/train': 1.4499257802963257} +03/04/2022 14:35:18 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 14:35:20 - INFO - codeparrot_training - Step 21387: {'lr': 0.00047912772827746685, 'samples': 10950656, 'steps': 21387, 'loss/train': 0.7538501024246216} +03/04/2022 14:35:23 - INFO - codeparrot_training - Step 21388: {'lr': 0.00047912560547672453, 'samples': 10951168, 'steps': 21388, 'loss/train': 2.9867985248565674} +03/04/2022 14:35:26 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 14:35:28 - INFO - codeparrot_training - Step 21389: {'lr': 0.0004791234825727416, 'samples': 10951680, 'steps': 21389, 'loss/train': 1.7869586944580078} +03/04/2022 14:35:32 - INFO - codeparrot_training - Step 21390: {'lr': 0.0004791213595655193, 'samples': 10952192, 'steps': 21390, 'loss/train': 1.676133155822754} +03/04/2022 14:35:34 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 14:35:37 - INFO - codeparrot_training - Step 21391: {'lr': 0.0004791192364550584, 'samples': 10952704, 'steps': 21391, 'loss/train': 2.747164726257324} +03/04/2022 14:35:40 - INFO - codeparrot_training - Step 21392: {'lr': 0.00047911711324135985, 'samples': 10953216, 'steps': 21392, 'loss/train': 2.1707706451416016} +03/04/2022 14:35:43 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/04/2022 14:35:45 - INFO - codeparrot_training - Step 21393: {'lr': 0.00047911498992442476, 'samples': 10953728, 'steps': 21393, 'loss/train': 2.0733323097229004} +03/04/2022 14:35:49 - INFO - codeparrot_training - Step 21394: {'lr': 0.0004791128665042539, 'samples': 10954240, 'steps': 21394, 'loss/train': 1.882938265800476} +03/04/2022 14:35:51 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 14:35:54 - INFO - codeparrot_training - Step 21395: {'lr': 0.0004791107429808484, 'samples': 10954752, 'steps': 21395, 'loss/train': 1.7375019788742065} +03/04/2022 14:35:57 - INFO - codeparrot_training - Step 21396: {'lr': 0.00047910861935420915, 'samples': 10955264, 'steps': 21396, 'loss/train': 2.0429680347442627} +03/04/2022 14:36:00 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/04/2022 14:36:02 - INFO - codeparrot_training - Step 21397: {'lr': 0.00047910649562433696, 'samples': 10955776, 'steps': 21397, 'loss/train': 1.4532302618026733} +03/04/2022 14:36:05 - INFO - codeparrot_training - Step 21398: {'lr': 0.000479104371791233, 'samples': 10956288, 'steps': 21398, 'loss/train': 2.2226054668426514} +03/04/2022 14:36:08 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 14:36:11 - INFO - codeparrot_training - Step 21399: {'lr': 0.0004791022478548982, 'samples': 10956800, 'steps': 21399, 'loss/train': 1.7070503234863281} +03/04/2022 14:36:14 - INFO - codeparrot_training - Step 21400: {'lr': 0.0004791001238153334, 'samples': 10957312, 'steps': 21400, 'loss/train': 1.6513333320617676} +03/04/2022 14:36:17 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 14:36:19 - INFO - codeparrot_training - Step 21401: {'lr': 0.00047909799967253957, 'samples': 10957824, 'steps': 21401, 'loss/train': 1.1307201385498047} +03/04/2022 14:36:22 - INFO - codeparrot_training - Step 21402: {'lr': 0.00047909587542651776, 'samples': 10958336, 'steps': 21402, 'loss/train': 0.2515629231929779} +03/04/2022 14:36:25 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/04/2022 14:36:28 - INFO - codeparrot_training - Step 21403: {'lr': 0.00047909375107726894, 'samples': 10958848, 'steps': 21403, 'loss/train': 1.240584135055542} +03/04/2022 14:36:31 - INFO - codeparrot_training - Step 21404: {'lr': 0.000479091626624794, 'samples': 10959360, 'steps': 21404, 'loss/train': 2.0698139667510986} +03/04/2022 14:36:33 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/04/2022 14:36:36 - INFO - codeparrot_training - Step 21405: {'lr': 0.00047908950206909385, 'samples': 10959872, 'steps': 21405, 'loss/train': 2.3667664527893066} +03/04/2022 14:36:39 - INFO - codeparrot_training - Step 21406: {'lr': 0.0004790873774101695, 'samples': 10960384, 'steps': 21406, 'loss/train': 0.7792904376983643} +03/04/2022 14:36:42 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 14:36:44 - INFO - codeparrot_training - Step 21407: {'lr': 0.00047908525264802194, 'samples': 10960896, 'steps': 21407, 'loss/train': 1.5116703510284424} +03/04/2022 14:36:48 - INFO - codeparrot_training - Step 21408: {'lr': 0.00047908312778265213, 'samples': 10961408, 'steps': 21408, 'loss/train': 1.9496053457260132} +03/04/2022 14:36:50 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 14:36:53 - INFO - codeparrot_training - Step 21409: {'lr': 0.00047908100281406096, 'samples': 10961920, 'steps': 21409, 'loss/train': 1.6290923357009888} +03/04/2022 14:36:56 - INFO - codeparrot_training - Step 21410: {'lr': 0.00047907887774224946, 'samples': 10962432, 'steps': 21410, 'loss/train': 1.5754854679107666} +03/04/2022 14:36:59 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 14:37:01 - INFO - codeparrot_training - Step 21411: {'lr': 0.0004790767525672185, 'samples': 10962944, 'steps': 21411, 'loss/train': 1.5278072357177734} +03/04/2022 14:37:05 - INFO - codeparrot_training - Step 21412: {'lr': 0.0004790746272889691, 'samples': 10963456, 'steps': 21412, 'loss/train': 1.3580609560012817} +03/04/2022 14:37:07 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 14:37:10 - INFO - codeparrot_training - Step 21413: {'lr': 0.00047907250190750225, 'samples': 10963968, 'steps': 21413, 'loss/train': 0.5422234535217285} +03/04/2022 14:37:13 - INFO - codeparrot_training - Step 21414: {'lr': 0.0004790703764228188, 'samples': 10964480, 'steps': 21414, 'loss/train': 1.479291558265686} +03/04/2022 14:37:15 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/04/2022 14:37:18 - INFO - codeparrot_training - Step 21415: {'lr': 0.0004790682508349198, 'samples': 10964992, 'steps': 21415, 'loss/train': 1.4466280937194824} +03/04/2022 14:37:21 - INFO - codeparrot_training - Step 21416: {'lr': 0.00047906612514380623, 'samples': 10965504, 'steps': 21416, 'loss/train': 1.9544333219528198} +03/04/2022 14:37:24 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 14:37:27 - INFO - codeparrot_training - Step 21417: {'lr': 0.000479063999349479, 'samples': 10966016, 'steps': 21417, 'loss/train': 1.6393282413482666} +03/04/2022 14:37:30 - INFO - codeparrot_training - Step 21418: {'lr': 0.00047906187345193895, 'samples': 10966528, 'steps': 21418, 'loss/train': 2.806004047393799} +03/04/2022 14:37:33 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 14:37:35 - INFO - codeparrot_training - Step 21419: {'lr': 0.0004790597474511873, 'samples': 10967040, 'steps': 21419, 'loss/train': 1.5740598440170288} +03/04/2022 14:37:38 - INFO - codeparrot_training - Step 21420: {'lr': 0.0004790576213472248, 'samples': 10967552, 'steps': 21420, 'loss/train': 1.1114505529403687} +03/04/2022 14:37:42 - INFO - codeparrot_training - Step 21421: {'lr': 0.0004790554951400524, 'samples': 10968064, 'steps': 21421, 'loss/train': 2.140463352203369} +03/04/2022 14:37:42 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 14:37:47 - INFO - codeparrot_training - Step 21422: {'lr': 0.0004790533688296712, 'samples': 10968576, 'steps': 21422, 'loss/train': 1.3225126266479492} +03/04/2022 14:37:50 - INFO - codeparrot_training - Step 21423: {'lr': 0.0004790512424160821, 'samples': 10969088, 'steps': 21423, 'loss/train': 1.3168799877166748} +03/04/2022 14:37:50 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 14:37:55 - INFO - codeparrot_training - Step 21424: {'lr': 0.00047904911589928605, 'samples': 10969600, 'steps': 21424, 'loss/train': 1.9538557529449463} +03/04/2022 14:37:58 - INFO - codeparrot_training - Step 21425: {'lr': 0.00047904698927928404, 'samples': 10970112, 'steps': 21425, 'loss/train': 2.121349334716797} +03/04/2022 14:37:58 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 14:38:04 - INFO - codeparrot_training - Step 21426: {'lr': 0.0004790448625560769, 'samples': 10970624, 'steps': 21426, 'loss/train': 1.1823254823684692} +03/04/2022 14:38:07 - INFO - codeparrot_training - Step 21427: {'lr': 0.0004790427357296657, 'samples': 10971136, 'steps': 21427, 'loss/train': 1.8716572523117065} +03/04/2022 14:38:07 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/04/2022 14:38:12 - INFO - codeparrot_training - Step 21428: {'lr': 0.0004790406088000514, 'samples': 10971648, 'steps': 21428, 'loss/train': 2.1148195266723633} +03/04/2022 14:38:15 - INFO - codeparrot_training - Step 21429: {'lr': 0.00047903848176723493, 'samples': 10972160, 'steps': 21429, 'loss/train': 3.000239372253418} +03/04/2022 14:38:15 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 14:38:21 - INFO - codeparrot_training - Step 21430: {'lr': 0.0004790363546312172, 'samples': 10972672, 'steps': 21430, 'loss/train': 1.7237545251846313} +03/04/2022 14:38:24 - INFO - codeparrot_training - Step 21431: {'lr': 0.0004790342273919993, 'samples': 10973184, 'steps': 21431, 'loss/train': 2.141228675842285} +03/04/2022 14:38:24 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 14:38:29 - INFO - codeparrot_training - Step 21432: {'lr': 0.00047903210004958207, 'samples': 10973696, 'steps': 21432, 'loss/train': 2.4902491569519043} +03/04/2022 14:38:32 - INFO - codeparrot_training - Step 21433: {'lr': 0.0004790299726039665, 'samples': 10974208, 'steps': 21433, 'loss/train': 2.3511226177215576} +03/04/2022 14:38:32 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 14:38:37 - INFO - codeparrot_training - Step 21434: {'lr': 0.0004790278450551536, 'samples': 10974720, 'steps': 21434, 'loss/train': 1.6275179386138916} +03/04/2022 14:38:40 - INFO - codeparrot_training - Step 21435: {'lr': 0.00047902571740314427, 'samples': 10975232, 'steps': 21435, 'loss/train': 1.6710506677627563} +03/04/2022 14:38:41 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 14:38:46 - INFO - codeparrot_training - Step 21436: {'lr': 0.00047902358964793944, 'samples': 10975744, 'steps': 21436, 'loss/train': 2.1068241596221924} +03/04/2022 14:38:49 - INFO - codeparrot_training - Step 21437: {'lr': 0.0004790214617895402, 'samples': 10976256, 'steps': 21437, 'loss/train': 2.9839892387390137} +03/04/2022 14:38:49 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 14:38:54 - INFO - codeparrot_training - Step 21438: {'lr': 0.0004790193338279474, 'samples': 10976768, 'steps': 21438, 'loss/train': 1.7487232685089111} +03/04/2022 14:38:58 - INFO - codeparrot_training - Step 21439: {'lr': 0.000479017205763162, 'samples': 10977280, 'steps': 21439, 'loss/train': 1.7582899332046509} +03/04/2022 14:38:58 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 14:39:03 - INFO - codeparrot_training - Step 21440: {'lr': 0.000479015077595185, 'samples': 10977792, 'steps': 21440, 'loss/train': 1.2180627584457397} +03/04/2022 14:39:06 - INFO - codeparrot_training - Step 21441: {'lr': 0.0004790129493240173, 'samples': 10978304, 'steps': 21441, 'loss/train': 2.4189274311065674} +03/04/2022 14:39:06 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 14:39:11 - INFO - codeparrot_training - Step 21442: {'lr': 0.0004790108209496599, 'samples': 10978816, 'steps': 21442, 'loss/train': 1.5742186307907104} +03/04/2022 14:39:14 - INFO - codeparrot_training - Step 21443: {'lr': 0.00047900869247211384, 'samples': 10979328, 'steps': 21443, 'loss/train': 1.613582730293274} +03/04/2022 14:39:15 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 14:39:20 - INFO - codeparrot_training - Step 21444: {'lr': 0.0004790065638913799, 'samples': 10979840, 'steps': 21444, 'loss/train': 2.002614974975586} +03/04/2022 14:39:23 - INFO - codeparrot_training - Step 21445: {'lr': 0.00047900443520745915, 'samples': 10980352, 'steps': 21445, 'loss/train': 1.3103599548339844} +03/04/2022 14:39:23 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/04/2022 14:39:28 - INFO - codeparrot_training - Step 21446: {'lr': 0.0004790023064203526, 'samples': 10980864, 'steps': 21446, 'loss/train': 1.1245958805084229} +03/04/2022 14:39:31 - INFO - codeparrot_training - Step 21447: {'lr': 0.00047900017753006106, 'samples': 10981376, 'steps': 21447, 'loss/train': 1.8358019590377808} +03/04/2022 14:39:32 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/04/2022 14:39:37 - INFO - codeparrot_training - Step 21448: {'lr': 0.0004789980485365857, 'samples': 10981888, 'steps': 21448, 'loss/train': 1.849956750869751} +03/04/2022 14:39:40 - INFO - codeparrot_training - Step 21449: {'lr': 0.00047899591943992726, 'samples': 10982400, 'steps': 21449, 'loss/train': 1.408422827720642} +03/04/2022 14:39:40 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 14:39:45 - INFO - codeparrot_training - Step 21450: {'lr': 0.0004789937902400868, 'samples': 10982912, 'steps': 21450, 'loss/train': 1.7223128080368042} +03/04/2022 14:39:48 - INFO - codeparrot_training - Step 21451: {'lr': 0.00047899166093706523, 'samples': 10983424, 'steps': 21451, 'loss/train': 2.3465030193328857} +03/04/2022 14:39:48 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 14:39:53 - INFO - codeparrot_training - Step 21452: {'lr': 0.0004789895315308636, 'samples': 10983936, 'steps': 21452, 'loss/train': 0.9675999283790588} +03/04/2022 14:39:56 - INFO - codeparrot_training - Step 21453: {'lr': 0.00047898740202148284, 'samples': 10984448, 'steps': 21453, 'loss/train': 2.007962942123413} +03/04/2022 14:39:56 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 14:40:02 - INFO - codeparrot_training - Step 21454: {'lr': 0.0004789852724089239, 'samples': 10984960, 'steps': 21454, 'loss/train': 1.3526530265808105} +03/04/2022 14:40:05 - INFO - codeparrot_training - Step 21455: {'lr': 0.00047898314269318766, 'samples': 10985472, 'steps': 21455, 'loss/train': 1.8304344415664673} +03/04/2022 14:40:05 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/04/2022 14:40:10 - INFO - codeparrot_training - Step 21456: {'lr': 0.00047898101287427523, 'samples': 10985984, 'steps': 21456, 'loss/train': 2.2407093048095703} +03/04/2022 14:40:13 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/04/2022 14:40:15 - INFO - codeparrot_training - Step 21457: {'lr': 0.0004789788829521874, 'samples': 10986496, 'steps': 21457, 'loss/train': 1.9300391674041748} +03/04/2022 14:40:19 - INFO - codeparrot_training - Step 21458: {'lr': 0.0004789767529269253, 'samples': 10987008, 'steps': 21458, 'loss/train': 1.9247490167617798} +03/04/2022 14:40:21 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 14:40:24 - INFO - codeparrot_training - Step 21459: {'lr': 0.0004789746227984897, 'samples': 10987520, 'steps': 21459, 'loss/train': 2.1624252796173096} +03/04/2022 14:40:27 - INFO - codeparrot_training - Step 21460: {'lr': 0.0004789724925668818, 'samples': 10988032, 'steps': 21460, 'loss/train': 2.142587184906006} +03/04/2022 14:40:30 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 14:40:32 - INFO - codeparrot_training - Step 21461: {'lr': 0.00047897036223210234, 'samples': 10988544, 'steps': 21461, 'loss/train': 2.19892954826355} +03/04/2022 14:40:36 - INFO - codeparrot_training - Step 21462: {'lr': 0.00047896823179415237, 'samples': 10989056, 'steps': 21462, 'loss/train': 2.5395398139953613} +03/04/2022 14:40:38 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/04/2022 14:40:41 - INFO - codeparrot_training - Step 21463: {'lr': 0.0004789661012530329, 'samples': 10989568, 'steps': 21463, 'loss/train': 1.254885196685791} +03/04/2022 14:40:44 - INFO - codeparrot_training - Step 21464: {'lr': 0.00047896397060874485, 'samples': 10990080, 'steps': 21464, 'loss/train': 1.7813750505447388} +03/04/2022 14:40:47 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 14:40:49 - INFO - codeparrot_training - Step 21465: {'lr': 0.0004789618398612891, 'samples': 10990592, 'steps': 21465, 'loss/train': 1.9561644792556763} +03/04/2022 14:40:52 - INFO - codeparrot_training - Step 21466: {'lr': 0.0004789597090106667, 'samples': 10991104, 'steps': 21466, 'loss/train': 2.4381911754608154} +03/04/2022 14:40:55 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 14:40:58 - INFO - codeparrot_training - Step 21467: {'lr': 0.00047895757805687864, 'samples': 10991616, 'steps': 21467, 'loss/train': 1.7176200151443481} +03/04/2022 14:41:01 - INFO - codeparrot_training - Step 21468: {'lr': 0.0004789554469999258, 'samples': 10992128, 'steps': 21468, 'loss/train': 1.1600180864334106} +03/04/2022 14:41:03 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 14:41:06 - INFO - codeparrot_training - Step 21469: {'lr': 0.0004789533158398091, 'samples': 10992640, 'steps': 21469, 'loss/train': 1.734182357788086} +03/04/2022 14:41:09 - INFO - codeparrot_training - Step 21470: {'lr': 0.00047895118457652965, 'samples': 10993152, 'steps': 21470, 'loss/train': 2.157457113265991} +03/04/2022 14:41:11 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 14:41:15 - INFO - codeparrot_training - Step 21471: {'lr': 0.0004789490532100883, 'samples': 10993664, 'steps': 21471, 'loss/train': 1.8903000354766846} +03/04/2022 14:41:18 - INFO - codeparrot_training - Step 21472: {'lr': 0.000478946921740486, 'samples': 10994176, 'steps': 21472, 'loss/train': 1.1657543182373047} +03/04/2022 14:41:20 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 14:41:23 - INFO - codeparrot_training - Step 21473: {'lr': 0.0004789447901677238, 'samples': 10994688, 'steps': 21473, 'loss/train': 2.1841516494750977} +03/04/2022 14:41:26 - INFO - codeparrot_training - Step 21474: {'lr': 0.00047894265849180264, 'samples': 10995200, 'steps': 21474, 'loss/train': 1.8514823913574219} +03/04/2022 14:41:28 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 14:41:31 - INFO - codeparrot_training - Step 21475: {'lr': 0.00047894052671272337, 'samples': 10995712, 'steps': 21475, 'loss/train': 1.860870599746704} +03/04/2022 14:41:35 - INFO - codeparrot_training - Step 21476: {'lr': 0.0004789383948304871, 'samples': 10996224, 'steps': 21476, 'loss/train': 2.0299603939056396} +03/04/2022 14:41:37 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/04/2022 14:41:40 - INFO - codeparrot_training - Step 21477: {'lr': 0.00047893626284509466, 'samples': 10996736, 'steps': 21477, 'loss/train': 2.009037971496582} +03/04/2022 14:41:43 - INFO - codeparrot_training - Step 21478: {'lr': 0.0004789341307565471, 'samples': 10997248, 'steps': 21478, 'loss/train': 2.1882731914520264} +03/04/2022 14:41:45 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 14:41:48 - INFO - codeparrot_training - Step 21479: {'lr': 0.0004789319985648454, 'samples': 10997760, 'steps': 21479, 'loss/train': 1.9371702671051025} +03/04/2022 14:41:51 - INFO - codeparrot_training - Step 21480: {'lr': 0.0004789298662699905, 'samples': 10998272, 'steps': 21480, 'loss/train': 1.4941586256027222} +03/04/2022 14:41:54 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 14:41:57 - INFO - codeparrot_training - Step 21481: {'lr': 0.0004789277338719832, 'samples': 10998784, 'steps': 21481, 'loss/train': 0.9644403457641602} +03/04/2022 14:42:00 - INFO - codeparrot_training - Step 21482: {'lr': 0.0004789256013708246, 'samples': 10999296, 'steps': 21482, 'loss/train': 1.8315128087997437} +03/04/2022 14:42:03 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/04/2022 14:42:05 - INFO - codeparrot_training - Step 21483: {'lr': 0.0004789234687665158, 'samples': 10999808, 'steps': 21483, 'loss/train': 1.4685949087142944} +03/04/2022 14:42:08 - INFO - codeparrot_training - Step 21484: {'lr': 0.0004789213360590575, 'samples': 11000320, 'steps': 21484, 'loss/train': 2.1373486518859863} +03/04/2022 14:42:11 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 14:42:14 - INFO - codeparrot_training - Step 21485: {'lr': 0.00047891920324845085, 'samples': 11000832, 'steps': 21485, 'loss/train': 1.3258105516433716} +03/04/2022 14:42:17 - INFO - codeparrot_training - Step 21486: {'lr': 0.00047891707033469665, 'samples': 11001344, 'steps': 21486, 'loss/train': 2.312465190887451} +03/04/2022 14:42:20 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 14:42:22 - INFO - codeparrot_training - Step 21487: {'lr': 0.00047891493731779607, 'samples': 11001856, 'steps': 21487, 'loss/train': 1.0944656133651733} +03/04/2022 14:42:25 - INFO - codeparrot_training - Step 21488: {'lr': 0.00047891280419774985, 'samples': 11002368, 'steps': 21488, 'loss/train': 2.453826427459717} +03/04/2022 14:42:28 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 14:42:31 - INFO - codeparrot_training - Step 21489: {'lr': 0.0004789106709745591, 'samples': 11002880, 'steps': 21489, 'loss/train': 1.8672728538513184} +03/04/2022 14:42:34 - INFO - codeparrot_training - Step 21490: {'lr': 0.0004789085376482247, 'samples': 11003392, 'steps': 21490, 'loss/train': 1.3548579216003418} +03/04/2022 14:42:36 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/04/2022 14:42:39 - INFO - codeparrot_training - Step 21491: {'lr': 0.00047890640421874775, 'samples': 11003904, 'steps': 21491, 'loss/train': 1.7006772756576538} +03/04/2022 14:42:42 - INFO - codeparrot_training - Step 21492: {'lr': 0.000478904270686129, 'samples': 11004416, 'steps': 21492, 'loss/train': 1.7065300941467285} +03/04/2022 14:42:45 - INFO - codeparrot_training - Step 21493: {'lr': 0.00047890213705036955, 'samples': 11004928, 'steps': 21493, 'loss/train': 2.139211893081665} +03/04/2022 14:42:45 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 14:42:51 - INFO - codeparrot_training - Step 21494: {'lr': 0.00047890000331147033, 'samples': 11005440, 'steps': 21494, 'loss/train': 2.007594347000122} +03/04/2022 14:42:54 - INFO - codeparrot_training - Step 21495: {'lr': 0.0004788978694694323, 'samples': 11005952, 'steps': 21495, 'loss/train': 1.5440399646759033} +03/04/2022 14:42:54 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/04/2022 14:42:59 - INFO - codeparrot_training - Step 21496: {'lr': 0.0004788957355242564, 'samples': 11006464, 'steps': 21496, 'loss/train': 2.0802841186523438} +03/04/2022 14:43:02 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/04/2022 14:43:04 - INFO - codeparrot_training - Step 21497: {'lr': 0.00047889360147594363, 'samples': 11006976, 'steps': 21497, 'loss/train': 1.7139008045196533} +03/04/2022 14:43:08 - INFO - codeparrot_training - Step 21498: {'lr': 0.00047889146732449497, 'samples': 11007488, 'steps': 21498, 'loss/train': 1.6637988090515137} +03/04/2022 14:43:10 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 14:43:13 - INFO - codeparrot_training - Step 21499: {'lr': 0.00047888933306991136, 'samples': 11008000, 'steps': 21499, 'loss/train': 2.4190354347229004} +03/04/2022 14:43:16 - INFO - codeparrot_training - Step 21500: {'lr': 0.00047888719871219367, 'samples': 11008512, 'steps': 21500, 'loss/train': 1.8156580924987793} +03/04/2022 14:43:19 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 14:43:21 - INFO - codeparrot_training - Step 21501: {'lr': 0.00047888506425134293, 'samples': 11009024, 'steps': 21501, 'loss/train': 1.4511713981628418} +03/04/2022 14:43:25 - INFO - codeparrot_training - Step 21502: {'lr': 0.0004788829296873601, 'samples': 11009536, 'steps': 21502, 'loss/train': 1.841587781906128} +03/04/2022 14:43:27 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 14:43:30 - INFO - codeparrot_training - Step 21503: {'lr': 0.0004788807950202463, 'samples': 11010048, 'steps': 21503, 'loss/train': 1.9730677604675293} +03/04/2022 14:43:33 - INFO - codeparrot_training - Step 21504: {'lr': 0.00047887866025000226, 'samples': 11010560, 'steps': 21504, 'loss/train': 1.7167561054229736} +03/04/2022 14:43:38 - INFO - codeparrot_training - Step 21505: {'lr': 0.000478876525376629, 'samples': 11011072, 'steps': 21505, 'loss/train': 2.352836847305298} +03/04/2022 14:43:41 - INFO - codeparrot_training - Step 21506: {'lr': 0.00047887439040012755, 'samples': 11011584, 'steps': 21506, 'loss/train': 1.7617042064666748} +03/04/2022 14:43:44 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/04/2022 14:43:47 - INFO - codeparrot_training - Step 21507: {'lr': 0.0004788722553204988, 'samples': 11012096, 'steps': 21507, 'loss/train': 2.0973446369171143} +03/04/2022 14:43:50 - INFO - codeparrot_training - Step 21508: {'lr': 0.0004788701201377438, 'samples': 11012608, 'steps': 21508, 'loss/train': 2.298877000808716} +03/04/2022 14:43:52 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 14:43:55 - INFO - codeparrot_training - Step 21509: {'lr': 0.0004788679848518633, 'samples': 11013120, 'steps': 21509, 'loss/train': 1.7931782007217407} +03/04/2022 14:43:58 - INFO - codeparrot_training - Step 21510: {'lr': 0.0004788658494628586, 'samples': 11013632, 'steps': 21510, 'loss/train': 1.6538856029510498} +03/04/2022 14:44:01 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/04/2022 14:44:04 - INFO - codeparrot_training - Step 21511: {'lr': 0.0004788637139707304, 'samples': 11014144, 'steps': 21511, 'loss/train': 1.9512265920639038} +03/04/2022 14:44:07 - INFO - codeparrot_training - Step 21512: {'lr': 0.00047886157837547975, 'samples': 11014656, 'steps': 21512, 'loss/train': 4.8209381103515625} +03/04/2022 14:44:09 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 14:44:12 - INFO - codeparrot_training - Step 21513: {'lr': 0.0004788594426771076, 'samples': 11015168, 'steps': 21513, 'loss/train': 2.5708723068237305} +03/04/2022 14:44:15 - INFO - codeparrot_training - Step 21514: {'lr': 0.0004788573068756149, 'samples': 11015680, 'steps': 21514, 'loss/train': 2.220301628112793} +03/04/2022 14:44:18 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 14:44:20 - INFO - codeparrot_training - Step 21515: {'lr': 0.0004788551709710027, 'samples': 11016192, 'steps': 21515, 'loss/train': 1.7218488454818726} +03/04/2022 14:44:24 - INFO - codeparrot_training - Step 21516: {'lr': 0.0004788530349632718, 'samples': 11016704, 'steps': 21516, 'loss/train': 1.4557878971099854} +03/04/2022 14:44:26 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 14:44:29 - INFO - codeparrot_training - Step 21517: {'lr': 0.00047885089885242333, 'samples': 11017216, 'steps': 21517, 'loss/train': 1.94817316532135} +03/04/2022 14:44:32 - INFO - codeparrot_training - Step 21518: {'lr': 0.0004788487626384581, 'samples': 11017728, 'steps': 21518, 'loss/train': 1.7355237007141113} +03/04/2022 14:44:34 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/04/2022 14:44:37 - INFO - codeparrot_training - Step 21519: {'lr': 0.0004788466263213772, 'samples': 11018240, 'steps': 21519, 'loss/train': 2.206615447998047} +03/04/2022 14:44:41 - INFO - codeparrot_training - Step 21520: {'lr': 0.00047884448990118155, 'samples': 11018752, 'steps': 21520, 'loss/train': 1.5198429822921753} +03/04/2022 14:44:43 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 14:44:46 - INFO - codeparrot_training - Step 21521: {'lr': 0.0004788423533778721, 'samples': 11019264, 'steps': 21521, 'loss/train': 1.537156581878662} +03/04/2022 14:44:49 - INFO - codeparrot_training - Step 21522: {'lr': 0.00047884021675144987, 'samples': 11019776, 'steps': 21522, 'loss/train': 2.3668158054351807} +03/04/2022 14:44:52 - INFO - codeparrot_training - Step 21523: {'lr': 0.0004788380800219156, 'samples': 11020288, 'steps': 21523, 'loss/train': 2.0248425006866455} +03/04/2022 14:44:52 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 14:44:58 - INFO - codeparrot_training - Step 21524: {'lr': 0.0004788359431892706, 'samples': 11020800, 'steps': 21524, 'loss/train': 1.0499323606491089} +03/04/2022 14:45:01 - INFO - codeparrot_training - Step 21525: {'lr': 0.00047883380625351557, 'samples': 11021312, 'steps': 21525, 'loss/train': 1.4792402982711792} +03/04/2022 14:45:01 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 14:45:06 - INFO - codeparrot_training - Step 21526: {'lr': 0.00047883166921465156, 'samples': 11021824, 'steps': 21526, 'loss/train': 0.5955403447151184} +03/04/2022 14:45:09 - INFO - codeparrot_training - Step 21527: {'lr': 0.00047882953207267954, 'samples': 11022336, 'steps': 21527, 'loss/train': 2.020458221435547} +03/04/2022 14:45:10 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 14:45:15 - INFO - codeparrot_training - Step 21528: {'lr': 0.00047882739482760044, 'samples': 11022848, 'steps': 21528, 'loss/train': 3.0137877464294434} +03/04/2022 14:45:18 - INFO - codeparrot_training - Step 21529: {'lr': 0.0004788252574794153, 'samples': 11023360, 'steps': 21529, 'loss/train': 2.3676369190216064} +03/04/2022 14:45:18 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 14:45:23 - INFO - codeparrot_training - Step 21530: {'lr': 0.000478823120028125, 'samples': 11023872, 'steps': 21530, 'loss/train': 1.057709813117981} +03/04/2022 14:45:26 - INFO - codeparrot_training - Step 21531: {'lr': 0.0004788209824737305, 'samples': 11024384, 'steps': 21531, 'loss/train': 2.3037679195404053} +03/04/2022 14:45:27 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/04/2022 14:45:32 - INFO - codeparrot_training - Step 21532: {'lr': 0.00047881884481623286, 'samples': 11024896, 'steps': 21532, 'loss/train': 2.2995364665985107} +03/04/2022 14:45:35 - INFO - codeparrot_training - Step 21533: {'lr': 0.000478816707055633, 'samples': 11025408, 'steps': 21533, 'loss/train': 1.383269190788269} +03/04/2022 14:45:35 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 14:45:40 - INFO - codeparrot_training - Step 21534: {'lr': 0.0004788145691919318, 'samples': 11025920, 'steps': 21534, 'loss/train': 2.041614294052124} +03/04/2022 14:45:43 - INFO - codeparrot_training - Step 21535: {'lr': 0.0004788124312251303, 'samples': 11026432, 'steps': 21535, 'loss/train': 1.1838492155075073} +03/04/2022 14:45:45 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 14:45:49 - INFO - codeparrot_training - Step 21536: {'lr': 0.0004788102931552294, 'samples': 11026944, 'steps': 21536, 'loss/train': 1.5915859937667847} +03/04/2022 14:45:52 - INFO - codeparrot_training - Step 21537: {'lr': 0.0004788081549822302, 'samples': 11027456, 'steps': 21537, 'loss/train': 1.9229464530944824} +03/04/2022 14:45:54 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 14:45:57 - INFO - codeparrot_training - Step 21538: {'lr': 0.0004788060167061335, 'samples': 11027968, 'steps': 21538, 'loss/train': 2.2973062992095947} +03/04/2022 14:46:00 - INFO - codeparrot_training - Step 21539: {'lr': 0.0004788038783269404, 'samples': 11028480, 'steps': 21539, 'loss/train': 2.0456817150115967} +03/04/2022 14:46:03 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 14:46:06 - INFO - codeparrot_training - Step 21540: {'lr': 0.00047880173984465174, 'samples': 11028992, 'steps': 21540, 'loss/train': 1.6001238822937012} +03/04/2022 14:46:09 - INFO - codeparrot_training - Step 21541: {'lr': 0.0004787996012592686, 'samples': 11029504, 'steps': 21541, 'loss/train': 1.678215503692627} +03/04/2022 14:46:11 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 14:46:14 - INFO - codeparrot_training - Step 21542: {'lr': 0.0004787974625707919, 'samples': 11030016, 'steps': 21542, 'loss/train': 1.3499032258987427} +03/04/2022 14:46:17 - INFO - codeparrot_training - Step 21543: {'lr': 0.0004787953237792225, 'samples': 11030528, 'steps': 21543, 'loss/train': 1.7572563886642456} +03/04/2022 14:46:20 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/04/2022 14:46:23 - INFO - codeparrot_training - Step 21544: {'lr': 0.0004787931848845616, 'samples': 11031040, 'steps': 21544, 'loss/train': 2.4015235900878906} +03/04/2022 14:46:26 - INFO - codeparrot_training - Step 21545: {'lr': 0.00047879104588680987, 'samples': 11031552, 'steps': 21545, 'loss/train': 1.8358739614486694} +03/04/2022 14:46:28 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/04/2022 14:46:31 - INFO - codeparrot_training - Step 21546: {'lr': 0.00047878890678596854, 'samples': 11032064, 'steps': 21546, 'loss/train': 1.4450644254684448} +03/04/2022 14:46:34 - INFO - codeparrot_training - Step 21547: {'lr': 0.00047878676758203844, 'samples': 11032576, 'steps': 21547, 'loss/train': 1.9784510135650635} +03/04/2022 14:46:36 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/04/2022 14:46:40 - INFO - codeparrot_training - Step 21548: {'lr': 0.00047878462827502055, 'samples': 11033088, 'steps': 21548, 'loss/train': 2.089634895324707} +03/04/2022 14:46:43 - INFO - codeparrot_training - Step 21549: {'lr': 0.0004787824888649158, 'samples': 11033600, 'steps': 21549, 'loss/train': 2.3916525840759277} +03/04/2022 14:46:45 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 14:46:48 - INFO - codeparrot_training - Step 21550: {'lr': 0.0004787803493517252, 'samples': 11034112, 'steps': 21550, 'loss/train': 2.3134846687316895} +03/04/2022 14:46:51 - INFO - codeparrot_training - Step 21551: {'lr': 0.0004787782097354497, 'samples': 11034624, 'steps': 21551, 'loss/train': 1.5790374279022217} +03/04/2022 14:46:53 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/04/2022 14:46:57 - INFO - codeparrot_training - Step 21552: {'lr': 0.00047877607001609035, 'samples': 11035136, 'steps': 21552, 'loss/train': 1.7583547830581665} +03/04/2022 14:47:00 - INFO - codeparrot_training - Step 21553: {'lr': 0.00047877393019364796, 'samples': 11035648, 'steps': 21553, 'loss/train': 1.8453491926193237} +03/04/2022 14:47:02 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/04/2022 14:47:05 - INFO - codeparrot_training - Step 21554: {'lr': 0.0004787717902681236, 'samples': 11036160, 'steps': 21554, 'loss/train': 5.243675231933594} +03/04/2022 14:47:08 - INFO - codeparrot_training - Step 21555: {'lr': 0.00047876965023951814, 'samples': 11036672, 'steps': 21555, 'loss/train': 1.7240666151046753} +03/04/2022 14:47:10 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 14:47:13 - INFO - codeparrot_training - Step 21556: {'lr': 0.00047876751010783266, 'samples': 11037184, 'steps': 21556, 'loss/train': 3.3290889263153076} +03/04/2022 14:47:17 - INFO - codeparrot_training - Step 21557: {'lr': 0.0004787653698730681, 'samples': 11037696, 'steps': 21557, 'loss/train': 1.8469316959381104} +03/04/2022 14:47:19 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/04/2022 14:47:22 - INFO - codeparrot_training - Step 21558: {'lr': 0.00047876322953522535, 'samples': 11038208, 'steps': 21558, 'loss/train': 2.0145440101623535} +03/04/2022 14:47:25 - INFO - codeparrot_training - Step 21559: {'lr': 0.00047876108909430536, 'samples': 11038720, 'steps': 21559, 'loss/train': 0.9881811738014221} +03/04/2022 14:47:27 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 14:47:30 - INFO - codeparrot_training - Step 21560: {'lr': 0.00047875894855030923, 'samples': 11039232, 'steps': 21560, 'loss/train': 1.459581971168518} +03/04/2022 14:47:33 - INFO - codeparrot_training - Step 21561: {'lr': 0.00047875680790323785, 'samples': 11039744, 'steps': 21561, 'loss/train': 1.5403809547424316} +03/04/2022 14:47:35 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 14:47:39 - INFO - codeparrot_training - Step 21562: {'lr': 0.0004787546671530921, 'samples': 11040256, 'steps': 21562, 'loss/train': 1.7764769792556763} +03/04/2022 14:47:42 - INFO - codeparrot_training - Step 21563: {'lr': 0.0004787525262998731, 'samples': 11040768, 'steps': 21563, 'loss/train': 2.0692877769470215} +03/04/2022 14:47:44 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 14:47:47 - INFO - codeparrot_training - Step 21564: {'lr': 0.0004787503853435817, 'samples': 11041280, 'steps': 21564, 'loss/train': 2.2987916469573975} +03/04/2022 14:47:50 - INFO - codeparrot_training - Step 21565: {'lr': 0.00047874824428421897, 'samples': 11041792, 'steps': 21565, 'loss/train': 1.960558295249939} +03/04/2022 14:47:52 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 14:47:55 - INFO - codeparrot_training - Step 21566: {'lr': 0.0004787461031217858, 'samples': 11042304, 'steps': 21566, 'loss/train': 1.108335018157959} +03/04/2022 14:47:59 - INFO - codeparrot_training - Step 21567: {'lr': 0.0004787439618562831, 'samples': 11042816, 'steps': 21567, 'loss/train': 1.7978209257125854} +03/04/2022 14:48:00 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/04/2022 14:48:04 - INFO - codeparrot_training - Step 21568: {'lr': 0.000478741820487712, 'samples': 11043328, 'steps': 21568, 'loss/train': 1.945860505104065} +03/04/2022 14:48:07 - INFO - codeparrot_training - Step 21569: {'lr': 0.0004787396790160733, 'samples': 11043840, 'steps': 21569, 'loss/train': 1.7507333755493164} +03/04/2022 14:48:09 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 14:48:12 - INFO - codeparrot_training - Step 21570: {'lr': 0.00047873753744136807, 'samples': 11044352, 'steps': 21570, 'loss/train': 1.6642969846725464} +03/04/2022 14:48:16 - INFO - codeparrot_training - Step 21571: {'lr': 0.0004787353957635971, 'samples': 11044864, 'steps': 21571, 'loss/train': 1.5343148708343506} +03/04/2022 14:48:17 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 14:48:21 - INFO - codeparrot_training - Step 21572: {'lr': 0.0004787332539827617, 'samples': 11045376, 'steps': 21572, 'loss/train': 1.265346646308899} +03/04/2022 14:48:24 - INFO - codeparrot_training - Step 21573: {'lr': 0.00047873111209886245, 'samples': 11045888, 'steps': 21573, 'loss/train': 1.1103456020355225} +03/04/2022 14:48:25 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 14:48:29 - INFO - codeparrot_training - Step 21574: {'lr': 0.00047872897011190063, 'samples': 11046400, 'steps': 21574, 'loss/train': 1.6689702272415161} +03/04/2022 14:48:33 - INFO - codeparrot_training - Step 21575: {'lr': 0.00047872682802187693, 'samples': 11046912, 'steps': 21575, 'loss/train': 1.1269067525863647} +03/04/2022 14:48:34 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 14:48:38 - INFO - codeparrot_training - Step 21576: {'lr': 0.0004787246858287926, 'samples': 11047424, 'steps': 21576, 'loss/train': 1.9633636474609375} +03/04/2022 14:48:41 - INFO - codeparrot_training - Step 21577: {'lr': 0.0004787225435326483, 'samples': 11047936, 'steps': 21577, 'loss/train': 2.339289903640747} +03/04/2022 14:48:42 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/04/2022 14:48:46 - INFO - codeparrot_training - Step 21578: {'lr': 0.0004787204011334453, 'samples': 11048448, 'steps': 21578, 'loss/train': 0.6621271967887878} +03/04/2022 14:48:49 - INFO - codeparrot_training - Step 21579: {'lr': 0.0004787182586311843, 'samples': 11048960, 'steps': 21579, 'loss/train': 2.0520291328430176} +03/04/2022 14:48:51 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 14:48:55 - INFO - codeparrot_training - Step 21580: {'lr': 0.0004787161160258664, 'samples': 11049472, 'steps': 21580, 'loss/train': 1.9009538888931274} +03/04/2022 14:48:58 - INFO - codeparrot_training - Step 21581: {'lr': 0.00047871397331749254, 'samples': 11049984, 'steps': 21581, 'loss/train': 1.6777318716049194} +03/04/2022 14:48:59 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 14:49:03 - INFO - codeparrot_training - Step 21582: {'lr': 0.00047871183050606376, 'samples': 11050496, 'steps': 21582, 'loss/train': 1.5293546915054321} +03/04/2022 14:49:06 - INFO - codeparrot_training - Step 21583: {'lr': 0.00047870968759158096, 'samples': 11051008, 'steps': 21583, 'loss/train': 2.180189609527588} +03/04/2022 14:49:08 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 14:49:12 - INFO - codeparrot_training - Step 21584: {'lr': 0.000478707544574045, 'samples': 11051520, 'steps': 21584, 'loss/train': 2.138096809387207} +03/04/2022 14:49:15 - INFO - codeparrot_training - Step 21585: {'lr': 0.000478705401453457, 'samples': 11052032, 'steps': 21585, 'loss/train': 2.0357449054718018} +03/04/2022 14:49:16 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 14:49:20 - INFO - codeparrot_training - Step 21586: {'lr': 0.000478703258229818, 'samples': 11052544, 'steps': 21586, 'loss/train': 2.038154363632202} +03/04/2022 14:49:23 - INFO - codeparrot_training - Step 21587: {'lr': 0.0004787011149031287, 'samples': 11053056, 'steps': 21587, 'loss/train': 1.6752598285675049} +03/04/2022 14:49:24 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/04/2022 14:49:29 - INFO - codeparrot_training - Step 21588: {'lr': 0.0004786989714733902, 'samples': 11053568, 'steps': 21588, 'loss/train': 1.709121584892273} +03/04/2022 14:49:32 - INFO - codeparrot_training - Step 21589: {'lr': 0.0004786968279406035, 'samples': 11054080, 'steps': 21589, 'loss/train': 1.585824966430664} +03/04/2022 14:49:33 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 14:49:37 - INFO - codeparrot_training - Step 21590: {'lr': 0.0004786946843047696, 'samples': 11054592, 'steps': 21590, 'loss/train': 1.5783535242080688} +03/04/2022 14:49:40 - INFO - codeparrot_training - Step 21591: {'lr': 0.00047869254056588927, 'samples': 11055104, 'steps': 21591, 'loss/train': 1.7673418521881104} +03/04/2022 14:49:41 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/04/2022 14:49:46 - INFO - codeparrot_training - Step 21592: {'lr': 0.0004786903967239637, 'samples': 11055616, 'steps': 21592, 'loss/train': 2.1486194133758545} +03/04/2022 14:49:49 - INFO - codeparrot_training - Step 21593: {'lr': 0.0004786882527789938, 'samples': 11056128, 'steps': 21593, 'loss/train': 1.8065016269683838} +03/04/2022 14:49:53 - INFO - codeparrot_training - Step 21594: {'lr': 0.00047868610873098047, 'samples': 11056640, 'steps': 21594, 'loss/train': 1.7069061994552612} +03/04/2022 14:49:54 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 14:49:58 - INFO - codeparrot_training - Step 21595: {'lr': 0.0004786839645799247, 'samples': 11057152, 'steps': 21595, 'loss/train': 2.43520188331604} +03/04/2022 14:50:01 - INFO - codeparrot_training - Step 21596: {'lr': 0.00047868182032582746, 'samples': 11057664, 'steps': 21596, 'loss/train': 1.8797132968902588} +03/04/2022 14:50:02 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 14:50:06 - INFO - codeparrot_training - Step 21597: {'lr': 0.00047867967596868974, 'samples': 11058176, 'steps': 21597, 'loss/train': 2.293253183364868} +03/04/2022 14:50:09 - INFO - codeparrot_training - Step 21598: {'lr': 0.00047867753150851244, 'samples': 11058688, 'steps': 21598, 'loss/train': 1.947310209274292} +03/04/2022 14:50:10 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 14:50:15 - INFO - codeparrot_training - Step 21599: {'lr': 0.0004786753869452966, 'samples': 11059200, 'steps': 21599, 'loss/train': 1.0892637968063354} +03/04/2022 14:50:18 - INFO - codeparrot_training - Step 21600: {'lr': 0.00047867324227904317, 'samples': 11059712, 'steps': 21600, 'loss/train': 1.5483022928237915} +03/04/2022 14:50:19 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 14:50:23 - INFO - codeparrot_training - Step 21601: {'lr': 0.0004786710975097531, 'samples': 11060224, 'steps': 21601, 'loss/train': 1.3634780645370483} +03/04/2022 14:50:26 - INFO - codeparrot_training - Step 21602: {'lr': 0.0004786689526374274, 'samples': 11060736, 'steps': 21602, 'loss/train': 1.9115177392959595} +03/04/2022 14:50:27 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/04/2022 14:50:31 - INFO - codeparrot_training - Step 21603: {'lr': 0.00047866680766206693, 'samples': 11061248, 'steps': 21603, 'loss/train': 1.928360939025879} +03/04/2022 14:50:35 - INFO - codeparrot_training - Step 21604: {'lr': 0.0004786646625836727, 'samples': 11061760, 'steps': 21604, 'loss/train': 1.3518400192260742} +03/04/2022 14:50:36 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/04/2022 14:50:40 - INFO - codeparrot_training - Step 21605: {'lr': 0.0004786625174022458, 'samples': 11062272, 'steps': 21605, 'loss/train': 1.6078460216522217} +03/04/2022 14:50:43 - INFO - codeparrot_training - Step 21606: {'lr': 0.00047866037211778705, 'samples': 11062784, 'steps': 21606, 'loss/train': 1.8137054443359375} +03/04/2022 14:50:44 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/04/2022 14:50:48 - INFO - codeparrot_training - Step 21607: {'lr': 0.0004786582267302975, 'samples': 11063296, 'steps': 21607, 'loss/train': 1.192622423171997} +03/04/2022 14:50:51 - INFO - codeparrot_training - Step 21608: {'lr': 0.000478656081239778, 'samples': 11063808, 'steps': 21608, 'loss/train': 1.2776453495025635} +03/04/2022 14:50:52 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 14:50:57 - INFO - codeparrot_training - Step 21609: {'lr': 0.0004786539356462297, 'samples': 11064320, 'steps': 21609, 'loss/train': 2.407715320587158} +03/04/2022 14:51:00 - INFO - codeparrot_training - Step 21610: {'lr': 0.0004786517899496534, 'samples': 11064832, 'steps': 21610, 'loss/train': 2.002889633178711} +03/04/2022 14:51:00 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 14:51:05 - INFO - codeparrot_training - Step 21611: {'lr': 0.0004786496441500502, 'samples': 11065344, 'steps': 21611, 'loss/train': 1.9632091522216797} +03/04/2022 14:51:08 - INFO - codeparrot_training - Step 21612: {'lr': 0.00047864749824742093, 'samples': 11065856, 'steps': 21612, 'loss/train': 1.731671690940857} +03/04/2022 14:51:09 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 14:51:13 - INFO - codeparrot_training - Step 21613: {'lr': 0.00047864535224176666, 'samples': 11066368, 'steps': 21613, 'loss/train': 1.9077495336532593} +03/04/2022 14:51:17 - INFO - codeparrot_training - Step 21614: {'lr': 0.0004786432061330882, 'samples': 11066880, 'steps': 21614, 'loss/train': 2.1098737716674805} +03/04/2022 14:51:18 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 14:51:22 - INFO - codeparrot_training - Step 21615: {'lr': 0.0004786410599213868, 'samples': 11067392, 'steps': 21615, 'loss/train': 1.7675665616989136} +03/04/2022 14:51:25 - INFO - codeparrot_training - Step 21616: {'lr': 0.00047863891360666323, 'samples': 11067904, 'steps': 21616, 'loss/train': 1.6805068254470825} +03/04/2022 14:51:26 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/04/2022 14:51:31 - INFO - codeparrot_training - Step 21617: {'lr': 0.00047863676718891846, 'samples': 11068416, 'steps': 21617, 'loss/train': 1.790153980255127} +03/04/2022 14:51:34 - INFO - codeparrot_training - Step 21618: {'lr': 0.0004786346206681535, 'samples': 11068928, 'steps': 21618, 'loss/train': 1.2833428382873535} +03/04/2022 14:51:37 - INFO - codeparrot_training - Step 21619: {'lr': 0.0004786324740443693, 'samples': 11069440, 'steps': 21619, 'loss/train': 2.032071352005005} +03/04/2022 14:51:37 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 14:51:42 - INFO - codeparrot_training - Step 21620: {'lr': 0.00047863032731756684, 'samples': 11069952, 'steps': 21620, 'loss/train': 1.3048440217971802} +03/04/2022 14:51:45 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 14:51:48 - INFO - codeparrot_training - Step 21621: {'lr': 0.0004786281804877471, 'samples': 11070464, 'steps': 21621, 'loss/train': 2.5797715187072754} +03/04/2022 14:51:51 - INFO - codeparrot_training - Step 21622: {'lr': 0.00047862603355491103, 'samples': 11070976, 'steps': 21622, 'loss/train': 2.66520619392395} +03/04/2022 14:51:54 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/04/2022 14:51:56 - INFO - codeparrot_training - Step 21623: {'lr': 0.0004786238865190595, 'samples': 11071488, 'steps': 21623, 'loss/train': 1.3190083503723145} +03/04/2022 14:51:59 - INFO - codeparrot_training - Step 21624: {'lr': 0.0004786217393801937, 'samples': 11072000, 'steps': 21624, 'loss/train': 1.344419002532959} +03/04/2022 14:52:02 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 14:52:04 - INFO - codeparrot_training - Step 21625: {'lr': 0.00047861959213831446, 'samples': 11072512, 'steps': 21625, 'loss/train': 1.826554775238037} +03/04/2022 14:52:08 - INFO - codeparrot_training - Step 21626: {'lr': 0.0004786174447934227, 'samples': 11073024, 'steps': 21626, 'loss/train': 1.7219583988189697} +03/04/2022 14:52:10 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 14:52:13 - INFO - codeparrot_training - Step 21627: {'lr': 0.0004786152973455195, 'samples': 11073536, 'steps': 21627, 'loss/train': 1.9456664323806763} +03/04/2022 14:52:16 - INFO - codeparrot_training - Step 21628: {'lr': 0.0004786131497946058, 'samples': 11074048, 'steps': 21628, 'loss/train': 1.9369336366653442} +03/04/2022 14:52:19 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/04/2022 14:52:21 - INFO - codeparrot_training - Step 21629: {'lr': 0.0004786110021406824, 'samples': 11074560, 'steps': 21629, 'loss/train': 2.341231346130371} +03/04/2022 14:52:25 - INFO - codeparrot_training - Step 21630: {'lr': 0.0004786088543837506, 'samples': 11075072, 'steps': 21630, 'loss/train': 1.1697386503219604} +03/04/2022 14:52:27 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 14:52:30 - INFO - codeparrot_training - Step 21631: {'lr': 0.00047860670652381105, 'samples': 11075584, 'steps': 21631, 'loss/train': 2.088310956954956} +03/04/2022 14:52:33 - INFO - codeparrot_training - Step 21632: {'lr': 0.00047860455856086487, 'samples': 11076096, 'steps': 21632, 'loss/train': 1.918359637260437} +03/04/2022 14:52:36 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 14:52:38 - INFO - codeparrot_training - Step 21633: {'lr': 0.00047860241049491303, 'samples': 11076608, 'steps': 21633, 'loss/train': 1.7962597608566284} +03/04/2022 14:52:41 - INFO - codeparrot_training - Step 21634: {'lr': 0.00047860026232595645, 'samples': 11077120, 'steps': 21634, 'loss/train': 1.3057301044464111} +03/04/2022 14:52:45 - INFO - codeparrot_training - Step 21635: {'lr': 0.0004785981140539961, 'samples': 11077632, 'steps': 21635, 'loss/train': 1.847962498664856} +03/04/2022 14:52:45 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 14:52:50 - INFO - codeparrot_training - Step 21636: {'lr': 0.000478595965679033, 'samples': 11078144, 'steps': 21636, 'loss/train': 0.7102426290512085} +03/04/2022 14:52:53 - INFO - codeparrot_training - Step 21637: {'lr': 0.0004785938172010681, 'samples': 11078656, 'steps': 21637, 'loss/train': 1.4587161540985107} +03/04/2022 14:52:53 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/04/2022 14:52:58 - INFO - codeparrot_training - Step 21638: {'lr': 0.0004785916686201023, 'samples': 11079168, 'steps': 21638, 'loss/train': 1.1614410877227783} +03/04/2022 14:53:02 - INFO - codeparrot_training - Step 21639: {'lr': 0.00047858951993613665, 'samples': 11079680, 'steps': 21639, 'loss/train': 1.8197566270828247} +03/04/2022 14:53:02 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/04/2022 14:53:07 - INFO - codeparrot_training - Step 21640: {'lr': 0.0004785873711491721, 'samples': 11080192, 'steps': 21640, 'loss/train': 1.907324194908142} +03/04/2022 14:53:10 - INFO - codeparrot_training - Step 21641: {'lr': 0.00047858522225920964, 'samples': 11080704, 'steps': 21641, 'loss/train': 1.4754407405853271} +03/04/2022 14:53:10 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/04/2022 14:53:15 - INFO - codeparrot_training - Step 21642: {'lr': 0.00047858307326625014, 'samples': 11081216, 'steps': 21642, 'loss/train': 1.5278581380844116} +03/04/2022 14:53:18 - INFO - codeparrot_training - Step 21643: {'lr': 0.00047858092417029464, 'samples': 11081728, 'steps': 21643, 'loss/train': 1.350403070449829} +03/04/2022 14:53:18 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 14:53:24 - INFO - codeparrot_training - Step 21644: {'lr': 0.00047857877497134416, 'samples': 11082240, 'steps': 21644, 'loss/train': 1.7687522172927856} +03/04/2022 14:53:27 - INFO - codeparrot_training - Step 21645: {'lr': 0.0004785766256693995, 'samples': 11082752, 'steps': 21645, 'loss/train': 1.978022813796997} +03/04/2022 14:53:27 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 14:53:32 - INFO - codeparrot_training - Step 21646: {'lr': 0.0004785744762644619, 'samples': 11083264, 'steps': 21646, 'loss/train': 2.0448451042175293} +03/04/2022 14:53:36 - INFO - codeparrot_training - Step 21647: {'lr': 0.00047857232675653207, 'samples': 11083776, 'steps': 21647, 'loss/train': 2.01790714263916} +03/04/2022 14:53:36 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 14:53:41 - INFO - codeparrot_training - Step 21648: {'lr': 0.00047857017714561105, 'samples': 11084288, 'steps': 21648, 'loss/train': 1.658448576927185} +03/04/2022 14:53:44 - INFO - codeparrot_training - Step 21649: {'lr': 0.00047856802743169994, 'samples': 11084800, 'steps': 21649, 'loss/train': 0.9894323348999023} +03/04/2022 14:53:44 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 14:53:49 - INFO - codeparrot_training - Step 21650: {'lr': 0.00047856587761479954, 'samples': 11085312, 'steps': 21650, 'loss/train': 0.8269980549812317} +03/04/2022 14:53:52 - INFO - codeparrot_training - Step 21651: {'lr': 0.00047856372769491083, 'samples': 11085824, 'steps': 21651, 'loss/train': 1.817694902420044} +03/04/2022 14:53:52 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/04/2022 14:53:58 - INFO - codeparrot_training - Step 21652: {'lr': 0.0004785615776720349, 'samples': 11086336, 'steps': 21652, 'loss/train': 0.826675295829773} +03/04/2022 14:54:01 - INFO - codeparrot_training - Step 21653: {'lr': 0.0004785594275461726, 'samples': 11086848, 'steps': 21653, 'loss/train': 2.391334056854248} +03/04/2022 14:54:01 - INFO - codeparrot_training - Skipping example with length 158 (seq_length=1024) +03/04/2022 14:54:06 - INFO - codeparrot_training - Step 21654: {'lr': 0.00047855727731732503, 'samples': 11087360, 'steps': 21654, 'loss/train': 1.5006520748138428} +03/04/2022 14:54:09 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 14:54:12 - INFO - codeparrot_training - Step 21655: {'lr': 0.00047855512698549295, 'samples': 11087872, 'steps': 21655, 'loss/train': 2.0863168239593506} +03/04/2022 14:54:15 - INFO - codeparrot_training - Step 21656: {'lr': 0.00047855297655067754, 'samples': 11088384, 'steps': 21656, 'loss/train': 1.3177684545516968} +03/04/2022 14:54:18 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/04/2022 14:54:20 - INFO - codeparrot_training - Step 21657: {'lr': 0.0004785508260128797, 'samples': 11088896, 'steps': 21657, 'loss/train': 2.400218963623047} +03/04/2022 14:54:23 - INFO - codeparrot_training - Step 21658: {'lr': 0.00047854867537210034, 'samples': 11089408, 'steps': 21658, 'loss/train': 1.112760066986084} +03/04/2022 14:54:26 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/04/2022 14:54:28 - INFO - codeparrot_training - Step 21659: {'lr': 0.00047854652462834055, 'samples': 11089920, 'steps': 21659, 'loss/train': 1.8920199871063232} +03/04/2022 14:54:32 - INFO - codeparrot_training - Step 21660: {'lr': 0.0004785443737816012, 'samples': 11090432, 'steps': 21660, 'loss/train': 1.0126533508300781} +03/04/2022 14:54:34 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/04/2022 14:54:37 - INFO - codeparrot_training - Step 21661: {'lr': 0.0004785422228318832, 'samples': 11090944, 'steps': 21661, 'loss/train': 2.1009271144866943} +03/04/2022 14:54:40 - INFO - codeparrot_training - Step 21662: {'lr': 0.0004785400717791877, 'samples': 11091456, 'steps': 21662, 'loss/train': 2.6071882247924805} +03/04/2022 14:54:43 - INFO - codeparrot_training - Step 21663: {'lr': 0.0004785379206235155, 'samples': 11091968, 'steps': 21663, 'loss/train': 2.219818592071533} +03/04/2022 14:54:43 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 14:54:49 - INFO - codeparrot_training - Step 21664: {'lr': 0.00047853576936486764, 'samples': 11092480, 'steps': 21664, 'loss/train': 1.9400922060012817} +03/04/2022 14:54:52 - INFO - codeparrot_training - Step 21665: {'lr': 0.00047853361800324516, 'samples': 11092992, 'steps': 21665, 'loss/train': 2.5393707752227783} +03/04/2022 14:54:52 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/04/2022 14:54:57 - INFO - codeparrot_training - Step 21666: {'lr': 0.0004785314665386489, 'samples': 11093504, 'steps': 21666, 'loss/train': 1.2286263704299927} +03/04/2022 14:55:00 - INFO - codeparrot_training - Step 21667: {'lr': 0.00047852931497107987, 'samples': 11094016, 'steps': 21667, 'loss/train': 1.368502140045166} +03/04/2022 14:55:00 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 14:55:06 - INFO - codeparrot_training - Step 21668: {'lr': 0.0004785271633005391, 'samples': 11094528, 'steps': 21668, 'loss/train': 2.007194757461548} +03/04/2022 14:55:09 - INFO - codeparrot_training - Step 21669: {'lr': 0.0004785250115270275, 'samples': 11095040, 'steps': 21669, 'loss/train': 2.440783739089966} +03/04/2022 14:55:10 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 14:55:14 - INFO - codeparrot_training - Step 21670: {'lr': 0.00047852285965054606, 'samples': 11095552, 'steps': 21670, 'loss/train': 2.2348263263702393} +03/04/2022 14:55:18 - INFO - codeparrot_training - Step 21671: {'lr': 0.00047852070767109573, 'samples': 11096064, 'steps': 21671, 'loss/train': 2.220184087753296} +03/04/2022 14:55:19 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/04/2022 14:55:23 - INFO - codeparrot_training - Step 21672: {'lr': 0.00047851855558867754, 'samples': 11096576, 'steps': 21672, 'loss/train': 1.1613482236862183} +03/04/2022 14:55:26 - INFO - codeparrot_training - Step 21673: {'lr': 0.0004785164034032924, 'samples': 11097088, 'steps': 21673, 'loss/train': 1.9185817241668701} +03/04/2022 14:55:28 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 14:55:32 - INFO - codeparrot_training - Step 21674: {'lr': 0.0004785142511149412, 'samples': 11097600, 'steps': 21674, 'loss/train': 1.819951057434082} +03/04/2022 14:55:35 - INFO - codeparrot_training - Step 21675: {'lr': 0.0004785120987236251, 'samples': 11098112, 'steps': 21675, 'loss/train': 0.316189706325531} +03/04/2022 14:55:36 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 14:55:40 - INFO - codeparrot_training - Step 21676: {'lr': 0.00047850994622934494, 'samples': 11098624, 'steps': 21676, 'loss/train': 2.013767957687378} +03/04/2022 14:55:43 - INFO - codeparrot_training - Step 21677: {'lr': 0.0004785077936321018, 'samples': 11099136, 'steps': 21677, 'loss/train': 2.2385544776916504} +03/04/2022 14:55:45 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/04/2022 14:55:48 - INFO - codeparrot_training - Step 21678: {'lr': 0.00047850564093189653, 'samples': 11099648, 'steps': 21678, 'loss/train': 3.0174810886383057} +03/04/2022 14:55:52 - INFO - codeparrot_training - Step 21679: {'lr': 0.0004785034881287301, 'samples': 11100160, 'steps': 21679, 'loss/train': 1.6125528812408447} +03/04/2022 14:55:53 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 14:55:57 - INFO - codeparrot_training - Step 21680: {'lr': 0.0004785013352226035, 'samples': 11100672, 'steps': 21680, 'loss/train': 3.0373880863189697} +03/04/2022 14:56:00 - INFO - codeparrot_training - Step 21681: {'lr': 0.00047849918221351783, 'samples': 11101184, 'steps': 21681, 'loss/train': 1.4415744543075562} +03/04/2022 14:56:01 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 14:56:05 - INFO - codeparrot_training - Step 21682: {'lr': 0.0004784970291014739, 'samples': 11101696, 'steps': 21682, 'loss/train': 1.8824474811553955} +03/04/2022 14:56:08 - INFO - codeparrot_training - Step 21683: {'lr': 0.0004784948758864727, 'samples': 11102208, 'steps': 21683, 'loss/train': 1.5492140054702759} +03/04/2022 14:56:09 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 14:56:14 - INFO - codeparrot_training - Step 21684: {'lr': 0.0004784927225685153, 'samples': 11102720, 'steps': 21684, 'loss/train': 1.8457438945770264} +03/04/2022 14:56:17 - INFO - codeparrot_training - Step 21685: {'lr': 0.00047849056914760256, 'samples': 11103232, 'steps': 21685, 'loss/train': 1.9551037549972534} +03/04/2022 14:56:18 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 14:56:22 - INFO - codeparrot_training - Step 21686: {'lr': 0.00047848841562373557, 'samples': 11103744, 'steps': 21686, 'loss/train': 1.2601282596588135} +03/04/2022 14:56:25 - INFO - codeparrot_training - Step 21687: {'lr': 0.00047848626199691513, 'samples': 11104256, 'steps': 21687, 'loss/train': 1.7526013851165771} +03/04/2022 14:56:26 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 14:56:30 - INFO - codeparrot_training - Step 21688: {'lr': 0.00047848410826714237, 'samples': 11104768, 'steps': 21688, 'loss/train': 2.0657994747161865} +03/04/2022 14:56:34 - INFO - codeparrot_training - Step 21689: {'lr': 0.00047848195443441817, 'samples': 11105280, 'steps': 21689, 'loss/train': 1.0728436708450317} +03/04/2022 14:56:35 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 14:56:39 - INFO - codeparrot_training - Step 21690: {'lr': 0.0004784798004987435, 'samples': 11105792, 'steps': 21690, 'loss/train': 2.465104818344116} +03/04/2022 14:56:42 - INFO - codeparrot_training - Step 21691: {'lr': 0.00047847764646011937, 'samples': 11106304, 'steps': 21691, 'loss/train': 2.1175787448883057} +03/04/2022 14:56:43 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/04/2022 14:56:47 - INFO - codeparrot_training - Step 21692: {'lr': 0.0004784754923185468, 'samples': 11106816, 'steps': 21692, 'loss/train': 1.729608178138733} +03/04/2022 14:56:50 - INFO - codeparrot_training - Step 21693: {'lr': 0.00047847333807402666, 'samples': 11107328, 'steps': 21693, 'loss/train': 0.4215858578681946} +03/04/2022 14:56:51 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 14:56:56 - INFO - codeparrot_training - Step 21694: {'lr': 0.00047847118372655996, 'samples': 11107840, 'steps': 21694, 'loss/train': 2.21457576751709} +03/04/2022 14:56:59 - INFO - codeparrot_training - Step 21695: {'lr': 0.00047846902927614767, 'samples': 11108352, 'steps': 21695, 'loss/train': 1.8113813400268555} +03/04/2022 14:57:00 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/04/2022 14:57:04 - INFO - codeparrot_training - Step 21696: {'lr': 0.0004784668747227907, 'samples': 11108864, 'steps': 21696, 'loss/train': 1.9821256399154663} +03/04/2022 14:57:08 - INFO - codeparrot_training - Step 21697: {'lr': 0.00047846472006649016, 'samples': 11109376, 'steps': 21697, 'loss/train': 2.344200372695923} +03/04/2022 14:57:09 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 14:57:13 - INFO - codeparrot_training - Step 21698: {'lr': 0.0004784625653072469, 'samples': 11109888, 'steps': 21698, 'loss/train': 1.6732059717178345} +03/04/2022 14:57:16 - INFO - codeparrot_training - Step 21699: {'lr': 0.00047846041044506194, 'samples': 11110400, 'steps': 21699, 'loss/train': 1.6493163108825684} +03/04/2022 14:57:17 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 14:57:21 - INFO - codeparrot_training - Step 21700: {'lr': 0.00047845825547993627, 'samples': 11110912, 'steps': 21700, 'loss/train': 2.19193696975708} +03/04/2022 14:57:24 - INFO - codeparrot_training - Step 21701: {'lr': 0.0004784561004118708, 'samples': 11111424, 'steps': 21701, 'loss/train': 1.7970657348632812} +03/04/2022 14:57:26 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 14:57:30 - INFO - codeparrot_training - Step 21702: {'lr': 0.0004784539452408666, 'samples': 11111936, 'steps': 21702, 'loss/train': 2.316287040710449} +03/04/2022 14:57:33 - INFO - codeparrot_training - Step 21703: {'lr': 0.0004784517899669245, 'samples': 11112448, 'steps': 21703, 'loss/train': 1.5626832246780396} +03/04/2022 14:57:34 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/04/2022 14:57:38 - INFO - codeparrot_training - Step 21704: {'lr': 0.00047844963459004565, 'samples': 11112960, 'steps': 21704, 'loss/train': 1.337782621383667} +03/04/2022 14:57:41 - INFO - codeparrot_training - Step 21705: {'lr': 0.00047844747911023077, 'samples': 11113472, 'steps': 21705, 'loss/train': 2.074763536453247} +03/04/2022 14:57:42 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/04/2022 14:57:46 - INFO - codeparrot_training - Step 21706: {'lr': 0.00047844532352748115, 'samples': 11113984, 'steps': 21706, 'loss/train': 2.3312253952026367} +03/04/2022 14:57:50 - INFO - codeparrot_training - Step 21707: {'lr': 0.0004784431678417975, 'samples': 11114496, 'steps': 21707, 'loss/train': 1.8276689052581787} +03/04/2022 14:57:51 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 14:57:55 - INFO - codeparrot_training - Step 21708: {'lr': 0.00047844101205318085, 'samples': 11115008, 'steps': 21708, 'loss/train': 1.8994722366333008} +03/04/2022 14:57:58 - INFO - codeparrot_training - Step 21709: {'lr': 0.0004784388561616323, 'samples': 11115520, 'steps': 21709, 'loss/train': 1.519018292427063} +03/04/2022 14:57:59 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/04/2022 14:58:03 - INFO - codeparrot_training - Step 21710: {'lr': 0.0004784367001671526, 'samples': 11116032, 'steps': 21710, 'loss/train': 1.8370649814605713} +03/04/2022 14:58:06 - INFO - codeparrot_training - Step 21711: {'lr': 0.00047843454406974295, 'samples': 11116544, 'steps': 21711, 'loss/train': 2.0374484062194824} +03/04/2022 14:58:07 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 14:58:12 - INFO - codeparrot_training - Step 21712: {'lr': 0.00047843238786940423, 'samples': 11117056, 'steps': 21712, 'loss/train': 2.071685552597046} +03/04/2022 14:58:15 - INFO - codeparrot_training - Step 21713: {'lr': 0.0004784302315661373, 'samples': 11117568, 'steps': 21713, 'loss/train': 1.0735783576965332} +03/04/2022 14:58:16 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 14:58:20 - INFO - codeparrot_training - Step 21714: {'lr': 0.00047842807515994335, 'samples': 11118080, 'steps': 21714, 'loss/train': 1.7324104309082031} +03/04/2022 14:58:23 - INFO - codeparrot_training - Step 21715: {'lr': 0.00047842591865082315, 'samples': 11118592, 'steps': 21715, 'loss/train': 1.5595492124557495} +03/04/2022 14:58:24 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 14:58:29 - INFO - codeparrot_training - Step 21716: {'lr': 0.0004784237620387778, 'samples': 11119104, 'steps': 21716, 'loss/train': 1.6675643920898438} +03/04/2022 14:58:32 - INFO - codeparrot_training - Step 21717: {'lr': 0.0004784216053238082, 'samples': 11119616, 'steps': 21717, 'loss/train': 1.787959337234497} +03/04/2022 14:58:32 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/04/2022 14:58:37 - INFO - codeparrot_training - Step 21718: {'lr': 0.00047841944850591535, 'samples': 11120128, 'steps': 21718, 'loss/train': 1.1109071969985962} +03/04/2022 14:58:40 - INFO - codeparrot_training - Step 21719: {'lr': 0.0004784172915851003, 'samples': 11120640, 'steps': 21719, 'loss/train': 1.6595643758773804} +03/04/2022 14:58:41 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 14:58:45 - INFO - codeparrot_training - Step 21720: {'lr': 0.00047841513456136383, 'samples': 11121152, 'steps': 21720, 'loss/train': 2.393651247024536} +03/04/2022 14:58:49 - INFO - codeparrot_training - Step 21721: {'lr': 0.000478412977434707, 'samples': 11121664, 'steps': 21721, 'loss/train': 2.4198668003082275} +03/04/2022 14:58:49 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 14:58:54 - INFO - codeparrot_training - Step 21722: {'lr': 0.00047841082020513094, 'samples': 11122176, 'steps': 21722, 'loss/train': 1.7189557552337646} +03/04/2022 14:58:57 - INFO - codeparrot_training - Step 21723: {'lr': 0.0004784086628726364, 'samples': 11122688, 'steps': 21723, 'loss/train': 2.244342088699341} +03/04/2022 14:58:58 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 14:59:02 - INFO - codeparrot_training - Step 21724: {'lr': 0.0004784065054372245, 'samples': 11123200, 'steps': 21724, 'loss/train': 1.615106463432312} +03/04/2022 14:59:05 - INFO - codeparrot_training - Step 21725: {'lr': 0.0004784043478988961, 'samples': 11123712, 'steps': 21725, 'loss/train': 1.6999868154525757} +03/04/2022 14:59:06 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/04/2022 14:59:11 - INFO - codeparrot_training - Step 21726: {'lr': 0.00047840219025765225, 'samples': 11124224, 'steps': 21726, 'loss/train': 1.6159816980361938} +03/04/2022 14:59:14 - INFO - codeparrot_training - Step 21727: {'lr': 0.0004784000325134939, 'samples': 11124736, 'steps': 21727, 'loss/train': 1.9638736248016357} +03/04/2022 14:59:14 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/04/2022 14:59:19 - INFO - codeparrot_training - Step 21728: {'lr': 0.00047839787466642206, 'samples': 11125248, 'steps': 21728, 'loss/train': 2.0567774772644043} +03/04/2022 14:59:22 - INFO - codeparrot_training - Step 21729: {'lr': 0.00047839571671643756, 'samples': 11125760, 'steps': 21729, 'loss/train': 1.733555793762207} +03/04/2022 14:59:23 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/04/2022 14:59:28 - INFO - codeparrot_training - Step 21730: {'lr': 0.0004783935586635415, 'samples': 11126272, 'steps': 21730, 'loss/train': 1.6846046447753906} +03/04/2022 14:59:31 - INFO - codeparrot_training - Step 21731: {'lr': 0.0004783914005077349, 'samples': 11126784, 'steps': 21731, 'loss/train': 1.7137106657028198} +03/04/2022 14:59:32 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/04/2022 14:59:36 - INFO - codeparrot_training - Step 21732: {'lr': 0.0004783892422490186, 'samples': 11127296, 'steps': 21732, 'loss/train': 1.7345740795135498} +03/04/2022 14:59:39 - INFO - codeparrot_training - Step 21733: {'lr': 0.00047838708388739365, 'samples': 11127808, 'steps': 21733, 'loss/train': 0.2502008378505707} +03/04/2022 14:59:41 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 14:59:45 - INFO - codeparrot_training - Step 21734: {'lr': 0.000478384925422861, 'samples': 11128320, 'steps': 21734, 'loss/train': 2.210984945297241} +03/04/2022 14:59:48 - INFO - codeparrot_training - Step 21735: {'lr': 0.00047838276685542157, 'samples': 11128832, 'steps': 21735, 'loss/train': 1.765194058418274} +03/04/2022 14:59:50 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/04/2022 14:59:53 - INFO - codeparrot_training - Step 21736: {'lr': 0.0004783806081850765, 'samples': 11129344, 'steps': 21736, 'loss/train': 2.104729652404785} +03/04/2022 14:59:56 - INFO - codeparrot_training - Step 21737: {'lr': 0.0004783784494118266, 'samples': 11129856, 'steps': 21737, 'loss/train': 2.6189260482788086} +03/04/2022 14:59:59 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 15:00:02 - INFO - codeparrot_training - Step 21738: {'lr': 0.00047837629053567286, 'samples': 11130368, 'steps': 21738, 'loss/train': 2.0532193183898926} +03/04/2022 15:00:05 - INFO - codeparrot_training - Step 21739: {'lr': 0.00047837413155661635, 'samples': 11130880, 'steps': 21739, 'loss/train': 2.3693361282348633} +03/04/2022 15:00:08 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/04/2022 15:00:10 - INFO - codeparrot_training - Step 21740: {'lr': 0.000478371972474658, 'samples': 11131392, 'steps': 21740, 'loss/train': 1.2502208948135376} +03/04/2022 15:00:13 - INFO - codeparrot_training - Step 21741: {'lr': 0.00047836981328979865, 'samples': 11131904, 'steps': 21741, 'loss/train': 0.970448911190033} +03/04/2022 15:00:16 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 15:00:19 - INFO - codeparrot_training - Step 21742: {'lr': 0.00047836765400203953, 'samples': 11132416, 'steps': 21742, 'loss/train': 1.3334587812423706} +03/04/2022 15:00:22 - INFO - codeparrot_training - Step 21743: {'lr': 0.00047836549461138133, 'samples': 11132928, 'steps': 21743, 'loss/train': 2.1743268966674805} +03/04/2022 15:00:24 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 15:00:27 - INFO - codeparrot_training - Step 21744: {'lr': 0.00047836333511782524, 'samples': 11133440, 'steps': 21744, 'loss/train': 1.9751198291778564} +03/04/2022 15:00:30 - INFO - codeparrot_training - Step 21745: {'lr': 0.00047836117552137213, 'samples': 11133952, 'steps': 21745, 'loss/train': 2.1148226261138916} +03/04/2022 15:00:33 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 15:00:36 - INFO - codeparrot_training - Step 21746: {'lr': 0.00047835901582202303, 'samples': 11134464, 'steps': 21746, 'loss/train': 1.7244793176651} +03/04/2022 15:00:39 - INFO - codeparrot_training - Step 21747: {'lr': 0.00047835685601977886, 'samples': 11134976, 'steps': 21747, 'loss/train': 2.2816364765167236} +03/04/2022 15:00:41 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 15:00:44 - INFO - codeparrot_training - Step 21748: {'lr': 0.00047835469611464055, 'samples': 11135488, 'steps': 21748, 'loss/train': 1.4153542518615723} +03/04/2022 15:00:47 - INFO - codeparrot_training - Step 21749: {'lr': 0.0004783525361066092, 'samples': 11136000, 'steps': 21749, 'loss/train': 2.523851156234741} +03/04/2022 15:00:50 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/04/2022 15:00:53 - INFO - codeparrot_training - Step 21750: {'lr': 0.00047835037599568576, 'samples': 11136512, 'steps': 21750, 'loss/train': 1.9915122985839844} +03/04/2022 15:00:56 - INFO - codeparrot_training - Step 21751: {'lr': 0.0004783482157818711, 'samples': 11137024, 'steps': 21751, 'loss/train': 1.9623181819915771} +03/04/2022 15:00:58 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 15:01:01 - INFO - codeparrot_training - Step 21752: {'lr': 0.0004783460554651663, 'samples': 11137536, 'steps': 21752, 'loss/train': 1.7801955938339233} +03/04/2022 15:01:04 - INFO - codeparrot_training - Step 21753: {'lr': 0.0004783438950455723, 'samples': 11138048, 'steps': 21753, 'loss/train': 1.9983958005905151} +03/04/2022 15:01:06 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/04/2022 15:01:09 - INFO - codeparrot_training - Step 21754: {'lr': 0.00047834173452309005, 'samples': 11138560, 'steps': 21754, 'loss/train': 2.329423427581787} +03/04/2022 15:01:13 - INFO - codeparrot_training - Step 21755: {'lr': 0.00047833957389772046, 'samples': 11139072, 'steps': 21755, 'loss/train': 0.6731268167495728} +03/04/2022 15:01:14 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/04/2022 15:01:18 - INFO - codeparrot_training - Step 21756: {'lr': 0.0004783374131694647, 'samples': 11139584, 'steps': 21756, 'loss/train': 2.144912004470825} +03/04/2022 15:01:21 - INFO - codeparrot_training - Step 21757: {'lr': 0.00047833525233832356, 'samples': 11140096, 'steps': 21757, 'loss/train': 1.8825634717941284} +03/04/2022 15:01:24 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 15:01:26 - INFO - codeparrot_training - Step 21758: {'lr': 0.00047833309140429803, 'samples': 11140608, 'steps': 21758, 'loss/train': 1.7558887004852295} +03/04/2022 15:01:30 - INFO - codeparrot_training - Step 21759: {'lr': 0.0004783309303673892, 'samples': 11141120, 'steps': 21759, 'loss/train': 1.4758727550506592} +03/04/2022 15:01:32 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 15:01:35 - INFO - codeparrot_training - Step 21760: {'lr': 0.00047832876922759805, 'samples': 11141632, 'steps': 21760, 'loss/train': 1.8323588371276855} +03/04/2022 15:01:38 - INFO - codeparrot_training - Step 21761: {'lr': 0.0004783266079849253, 'samples': 11142144, 'steps': 21761, 'loss/train': 1.8024146556854248} +03/04/2022 15:01:41 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 15:01:44 - INFO - codeparrot_training - Step 21762: {'lr': 0.00047832444663937227, 'samples': 11142656, 'steps': 21762, 'loss/train': 1.5333456993103027} +03/04/2022 15:01:47 - INFO - codeparrot_training - Step 21763: {'lr': 0.0004783222851909397, 'samples': 11143168, 'steps': 21763, 'loss/train': 3.0703651905059814} +03/04/2022 15:01:50 - INFO - codeparrot_training - Step 21764: {'lr': 0.0004783201236396286, 'samples': 11143680, 'steps': 21764, 'loss/train': 6.750307083129883} +03/04/2022 15:01:50 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 15:01:55 - INFO - codeparrot_training - Step 21765: {'lr': 0.00047831796198544, 'samples': 11144192, 'steps': 21765, 'loss/train': 1.9327011108398438} +03/04/2022 15:01:58 - INFO - codeparrot_training - Step 21766: {'lr': 0.0004783158002283749, 'samples': 11144704, 'steps': 21766, 'loss/train': 2.030412197113037} +03/04/2022 15:01:59 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 15:02:04 - INFO - codeparrot_training - Step 21767: {'lr': 0.0004783136383684342, 'samples': 11145216, 'steps': 21767, 'loss/train': 2.0477700233459473} +03/04/2022 15:02:07 - INFO - codeparrot_training - Step 21768: {'lr': 0.0004783114764056188, 'samples': 11145728, 'steps': 21768, 'loss/train': 2.0731847286224365} +03/04/2022 15:02:08 - INFO - codeparrot_training - Skipping example with length 960 (seq_length=1024) +03/04/2022 15:02:12 - INFO - codeparrot_training - Step 21769: {'lr': 0.00047830931433992985, 'samples': 11146240, 'steps': 21769, 'loss/train': 0.8285195231437683} +03/04/2022 15:02:16 - INFO - codeparrot_training - Step 21770: {'lr': 0.00047830715217136825, 'samples': 11146752, 'steps': 21770, 'loss/train': 1.6179840564727783} +03/04/2022 15:02:16 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/04/2022 15:02:22 - INFO - codeparrot_training - Step 21771: {'lr': 0.000478304989899935, 'samples': 11147264, 'steps': 21771, 'loss/train': 2.338785409927368} +03/04/2022 15:02:25 - INFO - codeparrot_training - Step 21772: {'lr': 0.00047830282752563103, 'samples': 11147776, 'steps': 21772, 'loss/train': 2.212019920349121} +03/04/2022 15:02:28 - INFO - codeparrot_training - Step 21773: {'lr': 0.00047830066504845725, 'samples': 11148288, 'steps': 21773, 'loss/train': 1.7606385946273804} +03/04/2022 15:02:28 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 15:02:33 - INFO - codeparrot_training - Step 21774: {'lr': 0.0004782985024684148, 'samples': 11148800, 'steps': 21774, 'loss/train': 1.7152084112167358} +03/04/2022 15:02:37 - INFO - codeparrot_training - Step 21775: {'lr': 0.0004782963397855046, 'samples': 11149312, 'steps': 21775, 'loss/train': 2.0207455158233643} +03/04/2022 15:02:37 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/04/2022 15:02:42 - INFO - codeparrot_training - Step 21776: {'lr': 0.00047829417699972747, 'samples': 11149824, 'steps': 21776, 'loss/train': 1.9341248273849487} +03/04/2022 15:02:45 - INFO - codeparrot_training - Step 21777: {'lr': 0.0004782920141110846, 'samples': 11150336, 'steps': 21777, 'loss/train': 1.9932191371917725} +03/04/2022 15:02:45 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 15:02:50 - INFO - codeparrot_training - Step 21778: {'lr': 0.0004782898511195768, 'samples': 11150848, 'steps': 21778, 'loss/train': 1.6232519149780273} +03/04/2022 15:02:53 - INFO - codeparrot_training - Step 21779: {'lr': 0.00047828768802520515, 'samples': 11151360, 'steps': 21779, 'loss/train': 1.546098232269287} +03/04/2022 15:02:54 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 15:02:59 - INFO - codeparrot_training - Step 21780: {'lr': 0.0004782855248279706, 'samples': 11151872, 'steps': 21780, 'loss/train': 1.3727585077285767} +03/04/2022 15:03:02 - INFO - codeparrot_training - Step 21781: {'lr': 0.0004782833615278741, 'samples': 11152384, 'steps': 21781, 'loss/train': 0.88832688331604} +03/04/2022 15:03:02 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 15:03:07 - INFO - codeparrot_training - Step 21782: {'lr': 0.00047828119812491664, 'samples': 11152896, 'steps': 21782, 'loss/train': 1.9670883417129517} +03/04/2022 15:03:10 - INFO - codeparrot_training - Step 21783: {'lr': 0.0004782790346190993, 'samples': 11153408, 'steps': 21783, 'loss/train': 1.6801140308380127} +03/04/2022 15:03:11 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 15:03:16 - INFO - codeparrot_training - Step 21784: {'lr': 0.00047827687101042283, 'samples': 11153920, 'steps': 21784, 'loss/train': 1.8986095190048218} +03/04/2022 15:03:19 - INFO - codeparrot_training - Step 21785: {'lr': 0.00047827470729888834, 'samples': 11154432, 'steps': 21785, 'loss/train': 1.5478370189666748} +03/04/2022 15:03:19 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/04/2022 15:03:24 - INFO - codeparrot_training - Step 21786: {'lr': 0.0004782725434844968, 'samples': 11154944, 'steps': 21786, 'loss/train': 1.7326003313064575} +03/04/2022 15:03:27 - INFO - codeparrot_training - Step 21787: {'lr': 0.00047827037956724915, 'samples': 11155456, 'steps': 21787, 'loss/train': 1.4197208881378174} +03/04/2022 15:03:28 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/04/2022 15:03:32 - INFO - codeparrot_training - Step 21788: {'lr': 0.00047826821554714644, 'samples': 11155968, 'steps': 21788, 'loss/train': 1.9779229164123535} +03/04/2022 15:03:36 - INFO - codeparrot_training - Step 21789: {'lr': 0.00047826605142418954, 'samples': 11156480, 'steps': 21789, 'loss/train': 1.8464950323104858} +03/04/2022 15:03:36 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 15:03:41 - INFO - codeparrot_training - Step 21790: {'lr': 0.0004782638871983795, 'samples': 11156992, 'steps': 21790, 'loss/train': 2.5263938903808594} +03/04/2022 15:03:44 - INFO - codeparrot_training - Step 21791: {'lr': 0.0004782617228697173, 'samples': 11157504, 'steps': 21791, 'loss/train': 1.3994050025939941} +03/04/2022 15:03:44 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 15:03:49 - INFO - codeparrot_training - Step 21792: {'lr': 0.0004782595584382039, 'samples': 11158016, 'steps': 21792, 'loss/train': 2.4515292644500732} +03/04/2022 15:03:52 - INFO - codeparrot_training - Step 21793: {'lr': 0.0004782573939038402, 'samples': 11158528, 'steps': 21793, 'loss/train': 1.7702375650405884} +03/04/2022 15:03:52 - INFO - codeparrot_training - Skipping example with length 5 (seq_length=1024) +03/04/2022 15:03:58 - INFO - codeparrot_training - Step 21794: {'lr': 0.0004782552292666273, 'samples': 11159040, 'steps': 21794, 'loss/train': 2.139216661453247} +03/04/2022 15:04:01 - INFO - codeparrot_training - Step 21795: {'lr': 0.0004782530645265661, 'samples': 11159552, 'steps': 21795, 'loss/train': 1.4114508628845215} +03/04/2022 15:04:01 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 15:04:06 - INFO - codeparrot_training - Step 21796: {'lr': 0.0004782508996836576, 'samples': 11160064, 'steps': 21796, 'loss/train': 0.22169779241085052} +03/04/2022 15:04:09 - INFO - codeparrot_training - Step 21797: {'lr': 0.00047824873473790275, 'samples': 11160576, 'steps': 21797, 'loss/train': 1.795056700706482} +03/04/2022 15:04:09 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/04/2022 15:04:14 - INFO - codeparrot_training - Step 21798: {'lr': 0.0004782465696893025, 'samples': 11161088, 'steps': 21798, 'loss/train': 2.2166190147399902} +03/04/2022 15:04:18 - INFO - codeparrot_training - Step 21799: {'lr': 0.0004782444045378579, 'samples': 11161600, 'steps': 21799, 'loss/train': 2.013003349304199} +03/04/2022 15:04:18 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/04/2022 15:04:23 - INFO - codeparrot_training - Step 21800: {'lr': 0.00047824223928356993, 'samples': 11162112, 'steps': 21800, 'loss/train': 1.4470890760421753} +03/04/2022 15:04:26 - INFO - codeparrot_training - Step 21801: {'lr': 0.0004782400739264395, 'samples': 11162624, 'steps': 21801, 'loss/train': 1.2553268671035767} +03/04/2022 15:04:26 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 15:04:32 - INFO - codeparrot_training - Step 21802: {'lr': 0.00047823790846646764, 'samples': 11163136, 'steps': 21802, 'loss/train': 2.2673146724700928} +03/04/2022 15:04:35 - INFO - codeparrot_training - Step 21803: {'lr': 0.0004782357429036553, 'samples': 11163648, 'steps': 21803, 'loss/train': 1.3851523399353027} +03/04/2022 15:04:35 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 15:04:40 - INFO - codeparrot_training - Step 21804: {'lr': 0.00047823357723800344, 'samples': 11164160, 'steps': 21804, 'loss/train': 1.148398518562317} +03/04/2022 15:04:43 - INFO - codeparrot_training - Step 21805: {'lr': 0.000478231411469513, 'samples': 11164672, 'steps': 21805, 'loss/train': 2.158350944519043} +03/04/2022 15:04:43 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 15:04:48 - INFO - codeparrot_training - Step 21806: {'lr': 0.000478229245598185, 'samples': 11165184, 'steps': 21806, 'loss/train': 1.5747545957565308} +03/04/2022 15:04:51 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/04/2022 15:04:54 - INFO - codeparrot_training - Step 21807: {'lr': 0.00047822707962402055, 'samples': 11165696, 'steps': 21807, 'loss/train': 2.123668909072876} +03/04/2022 15:04:57 - INFO - codeparrot_training - Step 21808: {'lr': 0.00047822491354702044, 'samples': 11166208, 'steps': 21808, 'loss/train': 2.398266077041626} +03/04/2022 15:05:00 - INFO - codeparrot_training - Step 21809: {'lr': 0.0004782227473671857, 'samples': 11166720, 'steps': 21809, 'loss/train': 2.150418519973755} +03/04/2022 15:05:00 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 15:05:05 - INFO - codeparrot_training - Step 21810: {'lr': 0.00047822058108451727, 'samples': 11167232, 'steps': 21810, 'loss/train': 1.6845968961715698} +03/04/2022 15:05:09 - INFO - codeparrot_training - Step 21811: {'lr': 0.0004782184146990162, 'samples': 11167744, 'steps': 21811, 'loss/train': 1.9970160722732544} +03/04/2022 15:05:09 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/04/2022 15:05:14 - INFO - codeparrot_training - Step 21812: {'lr': 0.00047821624821068346, 'samples': 11168256, 'steps': 21812, 'loss/train': 1.906862497329712} +03/04/2022 15:05:17 - INFO - codeparrot_training - Step 21813: {'lr': 0.00047821408161952, 'samples': 11168768, 'steps': 21813, 'loss/train': 0.9338131546974182} +03/04/2022 15:05:17 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 15:05:22 - INFO - codeparrot_training - Step 21814: {'lr': 0.00047821191492552676, 'samples': 11169280, 'steps': 21814, 'loss/train': 2.402505397796631} +03/04/2022 15:05:26 - INFO - codeparrot_training - Step 21815: {'lr': 0.00047820974812870477, 'samples': 11169792, 'steps': 21815, 'loss/train': 1.173385739326477} +03/04/2022 15:05:26 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/04/2022 15:05:31 - INFO - codeparrot_training - Step 21816: {'lr': 0.00047820758122905493, 'samples': 11170304, 'steps': 21816, 'loss/train': 2.1541244983673096} +03/04/2022 15:05:34 - INFO - codeparrot_training - Step 21817: {'lr': 0.0004782054142265784, 'samples': 11170816, 'steps': 21817, 'loss/train': 2.2916057109832764} +03/04/2022 15:05:40 - INFO - codeparrot_training - Step 21818: {'lr': 0.00047820324712127593, 'samples': 11171328, 'steps': 21818, 'loss/train': 2.580711841583252} +03/04/2022 15:05:43 - INFO - codeparrot_training - Step 21819: {'lr': 0.0004782010799131487, 'samples': 11171840, 'steps': 21819, 'loss/train': 2.334886074066162} +03/04/2022 15:05:45 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 15:05:48 - INFO - codeparrot_training - Step 21820: {'lr': 0.0004781989126021975, 'samples': 11172352, 'steps': 21820, 'loss/train': 1.8340622186660767} +03/04/2022 15:05:51 - INFO - codeparrot_training - Step 21821: {'lr': 0.00047819674518842335, 'samples': 11172864, 'steps': 21821, 'loss/train': 1.7518725395202637} +03/04/2022 15:05:53 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 15:05:56 - INFO - codeparrot_training - Step 21822: {'lr': 0.00047819457767182735, 'samples': 11173376, 'steps': 21822, 'loss/train': 2.2857983112335205} +03/04/2022 15:06:00 - INFO - codeparrot_training - Step 21823: {'lr': 0.0004781924100524104, 'samples': 11173888, 'steps': 21823, 'loss/train': 2.1611151695251465} +03/04/2022 15:06:01 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 15:06:05 - INFO - codeparrot_training - Step 21824: {'lr': 0.00047819024233017337, 'samples': 11174400, 'steps': 21824, 'loss/train': 2.8073413372039795} +03/04/2022 15:06:08 - INFO - codeparrot_training - Step 21825: {'lr': 0.00047818807450511746, 'samples': 11174912, 'steps': 21825, 'loss/train': 1.690420389175415} +03/04/2022 15:06:11 - INFO - codeparrot_training - Step 21826: {'lr': 0.00047818590657724345, 'samples': 11175424, 'steps': 21826, 'loss/train': 2.0372583866119385} +03/04/2022 15:06:11 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/04/2022 15:06:17 - INFO - codeparrot_training - Step 21827: {'lr': 0.0004781837385465524, 'samples': 11175936, 'steps': 21827, 'loss/train': 0.8191588521003723} +03/04/2022 15:06:20 - INFO - codeparrot_training - Step 21828: {'lr': 0.00047818157041304535, 'samples': 11176448, 'steps': 21828, 'loss/train': 2.6526706218719482} +03/04/2022 15:06:20 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/04/2022 15:06:25 - INFO - codeparrot_training - Step 21829: {'lr': 0.00047817940217672315, 'samples': 11176960, 'steps': 21829, 'loss/train': 2.260767698287964} +03/04/2022 15:06:28 - INFO - codeparrot_training - Step 21830: {'lr': 0.0004781772338375868, 'samples': 11177472, 'steps': 21830, 'loss/train': 1.464534878730774} +03/04/2022 15:06:28 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/04/2022 15:06:34 - INFO - codeparrot_training - Step 21831: {'lr': 0.0004781750653956374, 'samples': 11177984, 'steps': 21831, 'loss/train': 1.628416657447815} +03/04/2022 15:06:37 - INFO - codeparrot_training - Step 21832: {'lr': 0.00047817289685087575, 'samples': 11178496, 'steps': 21832, 'loss/train': 2.012073040008545} +03/04/2022 15:06:37 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 15:06:42 - INFO - codeparrot_training - Step 21833: {'lr': 0.00047817072820330287, 'samples': 11179008, 'steps': 21833, 'loss/train': 1.7664902210235596} +03/04/2022 15:06:45 - INFO - codeparrot_training - Step 21834: {'lr': 0.0004781685594529199, 'samples': 11179520, 'steps': 21834, 'loss/train': 1.1208529472351074} +03/04/2022 15:06:46 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/04/2022 15:06:51 - INFO - codeparrot_training - Step 21835: {'lr': 0.00047816639059972767, 'samples': 11180032, 'steps': 21835, 'loss/train': 1.084551215171814} +03/04/2022 15:06:54 - INFO - codeparrot_training - Step 21836: {'lr': 0.00047816422164372713, 'samples': 11180544, 'steps': 21836, 'loss/train': 1.924899935722351} +03/04/2022 15:06:55 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/04/2022 15:06:59 - INFO - codeparrot_training - Step 21837: {'lr': 0.00047816205258491935, 'samples': 11181056, 'steps': 21837, 'loss/train': 1.776485562324524} +03/04/2022 15:07:02 - INFO - codeparrot_training - Step 21838: {'lr': 0.0004781598834233053, 'samples': 11181568, 'steps': 21838, 'loss/train': 1.2393256425857544} +03/04/2022 15:07:03 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 15:07:08 - INFO - codeparrot_training - Step 21839: {'lr': 0.0004781577141588859, 'samples': 11182080, 'steps': 21839, 'loss/train': 1.3320717811584473} +03/04/2022 15:07:11 - INFO - codeparrot_training - Step 21840: {'lr': 0.0004781555447916621, 'samples': 11182592, 'steps': 21840, 'loss/train': 2.4278759956359863} +03/04/2022 15:07:11 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 15:07:16 - INFO - codeparrot_training - Step 21841: {'lr': 0.000478153375321635, 'samples': 11183104, 'steps': 21841, 'loss/train': 0.885839581489563} +03/04/2022 15:07:19 - INFO - codeparrot_training - Step 21842: {'lr': 0.0004781512057488055, 'samples': 11183616, 'steps': 21842, 'loss/train': 2.154135227203369} +03/04/2022 15:07:20 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 15:07:25 - INFO - codeparrot_training - Step 21843: {'lr': 0.00047814903607317454, 'samples': 11184128, 'steps': 21843, 'loss/train': 2.189629554748535} +03/04/2022 15:07:28 - INFO - codeparrot_training - Step 21844: {'lr': 0.00047814686629474323, 'samples': 11184640, 'steps': 21844, 'loss/train': 1.5701254606246948} +03/04/2022 15:07:30 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 15:07:33 - INFO - codeparrot_training - Step 21845: {'lr': 0.00047814469641351237, 'samples': 11185152, 'steps': 21845, 'loss/train': 1.7391749620437622} +03/04/2022 15:07:37 - INFO - codeparrot_training - Step 21846: {'lr': 0.0004781425264294831, 'samples': 11185664, 'steps': 21846, 'loss/train': 1.8192226886749268} +03/04/2022 15:07:38 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 15:07:42 - INFO - codeparrot_training - Step 21847: {'lr': 0.0004781403563426563, 'samples': 11186176, 'steps': 21847, 'loss/train': 2.321241855621338} +03/04/2022 15:07:45 - INFO - codeparrot_training - Step 21848: {'lr': 0.00047813818615303295, 'samples': 11186688, 'steps': 21848, 'loss/train': 1.6791707277297974} +03/04/2022 15:07:47 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 15:07:50 - INFO - codeparrot_training - Step 21849: {'lr': 0.00047813601586061414, 'samples': 11187200, 'steps': 21849, 'loss/train': 1.761741042137146} +03/04/2022 15:07:53 - INFO - codeparrot_training - Step 21850: {'lr': 0.0004781338454654007, 'samples': 11187712, 'steps': 21850, 'loss/train': 2.7285876274108887} +03/04/2022 15:07:55 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/04/2022 15:07:59 - INFO - codeparrot_training - Step 21851: {'lr': 0.00047813167496739363, 'samples': 11188224, 'steps': 21851, 'loss/train': 2.0957930088043213} +03/04/2022 15:08:02 - INFO - codeparrot_training - Step 21852: {'lr': 0.00047812950436659405, 'samples': 11188736, 'steps': 21852, 'loss/train': 1.8517420291900635} +03/04/2022 15:08:03 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 15:08:07 - INFO - codeparrot_training - Step 21853: {'lr': 0.0004781273336630028, 'samples': 11189248, 'steps': 21853, 'loss/train': 2.5016672611236572} +03/04/2022 15:08:10 - INFO - codeparrot_training - Step 21854: {'lr': 0.00047812516285662086, 'samples': 11189760, 'steps': 21854, 'loss/train': 0.9420574307441711} +03/04/2022 15:08:12 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 15:08:16 - INFO - codeparrot_training - Step 21855: {'lr': 0.00047812299194744924, 'samples': 11190272, 'steps': 21855, 'loss/train': 2.1337785720825195} +03/04/2022 15:08:19 - INFO - codeparrot_training - Step 21856: {'lr': 0.0004781208209354889, 'samples': 11190784, 'steps': 21856, 'loss/train': 2.581223726272583} +03/04/2022 15:08:20 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 15:08:24 - INFO - codeparrot_training - Step 21857: {'lr': 0.00047811864982074087, 'samples': 11191296, 'steps': 21857, 'loss/train': 2.2595906257629395} +03/04/2022 15:08:28 - INFO - codeparrot_training - Step 21858: {'lr': 0.0004781164786032061, 'samples': 11191808, 'steps': 21858, 'loss/train': 1.933264970779419} +03/04/2022 15:08:29 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/04/2022 15:08:33 - INFO - codeparrot_training - Step 21859: {'lr': 0.0004781143072828856, 'samples': 11192320, 'steps': 21859, 'loss/train': 2.4519965648651123} +03/04/2022 15:08:36 - INFO - codeparrot_training - Step 21860: {'lr': 0.00047811213585978023, 'samples': 11192832, 'steps': 21860, 'loss/train': 1.7040185928344727} +03/04/2022 15:08:37 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 15:08:42 - INFO - codeparrot_training - Step 21861: {'lr': 0.0004781099643338911, 'samples': 11193344, 'steps': 21861, 'loss/train': 2.434128761291504} +03/04/2022 15:08:45 - INFO - codeparrot_training - Step 21862: {'lr': 0.00047810779270521914, 'samples': 11193856, 'steps': 21862, 'loss/train': 2.8304762840270996} +03/04/2022 15:08:46 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 15:08:50 - INFO - codeparrot_training - Step 21863: {'lr': 0.0004781056209737653, 'samples': 11194368, 'steps': 21863, 'loss/train': 2.906376600265503} +03/04/2022 15:08:53 - INFO - codeparrot_training - Step 21864: {'lr': 0.00047810344913953065, 'samples': 11194880, 'steps': 21864, 'loss/train': 1.102380633354187} +03/04/2022 15:08:54 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 15:08:59 - INFO - codeparrot_training - Step 21865: {'lr': 0.0004781012772025161, 'samples': 11195392, 'steps': 21865, 'loss/train': 1.9105682373046875} +03/04/2022 15:09:02 - INFO - codeparrot_training - Step 21866: {'lr': 0.0004780991051627226, 'samples': 11195904, 'steps': 21866, 'loss/train': 2.0018744468688965} +03/04/2022 15:09:03 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 15:09:07 - INFO - codeparrot_training - Step 21867: {'lr': 0.0004780969330201511, 'samples': 11196416, 'steps': 21867, 'loss/train': 2.1055076122283936} +03/04/2022 15:09:10 - INFO - codeparrot_training - Step 21868: {'lr': 0.0004780947607748027, 'samples': 11196928, 'steps': 21868, 'loss/train': 1.8085702657699585} +03/04/2022 15:09:12 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/04/2022 15:09:15 - INFO - codeparrot_training - Step 21869: {'lr': 0.00047809258842667837, 'samples': 11197440, 'steps': 21869, 'loss/train': 1.9983018636703491} +03/04/2022 15:09:19 - INFO - codeparrot_training - Step 21870: {'lr': 0.000478090415975779, 'samples': 11197952, 'steps': 21870, 'loss/train': 1.7375638484954834} +03/04/2022 15:09:20 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 15:09:24 - INFO - codeparrot_training - Step 21871: {'lr': 0.00047808824342210565, 'samples': 11198464, 'steps': 21871, 'loss/train': 2.9153366088867188} +03/04/2022 15:09:28 - INFO - codeparrot_training - Step 21872: {'lr': 0.0004780860707656592, 'samples': 11198976, 'steps': 21872, 'loss/train': 2.194629669189453} +03/04/2022 15:09:31 - INFO - codeparrot_training - Step 21873: {'lr': 0.0004780838980064407, 'samples': 11199488, 'steps': 21873, 'loss/train': 1.9374475479125977} +03/04/2022 15:09:31 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 15:09:36 - INFO - codeparrot_training - Step 21874: {'lr': 0.00047808172514445115, 'samples': 11200000, 'steps': 21874, 'loss/train': 2.315654754638672} +03/04/2022 15:09:39 - INFO - codeparrot_training - Step 21875: {'lr': 0.0004780795521796914, 'samples': 11200512, 'steps': 21875, 'loss/train': 2.5133540630340576} +03/04/2022 15:09:40 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 15:09:45 - INFO - codeparrot_training - Step 21876: {'lr': 0.0004780773791121626, 'samples': 11201024, 'steps': 21876, 'loss/train': 1.17023503780365} +03/04/2022 15:09:48 - INFO - codeparrot_training - Step 21877: {'lr': 0.0004780752059418656, 'samples': 11201536, 'steps': 21877, 'loss/train': 2.0274531841278076} +03/04/2022 15:09:49 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 15:09:53 - INFO - codeparrot_training - Step 21878: {'lr': 0.0004780730326688015, 'samples': 11202048, 'steps': 21878, 'loss/train': 2.395108699798584} +03/04/2022 15:09:56 - INFO - codeparrot_training - Step 21879: {'lr': 0.0004780708592929712, 'samples': 11202560, 'steps': 21879, 'loss/train': 0.8744766116142273} +03/04/2022 15:09:58 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/04/2022 15:10:02 - INFO - codeparrot_training - Step 21880: {'lr': 0.0004780686858143756, 'samples': 11203072, 'steps': 21880, 'loss/train': 2.3479812145233154} +03/04/2022 15:10:05 - INFO - codeparrot_training - Step 21881: {'lr': 0.0004780665122330159, 'samples': 11203584, 'steps': 21881, 'loss/train': 2.292370319366455} +03/04/2022 15:10:06 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 15:10:10 - INFO - codeparrot_training - Step 21882: {'lr': 0.00047806433854889285, 'samples': 11204096, 'steps': 21882, 'loss/train': 2.1839635372161865} +03/04/2022 15:10:13 - INFO - codeparrot_training - Step 21883: {'lr': 0.0004780621647620076, 'samples': 11204608, 'steps': 21883, 'loss/train': 1.233628511428833} +03/04/2022 15:10:14 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/04/2022 15:10:18 - INFO - codeparrot_training - Step 21884: {'lr': 0.00047805999087236097, 'samples': 11205120, 'steps': 21884, 'loss/train': 1.3520617485046387} +03/04/2022 15:10:22 - INFO - codeparrot_training - Step 21885: {'lr': 0.0004780578168799541, 'samples': 11205632, 'steps': 21885, 'loss/train': 1.4650671482086182} +03/04/2022 15:10:23 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 15:10:27 - INFO - codeparrot_training - Step 21886: {'lr': 0.00047805564278478787, 'samples': 11206144, 'steps': 21886, 'loss/train': 1.2267347574234009} +03/04/2022 15:10:30 - INFO - codeparrot_training - Step 21887: {'lr': 0.00047805346858686325, 'samples': 11206656, 'steps': 21887, 'loss/train': 1.8517637252807617} +03/04/2022 15:10:31 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 15:10:35 - INFO - codeparrot_training - Step 21888: {'lr': 0.0004780512942861813, 'samples': 11207168, 'steps': 21888, 'loss/train': 1.3061579465866089} +03/04/2022 15:10:38 - INFO - codeparrot_training - Step 21889: {'lr': 0.00047804911988274303, 'samples': 11207680, 'steps': 21889, 'loss/train': 0.7447634935379028} +03/04/2022 15:10:40 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 15:10:44 - INFO - codeparrot_training - Step 21890: {'lr': 0.00047804694537654927, 'samples': 11208192, 'steps': 21890, 'loss/train': 2.9917595386505127} +03/04/2022 15:10:47 - INFO - codeparrot_training - Step 21891: {'lr': 0.00047804477076760106, 'samples': 11208704, 'steps': 21891, 'loss/train': 1.2507126331329346} +03/04/2022 15:10:48 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 15:10:52 - INFO - codeparrot_training - Step 21892: {'lr': 0.0004780425960558994, 'samples': 11209216, 'steps': 21892, 'loss/train': 2.321552276611328} +03/04/2022 15:10:56 - INFO - codeparrot_training - Step 21893: {'lr': 0.00047804042124144526, 'samples': 11209728, 'steps': 21893, 'loss/train': 2.0158417224884033} +03/04/2022 15:10:57 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 15:11:01 - INFO - codeparrot_training - Step 21894: {'lr': 0.00047803824632423967, 'samples': 11210240, 'steps': 21894, 'loss/train': 1.6909152269363403} +03/04/2022 15:11:04 - INFO - codeparrot_training - Step 21895: {'lr': 0.0004780360713042835, 'samples': 11210752, 'steps': 21895, 'loss/train': 1.3176530599594116} +03/04/2022 15:11:06 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/04/2022 15:11:09 - INFO - codeparrot_training - Step 21896: {'lr': 0.0004780338961815779, 'samples': 11211264, 'steps': 21896, 'loss/train': 1.0510034561157227} +03/04/2022 15:11:12 - INFO - codeparrot_training - Step 21897: {'lr': 0.00047803172095612365, 'samples': 11211776, 'steps': 21897, 'loss/train': 2.5747287273406982} +03/04/2022 15:11:15 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/04/2022 15:11:18 - INFO - codeparrot_training - Step 21898: {'lr': 0.00047802954562792185, 'samples': 11212288, 'steps': 21898, 'loss/train': 1.6837868690490723} +03/04/2022 15:11:21 - INFO - codeparrot_training - Step 21899: {'lr': 0.0004780273701969734, 'samples': 11212800, 'steps': 21899, 'loss/train': 1.8674604892730713} +03/04/2022 15:11:23 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/04/2022 15:11:26 - INFO - codeparrot_training - Step 21900: {'lr': 0.00047802519466327945, 'samples': 11213312, 'steps': 21900, 'loss/train': 2.1394357681274414} +03/04/2022 15:11:29 - INFO - codeparrot_training - Step 21901: {'lr': 0.00047802301902684076, 'samples': 11213824, 'steps': 21901, 'loss/train': 1.7948881387710571} +03/04/2022 15:11:31 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/04/2022 15:11:35 - INFO - codeparrot_training - Step 21902: {'lr': 0.0004780208432876585, 'samples': 11214336, 'steps': 21902, 'loss/train': 1.7493559122085571} +03/04/2022 15:11:38 - INFO - codeparrot_training - Step 21903: {'lr': 0.00047801866744573353, 'samples': 11214848, 'steps': 21903, 'loss/train': 1.5645321607589722} +03/04/2022 15:11:40 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 15:11:43 - INFO - codeparrot_training - Step 21904: {'lr': 0.00047801649150106684, 'samples': 11215360, 'steps': 21904, 'loss/train': 1.8310548067092896} +03/04/2022 15:11:46 - INFO - codeparrot_training - Step 21905: {'lr': 0.00047801431545365947, 'samples': 11215872, 'steps': 21905, 'loss/train': 1.6645469665527344} +03/04/2022 15:11:48 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 15:11:51 - INFO - codeparrot_training - Step 21906: {'lr': 0.0004780121393035124, 'samples': 11216384, 'steps': 21906, 'loss/train': 2.0497524738311768} +03/04/2022 15:11:55 - INFO - codeparrot_training - Step 21907: {'lr': 0.0004780099630506265, 'samples': 11216896, 'steps': 21907, 'loss/train': 1.5563349723815918} +03/04/2022 15:11:56 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 15:12:00 - INFO - codeparrot_training - Step 21908: {'lr': 0.0004780077866950029, 'samples': 11217408, 'steps': 21908, 'loss/train': 1.4954856634140015} +03/04/2022 15:12:03 - INFO - codeparrot_training - Step 21909: {'lr': 0.00047800561023664246, 'samples': 11217920, 'steps': 21909, 'loss/train': 1.5529009103775024} +03/04/2022 15:12:07 - INFO - codeparrot_training - Step 21910: {'lr': 0.0004780034336755462, 'samples': 11218432, 'steps': 21910, 'loss/train': 0.8435540199279785} +03/04/2022 15:12:07 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 15:12:12 - INFO - codeparrot_training - Step 21911: {'lr': 0.00047800125701171517, 'samples': 11218944, 'steps': 21911, 'loss/train': 1.6277611255645752} +03/04/2022 15:12:15 - INFO - codeparrot_training - Step 21912: {'lr': 0.00047799908024515026, 'samples': 11219456, 'steps': 21912, 'loss/train': 2.128458023071289} +03/04/2022 15:12:15 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 15:12:20 - INFO - codeparrot_training - Step 21913: {'lr': 0.0004779969033758525, 'samples': 11219968, 'steps': 21913, 'loss/train': 1.6970218420028687} +03/04/2022 15:12:23 - INFO - codeparrot_training - Step 21914: {'lr': 0.00047799472640382287, 'samples': 11220480, 'steps': 21914, 'loss/train': 1.6252541542053223} +03/04/2022 15:12:23 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 15:12:28 - INFO - codeparrot_training - Step 21915: {'lr': 0.0004779925493290623, 'samples': 11220992, 'steps': 21915, 'loss/train': 1.781416893005371} +03/04/2022 15:12:32 - INFO - codeparrot_training - Step 21916: {'lr': 0.00047799037215157184, 'samples': 11221504, 'steps': 21916, 'loss/train': 1.6600096225738525} +03/04/2022 15:12:32 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 15:12:37 - INFO - codeparrot_training - Step 21917: {'lr': 0.0004779881948713524, 'samples': 11222016, 'steps': 21917, 'loss/train': 0.763556957244873} +03/04/2022 15:12:40 - INFO - codeparrot_training - Step 21918: {'lr': 0.000477986017488405, 'samples': 11222528, 'steps': 21918, 'loss/train': 1.7606571912765503} +03/04/2022 15:12:40 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 15:12:45 - INFO - codeparrot_training - Step 21919: {'lr': 0.00047798384000273053, 'samples': 11223040, 'steps': 21919, 'loss/train': 1.799996256828308} +03/04/2022 15:12:49 - INFO - codeparrot_training - Step 21920: {'lr': 0.0004779816624143302, 'samples': 11223552, 'steps': 21920, 'loss/train': 1.1845853328704834} +03/04/2022 15:12:49 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 15:12:54 - INFO - codeparrot_training - Step 21921: {'lr': 0.0004779794847232048, 'samples': 11224064, 'steps': 21921, 'loss/train': 0.5976961255073547} +03/04/2022 15:12:57 - INFO - codeparrot_training - Step 21922: {'lr': 0.0004779773069293554, 'samples': 11224576, 'steps': 21922, 'loss/train': 1.7079509496688843} +03/04/2022 15:12:57 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 15:13:02 - INFO - codeparrot_training - Step 21923: {'lr': 0.00047797512903278283, 'samples': 11225088, 'steps': 21923, 'loss/train': 1.7016901969909668} +03/04/2022 15:13:05 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 15:13:08 - INFO - codeparrot_training - Step 21924: {'lr': 0.0004779729510334883, 'samples': 11225600, 'steps': 21924, 'loss/train': 1.265492558479309} +03/04/2022 15:13:11 - INFO - codeparrot_training - Step 21925: {'lr': 0.0004779707729314726, 'samples': 11226112, 'steps': 21925, 'loss/train': 2.2427430152893066} +03/04/2022 15:13:14 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 15:13:16 - INFO - codeparrot_training - Step 21926: {'lr': 0.0004779685947267369, 'samples': 11226624, 'steps': 21926, 'loss/train': 1.3757342100143433} +03/04/2022 15:13:19 - INFO - codeparrot_training - Step 21927: {'lr': 0.00047796641641928195, 'samples': 11227136, 'steps': 21927, 'loss/train': 2.5644285678863525} +03/04/2022 15:13:22 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 15:13:25 - INFO - codeparrot_training - Step 21928: {'lr': 0.00047796423800910894, 'samples': 11227648, 'steps': 21928, 'loss/train': 2.0892622470855713} +03/04/2022 15:13:28 - INFO - codeparrot_training - Step 21929: {'lr': 0.00047796205949621873, 'samples': 11228160, 'steps': 21929, 'loss/train': 1.2484568357467651} +03/04/2022 15:13:31 - INFO - codeparrot_training - Step 21930: {'lr': 0.00047795988088061224, 'samples': 11228672, 'steps': 21930, 'loss/train': 2.413625955581665} +03/04/2022 15:13:31 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/04/2022 15:13:36 - INFO - codeparrot_training - Step 21931: {'lr': 0.00047795770216229065, 'samples': 11229184, 'steps': 21931, 'loss/train': 2.2180533409118652} +03/04/2022 15:13:39 - INFO - codeparrot_training - Step 21932: {'lr': 0.0004779555233412548, 'samples': 11229696, 'steps': 21932, 'loss/train': 0.7903336882591248} +03/04/2022 15:13:40 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/04/2022 15:13:45 - INFO - codeparrot_training - Step 21933: {'lr': 0.0004779533444175058, 'samples': 11230208, 'steps': 21933, 'loss/train': 1.8956271409988403} +03/04/2022 15:13:48 - INFO - codeparrot_training - Step 21934: {'lr': 0.00047795116539104445, 'samples': 11230720, 'steps': 21934, 'loss/train': 1.2361618280410767} +03/04/2022 15:13:48 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 15:13:53 - INFO - codeparrot_training - Step 21935: {'lr': 0.0004779489862618718, 'samples': 11231232, 'steps': 21935, 'loss/train': 1.2130106687545776} +03/04/2022 15:13:57 - INFO - codeparrot_training - Step 21936: {'lr': 0.00047794680702998893, 'samples': 11231744, 'steps': 21936, 'loss/train': 1.6484390497207642} +03/04/2022 15:13:57 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 15:14:02 - INFO - codeparrot_training - Step 21937: {'lr': 0.0004779446276953967, 'samples': 11232256, 'steps': 21937, 'loss/train': 2.1189677715301514} +03/04/2022 15:14:05 - INFO - codeparrot_training - Step 21938: {'lr': 0.00047794244825809614, 'samples': 11232768, 'steps': 21938, 'loss/train': 2.193443536758423} +03/04/2022 15:14:05 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 15:14:10 - INFO - codeparrot_training - Step 21939: {'lr': 0.0004779402687180882, 'samples': 11233280, 'steps': 21939, 'loss/train': 1.525948405265808} +03/04/2022 15:14:13 - INFO - codeparrot_training - Step 21940: {'lr': 0.00047793808907537394, 'samples': 11233792, 'steps': 21940, 'loss/train': 1.7107340097427368} +03/04/2022 15:14:14 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 15:14:19 - INFO - codeparrot_training - Step 21941: {'lr': 0.0004779359093299543, 'samples': 11234304, 'steps': 21941, 'loss/train': 1.7578213214874268} +03/04/2022 15:14:22 - INFO - codeparrot_training - Step 21942: {'lr': 0.00047793372948183024, 'samples': 11234816, 'steps': 21942, 'loss/train': 1.7391029596328735} +03/04/2022 15:14:23 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 15:14:27 - INFO - codeparrot_training - Step 21943: {'lr': 0.0004779315495310027, 'samples': 11235328, 'steps': 21943, 'loss/train': 1.7528722286224365} +03/04/2022 15:14:30 - INFO - codeparrot_training - Step 21944: {'lr': 0.00047792936947747285, 'samples': 11235840, 'steps': 21944, 'loss/train': 2.034247636795044} +03/04/2022 15:14:31 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/04/2022 15:14:35 - INFO - codeparrot_training - Step 21945: {'lr': 0.00047792718932124147, 'samples': 11236352, 'steps': 21945, 'loss/train': 1.9893953800201416} +03/04/2022 15:14:39 - INFO - codeparrot_training - Step 21946: {'lr': 0.00047792500906230963, 'samples': 11236864, 'steps': 21946, 'loss/train': 1.4049264192581177} +03/04/2022 15:14:39 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/04/2022 15:14:44 - INFO - codeparrot_training - Step 21947: {'lr': 0.00047792282870067827, 'samples': 11237376, 'steps': 21947, 'loss/train': 2.1591737270355225} +03/04/2022 15:14:47 - INFO - codeparrot_training - Step 21948: {'lr': 0.0004779206482363484, 'samples': 11237888, 'steps': 21948, 'loss/train': 2.408374071121216} +03/04/2022 15:14:47 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 15:14:52 - INFO - codeparrot_training - Step 21949: {'lr': 0.000477918467669321, 'samples': 11238400, 'steps': 21949, 'loss/train': 3.3060591220855713} +03/04/2022 15:14:55 - INFO - codeparrot_training - Step 21950: {'lr': 0.0004779162869995971, 'samples': 11238912, 'steps': 21950, 'loss/train': 2.0688822269439697} +03/04/2022 15:14:56 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/04/2022 15:15:01 - INFO - codeparrot_training - Step 21951: {'lr': 0.00047791410622717757, 'samples': 11239424, 'steps': 21951, 'loss/train': 2.2057876586914062} +03/04/2022 15:15:04 - INFO - codeparrot_training - Step 21952: {'lr': 0.0004779119253520635, 'samples': 11239936, 'steps': 21952, 'loss/train': 0.6143112182617188} +03/04/2022 15:15:04 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 15:15:09 - INFO - codeparrot_training - Step 21953: {'lr': 0.0004779097443742558, 'samples': 11240448, 'steps': 21953, 'loss/train': 1.707655668258667} +03/04/2022 15:15:12 - INFO - codeparrot_training - Step 21954: {'lr': 0.0004779075632937556, 'samples': 11240960, 'steps': 21954, 'loss/train': 1.8211047649383545} +03/04/2022 15:15:13 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/04/2022 15:15:18 - INFO - codeparrot_training - Step 21955: {'lr': 0.00047790538211056366, 'samples': 11241472, 'steps': 21955, 'loss/train': 1.1796493530273438} +03/04/2022 15:15:21 - INFO - codeparrot_training - Step 21956: {'lr': 0.00047790320082468106, 'samples': 11241984, 'steps': 21956, 'loss/train': 1.7350280284881592} +03/04/2022 15:15:21 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/04/2022 15:15:27 - INFO - codeparrot_training - Step 21957: {'lr': 0.00047790101943610884, 'samples': 11242496, 'steps': 21957, 'loss/train': 1.922612190246582} +03/04/2022 15:15:30 - INFO - codeparrot_training - Step 21958: {'lr': 0.000477898837944848, 'samples': 11243008, 'steps': 21958, 'loss/train': 2.4324440956115723} +03/04/2022 15:15:32 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 15:15:35 - INFO - codeparrot_training - Step 21959: {'lr': 0.0004778966563508994, 'samples': 11243520, 'steps': 21959, 'loss/train': 1.0910776853561401} +03/04/2022 15:15:38 - INFO - codeparrot_training - Step 21960: {'lr': 0.00047789447465426406, 'samples': 11244032, 'steps': 21960, 'loss/train': 1.9292227029800415} +03/04/2022 15:15:41 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/04/2022 15:15:44 - INFO - codeparrot_training - Step 21961: {'lr': 0.000477892292854943, 'samples': 11244544, 'steps': 21961, 'loss/train': 2.1197433471679688} +03/04/2022 15:15:47 - INFO - codeparrot_training - Step 21962: {'lr': 0.00047789011095293723, 'samples': 11245056, 'steps': 21962, 'loss/train': 2.1343019008636475} +03/04/2022 15:15:49 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/04/2022 15:15:52 - INFO - codeparrot_training - Step 21963: {'lr': 0.0004778879289482476, 'samples': 11245568, 'steps': 21963, 'loss/train': 1.8667480945587158} +03/04/2022 15:15:55 - INFO - codeparrot_training - Step 21964: {'lr': 0.00047788574684087527, 'samples': 11246080, 'steps': 21964, 'loss/train': 2.2299346923828125} +03/04/2022 15:15:59 - INFO - codeparrot_training - Step 21965: {'lr': 0.0004778835646308211, 'samples': 11246592, 'steps': 21965, 'loss/train': 0.9373946785926819} +03/04/2022 15:15:59 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 15:16:04 - INFO - codeparrot_training - Step 21966: {'lr': 0.0004778813823180861, 'samples': 11247104, 'steps': 21966, 'loss/train': 1.511555552482605} +03/04/2022 15:16:07 - INFO - codeparrot_training - Step 21967: {'lr': 0.0004778791999026713, 'samples': 11247616, 'steps': 21967, 'loss/train': 1.9830635786056519} +03/04/2022 15:16:07 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/04/2022 15:16:12 - INFO - codeparrot_training - Step 21968: {'lr': 0.0004778770173845777, 'samples': 11248128, 'steps': 21968, 'loss/train': 2.013211727142334} +03/04/2022 15:16:16 - INFO - codeparrot_training - Step 21969: {'lr': 0.00047787483476380613, 'samples': 11248640, 'steps': 21969, 'loss/train': 2.1828744411468506} +03/04/2022 15:16:16 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 15:16:21 - INFO - codeparrot_training - Step 21970: {'lr': 0.0004778726520403577, 'samples': 11249152, 'steps': 21970, 'loss/train': 1.5246416330337524} +03/04/2022 15:16:24 - INFO - codeparrot_training - Step 21971: {'lr': 0.00047787046921423336, 'samples': 11249664, 'steps': 21971, 'loss/train': 1.9714858531951904} +03/04/2022 15:16:24 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 15:16:29 - INFO - codeparrot_training - Step 21972: {'lr': 0.00047786828628543416, 'samples': 11250176, 'steps': 21972, 'loss/train': 1.3408609628677368} +03/04/2022 15:16:32 - INFO - codeparrot_training - Step 21973: {'lr': 0.00047786610325396096, 'samples': 11250688, 'steps': 21973, 'loss/train': 1.411632776260376} +03/04/2022 15:16:33 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 15:16:38 - INFO - codeparrot_training - Step 21974: {'lr': 0.0004778639201198149, 'samples': 11251200, 'steps': 21974, 'loss/train': 1.6963574886322021} +03/04/2022 15:16:41 - INFO - codeparrot_training - Step 21975: {'lr': 0.00047786173688299684, 'samples': 11251712, 'steps': 21975, 'loss/train': 1.9533493518829346} +03/04/2022 15:16:41 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 15:16:46 - INFO - codeparrot_training - Step 21976: {'lr': 0.00047785955354350776, 'samples': 11252224, 'steps': 21976, 'loss/train': 1.78054678440094} +03/04/2022 15:16:49 - INFO - codeparrot_training - Step 21977: {'lr': 0.00047785737010134865, 'samples': 11252736, 'steps': 21977, 'loss/train': 1.3489006757736206} +03/04/2022 15:16:49 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 15:16:54 - INFO - codeparrot_training - Step 21978: {'lr': 0.0004778551865565206, 'samples': 11253248, 'steps': 21978, 'loss/train': 1.6467453241348267} +03/04/2022 15:16:58 - INFO - codeparrot_training - Step 21979: {'lr': 0.00047785300290902446, 'samples': 11253760, 'steps': 21979, 'loss/train': 2.0208308696746826} +03/04/2022 15:16:58 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/04/2022 15:17:03 - INFO - codeparrot_training - Step 21980: {'lr': 0.0004778508191588613, 'samples': 11254272, 'steps': 21980, 'loss/train': 2.4197616577148438} +03/04/2022 15:17:06 - INFO - codeparrot_training - Step 21981: {'lr': 0.00047784863530603213, 'samples': 11254784, 'steps': 21981, 'loss/train': 1.2553106546401978} +03/04/2022 15:17:06 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 15:17:11 - INFO - codeparrot_training - Step 21982: {'lr': 0.0004778464513505378, 'samples': 11255296, 'steps': 21982, 'loss/train': 2.1986725330352783} +03/04/2022 15:17:14 - INFO - codeparrot_training - Step 21983: {'lr': 0.0004778442672923794, 'samples': 11255808, 'steps': 21983, 'loss/train': 1.7437392473220825} +03/04/2022 15:17:15 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 15:17:20 - INFO - codeparrot_training - Step 21984: {'lr': 0.0004778420831315579, 'samples': 11256320, 'steps': 21984, 'loss/train': 2.50854229927063} +03/04/2022 15:17:23 - INFO - codeparrot_training - Step 21985: {'lr': 0.0004778398988680743, 'samples': 11256832, 'steps': 21985, 'loss/train': 1.6756504774093628} +03/04/2022 15:17:23 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 15:17:28 - INFO - codeparrot_training - Step 21986: {'lr': 0.00047783771450192946, 'samples': 11257344, 'steps': 21986, 'loss/train': 2.267674684524536} +03/04/2022 15:17:31 - INFO - codeparrot_training - Step 21987: {'lr': 0.00047783553003312456, 'samples': 11257856, 'steps': 21987, 'loss/train': 2.1100542545318604} +03/04/2022 15:17:31 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 15:17:37 - INFO - codeparrot_training - Step 21988: {'lr': 0.00047783334546166046, 'samples': 11258368, 'steps': 21988, 'loss/train': 2.270996332168579} +03/04/2022 15:17:40 - INFO - codeparrot_training - Step 21989: {'lr': 0.0004778311607875382, 'samples': 11258880, 'steps': 21989, 'loss/train': 2.2060697078704834} +03/04/2022 15:17:40 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/04/2022 15:17:45 - INFO - codeparrot_training - Step 21990: {'lr': 0.0004778289760107587, 'samples': 11259392, 'steps': 21990, 'loss/train': 1.3154963254928589} +03/04/2022 15:17:48 - INFO - codeparrot_training - Step 21991: {'lr': 0.00047782679113132293, 'samples': 11259904, 'steps': 21991, 'loss/train': 1.5757817029953003} +03/04/2022 15:17:49 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 15:17:54 - INFO - codeparrot_training - Step 21992: {'lr': 0.00047782460614923195, 'samples': 11260416, 'steps': 21992, 'loss/train': 2.063816547393799} +03/04/2022 15:17:57 - INFO - codeparrot_training - Step 21993: {'lr': 0.00047782242106448675, 'samples': 11260928, 'steps': 21993, 'loss/train': 1.6355797052383423} +03/04/2022 15:18:00 - INFO - codeparrot_training - Step 21994: {'lr': 0.00047782023587708826, 'samples': 11261440, 'steps': 21994, 'loss/train': 2.315589666366577} +03/04/2022 15:18:00 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 15:18:06 - INFO - codeparrot_training - Step 21995: {'lr': 0.0004778180505870375, 'samples': 11261952, 'steps': 21995, 'loss/train': 1.960210919380188} +03/04/2022 15:18:09 - INFO - codeparrot_training - Step 21996: {'lr': 0.0004778158651943355, 'samples': 11262464, 'steps': 21996, 'loss/train': 2.1770856380462646} +03/04/2022 15:18:09 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/04/2022 15:18:14 - INFO - codeparrot_training - Step 21997: {'lr': 0.0004778136796989831, 'samples': 11262976, 'steps': 21997, 'loss/train': 1.9209784269332886} +03/04/2022 15:18:17 - INFO - codeparrot_training - Step 21998: {'lr': 0.0004778114941009814, 'samples': 11263488, 'steps': 21998, 'loss/train': 1.9639023542404175} +03/04/2022 15:18:17 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 15:18:22 - INFO - codeparrot_training - Step 21999: {'lr': 0.0004778093084003313, 'samples': 11264000, 'steps': 21999, 'loss/train': 1.5556331872940063} +03/04/2022 15:18:26 - INFO - codeparrot_training - Step 22000: {'lr': 0.00047780712259703394, 'samples': 11264512, 'steps': 22000, 'loss/train': 1.489432454109192} +03/04/2022 15:18:26 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 15:18:31 - INFO - codeparrot_training - Step 22001: {'lr': 0.00047780493669109017, 'samples': 11265024, 'steps': 22001, 'loss/train': 2.092963933944702} +03/04/2022 15:18:34 - INFO - codeparrot_training - Step 22002: {'lr': 0.000477802750682501, 'samples': 11265536, 'steps': 22002, 'loss/train': 1.696277379989624} +03/04/2022 15:18:34 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 15:18:40 - INFO - codeparrot_training - Step 22003: {'lr': 0.0004778005645712674, 'samples': 11266048, 'steps': 22003, 'loss/train': 2.6606054306030273} +03/04/2022 15:18:43 - INFO - codeparrot_training - Step 22004: {'lr': 0.00047779837835739043, 'samples': 11266560, 'steps': 22004, 'loss/train': 0.653022050857544} +03/04/2022 15:18:43 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/04/2022 15:18:48 - INFO - codeparrot_training - Step 22005: {'lr': 0.000477796192040871, 'samples': 11267072, 'steps': 22005, 'loss/train': 1.668879747390747} +03/04/2022 15:18:51 - INFO - codeparrot_training - Step 22006: {'lr': 0.00047779400562171016, 'samples': 11267584, 'steps': 22006, 'loss/train': 1.1293333768844604} +03/04/2022 15:18:51 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/04/2022 15:18:57 - INFO - codeparrot_training - Step 22007: {'lr': 0.00047779181909990876, 'samples': 11268096, 'steps': 22007, 'loss/train': 2.0693626403808594} +03/04/2022 15:19:00 - INFO - codeparrot_training - Step 22008: {'lr': 0.000477789632475468, 'samples': 11268608, 'steps': 22008, 'loss/train': 1.37947416305542} +03/04/2022 15:19:00 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 15:19:05 - INFO - codeparrot_training - Step 22009: {'lr': 0.00047778744574838864, 'samples': 11269120, 'steps': 22009, 'loss/train': 1.8734935522079468} +03/04/2022 15:19:08 - INFO - codeparrot_training - Step 22010: {'lr': 0.00047778525891867187, 'samples': 11269632, 'steps': 22010, 'loss/train': 1.6540238857269287} +03/04/2022 15:19:08 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 15:19:13 - INFO - codeparrot_training - Step 22011: {'lr': 0.00047778307198631856, 'samples': 11270144, 'steps': 22011, 'loss/train': 1.8097188472747803} +03/04/2022 15:19:16 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 15:19:19 - INFO - codeparrot_training - Step 22012: {'lr': 0.00047778088495132963, 'samples': 11270656, 'steps': 22012, 'loss/train': 1.5720757246017456} +03/04/2022 15:19:22 - INFO - codeparrot_training - Step 22013: {'lr': 0.0004777786978137062, 'samples': 11271168, 'steps': 22013, 'loss/train': 1.8225023746490479} +03/04/2022 15:19:25 - INFO - codeparrot_training - Step 22014: {'lr': 0.00047777651057344915, 'samples': 11271680, 'steps': 22014, 'loss/train': 1.7627143859863281} +03/04/2022 15:19:27 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 15:19:31 - INFO - codeparrot_training - Step 22015: {'lr': 0.0004777743232305596, 'samples': 11272192, 'steps': 22015, 'loss/train': 1.4306069612503052} +03/04/2022 15:19:34 - INFO - codeparrot_training - Step 22016: {'lr': 0.00047777213578503844, 'samples': 11272704, 'steps': 22016, 'loss/train': 1.5909779071807861} +03/04/2022 15:19:36 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/04/2022 15:19:39 - INFO - codeparrot_training - Step 22017: {'lr': 0.0004777699482368867, 'samples': 11273216, 'steps': 22017, 'loss/train': 0.946075439453125} +03/04/2022 15:19:42 - INFO - codeparrot_training - Step 22018: {'lr': 0.00047776776058610525, 'samples': 11273728, 'steps': 22018, 'loss/train': 1.881159782409668} +03/04/2022 15:19:44 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 15:19:48 - INFO - codeparrot_training - Step 22019: {'lr': 0.0004777655728326952, 'samples': 11274240, 'steps': 22019, 'loss/train': 1.3619402647018433} +03/04/2022 15:19:51 - INFO - codeparrot_training - Step 22020: {'lr': 0.0004777633849766575, 'samples': 11274752, 'steps': 22020, 'loss/train': 1.4718782901763916} +03/04/2022 15:19:52 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 15:19:56 - INFO - codeparrot_training - Step 22021: {'lr': 0.00047776119701799317, 'samples': 11275264, 'steps': 22021, 'loss/train': 1.8714056015014648} +03/04/2022 15:19:59 - INFO - codeparrot_training - Step 22022: {'lr': 0.0004777590089567031, 'samples': 11275776, 'steps': 22022, 'loss/train': 1.5571768283843994} +03/04/2022 15:20:01 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 15:20:05 - INFO - codeparrot_training - Step 22023: {'lr': 0.00047775682079278836, 'samples': 11276288, 'steps': 22023, 'loss/train': 1.7106083631515503} +03/04/2022 15:20:08 - INFO - codeparrot_training - Step 22024: {'lr': 0.0004777546325262499, 'samples': 11276800, 'steps': 22024, 'loss/train': 2.4517662525177} +03/04/2022 15:20:09 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 15:20:13 - INFO - codeparrot_training - Step 22025: {'lr': 0.00047775244415708873, 'samples': 11277312, 'steps': 22025, 'loss/train': 1.987546443939209} +03/04/2022 15:20:16 - INFO - codeparrot_training - Step 22026: {'lr': 0.0004777502556853058, 'samples': 11277824, 'steps': 22026, 'loss/train': 2.0129141807556152} +03/04/2022 15:20:18 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 15:20:22 - INFO - codeparrot_training - Step 22027: {'lr': 0.00047774806711090213, 'samples': 11278336, 'steps': 22027, 'loss/train': 2.078073024749756} +03/04/2022 15:20:25 - INFO - codeparrot_training - Step 22028: {'lr': 0.0004777458784338787, 'samples': 11278848, 'steps': 22028, 'loss/train': 1.2995330095291138} +03/04/2022 15:20:27 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 15:20:30 - INFO - codeparrot_training - Step 22029: {'lr': 0.00047774368965423653, 'samples': 11279360, 'steps': 22029, 'loss/train': 2.0523064136505127} +03/04/2022 15:20:33 - INFO - codeparrot_training - Step 22030: {'lr': 0.0004777415007719765, 'samples': 11279872, 'steps': 22030, 'loss/train': 1.2059636116027832} +03/04/2022 15:20:35 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 15:20:38 - INFO - codeparrot_training - Step 22031: {'lr': 0.00047773931178709975, 'samples': 11280384, 'steps': 22031, 'loss/train': 2.255063533782959} +03/04/2022 15:20:42 - INFO - codeparrot_training - Step 22032: {'lr': 0.00047773712269960714, 'samples': 11280896, 'steps': 22032, 'loss/train': 0.68230140209198} +03/04/2022 15:20:43 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 15:20:47 - INFO - codeparrot_training - Step 22033: {'lr': 0.00047773493350949963, 'samples': 11281408, 'steps': 22033, 'loss/train': 1.1164714097976685} +03/04/2022 15:20:50 - INFO - codeparrot_training - Step 22034: {'lr': 0.00047773274421677834, 'samples': 11281920, 'steps': 22034, 'loss/train': 2.594212532043457} +03/04/2022 15:20:51 - INFO - codeparrot_training - Skipping example with length 996 (seq_length=1024) +03/04/2022 15:20:55 - INFO - codeparrot_training - Step 22035: {'lr': 0.0004777305548214442, 'samples': 11282432, 'steps': 22035, 'loss/train': 1.8005529642105103} +03/04/2022 15:20:58 - INFO - codeparrot_training - Step 22036: {'lr': 0.0004777283653234982, 'samples': 11282944, 'steps': 22036, 'loss/train': 1.9482887983322144} +03/04/2022 15:21:00 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 15:21:04 - INFO - codeparrot_training - Step 22037: {'lr': 0.00047772617572294123, 'samples': 11283456, 'steps': 22037, 'loss/train': 2.0005550384521484} +03/04/2022 15:21:07 - INFO - codeparrot_training - Step 22038: {'lr': 0.0004777239860197744, 'samples': 11283968, 'steps': 22038, 'loss/train': 1.4353426694869995} +03/04/2022 15:21:08 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 15:21:12 - INFO - codeparrot_training - Step 22039: {'lr': 0.0004777217962139987, 'samples': 11284480, 'steps': 22039, 'loss/train': 2.328906297683716} +03/04/2022 15:21:15 - INFO - codeparrot_training - Step 22040: {'lr': 0.000477719606305615, 'samples': 11284992, 'steps': 22040, 'loss/train': 1.821467638015747} +03/04/2022 15:21:16 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/04/2022 15:21:21 - INFO - codeparrot_training - Step 22041: {'lr': 0.0004777174162946244, 'samples': 11285504, 'steps': 22041, 'loss/train': 2.6764228343963623} +03/04/2022 15:21:24 - INFO - codeparrot_training - Step 22042: {'lr': 0.0004777152261810279, 'samples': 11286016, 'steps': 22042, 'loss/train': 2.2285783290863037} +03/04/2022 15:21:27 - INFO - codeparrot_training - Step 22043: {'lr': 0.0004777130359648263, 'samples': 11286528, 'steps': 22043, 'loss/train': 2.2540347576141357} +03/04/2022 15:21:27 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 15:21:33 - INFO - codeparrot_training - Step 22044: {'lr': 0.0004777108456460208, 'samples': 11287040, 'steps': 22044, 'loss/train': 1.9135873317718506} +03/04/2022 15:21:36 - INFO - codeparrot_training - Step 22045: {'lr': 0.00047770865522461233, 'samples': 11287552, 'steps': 22045, 'loss/train': 1.4600987434387207} +03/04/2022 15:21:36 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/04/2022 15:21:41 - INFO - codeparrot_training - Step 22046: {'lr': 0.0004777064647006018, 'samples': 11288064, 'steps': 22046, 'loss/train': 2.368587017059326} +03/04/2022 15:21:44 - INFO - codeparrot_training - Step 22047: {'lr': 0.0004777042740739903, 'samples': 11288576, 'steps': 22047, 'loss/train': 1.941925048828125} +03/04/2022 15:21:44 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 15:21:49 - INFO - codeparrot_training - Step 22048: {'lr': 0.0004777020833447787, 'samples': 11289088, 'steps': 22048, 'loss/train': 1.3183236122131348} +03/04/2022 15:21:53 - INFO - codeparrot_training - Step 22049: {'lr': 0.0004776998925129681, 'samples': 11289600, 'steps': 22049, 'loss/train': 1.8844407796859741} +03/04/2022 15:21:53 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 15:21:58 - INFO - codeparrot_training - Step 22050: {'lr': 0.0004776977015785595, 'samples': 11290112, 'steps': 22050, 'loss/train': 0.652808427810669} +03/04/2022 15:22:01 - INFO - codeparrot_training - Step 22051: {'lr': 0.0004776955105415537, 'samples': 11290624, 'steps': 22051, 'loss/train': 2.3172860145568848} +03/04/2022 15:22:01 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 15:22:06 - INFO - codeparrot_training - Step 22052: {'lr': 0.00047769331940195194, 'samples': 11291136, 'steps': 22052, 'loss/train': 2.280590057373047} +03/04/2022 15:22:09 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/04/2022 15:22:12 - INFO - codeparrot_training - Step 22053: {'lr': 0.00047769112815975503, 'samples': 11291648, 'steps': 22053, 'loss/train': 2.7132418155670166} +03/04/2022 15:22:15 - INFO - codeparrot_training - Step 22054: {'lr': 0.00047768893681496397, 'samples': 11292160, 'steps': 22054, 'loss/train': 1.6149077415466309} +03/04/2022 15:22:18 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/04/2022 15:22:20 - INFO - codeparrot_training - Step 22055: {'lr': 0.00047768674536757984, 'samples': 11292672, 'steps': 22055, 'loss/train': 1.3395129442214966} +03/04/2022 15:22:23 - INFO - codeparrot_training - Step 22056: {'lr': 0.00047768455381760357, 'samples': 11293184, 'steps': 22056, 'loss/train': 1.3069261312484741} +03/04/2022 15:22:26 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/04/2022 15:22:28 - INFO - codeparrot_training - Step 22057: {'lr': 0.00047768236216503613, 'samples': 11293696, 'steps': 22057, 'loss/train': 1.9505176544189453} +03/04/2022 15:22:32 - INFO - codeparrot_training - Step 22058: {'lr': 0.00047768017040987856, 'samples': 11294208, 'steps': 22058, 'loss/train': 3.4990997314453125} +03/04/2022 15:22:34 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/04/2022 15:22:37 - INFO - codeparrot_training - Step 22059: {'lr': 0.0004776779785521318, 'samples': 11294720, 'steps': 22059, 'loss/train': 1.1544699668884277} +03/04/2022 15:22:40 - INFO - codeparrot_training - Step 22060: {'lr': 0.0004776757865917969, 'samples': 11295232, 'steps': 22060, 'loss/train': 0.9072554111480713} +03/04/2022 15:22:42 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 15:22:45 - INFO - codeparrot_training - Step 22061: {'lr': 0.0004776735945288747, 'samples': 11295744, 'steps': 22061, 'loss/train': 2.2614681720733643} +03/04/2022 15:22:49 - INFO - codeparrot_training - Step 22062: {'lr': 0.00047767140236336635, 'samples': 11296256, 'steps': 22062, 'loss/train': 2.565476179122925} +03/04/2022 15:22:51 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 15:22:54 - INFO - codeparrot_training - Step 22063: {'lr': 0.00047766921009527284, 'samples': 11296768, 'steps': 22063, 'loss/train': 2.4209182262420654} +03/04/2022 15:22:57 - INFO - codeparrot_training - Step 22064: {'lr': 0.00047766701772459505, 'samples': 11297280, 'steps': 22064, 'loss/train': 1.689549207687378} +03/04/2022 15:22:59 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 15:23:02 - INFO - codeparrot_training - Step 22065: {'lr': 0.00047766482525133405, 'samples': 11297792, 'steps': 22065, 'loss/train': 1.6977654695510864} +03/04/2022 15:23:06 - INFO - codeparrot_training - Step 22066: {'lr': 0.00047766263267549073, 'samples': 11298304, 'steps': 22066, 'loss/train': 2.003960609436035} +03/04/2022 15:23:08 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 15:23:11 - INFO - codeparrot_training - Step 22067: {'lr': 0.0004776604399970661, 'samples': 11298816, 'steps': 22067, 'loss/train': 2.128910779953003} +03/04/2022 15:23:14 - INFO - codeparrot_training - Step 22068: {'lr': 0.0004776582472160613, 'samples': 11299328, 'steps': 22068, 'loss/train': 1.5377613306045532} +03/04/2022 15:23:16 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 15:23:19 - INFO - codeparrot_training - Step 22069: {'lr': 0.0004776560543324772, 'samples': 11299840, 'steps': 22069, 'loss/train': 3.7584848403930664} +03/04/2022 15:23:22 - INFO - codeparrot_training - Step 22070: {'lr': 0.0004776538613463147, 'samples': 11300352, 'steps': 22070, 'loss/train': 0.5112701058387756} +03/04/2022 15:23:24 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 15:23:28 - INFO - codeparrot_training - Step 22071: {'lr': 0.00047765166825757487, 'samples': 11300864, 'steps': 22071, 'loss/train': 1.6936335563659668} +03/04/2022 15:23:31 - INFO - codeparrot_training - Step 22072: {'lr': 0.00047764947506625887, 'samples': 11301376, 'steps': 22072, 'loss/train': 1.8575464487075806} +03/04/2022 15:23:33 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 15:23:36 - INFO - codeparrot_training - Step 22073: {'lr': 0.00047764728177236736, 'samples': 11301888, 'steps': 22073, 'loss/train': 2.0432019233703613} +03/04/2022 15:23:39 - INFO - codeparrot_training - Step 22074: {'lr': 0.0004776450883759016, 'samples': 11302400, 'steps': 22074, 'loss/train': 1.9877384901046753} +03/04/2022 15:23:41 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 15:23:45 - INFO - codeparrot_training - Step 22075: {'lr': 0.0004776428948768625, 'samples': 11302912, 'steps': 22075, 'loss/train': 2.1814913749694824} +03/04/2022 15:23:48 - INFO - codeparrot_training - Step 22076: {'lr': 0.00047764070127525096, 'samples': 11303424, 'steps': 22076, 'loss/train': 1.649192214012146} +03/04/2022 15:23:50 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 15:23:53 - INFO - codeparrot_training - Step 22077: {'lr': 0.00047763850757106803, 'samples': 11303936, 'steps': 22077, 'loss/train': 1.643643856048584} +03/04/2022 15:23:56 - INFO - codeparrot_training - Step 22078: {'lr': 0.0004776363137643147, 'samples': 11304448, 'steps': 22078, 'loss/train': 1.4457054138183594} +03/04/2022 15:23:59 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/04/2022 15:24:02 - INFO - codeparrot_training - Step 22079: {'lr': 0.000477634119854992, 'samples': 11304960, 'steps': 22079, 'loss/train': 0.4887719750404358} +03/04/2022 15:24:05 - INFO - codeparrot_training - Step 22080: {'lr': 0.00047763192584310087, 'samples': 11305472, 'steps': 22080, 'loss/train': 2.061272144317627} +03/04/2022 15:24:09 - INFO - codeparrot_training - Step 22081: {'lr': 0.0004776297317286423, 'samples': 11305984, 'steps': 22081, 'loss/train': 2.3910043239593506} +03/04/2022 15:24:10 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 15:24:14 - INFO - codeparrot_training - Step 22082: {'lr': 0.00047762753751161725, 'samples': 11306496, 'steps': 22082, 'loss/train': 2.178981065750122} +03/04/2022 15:24:17 - INFO - codeparrot_training - Step 22083: {'lr': 0.0004776253431920268, 'samples': 11307008, 'steps': 22083, 'loss/train': 2.106750249862671} +03/04/2022 15:24:21 - INFO - codeparrot_training - Step 22084: {'lr': 0.00047762314876987185, 'samples': 11307520, 'steps': 22084, 'loss/train': 1.6975114345550537} +03/04/2022 15:24:21 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/04/2022 15:24:26 - INFO - codeparrot_training - Step 22085: {'lr': 0.0004776209542451534, 'samples': 11308032, 'steps': 22085, 'loss/train': 1.9828957319259644} +03/04/2022 15:24:29 - INFO - codeparrot_training - Step 22086: {'lr': 0.0004776187596178725, 'samples': 11308544, 'steps': 22086, 'loss/train': 2.6098997592926025} +03/04/2022 15:24:29 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/04/2022 15:24:34 - INFO - codeparrot_training - Step 22087: {'lr': 0.00047761656488803006, 'samples': 11309056, 'steps': 22087, 'loss/train': 1.83033287525177} +03/04/2022 15:24:38 - INFO - codeparrot_training - Step 22088: {'lr': 0.00047761437005562716, 'samples': 11309568, 'steps': 22088, 'loss/train': 1.8869757652282715} +03/04/2022 15:24:38 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/04/2022 15:24:43 - INFO - codeparrot_training - Step 22089: {'lr': 0.00047761217512066475, 'samples': 11310080, 'steps': 22089, 'loss/train': 1.8974472284317017} +03/04/2022 15:24:46 - INFO - codeparrot_training - Step 22090: {'lr': 0.0004776099800831437, 'samples': 11310592, 'steps': 22090, 'loss/train': 1.6315171718597412} +03/04/2022 15:24:46 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 15:24:51 - INFO - codeparrot_training - Step 22091: {'lr': 0.0004776077849430652, 'samples': 11311104, 'steps': 22091, 'loss/train': 2.0767128467559814} +03/04/2022 15:24:54 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 15:24:56 - INFO - codeparrot_training - Step 22092: {'lr': 0.0004776055897004301, 'samples': 11311616, 'steps': 22092, 'loss/train': 1.8744815587997437} +03/04/2022 15:25:00 - INFO - codeparrot_training - Step 22093: {'lr': 0.0004776033943552395, 'samples': 11312128, 'steps': 22093, 'loss/train': 0.7583313584327698} +03/04/2022 15:25:02 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/04/2022 15:25:05 - INFO - codeparrot_training - Step 22094: {'lr': 0.0004776011989074943, 'samples': 11312640, 'steps': 22094, 'loss/train': 1.9622167348861694} +03/04/2022 15:25:08 - INFO - codeparrot_training - Step 22095: {'lr': 0.00047759900335719543, 'samples': 11313152, 'steps': 22095, 'loss/train': 2.2771923542022705} +03/04/2022 15:25:11 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/04/2022 15:25:13 - INFO - codeparrot_training - Step 22096: {'lr': 0.00047759680770434405, 'samples': 11313664, 'steps': 22096, 'loss/train': 2.0840837955474854} +03/04/2022 15:25:16 - INFO - codeparrot_training - Step 22097: {'lr': 0.00047759461194894103, 'samples': 11314176, 'steps': 22097, 'loss/train': 1.9387022256851196} +03/04/2022 15:25:19 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 15:25:22 - INFO - codeparrot_training - Step 22098: {'lr': 0.00047759241609098734, 'samples': 11314688, 'steps': 22098, 'loss/train': 1.8203294277191162} +03/04/2022 15:25:25 - INFO - codeparrot_training - Step 22099: {'lr': 0.00047759022013048417, 'samples': 11315200, 'steps': 22099, 'loss/train': 1.6407618522644043} +03/04/2022 15:25:28 - INFO - codeparrot_training - Step 22100: {'lr': 0.00047758802406743217, 'samples': 11315712, 'steps': 22100, 'loss/train': 1.1392550468444824} +03/04/2022 15:25:28 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 15:25:33 - INFO - codeparrot_training - Step 22101: {'lr': 0.0004775858279018326, 'samples': 11316224, 'steps': 22101, 'loss/train': 1.2836378812789917} +03/04/2022 15:25:37 - INFO - codeparrot_training - Step 22102: {'lr': 0.0004775836316336864, 'samples': 11316736, 'steps': 22102, 'loss/train': 1.9055947065353394} +03/04/2022 15:25:37 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 15:25:42 - INFO - codeparrot_training - Step 22103: {'lr': 0.00047758143526299446, 'samples': 11317248, 'steps': 22103, 'loss/train': 1.8054033517837524} +03/04/2022 15:25:45 - INFO - codeparrot_training - Step 22104: {'lr': 0.0004775792387897579, 'samples': 11317760, 'steps': 22104, 'loss/train': 2.1396689414978027} +03/04/2022 15:25:45 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 15:25:50 - INFO - codeparrot_training - Step 22105: {'lr': 0.0004775770422139776, 'samples': 11318272, 'steps': 22105, 'loss/train': 2.4275412559509277} +03/04/2022 15:25:54 - INFO - codeparrot_training - Step 22106: {'lr': 0.00047757484553565465, 'samples': 11318784, 'steps': 22106, 'loss/train': 1.9097182750701904} +03/04/2022 15:25:54 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 15:25:59 - INFO - codeparrot_training - Step 22107: {'lr': 0.00047757264875478996, 'samples': 11319296, 'steps': 22107, 'loss/train': 2.1254944801330566} +03/04/2022 15:26:02 - INFO - codeparrot_training - Step 22108: {'lr': 0.0004775704518713845, 'samples': 11319808, 'steps': 22108, 'loss/train': 1.6625523567199707} +03/04/2022 15:26:02 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/04/2022 15:26:07 - INFO - codeparrot_training - Step 22109: {'lr': 0.0004775682548854394, 'samples': 11320320, 'steps': 22109, 'loss/train': 1.638593316078186} +03/04/2022 15:26:10 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/04/2022 15:26:12 - INFO - codeparrot_training - Step 22110: {'lr': 0.0004775660577969555, 'samples': 11320832, 'steps': 22110, 'loss/train': 1.3889845609664917} +03/04/2022 15:26:16 - INFO - codeparrot_training - Step 22111: {'lr': 0.0004775638606059338, 'samples': 11321344, 'steps': 22111, 'loss/train': 1.2430495023727417} +03/04/2022 15:26:18 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/04/2022 15:26:21 - INFO - codeparrot_training - Step 22112: {'lr': 0.00047756166331237545, 'samples': 11321856, 'steps': 22112, 'loss/train': 2.8328857421875} +03/04/2022 15:26:24 - INFO - codeparrot_training - Step 22113: {'lr': 0.00047755946591628126, 'samples': 11322368, 'steps': 22113, 'loss/train': 1.9316152334213257} +03/04/2022 15:26:27 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 15:26:29 - INFO - codeparrot_training - Step 22114: {'lr': 0.00047755726841765224, 'samples': 11322880, 'steps': 22114, 'loss/train': 3.223008155822754} +03/04/2022 15:26:33 - INFO - codeparrot_training - Step 22115: {'lr': 0.0004775550708164895, 'samples': 11323392, 'steps': 22115, 'loss/train': 1.8795032501220703} +03/04/2022 15:26:36 - INFO - codeparrot_training - Step 22116: {'lr': 0.00047755287311279394, 'samples': 11323904, 'steps': 22116, 'loss/train': 1.7812528610229492} +03/04/2022 15:26:36 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 15:26:41 - INFO - codeparrot_training - Step 22117: {'lr': 0.00047755067530656656, 'samples': 11324416, 'steps': 22117, 'loss/train': 1.8688631057739258} +03/04/2022 15:26:44 - INFO - codeparrot_training - Step 22118: {'lr': 0.00047754847739780835, 'samples': 11324928, 'steps': 22118, 'loss/train': 1.6323686838150024} +03/04/2022 15:26:44 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 15:26:49 - INFO - codeparrot_training - Step 22119: {'lr': 0.0004775462793865203, 'samples': 11325440, 'steps': 22119, 'loss/train': 1.9232386350631714} +03/04/2022 15:26:53 - INFO - codeparrot_training - Step 22120: {'lr': 0.00047754408127270346, 'samples': 11325952, 'steps': 22120, 'loss/train': 2.177112102508545} +03/04/2022 15:26:53 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 15:26:58 - INFO - codeparrot_training - Step 22121: {'lr': 0.0004775418830563587, 'samples': 11326464, 'steps': 22121, 'loss/train': 1.774481177330017} +03/04/2022 15:27:01 - INFO - codeparrot_training - Step 22122: {'lr': 0.0004775396847374871, 'samples': 11326976, 'steps': 22122, 'loss/train': 2.301882743835449} +03/04/2022 15:27:02 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/04/2022 15:27:07 - INFO - codeparrot_training - Step 22123: {'lr': 0.0004775374863160896, 'samples': 11327488, 'steps': 22123, 'loss/train': 2.515514612197876} +03/04/2022 15:27:10 - INFO - codeparrot_training - Step 22124: {'lr': 0.0004775352877921673, 'samples': 11328000, 'steps': 22124, 'loss/train': 2.440469264984131} +03/04/2022 15:27:11 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/04/2022 15:27:15 - INFO - codeparrot_training - Step 22125: {'lr': 0.000477533089165721, 'samples': 11328512, 'steps': 22125, 'loss/train': 1.8881285190582275} +03/04/2022 15:27:19 - INFO - codeparrot_training - Step 22126: {'lr': 0.0004775308904367519, 'samples': 11329024, 'steps': 22126, 'loss/train': 1.1306160688400269} +03/04/2022 15:27:21 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/04/2022 15:27:24 - INFO - codeparrot_training - Step 22127: {'lr': 0.0004775286916052609, 'samples': 11329536, 'steps': 22127, 'loss/train': 1.890255331993103} +03/04/2022 15:27:27 - INFO - codeparrot_training - Step 22128: {'lr': 0.00047752649267124894, 'samples': 11330048, 'steps': 22128, 'loss/train': 0.8676541447639465} +03/04/2022 15:27:30 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/04/2022 15:27:32 - INFO - codeparrot_training - Step 22129: {'lr': 0.0004775242936347171, 'samples': 11330560, 'steps': 22129, 'loss/train': 2.0903730392456055} +03/04/2022 15:27:36 - INFO - codeparrot_training - Step 22130: {'lr': 0.0004775220944956662, 'samples': 11331072, 'steps': 22130, 'loss/train': 1.5051261186599731} +03/04/2022 15:27:38 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 15:27:41 - INFO - codeparrot_training - Step 22131: {'lr': 0.00047751989525409745, 'samples': 11331584, 'steps': 22131, 'loss/train': 1.2066582441329956} +03/04/2022 15:27:44 - INFO - codeparrot_training - Step 22132: {'lr': 0.0004775176959100117, 'samples': 11332096, 'steps': 22132, 'loss/train': 1.9609605073928833} +03/04/2022 15:27:47 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 15:27:49 - INFO - codeparrot_training - Step 22133: {'lr': 0.00047751549646341007, 'samples': 11332608, 'steps': 22133, 'loss/train': 1.881510853767395} +03/04/2022 15:27:53 - INFO - codeparrot_training - Step 22134: {'lr': 0.0004775132969142934, 'samples': 11333120, 'steps': 22134, 'loss/train': 1.3929623365402222} +03/04/2022 15:27:55 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 15:27:58 - INFO - codeparrot_training - Step 22135: {'lr': 0.00047751109726266273, 'samples': 11333632, 'steps': 22135, 'loss/train': 1.5073732137680054} +03/04/2022 15:28:01 - INFO - codeparrot_training - Step 22136: {'lr': 0.00047750889750851913, 'samples': 11334144, 'steps': 22136, 'loss/train': 2.3854384422302246} +03/04/2022 15:28:04 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/04/2022 15:28:06 - INFO - codeparrot_training - Step 22137: {'lr': 0.0004775066976518635, 'samples': 11334656, 'steps': 22137, 'loss/train': 1.7985275983810425} +03/04/2022 15:28:09 - INFO - codeparrot_training - Step 22138: {'lr': 0.00047750449769269686, 'samples': 11335168, 'steps': 22138, 'loss/train': 2.312749147415161} +03/04/2022 15:28:12 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 15:28:15 - INFO - codeparrot_training - Step 22139: {'lr': 0.0004775022976310203, 'samples': 11335680, 'steps': 22139, 'loss/train': 1.6366263628005981} +03/04/2022 15:28:18 - INFO - codeparrot_training - Step 22140: {'lr': 0.0004775000974668345, 'samples': 11336192, 'steps': 22140, 'loss/train': 2.004296064376831} +03/04/2022 15:28:21 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 15:28:23 - INFO - codeparrot_training - Step 22141: {'lr': 0.00047749789720014085, 'samples': 11336704, 'steps': 22141, 'loss/train': 1.2976937294006348} +03/04/2022 15:28:27 - INFO - codeparrot_training - Step 22142: {'lr': 0.00047749569683094015, 'samples': 11337216, 'steps': 22142, 'loss/train': 1.2908931970596313} +03/04/2022 15:28:30 - INFO - codeparrot_training - Step 22143: {'lr': 0.00047749349635923334, 'samples': 11337728, 'steps': 22143, 'loss/train': 2.150090217590332} +03/04/2022 15:28:30 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/04/2022 15:28:35 - INFO - codeparrot_training - Step 22144: {'lr': 0.0004774912957850215, 'samples': 11338240, 'steps': 22144, 'loss/train': 2.138568878173828} +03/04/2022 15:28:38 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 15:28:40 - INFO - codeparrot_training - Step 22145: {'lr': 0.0004774890951083055, 'samples': 11338752, 'steps': 22145, 'loss/train': 2.0056004524230957} +03/04/2022 15:28:43 - INFO - codeparrot_training - Step 22146: {'lr': 0.00047748689432908654, 'samples': 11339264, 'steps': 22146, 'loss/train': 2.105956792831421} +03/04/2022 15:28:46 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 15:28:49 - INFO - codeparrot_training - Step 22147: {'lr': 0.00047748469344736547, 'samples': 11339776, 'steps': 22147, 'loss/train': 1.8261003494262695} +03/04/2022 15:28:52 - INFO - codeparrot_training - Step 22148: {'lr': 0.00047748249246314323, 'samples': 11340288, 'steps': 22148, 'loss/train': 1.7923105955123901} +03/04/2022 15:28:55 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 15:28:57 - INFO - codeparrot_training - Step 22149: {'lr': 0.000477480291376421, 'samples': 11340800, 'steps': 22149, 'loss/train': 1.5919932126998901} +03/04/2022 15:29:00 - INFO - codeparrot_training - Step 22150: {'lr': 0.0004774780901871996, 'samples': 11341312, 'steps': 22150, 'loss/train': 1.6192091703414917} +03/04/2022 15:29:03 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/04/2022 15:29:06 - INFO - codeparrot_training - Step 22151: {'lr': 0.0004774758888954801, 'samples': 11341824, 'steps': 22151, 'loss/train': 2.214567184448242} +03/04/2022 15:29:09 - INFO - codeparrot_training - Step 22152: {'lr': 0.00047747368750126345, 'samples': 11342336, 'steps': 22152, 'loss/train': 1.5895987749099731} +03/04/2022 15:29:12 - INFO - codeparrot_training - Step 22153: {'lr': 0.0004774714860045507, 'samples': 11342848, 'steps': 22153, 'loss/train': 0.42950475215911865} +03/04/2022 15:29:12 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 15:29:17 - INFO - codeparrot_training - Step 22154: {'lr': 0.0004774692844053428, 'samples': 11343360, 'steps': 22154, 'loss/train': 2.304457187652588} +03/04/2022 15:29:20 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/04/2022 15:29:23 - INFO - codeparrot_training - Step 22155: {'lr': 0.00047746708270364073, 'samples': 11343872, 'steps': 22155, 'loss/train': 2.3379828929901123} +03/04/2022 15:29:26 - INFO - codeparrot_training - Step 22156: {'lr': 0.0004774648808994455, 'samples': 11344384, 'steps': 22156, 'loss/train': 0.9081267714500427} +03/04/2022 15:29:28 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/04/2022 15:29:31 - INFO - codeparrot_training - Step 22157: {'lr': 0.0004774626789927582, 'samples': 11344896, 'steps': 22157, 'loss/train': 1.761013388633728} +03/04/2022 15:29:34 - INFO - codeparrot_training - Step 22158: {'lr': 0.0004774604769835796, 'samples': 11345408, 'steps': 22158, 'loss/train': 2.0900137424468994} +03/04/2022 15:29:37 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 15:29:40 - INFO - codeparrot_training - Step 22159: {'lr': 0.00047745827487191087, 'samples': 11345920, 'steps': 22159, 'loss/train': 2.0795984268188477} +03/04/2022 15:29:43 - INFO - codeparrot_training - Step 22160: {'lr': 0.00047745607265775293, 'samples': 11346432, 'steps': 22160, 'loss/train': 1.7852567434310913} +03/04/2022 15:29:45 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 15:29:48 - INFO - codeparrot_training - Step 22161: {'lr': 0.0004774538703411069, 'samples': 11346944, 'steps': 22161, 'loss/train': 1.7994163036346436} +03/04/2022 15:29:51 - INFO - codeparrot_training - Step 22162: {'lr': 0.00047745166792197353, 'samples': 11347456, 'steps': 22162, 'loss/train': 1.9477393627166748} +03/04/2022 15:29:54 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 15:29:56 - INFO - codeparrot_training - Step 22163: {'lr': 0.000477449465400354, 'samples': 11347968, 'steps': 22163, 'loss/train': 2.093964099884033} +03/04/2022 15:30:00 - INFO - codeparrot_training - Step 22164: {'lr': 0.00047744726277624926, 'samples': 11348480, 'steps': 22164, 'loss/train': 1.5399622917175293} +03/04/2022 15:30:02 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 15:30:05 - INFO - codeparrot_training - Step 22165: {'lr': 0.00047744506004966024, 'samples': 11348992, 'steps': 22165, 'loss/train': 1.3139619827270508} +03/04/2022 15:30:08 - INFO - codeparrot_training - Step 22166: {'lr': 0.00047744285722058804, 'samples': 11349504, 'steps': 22166, 'loss/train': 2.1864373683929443} +03/04/2022 15:30:11 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 15:30:13 - INFO - codeparrot_training - Step 22167: {'lr': 0.0004774406542890336, 'samples': 11350016, 'steps': 22167, 'loss/train': 1.7841740846633911} +03/04/2022 15:30:16 - INFO - codeparrot_training - Step 22168: {'lr': 0.0004774384512549979, 'samples': 11350528, 'steps': 22168, 'loss/train': 0.10313055664300919} +03/04/2022 15:30:19 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/04/2022 15:30:22 - INFO - codeparrot_training - Step 22169: {'lr': 0.00047743624811848195, 'samples': 11351040, 'steps': 22169, 'loss/train': 2.0613551139831543} +03/04/2022 15:30:25 - INFO - codeparrot_training - Step 22170: {'lr': 0.00047743404487948673, 'samples': 11351552, 'steps': 22170, 'loss/train': 1.4357821941375732} +03/04/2022 15:30:28 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 15:30:30 - INFO - codeparrot_training - Step 22171: {'lr': 0.0004774318415380132, 'samples': 11352064, 'steps': 22171, 'loss/train': 1.162430763244629} +03/04/2022 15:30:33 - INFO - codeparrot_training - Step 22172: {'lr': 0.0004774296380940625, 'samples': 11352576, 'steps': 22172, 'loss/train': 1.048547625541687} +03/04/2022 15:30:36 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/04/2022 15:30:39 - INFO - codeparrot_training - Step 22173: {'lr': 0.0004774274345476354, 'samples': 11353088, 'steps': 22173, 'loss/train': 1.8127728700637817} +03/04/2022 15:30:42 - INFO - codeparrot_training - Step 22174: {'lr': 0.00047742523089873304, 'samples': 11353600, 'steps': 22174, 'loss/train': 1.5749802589416504} +03/04/2022 15:30:45 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 15:30:47 - INFO - codeparrot_training - Step 22175: {'lr': 0.0004774230271473564, 'samples': 11354112, 'steps': 22175, 'loss/train': 2.324392318725586} +03/04/2022 15:30:50 - INFO - codeparrot_training - Step 22176: {'lr': 0.00047742082329350644, 'samples': 11354624, 'steps': 22176, 'loss/train': 1.9879522323608398} +03/04/2022 15:30:53 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 15:30:55 - INFO - codeparrot_training - Step 22177: {'lr': 0.0004774186193371841, 'samples': 11355136, 'steps': 22177, 'loss/train': 0.48985105752944946} +03/04/2022 15:30:59 - INFO - codeparrot_training - Step 22178: {'lr': 0.00047741641527839054, 'samples': 11355648, 'steps': 22178, 'loss/train': 1.7213879823684692} +03/04/2022 15:31:01 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 15:31:04 - INFO - codeparrot_training - Step 22179: {'lr': 0.00047741421111712666, 'samples': 11356160, 'steps': 22179, 'loss/train': 1.4045706987380981} +03/04/2022 15:31:07 - INFO - codeparrot_training - Step 22180: {'lr': 0.00047741200685339337, 'samples': 11356672, 'steps': 22180, 'loss/train': 1.4715582132339478} +03/04/2022 15:31:10 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 15:31:12 - INFO - codeparrot_training - Step 22181: {'lr': 0.0004774098024871918, 'samples': 11357184, 'steps': 22181, 'loss/train': 1.4860867261886597} +03/04/2022 15:31:16 - INFO - codeparrot_training - Step 22182: {'lr': 0.00047740759801852284, 'samples': 11357696, 'steps': 22182, 'loss/train': 2.0933032035827637} +03/04/2022 15:31:19 - INFO - codeparrot_training - Step 22183: {'lr': 0.00047740539344738754, 'samples': 11358208, 'steps': 22183, 'loss/train': 2.062950849533081} +03/04/2022 15:31:19 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 15:31:24 - INFO - codeparrot_training - Step 22184: {'lr': 0.00047740318877378685, 'samples': 11358720, 'steps': 22184, 'loss/train': 1.3282326459884644} +03/04/2022 15:31:27 - INFO - codeparrot_training - Step 22185: {'lr': 0.00047740098399772185, 'samples': 11359232, 'steps': 22185, 'loss/train': 1.544287085533142} +03/04/2022 15:31:28 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/04/2022 15:31:33 - INFO - codeparrot_training - Step 22186: {'lr': 0.0004773987791191935, 'samples': 11359744, 'steps': 22186, 'loss/train': 1.0829787254333496} +03/04/2022 15:31:36 - INFO - codeparrot_training - Step 22187: {'lr': 0.0004773965741382027, 'samples': 11360256, 'steps': 22187, 'loss/train': 1.729000210762024} +03/04/2022 15:31:36 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 15:31:41 - INFO - codeparrot_training - Step 22188: {'lr': 0.00047739436905475054, 'samples': 11360768, 'steps': 22188, 'loss/train': 2.700211763381958} +03/04/2022 15:31:44 - INFO - codeparrot_training - Step 22189: {'lr': 0.00047739216386883797, 'samples': 11361280, 'steps': 22189, 'loss/train': 1.390215277671814} +03/04/2022 15:31:44 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/04/2022 15:31:49 - INFO - codeparrot_training - Step 22190: {'lr': 0.000477389958580466, 'samples': 11361792, 'steps': 22190, 'loss/train': 1.6053754091262817} +03/04/2022 15:31:53 - INFO - codeparrot_training - Step 22191: {'lr': 0.0004773877531896356, 'samples': 11362304, 'steps': 22191, 'loss/train': 1.0694011449813843} +03/04/2022 15:31:53 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 15:31:58 - INFO - codeparrot_training - Step 22192: {'lr': 0.00047738554769634784, 'samples': 11362816, 'steps': 22192, 'loss/train': 2.3320562839508057} +03/04/2022 15:32:01 - INFO - codeparrot_training - Step 22193: {'lr': 0.00047738334210060366, 'samples': 11363328, 'steps': 22193, 'loss/train': 2.533250331878662} +03/04/2022 15:32:01 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/04/2022 15:32:07 - INFO - codeparrot_training - Step 22194: {'lr': 0.000477381136402404, 'samples': 11363840, 'steps': 22194, 'loss/train': 1.622113585472107} +03/04/2022 15:32:10 - INFO - codeparrot_training - Step 22195: {'lr': 0.00047737893060175, 'samples': 11364352, 'steps': 22195, 'loss/train': 1.4974398612976074} +03/04/2022 15:32:10 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/04/2022 15:32:15 - INFO - codeparrot_training - Step 22196: {'lr': 0.00047737672469864246, 'samples': 11364864, 'steps': 22196, 'loss/train': 1.3227462768554688} +03/04/2022 15:32:18 - INFO - codeparrot_training - Step 22197: {'lr': 0.0004773745186930825, 'samples': 11365376, 'steps': 22197, 'loss/train': 1.5945134162902832} +03/04/2022 15:32:19 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 15:32:24 - INFO - codeparrot_training - Step 22198: {'lr': 0.00047737231258507116, 'samples': 11365888, 'steps': 22198, 'loss/train': 1.3415532112121582} +03/04/2022 15:32:27 - INFO - codeparrot_training - Step 22199: {'lr': 0.00047737010637460934, 'samples': 11366400, 'steps': 22199, 'loss/train': 2.521217107772827} +03/04/2022 15:32:27 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 15:32:32 - INFO - codeparrot_training - Step 22200: {'lr': 0.00047736790006169794, 'samples': 11366912, 'steps': 22200, 'loss/train': 0.458636611700058} +03/04/2022 15:32:35 - INFO - codeparrot_training - Step 22201: {'lr': 0.00047736569364633817, 'samples': 11367424, 'steps': 22201, 'loss/train': 1.4262664318084717} +03/04/2022 15:32:36 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 15:32:41 - INFO - codeparrot_training - Step 22202: {'lr': 0.00047736348712853094, 'samples': 11367936, 'steps': 22202, 'loss/train': 2.044226884841919} +03/04/2022 15:32:44 - INFO - codeparrot_training - Step 22203: {'lr': 0.0004773612805082772, 'samples': 11368448, 'steps': 22203, 'loss/train': 0.9254205226898193} +03/04/2022 15:32:44 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 15:32:49 - INFO - codeparrot_training - Step 22204: {'lr': 0.000477359073785578, 'samples': 11368960, 'steps': 22204, 'loss/train': 2.8265929222106934} +03/04/2022 15:32:52 - INFO - codeparrot_training - Step 22205: {'lr': 0.00047735686696043434, 'samples': 11369472, 'steps': 22205, 'loss/train': 1.9036263227462769} +03/04/2022 15:32:52 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 15:32:57 - INFO - codeparrot_training - Step 22206: {'lr': 0.0004773546600328471, 'samples': 11369984, 'steps': 22206, 'loss/train': 2.488102436065674} +03/04/2022 15:33:01 - INFO - codeparrot_training - Step 22207: {'lr': 0.00047735245300281745, 'samples': 11370496, 'steps': 22207, 'loss/train': 6.530094623565674} +03/04/2022 15:33:01 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 15:33:06 - INFO - codeparrot_training - Step 22208: {'lr': 0.00047735024587034625, 'samples': 11371008, 'steps': 22208, 'loss/train': 1.543666958808899} +03/04/2022 15:33:09 - INFO - codeparrot_training - Step 22209: {'lr': 0.00047734803863543453, 'samples': 11371520, 'steps': 22209, 'loss/train': 1.9861810207366943} +03/04/2022 15:33:10 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/04/2022 15:33:15 - INFO - codeparrot_training - Step 22210: {'lr': 0.00047734583129808327, 'samples': 11372032, 'steps': 22210, 'loss/train': 2.106640338897705} +03/04/2022 15:33:18 - INFO - codeparrot_training - Step 22211: {'lr': 0.00047734362385829356, 'samples': 11372544, 'steps': 22211, 'loss/train': 1.6649212837219238} +03/04/2022 15:33:18 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 15:33:23 - INFO - codeparrot_training - Step 22212: {'lr': 0.0004773414163160662, 'samples': 11373056, 'steps': 22212, 'loss/train': 1.8969519138336182} +03/04/2022 15:33:26 - INFO - codeparrot_training - Step 22213: {'lr': 0.00047733920867140244, 'samples': 11373568, 'steps': 22213, 'loss/train': 2.3982975482940674} +03/04/2022 15:33:28 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 15:33:32 - INFO - codeparrot_training - Step 22214: {'lr': 0.00047733700092430305, 'samples': 11374080, 'steps': 22214, 'loss/train': 1.5661745071411133} +03/04/2022 15:33:35 - INFO - codeparrot_training - Step 22215: {'lr': 0.0004773347930747691, 'samples': 11374592, 'steps': 22215, 'loss/train': 2.312277317047119} +03/04/2022 15:33:36 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 15:33:40 - INFO - codeparrot_training - Step 22216: {'lr': 0.0004773325851228017, 'samples': 11375104, 'steps': 22216, 'loss/train': 1.1213816404342651} +03/04/2022 15:33:43 - INFO - codeparrot_training - Step 22217: {'lr': 0.00047733037706840166, 'samples': 11375616, 'steps': 22217, 'loss/train': 1.9684717655181885} +03/04/2022 15:33:45 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/04/2022 15:33:49 - INFO - codeparrot_training - Step 22218: {'lr': 0.0004773281689115701, 'samples': 11376128, 'steps': 22218, 'loss/train': 1.942457914352417} +03/04/2022 15:33:52 - INFO - codeparrot_training - Step 22219: {'lr': 0.000477325960652308, 'samples': 11376640, 'steps': 22219, 'loss/train': 2.034529447555542} +03/04/2022 15:33:53 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 15:33:57 - INFO - codeparrot_training - Step 22220: {'lr': 0.0004773237522906163, 'samples': 11377152, 'steps': 22220, 'loss/train': 2.3409042358398438} +03/04/2022 15:34:00 - INFO - codeparrot_training - Step 22221: {'lr': 0.000477321543826496, 'samples': 11377664, 'steps': 22221, 'loss/train': 2.5458621978759766} +03/04/2022 15:34:02 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 15:34:06 - INFO - codeparrot_training - Step 22222: {'lr': 0.00047731933525994814, 'samples': 11378176, 'steps': 22222, 'loss/train': 1.4944298267364502} +03/04/2022 15:34:09 - INFO - codeparrot_training - Step 22223: {'lr': 0.0004773171265909737, 'samples': 11378688, 'steps': 22223, 'loss/train': 2.341916799545288} +03/04/2022 15:34:10 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 15:34:14 - INFO - codeparrot_training - Step 22224: {'lr': 0.00047731491781957366, 'samples': 11379200, 'steps': 22224, 'loss/train': 1.1500470638275146} +03/04/2022 15:34:17 - INFO - codeparrot_training - Step 22225: {'lr': 0.0004773127089457491, 'samples': 11379712, 'steps': 22225, 'loss/train': 2.523146152496338} +03/04/2022 15:34:19 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 15:34:22 - INFO - codeparrot_training - Step 22226: {'lr': 0.0004773104999695008, 'samples': 11380224, 'steps': 22226, 'loss/train': 1.7590314149856567} +03/04/2022 15:34:26 - INFO - codeparrot_training - Step 22227: {'lr': 0.00047730829089082994, 'samples': 11380736, 'steps': 22227, 'loss/train': 2.0174756050109863} +03/04/2022 15:34:27 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/04/2022 15:34:31 - INFO - codeparrot_training - Step 22228: {'lr': 0.00047730608170973754, 'samples': 11381248, 'steps': 22228, 'loss/train': 1.6932587623596191} +03/04/2022 15:34:34 - INFO - codeparrot_training - Step 22229: {'lr': 0.00047730387242622446, 'samples': 11381760, 'steps': 22229, 'loss/train': 0.9918410778045654} +03/04/2022 15:34:37 - INFO - codeparrot_training - Step 22230: {'lr': 0.00047730166304029185, 'samples': 11382272, 'steps': 22230, 'loss/train': 2.60172700881958} +03/04/2022 15:34:37 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 15:34:43 - INFO - codeparrot_training - Step 22231: {'lr': 0.0004772994535519405, 'samples': 11382784, 'steps': 22231, 'loss/train': 1.4549503326416016} +03/04/2022 15:34:46 - INFO - codeparrot_training - Step 22232: {'lr': 0.0004772972439611716, 'samples': 11383296, 'steps': 22232, 'loss/train': 1.8812918663024902} +03/04/2022 15:34:46 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 15:34:52 - INFO - codeparrot_training - Step 22233: {'lr': 0.00047729503426798605, 'samples': 11383808, 'steps': 22233, 'loss/train': 2.0896060466766357} +03/04/2022 15:34:55 - INFO - codeparrot_training - Step 22234: {'lr': 0.0004772928244723849, 'samples': 11384320, 'steps': 22234, 'loss/train': 6.667899131774902} +03/04/2022 15:34:56 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/04/2022 15:35:00 - INFO - codeparrot_training - Step 22235: {'lr': 0.00047729061457436905, 'samples': 11384832, 'steps': 22235, 'loss/train': 1.5816584825515747} +03/04/2022 15:35:03 - INFO - codeparrot_training - Step 22236: {'lr': 0.0004772884045739396, 'samples': 11385344, 'steps': 22236, 'loss/train': 2.3815054893493652} +03/04/2022 15:35:04 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/04/2022 15:35:09 - INFO - codeparrot_training - Step 22237: {'lr': 0.0004772861944710974, 'samples': 11385856, 'steps': 22237, 'loss/train': 2.0315628051757812} +03/04/2022 15:35:12 - INFO - codeparrot_training - Step 22238: {'lr': 0.00047728398426584375, 'samples': 11386368, 'steps': 22238, 'loss/train': 0.9229505658149719} +03/04/2022 15:35:13 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/04/2022 15:35:17 - INFO - codeparrot_training - Step 22239: {'lr': 0.0004772817739581793, 'samples': 11386880, 'steps': 22239, 'loss/train': 2.113133430480957} +03/04/2022 15:35:20 - INFO - codeparrot_training - Step 22240: {'lr': 0.0004772795635481052, 'samples': 11387392, 'steps': 22240, 'loss/train': 2.7265355587005615} +03/04/2022 15:35:21 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 15:35:25 - INFO - codeparrot_training - Step 22241: {'lr': 0.00047727735303562246, 'samples': 11387904, 'steps': 22241, 'loss/train': 0.9764037728309631} +03/04/2022 15:35:28 - INFO - codeparrot_training - Step 22242: {'lr': 0.000477275142420732, 'samples': 11388416, 'steps': 22242, 'loss/train': 1.3164842128753662} +03/04/2022 15:35:30 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 15:35:34 - INFO - codeparrot_training - Step 22243: {'lr': 0.000477272931703435, 'samples': 11388928, 'steps': 22243, 'loss/train': 1.9129084348678589} +03/04/2022 15:35:37 - INFO - codeparrot_training - Step 22244: {'lr': 0.0004772707208837322, 'samples': 11389440, 'steps': 22244, 'loss/train': 1.6567832231521606} +03/04/2022 15:35:39 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 15:35:42 - INFO - codeparrot_training - Step 22245: {'lr': 0.0004772685099616247, 'samples': 11389952, 'steps': 22245, 'loss/train': 1.7662593126296997} +03/04/2022 15:35:45 - INFO - codeparrot_training - Step 22246: {'lr': 0.0004772662989371136, 'samples': 11390464, 'steps': 22246, 'loss/train': 1.3090407848358154} +03/04/2022 15:35:47 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 15:35:51 - INFO - codeparrot_training - Step 22247: {'lr': 0.0004772640878101998, 'samples': 11390976, 'steps': 22247, 'loss/train': 1.368567705154419} +03/04/2022 15:35:54 - INFO - codeparrot_training - Step 22248: {'lr': 0.00047726187658088425, 'samples': 11391488, 'steps': 22248, 'loss/train': 1.6496580839157104} +03/04/2022 15:35:56 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 15:35:59 - INFO - codeparrot_training - Step 22249: {'lr': 0.0004772596652491681, 'samples': 11392000, 'steps': 22249, 'loss/train': 1.45890212059021} +03/04/2022 15:36:02 - INFO - codeparrot_training - Step 22250: {'lr': 0.0004772574538150522, 'samples': 11392512, 'steps': 22250, 'loss/train': 1.843206763267517} +03/04/2022 15:36:05 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 15:36:08 - INFO - codeparrot_training - Step 22251: {'lr': 0.0004772552422785376, 'samples': 11393024, 'steps': 22251, 'loss/train': 1.441948652267456} +03/04/2022 15:36:11 - INFO - codeparrot_training - Step 22252: {'lr': 0.00047725303063962535, 'samples': 11393536, 'steps': 22252, 'loss/train': 2.45949649810791} +03/04/2022 15:36:13 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/04/2022 15:36:16 - INFO - codeparrot_training - Step 22253: {'lr': 0.00047725081889831626, 'samples': 11394048, 'steps': 22253, 'loss/train': 1.977330207824707} +03/04/2022 15:36:19 - INFO - codeparrot_training - Step 22254: {'lr': 0.0004772486070546116, 'samples': 11394560, 'steps': 22254, 'loss/train': 1.642802357673645} +03/04/2022 15:36:22 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 15:36:25 - INFO - codeparrot_training - Step 22255: {'lr': 0.0004772463951085121, 'samples': 11395072, 'steps': 22255, 'loss/train': 1.6976144313812256} +03/04/2022 15:36:28 - INFO - codeparrot_training - Step 22256: {'lr': 0.00047724418306001895, 'samples': 11395584, 'steps': 22256, 'loss/train': 2.4152402877807617} +03/04/2022 15:36:30 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/04/2022 15:36:33 - INFO - codeparrot_training - Step 22257: {'lr': 0.0004772419709091331, 'samples': 11396096, 'steps': 22257, 'loss/train': 1.7744184732437134} +03/04/2022 15:36:36 - INFO - codeparrot_training - Step 22258: {'lr': 0.00047723975865585544, 'samples': 11396608, 'steps': 22258, 'loss/train': 1.1372560262680054} +03/04/2022 15:36:38 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 15:36:41 - INFO - codeparrot_training - Step 22259: {'lr': 0.00047723754630018715, 'samples': 11397120, 'steps': 22259, 'loss/train': 1.1425553560256958} +03/04/2022 15:36:44 - INFO - codeparrot_training - Step 22260: {'lr': 0.000477235333842129, 'samples': 11397632, 'steps': 22260, 'loss/train': 1.6570017337799072} +03/04/2022 15:36:47 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 15:36:50 - INFO - codeparrot_training - Step 22261: {'lr': 0.00047723312128168226, 'samples': 11398144, 'steps': 22261, 'loss/train': 1.2306241989135742} +03/04/2022 15:36:53 - INFO - codeparrot_training - Step 22262: {'lr': 0.00047723090861884773, 'samples': 11398656, 'steps': 22262, 'loss/train': 2.264705181121826} +03/04/2022 15:36:55 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 15:36:58 - INFO - codeparrot_training - Step 22263: {'lr': 0.00047722869585362646, 'samples': 11399168, 'steps': 22263, 'loss/train': 1.8475779294967651} +03/04/2022 15:37:01 - INFO - codeparrot_training - Step 22264: {'lr': 0.0004772264829860194, 'samples': 11399680, 'steps': 22264, 'loss/train': 1.7155795097351074} +03/04/2022 15:37:07 - INFO - codeparrot_training - Step 22265: {'lr': 0.00047722427001602765, 'samples': 11400192, 'steps': 22265, 'loss/train': 2.118985891342163} +03/04/2022 15:37:10 - INFO - codeparrot_training - Step 22266: {'lr': 0.0004772220569436521, 'samples': 11400704, 'steps': 22266, 'loss/train': 1.783207654953003} +03/04/2022 15:37:12 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 15:37:15 - INFO - codeparrot_training - Step 22267: {'lr': 0.0004772198437688938, 'samples': 11401216, 'steps': 22267, 'loss/train': 1.7310057878494263} +03/04/2022 15:37:18 - INFO - codeparrot_training - Step 22268: {'lr': 0.0004772176304917538, 'samples': 11401728, 'steps': 22268, 'loss/train': 1.3361843824386597} +03/04/2022 15:37:20 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 15:37:24 - INFO - codeparrot_training - Step 22269: {'lr': 0.00047721541711223306, 'samples': 11402240, 'steps': 22269, 'loss/train': 1.8218023777008057} +03/04/2022 15:37:27 - INFO - codeparrot_training - Step 22270: {'lr': 0.00047721320363033247, 'samples': 11402752, 'steps': 22270, 'loss/train': 2.75347900390625} +03/04/2022 15:37:30 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 15:37:32 - INFO - codeparrot_training - Step 22271: {'lr': 0.00047721099004605316, 'samples': 11403264, 'steps': 22271, 'loss/train': 2.8340041637420654} +03/04/2022 15:37:36 - INFO - codeparrot_training - Step 22272: {'lr': 0.00047720877635939606, 'samples': 11403776, 'steps': 22272, 'loss/train': 0.6918489933013916} +03/04/2022 15:37:38 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/04/2022 15:37:41 - INFO - codeparrot_training - Step 22273: {'lr': 0.0004772065625703622, 'samples': 11404288, 'steps': 22273, 'loss/train': 0.9932120442390442} +03/04/2022 15:37:44 - INFO - codeparrot_training - Step 22274: {'lr': 0.0004772043486789526, 'samples': 11404800, 'steps': 22274, 'loss/train': 2.4619579315185547} +03/04/2022 15:37:46 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 15:37:49 - INFO - codeparrot_training - Step 22275: {'lr': 0.0004772021346851682, 'samples': 11405312, 'steps': 22275, 'loss/train': 2.0812244415283203} +03/04/2022 15:37:52 - INFO - codeparrot_training - Step 22276: {'lr': 0.00047719992058901006, 'samples': 11405824, 'steps': 22276, 'loss/train': 1.539668321609497} +03/04/2022 15:37:55 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 15:37:58 - INFO - codeparrot_training - Step 22277: {'lr': 0.0004771977063904791, 'samples': 11406336, 'steps': 22277, 'loss/train': 2.096402406692505} +03/04/2022 15:38:01 - INFO - codeparrot_training - Step 22278: {'lr': 0.00047719549208957636, 'samples': 11406848, 'steps': 22278, 'loss/train': 2.2655439376831055} +03/04/2022 15:38:04 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 15:38:06 - INFO - codeparrot_training - Step 22279: {'lr': 0.0004771932776863028, 'samples': 11407360, 'steps': 22279, 'loss/train': 2.152956247329712} +03/04/2022 15:38:09 - INFO - codeparrot_training - Step 22280: {'lr': 0.0004771910631806595, 'samples': 11407872, 'steps': 22280, 'loss/train': 2.6602895259857178} +03/04/2022 15:38:13 - INFO - codeparrot_training - Step 22281: {'lr': 0.00047718884857264745, 'samples': 11408384, 'steps': 22281, 'loss/train': 1.6638280153274536} +03/04/2022 15:38:13 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 15:38:18 - INFO - codeparrot_training - Step 22282: {'lr': 0.0004771866338622676, 'samples': 11408896, 'steps': 22282, 'loss/train': 3.609278440475464} +03/04/2022 15:38:21 - INFO - codeparrot_training - Step 22283: {'lr': 0.0004771844190495209, 'samples': 11409408, 'steps': 22283, 'loss/train': 2.2452914714813232} +03/04/2022 15:38:22 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 15:38:27 - INFO - codeparrot_training - Step 22284: {'lr': 0.0004771822041344085, 'samples': 11409920, 'steps': 22284, 'loss/train': 1.7666600942611694} +03/04/2022 15:38:30 - INFO - codeparrot_training - Step 22285: {'lr': 0.0004771799891169312, 'samples': 11410432, 'steps': 22285, 'loss/train': 2.1622772216796875} +03/04/2022 15:38:30 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/04/2022 15:38:35 - INFO - codeparrot_training - Step 22286: {'lr': 0.0004771777739970902, 'samples': 11410944, 'steps': 22286, 'loss/train': 1.982214093208313} +03/04/2022 15:38:38 - INFO - codeparrot_training - Step 22287: {'lr': 0.0004771755587748863, 'samples': 11411456, 'steps': 22287, 'loss/train': 1.998295545578003} +03/04/2022 15:38:39 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 15:38:44 - INFO - codeparrot_training - Step 22288: {'lr': 0.00047717334345032065, 'samples': 11411968, 'steps': 22288, 'loss/train': 1.9068212509155273} +03/04/2022 15:38:47 - INFO - codeparrot_training - Step 22289: {'lr': 0.0004771711280233942, 'samples': 11412480, 'steps': 22289, 'loss/train': 1.9397213459014893} +03/04/2022 15:38:48 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 15:38:52 - INFO - codeparrot_training - Step 22290: {'lr': 0.000477168912494108, 'samples': 11412992, 'steps': 22290, 'loss/train': 2.200573682785034} +03/04/2022 15:38:55 - INFO - codeparrot_training - Step 22291: {'lr': 0.00047716669686246287, 'samples': 11413504, 'steps': 22291, 'loss/train': 0.6923785209655762} +03/04/2022 15:38:56 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 15:39:00 - INFO - codeparrot_training - Step 22292: {'lr': 0.00047716448112846, 'samples': 11414016, 'steps': 22292, 'loss/train': 1.8292288780212402} +03/04/2022 15:39:04 - INFO - codeparrot_training - Step 22293: {'lr': 0.00047716226529210035, 'samples': 11414528, 'steps': 22293, 'loss/train': 1.7958790063858032} +03/04/2022 15:39:05 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/04/2022 15:39:09 - INFO - codeparrot_training - Step 22294: {'lr': 0.00047716004935338484, 'samples': 11415040, 'steps': 22294, 'loss/train': 1.7815150022506714} +03/04/2022 15:39:12 - INFO - codeparrot_training - Step 22295: {'lr': 0.0004771578333123145, 'samples': 11415552, 'steps': 22295, 'loss/train': 1.5924257040023804} +03/04/2022 15:39:13 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 15:39:17 - INFO - codeparrot_training - Step 22296: {'lr': 0.00047715561716889037, 'samples': 11416064, 'steps': 22296, 'loss/train': 0.35261061787605286} +03/04/2022 15:39:20 - INFO - codeparrot_training - Step 22297: {'lr': 0.0004771534009231134, 'samples': 11416576, 'steps': 22297, 'loss/train': 1.3618861436843872} +03/04/2022 15:39:22 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 15:39:26 - INFO - codeparrot_training - Step 22298: {'lr': 0.00047715118457498473, 'samples': 11417088, 'steps': 22298, 'loss/train': 1.5753991603851318} +03/04/2022 15:39:29 - INFO - codeparrot_training - Step 22299: {'lr': 0.00047714896812450514, 'samples': 11417600, 'steps': 22299, 'loss/train': 2.196711540222168} +03/04/2022 15:39:31 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 15:39:35 - INFO - codeparrot_training - Step 22300: {'lr': 0.00047714675157167573, 'samples': 11418112, 'steps': 22300, 'loss/train': 2.676680564880371} +03/04/2022 15:39:38 - INFO - codeparrot_training - Step 22301: {'lr': 0.00047714453491649753, 'samples': 11418624, 'steps': 22301, 'loss/train': 1.1340454816818237} +03/04/2022 15:39:39 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 15:39:43 - INFO - codeparrot_training - Step 22302: {'lr': 0.00047714231815897145, 'samples': 11419136, 'steps': 22302, 'loss/train': 1.1262229681015015} +03/04/2022 15:39:46 - INFO - codeparrot_training - Step 22303: {'lr': 0.0004771401012990986, 'samples': 11419648, 'steps': 22303, 'loss/train': 1.9904206991195679} +03/04/2022 15:39:48 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 15:39:52 - INFO - codeparrot_training - Step 22304: {'lr': 0.0004771378843368799, 'samples': 11420160, 'steps': 22304, 'loss/train': 2.1478192806243896} +03/04/2022 15:39:55 - INFO - codeparrot_training - Step 22305: {'lr': 0.0004771356672723164, 'samples': 11420672, 'steps': 22305, 'loss/train': 1.4392684698104858} +03/04/2022 15:39:58 - INFO - codeparrot_training - Step 22306: {'lr': 0.0004771334501054091, 'samples': 11421184, 'steps': 22306, 'loss/train': 2.2167985439300537} +03/04/2022 15:39:59 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 15:40:04 - INFO - codeparrot_training - Step 22307: {'lr': 0.0004771312328361589, 'samples': 11421696, 'steps': 22307, 'loss/train': 1.5921905040740967} +03/04/2022 15:40:07 - INFO - codeparrot_training - Step 22308: {'lr': 0.0004771290154645669, 'samples': 11422208, 'steps': 22308, 'loss/train': 1.3252410888671875} +03/04/2022 15:40:08 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 15:40:12 - INFO - codeparrot_training - Step 22309: {'lr': 0.0004771267979906341, 'samples': 11422720, 'steps': 22309, 'loss/train': 1.9257309436798096} +03/04/2022 15:40:15 - INFO - codeparrot_training - Step 22310: {'lr': 0.0004771245804143615, 'samples': 11423232, 'steps': 22310, 'loss/train': 2.038508892059326} +03/04/2022 15:40:17 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 15:40:21 - INFO - codeparrot_training - Step 22311: {'lr': 0.00047712236273574993, 'samples': 11423744, 'steps': 22311, 'loss/train': 1.9827864170074463} +03/04/2022 15:40:24 - INFO - codeparrot_training - Step 22312: {'lr': 0.0004771201449548006, 'samples': 11424256, 'steps': 22312, 'loss/train': 2.0457041263580322} +03/04/2022 15:40:25 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 15:40:29 - INFO - codeparrot_training - Step 22313: {'lr': 0.0004771179270715145, 'samples': 11424768, 'steps': 22313, 'loss/train': 1.5745621919631958} +03/04/2022 15:40:32 - INFO - codeparrot_training - Step 22314: {'lr': 0.0004771157090858925, 'samples': 11425280, 'steps': 22314, 'loss/train': 2.0680339336395264} +03/04/2022 15:40:34 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 15:40:38 - INFO - codeparrot_training - Step 22315: {'lr': 0.00047711349099793565, 'samples': 11425792, 'steps': 22315, 'loss/train': 1.7564970254898071} +03/04/2022 15:40:41 - INFO - codeparrot_training - Step 22316: {'lr': 0.00047711127280764497, 'samples': 11426304, 'steps': 22316, 'loss/train': 2.1914710998535156} +03/04/2022 15:40:43 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 15:40:46 - INFO - codeparrot_training - Step 22317: {'lr': 0.0004771090545150215, 'samples': 11426816, 'steps': 22317, 'loss/train': 1.8899551630020142} +03/04/2022 15:40:49 - INFO - codeparrot_training - Step 22318: {'lr': 0.00047710683612006623, 'samples': 11427328, 'steps': 22318, 'loss/train': 1.6784968376159668} +03/04/2022 15:40:52 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 15:40:55 - INFO - codeparrot_training - Step 22319: {'lr': 0.00047710461762278, 'samples': 11427840, 'steps': 22319, 'loss/train': 1.9902827739715576} +03/04/2022 15:40:58 - INFO - codeparrot_training - Step 22320: {'lr': 0.00047710239902316404, 'samples': 11428352, 'steps': 22320, 'loss/train': 0.675197422504425} +03/04/2022 15:41:01 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 15:41:03 - INFO - codeparrot_training - Step 22321: {'lr': 0.0004771001803212192, 'samples': 11428864, 'steps': 22321, 'loss/train': 1.7486975193023682} +03/04/2022 15:41:06 - INFO - codeparrot_training - Step 22322: {'lr': 0.0004770979615169466, 'samples': 11429376, 'steps': 22322, 'loss/train': 1.6954245567321777} +03/04/2022 15:41:09 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/04/2022 15:41:11 - INFO - codeparrot_training - Step 22323: {'lr': 0.00047709574261034705, 'samples': 11429888, 'steps': 22323, 'loss/train': 1.8200124502182007} +03/04/2022 15:41:15 - INFO - codeparrot_training - Step 22324: {'lr': 0.0004770935236014217, 'samples': 11430400, 'steps': 22324, 'loss/train': 2.0899829864501953} +03/04/2022 15:41:17 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 15:41:20 - INFO - codeparrot_training - Step 22325: {'lr': 0.00047709130449017154, 'samples': 11430912, 'steps': 22325, 'loss/train': 1.521140456199646} +03/04/2022 15:41:23 - INFO - codeparrot_training - Step 22326: {'lr': 0.0004770890852765975, 'samples': 11431424, 'steps': 22326, 'loss/train': 1.193521499633789} +03/04/2022 15:41:26 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/04/2022 15:41:29 - INFO - codeparrot_training - Step 22327: {'lr': 0.00047708686596070065, 'samples': 11431936, 'steps': 22327, 'loss/train': 1.7612429857254028} +03/04/2022 15:41:32 - INFO - codeparrot_training - Step 22328: {'lr': 0.00047708464654248195, 'samples': 11432448, 'steps': 22328, 'loss/train': 2.2048349380493164} +03/04/2022 15:41:35 - INFO - codeparrot_training - Step 22329: {'lr': 0.0004770824270219424, 'samples': 11432960, 'steps': 22329, 'loss/train': 2.541520595550537} +03/04/2022 15:41:36 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/04/2022 15:41:41 - INFO - codeparrot_training - Step 22330: {'lr': 0.0004770802073990831, 'samples': 11433472, 'steps': 22330, 'loss/train': 1.1763559579849243} +03/04/2022 15:41:44 - INFO - codeparrot_training - Step 22331: {'lr': 0.00047707798767390486, 'samples': 11433984, 'steps': 22331, 'loss/train': 1.6634845733642578} +03/04/2022 15:41:47 - INFO - codeparrot_training - Step 22332: {'lr': 0.00047707576784640883, 'samples': 11434496, 'steps': 22332, 'loss/train': 1.0085744857788086} +03/04/2022 15:41:49 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 15:41:53 - INFO - codeparrot_training - Step 22333: {'lr': 0.00047707354791659594, 'samples': 11435008, 'steps': 22333, 'loss/train': 1.9908726215362549} +03/04/2022 15:41:56 - INFO - codeparrot_training - Step 22334: {'lr': 0.0004770713278844672, 'samples': 11435520, 'steps': 22334, 'loss/train': 2.233520984649658} +03/04/2022 15:41:58 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 15:42:01 - INFO - codeparrot_training - Step 22335: {'lr': 0.00047706910775002363, 'samples': 11436032, 'steps': 22335, 'loss/train': 2.00911808013916} +03/04/2022 15:42:04 - INFO - codeparrot_training - Step 22336: {'lr': 0.0004770668875132663, 'samples': 11436544, 'steps': 22336, 'loss/train': 1.6298381090164185} +03/04/2022 15:42:07 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 15:42:10 - INFO - codeparrot_training - Step 22337: {'lr': 0.00047706466717419607, 'samples': 11437056, 'steps': 22337, 'loss/train': 1.8705356121063232} +03/04/2022 15:42:13 - INFO - codeparrot_training - Step 22338: {'lr': 0.000477062446732814, 'samples': 11437568, 'steps': 22338, 'loss/train': 0.38395625352859497} +03/04/2022 15:42:15 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 15:42:18 - INFO - codeparrot_training - Step 22339: {'lr': 0.0004770602261891211, 'samples': 11438080, 'steps': 22339, 'loss/train': 2.2905497550964355} +03/04/2022 15:42:21 - INFO - codeparrot_training - Step 22340: {'lr': 0.00047705800554311836, 'samples': 11438592, 'steps': 22340, 'loss/train': 1.857138991355896} +03/04/2022 15:42:25 - INFO - codeparrot_training - Step 22341: {'lr': 0.0004770557847948068, 'samples': 11439104, 'steps': 22341, 'loss/train': 1.9479671716690063} +03/04/2022 15:42:25 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 15:42:30 - INFO - codeparrot_training - Step 22342: {'lr': 0.0004770535639441874, 'samples': 11439616, 'steps': 22342, 'loss/train': 2.412079095840454} +03/04/2022 15:42:33 - INFO - codeparrot_training - Step 22343: {'lr': 0.0004770513429912612, 'samples': 11440128, 'steps': 22343, 'loss/train': 1.9400339126586914} +03/04/2022 15:42:33 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/04/2022 15:42:38 - INFO - codeparrot_training - Step 22344: {'lr': 0.0004770491219360291, 'samples': 11440640, 'steps': 22344, 'loss/train': 1.485713005065918} +03/04/2022 15:42:42 - INFO - codeparrot_training - Step 22345: {'lr': 0.00047704690077849223, 'samples': 11441152, 'steps': 22345, 'loss/train': 0.8323150277137756} +03/04/2022 15:42:42 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 15:42:47 - INFO - codeparrot_training - Step 22346: {'lr': 0.0004770446795186515, 'samples': 11441664, 'steps': 22346, 'loss/train': 0.8790633082389832} +03/04/2022 15:42:50 - INFO - codeparrot_training - Step 22347: {'lr': 0.0004770424581565079, 'samples': 11442176, 'steps': 22347, 'loss/train': 2.1988582611083984} +03/04/2022 15:42:50 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/04/2022 15:42:55 - INFO - codeparrot_training - Step 22348: {'lr': 0.0004770402366920625, 'samples': 11442688, 'steps': 22348, 'loss/train': 1.530765175819397} +03/04/2022 15:42:58 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 15:43:00 - INFO - codeparrot_training - Step 22349: {'lr': 0.00047703801512531636, 'samples': 11443200, 'steps': 22349, 'loss/train': 2.006002187728882} +03/04/2022 15:43:04 - INFO - codeparrot_training - Step 22350: {'lr': 0.00047703579345627036, 'samples': 11443712, 'steps': 22350, 'loss/train': 2.183684825897217} +03/04/2022 15:43:06 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 15:43:09 - INFO - codeparrot_training - Step 22351: {'lr': 0.00047703357168492544, 'samples': 11444224, 'steps': 22351, 'loss/train': 2.0730032920837402} +03/04/2022 15:43:12 - INFO - codeparrot_training - Step 22352: {'lr': 0.0004770313498112828, 'samples': 11444736, 'steps': 22352, 'loss/train': 2.0986359119415283} +03/04/2022 15:43:15 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 15:43:17 - INFO - codeparrot_training - Step 22353: {'lr': 0.0004770291278353433, 'samples': 11445248, 'steps': 22353, 'loss/train': 3.2045609951019287} +03/04/2022 15:43:20 - INFO - codeparrot_training - Step 22354: {'lr': 0.00047702690575710796, 'samples': 11445760, 'steps': 22354, 'loss/train': 2.330094814300537} +03/04/2022 15:43:23 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 15:43:26 - INFO - codeparrot_training - Step 22355: {'lr': 0.0004770246835765778, 'samples': 11446272, 'steps': 22355, 'loss/train': 2.08205509185791} +03/04/2022 15:43:29 - INFO - codeparrot_training - Step 22356: {'lr': 0.0004770224612937538, 'samples': 11446784, 'steps': 22356, 'loss/train': 2.1141655445098877} +03/04/2022 15:43:32 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 15:43:34 - INFO - codeparrot_training - Step 22357: {'lr': 0.0004770202389086371, 'samples': 11447296, 'steps': 22357, 'loss/train': 1.9168789386749268} +03/04/2022 15:43:37 - INFO - codeparrot_training - Step 22358: {'lr': 0.0004770180164212284, 'samples': 11447808, 'steps': 22358, 'loss/train': 2.5445199012756348} +03/04/2022 15:43:40 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 15:43:43 - INFO - codeparrot_training - Step 22359: {'lr': 0.00047701579383152906, 'samples': 11448320, 'steps': 22359, 'loss/train': 1.7233607769012451} +03/04/2022 15:43:46 - INFO - codeparrot_training - Step 22360: {'lr': 0.0004770135711395398, 'samples': 11448832, 'steps': 22360, 'loss/train': 1.9921740293502808} +03/04/2022 15:43:48 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/04/2022 15:43:51 - INFO - codeparrot_training - Step 22361: {'lr': 0.0004770113483452618, 'samples': 11449344, 'steps': 22361, 'loss/train': 2.339416742324829} +03/04/2022 15:43:54 - INFO - codeparrot_training - Step 22362: {'lr': 0.00047700912544869595, 'samples': 11449856, 'steps': 22362, 'loss/train': 1.277938961982727} +03/04/2022 15:43:57 - INFO - codeparrot_training - Step 22363: {'lr': 0.0004770069024498433, 'samples': 11450368, 'steps': 22363, 'loss/train': 1.9749418497085571} +03/04/2022 15:43:58 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 15:44:03 - INFO - codeparrot_training - Step 22364: {'lr': 0.00047700467934870484, 'samples': 11450880, 'steps': 22364, 'loss/train': 2.426556348800659} +03/04/2022 15:44:06 - INFO - codeparrot_training - Step 22365: {'lr': 0.0004770024561452816, 'samples': 11451392, 'steps': 22365, 'loss/train': 2.0053937435150146} +03/04/2022 15:44:07 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 15:44:11 - INFO - codeparrot_training - Step 22366: {'lr': 0.0004770002328395745, 'samples': 11451904, 'steps': 22366, 'loss/train': 2.1311402320861816} +03/04/2022 15:44:14 - INFO - codeparrot_training - Step 22367: {'lr': 0.00047699800943158454, 'samples': 11452416, 'steps': 22367, 'loss/train': 1.9790606498718262} +03/04/2022 15:44:15 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/04/2022 15:44:20 - INFO - codeparrot_training - Step 22368: {'lr': 0.0004769957859213129, 'samples': 11452928, 'steps': 22368, 'loss/train': 2.0900065898895264} +03/04/2022 15:44:23 - INFO - codeparrot_training - Step 22369: {'lr': 0.00047699356230876047, 'samples': 11453440, 'steps': 22369, 'loss/train': 1.4526458978652954} +03/04/2022 15:44:24 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/04/2022 15:44:28 - INFO - codeparrot_training - Step 22370: {'lr': 0.0004769913385939282, 'samples': 11453952, 'steps': 22370, 'loss/train': 2.0202369689941406} +03/04/2022 15:44:31 - INFO - codeparrot_training - Step 22371: {'lr': 0.0004769891147768171, 'samples': 11454464, 'steps': 22371, 'loss/train': 2.375291347503662} +03/04/2022 15:44:32 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/04/2022 15:44:37 - INFO - codeparrot_training - Step 22372: {'lr': 0.00047698689085742823, 'samples': 11454976, 'steps': 22372, 'loss/train': 1.9892005920410156} +03/04/2022 15:44:40 - INFO - codeparrot_training - Step 22373: {'lr': 0.00047698466683576256, 'samples': 11455488, 'steps': 22373, 'loss/train': 1.0614806413650513} +03/04/2022 15:44:41 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 15:44:45 - INFO - codeparrot_training - Step 22374: {'lr': 0.0004769824427118211, 'samples': 11456000, 'steps': 22374, 'loss/train': 1.7671689987182617} +03/04/2022 15:44:48 - INFO - codeparrot_training - Step 22375: {'lr': 0.00047698021848560494, 'samples': 11456512, 'steps': 22375, 'loss/train': 1.735493779182434} +03/04/2022 15:44:50 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 15:44:54 - INFO - codeparrot_training - Step 22376: {'lr': 0.0004769779941571149, 'samples': 11457024, 'steps': 22376, 'loss/train': 2.0558762550354004} +03/04/2022 15:44:57 - INFO - codeparrot_training - Step 22377: {'lr': 0.00047697576972635213, 'samples': 11457536, 'steps': 22377, 'loss/train': 1.4781200885772705} +03/04/2022 15:44:59 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 15:45:02 - INFO - codeparrot_training - Step 22378: {'lr': 0.0004769735451933176, 'samples': 11458048, 'steps': 22378, 'loss/train': 2.319829225540161} +03/04/2022 15:45:05 - INFO - codeparrot_training - Step 22379: {'lr': 0.0004769713205580122, 'samples': 11458560, 'steps': 22379, 'loss/train': 2.630206346511841} +03/04/2022 15:45:07 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 15:45:11 - INFO - codeparrot_training - Step 22380: {'lr': 0.0004769690958204371, 'samples': 11459072, 'steps': 22380, 'loss/train': 1.7876747846603394} +03/04/2022 15:45:14 - INFO - codeparrot_training - Step 22381: {'lr': 0.0004769668709805932, 'samples': 11459584, 'steps': 22381, 'loss/train': 1.9131865501403809} +03/04/2022 15:45:16 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 15:45:19 - INFO - codeparrot_training - Step 22382: {'lr': 0.0004769646460384816, 'samples': 11460096, 'steps': 22382, 'loss/train': 1.5190072059631348} +03/04/2022 15:45:22 - INFO - codeparrot_training - Step 22383: {'lr': 0.00047696242099410307, 'samples': 11460608, 'steps': 22383, 'loss/train': 1.8266668319702148} +03/04/2022 15:45:25 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 15:45:28 - INFO - codeparrot_training - Step 22384: {'lr': 0.00047696019584745887, 'samples': 11461120, 'steps': 22384, 'loss/train': 1.5854839086532593} +03/04/2022 15:45:31 - INFO - codeparrot_training - Step 22385: {'lr': 0.00047695797059854996, 'samples': 11461632, 'steps': 22385, 'loss/train': 1.596534013748169} +03/04/2022 15:45:33 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 15:45:36 - INFO - codeparrot_training - Step 22386: {'lr': 0.0004769557452473772, 'samples': 11462144, 'steps': 22386, 'loss/train': 1.2219512462615967} +03/04/2022 15:45:39 - INFO - codeparrot_training - Step 22387: {'lr': 0.00047695351979394173, 'samples': 11462656, 'steps': 22387, 'loss/train': 2.179931879043579} +03/04/2022 15:45:42 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/04/2022 15:45:44 - INFO - codeparrot_training - Step 22388: {'lr': 0.00047695129423824454, 'samples': 11463168, 'steps': 22388, 'loss/train': 1.9552170038223267} +03/04/2022 15:45:48 - INFO - codeparrot_training - Step 22389: {'lr': 0.0004769490685802865, 'samples': 11463680, 'steps': 22389, 'loss/train': 0.9540486335754395} +03/04/2022 15:45:50 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/04/2022 15:45:53 - INFO - codeparrot_training - Step 22390: {'lr': 0.00047694684282006885, 'samples': 11464192, 'steps': 22390, 'loss/train': 2.1730754375457764} +03/04/2022 15:45:56 - INFO - codeparrot_training - Step 22391: {'lr': 0.00047694461695759236, 'samples': 11464704, 'steps': 22391, 'loss/train': 1.9229793548583984} +03/04/2022 15:45:58 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 15:46:01 - INFO - codeparrot_training - Step 22392: {'lr': 0.00047694239099285815, 'samples': 11465216, 'steps': 22392, 'loss/train': 1.3976644277572632} +03/04/2022 15:46:05 - INFO - codeparrot_training - Step 22393: {'lr': 0.00047694016492586715, 'samples': 11465728, 'steps': 22393, 'loss/train': 2.3815677165985107} +03/04/2022 15:46:07 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/04/2022 15:46:10 - INFO - codeparrot_training - Step 22394: {'lr': 0.0004769379387566205, 'samples': 11466240, 'steps': 22394, 'loss/train': 2.52591872215271} +03/04/2022 15:46:13 - INFO - codeparrot_training - Step 22395: {'lr': 0.000476935712485119, 'samples': 11466752, 'steps': 22395, 'loss/train': 1.505041241645813} +03/04/2022 15:46:15 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 15:46:18 - INFO - codeparrot_training - Step 22396: {'lr': 0.0004769334861113639, 'samples': 11467264, 'steps': 22396, 'loss/train': 0.9479007124900818} +03/04/2022 15:46:21 - INFO - codeparrot_training - Step 22397: {'lr': 0.000476931259635356, 'samples': 11467776, 'steps': 22397, 'loss/train': 1.8733607530593872} +03/04/2022 15:46:24 - INFO - codeparrot_training - Skipping example with length 983 (seq_length=1024) +03/04/2022 15:46:27 - INFO - codeparrot_training - Step 22398: {'lr': 0.00047692903305709646, 'samples': 11468288, 'steps': 22398, 'loss/train': 1.9552898406982422} +03/04/2022 15:46:30 - INFO - codeparrot_training - Step 22399: {'lr': 0.0004769268063765861, 'samples': 11468800, 'steps': 22399, 'loss/train': 2.0450286865234375} +03/04/2022 15:46:32 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 15:46:35 - INFO - codeparrot_training - Step 22400: {'lr': 0.00047692457959382605, 'samples': 11469312, 'steps': 22400, 'loss/train': 3.4000706672668457} +03/04/2022 15:46:38 - INFO - codeparrot_training - Step 22401: {'lr': 0.0004769223527088173, 'samples': 11469824, 'steps': 22401, 'loss/train': 1.5526716709136963} +03/04/2022 15:46:41 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 15:46:44 - INFO - codeparrot_training - Step 22402: {'lr': 0.00047692012572156086, 'samples': 11470336, 'steps': 22402, 'loss/train': 2.104238986968994} +03/04/2022 15:46:47 - INFO - codeparrot_training - Step 22403: {'lr': 0.00047691789863205764, 'samples': 11470848, 'steps': 22403, 'loss/train': 1.1048665046691895} +03/04/2022 15:46:49 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 15:46:52 - INFO - codeparrot_training - Step 22404: {'lr': 0.0004769156714403088, 'samples': 11471360, 'steps': 22404, 'loss/train': 1.678782343864441} +03/04/2022 15:46:55 - INFO - codeparrot_training - Step 22405: {'lr': 0.0004769134441463152, 'samples': 11471872, 'steps': 22405, 'loss/train': 2.4589037895202637} +03/04/2022 15:46:57 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 15:47:01 - INFO - codeparrot_training - Step 22406: {'lr': 0.0004769112167500779, 'samples': 11472384, 'steps': 22406, 'loss/train': 2.0313570499420166} +03/04/2022 15:47:04 - INFO - codeparrot_training - Step 22407: {'lr': 0.00047690898925159796, 'samples': 11472896, 'steps': 22407, 'loss/train': 1.5635850429534912} +03/04/2022 15:47:07 - INFO - codeparrot_training - Step 22408: {'lr': 0.0004769067616508763, 'samples': 11473408, 'steps': 22408, 'loss/train': 2.1659297943115234} +03/04/2022 15:47:07 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 15:47:12 - INFO - codeparrot_training - Step 22409: {'lr': 0.00047690453394791393, 'samples': 11473920, 'steps': 22409, 'loss/train': 2.2377867698669434} +03/04/2022 15:47:15 - INFO - codeparrot_training - Step 22410: {'lr': 0.0004769023061427119, 'samples': 11474432, 'steps': 22410, 'loss/train': 2.085585594177246} +03/04/2022 15:47:15 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 15:47:21 - INFO - codeparrot_training - Step 22411: {'lr': 0.0004769000782352713, 'samples': 11474944, 'steps': 22411, 'loss/train': 1.5014328956604004} +03/04/2022 15:47:24 - INFO - codeparrot_training - Step 22412: {'lr': 0.00047689785022559284, 'samples': 11475456, 'steps': 22412, 'loss/train': 1.8411418199539185} +03/04/2022 15:47:24 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/04/2022 15:47:29 - INFO - codeparrot_training - Step 22413: {'lr': 0.0004768956221136778, 'samples': 11475968, 'steps': 22413, 'loss/train': 1.9553931951522827} +03/04/2022 15:47:32 - INFO - codeparrot_training - Step 22414: {'lr': 0.00047689339389952713, 'samples': 11476480, 'steps': 22414, 'loss/train': 1.9445511102676392} +03/04/2022 15:47:32 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 15:47:38 - INFO - codeparrot_training - Step 22415: {'lr': 0.0004768911655831417, 'samples': 11476992, 'steps': 22415, 'loss/train': 2.0988261699676514} +03/04/2022 15:47:41 - INFO - codeparrot_training - Step 22416: {'lr': 0.0004768889371645227, 'samples': 11477504, 'steps': 22416, 'loss/train': 1.9009219408035278} +03/04/2022 15:47:42 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/04/2022 15:47:46 - INFO - codeparrot_training - Step 22417: {'lr': 0.000476886708643671, 'samples': 11478016, 'steps': 22417, 'loss/train': 2.716937303543091} +03/04/2022 15:47:49 - INFO - codeparrot_training - Step 22418: {'lr': 0.0004768844800205877, 'samples': 11478528, 'steps': 22418, 'loss/train': 1.7089838981628418} +03/04/2022 15:47:51 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 15:47:55 - INFO - codeparrot_training - Step 22419: {'lr': 0.0004768822512952737, 'samples': 11479040, 'steps': 22419, 'loss/train': 1.8988728523254395} +03/04/2022 15:47:58 - INFO - codeparrot_training - Step 22420: {'lr': 0.0004768800224677301, 'samples': 11479552, 'steps': 22420, 'loss/train': 2.19171404838562} +03/04/2022 15:47:59 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/04/2022 15:48:03 - INFO - codeparrot_training - Step 22421: {'lr': 0.0004768777935379578, 'samples': 11480064, 'steps': 22421, 'loss/train': 2.3457744121551514} +03/04/2022 15:48:07 - INFO - codeparrot_training - Step 22422: {'lr': 0.0004768755645059579, 'samples': 11480576, 'steps': 22422, 'loss/train': 1.5806716680526733} +03/04/2022 15:48:08 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 15:48:12 - INFO - codeparrot_training - Step 22423: {'lr': 0.00047687333537173136, 'samples': 11481088, 'steps': 22423, 'loss/train': 2.1720352172851562} +03/04/2022 15:48:15 - INFO - codeparrot_training - Step 22424: {'lr': 0.00047687110613527924, 'samples': 11481600, 'steps': 22424, 'loss/train': 2.2620861530303955} +03/04/2022 15:48:16 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 15:48:20 - INFO - codeparrot_training - Step 22425: {'lr': 0.00047686887679660253, 'samples': 11482112, 'steps': 22425, 'loss/train': 3.5006890296936035} +03/04/2022 15:48:23 - INFO - codeparrot_training - Step 22426: {'lr': 0.0004768666473557021, 'samples': 11482624, 'steps': 22426, 'loss/train': 2.3809986114501953} +03/04/2022 15:48:25 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 15:48:29 - INFO - codeparrot_training - Step 22427: {'lr': 0.0004768644178125791, 'samples': 11483136, 'steps': 22427, 'loss/train': 2.2899534702301025} +03/04/2022 15:48:32 - INFO - codeparrot_training - Step 22428: {'lr': 0.0004768621881672345, 'samples': 11483648, 'steps': 22428, 'loss/train': 1.4964479207992554} +03/04/2022 15:48:33 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 15:48:37 - INFO - codeparrot_training - Step 22429: {'lr': 0.00047685995841966936, 'samples': 11484160, 'steps': 22429, 'loss/train': 1.91129732131958} +03/04/2022 15:48:40 - INFO - codeparrot_training - Step 22430: {'lr': 0.0004768577285698845, 'samples': 11484672, 'steps': 22430, 'loss/train': 4.377689361572266} +03/04/2022 15:48:42 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 15:48:45 - INFO - codeparrot_training - Step 22431: {'lr': 0.00047685549861788113, 'samples': 11485184, 'steps': 22431, 'loss/train': 2.3613810539245605} +03/04/2022 15:48:49 - INFO - codeparrot_training - Step 22432: {'lr': 0.0004768532685636602, 'samples': 11485696, 'steps': 22432, 'loss/train': 1.6671661138534546} +03/04/2022 15:48:50 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 15:48:54 - INFO - codeparrot_training - Step 22433: {'lr': 0.0004768510384072226, 'samples': 11486208, 'steps': 22433, 'loss/train': 1.9080430269241333} +03/04/2022 15:48:57 - INFO - codeparrot_training - Step 22434: {'lr': 0.0004768488081485695, 'samples': 11486720, 'steps': 22434, 'loss/train': 0.7797421813011169} +03/04/2022 15:48:58 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 15:49:02 - INFO - codeparrot_training - Step 22435: {'lr': 0.0004768465777877018, 'samples': 11487232, 'steps': 22435, 'loss/train': 1.5904635190963745} +03/04/2022 15:49:05 - INFO - codeparrot_training - Step 22436: {'lr': 0.0004768443473246205, 'samples': 11487744, 'steps': 22436, 'loss/train': 1.3765811920166016} +03/04/2022 15:49:07 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 15:49:11 - INFO - codeparrot_training - Step 22437: {'lr': 0.00047684211675932665, 'samples': 11488256, 'steps': 22437, 'loss/train': 2.326796770095825} +03/04/2022 15:49:14 - INFO - codeparrot_training - Step 22438: {'lr': 0.0004768398860918213, 'samples': 11488768, 'steps': 22438, 'loss/train': 1.441772222518921} +03/04/2022 15:49:15 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 15:49:19 - INFO - codeparrot_training - Step 22439: {'lr': 0.0004768376553221053, 'samples': 11489280, 'steps': 22439, 'loss/train': 2.22330379486084} +03/04/2022 15:49:22 - INFO - codeparrot_training - Step 22440: {'lr': 0.0004768354244501798, 'samples': 11489792, 'steps': 22440, 'loss/train': 1.7069092988967896} +03/04/2022 15:49:24 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 15:49:28 - INFO - codeparrot_training - Step 22441: {'lr': 0.0004768331934760458, 'samples': 11490304, 'steps': 22441, 'loss/train': 2.24930477142334} +03/04/2022 15:49:31 - INFO - codeparrot_training - Step 22442: {'lr': 0.00047683096239970423, 'samples': 11490816, 'steps': 22442, 'loss/train': 2.0926597118377686} +03/04/2022 15:49:32 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/04/2022 15:49:36 - INFO - codeparrot_training - Step 22443: {'lr': 0.0004768287312211561, 'samples': 11491328, 'steps': 22443, 'loss/train': 1.8300825357437134} +03/04/2022 15:49:39 - INFO - codeparrot_training - Step 22444: {'lr': 0.0004768264999404025, 'samples': 11491840, 'steps': 22444, 'loss/train': 1.6856225728988647} +03/04/2022 15:49:40 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 15:49:44 - INFO - codeparrot_training - Step 22445: {'lr': 0.00047682426855744434, 'samples': 11492352, 'steps': 22445, 'loss/train': 2.133347272872925} +03/04/2022 15:49:48 - INFO - codeparrot_training - Step 22446: {'lr': 0.00047682203707228264, 'samples': 11492864, 'steps': 22446, 'loss/train': 1.9194990396499634} +03/04/2022 15:49:48 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 15:49:53 - INFO - codeparrot_training - Step 22447: {'lr': 0.00047681980548491853, 'samples': 11493376, 'steps': 22447, 'loss/train': 1.4561387300491333} +03/04/2022 15:49:56 - INFO - codeparrot_training - Step 22448: {'lr': 0.00047681757379535285, 'samples': 11493888, 'steps': 22448, 'loss/train': 1.2268905639648438} +03/04/2022 15:49:57 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/04/2022 15:50:01 - INFO - codeparrot_training - Step 22449: {'lr': 0.00047681534200358665, 'samples': 11494400, 'steps': 22449, 'loss/train': 1.752184510231018} +03/04/2022 15:50:04 - INFO - codeparrot_training - Step 22450: {'lr': 0.000476813110109621, 'samples': 11494912, 'steps': 22450, 'loss/train': 0.9723352789878845} +03/04/2022 15:50:06 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/04/2022 15:50:10 - INFO - codeparrot_training - Step 22451: {'lr': 0.0004768108781134568, 'samples': 11495424, 'steps': 22451, 'loss/train': 1.312381625175476} +03/04/2022 15:50:13 - INFO - codeparrot_training - Step 22452: {'lr': 0.0004768086460150952, 'samples': 11495936, 'steps': 22452, 'loss/train': 1.2121964693069458} +03/04/2022 15:50:14 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 15:50:18 - INFO - codeparrot_training - Step 22453: {'lr': 0.00047680641381453703, 'samples': 11496448, 'steps': 22453, 'loss/train': 2.477135181427002} +03/04/2022 15:50:21 - INFO - codeparrot_training - Step 22454: {'lr': 0.0004768041815117835, 'samples': 11496960, 'steps': 22454, 'loss/train': 2.1526434421539307} +03/04/2022 15:50:22 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 15:50:27 - INFO - codeparrot_training - Step 22455: {'lr': 0.00047680194910683545, 'samples': 11497472, 'steps': 22455, 'loss/train': 1.3500245809555054} +03/04/2022 15:50:30 - INFO - codeparrot_training - Step 22456: {'lr': 0.0004767997165996939, 'samples': 11497984, 'steps': 22456, 'loss/train': 1.5976893901824951} +03/04/2022 15:50:31 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 15:50:35 - INFO - codeparrot_training - Step 22457: {'lr': 0.00047679748399035994, 'samples': 11498496, 'steps': 22457, 'loss/train': 2.302093982696533} +03/04/2022 15:50:38 - INFO - codeparrot_training - Step 22458: {'lr': 0.00047679525127883456, 'samples': 11499008, 'steps': 22458, 'loss/train': 1.4814116954803467} +03/04/2022 15:50:39 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 15:50:43 - INFO - codeparrot_training - Step 22459: {'lr': 0.0004767930184651187, 'samples': 11499520, 'steps': 22459, 'loss/train': 1.1676796674728394} +03/04/2022 15:50:47 - INFO - codeparrot_training - Step 22460: {'lr': 0.0004767907855492134, 'samples': 11500032, 'steps': 22460, 'loss/train': 1.2024859189987183} +03/04/2022 15:50:47 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/04/2022 15:50:52 - INFO - codeparrot_training - Step 22461: {'lr': 0.0004767885525311197, 'samples': 11500544, 'steps': 22461, 'loss/train': 1.7455363273620605} +03/04/2022 15:50:55 - INFO - codeparrot_training - Step 22462: {'lr': 0.0004767863194108386, 'samples': 11501056, 'steps': 22462, 'loss/train': 1.544686198234558} +03/04/2022 15:50:56 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 15:51:01 - INFO - codeparrot_training - Step 22463: {'lr': 0.000476784086188371, 'samples': 11501568, 'steps': 22463, 'loss/train': 3.3219072818756104} +03/04/2022 15:51:04 - INFO - codeparrot_training - Step 22464: {'lr': 0.00047678185286371803, 'samples': 11502080, 'steps': 22464, 'loss/train': 1.7140549421310425} +03/04/2022 15:51:06 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 15:51:09 - INFO - codeparrot_training - Step 22465: {'lr': 0.0004767796194368807, 'samples': 11502592, 'steps': 22465, 'loss/train': 1.0810924768447876} +03/04/2022 15:51:12 - INFO - codeparrot_training - Step 22466: {'lr': 0.00047677738590786, 'samples': 11503104, 'steps': 22466, 'loss/train': 1.381034255027771} +03/04/2022 15:51:15 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 15:51:18 - INFO - codeparrot_training - Step 22467: {'lr': 0.0004767751522766568, 'samples': 11503616, 'steps': 22467, 'loss/train': 2.795308828353882} +03/04/2022 15:51:21 - INFO - codeparrot_training - Step 22468: {'lr': 0.00047677291854327224, 'samples': 11504128, 'steps': 22468, 'loss/train': 2.104806423187256} +03/04/2022 15:51:23 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/04/2022 15:51:26 - INFO - codeparrot_training - Step 22469: {'lr': 0.00047677068470770737, 'samples': 11504640, 'steps': 22469, 'loss/train': 1.9409903287887573} +03/04/2022 15:51:29 - INFO - codeparrot_training - Step 22470: {'lr': 0.00047676845076996305, 'samples': 11505152, 'steps': 22470, 'loss/train': 1.545997142791748} +03/04/2022 15:51:32 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/04/2022 15:51:34 - INFO - codeparrot_training - Step 22471: {'lr': 0.0004767662167300404, 'samples': 11505664, 'steps': 22471, 'loss/train': 2.5562846660614014} +03/04/2022 15:51:38 - INFO - codeparrot_training - Step 22472: {'lr': 0.0004767639825879404, 'samples': 11506176, 'steps': 22472, 'loss/train': 3.3168678283691406} +03/04/2022 15:51:40 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 15:51:43 - INFO - codeparrot_training - Step 22473: {'lr': 0.000476761748343664, 'samples': 11506688, 'steps': 22473, 'loss/train': 4.830148696899414} +03/04/2022 15:51:46 - INFO - codeparrot_training - Step 22474: {'lr': 0.00047675951399721235, 'samples': 11507200, 'steps': 22474, 'loss/train': 1.583141803741455} +03/04/2022 15:51:49 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 15:51:51 - INFO - codeparrot_training - Step 22475: {'lr': 0.0004767572795485863, 'samples': 11507712, 'steps': 22475, 'loss/train': 1.974922776222229} +03/04/2022 15:51:55 - INFO - codeparrot_training - Step 22476: {'lr': 0.00047675504499778695, 'samples': 11508224, 'steps': 22476, 'loss/train': 2.2848598957061768} +03/04/2022 15:51:57 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 15:52:00 - INFO - codeparrot_training - Step 22477: {'lr': 0.0004767528103448152, 'samples': 11508736, 'steps': 22477, 'loss/train': 2.3041112422943115} +03/04/2022 15:52:03 - INFO - codeparrot_training - Step 22478: {'lr': 0.00047675057558967224, 'samples': 11509248, 'steps': 22478, 'loss/train': 1.721275806427002} +03/04/2022 15:52:06 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 15:52:08 - INFO - codeparrot_training - Step 22479: {'lr': 0.0004767483407323589, 'samples': 11509760, 'steps': 22479, 'loss/train': 0.6822611093521118} +03/04/2022 15:52:12 - INFO - codeparrot_training - Step 22480: {'lr': 0.00047674610577287625, 'samples': 11510272, 'steps': 22480, 'loss/train': 1.825452446937561} +03/04/2022 15:52:14 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 15:52:17 - INFO - codeparrot_training - Step 22481: {'lr': 0.00047674387071122536, 'samples': 11510784, 'steps': 22481, 'loss/train': 1.6028469800949097} +03/04/2022 15:52:20 - INFO - codeparrot_training - Step 22482: {'lr': 0.0004767416355474071, 'samples': 11511296, 'steps': 22482, 'loss/train': 2.698817253112793} +03/04/2022 15:52:23 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 15:52:25 - INFO - codeparrot_training - Step 22483: {'lr': 0.00047673940028142265, 'samples': 11511808, 'steps': 22483, 'loss/train': 1.3137794733047485} +03/04/2022 15:52:28 - INFO - codeparrot_training - Step 22484: {'lr': 0.0004767371649132729, 'samples': 11512320, 'steps': 22484, 'loss/train': 2.040895700454712} +03/04/2022 15:52:32 - INFO - codeparrot_training - Step 22485: {'lr': 0.00047673492944295883, 'samples': 11512832, 'steps': 22485, 'loss/train': 6.617819309234619} +03/04/2022 15:52:32 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 15:52:37 - INFO - codeparrot_training - Step 22486: {'lr': 0.0004767326938704816, 'samples': 11513344, 'steps': 22486, 'loss/train': 1.1397531032562256} +03/04/2022 15:52:40 - INFO - codeparrot_training - Step 22487: {'lr': 0.00047673045819584197, 'samples': 11513856, 'steps': 22487, 'loss/train': 2.2056562900543213} +03/04/2022 15:52:40 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 15:52:45 - INFO - codeparrot_training - Step 22488: {'lr': 0.0004767282224190412, 'samples': 11514368, 'steps': 22488, 'loss/train': 2.1432173252105713} +03/04/2022 15:52:48 - INFO - codeparrot_training - Step 22489: {'lr': 0.00047672598654008015, 'samples': 11514880, 'steps': 22489, 'loss/train': 2.137909412384033} +03/04/2022 15:52:49 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 15:52:54 - INFO - codeparrot_training - Step 22490: {'lr': 0.0004767237505589599, 'samples': 11515392, 'steps': 22490, 'loss/train': 1.5522429943084717} +03/04/2022 15:52:57 - INFO - codeparrot_training - Step 22491: {'lr': 0.0004767215144756814, 'samples': 11515904, 'steps': 22491, 'loss/train': 1.896788239479065} +03/04/2022 15:52:57 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 15:53:02 - INFO - codeparrot_training - Step 22492: {'lr': 0.0004767192782902457, 'samples': 11516416, 'steps': 22492, 'loss/train': 2.0163357257843018} +03/04/2022 15:53:05 - INFO - codeparrot_training - Step 22493: {'lr': 0.0004767170420026538, 'samples': 11516928, 'steps': 22493, 'loss/train': 1.80930495262146} +03/04/2022 15:53:05 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 15:53:11 - INFO - codeparrot_training - Step 22494: {'lr': 0.0004767148056129067, 'samples': 11517440, 'steps': 22494, 'loss/train': 2.143876552581787} +03/04/2022 15:53:14 - INFO - codeparrot_training - Step 22495: {'lr': 0.0004767125691210054, 'samples': 11517952, 'steps': 22495, 'loss/train': 2.2034823894500732} +03/04/2022 15:53:14 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 15:53:19 - INFO - codeparrot_training - Step 22496: {'lr': 0.00047671033252695083, 'samples': 11518464, 'steps': 22496, 'loss/train': 1.6189838647842407} +03/04/2022 15:53:22 - INFO - codeparrot_training - Step 22497: {'lr': 0.0004767080958307442, 'samples': 11518976, 'steps': 22497, 'loss/train': 0.9233405590057373} +03/04/2022 15:53:23 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 15:53:27 - INFO - codeparrot_training - Step 22498: {'lr': 0.0004767058590323864, 'samples': 11519488, 'steps': 22498, 'loss/train': 2.3180761337280273} +03/04/2022 15:53:31 - INFO - codeparrot_training - Step 22499: {'lr': 0.00047670362213187833, 'samples': 11520000, 'steps': 22499, 'loss/train': 1.8340678215026855} +03/04/2022 15:53:31 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/04/2022 15:53:36 - INFO - codeparrot_training - Step 22500: {'lr': 0.0004767013851292212, 'samples': 11520512, 'steps': 22500, 'loss/train': 1.3954896926879883} +03/04/2022 15:53:39 - INFO - codeparrot_training - Step 22501: {'lr': 0.0004766991480244159, 'samples': 11521024, 'steps': 22501, 'loss/train': 1.6389615535736084} +03/04/2022 15:53:40 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/04/2022 15:53:44 - INFO - codeparrot_training - Step 22502: {'lr': 0.0004766969108174635, 'samples': 11521536, 'steps': 22502, 'loss/train': 1.53696608543396} +03/04/2022 15:53:48 - INFO - codeparrot_training - Step 22503: {'lr': 0.0004766946735083649, 'samples': 11522048, 'steps': 22503, 'loss/train': 5.497185230255127} +03/04/2022 15:53:48 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/04/2022 15:53:53 - INFO - codeparrot_training - Step 22504: {'lr': 0.0004766924360971212, 'samples': 11522560, 'steps': 22504, 'loss/train': 2.12186598777771} +03/04/2022 15:53:56 - INFO - codeparrot_training - Step 22505: {'lr': 0.00047669019858373343, 'samples': 11523072, 'steps': 22505, 'loss/train': 2.363293409347534} +03/04/2022 15:53:58 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 15:54:01 - INFO - codeparrot_training - Step 22506: {'lr': 0.00047668796096820247, 'samples': 11523584, 'steps': 22506, 'loss/train': 0.5293604731559753} +03/04/2022 15:54:05 - INFO - codeparrot_training - Step 22507: {'lr': 0.00047668572325052953, 'samples': 11524096, 'steps': 22507, 'loss/train': 1.743024230003357} +03/04/2022 15:54:06 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/04/2022 15:54:10 - INFO - codeparrot_training - Step 22508: {'lr': 0.00047668348543071536, 'samples': 11524608, 'steps': 22508, 'loss/train': 1.6122633218765259} +03/04/2022 15:54:13 - INFO - codeparrot_training - Step 22509: {'lr': 0.00047668124750876117, 'samples': 11525120, 'steps': 22509, 'loss/train': 2.0255401134490967} +03/04/2022 15:54:14 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 15:54:18 - INFO - codeparrot_training - Step 22510: {'lr': 0.0004766790094846679, 'samples': 11525632, 'steps': 22510, 'loss/train': 2.4109208583831787} +03/04/2022 15:54:21 - INFO - codeparrot_training - Step 22511: {'lr': 0.0004766767713584367, 'samples': 11526144, 'steps': 22511, 'loss/train': 2.465013265609741} +03/04/2022 15:54:23 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 15:54:27 - INFO - codeparrot_training - Step 22512: {'lr': 0.00047667453313006826, 'samples': 11526656, 'steps': 22512, 'loss/train': 1.4937057495117188} +03/04/2022 15:54:30 - INFO - codeparrot_training - Step 22513: {'lr': 0.00047667229479956386, 'samples': 11527168, 'steps': 22513, 'loss/train': 1.3148878812789917} +03/04/2022 15:54:31 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/04/2022 15:54:35 - INFO - codeparrot_training - Step 22514: {'lr': 0.0004766700563669244, 'samples': 11527680, 'steps': 22514, 'loss/train': 1.7674264907836914} +03/04/2022 15:54:38 - INFO - codeparrot_training - Step 22515: {'lr': 0.0004766678178321509, 'samples': 11528192, 'steps': 22515, 'loss/train': 1.6861134767532349} +03/04/2022 15:54:40 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 15:54:44 - INFO - codeparrot_training - Step 22516: {'lr': 0.0004766655791952444, 'samples': 11528704, 'steps': 22516, 'loss/train': 2.0013840198516846} +03/04/2022 15:54:47 - INFO - codeparrot_training - Step 22517: {'lr': 0.0004766633404562059, 'samples': 11529216, 'steps': 22517, 'loss/train': 1.7278904914855957} +03/04/2022 15:54:48 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/04/2022 15:54:52 - INFO - codeparrot_training - Step 22518: {'lr': 0.0004766611016150364, 'samples': 11529728, 'steps': 22518, 'loss/train': 1.906418800354004} +03/04/2022 15:54:55 - INFO - codeparrot_training - Step 22519: {'lr': 0.00047665886267173686, 'samples': 11530240, 'steps': 22519, 'loss/train': 2.3232064247131348} +03/04/2022 15:54:57 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 15:55:01 - INFO - codeparrot_training - Step 22520: {'lr': 0.00047665662362630836, 'samples': 11530752, 'steps': 22520, 'loss/train': 1.5617451667785645} +03/04/2022 15:55:04 - INFO - codeparrot_training - Step 22521: {'lr': 0.00047665438447875186, 'samples': 11531264, 'steps': 22521, 'loss/train': 1.9652162790298462} +03/04/2022 15:55:06 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 15:55:09 - INFO - codeparrot_training - Step 22522: {'lr': 0.0004766521452290684, 'samples': 11531776, 'steps': 22522, 'loss/train': 2.05790376663208} +03/04/2022 15:55:12 - INFO - codeparrot_training - Step 22523: {'lr': 0.00047664990587725905, 'samples': 11532288, 'steps': 22523, 'loss/train': 2.624528169631958} +03/04/2022 15:55:14 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/04/2022 15:55:17 - INFO - codeparrot_training - Step 22524: {'lr': 0.0004766476664233247, 'samples': 11532800, 'steps': 22524, 'loss/train': 1.4314969778060913} +03/04/2022 15:55:21 - INFO - codeparrot_training - Step 22525: {'lr': 0.0004766454268672664, 'samples': 11533312, 'steps': 22525, 'loss/train': 1.3567208051681519} +03/04/2022 15:55:22 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 15:55:26 - INFO - codeparrot_training - Step 22526: {'lr': 0.00047664318720908516, 'samples': 11533824, 'steps': 22526, 'loss/train': 1.8593688011169434} +03/04/2022 15:55:29 - INFO - codeparrot_training - Step 22527: {'lr': 0.000476640947448782, 'samples': 11534336, 'steps': 22527, 'loss/train': 1.9574041366577148} +03/04/2022 15:55:32 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 15:55:34 - INFO - codeparrot_training - Step 22528: {'lr': 0.000476638707586358, 'samples': 11534848, 'steps': 22528, 'loss/train': 2.1821541786193848} +03/04/2022 15:55:38 - INFO - codeparrot_training - Step 22529: {'lr': 0.000476636467621814, 'samples': 11535360, 'steps': 22529, 'loss/train': 2.1727144718170166} +03/04/2022 15:55:40 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 15:55:43 - INFO - codeparrot_training - Step 22530: {'lr': 0.00047663422755515113, 'samples': 11535872, 'steps': 22530, 'loss/train': 1.0078575611114502} +03/04/2022 15:55:46 - INFO - codeparrot_training - Step 22531: {'lr': 0.00047663198738637035, 'samples': 11536384, 'steps': 22531, 'loss/train': 2.5753650665283203} +03/04/2022 15:55:49 - INFO - codeparrot_training - Step 22532: {'lr': 0.00047662974711547274, 'samples': 11536896, 'steps': 22532, 'loss/train': 1.5151337385177612} +03/04/2022 15:55:50 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/04/2022 15:55:55 - INFO - codeparrot_training - Step 22533: {'lr': 0.0004766275067424593, 'samples': 11537408, 'steps': 22533, 'loss/train': 1.4739265441894531} +03/04/2022 15:55:58 - INFO - codeparrot_training - Step 22534: {'lr': 0.0004766252662673309, 'samples': 11537920, 'steps': 22534, 'loss/train': 0.5902225971221924} +03/04/2022 15:55:58 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 15:56:03 - INFO - codeparrot_training - Step 22535: {'lr': 0.0004766230256900887, 'samples': 11538432, 'steps': 22535, 'loss/train': 1.6894992589950562} +03/04/2022 15:56:06 - INFO - codeparrot_training - Step 22536: {'lr': 0.0004766207850107337, 'samples': 11538944, 'steps': 22536, 'loss/train': 1.088456153869629} +03/04/2022 15:56:07 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 15:56:12 - INFO - codeparrot_training - Step 22537: {'lr': 0.00047661854422926674, 'samples': 11539456, 'steps': 22537, 'loss/train': 1.5934957265853882} +03/04/2022 15:56:15 - INFO - codeparrot_training - Step 22538: {'lr': 0.0004766163033456891, 'samples': 11539968, 'steps': 22538, 'loss/train': 2.377973794937134} +03/04/2022 15:56:15 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 15:56:20 - INFO - codeparrot_training - Step 22539: {'lr': 0.0004766140623600016, 'samples': 11540480, 'steps': 22539, 'loss/train': 1.418641448020935} +03/04/2022 15:56:24 - INFO - codeparrot_training - Step 22540: {'lr': 0.0004766118212722053, 'samples': 11540992, 'steps': 22540, 'loss/train': 2.613971710205078} +03/04/2022 15:56:24 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 15:56:29 - INFO - codeparrot_training - Step 22541: {'lr': 0.0004766095800823013, 'samples': 11541504, 'steps': 22541, 'loss/train': 1.6881815195083618} +03/04/2022 15:56:32 - INFO - codeparrot_training - Step 22542: {'lr': 0.0004766073387902904, 'samples': 11542016, 'steps': 22542, 'loss/train': 2.552833080291748} +03/04/2022 15:56:32 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 15:56:37 - INFO - codeparrot_training - Step 22543: {'lr': 0.00047660509739617376, 'samples': 11542528, 'steps': 22543, 'loss/train': 1.949157953262329} +03/04/2022 15:56:40 - INFO - codeparrot_training - Step 22544: {'lr': 0.00047660285589995233, 'samples': 11543040, 'steps': 22544, 'loss/train': 3.074531078338623} +03/04/2022 15:56:40 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/04/2022 15:56:46 - INFO - codeparrot_training - Step 22545: {'lr': 0.0004766006143016272, 'samples': 11543552, 'steps': 22545, 'loss/train': 0.4609377980232239} +03/04/2022 15:56:49 - INFO - codeparrot_training - Step 22546: {'lr': 0.0004765983726011993, 'samples': 11544064, 'steps': 22546, 'loss/train': 1.9356595277786255} +03/04/2022 15:56:49 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 15:56:54 - INFO - codeparrot_training - Step 22547: {'lr': 0.0004765961307986697, 'samples': 11544576, 'steps': 22547, 'loss/train': 1.354476809501648} +03/04/2022 15:56:57 - INFO - codeparrot_training - Step 22548: {'lr': 0.0004765938888940393, 'samples': 11545088, 'steps': 22548, 'loss/train': 2.055955171585083} +03/04/2022 15:56:58 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 15:57:03 - INFO - codeparrot_training - Step 22549: {'lr': 0.00047659164688730935, 'samples': 11545600, 'steps': 22549, 'loss/train': 0.9884545207023621} +03/04/2022 15:57:06 - INFO - codeparrot_training - Step 22550: {'lr': 0.00047658940477848056, 'samples': 11546112, 'steps': 22550, 'loss/train': 1.931716799736023} +03/04/2022 15:57:07 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 15:57:11 - INFO - codeparrot_training - Step 22551: {'lr': 0.00047658716256755414, 'samples': 11546624, 'steps': 22551, 'loss/train': 1.811279296875} +03/04/2022 15:57:14 - INFO - codeparrot_training - Step 22552: {'lr': 0.00047658492025453106, 'samples': 11547136, 'steps': 22552, 'loss/train': 1.874588966369629} +03/04/2022 15:57:16 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 15:57:20 - INFO - codeparrot_training - Step 22553: {'lr': 0.00047658267783941223, 'samples': 11547648, 'steps': 22553, 'loss/train': 1.7483720779418945} +03/04/2022 15:57:23 - INFO - codeparrot_training - Step 22554: {'lr': 0.0004765804353221988, 'samples': 11548160, 'steps': 22554, 'loss/train': 2.398430585861206} +03/04/2022 15:57:24 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 15:57:28 - INFO - codeparrot_training - Step 22555: {'lr': 0.0004765781927028917, 'samples': 11548672, 'steps': 22555, 'loss/train': 1.5815105438232422} +03/04/2022 15:57:31 - INFO - codeparrot_training - Step 22556: {'lr': 0.000476575949981492, 'samples': 11549184, 'steps': 22556, 'loss/train': 1.9444653987884521} +03/04/2022 15:57:33 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/04/2022 15:57:36 - INFO - codeparrot_training - Step 22557: {'lr': 0.00047657370715800066, 'samples': 11549696, 'steps': 22557, 'loss/train': 2.27104115486145} +03/04/2022 15:57:40 - INFO - codeparrot_training - Step 22558: {'lr': 0.0004765714642324187, 'samples': 11550208, 'steps': 22558, 'loss/train': 2.0951380729675293} +03/04/2022 15:57:41 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/04/2022 15:57:45 - INFO - codeparrot_training - Step 22559: {'lr': 0.0004765692212047471, 'samples': 11550720, 'steps': 22559, 'loss/train': 0.9666503071784973} +03/04/2022 15:57:48 - INFO - codeparrot_training - Step 22560: {'lr': 0.00047656697807498693, 'samples': 11551232, 'steps': 22560, 'loss/train': 1.8167030811309814} +03/04/2022 15:57:50 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/04/2022 15:57:54 - INFO - codeparrot_training - Step 22561: {'lr': 0.0004765647348431392, 'samples': 11551744, 'steps': 22561, 'loss/train': 1.4436991214752197} +03/04/2022 15:57:57 - INFO - codeparrot_training - Step 22562: {'lr': 0.00047656249150920485, 'samples': 11552256, 'steps': 22562, 'loss/train': 2.581723213195801} +03/04/2022 15:57:58 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 15:58:02 - INFO - codeparrot_training - Step 22563: {'lr': 0.000476560248073185, 'samples': 11552768, 'steps': 22563, 'loss/train': 2.346001386642456} +03/04/2022 15:58:05 - INFO - codeparrot_training - Step 22564: {'lr': 0.0004765580045350805, 'samples': 11553280, 'steps': 22564, 'loss/train': 1.369173526763916} +03/04/2022 15:58:07 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 15:58:10 - INFO - codeparrot_training - Step 22565: {'lr': 0.00047655576089489254, 'samples': 11553792, 'steps': 22565, 'loss/train': 1.4972577095031738} +03/04/2022 15:58:14 - INFO - codeparrot_training - Step 22566: {'lr': 0.00047655351715262205, 'samples': 11554304, 'steps': 22566, 'loss/train': 1.991624116897583} +03/04/2022 15:58:15 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 15:58:19 - INFO - codeparrot_training - Step 22567: {'lr': 0.00047655127330827, 'samples': 11554816, 'steps': 22567, 'loss/train': 1.6280491352081299} +03/04/2022 15:58:22 - INFO - codeparrot_training - Step 22568: {'lr': 0.00047654902936183745, 'samples': 11555328, 'steps': 22568, 'loss/train': 1.8660573959350586} +03/04/2022 15:58:24 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 15:58:27 - INFO - codeparrot_training - Step 22569: {'lr': 0.00047654678531332544, 'samples': 11555840, 'steps': 22569, 'loss/train': 1.9200700521469116} +03/04/2022 15:58:30 - INFO - codeparrot_training - Step 22570: {'lr': 0.00047654454116273493, 'samples': 11556352, 'steps': 22570, 'loss/train': 2.5916576385498047} +03/04/2022 15:58:32 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 15:58:36 - INFO - codeparrot_training - Step 22571: {'lr': 0.0004765422969100669, 'samples': 11556864, 'steps': 22571, 'loss/train': 1.3937246799468994} +03/04/2022 15:58:39 - INFO - codeparrot_training - Step 22572: {'lr': 0.00047654005255532247, 'samples': 11557376, 'steps': 22572, 'loss/train': 2.65877628326416} +03/04/2022 15:58:41 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 15:58:44 - INFO - codeparrot_training - Step 22573: {'lr': 0.0004765378080985026, 'samples': 11557888, 'steps': 22573, 'loss/train': 1.0780423879623413} +03/04/2022 15:58:47 - INFO - codeparrot_training - Step 22574: {'lr': 0.00047653556353960825, 'samples': 11558400, 'steps': 22574, 'loss/train': 1.8875901699066162} +03/04/2022 15:58:49 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/04/2022 15:58:53 - INFO - codeparrot_training - Step 22575: {'lr': 0.0004765333188786404, 'samples': 11558912, 'steps': 22575, 'loss/train': 2.4469127655029297} +03/04/2022 15:58:56 - INFO - codeparrot_training - Step 22576: {'lr': 0.00047653107411560025, 'samples': 11559424, 'steps': 22576, 'loss/train': 1.5975571870803833} +03/04/2022 15:58:57 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 15:59:01 - INFO - codeparrot_training - Step 22577: {'lr': 0.00047652882925048863, 'samples': 11559936, 'steps': 22577, 'loss/train': 2.0104424953460693} +03/04/2022 15:59:04 - INFO - codeparrot_training - Step 22578: {'lr': 0.00047652658428330664, 'samples': 11560448, 'steps': 22578, 'loss/train': 1.6819692850112915} +03/04/2022 15:59:06 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 15:59:10 - INFO - codeparrot_training - Step 22579: {'lr': 0.00047652433921405526, 'samples': 11560960, 'steps': 22579, 'loss/train': 2.1550815105438232} +03/04/2022 15:59:13 - INFO - codeparrot_training - Step 22580: {'lr': 0.0004765220940427355, 'samples': 11561472, 'steps': 22580, 'loss/train': 1.942596197128296} +03/04/2022 15:59:15 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 15:59:18 - INFO - codeparrot_training - Step 22581: {'lr': 0.0004765198487693484, 'samples': 11561984, 'steps': 22581, 'loss/train': 2.454638719558716} +03/04/2022 15:59:21 - INFO - codeparrot_training - Step 22582: {'lr': 0.00047651760339389494, 'samples': 11562496, 'steps': 22582, 'loss/train': 1.1270755529403687} +03/04/2022 15:59:23 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/04/2022 15:59:27 - INFO - codeparrot_training - Step 22583: {'lr': 0.0004765153579163761, 'samples': 11563008, 'steps': 22583, 'loss/train': 1.6555266380310059} +03/04/2022 15:59:30 - INFO - codeparrot_training - Step 22584: {'lr': 0.000476513112336793, 'samples': 11563520, 'steps': 22584, 'loss/train': 1.6461373567581177} +03/04/2022 15:59:32 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/04/2022 15:59:35 - INFO - codeparrot_training - Step 22585: {'lr': 0.00047651086665514655, 'samples': 11564032, 'steps': 22585, 'loss/train': 1.7901215553283691} +03/04/2022 15:59:38 - INFO - codeparrot_training - Step 22586: {'lr': 0.00047650862087143787, 'samples': 11564544, 'steps': 22586, 'loss/train': 1.8084999322891235} +03/04/2022 15:59:40 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 15:59:44 - INFO - codeparrot_training - Step 22587: {'lr': 0.0004765063749856678, 'samples': 11565056, 'steps': 22587, 'loss/train': 1.0192201137542725} +03/04/2022 15:59:47 - INFO - codeparrot_training - Step 22588: {'lr': 0.00047650412899783747, 'samples': 11565568, 'steps': 22588, 'loss/train': 2.1669023036956787} +03/04/2022 15:59:49 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 15:59:52 - INFO - codeparrot_training - Step 22589: {'lr': 0.0004765018829079479, 'samples': 11566080, 'steps': 22589, 'loss/train': 2.085214376449585} +03/04/2022 15:59:55 - INFO - codeparrot_training - Step 22590: {'lr': 0.0004764996367160001, 'samples': 11566592, 'steps': 22590, 'loss/train': 2.2782580852508545} +03/04/2022 15:59:58 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/04/2022 16:00:00 - INFO - codeparrot_training - Step 22591: {'lr': 0.000476497390421995, 'samples': 11567104, 'steps': 22591, 'loss/train': 1.7893284559249878} +03/04/2022 16:00:04 - INFO - codeparrot_training - Step 22592: {'lr': 0.00047649514402593377, 'samples': 11567616, 'steps': 22592, 'loss/train': 1.5248842239379883} +03/04/2022 16:00:06 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/04/2022 16:00:09 - INFO - codeparrot_training - Step 22593: {'lr': 0.0004764928975278172, 'samples': 11568128, 'steps': 22593, 'loss/train': 0.3378801941871643} +03/04/2022 16:00:12 - INFO - codeparrot_training - Step 22594: {'lr': 0.0004764906509276465, 'samples': 11568640, 'steps': 22594, 'loss/train': 2.554926872253418} +03/04/2022 16:00:14 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/04/2022 16:00:17 - INFO - codeparrot_training - Step 22595: {'lr': 0.0004764884042254226, 'samples': 11569152, 'steps': 22595, 'loss/train': 1.769107699394226} +03/04/2022 16:00:21 - INFO - codeparrot_training - Step 22596: {'lr': 0.0004764861574211465, 'samples': 11569664, 'steps': 22596, 'loss/train': 1.8507225513458252} +03/04/2022 16:00:23 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/04/2022 16:00:26 - INFO - codeparrot_training - Step 22597: {'lr': 0.0004764839105148193, 'samples': 11570176, 'steps': 22597, 'loss/train': 1.958627462387085} +03/04/2022 16:00:29 - INFO - codeparrot_training - Step 22598: {'lr': 0.00047648166350644185, 'samples': 11570688, 'steps': 22598, 'loss/train': 2.347872257232666} +03/04/2022 16:00:32 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 16:00:34 - INFO - codeparrot_training - Step 22599: {'lr': 0.00047647941639601535, 'samples': 11571200, 'steps': 22599, 'loss/train': 1.5985592603683472} +03/04/2022 16:00:37 - INFO - codeparrot_training - Step 22600: {'lr': 0.00047647716918354066, 'samples': 11571712, 'steps': 22600, 'loss/train': 1.9171409606933594} +03/04/2022 16:00:40 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/04/2022 16:00:43 - INFO - codeparrot_training - Step 22601: {'lr': 0.00047647492186901884, 'samples': 11572224, 'steps': 22601, 'loss/train': 1.7307279109954834} +03/04/2022 16:00:46 - INFO - codeparrot_training - Step 22602: {'lr': 0.0004764726744524509, 'samples': 11572736, 'steps': 22602, 'loss/train': 2.0675957202911377} +03/04/2022 16:00:49 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 16:00:51 - INFO - codeparrot_training - Step 22603: {'lr': 0.0004764704269338379, 'samples': 11573248, 'steps': 22603, 'loss/train': 2.919102907180786} +03/04/2022 16:00:54 - INFO - codeparrot_training - Step 22604: {'lr': 0.00047646817931318086, 'samples': 11573760, 'steps': 22604, 'loss/train': 1.592376947402954} +03/04/2022 16:00:57 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 16:01:00 - INFO - codeparrot_training - Step 22605: {'lr': 0.0004764659315904807, 'samples': 11574272, 'steps': 22605, 'loss/train': 1.4732460975646973} +03/04/2022 16:01:03 - INFO - codeparrot_training - Step 22606: {'lr': 0.0004764636837657385, 'samples': 11574784, 'steps': 22606, 'loss/train': 0.3842104971408844} +03/04/2022 16:01:05 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 16:01:08 - INFO - codeparrot_training - Step 22607: {'lr': 0.0004764614358389553, 'samples': 11575296, 'steps': 22607, 'loss/train': 2.1826395988464355} +03/04/2022 16:01:11 - INFO - codeparrot_training - Step 22608: {'lr': 0.00047645918781013196, 'samples': 11575808, 'steps': 22608, 'loss/train': 2.3344006538391113} +03/04/2022 16:01:14 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 16:01:17 - INFO - codeparrot_training - Step 22609: {'lr': 0.0004764569396792697, 'samples': 11576320, 'steps': 22609, 'loss/train': 2.26448392868042} +03/04/2022 16:01:20 - INFO - codeparrot_training - Step 22610: {'lr': 0.0004764546914463694, 'samples': 11576832, 'steps': 22610, 'loss/train': 1.5200139284133911} +03/04/2022 16:01:23 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/04/2022 16:01:25 - INFO - codeparrot_training - Step 22611: {'lr': 0.0004764524431114321, 'samples': 11577344, 'steps': 22611, 'loss/train': 1.6231499910354614} +03/04/2022 16:01:28 - INFO - codeparrot_training - Step 22612: {'lr': 0.0004764501946744589, 'samples': 11577856, 'steps': 22612, 'loss/train': 1.6300301551818848} +03/04/2022 16:01:31 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 16:01:34 - INFO - codeparrot_training - Step 22613: {'lr': 0.00047644794613545065, 'samples': 11578368, 'steps': 22613, 'loss/train': 2.000049591064453} +03/04/2022 16:01:37 - INFO - codeparrot_training - Step 22614: {'lr': 0.00047644569749440846, 'samples': 11578880, 'steps': 22614, 'loss/train': 2.1220104694366455} +03/04/2022 16:01:39 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 16:01:42 - INFO - codeparrot_training - Step 22615: {'lr': 0.0004764434487513334, 'samples': 11579392, 'steps': 22615, 'loss/train': 2.661520004272461} +03/04/2022 16:01:45 - INFO - codeparrot_training - Step 22616: {'lr': 0.00047644119990622637, 'samples': 11579904, 'steps': 22616, 'loss/train': 1.886074185371399} +03/04/2022 16:01:48 - INFO - codeparrot_training - Step 22617: {'lr': 0.0004764389509590884, 'samples': 11580416, 'steps': 22617, 'loss/train': 1.822066307067871} +03/04/2022 16:01:49 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 16:01:54 - INFO - codeparrot_training - Step 22618: {'lr': 0.0004764367019099206, 'samples': 11580928, 'steps': 22618, 'loss/train': 1.6211234331130981} +03/04/2022 16:01:57 - INFO - codeparrot_training - Step 22619: {'lr': 0.0004764344527587239, 'samples': 11581440, 'steps': 22619, 'loss/train': 1.3695306777954102} +03/04/2022 16:01:57 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/04/2022 16:02:02 - INFO - codeparrot_training - Step 22620: {'lr': 0.00047643220350549934, 'samples': 11581952, 'steps': 22620, 'loss/train': 2.206536293029785} +03/04/2022 16:02:05 - INFO - codeparrot_training - Step 22621: {'lr': 0.0004764299541502478, 'samples': 11582464, 'steps': 22621, 'loss/train': 1.4475903511047363} +03/04/2022 16:02:05 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 16:02:10 - INFO - codeparrot_training - Step 22622: {'lr': 0.0004764277046929706, 'samples': 11582976, 'steps': 22622, 'loss/train': 0.5897603631019592} +03/04/2022 16:02:13 - INFO - codeparrot_training - Step 22623: {'lr': 0.00047642545513366843, 'samples': 11583488, 'steps': 22623, 'loss/train': 1.7979912757873535} +03/04/2022 16:02:14 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/04/2022 16:02:19 - INFO - codeparrot_training - Step 22624: {'lr': 0.0004764232054723425, 'samples': 11584000, 'steps': 22624, 'loss/train': 3.314054489135742} +03/04/2022 16:02:22 - INFO - codeparrot_training - Step 22625: {'lr': 0.0004764209557089938, 'samples': 11584512, 'steps': 22625, 'loss/train': 1.6141459941864014} +03/04/2022 16:02:24 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/04/2022 16:02:28 - INFO - codeparrot_training - Step 22626: {'lr': 0.00047641870584362323, 'samples': 11585024, 'steps': 22626, 'loss/train': 2.0690886974334717} +03/04/2022 16:02:31 - INFO - codeparrot_training - Step 22627: {'lr': 0.00047641645587623196, 'samples': 11585536, 'steps': 22627, 'loss/train': 1.6066988706588745} +03/04/2022 16:02:35 - INFO - codeparrot_training - Step 22628: {'lr': 0.0004764142058068209, 'samples': 11586048, 'steps': 22628, 'loss/train': 2.2944929599761963} +03/04/2022 16:02:36 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 16:02:40 - INFO - codeparrot_training - Step 22629: {'lr': 0.00047641195563539107, 'samples': 11586560, 'steps': 22629, 'loss/train': 1.8909772634506226} +03/04/2022 16:02:43 - INFO - codeparrot_training - Step 22630: {'lr': 0.0004764097053619435, 'samples': 11587072, 'steps': 22630, 'loss/train': 2.3390133380889893} +03/04/2022 16:02:44 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/04/2022 16:02:48 - INFO - codeparrot_training - Step 22631: {'lr': 0.00047640745498647925, 'samples': 11587584, 'steps': 22631, 'loss/train': 1.6426366567611694} +03/04/2022 16:02:52 - INFO - codeparrot_training - Step 22632: {'lr': 0.00047640520450899926, 'samples': 11588096, 'steps': 22632, 'loss/train': 2.283498525619507} +03/04/2022 16:02:54 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 16:02:57 - INFO - codeparrot_training - Step 22633: {'lr': 0.0004764029539295046, 'samples': 11588608, 'steps': 22633, 'loss/train': 1.8708990812301636} +03/04/2022 16:03:00 - INFO - codeparrot_training - Step 22634: {'lr': 0.0004764007032479963, 'samples': 11589120, 'steps': 22634, 'loss/train': 2.1835198402404785} +03/04/2022 16:03:02 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 16:03:05 - INFO - codeparrot_training - Step 22635: {'lr': 0.00047639845246447534, 'samples': 11589632, 'steps': 22635, 'loss/train': 1.0752500295639038} +03/04/2022 16:03:09 - INFO - codeparrot_training - Step 22636: {'lr': 0.00047639620157894264, 'samples': 11590144, 'steps': 22636, 'loss/train': 0.21507570147514343} +03/04/2022 16:03:11 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 16:03:14 - INFO - codeparrot_training - Step 22637: {'lr': 0.00047639395059139936, 'samples': 11590656, 'steps': 22637, 'loss/train': 2.565269947052002} +03/04/2022 16:03:17 - INFO - codeparrot_training - Step 22638: {'lr': 0.0004763916995018465, 'samples': 11591168, 'steps': 22638, 'loss/train': 1.5301684141159058} +03/04/2022 16:03:19 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 16:03:22 - INFO - codeparrot_training - Step 22639: {'lr': 0.00047638944831028497, 'samples': 11591680, 'steps': 22639, 'loss/train': 2.566046953201294} +03/04/2022 16:03:25 - INFO - codeparrot_training - Step 22640: {'lr': 0.00047638719701671587, 'samples': 11592192, 'steps': 22640, 'loss/train': 1.4349173307418823} +03/04/2022 16:03:28 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/04/2022 16:03:31 - INFO - codeparrot_training - Step 22641: {'lr': 0.00047638494562114015, 'samples': 11592704, 'steps': 22641, 'loss/train': 0.7630588412284851} +03/04/2022 16:03:34 - INFO - codeparrot_training - Step 22642: {'lr': 0.0004763826941235589, 'samples': 11593216, 'steps': 22642, 'loss/train': 1.948015570640564} +03/04/2022 16:03:36 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 16:03:40 - INFO - codeparrot_training - Step 22643: {'lr': 0.00047638044252397313, 'samples': 11593728, 'steps': 22643, 'loss/train': 1.6665282249450684} +03/04/2022 16:03:43 - INFO - codeparrot_training - Step 22644: {'lr': 0.0004763781908223838, 'samples': 11594240, 'steps': 22644, 'loss/train': 1.452444076538086} +03/04/2022 16:03:46 - INFO - codeparrot_training - Step 22645: {'lr': 0.00047637593901879194, 'samples': 11594752, 'steps': 22645, 'loss/train': 3.2267260551452637} +03/04/2022 16:03:49 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/04/2022 16:03:52 - INFO - codeparrot_training - Step 22646: {'lr': 0.00047637368711319863, 'samples': 11595264, 'steps': 22646, 'loss/train': 1.648328185081482} +03/04/2022 16:03:55 - INFO - codeparrot_training - Step 22647: {'lr': 0.00047637143510560477, 'samples': 11595776, 'steps': 22647, 'loss/train': 2.0546798706054688} +03/04/2022 16:03:58 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/04/2022 16:04:00 - INFO - codeparrot_training - Step 22648: {'lr': 0.0004763691829960114, 'samples': 11596288, 'steps': 22648, 'loss/train': 1.8836004734039307} +03/04/2022 16:04:03 - INFO - codeparrot_training - Step 22649: {'lr': 0.00047636693078441963, 'samples': 11596800, 'steps': 22649, 'loss/train': 2.3728530406951904} +03/04/2022 16:04:06 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 16:04:09 - INFO - codeparrot_training - Step 22650: {'lr': 0.0004763646784708304, 'samples': 11597312, 'steps': 22650, 'loss/train': 1.896786093711853} +03/04/2022 16:04:12 - INFO - codeparrot_training - Step 22651: {'lr': 0.00047636242605524477, 'samples': 11597824, 'steps': 22651, 'loss/train': 1.9026237726211548} +03/04/2022 16:04:15 - INFO - codeparrot_training - Step 22652: {'lr': 0.0004763601735376637, 'samples': 11598336, 'steps': 22652, 'loss/train': 0.977907121181488} +03/04/2022 16:04:15 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 16:04:20 - INFO - codeparrot_training - Step 22653: {'lr': 0.0004763579209180882, 'samples': 11598848, 'steps': 22653, 'loss/train': 2.013745069503784} +03/04/2022 16:04:23 - INFO - codeparrot_training - Step 22654: {'lr': 0.00047635566819651936, 'samples': 11599360, 'steps': 22654, 'loss/train': 1.9207147359848022} +03/04/2022 16:04:29 - INFO - codeparrot_training - Step 22655: {'lr': 0.00047635341537295814, 'samples': 11599872, 'steps': 22655, 'loss/train': 1.819262981414795} +03/04/2022 16:04:32 - INFO - codeparrot_training - Step 22656: {'lr': 0.0004763511624474055, 'samples': 11600384, 'steps': 22656, 'loss/train': 2.0213611125946045} +03/04/2022 16:04:32 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/04/2022 16:04:37 - INFO - codeparrot_training - Step 22657: {'lr': 0.00047634890941986263, 'samples': 11600896, 'steps': 22657, 'loss/train': 2.116319179534912} +03/04/2022 16:04:40 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 16:04:43 - INFO - codeparrot_training - Step 22658: {'lr': 0.00047634665629033035, 'samples': 11601408, 'steps': 22658, 'loss/train': 1.5030882358551025} +03/04/2022 16:04:46 - INFO - codeparrot_training - Step 22659: {'lr': 0.00047634440305880976, 'samples': 11601920, 'steps': 22659, 'loss/train': 1.4850702285766602} +03/04/2022 16:04:48 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 16:04:51 - INFO - codeparrot_training - Step 22660: {'lr': 0.0004763421497253019, 'samples': 11602432, 'steps': 22660, 'loss/train': 1.2798643112182617} +03/04/2022 16:04:54 - INFO - codeparrot_training - Step 22661: {'lr': 0.0004763398962898078, 'samples': 11602944, 'steps': 22661, 'loss/train': 2.254098415374756} +03/04/2022 16:04:57 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 16:04:59 - INFO - codeparrot_training - Step 22662: {'lr': 0.0004763376427523284, 'samples': 11603456, 'steps': 22662, 'loss/train': 3.083556890487671} +03/04/2022 16:05:02 - INFO - codeparrot_training - Step 22663: {'lr': 0.0004763353891128648, 'samples': 11603968, 'steps': 22663, 'loss/train': 0.16208121180534363} +03/04/2022 16:05:05 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 16:05:08 - INFO - codeparrot_training - Step 22664: {'lr': 0.00047633313537141786, 'samples': 11604480, 'steps': 22664, 'loss/train': 1.9340626001358032} +03/04/2022 16:05:11 - INFO - codeparrot_training - Step 22665: {'lr': 0.00047633088152798875, 'samples': 11604992, 'steps': 22665, 'loss/train': 2.0426924228668213} +03/04/2022 16:05:14 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 16:05:16 - INFO - codeparrot_training - Step 22666: {'lr': 0.00047632862758257845, 'samples': 11605504, 'steps': 22666, 'loss/train': 1.7117700576782227} +03/04/2022 16:05:19 - INFO - codeparrot_training - Step 22667: {'lr': 0.0004763263735351879, 'samples': 11606016, 'steps': 22667, 'loss/train': 1.7728177309036255} +03/04/2022 16:05:22 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/04/2022 16:05:25 - INFO - codeparrot_training - Step 22668: {'lr': 0.0004763241193858183, 'samples': 11606528, 'steps': 22668, 'loss/train': 1.4793943166732788} +03/04/2022 16:05:28 - INFO - codeparrot_training - Step 22669: {'lr': 0.00047632186513447045, 'samples': 11607040, 'steps': 22669, 'loss/train': 2.451044797897339} +03/04/2022 16:05:30 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 16:05:33 - INFO - codeparrot_training - Step 22670: {'lr': 0.0004763196107811455, 'samples': 11607552, 'steps': 22670, 'loss/train': 1.0946629047393799} +03/04/2022 16:05:36 - INFO - codeparrot_training - Step 22671: {'lr': 0.0004763173563258444, 'samples': 11608064, 'steps': 22671, 'loss/train': 1.7203924655914307} +03/04/2022 16:05:38 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 16:05:42 - INFO - codeparrot_training - Step 22672: {'lr': 0.0004763151017685682, 'samples': 11608576, 'steps': 22672, 'loss/train': 1.4023966789245605} +03/04/2022 16:05:45 - INFO - codeparrot_training - Step 22673: {'lr': 0.0004763128471093179, 'samples': 11609088, 'steps': 22673, 'loss/train': 1.921789526939392} +03/04/2022 16:05:47 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/04/2022 16:05:50 - INFO - codeparrot_training - Step 22674: {'lr': 0.0004763105923480946, 'samples': 11609600, 'steps': 22674, 'loss/train': 0.5169785618782043} +03/04/2022 16:05:53 - INFO - codeparrot_training - Step 22675: {'lr': 0.0004763083374848991, 'samples': 11610112, 'steps': 22675, 'loss/train': 1.7987031936645508} +03/04/2022 16:05:55 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/04/2022 16:05:59 - INFO - codeparrot_training - Step 22676: {'lr': 0.00047630608251973265, 'samples': 11610624, 'steps': 22676, 'loss/train': 1.7626670598983765} +03/04/2022 16:06:02 - INFO - codeparrot_training - Step 22677: {'lr': 0.00047630382745259616, 'samples': 11611136, 'steps': 22677, 'loss/train': 1.9836996793746948} +03/04/2022 16:06:05 - INFO - codeparrot_training - Step 22678: {'lr': 0.0004763015722834907, 'samples': 11611648, 'steps': 22678, 'loss/train': 2.206925392150879} +03/04/2022 16:06:07 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/04/2022 16:06:10 - INFO - codeparrot_training - Step 22679: {'lr': 0.00047629931701241715, 'samples': 11612160, 'steps': 22679, 'loss/train': 1.5580421686172485} +03/04/2022 16:06:14 - INFO - codeparrot_training - Step 22680: {'lr': 0.0004762970616393767, 'samples': 11612672, 'steps': 22680, 'loss/train': 2.068885087966919} +03/04/2022 16:06:15 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/04/2022 16:06:19 - INFO - codeparrot_training - Step 22681: {'lr': 0.0004762948061643702, 'samples': 11613184, 'steps': 22681, 'loss/train': 2.1621265411376953} +03/04/2022 16:06:22 - INFO - codeparrot_training - Step 22682: {'lr': 0.0004762925505873988, 'samples': 11613696, 'steps': 22682, 'loss/train': 1.8693995475769043} +03/04/2022 16:06:24 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/04/2022 16:06:27 - INFO - codeparrot_training - Step 22683: {'lr': 0.00047629029490846346, 'samples': 11614208, 'steps': 22683, 'loss/train': 2.0313539505004883} +03/04/2022 16:06:31 - INFO - codeparrot_training - Step 22684: {'lr': 0.00047628803912756523, 'samples': 11614720, 'steps': 22684, 'loss/train': 1.6997673511505127} +03/04/2022 16:06:32 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 16:06:36 - INFO - codeparrot_training - Step 22685: {'lr': 0.00047628578324470505, 'samples': 11615232, 'steps': 22685, 'loss/train': 1.9626903533935547} +03/04/2022 16:06:39 - INFO - codeparrot_training - Step 22686: {'lr': 0.00047628352725988406, 'samples': 11615744, 'steps': 22686, 'loss/train': 1.327711820602417} +03/04/2022 16:06:41 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/04/2022 16:06:44 - INFO - codeparrot_training - Step 22687: {'lr': 0.0004762812711731032, 'samples': 11616256, 'steps': 22687, 'loss/train': 2.008180618286133} +03/04/2022 16:06:47 - INFO - codeparrot_training - Step 22688: {'lr': 0.00047627901498436344, 'samples': 11616768, 'steps': 22688, 'loss/train': 1.9528369903564453} +03/04/2022 16:06:50 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 16:06:53 - INFO - codeparrot_training - Step 22689: {'lr': 0.0004762767586936658, 'samples': 11617280, 'steps': 22689, 'loss/train': 1.912990927696228} +03/04/2022 16:06:56 - INFO - codeparrot_training - Step 22690: {'lr': 0.00047627450230101144, 'samples': 11617792, 'steps': 22690, 'loss/train': 2.3304872512817383} +03/04/2022 16:06:58 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 16:07:01 - INFO - codeparrot_training - Step 22691: {'lr': 0.0004762722458064013, 'samples': 11618304, 'steps': 22691, 'loss/train': 1.426804780960083} +03/04/2022 16:07:04 - INFO - codeparrot_training - Step 22692: {'lr': 0.0004762699892098363, 'samples': 11618816, 'steps': 22692, 'loss/train': 1.1183570623397827} +03/04/2022 16:07:06 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 16:07:10 - INFO - codeparrot_training - Step 22693: {'lr': 0.0004762677325113176, 'samples': 11619328, 'steps': 22693, 'loss/train': 1.8661285638809204} +03/04/2022 16:07:13 - INFO - codeparrot_training - Step 22694: {'lr': 0.0004762654757108461, 'samples': 11619840, 'steps': 22694, 'loss/train': 1.5826612710952759} +03/04/2022 16:07:15 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/04/2022 16:07:18 - INFO - codeparrot_training - Step 22695: {'lr': 0.00047626321880842287, 'samples': 11620352, 'steps': 22695, 'loss/train': 2.739610433578491} +03/04/2022 16:07:21 - INFO - codeparrot_training - Step 22696: {'lr': 0.00047626096180404895, 'samples': 11620864, 'steps': 22696, 'loss/train': 2.1063945293426514} +03/04/2022 16:07:23 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 16:07:26 - INFO - codeparrot_training - Step 22697: {'lr': 0.0004762587046977253, 'samples': 11621376, 'steps': 22697, 'loss/train': 1.4844012260437012} +03/04/2022 16:07:30 - INFO - codeparrot_training - Step 22698: {'lr': 0.000476256447489453, 'samples': 11621888, 'steps': 22698, 'loss/train': 2.2075445652008057} +03/04/2022 16:07:31 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 16:07:35 - INFO - codeparrot_training - Step 22699: {'lr': 0.000476254190179233, 'samples': 11622400, 'steps': 22699, 'loss/train': 2.445279121398926} +03/04/2022 16:07:38 - INFO - codeparrot_training - Step 22700: {'lr': 0.0004762519327670664, 'samples': 11622912, 'steps': 22700, 'loss/train': 0.778886079788208} +03/04/2022 16:07:40 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 16:07:44 - INFO - codeparrot_training - Step 22701: {'lr': 0.0004762496752529541, 'samples': 11623424, 'steps': 22701, 'loss/train': 1.7996127605438232} +03/04/2022 16:07:47 - INFO - codeparrot_training - Step 22702: {'lr': 0.0004762474176368973, 'samples': 11623936, 'steps': 22702, 'loss/train': 1.610295295715332} +03/04/2022 16:07:49 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/04/2022 16:07:52 - INFO - codeparrot_training - Step 22703: {'lr': 0.00047624515991889684, 'samples': 11624448, 'steps': 22703, 'loss/train': 1.8571619987487793} +03/04/2022 16:07:55 - INFO - codeparrot_training - Step 22704: {'lr': 0.00047624290209895384, 'samples': 11624960, 'steps': 22704, 'loss/train': 1.5894479751586914} +03/04/2022 16:07:58 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 16:08:00 - INFO - codeparrot_training - Step 22705: {'lr': 0.00047624064417706917, 'samples': 11625472, 'steps': 22705, 'loss/train': 2.102832555770874} +03/04/2022 16:08:04 - INFO - codeparrot_training - Step 22706: {'lr': 0.00047623838615324407, 'samples': 11625984, 'steps': 22706, 'loss/train': 1.170867919921875} +03/04/2022 16:08:06 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 16:08:09 - INFO - codeparrot_training - Step 22707: {'lr': 0.0004762361280274794, 'samples': 11626496, 'steps': 22707, 'loss/train': 1.5442910194396973} +03/04/2022 16:08:12 - INFO - codeparrot_training - Step 22708: {'lr': 0.0004762338697997762, 'samples': 11627008, 'steps': 22708, 'loss/train': 2.1116220951080322} +03/04/2022 16:08:14 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/04/2022 16:08:17 - INFO - codeparrot_training - Step 22709: {'lr': 0.00047623161147013557, 'samples': 11627520, 'steps': 22709, 'loss/train': 1.5288029909133911} +03/04/2022 16:08:20 - INFO - codeparrot_training - Step 22710: {'lr': 0.0004762293530385584, 'samples': 11628032, 'steps': 22710, 'loss/train': 1.672132134437561} +03/04/2022 16:08:23 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 16:08:26 - INFO - codeparrot_training - Step 22711: {'lr': 0.0004762270945050458, 'samples': 11628544, 'steps': 22711, 'loss/train': 1.8892134428024292} +03/04/2022 16:08:29 - INFO - codeparrot_training - Step 22712: {'lr': 0.00047622483586959877, 'samples': 11629056, 'steps': 22712, 'loss/train': 1.66095769405365} +03/04/2022 16:08:32 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 16:08:34 - INFO - codeparrot_training - Step 22713: {'lr': 0.00047622257713221826, 'samples': 11629568, 'steps': 22713, 'loss/train': 1.8577181100845337} +03/04/2022 16:08:37 - INFO - codeparrot_training - Step 22714: {'lr': 0.00047622031829290545, 'samples': 11630080, 'steps': 22714, 'loss/train': 1.7829153537750244} +03/04/2022 16:08:40 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 16:08:42 - INFO - codeparrot_training - Step 22715: {'lr': 0.0004762180593516612, 'samples': 11630592, 'steps': 22715, 'loss/train': 1.548112392425537} +03/04/2022 16:08:46 - INFO - codeparrot_training - Step 22716: {'lr': 0.0004762158003084867, 'samples': 11631104, 'steps': 22716, 'loss/train': 1.9854425191879272} +03/04/2022 16:08:48 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 16:08:51 - INFO - codeparrot_training - Step 22717: {'lr': 0.0004762135411633827, 'samples': 11631616, 'steps': 22717, 'loss/train': 1.712017297744751} +03/04/2022 16:08:54 - INFO - codeparrot_training - Step 22718: {'lr': 0.0004762112819163504, 'samples': 11632128, 'steps': 22718, 'loss/train': 2.221590518951416} +03/04/2022 16:08:57 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/04/2022 16:08:59 - INFO - codeparrot_training - Step 22719: {'lr': 0.0004762090225673908, 'samples': 11632640, 'steps': 22719, 'loss/train': 2.351417303085327} +03/04/2022 16:09:02 - INFO - codeparrot_training - Step 22720: {'lr': 0.0004762067631165049, 'samples': 11633152, 'steps': 22720, 'loss/train': 1.9199585914611816} +03/04/2022 16:09:05 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/04/2022 16:09:08 - INFO - codeparrot_training - Step 22721: {'lr': 0.0004762045035636937, 'samples': 11633664, 'steps': 22721, 'loss/train': 1.9231112003326416} +03/04/2022 16:09:11 - INFO - codeparrot_training - Step 22722: {'lr': 0.0004762022439089583, 'samples': 11634176, 'steps': 22722, 'loss/train': 2.0572171211242676} +03/04/2022 16:09:13 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/04/2022 16:09:16 - INFO - codeparrot_training - Step 22723: {'lr': 0.0004761999841522996, 'samples': 11634688, 'steps': 22723, 'loss/train': 1.6132575273513794} +03/04/2022 16:09:19 - INFO - codeparrot_training - Step 22724: {'lr': 0.0004761977242937188, 'samples': 11635200, 'steps': 22724, 'loss/train': 2.027494192123413} +03/04/2022 16:09:22 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 16:09:25 - INFO - codeparrot_training - Step 22725: {'lr': 0.00047619546433321663, 'samples': 11635712, 'steps': 22725, 'loss/train': 1.6223715543746948} +03/04/2022 16:09:28 - INFO - codeparrot_training - Step 22726: {'lr': 0.00047619320427079437, 'samples': 11636224, 'steps': 22726, 'loss/train': 1.6766353845596313} +03/04/2022 16:09:30 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 16:09:33 - INFO - codeparrot_training - Step 22727: {'lr': 0.00047619094410645293, 'samples': 11636736, 'steps': 22727, 'loss/train': 1.3781834840774536} +03/04/2022 16:09:36 - INFO - codeparrot_training - Step 22728: {'lr': 0.0004761886838401933, 'samples': 11637248, 'steps': 22728, 'loss/train': 1.0411885976791382} +03/04/2022 16:09:39 - INFO - codeparrot_training - Step 22729: {'lr': 0.0004761864234720166, 'samples': 11637760, 'steps': 22729, 'loss/train': 2.4113283157348633} +03/04/2022 16:09:40 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 16:09:45 - INFO - codeparrot_training - Step 22730: {'lr': 0.00047618416300192375, 'samples': 11638272, 'steps': 22730, 'loss/train': 1.104407787322998} +03/04/2022 16:09:48 - INFO - codeparrot_training - Step 22731: {'lr': 0.0004761819024299158, 'samples': 11638784, 'steps': 22731, 'loss/train': 2.0100302696228027} +03/04/2022 16:09:48 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 16:09:53 - INFO - codeparrot_training - Step 22732: {'lr': 0.0004761796417559938, 'samples': 11639296, 'steps': 22732, 'loss/train': 2.6371166706085205} +03/04/2022 16:09:56 - INFO - codeparrot_training - Step 22733: {'lr': 0.0004761773809801587, 'samples': 11639808, 'steps': 22733, 'loss/train': 2.1136434078216553} +03/04/2022 16:09:57 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/04/2022 16:10:02 - INFO - codeparrot_training - Step 22734: {'lr': 0.0004761751201024116, 'samples': 11640320, 'steps': 22734, 'loss/train': 0.630546510219574} +03/04/2022 16:10:05 - INFO - codeparrot_training - Step 22735: {'lr': 0.0004761728591227535, 'samples': 11640832, 'steps': 22735, 'loss/train': 0.5983878374099731} +03/04/2022 16:10:05 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 16:10:10 - INFO - codeparrot_training - Step 22736: {'lr': 0.00047617059804118536, 'samples': 11641344, 'steps': 22736, 'loss/train': 1.5056216716766357} +03/04/2022 16:10:13 - INFO - codeparrot_training - Step 22737: {'lr': 0.0004761683368577083, 'samples': 11641856, 'steps': 22737, 'loss/train': 0.25127357244491577} +03/04/2022 16:10:13 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 16:10:19 - INFO - codeparrot_training - Step 22738: {'lr': 0.0004761660755723232, 'samples': 11642368, 'steps': 22738, 'loss/train': 0.20365694165229797} +03/04/2022 16:10:22 - INFO - codeparrot_training - Step 22739: {'lr': 0.0004761638141850312, 'samples': 11642880, 'steps': 22739, 'loss/train': 1.0232809782028198} +03/04/2022 16:10:23 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 16:10:27 - INFO - codeparrot_training - Step 22740: {'lr': 0.0004761615526958333, 'samples': 11643392, 'steps': 22740, 'loss/train': 2.081066370010376} +03/04/2022 16:10:30 - INFO - codeparrot_training - Step 22741: {'lr': 0.0004761592911047304, 'samples': 11643904, 'steps': 22741, 'loss/train': 2.209012031555176} +03/04/2022 16:10:31 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 16:10:36 - INFO - codeparrot_training - Step 22742: {'lr': 0.00047615702941172366, 'samples': 11644416, 'steps': 22742, 'loss/train': 2.0221681594848633} +03/04/2022 16:10:39 - INFO - codeparrot_training - Step 22743: {'lr': 0.0004761547676168141, 'samples': 11644928, 'steps': 22743, 'loss/train': 1.688999056816101} +03/04/2022 16:10:39 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 16:10:44 - INFO - codeparrot_training - Step 22744: {'lr': 0.0004761525057200027, 'samples': 11645440, 'steps': 22744, 'loss/train': 1.1519862413406372} +03/04/2022 16:10:47 - INFO - codeparrot_training - Step 22745: {'lr': 0.00047615024372129033, 'samples': 11645952, 'steps': 22745, 'loss/train': 1.599500060081482} +03/04/2022 16:10:48 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 16:10:53 - INFO - codeparrot_training - Step 22746: {'lr': 0.0004761479816206783, 'samples': 11646464, 'steps': 22746, 'loss/train': 2.074352264404297} +03/04/2022 16:10:56 - INFO - codeparrot_training - Step 22747: {'lr': 0.00047614571941816743, 'samples': 11646976, 'steps': 22747, 'loss/train': 1.9758764505386353} +03/04/2022 16:10:57 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 16:11:01 - INFO - codeparrot_training - Step 22748: {'lr': 0.00047614345711375874, 'samples': 11647488, 'steps': 22748, 'loss/train': 2.2397775650024414} +03/04/2022 16:11:04 - INFO - codeparrot_training - Step 22749: {'lr': 0.0004761411947074533, 'samples': 11648000, 'steps': 22749, 'loss/train': 1.8849570751190186} +03/04/2022 16:11:06 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 16:11:09 - INFO - codeparrot_training - Step 22750: {'lr': 0.00047613893219925217, 'samples': 11648512, 'steps': 22750, 'loss/train': 1.5778478384017944} +03/04/2022 16:11:13 - INFO - codeparrot_training - Step 22751: {'lr': 0.00047613666958915636, 'samples': 11649024, 'steps': 22751, 'loss/train': 1.908392071723938} +03/04/2022 16:11:14 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 16:11:18 - INFO - codeparrot_training - Step 22752: {'lr': 0.0004761344068771668, 'samples': 11649536, 'steps': 22752, 'loss/train': 2.2471325397491455} +03/04/2022 16:11:21 - INFO - codeparrot_training - Step 22753: {'lr': 0.0004761321440632846, 'samples': 11650048, 'steps': 22753, 'loss/train': 2.328744649887085} +03/04/2022 16:11:23 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/04/2022 16:11:26 - INFO - codeparrot_training - Step 22754: {'lr': 0.00047612988114751074, 'samples': 11650560, 'steps': 22754, 'loss/train': 1.4198843240737915} +03/04/2022 16:11:30 - INFO - codeparrot_training - Step 22755: {'lr': 0.00047612761812984626, 'samples': 11651072, 'steps': 22755, 'loss/train': 1.788591980934143} +03/04/2022 16:11:31 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 16:11:35 - INFO - codeparrot_training - Step 22756: {'lr': 0.00047612535501029215, 'samples': 11651584, 'steps': 22756, 'loss/train': 2.0703320503234863} +03/04/2022 16:11:38 - INFO - codeparrot_training - Step 22757: {'lr': 0.0004761230917888494, 'samples': 11652096, 'steps': 22757, 'loss/train': 2.3961472511291504} +03/04/2022 16:11:39 - INFO - codeparrot_training - Skipping example with length 920 (seq_length=1024) +03/04/2022 16:11:43 - INFO - codeparrot_training - Step 22758: {'lr': 0.00047612082846551913, 'samples': 11652608, 'steps': 22758, 'loss/train': 2.0931332111358643} +03/04/2022 16:11:46 - INFO - codeparrot_training - Step 22759: {'lr': 0.0004761185650403023, 'samples': 11653120, 'steps': 22759, 'loss/train': 1.7007685899734497} +03/04/2022 16:11:52 - INFO - codeparrot_training - Step 22760: {'lr': 0.0004761163015131999, 'samples': 11653632, 'steps': 22760, 'loss/train': 1.5417068004608154} +03/04/2022 16:11:55 - INFO - codeparrot_training - Step 22761: {'lr': 0.00047611403788421305, 'samples': 11654144, 'steps': 22761, 'loss/train': 1.684132695198059} +03/04/2022 16:11:56 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 16:12:00 - INFO - codeparrot_training - Step 22762: {'lr': 0.0004761117741533426, 'samples': 11654656, 'steps': 22762, 'loss/train': 2.491093397140503} +03/04/2022 16:12:04 - INFO - codeparrot_training - Step 22763: {'lr': 0.0004761095103205897, 'samples': 11655168, 'steps': 22763, 'loss/train': 1.538343071937561} +03/04/2022 16:12:05 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 16:12:09 - INFO - codeparrot_training - Step 22764: {'lr': 0.00047610724638595545, 'samples': 11655680, 'steps': 22764, 'loss/train': 1.8998663425445557} +03/04/2022 16:12:12 - INFO - codeparrot_training - Step 22765: {'lr': 0.00047610498234944065, 'samples': 11656192, 'steps': 22765, 'loss/train': 2.556018829345703} +03/04/2022 16:12:14 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 16:12:18 - INFO - codeparrot_training - Step 22766: {'lr': 0.00047610271821104647, 'samples': 11656704, 'steps': 22766, 'loss/train': 2.347198486328125} +03/04/2022 16:12:21 - INFO - codeparrot_training - Step 22767: {'lr': 0.0004761004539707739, 'samples': 11657216, 'steps': 22767, 'loss/train': 1.8554072380065918} +03/04/2022 16:12:22 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 16:12:26 - INFO - codeparrot_training - Step 22768: {'lr': 0.00047609818962862394, 'samples': 11657728, 'steps': 22768, 'loss/train': 1.814836859703064} +03/04/2022 16:12:29 - INFO - codeparrot_training - Step 22769: {'lr': 0.00047609592518459766, 'samples': 11658240, 'steps': 22769, 'loss/train': 1.9793697595596313} +03/04/2022 16:12:31 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 16:12:34 - INFO - codeparrot_training - Step 22770: {'lr': 0.00047609366063869595, 'samples': 11658752, 'steps': 22770, 'loss/train': 1.9207111597061157} +03/04/2022 16:12:38 - INFO - codeparrot_training - Step 22771: {'lr': 0.00047609139599092006, 'samples': 11659264, 'steps': 22771, 'loss/train': 0.24691298604011536} +03/04/2022 16:12:39 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 16:12:43 - INFO - codeparrot_training - Step 22772: {'lr': 0.0004760891312412708, 'samples': 11659776, 'steps': 22772, 'loss/train': 2.069065570831299} +03/04/2022 16:12:46 - INFO - codeparrot_training - Step 22773: {'lr': 0.0004760868663897493, 'samples': 11660288, 'steps': 22773, 'loss/train': 2.096614122390747} +03/04/2022 16:12:47 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 16:12:51 - INFO - codeparrot_training - Step 22774: {'lr': 0.0004760846014363565, 'samples': 11660800, 'steps': 22774, 'loss/train': 2.151154041290283} +03/04/2022 16:12:55 - INFO - codeparrot_training - Step 22775: {'lr': 0.0004760823363810935, 'samples': 11661312, 'steps': 22775, 'loss/train': 2.274588108062744} +03/04/2022 16:12:56 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 16:13:00 - INFO - codeparrot_training - Step 22776: {'lr': 0.0004760800712239612, 'samples': 11661824, 'steps': 22776, 'loss/train': 1.9499043226242065} +03/04/2022 16:13:03 - INFO - codeparrot_training - Step 22777: {'lr': 0.0004760778059649609, 'samples': 11662336, 'steps': 22777, 'loss/train': 2.3628435134887695} +03/04/2022 16:13:04 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 16:13:08 - INFO - codeparrot_training - Step 22778: {'lr': 0.0004760755406040933, 'samples': 11662848, 'steps': 22778, 'loss/train': 1.9469445943832397} +03/04/2022 16:13:11 - INFO - codeparrot_training - Step 22779: {'lr': 0.00047607327514135955, 'samples': 11663360, 'steps': 22779, 'loss/train': 1.6754398345947266} +03/04/2022 16:13:12 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 16:13:17 - INFO - codeparrot_training - Step 22780: {'lr': 0.00047607100957676067, 'samples': 11663872, 'steps': 22780, 'loss/train': 0.7512257695198059} +03/04/2022 16:13:20 - INFO - codeparrot_training - Step 22781: {'lr': 0.0004760687439102977, 'samples': 11664384, 'steps': 22781, 'loss/train': 1.9664580821990967} +03/04/2022 16:13:21 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 16:13:25 - INFO - codeparrot_training - Step 22782: {'lr': 0.0004760664781419717, 'samples': 11664896, 'steps': 22782, 'loss/train': 1.4796637296676636} +03/04/2022 16:13:28 - INFO - codeparrot_training - Step 22783: {'lr': 0.00047606421227178354, 'samples': 11665408, 'steps': 22783, 'loss/train': 1.8408528566360474} +03/04/2022 16:13:29 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 16:13:34 - INFO - codeparrot_training - Step 22784: {'lr': 0.0004760619462997343, 'samples': 11665920, 'steps': 22784, 'loss/train': 1.5811431407928467} +03/04/2022 16:13:37 - INFO - codeparrot_training - Step 22785: {'lr': 0.00047605968022582513, 'samples': 11666432, 'steps': 22785, 'loss/train': 1.2370188236236572} +03/04/2022 16:13:37 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 16:13:42 - INFO - codeparrot_training - Step 22786: {'lr': 0.000476057414050057, 'samples': 11666944, 'steps': 22786, 'loss/train': 2.135481119155884} +03/04/2022 16:13:45 - INFO - codeparrot_training - Step 22787: {'lr': 0.00047605514777243076, 'samples': 11667456, 'steps': 22787, 'loss/train': 1.6781362295150757} +03/04/2022 16:13:46 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 16:13:50 - INFO - codeparrot_training - Step 22788: {'lr': 0.0004760528813929476, 'samples': 11667968, 'steps': 22788, 'loss/train': 1.9624316692352295} +03/04/2022 16:13:54 - INFO - codeparrot_training - Step 22789: {'lr': 0.0004760506149116085, 'samples': 11668480, 'steps': 22789, 'loss/train': 1.8866328001022339} +03/04/2022 16:13:55 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 16:13:59 - INFO - codeparrot_training - Step 22790: {'lr': 0.0004760483483284145, 'samples': 11668992, 'steps': 22790, 'loss/train': 1.7144771814346313} +03/04/2022 16:14:02 - INFO - codeparrot_training - Step 22791: {'lr': 0.0004760460816433666, 'samples': 11669504, 'steps': 22791, 'loss/train': 1.3186818361282349} +03/04/2022 16:14:04 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 16:14:08 - INFO - codeparrot_training - Step 22792: {'lr': 0.0004760438148564659, 'samples': 11670016, 'steps': 22792, 'loss/train': 2.6404786109924316} +03/04/2022 16:14:11 - INFO - codeparrot_training - Step 22793: {'lr': 0.00047604154796771327, 'samples': 11670528, 'steps': 22793, 'loss/train': 2.4536826610565186} +03/04/2022 16:14:12 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 16:14:16 - INFO - codeparrot_training - Step 22794: {'lr': 0.0004760392809771098, 'samples': 11671040, 'steps': 22794, 'loss/train': 1.5103991031646729} +03/04/2022 16:14:19 - INFO - codeparrot_training - Step 22795: {'lr': 0.00047603701388465646, 'samples': 11671552, 'steps': 22795, 'loss/train': 2.3070404529571533} +03/04/2022 16:14:21 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 16:14:24 - INFO - codeparrot_training - Step 22796: {'lr': 0.0004760347466903544, 'samples': 11672064, 'steps': 22796, 'loss/train': 1.8583471775054932} +03/04/2022 16:14:28 - INFO - codeparrot_training - Step 22797: {'lr': 0.0004760324793942046, 'samples': 11672576, 'steps': 22797, 'loss/train': 1.5414626598358154} +03/04/2022 16:14:29 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 16:14:33 - INFO - codeparrot_training - Step 22798: {'lr': 0.000476030211996208, 'samples': 11673088, 'steps': 22798, 'loss/train': 2.4006781578063965} +03/04/2022 16:14:36 - INFO - codeparrot_training - Step 22799: {'lr': 0.0004760279444963657, 'samples': 11673600, 'steps': 22799, 'loss/train': 0.9450362324714661} +03/04/2022 16:14:38 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 16:14:41 - INFO - codeparrot_training - Step 22800: {'lr': 0.0004760256768946787, 'samples': 11674112, 'steps': 22800, 'loss/train': 1.8798339366912842} +03/04/2022 16:14:45 - INFO - codeparrot_training - Step 22801: {'lr': 0.00047602340919114793, 'samples': 11674624, 'steps': 22801, 'loss/train': 2.0531322956085205} +03/04/2022 16:14:46 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 16:14:50 - INFO - codeparrot_training - Step 22802: {'lr': 0.00047602114138577464, 'samples': 11675136, 'steps': 22802, 'loss/train': 1.7071006298065186} +03/04/2022 16:14:53 - INFO - codeparrot_training - Step 22803: {'lr': 0.00047601887347855965, 'samples': 11675648, 'steps': 22803, 'loss/train': 1.580344319343567} +03/04/2022 16:14:54 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 16:14:58 - INFO - codeparrot_training - Step 22804: {'lr': 0.00047601660546950396, 'samples': 11676160, 'steps': 22804, 'loss/train': 1.777332067489624} +03/04/2022 16:15:02 - INFO - codeparrot_training - Step 22805: {'lr': 0.00047601433735860885, 'samples': 11676672, 'steps': 22805, 'loss/train': 3.8403055667877197} +03/04/2022 16:15:03 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/04/2022 16:15:07 - INFO - codeparrot_training - Step 22806: {'lr': 0.000476012069145875, 'samples': 11677184, 'steps': 22806, 'loss/train': 1.9083597660064697} +03/04/2022 16:15:10 - INFO - codeparrot_training - Step 22807: {'lr': 0.00047600980083130367, 'samples': 11677696, 'steps': 22807, 'loss/train': 1.925580382347107} +03/04/2022 16:15:12 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/04/2022 16:15:15 - INFO - codeparrot_training - Step 22808: {'lr': 0.0004760075324148959, 'samples': 11678208, 'steps': 22808, 'loss/train': 1.7848302125930786} +03/04/2022 16:15:18 - INFO - codeparrot_training - Step 22809: {'lr': 0.00047600526389665246, 'samples': 11678720, 'steps': 22809, 'loss/train': 0.8680102229118347} +03/04/2022 16:15:20 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 16:15:24 - INFO - codeparrot_training - Step 22810: {'lr': 0.00047600299527657464, 'samples': 11679232, 'steps': 22810, 'loss/train': 1.278043508529663} +03/04/2022 16:15:27 - INFO - codeparrot_training - Step 22811: {'lr': 0.0004760007265546633, 'samples': 11679744, 'steps': 22811, 'loss/train': 0.26948729157447815} +03/04/2022 16:15:29 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/04/2022 16:15:32 - INFO - codeparrot_training - Step 22812: {'lr': 0.00047599845773091957, 'samples': 11680256, 'steps': 22812, 'loss/train': 1.9197040796279907} +03/04/2022 16:15:35 - INFO - codeparrot_training - Step 22813: {'lr': 0.0004759961888053444, 'samples': 11680768, 'steps': 22813, 'loss/train': 1.6887156963348389} +03/04/2022 16:15:38 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 16:15:41 - INFO - codeparrot_training - Step 22814: {'lr': 0.00047599391977793884, 'samples': 11681280, 'steps': 22814, 'loss/train': 1.5206798315048218} +03/04/2022 16:15:44 - INFO - codeparrot_training - Step 22815: {'lr': 0.00047599165064870385, 'samples': 11681792, 'steps': 22815, 'loss/train': 1.7349610328674316} +03/04/2022 16:15:46 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/04/2022 16:15:49 - INFO - codeparrot_training - Step 22816: {'lr': 0.0004759893814176406, 'samples': 11682304, 'steps': 22816, 'loss/train': 1.872040867805481} +03/04/2022 16:15:52 - INFO - codeparrot_training - Step 22817: {'lr': 0.00047598711208475, 'samples': 11682816, 'steps': 22817, 'loss/train': 1.8899965286254883} +03/04/2022 16:15:54 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 16:15:58 - INFO - codeparrot_training - Step 22818: {'lr': 0.00047598484265003307, 'samples': 11683328, 'steps': 22818, 'loss/train': 2.4383301734924316} +03/04/2022 16:16:01 - INFO - codeparrot_training - Step 22819: {'lr': 0.00047598257311349087, 'samples': 11683840, 'steps': 22819, 'loss/train': 1.9012706279754639} +03/04/2022 16:16:03 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/04/2022 16:16:06 - INFO - codeparrot_training - Step 22820: {'lr': 0.0004759803034751244, 'samples': 11684352, 'steps': 22820, 'loss/train': 2.6568329334259033} +03/04/2022 16:16:09 - INFO - codeparrot_training - Step 22821: {'lr': 0.0004759780337349347, 'samples': 11684864, 'steps': 22821, 'loss/train': 2.0336782932281494} +03/04/2022 16:16:11 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/04/2022 16:16:14 - INFO - codeparrot_training - Step 22822: {'lr': 0.0004759757638929227, 'samples': 11685376, 'steps': 22822, 'loss/train': 2.2956860065460205} +03/04/2022 16:16:18 - INFO - codeparrot_training - Step 22823: {'lr': 0.00047597349394908967, 'samples': 11685888, 'steps': 22823, 'loss/train': 1.692417860031128} +03/04/2022 16:16:20 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 16:16:23 - INFO - codeparrot_training - Step 22824: {'lr': 0.0004759712239034364, 'samples': 11686400, 'steps': 22824, 'loss/train': 1.8119562864303589} +03/04/2022 16:16:26 - INFO - codeparrot_training - Step 22825: {'lr': 0.0004759689537559639, 'samples': 11686912, 'steps': 22825, 'loss/train': 1.2337623834609985} +03/04/2022 16:16:28 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/04/2022 16:16:31 - INFO - codeparrot_training - Step 22826: {'lr': 0.0004759666835066734, 'samples': 11687424, 'steps': 22826, 'loss/train': 1.7540007829666138} +03/04/2022 16:16:34 - INFO - codeparrot_training - Step 22827: {'lr': 0.00047596441315556575, 'samples': 11687936, 'steps': 22827, 'loss/train': 1.8777952194213867} +03/04/2022 16:16:36 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 16:16:40 - INFO - codeparrot_training - Step 22828: {'lr': 0.00047596214270264204, 'samples': 11688448, 'steps': 22828, 'loss/train': 0.8626569509506226} +03/04/2022 16:16:43 - INFO - codeparrot_training - Step 22829: {'lr': 0.00047595987214790324, 'samples': 11688960, 'steps': 22829, 'loss/train': 2.044384002685547} +03/04/2022 16:16:45 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 16:16:48 - INFO - codeparrot_training - Step 22830: {'lr': 0.0004759576014913505, 'samples': 11689472, 'steps': 22830, 'loss/train': 2.301889657974243} +03/04/2022 16:16:51 - INFO - codeparrot_training - Step 22831: {'lr': 0.0004759553307329846, 'samples': 11689984, 'steps': 22831, 'loss/train': 2.602731943130493} +03/04/2022 16:16:53 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 16:16:56 - INFO - codeparrot_training - Step 22832: {'lr': 0.0004759530598728068, 'samples': 11690496, 'steps': 22832, 'loss/train': 0.7168457508087158} +03/04/2022 16:17:00 - INFO - codeparrot_training - Step 22833: {'lr': 0.000475950788910818, 'samples': 11691008, 'steps': 22833, 'loss/train': 5.3758344650268555} +03/04/2022 16:17:02 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/04/2022 16:17:05 - INFO - codeparrot_training - Step 22834: {'lr': 0.0004759485178470193, 'samples': 11691520, 'steps': 22834, 'loss/train': 2.7386538982391357} +03/04/2022 16:17:08 - INFO - codeparrot_training - Step 22835: {'lr': 0.0004759462466814117, 'samples': 11692032, 'steps': 22835, 'loss/train': 1.9544155597686768} +03/04/2022 16:17:10 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 16:17:13 - INFO - codeparrot_training - Step 22836: {'lr': 0.0004759439754139962, 'samples': 11692544, 'steps': 22836, 'loss/train': 1.9805902242660522} +03/04/2022 16:17:16 - INFO - codeparrot_training - Step 22837: {'lr': 0.0004759417040447738, 'samples': 11693056, 'steps': 22837, 'loss/train': 1.2868802547454834} +03/04/2022 16:17:18 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 16:17:22 - INFO - codeparrot_training - Step 22838: {'lr': 0.00047593943257374563, 'samples': 11693568, 'steps': 22838, 'loss/train': 2.5074782371520996} +03/04/2022 16:17:25 - INFO - codeparrot_training - Step 22839: {'lr': 0.00047593716100091253, 'samples': 11694080, 'steps': 22839, 'loss/train': 2.6331851482391357} +03/04/2022 16:17:27 - INFO - codeparrot_training - Skipping example with length 917 (seq_length=1024) +03/04/2022 16:17:30 - INFO - codeparrot_training - Step 22840: {'lr': 0.00047593488932627567, 'samples': 11694592, 'steps': 22840, 'loss/train': 1.8131109476089478} +03/04/2022 16:17:33 - INFO - codeparrot_training - Step 22841: {'lr': 0.00047593261754983607, 'samples': 11695104, 'steps': 22841, 'loss/train': 1.6030670404434204} +03/04/2022 16:17:35 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 16:17:39 - INFO - codeparrot_training - Step 22842: {'lr': 0.00047593034567159465, 'samples': 11695616, 'steps': 22842, 'loss/train': 1.712217092514038} +03/04/2022 16:17:42 - INFO - codeparrot_training - Step 22843: {'lr': 0.00047592807369155256, 'samples': 11696128, 'steps': 22843, 'loss/train': 1.966463327407837} +03/04/2022 16:17:46 - INFO - codeparrot_training - Step 22844: {'lr': 0.0004759258016097108, 'samples': 11696640, 'steps': 22844, 'loss/train': 1.0743975639343262} +03/04/2022 16:17:47 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/04/2022 16:17:51 - INFO - codeparrot_training - Step 22845: {'lr': 0.0004759235294260703, 'samples': 11697152, 'steps': 22845, 'loss/train': 1.9415068626403809} +03/04/2022 16:17:54 - INFO - codeparrot_training - Step 22846: {'lr': 0.0004759212571406321, 'samples': 11697664, 'steps': 22846, 'loss/train': 2.2889270782470703} +03/04/2022 16:17:56 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/04/2022 16:17:59 - INFO - codeparrot_training - Step 22847: {'lr': 0.00047591898475339735, 'samples': 11698176, 'steps': 22847, 'loss/train': 2.18639874458313} +03/04/2022 16:18:03 - INFO - codeparrot_training - Step 22848: {'lr': 0.00047591671226436695, 'samples': 11698688, 'steps': 22848, 'loss/train': 1.4723293781280518} +03/04/2022 16:18:05 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/04/2022 16:18:08 - INFO - codeparrot_training - Step 22849: {'lr': 0.00047591443967354196, 'samples': 11699200, 'steps': 22849, 'loss/train': 0.8739657402038574} +03/04/2022 16:18:11 - INFO - codeparrot_training - Step 22850: {'lr': 0.00047591216698092344, 'samples': 11699712, 'steps': 22850, 'loss/train': 1.4261035919189453} +03/04/2022 16:18:13 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 16:18:16 - INFO - codeparrot_training - Step 22851: {'lr': 0.00047590989418651243, 'samples': 11700224, 'steps': 22851, 'loss/train': 0.09826712310314178} +03/04/2022 16:18:19 - INFO - codeparrot_training - Step 22852: {'lr': 0.00047590762129030986, 'samples': 11700736, 'steps': 22852, 'loss/train': 1.2697683572769165} +03/04/2022 16:18:22 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/04/2022 16:18:25 - INFO - codeparrot_training - Step 22853: {'lr': 0.00047590534829231675, 'samples': 11701248, 'steps': 22853, 'loss/train': 1.8740471601486206} +03/04/2022 16:18:28 - INFO - codeparrot_training - Step 22854: {'lr': 0.00047590307519253423, 'samples': 11701760, 'steps': 22854, 'loss/train': 1.0300347805023193} +03/04/2022 16:18:31 - INFO - codeparrot_training - Step 22855: {'lr': 0.00047590080199096324, 'samples': 11702272, 'steps': 22855, 'loss/train': 1.5674493312835693} +03/04/2022 16:18:31 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 16:18:36 - INFO - codeparrot_training - Step 22856: {'lr': 0.00047589852868760486, 'samples': 11702784, 'steps': 22856, 'loss/train': 1.3125629425048828} +03/04/2022 16:18:39 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/04/2022 16:18:42 - INFO - codeparrot_training - Step 22857: {'lr': 0.00047589625528246006, 'samples': 11703296, 'steps': 22857, 'loss/train': 2.0716586112976074} +03/04/2022 16:18:45 - INFO - codeparrot_training - Step 22858: {'lr': 0.0004758939817755299, 'samples': 11703808, 'steps': 22858, 'loss/train': 2.5346128940582275} +03/04/2022 16:18:48 - INFO - codeparrot_training - Step 22859: {'lr': 0.0004758917081668155, 'samples': 11704320, 'steps': 22859, 'loss/train': 2.07692551612854} +03/04/2022 16:18:48 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/04/2022 16:18:53 - INFO - codeparrot_training - Step 22860: {'lr': 0.00047588943445631767, 'samples': 11704832, 'steps': 22860, 'loss/train': 2.0187056064605713} +03/04/2022 16:18:56 - INFO - codeparrot_training - Step 22861: {'lr': 0.0004758871606440376, 'samples': 11705344, 'steps': 22861, 'loss/train': 2.0269904136657715} +03/04/2022 16:18:57 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/04/2022 16:19:02 - INFO - codeparrot_training - Step 22862: {'lr': 0.0004758848867299762, 'samples': 11705856, 'steps': 22862, 'loss/train': 1.879315733909607} +03/04/2022 16:19:05 - INFO - codeparrot_training - Step 22863: {'lr': 0.0004758826127141346, 'samples': 11706368, 'steps': 22863, 'loss/train': 1.1295838356018066} +03/04/2022 16:19:05 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 16:19:11 - INFO - codeparrot_training - Step 22864: {'lr': 0.00047588033859651376, 'samples': 11706880, 'steps': 22864, 'loss/train': 1.194993495941162} +03/04/2022 16:19:14 - INFO - codeparrot_training - Step 22865: {'lr': 0.00047587806437711475, 'samples': 11707392, 'steps': 22865, 'loss/train': 1.7857860326766968} +03/04/2022 16:19:15 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 16:19:19 - INFO - codeparrot_training - Step 22866: {'lr': 0.0004758757900559385, 'samples': 11707904, 'steps': 22866, 'loss/train': 1.4991466999053955} +03/04/2022 16:19:22 - INFO - codeparrot_training - Step 22867: {'lr': 0.0004758735156329862, 'samples': 11708416, 'steps': 22867, 'loss/train': 1.9092925786972046} +03/04/2022 16:19:24 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/04/2022 16:19:28 - INFO - codeparrot_training - Step 22868: {'lr': 0.00047587124110825874, 'samples': 11708928, 'steps': 22868, 'loss/train': 2.8576369285583496} +03/04/2022 16:19:31 - INFO - codeparrot_training - Step 22869: {'lr': 0.00047586896648175715, 'samples': 11709440, 'steps': 22869, 'loss/train': 2.0213112831115723} +03/04/2022 16:19:32 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 16:19:36 - INFO - codeparrot_training - Step 22870: {'lr': 0.00047586669175348254, 'samples': 11709952, 'steps': 22870, 'loss/train': 2.01139760017395} +03/04/2022 16:19:39 - INFO - codeparrot_training - Step 22871: {'lr': 0.0004758644169234359, 'samples': 11710464, 'steps': 22871, 'loss/train': 1.6415945291519165} +03/04/2022 16:19:41 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 16:19:44 - INFO - codeparrot_training - Step 22872: {'lr': 0.00047586214199161814, 'samples': 11710976, 'steps': 22872, 'loss/train': 2.0412938594818115} +03/04/2022 16:19:48 - INFO - codeparrot_training - Step 22873: {'lr': 0.00047585986695803046, 'samples': 11711488, 'steps': 22873, 'loss/train': 1.9360533952713013} +03/04/2022 16:19:49 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/04/2022 16:19:53 - INFO - codeparrot_training - Step 22874: {'lr': 0.0004758575918226738, 'samples': 11712000, 'steps': 22874, 'loss/train': 1.5150266885757446} +03/04/2022 16:19:56 - INFO - codeparrot_training - Step 22875: {'lr': 0.0004758553165855492, 'samples': 11712512, 'steps': 22875, 'loss/train': 4.4166259765625} +03/04/2022 16:19:58 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 16:20:01 - INFO - codeparrot_training - Step 22876: {'lr': 0.00047585304124665766, 'samples': 11713024, 'steps': 22876, 'loss/train': 2.2174160480499268} +03/04/2022 16:20:05 - INFO - codeparrot_training - Step 22877: {'lr': 0.0004758507658060003, 'samples': 11713536, 'steps': 22877, 'loss/train': 2.52005934715271} +03/04/2022 16:20:06 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 16:20:10 - INFO - codeparrot_training - Step 22878: {'lr': 0.00047584849026357796, 'samples': 11714048, 'steps': 22878, 'loss/train': 1.2430938482284546} +03/04/2022 16:20:13 - INFO - codeparrot_training - Step 22879: {'lr': 0.0004758462146193918, 'samples': 11714560, 'steps': 22879, 'loss/train': 2.15004825592041} +03/04/2022 16:20:14 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/04/2022 16:20:18 - INFO - codeparrot_training - Step 22880: {'lr': 0.00047584393887344285, 'samples': 11715072, 'steps': 22880, 'loss/train': 1.5511045455932617} +03/04/2022 16:20:21 - INFO - codeparrot_training - Step 22881: {'lr': 0.00047584166302573204, 'samples': 11715584, 'steps': 22881, 'loss/train': 1.7538604736328125} +03/04/2022 16:20:23 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 16:20:27 - INFO - codeparrot_training - Step 22882: {'lr': 0.0004758393870762606, 'samples': 11716096, 'steps': 22882, 'loss/train': 1.5002350807189941} +03/04/2022 16:20:30 - INFO - codeparrot_training - Step 22883: {'lr': 0.00047583711102502934, 'samples': 11716608, 'steps': 22883, 'loss/train': 1.954115867614746} +03/04/2022 16:20:31 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 16:20:35 - INFO - codeparrot_training - Step 22884: {'lr': 0.0004758348348720393, 'samples': 11717120, 'steps': 22884, 'loss/train': 0.8601232767105103} +03/04/2022 16:20:38 - INFO - codeparrot_training - Step 22885: {'lr': 0.00047583255861729167, 'samples': 11717632, 'steps': 22885, 'loss/train': 0.36167776584625244} +03/04/2022 16:20:40 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 16:20:44 - INFO - codeparrot_training - Step 22886: {'lr': 0.00047583028226078734, 'samples': 11718144, 'steps': 22886, 'loss/train': 2.0548055171966553} +03/04/2022 16:20:47 - INFO - codeparrot_training - Step 22887: {'lr': 0.0004758280058025274, 'samples': 11718656, 'steps': 22887, 'loss/train': 2.02970027923584} +03/04/2022 16:20:48 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/04/2022 16:20:52 - INFO - codeparrot_training - Step 22888: {'lr': 0.00047582572924251276, 'samples': 11719168, 'steps': 22888, 'loss/train': 1.9080907106399536} +03/04/2022 16:20:55 - INFO - codeparrot_training - Step 22889: {'lr': 0.00047582345258074453, 'samples': 11719680, 'steps': 22889, 'loss/train': 1.0518872737884521} +03/04/2022 16:20:56 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 16:21:00 - INFO - codeparrot_training - Step 22890: {'lr': 0.0004758211758172238, 'samples': 11720192, 'steps': 22890, 'loss/train': 0.95332270860672} +03/04/2022 16:21:03 - INFO - codeparrot_training - Step 22891: {'lr': 0.00047581889895195154, 'samples': 11720704, 'steps': 22891, 'loss/train': 1.528998613357544} +03/04/2022 16:21:05 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/04/2022 16:21:09 - INFO - codeparrot_training - Step 22892: {'lr': 0.00047581662198492873, 'samples': 11721216, 'steps': 22892, 'loss/train': 1.4116953611373901} +03/04/2022 16:21:12 - INFO - codeparrot_training - Step 22893: {'lr': 0.0004758143449161565, 'samples': 11721728, 'steps': 22893, 'loss/train': 0.6959846615791321} +03/04/2022 16:21:13 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 16:21:17 - INFO - codeparrot_training - Step 22894: {'lr': 0.00047581206774563575, 'samples': 11722240, 'steps': 22894, 'loss/train': 2.263852596282959} +03/04/2022 16:21:20 - INFO - codeparrot_training - Step 22895: {'lr': 0.0004758097904733676, 'samples': 11722752, 'steps': 22895, 'loss/train': 1.755010962486267} +03/04/2022 16:21:22 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 16:21:26 - INFO - codeparrot_training - Step 22896: {'lr': 0.000475807513099353, 'samples': 11723264, 'steps': 22896, 'loss/train': 2.370687961578369} +03/04/2022 16:21:29 - INFO - codeparrot_training - Step 22897: {'lr': 0.000475805235623593, 'samples': 11723776, 'steps': 22897, 'loss/train': 1.2427406311035156} +03/04/2022 16:21:30 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 16:21:34 - INFO - codeparrot_training - Step 22898: {'lr': 0.0004758029580460887, 'samples': 11724288, 'steps': 22898, 'loss/train': 2.382619619369507} +03/04/2022 16:21:37 - INFO - codeparrot_training - Step 22899: {'lr': 0.0004758006803668411, 'samples': 11724800, 'steps': 22899, 'loss/train': 0.23487669229507446} +03/04/2022 16:21:38 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 16:21:42 - INFO - codeparrot_training - Step 22900: {'lr': 0.0004757984025858511, 'samples': 11725312, 'steps': 22900, 'loss/train': 1.7162998914718628} +03/04/2022 16:21:46 - INFO - codeparrot_training - Step 22901: {'lr': 0.0004757961247031199, 'samples': 11725824, 'steps': 22901, 'loss/train': 1.060091257095337} +03/04/2022 16:21:46 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 16:21:51 - INFO - codeparrot_training - Step 22902: {'lr': 0.00047579384671864845, 'samples': 11726336, 'steps': 22902, 'loss/train': 1.9821926355361938} +03/04/2022 16:21:54 - INFO - codeparrot_training - Step 22903: {'lr': 0.0004757915686324377, 'samples': 11726848, 'steps': 22903, 'loss/train': 0.19560517370700836} +03/04/2022 16:21:55 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/04/2022 16:21:59 - INFO - codeparrot_training - Step 22904: {'lr': 0.00047578929044448883, 'samples': 11727360, 'steps': 22904, 'loss/train': 1.213908314704895} +03/04/2022 16:22:03 - INFO - codeparrot_training - Step 22905: {'lr': 0.0004757870121548028, 'samples': 11727872, 'steps': 22905, 'loss/train': 2.1588170528411865} +03/04/2022 16:22:03 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 16:22:08 - INFO - codeparrot_training - Step 22906: {'lr': 0.0004757847337633806, 'samples': 11728384, 'steps': 22906, 'loss/train': 2.1488897800445557} +03/04/2022 16:22:11 - INFO - codeparrot_training - Step 22907: {'lr': 0.0004757824552702232, 'samples': 11728896, 'steps': 22907, 'loss/train': 2.1493983268737793} +03/04/2022 16:22:12 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 16:22:16 - INFO - codeparrot_training - Step 22908: {'lr': 0.0004757801766753318, 'samples': 11729408, 'steps': 22908, 'loss/train': 1.3219431638717651} +03/04/2022 16:22:20 - INFO - codeparrot_training - Step 22909: {'lr': 0.00047577789797870743, 'samples': 11729920, 'steps': 22909, 'loss/train': 1.6207704544067383} +03/04/2022 16:22:21 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/04/2022 16:22:25 - INFO - codeparrot_training - Step 22910: {'lr': 0.0004757756191803508, 'samples': 11730432, 'steps': 22910, 'loss/train': 1.7265653610229492} +03/04/2022 16:22:28 - INFO - codeparrot_training - Step 22911: {'lr': 0.0004757733402802633, 'samples': 11730944, 'steps': 22911, 'loss/train': 2.2151176929473877} +03/04/2022 16:22:29 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 16:22:33 - INFO - codeparrot_training - Step 22912: {'lr': 0.0004757710612784458, 'samples': 11731456, 'steps': 22912, 'loss/train': 1.8647834062576294} +03/04/2022 16:22:37 - INFO - codeparrot_training - Step 22913: {'lr': 0.0004757687821748994, 'samples': 11731968, 'steps': 22913, 'loss/train': 1.0429507493972778} +03/04/2022 16:22:38 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 16:22:42 - INFO - codeparrot_training - Step 22914: {'lr': 0.00047576650296962496, 'samples': 11732480, 'steps': 22914, 'loss/train': 1.3514175415039062} +03/04/2022 16:22:45 - INFO - codeparrot_training - Step 22915: {'lr': 0.0004757642236626237, 'samples': 11732992, 'steps': 22915, 'loss/train': 2.3628201484680176} +03/04/2022 16:22:47 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/04/2022 16:22:50 - INFO - codeparrot_training - Step 22916: {'lr': 0.00047576194425389654, 'samples': 11733504, 'steps': 22916, 'loss/train': 0.7548831105232239} +03/04/2022 16:22:53 - INFO - codeparrot_training - Step 22917: {'lr': 0.00047575966474344445, 'samples': 11734016, 'steps': 22917, 'loss/train': 1.9331450462341309} +03/04/2022 16:22:56 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/04/2022 16:22:59 - INFO - codeparrot_training - Step 22918: {'lr': 0.00047575738513126867, 'samples': 11734528, 'steps': 22918, 'loss/train': 1.871644139289856} +03/04/2022 16:23:02 - INFO - codeparrot_training - Step 22919: {'lr': 0.00047575510541737, 'samples': 11735040, 'steps': 22919, 'loss/train': 1.9274777173995972} +03/04/2022 16:23:04 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/04/2022 16:23:07 - INFO - codeparrot_training - Step 22920: {'lr': 0.0004757528256017496, 'samples': 11735552, 'steps': 22920, 'loss/train': 1.7512754201889038} +03/04/2022 16:23:10 - INFO - codeparrot_training - Step 22921: {'lr': 0.00047575054568440846, 'samples': 11736064, 'steps': 22921, 'loss/train': 0.29790833592414856} +03/04/2022 16:23:12 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/04/2022 16:23:16 - INFO - codeparrot_training - Step 22922: {'lr': 0.00047574826566534764, 'samples': 11736576, 'steps': 22922, 'loss/train': 1.6377239227294922} +03/04/2022 16:23:19 - INFO - codeparrot_training - Step 22923: {'lr': 0.0004757459855445681, 'samples': 11737088, 'steps': 22923, 'loss/train': 1.2359236478805542} +03/04/2022 16:23:21 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 16:23:24 - INFO - codeparrot_training - Step 22924: {'lr': 0.0004757437053220709, 'samples': 11737600, 'steps': 22924, 'loss/train': 2.1611945629119873} +03/04/2022 16:23:28 - INFO - codeparrot_training - Step 22925: {'lr': 0.0004757414249978571, 'samples': 11738112, 'steps': 22925, 'loss/train': 0.8202895522117615} +03/04/2022 16:23:30 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/04/2022 16:23:33 - INFO - codeparrot_training - Step 22926: {'lr': 0.0004757391445719277, 'samples': 11738624, 'steps': 22926, 'loss/train': 2.0811893939971924} +03/04/2022 16:23:36 - INFO - codeparrot_training - Step 22927: {'lr': 0.00047573686404428365, 'samples': 11739136, 'steps': 22927, 'loss/train': 2.065178632736206} +03/04/2022 16:23:38 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 16:23:41 - INFO - codeparrot_training - Step 22928: {'lr': 0.0004757345834149261, 'samples': 11739648, 'steps': 22928, 'loss/train': 1.4111851453781128} +03/04/2022 16:23:44 - INFO - codeparrot_training - Step 22929: {'lr': 0.00047573230268385604, 'samples': 11740160, 'steps': 22929, 'loss/train': 1.8747248649597168} +03/04/2022 16:23:46 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/04/2022 16:23:50 - INFO - codeparrot_training - Step 22930: {'lr': 0.0004757300218510745, 'samples': 11740672, 'steps': 22930, 'loss/train': 2.2454559803009033} +03/04/2022 16:23:53 - INFO - codeparrot_training - Step 22931: {'lr': 0.00047572774091658243, 'samples': 11741184, 'steps': 22931, 'loss/train': 2.5826971530914307} +03/04/2022 16:23:55 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/04/2022 16:23:58 - INFO - codeparrot_training - Step 22932: {'lr': 0.000475725459880381, 'samples': 11741696, 'steps': 22932, 'loss/train': 1.6706597805023193} +03/04/2022 16:24:01 - INFO - codeparrot_training - Step 22933: {'lr': 0.00047572317874247107, 'samples': 11742208, 'steps': 22933, 'loss/train': 2.3651859760284424} +03/04/2022 16:24:03 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/04/2022 16:24:07 - INFO - codeparrot_training - Step 22934: {'lr': 0.00047572089750285383, 'samples': 11742720, 'steps': 22934, 'loss/train': 1.698809027671814} +03/04/2022 16:24:10 - INFO - codeparrot_training - Step 22935: {'lr': 0.00047571861616153025, 'samples': 11743232, 'steps': 22935, 'loss/train': 2.33353328704834} +03/04/2022 16:24:12 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 16:24:15 - INFO - codeparrot_training - Step 22936: {'lr': 0.0004757163347185013, 'samples': 11743744, 'steps': 22936, 'loss/train': 0.9967316389083862} +03/04/2022 16:24:18 - INFO - codeparrot_training - Step 22937: {'lr': 0.00047571405317376803, 'samples': 11744256, 'steps': 22937, 'loss/train': 1.9393481016159058} +03/04/2022 16:24:21 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 16:24:24 - INFO - codeparrot_training - Step 22938: {'lr': 0.0004757117715273316, 'samples': 11744768, 'steps': 22938, 'loss/train': 1.32143235206604} +03/04/2022 16:24:27 - INFO - codeparrot_training - Step 22939: {'lr': 0.00047570948977919284, 'samples': 11745280, 'steps': 22939, 'loss/train': 1.8810559511184692} +03/04/2022 16:24:30 - INFO - codeparrot_training - Step 22940: {'lr': 0.00047570720792935284, 'samples': 11745792, 'steps': 22940, 'loss/train': 2.8084115982055664} +03/04/2022 16:24:30 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 16:24:35 - INFO - codeparrot_training - Step 22941: {'lr': 0.00047570492597781274, 'samples': 11746304, 'steps': 22941, 'loss/train': 2.0575263500213623} +03/04/2022 16:24:39 - INFO - codeparrot_training - Step 22942: {'lr': 0.0004757026439245735, 'samples': 11746816, 'steps': 22942, 'loss/train': 1.5483179092407227} +03/04/2022 16:24:39 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 16:24:44 - INFO - codeparrot_training - Step 22943: {'lr': 0.0004757003617696361, 'samples': 11747328, 'steps': 22943, 'loss/train': 2.5535199642181396} +03/04/2022 16:24:47 - INFO - codeparrot_training - Step 22944: {'lr': 0.0004756980795130015, 'samples': 11747840, 'steps': 22944, 'loss/train': 1.9382071495056152} +03/04/2022 16:24:47 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/04/2022 16:24:52 - INFO - codeparrot_training - Step 22945: {'lr': 0.00047569579715467093, 'samples': 11748352, 'steps': 22945, 'loss/train': 1.9776567220687866} +03/04/2022 16:24:55 - INFO - codeparrot_training - Step 22946: {'lr': 0.00047569351469464526, 'samples': 11748864, 'steps': 22946, 'loss/train': 1.6974549293518066} +03/04/2022 16:24:55 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/04/2022 16:25:01 - INFO - codeparrot_training - Step 22947: {'lr': 0.0004756912321329256, 'samples': 11749376, 'steps': 22947, 'loss/train': 1.902990460395813} +03/04/2022 16:25:04 - INFO - codeparrot_training - Step 22948: {'lr': 0.000475688949469513, 'samples': 11749888, 'steps': 22948, 'loss/train': 1.766126036643982} +03/04/2022 16:25:04 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/04/2022 16:25:09 - INFO - codeparrot_training - Step 22949: {'lr': 0.0004756866667044084, 'samples': 11750400, 'steps': 22949, 'loss/train': 1.1813002824783325} +03/04/2022 16:25:12 - INFO - codeparrot_training - Step 22950: {'lr': 0.0004756843838376128, 'samples': 11750912, 'steps': 22950, 'loss/train': 2.3913285732269287} +03/04/2022 16:25:12 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 16:25:18 - INFO - codeparrot_training - Step 22951: {'lr': 0.0004756821008691274, 'samples': 11751424, 'steps': 22951, 'loss/train': 2.086440324783325} +03/04/2022 16:25:21 - INFO - codeparrot_training - Step 22952: {'lr': 0.0004756798177989531, 'samples': 11751936, 'steps': 22952, 'loss/train': 1.5182918310165405} +03/04/2022 16:25:21 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 16:25:26 - INFO - codeparrot_training - Step 22953: {'lr': 0.00047567753462709095, 'samples': 11752448, 'steps': 22953, 'loss/train': 2.460782766342163} +03/04/2022 16:25:29 - INFO - codeparrot_training - Step 22954: {'lr': 0.00047567525135354193, 'samples': 11752960, 'steps': 22954, 'loss/train': 1.339983344078064} +03/04/2022 16:25:29 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 16:25:35 - INFO - codeparrot_training - Step 22955: {'lr': 0.00047567296797830727, 'samples': 11753472, 'steps': 22955, 'loss/train': 2.493532180786133} +03/04/2022 16:25:38 - INFO - codeparrot_training - Step 22956: {'lr': 0.00047567068450138773, 'samples': 11753984, 'steps': 22956, 'loss/train': 1.7990163564682007} +03/04/2022 16:25:38 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/04/2022 16:25:43 - INFO - codeparrot_training - Step 22957: {'lr': 0.0004756684009227845, 'samples': 11754496, 'steps': 22957, 'loss/train': 2.379865884780884} +03/04/2022 16:25:46 - INFO - codeparrot_training - Step 22958: {'lr': 0.0004756661172424986, 'samples': 11755008, 'steps': 22958, 'loss/train': 1.4592702388763428} +03/04/2022 16:25:46 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 16:25:51 - INFO - codeparrot_training - Step 22959: {'lr': 0.000475663833460531, 'samples': 11755520, 'steps': 22959, 'loss/train': 1.8321703672409058} +03/04/2022 16:25:55 - INFO - codeparrot_training - Step 22960: {'lr': 0.00047566154957688275, 'samples': 11756032, 'steps': 22960, 'loss/train': 1.7337323427200317} +03/04/2022 16:25:55 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 16:26:00 - INFO - codeparrot_training - Step 22961: {'lr': 0.0004756592655915549, 'samples': 11756544, 'steps': 22961, 'loss/train': 1.3006869554519653} +03/04/2022 16:26:03 - INFO - codeparrot_training - Step 22962: {'lr': 0.00047565698150454845, 'samples': 11757056, 'steps': 22962, 'loss/train': 2.3456032276153564} +03/04/2022 16:26:03 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/04/2022 16:26:09 - INFO - codeparrot_training - Step 22963: {'lr': 0.0004756546973158644, 'samples': 11757568, 'steps': 22963, 'loss/train': 1.941611886024475} +03/04/2022 16:26:12 - INFO - codeparrot_training - Step 22964: {'lr': 0.00047565241302550395, 'samples': 11758080, 'steps': 22964, 'loss/train': 2.023801565170288} +03/04/2022 16:26:12 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 16:26:17 - INFO - codeparrot_training - Step 22965: {'lr': 0.0004756501286334679, 'samples': 11758592, 'steps': 22965, 'loss/train': 2.3101296424865723} +03/04/2022 16:26:20 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 16:26:22 - INFO - codeparrot_training - Step 22966: {'lr': 0.0004756478441397575, 'samples': 11759104, 'steps': 22966, 'loss/train': 1.7900031805038452} +03/04/2022 16:26:26 - INFO - codeparrot_training - Step 22967: {'lr': 0.0004756455595443735, 'samples': 11759616, 'steps': 22967, 'loss/train': 2.0853164196014404} +03/04/2022 16:26:29 - INFO - codeparrot_training - Step 22968: {'lr': 0.00047564327484731725, 'samples': 11760128, 'steps': 22968, 'loss/train': 1.4200279712677002} +03/04/2022 16:26:29 - INFO - codeparrot_training - Skipping example with length 5 (seq_length=1024) +03/04/2022 16:26:34 - INFO - codeparrot_training - Step 22969: {'lr': 0.0004756409900485895, 'samples': 11760640, 'steps': 22969, 'loss/train': 1.9931896924972534} +03/04/2022 16:26:37 - INFO - codeparrot_training - Step 22970: {'lr': 0.00047563870514819154, 'samples': 11761152, 'steps': 22970, 'loss/train': 1.9345331192016602} +03/04/2022 16:26:37 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 16:26:43 - INFO - codeparrot_training - Step 22971: {'lr': 0.0004756364201461241, 'samples': 11761664, 'steps': 22971, 'loss/train': 1.6411733627319336} +03/04/2022 16:26:45 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 16:26:48 - INFO - codeparrot_training - Step 22972: {'lr': 0.00047563413504238847, 'samples': 11762176, 'steps': 22972, 'loss/train': 1.4019917249679565} +03/04/2022 16:26:51 - INFO - codeparrot_training - Step 22973: {'lr': 0.0004756318498369855, 'samples': 11762688, 'steps': 22973, 'loss/train': 1.0531474351882935} +03/04/2022 16:26:54 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/04/2022 16:26:56 - INFO - codeparrot_training - Step 22974: {'lr': 0.0004756295645299164, 'samples': 11763200, 'steps': 22974, 'loss/train': 1.4114187955856323} +03/04/2022 16:26:59 - INFO - codeparrot_training - Step 22975: {'lr': 0.00047562727912118206, 'samples': 11763712, 'steps': 22975, 'loss/train': 1.5655065774917603} +03/04/2022 16:27:02 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 16:27:05 - INFO - codeparrot_training - Step 22976: {'lr': 0.00047562499361078356, 'samples': 11764224, 'steps': 22976, 'loss/train': 1.260461449623108} +03/04/2022 16:27:08 - INFO - codeparrot_training - Step 22977: {'lr': 0.00047562270799872186, 'samples': 11764736, 'steps': 22977, 'loss/train': 2.3474597930908203} +03/04/2022 16:27:11 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 16:27:13 - INFO - codeparrot_training - Step 22978: {'lr': 0.00047562042228499815, 'samples': 11765248, 'steps': 22978, 'loss/train': 1.6275286674499512} +03/04/2022 16:27:16 - INFO - codeparrot_training - Step 22979: {'lr': 0.00047561813646961325, 'samples': 11765760, 'steps': 22979, 'loss/train': 2.516315221786499} +03/04/2022 16:27:19 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/04/2022 16:27:21 - INFO - codeparrot_training - Step 22980: {'lr': 0.0004756158505525684, 'samples': 11766272, 'steps': 22980, 'loss/train': 2.3076066970825195} +03/04/2022 16:27:25 - INFO - codeparrot_training - Step 22981: {'lr': 0.0004756135645338644, 'samples': 11766784, 'steps': 22981, 'loss/train': 1.6677684783935547} +03/04/2022 16:27:27 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/04/2022 16:27:30 - INFO - codeparrot_training - Step 22982: {'lr': 0.00047561127841350256, 'samples': 11767296, 'steps': 22982, 'loss/train': 1.734668493270874} +03/04/2022 16:27:33 - INFO - codeparrot_training - Step 22983: {'lr': 0.0004756089921914837, 'samples': 11767808, 'steps': 22983, 'loss/train': 2.0724599361419678} +03/04/2022 16:27:36 - INFO - codeparrot_training - Step 22984: {'lr': 0.00047560670586780886, 'samples': 11768320, 'steps': 22984, 'loss/train': 2.037463665008545} +03/04/2022 16:27:36 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/04/2022 16:27:41 - INFO - codeparrot_training - Step 22985: {'lr': 0.0004756044194424792, 'samples': 11768832, 'steps': 22985, 'loss/train': 3.4214882850646973} +03/04/2022 16:27:44 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 16:27:47 - INFO - codeparrot_training - Step 22986: {'lr': 0.0004756021329154956, 'samples': 11769344, 'steps': 22986, 'loss/train': 2.0647096633911133} +03/04/2022 16:27:50 - INFO - codeparrot_training - Step 22987: {'lr': 0.0004755998462868592, 'samples': 11769856, 'steps': 22987, 'loss/train': 2.0848288536071777} +03/04/2022 16:27:53 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 16:27:55 - INFO - codeparrot_training - Step 22988: {'lr': 0.00047559755955657097, 'samples': 11770368, 'steps': 22988, 'loss/train': 1.0303720235824585} +03/04/2022 16:27:58 - INFO - codeparrot_training - Step 22989: {'lr': 0.000475595272724632, 'samples': 11770880, 'steps': 22989, 'loss/train': 1.9435657262802124} +03/04/2022 16:28:01 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 16:28:04 - INFO - codeparrot_training - Step 22990: {'lr': 0.00047559298579104325, 'samples': 11771392, 'steps': 22990, 'loss/train': 2.1524455547332764} +03/04/2022 16:28:07 - INFO - codeparrot_training - Step 22991: {'lr': 0.00047559069875580573, 'samples': 11771904, 'steps': 22991, 'loss/train': 1.128570795059204} +03/04/2022 16:28:09 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 16:28:12 - INFO - codeparrot_training - Step 22992: {'lr': 0.00047558841161892063, 'samples': 11772416, 'steps': 22992, 'loss/train': 2.0047812461853027} +03/04/2022 16:28:15 - INFO - codeparrot_training - Step 22993: {'lr': 0.00047558612438038887, 'samples': 11772928, 'steps': 22993, 'loss/train': 1.740499496459961} +03/04/2022 16:28:17 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 16:28:20 - INFO - codeparrot_training - Step 22994: {'lr': 0.00047558383704021136, 'samples': 11773440, 'steps': 22994, 'loss/train': 1.771320104598999} +03/04/2022 16:28:23 - INFO - codeparrot_training - Step 22995: {'lr': 0.00047558154959838935, 'samples': 11773952, 'steps': 22995, 'loss/train': 2.0950584411621094} +03/04/2022 16:28:26 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/04/2022 16:28:29 - INFO - codeparrot_training - Step 22996: {'lr': 0.0004755792620549237, 'samples': 11774464, 'steps': 22996, 'loss/train': 1.9343205690383911} +03/04/2022 16:28:32 - INFO - codeparrot_training - Step 22997: {'lr': 0.0004755769744098156, 'samples': 11774976, 'steps': 22997, 'loss/train': 2.8106653690338135} +03/04/2022 16:28:34 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/04/2022 16:28:37 - INFO - codeparrot_training - Step 22998: {'lr': 0.00047557468666306596, 'samples': 11775488, 'steps': 22998, 'loss/train': 1.1588821411132812} +03/04/2022 16:28:40 - INFO - codeparrot_training - Step 22999: {'lr': 0.00047557239881467584, 'samples': 11776000, 'steps': 22999, 'loss/train': 2.5552332401275635} +03/04/2022 16:28:43 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/04/2022 16:28:46 - INFO - codeparrot_training - Step 23000: {'lr': 0.0004755701108646463, 'samples': 11776512, 'steps': 23000, 'loss/train': 1.994852900505066} +03/04/2022 16:28:49 - INFO - codeparrot_training - Step 23001: {'lr': 0.0004755678228129784, 'samples': 11777024, 'steps': 23001, 'loss/train': 1.7953674793243408} +03/04/2022 16:28:51 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 16:28:55 - INFO - codeparrot_training - Step 23002: {'lr': 0.000475565534659673, 'samples': 11777536, 'steps': 23002, 'loss/train': 2.0082006454467773} +03/04/2022 16:28:58 - INFO - codeparrot_training - Step 23003: {'lr': 0.00047556324640473134, 'samples': 11778048, 'steps': 23003, 'loss/train': 2.4463489055633545} +03/04/2022 16:29:01 - INFO - codeparrot_training - Step 23004: {'lr': 0.0004755609580481543, 'samples': 11778560, 'steps': 23004, 'loss/train': 0.4766114056110382} +03/04/2022 16:29:01 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/04/2022 16:29:06 - INFO - codeparrot_training - Step 23005: {'lr': 0.00047555866958994296, 'samples': 11779072, 'steps': 23005, 'loss/train': 2.430408000946045} +03/04/2022 16:29:09 - INFO - codeparrot_training - Step 23006: {'lr': 0.00047555638103009845, 'samples': 11779584, 'steps': 23006, 'loss/train': 2.374987840652466} +03/04/2022 16:29:10 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/04/2022 16:29:15 - INFO - codeparrot_training - Step 23007: {'lr': 0.0004755540923686217, 'samples': 11780096, 'steps': 23007, 'loss/train': 1.8597408533096313} +03/04/2022 16:29:18 - INFO - codeparrot_training - Step 23008: {'lr': 0.0004755518036055137, 'samples': 11780608, 'steps': 23008, 'loss/train': 2.040605306625366} +03/04/2022 16:29:18 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 16:29:23 - INFO - codeparrot_training - Step 23009: {'lr': 0.0004755495147407756, 'samples': 11781120, 'steps': 23009, 'loss/train': 2.5775997638702393} +03/04/2022 16:29:26 - INFO - codeparrot_training - Step 23010: {'lr': 0.00047554722577440833, 'samples': 11781632, 'steps': 23010, 'loss/train': 1.8656977415084839} +03/04/2022 16:29:26 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 16:29:32 - INFO - codeparrot_training - Step 23011: {'lr': 0.00047554493670641296, 'samples': 11782144, 'steps': 23011, 'loss/train': 1.6410373449325562} +03/04/2022 16:29:35 - INFO - codeparrot_training - Step 23012: {'lr': 0.0004755426475367905, 'samples': 11782656, 'steps': 23012, 'loss/train': 1.3066271543502808} +03/04/2022 16:29:35 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 16:29:40 - INFO - codeparrot_training - Step 23013: {'lr': 0.00047554035826554206, 'samples': 11783168, 'steps': 23013, 'loss/train': 1.8616985082626343} +03/04/2022 16:29:43 - INFO - codeparrot_training - Step 23014: {'lr': 0.0004755380688926686, 'samples': 11783680, 'steps': 23014, 'loss/train': 1.6147518157958984} +03/04/2022 16:29:43 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 16:29:48 - INFO - codeparrot_training - Step 23015: {'lr': 0.00047553577941817114, 'samples': 11784192, 'steps': 23015, 'loss/train': 1.8571584224700928} +03/04/2022 16:29:51 - INFO - codeparrot_training - Step 23016: {'lr': 0.0004755334898420507, 'samples': 11784704, 'steps': 23016, 'loss/train': 2.8200230598449707} +03/04/2022 16:29:52 - INFO - codeparrot_training - Skipping example with length 158 (seq_length=1024) +03/04/2022 16:29:57 - INFO - codeparrot_training - Step 23017: {'lr': 0.00047553120016430837, 'samples': 11785216, 'steps': 23017, 'loss/train': 1.8625218868255615} +03/04/2022 16:30:00 - INFO - codeparrot_training - Step 23018: {'lr': 0.0004755289103849453, 'samples': 11785728, 'steps': 23018, 'loss/train': 1.34120774269104} +03/04/2022 16:30:00 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/04/2022 16:30:05 - INFO - codeparrot_training - Step 23019: {'lr': 0.0004755266205039622, 'samples': 11786240, 'steps': 23019, 'loss/train': 1.4224921464920044} +03/04/2022 16:30:09 - INFO - codeparrot_training - Step 23020: {'lr': 0.00047552433052136034, 'samples': 11786752, 'steps': 23020, 'loss/train': 1.2657737731933594} +03/04/2022 16:30:09 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/04/2022 16:30:14 - INFO - codeparrot_training - Step 23021: {'lr': 0.00047552204043714076, 'samples': 11787264, 'steps': 23021, 'loss/train': 2.0489630699157715} +03/04/2022 16:30:17 - INFO - codeparrot_training - Step 23022: {'lr': 0.0004755197502513043, 'samples': 11787776, 'steps': 23022, 'loss/train': 3.2108404636383057} +03/04/2022 16:30:17 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 16:30:22 - INFO - codeparrot_training - Step 23023: {'lr': 0.00047551745996385233, 'samples': 11788288, 'steps': 23023, 'loss/train': 2.0837550163269043} +03/04/2022 16:30:25 - INFO - codeparrot_training - Step 23024: {'lr': 0.00047551516957478545, 'samples': 11788800, 'steps': 23024, 'loss/train': 1.6003386974334717} +03/04/2022 16:30:26 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/04/2022 16:30:31 - INFO - codeparrot_training - Step 23025: {'lr': 0.0004755128790841051, 'samples': 11789312, 'steps': 23025, 'loss/train': 6.2289018630981445} +03/04/2022 16:30:34 - INFO - codeparrot_training - Step 23026: {'lr': 0.000475510588491812, 'samples': 11789824, 'steps': 23026, 'loss/train': 1.8534855842590332} +03/04/2022 16:30:36 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/04/2022 16:30:39 - INFO - codeparrot_training - Step 23027: {'lr': 0.00047550829779790735, 'samples': 11790336, 'steps': 23027, 'loss/train': 2.139492988586426} +03/04/2022 16:30:43 - INFO - codeparrot_training - Step 23028: {'lr': 0.0004755060070023921, 'samples': 11790848, 'steps': 23028, 'loss/train': 2.332176685333252} +03/04/2022 16:30:45 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 16:30:48 - INFO - codeparrot_training - Step 23029: {'lr': 0.0004755037161052674, 'samples': 11791360, 'steps': 23029, 'loss/train': 1.9241465330123901} +03/04/2022 16:30:51 - INFO - codeparrot_training - Step 23030: {'lr': 0.00047550142510653415, 'samples': 11791872, 'steps': 23030, 'loss/train': 1.7896934747695923} +03/04/2022 16:30:53 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/04/2022 16:30:56 - INFO - codeparrot_training - Step 23031: {'lr': 0.0004754991340061935, 'samples': 11792384, 'steps': 23031, 'loss/train': 1.6287156343460083} +03/04/2022 16:30:59 - INFO - codeparrot_training - Step 23032: {'lr': 0.0004754968428042463, 'samples': 11792896, 'steps': 23032, 'loss/train': 1.6246886253356934} +03/04/2022 16:31:02 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/04/2022 16:31:05 - INFO - codeparrot_training - Step 23033: {'lr': 0.0004754945515006938, 'samples': 11793408, 'steps': 23033, 'loss/train': 2.1187543869018555} +03/04/2022 16:31:08 - INFO - codeparrot_training - Step 23034: {'lr': 0.0004754922600955369, 'samples': 11793920, 'steps': 23034, 'loss/train': 1.6580743789672852} +03/04/2022 16:31:10 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 16:31:13 - INFO - codeparrot_training - Step 23035: {'lr': 0.0004754899685887767, 'samples': 11794432, 'steps': 23035, 'loss/train': 1.9432101249694824} +03/04/2022 16:31:16 - INFO - codeparrot_training - Step 23036: {'lr': 0.0004754876769804142, 'samples': 11794944, 'steps': 23036, 'loss/train': 2.1283154487609863} +03/04/2022 16:31:19 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/04/2022 16:31:22 - INFO - codeparrot_training - Step 23037: {'lr': 0.00047548538527045035, 'samples': 11795456, 'steps': 23037, 'loss/train': 1.5924506187438965} +03/04/2022 16:31:25 - INFO - codeparrot_training - Step 23038: {'lr': 0.00047548309345888637, 'samples': 11795968, 'steps': 23038, 'loss/train': 1.8154445886611938} +03/04/2022 16:31:29 - INFO - codeparrot_training - Step 23039: {'lr': 0.00047548080154572315, 'samples': 11796480, 'steps': 23039, 'loss/train': 2.1351099014282227} +03/04/2022 16:31:30 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/04/2022 16:31:34 - INFO - codeparrot_training - Step 23040: {'lr': 0.00047547850953096174, 'samples': 11796992, 'steps': 23040, 'loss/train': 1.688396692276001} +03/04/2022 16:31:37 - INFO - codeparrot_training - Step 23041: {'lr': 0.0004754762174146032, 'samples': 11797504, 'steps': 23041, 'loss/train': 2.04152512550354} +03/04/2022 16:31:39 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 16:31:42 - INFO - codeparrot_training - Step 23042: {'lr': 0.00047547392519664853, 'samples': 11798016, 'steps': 23042, 'loss/train': 2.2936015129089355} +03/04/2022 16:31:45 - INFO - codeparrot_training - Step 23043: {'lr': 0.0004754716328770988, 'samples': 11798528, 'steps': 23043, 'loss/train': 1.445263147354126} +03/04/2022 16:31:47 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/04/2022 16:31:51 - INFO - codeparrot_training - Step 23044: {'lr': 0.00047546934045595516, 'samples': 11799040, 'steps': 23044, 'loss/train': 2.192905902862549} +03/04/2022 16:31:54 - INFO - codeparrot_training - Step 23045: {'lr': 0.00047546704793321835, 'samples': 11799552, 'steps': 23045, 'loss/train': 1.726205825805664} +03/04/2022 16:31:55 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 16:31:59 - INFO - codeparrot_training - Step 23046: {'lr': 0.0004754647553088896, 'samples': 11800064, 'steps': 23046, 'loss/train': 1.5424405336380005} +03/04/2022 16:32:02 - INFO - codeparrot_training - Step 23047: {'lr': 0.00047546246258297, 'samples': 11800576, 'steps': 23047, 'loss/train': 1.509690761566162} +03/04/2022 16:32:04 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 16:32:07 - INFO - codeparrot_training - Step 23048: {'lr': 0.00047546016975546037, 'samples': 11801088, 'steps': 23048, 'loss/train': 1.2615535259246826} +03/04/2022 16:32:11 - INFO - codeparrot_training - Step 23049: {'lr': 0.00047545787682636194, 'samples': 11801600, 'steps': 23049, 'loss/train': 2.250267267227173} +03/04/2022 16:32:12 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 16:32:16 - INFO - codeparrot_training - Step 23050: {'lr': 0.00047545558379567565, 'samples': 11802112, 'steps': 23050, 'loss/train': 1.4646823406219482} +03/04/2022 16:32:19 - INFO - codeparrot_training - Step 23051: {'lr': 0.00047545329066340256, 'samples': 11802624, 'steps': 23051, 'loss/train': 1.7581777572631836} +03/04/2022 16:32:20 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/04/2022 16:32:24 - INFO - codeparrot_training - Step 23052: {'lr': 0.00047545099742954367, 'samples': 11803136, 'steps': 23052, 'loss/train': 1.7429590225219727} +03/04/2022 16:32:27 - INFO - codeparrot_training - Step 23053: {'lr': 0.0004754487040941001, 'samples': 11803648, 'steps': 23053, 'loss/train': 1.5483179092407227} +03/04/2022 16:32:30 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 16:32:33 - INFO - codeparrot_training - Step 23054: {'lr': 0.0004754464106570727, 'samples': 11804160, 'steps': 23054, 'loss/train': 1.3968583345413208} +03/04/2022 16:32:36 - INFO - codeparrot_training - Step 23055: {'lr': 0.00047544411711846277, 'samples': 11804672, 'steps': 23055, 'loss/train': 2.6310253143310547} +03/04/2022 16:32:38 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 16:32:41 - INFO - codeparrot_training - Step 23056: {'lr': 0.00047544182347827114, 'samples': 11805184, 'steps': 23056, 'loss/train': 1.6130162477493286} +03/04/2022 16:32:44 - INFO - codeparrot_training - Step 23057: {'lr': 0.0004754395297364989, 'samples': 11805696, 'steps': 23057, 'loss/train': 0.7534849047660828} +03/04/2022 16:32:50 - INFO - codeparrot_training - Step 23058: {'lr': 0.0004754372358931471, 'samples': 11806208, 'steps': 23058, 'loss/train': 2.144620418548584} +03/04/2022 16:32:53 - INFO - codeparrot_training - Step 23059: {'lr': 0.00047543494194821675, 'samples': 11806720, 'steps': 23059, 'loss/train': 1.540574312210083} +03/04/2022 16:32:55 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 16:32:59 - INFO - codeparrot_training - Step 23060: {'lr': 0.00047543264790170887, 'samples': 11807232, 'steps': 23060, 'loss/train': 1.6122313737869263} +03/04/2022 16:33:02 - INFO - codeparrot_training - Step 23061: {'lr': 0.00047543035375362453, 'samples': 11807744, 'steps': 23061, 'loss/train': 2.5422613620758057} +03/04/2022 16:33:05 - INFO - codeparrot_training - Step 23062: {'lr': 0.00047542805950396476, 'samples': 11808256, 'steps': 23062, 'loss/train': 2.2535688877105713} +03/04/2022 16:33:06 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/04/2022 16:33:10 - INFO - codeparrot_training - Step 23063: {'lr': 0.00047542576515273064, 'samples': 11808768, 'steps': 23063, 'loss/train': 2.1243669986724854} +03/04/2022 16:33:14 - INFO - codeparrot_training - Step 23064: {'lr': 0.0004754234706999231, 'samples': 11809280, 'steps': 23064, 'loss/train': 1.436521291732788} +03/04/2022 16:33:14 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/04/2022 16:33:19 - INFO - codeparrot_training - Step 23065: {'lr': 0.0004754211761455432, 'samples': 11809792, 'steps': 23065, 'loss/train': 2.0821533203125} +03/04/2022 16:33:22 - INFO - codeparrot_training - Step 23066: {'lr': 0.000475418881489592, 'samples': 11810304, 'steps': 23066, 'loss/train': 0.8612276315689087} +03/04/2022 16:33:23 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/04/2022 16:33:27 - INFO - codeparrot_training - Step 23067: {'lr': 0.0004754165867320706, 'samples': 11810816, 'steps': 23067, 'loss/train': 2.5535988807678223} +03/04/2022 16:33:31 - INFO - codeparrot_training - Step 23068: {'lr': 0.00047541429187297984, 'samples': 11811328, 'steps': 23068, 'loss/train': 1.878833532333374} +03/04/2022 16:33:32 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 16:33:36 - INFO - codeparrot_training - Step 23069: {'lr': 0.00047541199691232094, 'samples': 11811840, 'steps': 23069, 'loss/train': 1.6413850784301758} +03/04/2022 16:33:39 - INFO - codeparrot_training - Step 23070: {'lr': 0.0004754097018500949, 'samples': 11812352, 'steps': 23070, 'loss/train': 1.790895700454712} +03/04/2022 16:33:40 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 16:33:44 - INFO - codeparrot_training - Step 23071: {'lr': 0.0004754074066863027, 'samples': 11812864, 'steps': 23071, 'loss/train': 2.0991432666778564} +03/04/2022 16:33:48 - INFO - codeparrot_training - Step 23072: {'lr': 0.0004754051114209454, 'samples': 11813376, 'steps': 23072, 'loss/train': 1.0667691230773926} +03/04/2022 16:33:49 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/04/2022 16:33:53 - INFO - codeparrot_training - Step 23073: {'lr': 0.0004754028160540241, 'samples': 11813888, 'steps': 23073, 'loss/train': 1.6967476606369019} +03/04/2022 16:33:56 - INFO - codeparrot_training - Step 23074: {'lr': 0.0004754005205855397, 'samples': 11814400, 'steps': 23074, 'loss/train': 1.9562907218933105} +03/04/2022 16:33:57 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/04/2022 16:34:01 - INFO - codeparrot_training - Step 23075: {'lr': 0.0004753982250154933, 'samples': 11814912, 'steps': 23075, 'loss/train': 1.7984105348587036} +03/04/2022 16:34:04 - INFO - codeparrot_training - Step 23076: {'lr': 0.00047539592934388596, 'samples': 11815424, 'steps': 23076, 'loss/train': 1.6219463348388672} +03/04/2022 16:34:06 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 16:34:10 - INFO - codeparrot_training - Step 23077: {'lr': 0.0004753936335707187, 'samples': 11815936, 'steps': 23077, 'loss/train': 2.232764720916748} +03/04/2022 16:34:13 - INFO - codeparrot_training - Step 23078: {'lr': 0.0004753913376959925, 'samples': 11816448, 'steps': 23078, 'loss/train': 2.0959174633026123} +03/04/2022 16:34:14 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 16:34:18 - INFO - codeparrot_training - Step 23079: {'lr': 0.00047538904171970847, 'samples': 11816960, 'steps': 23079, 'loss/train': 2.0631630420684814} +03/04/2022 16:34:22 - INFO - codeparrot_training - Step 23080: {'lr': 0.0004753867456418677, 'samples': 11817472, 'steps': 23080, 'loss/train': 0.5025739669799805} +03/04/2022 16:34:23 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 16:34:27 - INFO - codeparrot_training - Step 23081: {'lr': 0.000475384449462471, 'samples': 11817984, 'steps': 23081, 'loss/train': 2.032984733581543} +03/04/2022 16:34:30 - INFO - codeparrot_training - Step 23082: {'lr': 0.00047538215318151955, 'samples': 11818496, 'steps': 23082, 'loss/train': 1.2163959741592407} +03/04/2022 16:34:32 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/04/2022 16:34:35 - INFO - codeparrot_training - Step 23083: {'lr': 0.0004753798567990145, 'samples': 11819008, 'steps': 23083, 'loss/train': 2.0322654247283936} +03/04/2022 16:34:39 - INFO - codeparrot_training - Step 23084: {'lr': 0.00047537756031495673, 'samples': 11819520, 'steps': 23084, 'loss/train': 3.804889440536499} +03/04/2022 16:34:41 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 16:34:44 - INFO - codeparrot_training - Step 23085: {'lr': 0.0004753752637293473, 'samples': 11820032, 'steps': 23085, 'loss/train': 1.8004741668701172} +03/04/2022 16:34:47 - INFO - codeparrot_training - Step 23086: {'lr': 0.0004753729670421871, 'samples': 11820544, 'steps': 23086, 'loss/train': 1.7894450426101685} +03/04/2022 16:34:49 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 16:34:52 - INFO - codeparrot_training - Step 23087: {'lr': 0.0004753706702534775, 'samples': 11821056, 'steps': 23087, 'loss/train': 3.3105361461639404} +03/04/2022 16:34:55 - INFO - codeparrot_training - Step 23088: {'lr': 0.0004753683733632193, 'samples': 11821568, 'steps': 23088, 'loss/train': 2.203911542892456} +03/04/2022 16:34:58 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 16:35:01 - INFO - codeparrot_training - Step 23089: {'lr': 0.0004753660763714136, 'samples': 11822080, 'steps': 23089, 'loss/train': 0.8561584949493408} +03/04/2022 16:35:04 - INFO - codeparrot_training - Step 23090: {'lr': 0.00047536377927806143, 'samples': 11822592, 'steps': 23090, 'loss/train': 2.3126933574676514} +03/04/2022 16:35:07 - INFO - codeparrot_training - Step 23091: {'lr': 0.0004753614820831638, 'samples': 11823104, 'steps': 23091, 'loss/train': 2.8475263118743896} +03/04/2022 16:35:08 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 16:35:13 - INFO - codeparrot_training - Step 23092: {'lr': 0.0004753591847867218, 'samples': 11823616, 'steps': 23092, 'loss/train': 1.6417335271835327} +03/04/2022 16:35:16 - INFO - codeparrot_training - Step 23093: {'lr': 0.0004753568873887364, 'samples': 11824128, 'steps': 23093, 'loss/train': 2.3573687076568604} +03/04/2022 16:35:16 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/04/2022 16:35:22 - INFO - codeparrot_training - Step 23094: {'lr': 0.00047535458988920865, 'samples': 11824640, 'steps': 23094, 'loss/train': 2.2507686614990234} +03/04/2022 16:35:25 - INFO - codeparrot_training - Step 23095: {'lr': 0.0004753522922881396, 'samples': 11825152, 'steps': 23095, 'loss/train': 1.3484667539596558} +03/04/2022 16:35:28 - INFO - codeparrot_training - Step 23096: {'lr': 0.00047534999458553027, 'samples': 11825664, 'steps': 23096, 'loss/train': 1.5285576581954956} +03/04/2022 16:35:28 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 16:35:33 - INFO - codeparrot_training - Step 23097: {'lr': 0.00047534769678138177, 'samples': 11826176, 'steps': 23097, 'loss/train': 1.8699986934661865} +03/04/2022 16:35:36 - INFO - codeparrot_training - Step 23098: {'lr': 0.00047534539887569507, 'samples': 11826688, 'steps': 23098, 'loss/train': 2.5110912322998047} +03/04/2022 16:35:37 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 16:35:42 - INFO - codeparrot_training - Step 23099: {'lr': 0.00047534310086847116, 'samples': 11827200, 'steps': 23099, 'loss/train': 1.549009084701538} +03/04/2022 16:35:45 - INFO - codeparrot_training - Step 23100: {'lr': 0.0004753408027597111, 'samples': 11827712, 'steps': 23100, 'loss/train': 2.0238702297210693} +03/04/2022 16:35:45 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 16:35:50 - INFO - codeparrot_training - Step 23101: {'lr': 0.0004753385045494161, 'samples': 11828224, 'steps': 23101, 'loss/train': 1.4151208400726318} +03/04/2022 16:35:53 - INFO - codeparrot_training - Step 23102: {'lr': 0.0004753362062375869, 'samples': 11828736, 'steps': 23102, 'loss/train': 2.0502240657806396} +03/04/2022 16:35:54 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 16:35:59 - INFO - codeparrot_training - Step 23103: {'lr': 0.0004753339078242247, 'samples': 11829248, 'steps': 23103, 'loss/train': 1.7592886686325073} +03/04/2022 16:36:02 - INFO - codeparrot_training - Step 23104: {'lr': 0.00047533160930933054, 'samples': 11829760, 'steps': 23104, 'loss/train': 2.433387279510498} +03/04/2022 16:36:02 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/04/2022 16:36:07 - INFO - codeparrot_training - Step 23105: {'lr': 0.00047532931069290546, 'samples': 11830272, 'steps': 23105, 'loss/train': 2.179884433746338} +03/04/2022 16:36:10 - INFO - codeparrot_training - Step 23106: {'lr': 0.00047532701197495043, 'samples': 11830784, 'steps': 23106, 'loss/train': 4.407009601593018} +03/04/2022 16:36:11 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 16:36:16 - INFO - codeparrot_training - Step 23107: {'lr': 0.00047532471315546654, 'samples': 11831296, 'steps': 23107, 'loss/train': 1.6212294101715088} +03/04/2022 16:36:19 - INFO - codeparrot_training - Step 23108: {'lr': 0.00047532241423445487, 'samples': 11831808, 'steps': 23108, 'loss/train': 1.8825764656066895} +03/04/2022 16:36:19 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 16:36:24 - INFO - codeparrot_training - Step 23109: {'lr': 0.00047532011521191634, 'samples': 11832320, 'steps': 23109, 'loss/train': 2.4214417934417725} +03/04/2022 16:36:27 - INFO - codeparrot_training - Step 23110: {'lr': 0.00047531781608785203, 'samples': 11832832, 'steps': 23110, 'loss/train': 1.827053189277649} +03/04/2022 16:36:27 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 16:36:33 - INFO - codeparrot_training - Step 23111: {'lr': 0.00047531551686226303, 'samples': 11833344, 'steps': 23111, 'loss/train': 0.8318002820014954} +03/04/2022 16:36:36 - INFO - codeparrot_training - Step 23112: {'lr': 0.00047531321753515026, 'samples': 11833856, 'steps': 23112, 'loss/train': 1.63663911819458} +03/04/2022 16:36:36 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 16:36:41 - INFO - codeparrot_training - Step 23113: {'lr': 0.0004753109181065149, 'samples': 11834368, 'steps': 23113, 'loss/train': 2.08450984954834} +03/04/2022 16:36:44 - INFO - codeparrot_training - Step 23114: {'lr': 0.00047530861857635786, 'samples': 11834880, 'steps': 23114, 'loss/train': 2.0302882194519043} +03/04/2022 16:36:45 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 16:36:50 - INFO - codeparrot_training - Step 23115: {'lr': 0.00047530631894468034, 'samples': 11835392, 'steps': 23115, 'loss/train': 1.8528622388839722} +03/04/2022 16:36:53 - INFO - codeparrot_training - Step 23116: {'lr': 0.0004753040192114831, 'samples': 11835904, 'steps': 23116, 'loss/train': 1.9577503204345703} +03/04/2022 16:36:53 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/04/2022 16:36:58 - INFO - codeparrot_training - Step 23117: {'lr': 0.00047530171937676754, 'samples': 11836416, 'steps': 23117, 'loss/train': 1.5306718349456787} +03/04/2022 16:37:01 - INFO - codeparrot_training - Step 23118: {'lr': 0.0004752994194405344, 'samples': 11836928, 'steps': 23118, 'loss/train': 2.3487672805786133} +03/04/2022 16:37:02 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/04/2022 16:37:07 - INFO - codeparrot_training - Step 23119: {'lr': 0.0004752971194027848, 'samples': 11837440, 'steps': 23119, 'loss/train': 1.8173080682754517} +03/04/2022 16:37:10 - INFO - codeparrot_training - Step 23120: {'lr': 0.0004752948192635198, 'samples': 11837952, 'steps': 23120, 'loss/train': 1.9992766380310059} +03/04/2022 16:37:10 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 16:37:15 - INFO - codeparrot_training - Step 23121: {'lr': 0.0004752925190227405, 'samples': 11838464, 'steps': 23121, 'loss/train': 0.9863783121109009} +03/04/2022 16:37:18 - INFO - codeparrot_training - Step 23122: {'lr': 0.0004752902186804478, 'samples': 11838976, 'steps': 23122, 'loss/train': 2.241175413131714} +03/04/2022 16:37:19 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/04/2022 16:37:23 - INFO - codeparrot_training - Step 23123: {'lr': 0.0004752879182366429, 'samples': 11839488, 'steps': 23123, 'loss/train': 2.085106134414673} +03/04/2022 16:37:26 - INFO - codeparrot_training - Step 23124: {'lr': 0.0004752856176913266, 'samples': 11840000, 'steps': 23124, 'loss/train': 1.8239665031433105} +03/04/2022 16:37:27 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/04/2022 16:37:32 - INFO - codeparrot_training - Step 23125: {'lr': 0.0004752833170445001, 'samples': 11840512, 'steps': 23125, 'loss/train': 1.6510252952575684} +03/04/2022 16:37:35 - INFO - codeparrot_training - Step 23126: {'lr': 0.0004752810162961645, 'samples': 11841024, 'steps': 23126, 'loss/train': 2.223454713821411} +03/04/2022 16:37:36 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 16:37:40 - INFO - codeparrot_training - Step 23127: {'lr': 0.0004752787154463207, 'samples': 11841536, 'steps': 23127, 'loss/train': 1.3779000043869019} +03/04/2022 16:37:43 - INFO - codeparrot_training - Step 23128: {'lr': 0.0004752764144949698, 'samples': 11842048, 'steps': 23128, 'loss/train': 2.237727403640747} +03/04/2022 16:37:45 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 16:37:49 - INFO - codeparrot_training - Step 23129: {'lr': 0.0004752741134421128, 'samples': 11842560, 'steps': 23129, 'loss/train': 1.6545382738113403} +03/04/2022 16:37:52 - INFO - codeparrot_training - Step 23130: {'lr': 0.00047527181228775077, 'samples': 11843072, 'steps': 23130, 'loss/train': 1.7035603523254395} +03/04/2022 16:37:53 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 16:37:57 - INFO - codeparrot_training - Step 23131: {'lr': 0.0004752695110318848, 'samples': 11843584, 'steps': 23131, 'loss/train': 1.5229278802871704} +03/04/2022 16:38:00 - INFO - codeparrot_training - Step 23132: {'lr': 0.00047526720967451573, 'samples': 11844096, 'steps': 23132, 'loss/train': 1.8826240301132202} +03/04/2022 16:38:02 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 16:38:06 - INFO - codeparrot_training - Step 23133: {'lr': 0.0004752649082156448, 'samples': 11844608, 'steps': 23133, 'loss/train': 1.5709871053695679} +03/04/2022 16:38:09 - INFO - codeparrot_training - Step 23134: {'lr': 0.00047526260665527306, 'samples': 11845120, 'steps': 23134, 'loss/train': 4.263085842132568} +03/04/2022 16:38:10 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/04/2022 16:38:14 - INFO - codeparrot_training - Step 23135: {'lr': 0.0004752603049934014, 'samples': 11845632, 'steps': 23135, 'loss/train': 0.931387186050415} +03/04/2022 16:38:17 - INFO - codeparrot_training - Step 23136: {'lr': 0.0004752580032300309, 'samples': 11846144, 'steps': 23136, 'loss/train': 0.9280657172203064} +03/04/2022 16:38:19 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/04/2022 16:38:22 - INFO - codeparrot_training - Step 23137: {'lr': 0.0004752557013651626, 'samples': 11846656, 'steps': 23137, 'loss/train': 1.8201422691345215} +03/04/2022 16:38:26 - INFO - codeparrot_training - Step 23138: {'lr': 0.00047525339939879764, 'samples': 11847168, 'steps': 23138, 'loss/train': 1.7588602304458618} +03/04/2022 16:38:27 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/04/2022 16:38:31 - INFO - codeparrot_training - Step 23139: {'lr': 0.0004752510973309369, 'samples': 11847680, 'steps': 23139, 'loss/train': 1.0945727825164795} +03/04/2022 16:38:34 - INFO - codeparrot_training - Step 23140: {'lr': 0.00047524879516158155, 'samples': 11848192, 'steps': 23140, 'loss/train': 2.300016403198242} +03/04/2022 16:38:36 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 16:38:39 - INFO - codeparrot_training - Step 23141: {'lr': 0.00047524649289073254, 'samples': 11848704, 'steps': 23141, 'loss/train': 1.8613742589950562} +03/04/2022 16:38:43 - INFO - codeparrot_training - Step 23142: {'lr': 0.00047524419051839093, 'samples': 11849216, 'steps': 23142, 'loss/train': 1.7196059226989746} +03/04/2022 16:38:45 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 16:38:48 - INFO - codeparrot_training - Step 23143: {'lr': 0.00047524188804455776, 'samples': 11849728, 'steps': 23143, 'loss/train': 2.2095608711242676} +03/04/2022 16:38:51 - INFO - codeparrot_training - Step 23144: {'lr': 0.0004752395854692341, 'samples': 11850240, 'steps': 23144, 'loss/train': 2.335162878036499} +03/04/2022 16:38:53 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 16:38:56 - INFO - codeparrot_training - Step 23145: {'lr': 0.0004752372827924209, 'samples': 11850752, 'steps': 23145, 'loss/train': 1.929175853729248} +03/04/2022 16:38:59 - INFO - codeparrot_training - Step 23146: {'lr': 0.0004752349800141193, 'samples': 11851264, 'steps': 23146, 'loss/train': 2.6965572834014893} +03/04/2022 16:39:02 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 16:39:05 - INFO - codeparrot_training - Step 23147: {'lr': 0.0004752326771343303, 'samples': 11851776, 'steps': 23147, 'loss/train': 1.53349769115448} +03/04/2022 16:39:08 - INFO - codeparrot_training - Step 23148: {'lr': 0.00047523037415305494, 'samples': 11852288, 'steps': 23148, 'loss/train': 0.49059218168258667} +03/04/2022 16:39:10 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/04/2022 16:39:13 - INFO - codeparrot_training - Step 23149: {'lr': 0.0004752280710702942, 'samples': 11852800, 'steps': 23149, 'loss/train': 2.1101698875427246} +03/04/2022 16:39:16 - INFO - codeparrot_training - Step 23150: {'lr': 0.0004752257678860492, 'samples': 11853312, 'steps': 23150, 'loss/train': 2.3917670249938965} +03/04/2022 16:39:20 - INFO - codeparrot_training - Step 23151: {'lr': 0.00047522346460032093, 'samples': 11853824, 'steps': 23151, 'loss/train': 1.9076279401779175} +03/04/2022 16:39:20 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/04/2022 16:39:25 - INFO - codeparrot_training - Step 23152: {'lr': 0.0004752211612131104, 'samples': 11854336, 'steps': 23152, 'loss/train': 1.5884345769882202} +03/04/2022 16:39:28 - INFO - codeparrot_training - Step 23153: {'lr': 0.00047521885772441874, 'samples': 11854848, 'steps': 23153, 'loss/train': 1.9376013278961182} +03/04/2022 16:39:28 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/04/2022 16:39:34 - INFO - codeparrot_training - Step 23154: {'lr': 0.00047521655413424705, 'samples': 11855360, 'steps': 23154, 'loss/train': 1.9507161378860474} +03/04/2022 16:39:37 - INFO - codeparrot_training - Step 23155: {'lr': 0.0004752142504425961, 'samples': 11855872, 'steps': 23155, 'loss/train': 1.5065698623657227} +03/04/2022 16:39:37 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 16:39:42 - INFO - codeparrot_training - Step 23156: {'lr': 0.0004752119466494671, 'samples': 11856384, 'steps': 23156, 'loss/train': 2.0272810459136963} +03/04/2022 16:39:45 - INFO - codeparrot_training - Step 23157: {'lr': 0.0004752096427548611, 'samples': 11856896, 'steps': 23157, 'loss/train': 1.5631744861602783} +03/04/2022 16:39:46 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 16:39:51 - INFO - codeparrot_training - Step 23158: {'lr': 0.00047520733875877906, 'samples': 11857408, 'steps': 23158, 'loss/train': 0.9624627232551575} +03/04/2022 16:39:54 - INFO - codeparrot_training - Step 23159: {'lr': 0.00047520503466122216, 'samples': 11857920, 'steps': 23159, 'loss/train': 2.444775342941284} +03/04/2022 16:39:55 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 16:39:59 - INFO - codeparrot_training - Step 23160: {'lr': 0.0004752027304621913, 'samples': 11858432, 'steps': 23160, 'loss/train': 1.7572758197784424} +03/04/2022 16:40:02 - INFO - codeparrot_training - Step 23161: {'lr': 0.0004752004261616876, 'samples': 11858944, 'steps': 23161, 'loss/train': 1.698852777481079} +03/04/2022 16:40:03 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 16:40:08 - INFO - codeparrot_training - Step 23162: {'lr': 0.000475198121759712, 'samples': 11859456, 'steps': 23162, 'loss/train': 1.5456608533859253} +03/04/2022 16:40:11 - INFO - codeparrot_training - Step 23163: {'lr': 0.0004751958172562656, 'samples': 11859968, 'steps': 23163, 'loss/train': 1.4350578784942627} +03/04/2022 16:40:11 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 16:40:16 - INFO - codeparrot_training - Step 23164: {'lr': 0.00047519351265134954, 'samples': 11860480, 'steps': 23164, 'loss/train': 1.976426601409912} +03/04/2022 16:40:19 - INFO - codeparrot_training - Step 23165: {'lr': 0.00047519120794496466, 'samples': 11860992, 'steps': 23165, 'loss/train': 0.5072129368782043} +03/04/2022 16:40:20 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 16:40:25 - INFO - codeparrot_training - Step 23166: {'lr': 0.00047518890313711217, 'samples': 11861504, 'steps': 23166, 'loss/train': 2.3028182983398438} +03/04/2022 16:40:28 - INFO - codeparrot_training - Step 23167: {'lr': 0.000475186598227793, 'samples': 11862016, 'steps': 23167, 'loss/train': 3.029904365539551} +03/04/2022 16:40:30 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/04/2022 16:40:33 - INFO - codeparrot_training - Step 23168: {'lr': 0.0004751842932170082, 'samples': 11862528, 'steps': 23168, 'loss/train': 2.0368826389312744} +03/04/2022 16:40:36 - INFO - codeparrot_training - Step 23169: {'lr': 0.00047518198810475885, 'samples': 11863040, 'steps': 23169, 'loss/train': 2.1336557865142822} +03/04/2022 16:40:38 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/04/2022 16:40:42 - INFO - codeparrot_training - Step 23170: {'lr': 0.00047517968289104596, 'samples': 11863552, 'steps': 23170, 'loss/train': 0.4163428246974945} +03/04/2022 16:40:45 - INFO - codeparrot_training - Step 23171: {'lr': 0.0004751773775758706, 'samples': 11864064, 'steps': 23171, 'loss/train': 2.602907657623291} +03/04/2022 16:40:47 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 16:40:50 - INFO - codeparrot_training - Step 23172: {'lr': 0.00047517507215923376, 'samples': 11864576, 'steps': 23172, 'loss/train': 1.5484733581542969} +03/04/2022 16:40:53 - INFO - codeparrot_training - Step 23173: {'lr': 0.00047517276664113653, 'samples': 11865088, 'steps': 23173, 'loss/train': 2.38571834564209} +03/04/2022 16:40:55 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 16:40:59 - INFO - codeparrot_training - Step 23174: {'lr': 0.0004751704610215799, 'samples': 11865600, 'steps': 23174, 'loss/train': 1.8911632299423218} +03/04/2022 16:41:02 - INFO - codeparrot_training - Step 23175: {'lr': 0.000475168155300565, 'samples': 11866112, 'steps': 23175, 'loss/train': 1.920558214187622} +03/04/2022 16:41:04 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/04/2022 16:41:07 - INFO - codeparrot_training - Step 23176: {'lr': 0.00047516584947809274, 'samples': 11866624, 'steps': 23176, 'loss/train': 2.195065498352051} +03/04/2022 16:41:10 - INFO - codeparrot_training - Step 23177: {'lr': 0.00047516354355416426, 'samples': 11867136, 'steps': 23177, 'loss/train': 1.7592113018035889} +03/04/2022 16:41:12 - INFO - codeparrot_training - Skipping example with length 524 (seq_length=1024) +03/04/2022 16:41:16 - INFO - codeparrot_training - Step 23178: {'lr': 0.00047516123752878054, 'samples': 11867648, 'steps': 23178, 'loss/train': 0.650576651096344} +03/04/2022 16:41:19 - INFO - codeparrot_training - Step 23179: {'lr': 0.00047515893140194265, 'samples': 11868160, 'steps': 23179, 'loss/train': 2.4016125202178955} +03/04/2022 16:41:21 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 16:41:24 - INFO - codeparrot_training - Step 23180: {'lr': 0.0004751566251736516, 'samples': 11868672, 'steps': 23180, 'loss/train': 1.367958664894104} +03/04/2022 16:41:27 - INFO - codeparrot_training - Step 23181: {'lr': 0.00047515431884390845, 'samples': 11869184, 'steps': 23181, 'loss/train': 0.4842330515384674} +03/04/2022 16:41:29 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 16:41:32 - INFO - codeparrot_training - Step 23182: {'lr': 0.00047515201241271426, 'samples': 11869696, 'steps': 23182, 'loss/train': 2.0785350799560547} +03/04/2022 16:41:36 - INFO - codeparrot_training - Step 23183: {'lr': 0.00047514970588007007, 'samples': 11870208, 'steps': 23183, 'loss/train': 2.246145725250244} +03/04/2022 16:41:37 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 16:41:41 - INFO - codeparrot_training - Step 23184: {'lr': 0.0004751473992459768, 'samples': 11870720, 'steps': 23184, 'loss/train': 0.2184327393770218} +03/04/2022 16:41:44 - INFO - codeparrot_training - Step 23185: {'lr': 0.0004751450925104357, 'samples': 11871232, 'steps': 23185, 'loss/train': 2.077575206756592} +03/04/2022 16:41:46 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/04/2022 16:41:49 - INFO - codeparrot_training - Step 23186: {'lr': 0.00047514278567344765, 'samples': 11871744, 'steps': 23186, 'loss/train': 2.5997774600982666} +03/04/2022 16:41:52 - INFO - codeparrot_training - Step 23187: {'lr': 0.00047514047873501374, 'samples': 11872256, 'steps': 23187, 'loss/train': 1.9030721187591553} +03/04/2022 16:41:54 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 16:41:58 - INFO - codeparrot_training - Step 23188: {'lr': 0.000475138171695135, 'samples': 11872768, 'steps': 23188, 'loss/train': 1.8333888053894043} +03/04/2022 16:42:01 - INFO - codeparrot_training - Step 23189: {'lr': 0.00047513586455381245, 'samples': 11873280, 'steps': 23189, 'loss/train': 3.1604721546173096} +03/04/2022 16:42:02 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/04/2022 16:42:06 - INFO - codeparrot_training - Step 23190: {'lr': 0.00047513355731104717, 'samples': 11873792, 'steps': 23190, 'loss/train': 1.048467993736267} +03/04/2022 16:42:09 - INFO - codeparrot_training - Step 23191: {'lr': 0.0004751312499668402, 'samples': 11874304, 'steps': 23191, 'loss/train': 1.2705897092819214} +03/04/2022 16:42:11 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/04/2022 16:42:15 - INFO - codeparrot_training - Step 23192: {'lr': 0.00047512894252119256, 'samples': 11874816, 'steps': 23192, 'loss/train': 1.7074545621871948} +03/04/2022 16:42:18 - INFO - codeparrot_training - Step 23193: {'lr': 0.0004751266349741053, 'samples': 11875328, 'steps': 23193, 'loss/train': 1.554880142211914} +03/04/2022 16:42:20 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/04/2022 16:42:23 - INFO - codeparrot_training - Step 23194: {'lr': 0.0004751243273255794, 'samples': 11875840, 'steps': 23194, 'loss/train': 1.5918833017349243} +03/04/2022 16:42:26 - INFO - codeparrot_training - Step 23195: {'lr': 0.000475122019575616, 'samples': 11876352, 'steps': 23195, 'loss/train': 2.129276990890503} +03/04/2022 16:42:28 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 16:42:32 - INFO - codeparrot_training - Step 23196: {'lr': 0.0004751197117242161, 'samples': 11876864, 'steps': 23196, 'loss/train': 1.8442822694778442} +03/04/2022 16:42:35 - INFO - codeparrot_training - Step 23197: {'lr': 0.0004751174037713807, 'samples': 11877376, 'steps': 23197, 'loss/train': 2.510540008544922} +03/04/2022 16:42:37 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/04/2022 16:42:40 - INFO - codeparrot_training - Step 23198: {'lr': 0.00047511509571711085, 'samples': 11877888, 'steps': 23198, 'loss/train': 2.797114372253418} +03/04/2022 16:42:43 - INFO - codeparrot_training - Step 23199: {'lr': 0.00047511278756140766, 'samples': 11878400, 'steps': 23199, 'loss/train': 1.7481770515441895} +03/04/2022 16:42:45 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 16:42:48 - INFO - codeparrot_training - Step 23200: {'lr': 0.00047511047930427216, 'samples': 11878912, 'steps': 23200, 'loss/train': 2.042721748352051} +03/04/2022 16:42:52 - INFO - codeparrot_training - Step 23201: {'lr': 0.00047510817094570526, 'samples': 11879424, 'steps': 23201, 'loss/train': 2.2307982444763184} +03/04/2022 16:42:53 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/04/2022 16:42:57 - INFO - codeparrot_training - Step 23202: {'lr': 0.00047510586248570815, 'samples': 11879936, 'steps': 23202, 'loss/train': 2.3846328258514404} +03/04/2022 16:43:00 - INFO - codeparrot_training - Step 23203: {'lr': 0.00047510355392428176, 'samples': 11880448, 'steps': 23203, 'loss/train': 2.149144411087036} +03/04/2022 16:43:02 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 16:43:05 - INFO - codeparrot_training - Step 23204: {'lr': 0.00047510124526142723, 'samples': 11880960, 'steps': 23204, 'loss/train': 2.113197088241577} +03/04/2022 16:43:09 - INFO - codeparrot_training - Step 23205: {'lr': 0.00047509893649714554, 'samples': 11881472, 'steps': 23205, 'loss/train': 1.2601821422576904} +03/04/2022 16:43:11 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 16:43:14 - INFO - codeparrot_training - Step 23206: {'lr': 0.00047509662763143775, 'samples': 11881984, 'steps': 23206, 'loss/train': 1.681097388267517} +03/04/2022 16:43:17 - INFO - codeparrot_training - Step 23207: {'lr': 0.00047509431866430487, 'samples': 11882496, 'steps': 23207, 'loss/train': 0.9996436834335327} +03/04/2022 16:43:19 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 16:43:22 - INFO - codeparrot_training - Step 23208: {'lr': 0.000475092009595748, 'samples': 11883008, 'steps': 23208, 'loss/train': 2.12515926361084} +03/04/2022 16:43:26 - INFO - codeparrot_training - Step 23209: {'lr': 0.0004750897004257681, 'samples': 11883520, 'steps': 23209, 'loss/train': 1.9865151643753052} +03/04/2022 16:43:31 - INFO - codeparrot_training - Step 23210: {'lr': 0.0004750873911543663, 'samples': 11884032, 'steps': 23210, 'loss/train': 1.2823290824890137} +03/04/2022 16:43:34 - INFO - codeparrot_training - Step 23211: {'lr': 0.00047508508178154354, 'samples': 11884544, 'steps': 23211, 'loss/train': 1.5993494987487793} +03/04/2022 16:43:37 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 16:43:39 - INFO - codeparrot_training - Step 23212: {'lr': 0.00047508277230730095, 'samples': 11885056, 'steps': 23212, 'loss/train': 2.489546537399292} +03/04/2022 16:43:43 - INFO - codeparrot_training - Step 23213: {'lr': 0.00047508046273163953, 'samples': 11885568, 'steps': 23213, 'loss/train': 1.9989416599273682} +03/04/2022 16:43:45 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 16:43:48 - INFO - codeparrot_training - Step 23214: {'lr': 0.0004750781530545603, 'samples': 11886080, 'steps': 23214, 'loss/train': 0.9731683135032654} +03/04/2022 16:43:51 - INFO - codeparrot_training - Step 23215: {'lr': 0.0004750758432760644, 'samples': 11886592, 'steps': 23215, 'loss/train': 2.5060012340545654} +03/04/2022 16:43:54 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 16:43:56 - INFO - codeparrot_training - Step 23216: {'lr': 0.0004750735333961527, 'samples': 11887104, 'steps': 23216, 'loss/train': 1.6150681972503662} +03/04/2022 16:43:59 - INFO - codeparrot_training - Step 23217: {'lr': 0.00047507122341482644, 'samples': 11887616, 'steps': 23217, 'loss/train': 1.8424334526062012} +03/04/2022 16:44:02 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 16:44:05 - INFO - codeparrot_training - Step 23218: {'lr': 0.00047506891333208654, 'samples': 11888128, 'steps': 23218, 'loss/train': 1.9041028022766113} +03/04/2022 16:44:08 - INFO - codeparrot_training - Step 23219: {'lr': 0.000475066603147934, 'samples': 11888640, 'steps': 23219, 'loss/train': 1.6045600175857544} +03/04/2022 16:44:11 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 16:44:13 - INFO - codeparrot_training - Step 23220: {'lr': 0.00047506429286236997, 'samples': 11889152, 'steps': 23220, 'loss/train': 1.3972278833389282} +03/04/2022 16:44:16 - INFO - codeparrot_training - Step 23221: {'lr': 0.00047506198247539546, 'samples': 11889664, 'steps': 23221, 'loss/train': 2.0493619441986084} +03/04/2022 16:44:20 - INFO - codeparrot_training - Step 23222: {'lr': 0.0004750596719870114, 'samples': 11890176, 'steps': 23222, 'loss/train': 1.0680570602416992} +03/04/2022 16:44:20 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 16:44:25 - INFO - codeparrot_training - Step 23223: {'lr': 0.000475057361397219, 'samples': 11890688, 'steps': 23223, 'loss/train': 2.067452907562256} +03/04/2022 16:44:28 - INFO - codeparrot_training - Step 23224: {'lr': 0.0004750550507060192, 'samples': 11891200, 'steps': 23224, 'loss/train': 2.375077962875366} +03/04/2022 16:44:30 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 16:44:34 - INFO - codeparrot_training - Step 23225: {'lr': 0.0004750527399134131, 'samples': 11891712, 'steps': 23225, 'loss/train': 2.5771713256835938} +03/04/2022 16:44:37 - INFO - codeparrot_training - Step 23226: {'lr': 0.00047505042901940163, 'samples': 11892224, 'steps': 23226, 'loss/train': 4.00886869430542} +03/04/2022 16:44:39 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 16:44:42 - INFO - codeparrot_training - Step 23227: {'lr': 0.00047504811802398603, 'samples': 11892736, 'steps': 23227, 'loss/train': 2.4122233390808105} +03/04/2022 16:44:45 - INFO - codeparrot_training - Step 23228: {'lr': 0.0004750458069271671, 'samples': 11893248, 'steps': 23228, 'loss/train': 1.5195339918136597} +03/04/2022 16:44:47 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/04/2022 16:44:51 - INFO - codeparrot_training - Step 23229: {'lr': 0.0004750434957289461, 'samples': 11893760, 'steps': 23229, 'loss/train': 2.001708984375} +03/04/2022 16:44:54 - INFO - codeparrot_training - Step 23230: {'lr': 0.0004750411844293239, 'samples': 11894272, 'steps': 23230, 'loss/train': 1.887816309928894} +03/04/2022 16:44:56 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 16:44:59 - INFO - codeparrot_training - Step 23231: {'lr': 0.0004750388730283016, 'samples': 11894784, 'steps': 23231, 'loss/train': 1.5888465642929077} +03/04/2022 16:45:02 - INFO - codeparrot_training - Step 23232: {'lr': 0.0004750365615258804, 'samples': 11895296, 'steps': 23232, 'loss/train': 2.467845916748047} +03/04/2022 16:45:04 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 16:45:08 - INFO - codeparrot_training - Step 23233: {'lr': 0.00047503424992206107, 'samples': 11895808, 'steps': 23233, 'loss/train': 1.9950379133224487} +03/04/2022 16:45:11 - INFO - codeparrot_training - Step 23234: {'lr': 0.00047503193821684476, 'samples': 11896320, 'steps': 23234, 'loss/train': 2.179570436477661} +03/04/2022 16:45:12 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/04/2022 16:45:16 - INFO - codeparrot_training - Step 23235: {'lr': 0.0004750296264102326, 'samples': 11896832, 'steps': 23235, 'loss/train': 2.0285236835479736} +03/04/2022 16:45:19 - INFO - codeparrot_training - Step 23236: {'lr': 0.0004750273145022256, 'samples': 11897344, 'steps': 23236, 'loss/train': 1.436415433883667} +03/04/2022 16:45:20 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 16:45:24 - INFO - codeparrot_training - Step 23237: {'lr': 0.00047502500249282464, 'samples': 11897856, 'steps': 23237, 'loss/train': 2.2570817470550537} +03/04/2022 16:45:28 - INFO - codeparrot_training - Step 23238: {'lr': 0.000475022690382031, 'samples': 11898368, 'steps': 23238, 'loss/train': 1.4422721862792969} +03/04/2022 16:45:29 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 16:45:33 - INFO - codeparrot_training - Step 23239: {'lr': 0.0004750203781698456, 'samples': 11898880, 'steps': 23239, 'loss/train': 0.9484816193580627} +03/04/2022 16:45:36 - INFO - codeparrot_training - Step 23240: {'lr': 0.0004750180658562694, 'samples': 11899392, 'steps': 23240, 'loss/train': 1.9728771448135376} +03/04/2022 16:45:37 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 16:45:41 - INFO - codeparrot_training - Step 23241: {'lr': 0.00047501575344130356, 'samples': 11899904, 'steps': 23241, 'loss/train': 1.2998861074447632} +03/04/2022 16:45:44 - INFO - codeparrot_training - Step 23242: {'lr': 0.00047501344092494915, 'samples': 11900416, 'steps': 23242, 'loss/train': 1.3667830228805542} +03/04/2022 16:45:46 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/04/2022 16:45:50 - INFO - codeparrot_training - Step 23243: {'lr': 0.0004750111283072071, 'samples': 11900928, 'steps': 23243, 'loss/train': 2.3024253845214844} +03/04/2022 16:45:53 - INFO - codeparrot_training - Step 23244: {'lr': 0.00047500881558807854, 'samples': 11901440, 'steps': 23244, 'loss/train': 1.5687717199325562} +03/04/2022 16:45:54 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/04/2022 16:45:58 - INFO - codeparrot_training - Step 23245: {'lr': 0.00047500650276756455, 'samples': 11901952, 'steps': 23245, 'loss/train': 2.361098527908325} +03/04/2022 16:46:01 - INFO - codeparrot_training - Step 23246: {'lr': 0.00047500418984566594, 'samples': 11902464, 'steps': 23246, 'loss/train': 1.5479620695114136} +03/04/2022 16:46:03 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 16:46:06 - INFO - codeparrot_training - Step 23247: {'lr': 0.000475001876822384, 'samples': 11902976, 'steps': 23247, 'loss/train': 1.529552936553955} +03/04/2022 16:46:10 - INFO - codeparrot_training - Step 23248: {'lr': 0.00047499956369771967, 'samples': 11903488, 'steps': 23248, 'loss/train': 2.109652280807495} +03/04/2022 16:46:11 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 16:46:15 - INFO - codeparrot_training - Step 23249: {'lr': 0.00047499725047167406, 'samples': 11904000, 'steps': 23249, 'loss/train': 2.3392512798309326} +03/04/2022 16:46:18 - INFO - codeparrot_training - Step 23250: {'lr': 0.0004749949371442481, 'samples': 11904512, 'steps': 23250, 'loss/train': 1.2236404418945312} +03/04/2022 16:46:23 - INFO - codeparrot_training - Step 23251: {'lr': 0.00047499262371544294, 'samples': 11905024, 'steps': 23251, 'loss/train': 1.9393457174301147} +03/04/2022 16:46:26 - INFO - codeparrot_training - Step 23252: {'lr': 0.00047499031018525953, 'samples': 11905536, 'steps': 23252, 'loss/train': 1.4881577491760254} +03/04/2022 16:46:28 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 16:46:32 - INFO - codeparrot_training - Step 23253: {'lr': 0.00047498799655369895, 'samples': 11906048, 'steps': 23253, 'loss/train': 1.5180965662002563} +03/04/2022 16:46:35 - INFO - codeparrot_training - Step 23254: {'lr': 0.0004749856828207623, 'samples': 11906560, 'steps': 23254, 'loss/train': 2.0907058715820312} +03/04/2022 16:46:36 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 16:46:40 - INFO - codeparrot_training - Step 23255: {'lr': 0.00047498336898645055, 'samples': 11907072, 'steps': 23255, 'loss/train': 6.035892486572266} +03/04/2022 16:46:44 - INFO - codeparrot_training - Step 23256: {'lr': 0.00047498105505076475, 'samples': 11907584, 'steps': 23256, 'loss/train': 1.687117338180542} +03/04/2022 16:46:45 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 16:46:49 - INFO - codeparrot_training - Step 23257: {'lr': 0.000474978741013706, 'samples': 11908096, 'steps': 23257, 'loss/train': 1.9847980737686157} +03/04/2022 16:46:52 - INFO - codeparrot_training - Step 23258: {'lr': 0.0004749764268752753, 'samples': 11908608, 'steps': 23258, 'loss/train': 0.7909811735153198} +03/04/2022 16:46:54 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/04/2022 16:46:57 - INFO - codeparrot_training - Step 23259: {'lr': 0.0004749741126354736, 'samples': 11909120, 'steps': 23259, 'loss/train': 2.4033703804016113} +03/04/2022 16:47:01 - INFO - codeparrot_training - Step 23260: {'lr': 0.00047497179829430217, 'samples': 11909632, 'steps': 23260, 'loss/train': 1.2061327695846558} +03/04/2022 16:47:02 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 16:47:06 - INFO - codeparrot_training - Step 23261: {'lr': 0.0004749694838517619, 'samples': 11910144, 'steps': 23261, 'loss/train': 1.9675450325012207} +03/04/2022 16:47:09 - INFO - codeparrot_training - Step 23262: {'lr': 0.0004749671693078538, 'samples': 11910656, 'steps': 23262, 'loss/train': 0.8200609683990479} +03/04/2022 16:47:11 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 16:47:14 - INFO - codeparrot_training - Step 23263: {'lr': 0.00047496485466257896, 'samples': 11911168, 'steps': 23263, 'loss/train': 1.0342795848846436} +03/04/2022 16:47:17 - INFO - codeparrot_training - Step 23264: {'lr': 0.0004749625399159384, 'samples': 11911680, 'steps': 23264, 'loss/train': 1.7826805114746094} +03/04/2022 16:47:20 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/04/2022 16:47:23 - INFO - codeparrot_training - Step 23265: {'lr': 0.0004749602250679332, 'samples': 11912192, 'steps': 23265, 'loss/train': 2.282989263534546} +03/04/2022 16:47:26 - INFO - codeparrot_training - Step 23266: {'lr': 0.00047495791011856447, 'samples': 11912704, 'steps': 23266, 'loss/train': 1.7917360067367554} +03/04/2022 16:47:29 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 16:47:31 - INFO - codeparrot_training - Step 23267: {'lr': 0.00047495559506783317, 'samples': 11913216, 'steps': 23267, 'loss/train': 2.00886869430542} +03/04/2022 16:47:34 - INFO - codeparrot_training - Step 23268: {'lr': 0.00047495327991574034, 'samples': 11913728, 'steps': 23268, 'loss/train': 1.374742031097412} +03/04/2022 16:47:37 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/04/2022 16:47:40 - INFO - codeparrot_training - Step 23269: {'lr': 0.0004749509646622869, 'samples': 11914240, 'steps': 23269, 'loss/train': 1.2179855108261108} +03/04/2022 16:47:43 - INFO - codeparrot_training - Step 23270: {'lr': 0.00047494864930747415, 'samples': 11914752, 'steps': 23270, 'loss/train': 1.9321541786193848} +03/04/2022 16:47:45 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/04/2022 16:47:48 - INFO - codeparrot_training - Step 23271: {'lr': 0.000474946333851303, 'samples': 11915264, 'steps': 23271, 'loss/train': 0.9668684005737305} +03/04/2022 16:47:51 - INFO - codeparrot_training - Step 23272: {'lr': 0.0004749440182937745, 'samples': 11915776, 'steps': 23272, 'loss/train': 1.0239180326461792} +03/04/2022 16:47:54 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 16:47:56 - INFO - codeparrot_training - Step 23273: {'lr': 0.0004749417026348897, 'samples': 11916288, 'steps': 23273, 'loss/train': 1.9658229351043701} +03/04/2022 16:48:00 - INFO - codeparrot_training - Step 23274: {'lr': 0.0004749393868746497, 'samples': 11916800, 'steps': 23274, 'loss/train': 1.0590659379959106} +03/04/2022 16:48:02 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 16:48:05 - INFO - codeparrot_training - Step 23275: {'lr': 0.0004749370710130554, 'samples': 11917312, 'steps': 23275, 'loss/train': 1.9139764308929443} +03/04/2022 16:48:09 - INFO - codeparrot_training - Step 23276: {'lr': 0.00047493475505010793, 'samples': 11917824, 'steps': 23276, 'loss/train': 1.865933895111084} +03/04/2022 16:48:12 - INFO - codeparrot_training - Step 23277: {'lr': 0.0004749324389858083, 'samples': 11918336, 'steps': 23277, 'loss/train': 2.1843745708465576} +03/04/2022 16:48:13 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 16:48:17 - INFO - codeparrot_training - Step 23278: {'lr': 0.00047493012282015767, 'samples': 11918848, 'steps': 23278, 'loss/train': 2.208540916442871} +03/04/2022 16:48:20 - INFO - codeparrot_training - Step 23279: {'lr': 0.00047492780655315693, 'samples': 11919360, 'steps': 23279, 'loss/train': 1.549017071723938} +03/04/2022 16:48:22 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/04/2022 16:48:26 - INFO - codeparrot_training - Step 23280: {'lr': 0.00047492549018480725, 'samples': 11919872, 'steps': 23280, 'loss/train': 1.855757236480713} +03/04/2022 16:48:29 - INFO - codeparrot_training - Step 23281: {'lr': 0.00047492317371510955, 'samples': 11920384, 'steps': 23281, 'loss/train': 1.514805793762207} +03/04/2022 16:48:31 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 16:48:34 - INFO - codeparrot_training - Step 23282: {'lr': 0.00047492085714406497, 'samples': 11920896, 'steps': 23282, 'loss/train': 2.5938451290130615} +03/04/2022 16:48:37 - INFO - codeparrot_training - Step 23283: {'lr': 0.00047491854047167453, 'samples': 11921408, 'steps': 23283, 'loss/train': 1.7550021409988403} +03/04/2022 16:48:39 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 16:48:42 - INFO - codeparrot_training - Step 23284: {'lr': 0.0004749162236979393, 'samples': 11921920, 'steps': 23284, 'loss/train': 1.1320462226867676} +03/04/2022 16:48:46 - INFO - codeparrot_training - Step 23285: {'lr': 0.0004749139068228602, 'samples': 11922432, 'steps': 23285, 'loss/train': 2.1778528690338135} +03/04/2022 16:48:47 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 16:48:51 - INFO - codeparrot_training - Step 23286: {'lr': 0.00047491158984643846, 'samples': 11922944, 'steps': 23286, 'loss/train': 2.530402421951294} +03/04/2022 16:48:54 - INFO - codeparrot_training - Step 23287: {'lr': 0.0004749092727686749, 'samples': 11923456, 'steps': 23287, 'loss/train': 2.2478601932525635} +03/04/2022 16:48:56 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 16:48:59 - INFO - codeparrot_training - Step 23288: {'lr': 0.00047490695558957083, 'samples': 11923968, 'steps': 23288, 'loss/train': 1.8344285488128662} +03/04/2022 16:49:02 - INFO - codeparrot_training - Step 23289: {'lr': 0.00047490463830912713, 'samples': 11924480, 'steps': 23289, 'loss/train': 2.0236165523529053} +03/04/2022 16:49:04 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 16:49:08 - INFO - codeparrot_training - Step 23290: {'lr': 0.0004749023209273448, 'samples': 11924992, 'steps': 23290, 'loss/train': 1.8343675136566162} +03/04/2022 16:49:11 - INFO - codeparrot_training - Step 23291: {'lr': 0.000474900003444225, 'samples': 11925504, 'steps': 23291, 'loss/train': 1.7070770263671875} +03/04/2022 16:49:13 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/04/2022 16:49:16 - INFO - codeparrot_training - Step 23292: {'lr': 0.0004748976858597687, 'samples': 11926016, 'steps': 23292, 'loss/train': 2.586672782897949} +03/04/2022 16:49:19 - INFO - codeparrot_training - Step 23293: {'lr': 0.00047489536817397706, 'samples': 11926528, 'steps': 23293, 'loss/train': 1.5994175672531128} +03/04/2022 16:49:21 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 16:49:25 - INFO - codeparrot_training - Step 23294: {'lr': 0.00047489305038685094, 'samples': 11927040, 'steps': 23294, 'loss/train': 0.3096446990966797} +03/04/2022 16:49:28 - INFO - codeparrot_training - Step 23295: {'lr': 0.00047489073249839153, 'samples': 11927552, 'steps': 23295, 'loss/train': 2.141629934310913} +03/04/2022 16:49:30 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/04/2022 16:49:33 - INFO - codeparrot_training - Step 23296: {'lr': 0.0004748884145085998, 'samples': 11928064, 'steps': 23296, 'loss/train': 1.8182371854782104} +03/04/2022 16:49:36 - INFO - codeparrot_training - Step 23297: {'lr': 0.0004748860964174768, 'samples': 11928576, 'steps': 23297, 'loss/train': 6.501819610595703} +03/04/2022 16:49:38 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/04/2022 16:49:41 - INFO - codeparrot_training - Step 23298: {'lr': 0.00047488377822502365, 'samples': 11929088, 'steps': 23298, 'loss/train': 1.8701205253601074} +03/04/2022 16:49:45 - INFO - codeparrot_training - Step 23299: {'lr': 0.00047488145993124134, 'samples': 11929600, 'steps': 23299, 'loss/train': 2.367201566696167} +03/04/2022 16:49:47 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 16:49:50 - INFO - codeparrot_training - Step 23300: {'lr': 0.0004748791415361309, 'samples': 11930112, 'steps': 23300, 'loss/train': 3.082859754562378} +03/04/2022 16:49:53 - INFO - codeparrot_training - Step 23301: {'lr': 0.00047487682303969336, 'samples': 11930624, 'steps': 23301, 'loss/train': 2.5544567108154297} +03/04/2022 16:49:56 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 16:49:59 - INFO - codeparrot_training - Step 23302: {'lr': 0.0004748745044419298, 'samples': 11931136, 'steps': 23302, 'loss/train': 2.31608510017395} +03/04/2022 16:50:02 - INFO - codeparrot_training - Step 23303: {'lr': 0.0004748721857428413, 'samples': 11931648, 'steps': 23303, 'loss/train': 1.6009459495544434} +03/04/2022 16:50:04 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 16:50:07 - INFO - codeparrot_training - Step 23304: {'lr': 0.00047486986694242887, 'samples': 11932160, 'steps': 23304, 'loss/train': 2.2071337699890137} +03/04/2022 16:50:10 - INFO - codeparrot_training - Step 23305: {'lr': 0.0004748675480406934, 'samples': 11932672, 'steps': 23305, 'loss/train': 2.213888168334961} +03/04/2022 16:50:13 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/04/2022 16:50:16 - INFO - codeparrot_training - Step 23306: {'lr': 0.0004748652290376363, 'samples': 11933184, 'steps': 23306, 'loss/train': 2.6588938236236572} +03/04/2022 16:50:19 - INFO - codeparrot_training - Step 23307: {'lr': 0.00047486290993325824, 'samples': 11933696, 'steps': 23307, 'loss/train': 2.8442652225494385} +03/04/2022 16:50:22 - INFO - codeparrot_training - Step 23308: {'lr': 0.00047486059072756047, 'samples': 11934208, 'steps': 23308, 'loss/train': 2.2317230701446533} +03/04/2022 16:50:22 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 16:50:27 - INFO - codeparrot_training - Step 23309: {'lr': 0.00047485827142054407, 'samples': 11934720, 'steps': 23309, 'loss/train': 1.5069676637649536} +03/04/2022 16:50:30 - INFO - codeparrot_training - Step 23310: {'lr': 0.0004748559520122099, 'samples': 11935232, 'steps': 23310, 'loss/train': 0.9620670080184937} +03/04/2022 16:50:31 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 16:50:36 - INFO - codeparrot_training - Step 23311: {'lr': 0.0004748536325025591, 'samples': 11935744, 'steps': 23311, 'loss/train': 0.6787362694740295} +03/04/2022 16:50:39 - INFO - codeparrot_training - Step 23312: {'lr': 0.0004748513128915928, 'samples': 11936256, 'steps': 23312, 'loss/train': 1.9569050073623657} +03/04/2022 16:50:40 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 16:50:44 - INFO - codeparrot_training - Step 23313: {'lr': 0.0004748489931793119, 'samples': 11936768, 'steps': 23313, 'loss/train': 1.692866563796997} +03/04/2022 16:50:47 - INFO - codeparrot_training - Step 23314: {'lr': 0.00047484667336571753, 'samples': 11937280, 'steps': 23314, 'loss/train': 1.9662597179412842} +03/04/2022 16:50:48 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 16:50:53 - INFO - codeparrot_training - Step 23315: {'lr': 0.0004748443534508107, 'samples': 11937792, 'steps': 23315, 'loss/train': 2.0747299194335938} +03/04/2022 16:50:56 - INFO - codeparrot_training - Step 23316: {'lr': 0.00047484203343459256, 'samples': 11938304, 'steps': 23316, 'loss/train': 2.2545511722564697} +03/04/2022 16:50:57 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/04/2022 16:51:01 - INFO - codeparrot_training - Step 23317: {'lr': 0.000474839713317064, 'samples': 11938816, 'steps': 23317, 'loss/train': 2.180447578430176} +03/04/2022 16:51:04 - INFO - codeparrot_training - Step 23318: {'lr': 0.00047483739309822615, 'samples': 11939328, 'steps': 23318, 'loss/train': 1.6996933221817017} +03/04/2022 16:51:05 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/04/2022 16:51:10 - INFO - codeparrot_training - Step 23319: {'lr': 0.00047483507277808, 'samples': 11939840, 'steps': 23319, 'loss/train': 2.0626962184906006} +03/04/2022 16:51:13 - INFO - codeparrot_training - Step 23320: {'lr': 0.0004748327523566267, 'samples': 11940352, 'steps': 23320, 'loss/train': 1.7217775583267212} +03/04/2022 16:51:14 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/04/2022 16:51:18 - INFO - codeparrot_training - Step 23321: {'lr': 0.0004748304318338672, 'samples': 11940864, 'steps': 23321, 'loss/train': 1.1312798261642456} +03/04/2022 16:51:21 - INFO - codeparrot_training - Step 23322: {'lr': 0.00047482811120980254, 'samples': 11941376, 'steps': 23322, 'loss/train': 2.1096572875976562} +03/04/2022 16:51:22 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 16:51:27 - INFO - codeparrot_training - Step 23323: {'lr': 0.0004748257904844339, 'samples': 11941888, 'steps': 23323, 'loss/train': 2.714707851409912} +03/04/2022 16:51:30 - INFO - codeparrot_training - Step 23324: {'lr': 0.00047482346965776215, 'samples': 11942400, 'steps': 23324, 'loss/train': 0.7907835841178894} +03/04/2022 16:51:30 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 16:51:35 - INFO - codeparrot_training - Step 23325: {'lr': 0.0004748211487297884, 'samples': 11942912, 'steps': 23325, 'loss/train': 2.409842014312744} +03/04/2022 16:51:38 - INFO - codeparrot_training - Step 23326: {'lr': 0.00047481882770051377, 'samples': 11943424, 'steps': 23326, 'loss/train': 1.2467879056930542} +03/04/2022 16:51:39 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/04/2022 16:51:44 - INFO - codeparrot_training - Step 23327: {'lr': 0.00047481650656993924, 'samples': 11943936, 'steps': 23327, 'loss/train': 2.0101513862609863} +03/04/2022 16:51:47 - INFO - codeparrot_training - Step 23328: {'lr': 0.00047481418533806586, 'samples': 11944448, 'steps': 23328, 'loss/train': 1.5548689365386963} +03/04/2022 16:51:48 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 16:51:52 - INFO - codeparrot_training - Step 23329: {'lr': 0.0004748118640048946, 'samples': 11944960, 'steps': 23329, 'loss/train': 1.6938414573669434} +03/04/2022 16:51:55 - INFO - codeparrot_training - Step 23330: {'lr': 0.00047480954257042666, 'samples': 11945472, 'steps': 23330, 'loss/train': 2.250408887863159} +03/04/2022 16:51:56 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/04/2022 16:52:01 - INFO - codeparrot_training - Step 23331: {'lr': 0.000474807221034663, 'samples': 11945984, 'steps': 23331, 'loss/train': 1.1894129514694214} +03/04/2022 16:52:04 - INFO - codeparrot_training - Step 23332: {'lr': 0.0004748048993976046, 'samples': 11946496, 'steps': 23332, 'loss/train': 1.5454301834106445} +03/04/2022 16:52:05 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 16:52:09 - INFO - codeparrot_training - Step 23333: {'lr': 0.0004748025776592527, 'samples': 11947008, 'steps': 23333, 'loss/train': 1.8648681640625} +03/04/2022 16:52:12 - INFO - codeparrot_training - Step 23334: {'lr': 0.00047480025581960817, 'samples': 11947520, 'steps': 23334, 'loss/train': 1.5155541896820068} +03/04/2022 16:52:14 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/04/2022 16:52:18 - INFO - codeparrot_training - Step 23335: {'lr': 0.0004747979338786721, 'samples': 11948032, 'steps': 23335, 'loss/train': 2.2037553787231445} +03/04/2022 16:52:21 - INFO - codeparrot_training - Step 23336: {'lr': 0.00047479561183644557, 'samples': 11948544, 'steps': 23336, 'loss/train': 2.0614449977874756} +03/04/2022 16:52:23 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 16:52:26 - INFO - codeparrot_training - Step 23337: {'lr': 0.00047479328969292963, 'samples': 11949056, 'steps': 23337, 'loss/train': 1.6581941843032837} +03/04/2022 16:52:29 - INFO - codeparrot_training - Step 23338: {'lr': 0.0004747909674481253, 'samples': 11949568, 'steps': 23338, 'loss/train': 2.0797245502471924} +03/04/2022 16:52:32 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 16:52:35 - INFO - codeparrot_training - Step 23339: {'lr': 0.00047478864510203355, 'samples': 11950080, 'steps': 23339, 'loss/train': 1.4287866353988647} +03/04/2022 16:52:38 - INFO - codeparrot_training - Step 23340: {'lr': 0.0004747863226546556, 'samples': 11950592, 'steps': 23340, 'loss/train': 2.6435933113098145} +03/04/2022 16:52:40 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 16:52:43 - INFO - codeparrot_training - Step 23341: {'lr': 0.0004747840001059923, 'samples': 11951104, 'steps': 23341, 'loss/train': 1.4793064594268799} +03/04/2022 16:52:46 - INFO - codeparrot_training - Step 23342: {'lr': 0.00047478167745604495, 'samples': 11951616, 'steps': 23342, 'loss/train': 1.3045340776443481} +03/04/2022 16:52:49 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/04/2022 16:52:52 - INFO - codeparrot_training - Step 23343: {'lr': 0.00047477935470481434, 'samples': 11952128, 'steps': 23343, 'loss/train': 2.1830544471740723} +03/04/2022 16:52:55 - INFO - codeparrot_training - Step 23344: {'lr': 0.00047477703185230157, 'samples': 11952640, 'steps': 23344, 'loss/train': 1.7340668439865112} +03/04/2022 16:52:57 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 16:53:00 - INFO - codeparrot_training - Step 23345: {'lr': 0.00047477470889850784, 'samples': 11953152, 'steps': 23345, 'loss/train': 1.2518830299377441} +03/04/2022 16:53:03 - INFO - codeparrot_training - Step 23346: {'lr': 0.00047477238584343407, 'samples': 11953664, 'steps': 23346, 'loss/train': 1.9134511947631836} +03/04/2022 16:53:06 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 16:53:09 - INFO - codeparrot_training - Step 23347: {'lr': 0.00047477006268708134, 'samples': 11954176, 'steps': 23347, 'loss/train': 4.755134105682373} +03/04/2022 16:53:12 - INFO - codeparrot_training - Step 23348: {'lr': 0.00047476773942945063, 'samples': 11954688, 'steps': 23348, 'loss/train': 1.439097285270691} +03/04/2022 16:53:15 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 16:53:17 - INFO - codeparrot_training - Step 23349: {'lr': 0.00047476541607054313, 'samples': 11955200, 'steps': 23349, 'loss/train': 1.1304434537887573} +03/04/2022 16:53:20 - INFO - codeparrot_training - Step 23350: {'lr': 0.0004747630926103597, 'samples': 11955712, 'steps': 23350, 'loss/train': 0.5623083114624023} +03/04/2022 16:53:23 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 16:53:26 - INFO - codeparrot_training - Step 23351: {'lr': 0.0004747607690489015, 'samples': 11956224, 'steps': 23351, 'loss/train': 1.5679458379745483} +03/04/2022 16:53:29 - INFO - codeparrot_training - Step 23352: {'lr': 0.00047475844538616966, 'samples': 11956736, 'steps': 23352, 'loss/train': 1.2520536184310913} +03/04/2022 16:53:32 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 16:53:34 - INFO - codeparrot_training - Step 23353: {'lr': 0.0004747561216221651, 'samples': 11957248, 'steps': 23353, 'loss/train': 1.7548965215682983} +03/04/2022 16:53:37 - INFO - codeparrot_training - Step 23354: {'lr': 0.0004747537977568889, 'samples': 11957760, 'steps': 23354, 'loss/train': 2.0397846698760986} +03/04/2022 16:53:40 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 16:53:43 - INFO - codeparrot_training - Step 23355: {'lr': 0.00047475147379034206, 'samples': 11958272, 'steps': 23355, 'loss/train': 1.9109156131744385} +03/04/2022 16:53:46 - INFO - codeparrot_training - Step 23356: {'lr': 0.0004747491497225257, 'samples': 11958784, 'steps': 23356, 'loss/train': 1.8210498094558716} +03/04/2022 16:53:49 - INFO - codeparrot_training - Step 23357: {'lr': 0.00047474682555344083, 'samples': 11959296, 'steps': 23357, 'loss/train': 2.2388362884521484} +03/04/2022 16:53:49 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 16:53:54 - INFO - codeparrot_training - Step 23358: {'lr': 0.00047474450128308853, 'samples': 11959808, 'steps': 23358, 'loss/train': 1.6404907703399658} +03/04/2022 16:53:58 - INFO - codeparrot_training - Step 23359: {'lr': 0.0004747421769114698, 'samples': 11960320, 'steps': 23359, 'loss/train': 1.749161958694458} +03/04/2022 16:53:58 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 16:54:03 - INFO - codeparrot_training - Step 23360: {'lr': 0.00047473985243858577, 'samples': 11960832, 'steps': 23360, 'loss/train': 2.251490592956543} +03/04/2022 16:54:06 - INFO - codeparrot_training - Step 23361: {'lr': 0.00047473752786443736, 'samples': 11961344, 'steps': 23361, 'loss/train': 1.8118185997009277} +03/04/2022 16:54:06 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 16:54:11 - INFO - codeparrot_training - Step 23362: {'lr': 0.0004747352031890257, 'samples': 11961856, 'steps': 23362, 'loss/train': 2.0672035217285156} +03/04/2022 16:54:15 - INFO - codeparrot_training - Step 23363: {'lr': 0.0004747328784123519, 'samples': 11962368, 'steps': 23363, 'loss/train': 1.6104357242584229} +03/04/2022 16:54:15 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/04/2022 16:54:20 - INFO - codeparrot_training - Step 23364: {'lr': 0.00047473055353441685, 'samples': 11962880, 'steps': 23364, 'loss/train': 2.3544888496398926} +03/04/2022 16:54:23 - INFO - codeparrot_training - Step 23365: {'lr': 0.0004747282285552217, 'samples': 11963392, 'steps': 23365, 'loss/train': 1.8929945230484009} +03/04/2022 16:54:24 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 16:54:28 - INFO - codeparrot_training - Step 23366: {'lr': 0.0004747259034747675, 'samples': 11963904, 'steps': 23366, 'loss/train': 2.221660614013672} +03/04/2022 16:54:32 - INFO - codeparrot_training - Step 23367: {'lr': 0.00047472357829305524, 'samples': 11964416, 'steps': 23367, 'loss/train': 2.539306640625} +03/04/2022 16:54:33 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 16:54:37 - INFO - codeparrot_training - Step 23368: {'lr': 0.0004747212530100861, 'samples': 11964928, 'steps': 23368, 'loss/train': 1.8161391019821167} +03/04/2022 16:54:40 - INFO - codeparrot_training - Step 23369: {'lr': 0.0004747189276258609, 'samples': 11965440, 'steps': 23369, 'loss/train': 2.1547319889068604} +03/04/2022 16:54:41 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/04/2022 16:54:45 - INFO - codeparrot_training - Step 23370: {'lr': 0.0004747166021403809, 'samples': 11965952, 'steps': 23370, 'loss/train': 1.621546745300293} +03/04/2022 16:54:49 - INFO - codeparrot_training - Step 23371: {'lr': 0.000474714276553647, 'samples': 11966464, 'steps': 23371, 'loss/train': 1.6483749151229858} +03/04/2022 16:54:50 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 16:54:54 - INFO - codeparrot_training - Step 23372: {'lr': 0.00047471195086566035, 'samples': 11966976, 'steps': 23372, 'loss/train': 1.6799837350845337} +03/04/2022 16:54:57 - INFO - codeparrot_training - Step 23373: {'lr': 0.000474709625076422, 'samples': 11967488, 'steps': 23373, 'loss/train': 2.281834125518799} +03/04/2022 16:55:00 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 16:55:03 - INFO - codeparrot_training - Step 23374: {'lr': 0.0004747072991859329, 'samples': 11968000, 'steps': 23374, 'loss/train': 1.6455888748168945} +03/04/2022 16:55:06 - INFO - codeparrot_training - Step 23375: {'lr': 0.0004747049731941942, 'samples': 11968512, 'steps': 23375, 'loss/train': 2.113337516784668} +03/04/2022 16:55:08 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 16:55:11 - INFO - codeparrot_training - Step 23376: {'lr': 0.0004747026471012069, 'samples': 11969024, 'steps': 23376, 'loss/train': 0.46364861726760864} +03/04/2022 16:55:14 - INFO - codeparrot_training - Step 23377: {'lr': 0.000474700320906972, 'samples': 11969536, 'steps': 23377, 'loss/train': 0.826941728591919} +03/04/2022 16:55:17 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/04/2022 16:55:20 - INFO - codeparrot_training - Step 23378: {'lr': 0.0004746979946114907, 'samples': 11970048, 'steps': 23378, 'loss/train': 2.1099185943603516} +03/04/2022 16:55:23 - INFO - codeparrot_training - Step 23379: {'lr': 0.000474695668214764, 'samples': 11970560, 'steps': 23379, 'loss/train': 2.617692232131958} +03/04/2022 16:55:25 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 16:55:28 - INFO - codeparrot_training - Step 23380: {'lr': 0.00047469334171679266, 'samples': 11971072, 'steps': 23380, 'loss/train': 1.7464921474456787} +03/04/2022 16:55:31 - INFO - codeparrot_training - Step 23381: {'lr': 0.00047469101511757815, 'samples': 11971584, 'steps': 23381, 'loss/train': 2.2422678470611572} +03/04/2022 16:55:33 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 16:55:37 - INFO - codeparrot_training - Step 23382: {'lr': 0.00047468868841712134, 'samples': 11972096, 'steps': 23382, 'loss/train': 0.4601757526397705} +03/04/2022 16:55:40 - INFO - codeparrot_training - Step 23383: {'lr': 0.00047468636161542325, 'samples': 11972608, 'steps': 23383, 'loss/train': 2.0826609134674072} +03/04/2022 16:55:42 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 16:55:45 - INFO - codeparrot_training - Step 23384: {'lr': 0.0004746840347124849, 'samples': 11973120, 'steps': 23384, 'loss/train': 1.9405590295791626} +03/04/2022 16:55:48 - INFO - codeparrot_training - Step 23385: {'lr': 0.0004746817077083074, 'samples': 11973632, 'steps': 23385, 'loss/train': 1.1801667213439941} +03/04/2022 16:55:50 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/04/2022 16:55:53 - INFO - codeparrot_training - Step 23386: {'lr': 0.00047467938060289185, 'samples': 11974144, 'steps': 23386, 'loss/train': 1.4176239967346191} +03/04/2022 16:55:57 - INFO - codeparrot_training - Step 23387: {'lr': 0.0004746770533962391, 'samples': 11974656, 'steps': 23387, 'loss/train': 1.4661380052566528} +03/04/2022 16:55:59 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 16:56:02 - INFO - codeparrot_training - Step 23388: {'lr': 0.0004746747260883505, 'samples': 11975168, 'steps': 23388, 'loss/train': 2.1951780319213867} +03/04/2022 16:56:05 - INFO - codeparrot_training - Step 23389: {'lr': 0.0004746723986792268, 'samples': 11975680, 'steps': 23389, 'loss/train': 1.2097113132476807} +03/04/2022 16:56:07 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/04/2022 16:56:10 - INFO - codeparrot_training - Step 23390: {'lr': 0.0004746700711688693, 'samples': 11976192, 'steps': 23390, 'loss/train': 0.19772501289844513} +03/04/2022 16:56:13 - INFO - codeparrot_training - Step 23391: {'lr': 0.0004746677435572789, 'samples': 11976704, 'steps': 23391, 'loss/train': 0.9094952344894409} +03/04/2022 16:56:16 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/04/2022 16:56:19 - INFO - codeparrot_training - Step 23392: {'lr': 0.00047466541584445667, 'samples': 11977216, 'steps': 23392, 'loss/train': 2.0294089317321777} +03/04/2022 16:56:22 - INFO - codeparrot_training - Step 23393: {'lr': 0.0004746630880304037, 'samples': 11977728, 'steps': 23393, 'loss/train': 1.804544448852539} +03/04/2022 16:56:24 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 16:56:27 - INFO - codeparrot_training - Step 23394: {'lr': 0.0004746607601151209, 'samples': 11978240, 'steps': 23394, 'loss/train': 1.5381548404693604} +03/04/2022 16:56:31 - INFO - codeparrot_training - Step 23395: {'lr': 0.0004746584320986096, 'samples': 11978752, 'steps': 23395, 'loss/train': 2.3071064949035645} +03/04/2022 16:56:33 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 16:56:36 - INFO - codeparrot_training - Step 23396: {'lr': 0.0004746561039808706, 'samples': 11979264, 'steps': 23396, 'loss/train': 1.8148623704910278} +03/04/2022 16:56:39 - INFO - codeparrot_training - Step 23397: {'lr': 0.0004746537757619049, 'samples': 11979776, 'steps': 23397, 'loss/train': 1.9050732851028442} +03/04/2022 16:56:41 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/04/2022 16:56:44 - INFO - codeparrot_training - Step 23398: {'lr': 0.00047465144744171387, 'samples': 11980288, 'steps': 23398, 'loss/train': 0.9550286531448364} +03/04/2022 16:56:47 - INFO - codeparrot_training - Step 23399: {'lr': 0.0004746491190202983, 'samples': 11980800, 'steps': 23399, 'loss/train': 1.219398856163025} +03/04/2022 16:56:50 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 16:56:53 - INFO - codeparrot_training - Step 23400: {'lr': 0.00047464679049765926, 'samples': 11981312, 'steps': 23400, 'loss/train': 1.648595929145813} +03/04/2022 16:56:56 - INFO - codeparrot_training - Step 23401: {'lr': 0.00047464446187379787, 'samples': 11981824, 'steps': 23401, 'loss/train': 2.1689915657043457} +03/04/2022 16:56:59 - INFO - codeparrot_training - Step 23402: {'lr': 0.00047464213314871514, 'samples': 11982336, 'steps': 23402, 'loss/train': 0.6996098160743713} +03/04/2022 16:56:59 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 16:57:05 - INFO - codeparrot_training - Step 23403: {'lr': 0.0004746398043224122, 'samples': 11982848, 'steps': 23403, 'loss/train': 1.2064995765686035} +03/04/2022 16:57:08 - INFO - codeparrot_training - Step 23404: {'lr': 0.0004746374753948899, 'samples': 11983360, 'steps': 23404, 'loss/train': 0.9354748129844666} +03/04/2022 16:57:08 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/04/2022 16:57:13 - INFO - codeparrot_training - Step 23405: {'lr': 0.00047463514636614945, 'samples': 11983872, 'steps': 23405, 'loss/train': 2.2265141010284424} +03/04/2022 16:57:16 - INFO - codeparrot_training - Step 23406: {'lr': 0.00047463281723619203, 'samples': 11984384, 'steps': 23406, 'loss/train': 2.0179860591888428} +03/04/2022 16:57:18 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 16:57:22 - INFO - codeparrot_training - Step 23407: {'lr': 0.00047463048800501837, 'samples': 11984896, 'steps': 23407, 'loss/train': 1.6085623502731323} +03/04/2022 16:57:25 - INFO - codeparrot_training - Step 23408: {'lr': 0.00047462815867262967, 'samples': 11985408, 'steps': 23408, 'loss/train': 0.5775954127311707} +03/04/2022 16:57:26 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 16:57:30 - INFO - codeparrot_training - Step 23409: {'lr': 0.0004746258292390271, 'samples': 11985920, 'steps': 23409, 'loss/train': 2.027233123779297} +03/04/2022 16:57:33 - INFO - codeparrot_training - Step 23410: {'lr': 0.00047462349970421147, 'samples': 11986432, 'steps': 23410, 'loss/train': 1.7761943340301514} +03/04/2022 16:57:35 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 16:57:39 - INFO - codeparrot_training - Step 23411: {'lr': 0.0004746211700681841, 'samples': 11986944, 'steps': 23411, 'loss/train': 2.169621706008911} +03/04/2022 16:57:42 - INFO - codeparrot_training - Step 23412: {'lr': 0.0004746188403309457, 'samples': 11987456, 'steps': 23412, 'loss/train': 2.430370807647705} +03/04/2022 16:57:43 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 16:57:47 - INFO - codeparrot_training - Step 23413: {'lr': 0.00047461651049249764, 'samples': 11987968, 'steps': 23413, 'loss/train': 2.0490705966949463} +03/04/2022 16:57:50 - INFO - codeparrot_training - Step 23414: {'lr': 0.0004746141805528409, 'samples': 11988480, 'steps': 23414, 'loss/train': 2.2713117599487305} +03/04/2022 16:57:51 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 16:57:55 - INFO - codeparrot_training - Step 23415: {'lr': 0.00047461185051197644, 'samples': 11988992, 'steps': 23415, 'loss/train': 3.4521114826202393} +03/04/2022 16:57:59 - INFO - codeparrot_training - Step 23416: {'lr': 0.0004746095203699053, 'samples': 11989504, 'steps': 23416, 'loss/train': 0.10956083238124847} +03/04/2022 16:58:00 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 16:58:04 - INFO - codeparrot_training - Step 23417: {'lr': 0.00047460719012662857, 'samples': 11990016, 'steps': 23417, 'loss/train': 1.0276734828948975} +03/04/2022 16:58:07 - INFO - codeparrot_training - Step 23418: {'lr': 0.00047460485978214733, 'samples': 11990528, 'steps': 23418, 'loss/train': 2.5082268714904785} +03/04/2022 16:58:09 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 16:58:12 - INFO - codeparrot_training - Step 23419: {'lr': 0.00047460252933646265, 'samples': 11991040, 'steps': 23419, 'loss/train': 1.8034281730651855} +03/04/2022 16:58:16 - INFO - codeparrot_training - Step 23420: {'lr': 0.0004746001987895755, 'samples': 11991552, 'steps': 23420, 'loss/train': 2.1163768768310547} +03/04/2022 16:58:18 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 16:58:21 - INFO - codeparrot_training - Step 23421: {'lr': 0.00047459786814148697, 'samples': 11992064, 'steps': 23421, 'loss/train': 1.9776972532272339} +03/04/2022 16:58:24 - INFO - codeparrot_training - Step 23422: {'lr': 0.0004745955373921981, 'samples': 11992576, 'steps': 23422, 'loss/train': 1.6589242219924927} +03/04/2022 16:58:27 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 16:58:29 - INFO - codeparrot_training - Step 23423: {'lr': 0.0004745932065417099, 'samples': 11993088, 'steps': 23423, 'loss/train': 1.3764723539352417} +03/04/2022 16:58:32 - INFO - codeparrot_training - Step 23424: {'lr': 0.00047459087559002355, 'samples': 11993600, 'steps': 23424, 'loss/train': 2.5065011978149414} +03/04/2022 16:58:35 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 16:58:38 - INFO - codeparrot_training - Step 23425: {'lr': 0.00047458854453713995, 'samples': 11994112, 'steps': 23425, 'loss/train': 1.5518229007720947} +03/04/2022 16:58:41 - INFO - codeparrot_training - Step 23426: {'lr': 0.0004745862133830603, 'samples': 11994624, 'steps': 23426, 'loss/train': 2.317072868347168} +03/04/2022 16:58:44 - INFO - codeparrot_training - Step 23427: {'lr': 0.00047458388212778547, 'samples': 11995136, 'steps': 23427, 'loss/train': 2.2092509269714355} +03/04/2022 16:58:44 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/04/2022 16:58:50 - INFO - codeparrot_training - Step 23428: {'lr': 0.00047458155077131664, 'samples': 11995648, 'steps': 23428, 'loss/train': 1.8494211435317993} +03/04/2022 16:58:52 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 16:58:55 - INFO - codeparrot_training - Step 23429: {'lr': 0.0004745792193136549, 'samples': 11996160, 'steps': 23429, 'loss/train': 2.2434237003326416} +03/04/2022 16:58:58 - INFO - codeparrot_training - Step 23430: {'lr': 0.00047457688775480114, 'samples': 11996672, 'steps': 23430, 'loss/train': 0.8261879086494446} +03/04/2022 16:59:01 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 16:59:03 - INFO - codeparrot_training - Step 23431: {'lr': 0.0004745745560947565, 'samples': 11997184, 'steps': 23431, 'loss/train': 1.5480263233184814} +03/04/2022 16:59:06 - INFO - codeparrot_training - Step 23432: {'lr': 0.0004745722243335221, 'samples': 11997696, 'steps': 23432, 'loss/train': 1.6461553573608398} +03/04/2022 16:59:09 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 16:59:12 - INFO - codeparrot_training - Step 23433: {'lr': 0.0004745698924710988, 'samples': 11998208, 'steps': 23433, 'loss/train': 2.0817010402679443} +03/04/2022 16:59:15 - INFO - codeparrot_training - Step 23434: {'lr': 0.00047456756050748793, 'samples': 11998720, 'steps': 23434, 'loss/train': 1.2123945951461792} +03/04/2022 16:59:18 - INFO - codeparrot_training - Step 23435: {'lr': 0.0004745652284426903, 'samples': 11999232, 'steps': 23435, 'loss/train': 0.5027723908424377} +03/04/2022 16:59:19 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 16:59:24 - INFO - codeparrot_training - Step 23436: {'lr': 0.00047456289627670703, 'samples': 11999744, 'steps': 23436, 'loss/train': 1.8438464403152466} +03/04/2022 16:59:27 - INFO - codeparrot_training - Step 23437: {'lr': 0.0004745605640095392, 'samples': 12000256, 'steps': 23437, 'loss/train': 1.4618467092514038} +03/04/2022 16:59:27 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/04/2022 16:59:32 - INFO - codeparrot_training - Step 23438: {'lr': 0.00047455823164118787, 'samples': 12000768, 'steps': 23438, 'loss/train': 2.0180745124816895} +03/04/2022 16:59:35 - INFO - codeparrot_training - Step 23439: {'lr': 0.00047455589917165406, 'samples': 12001280, 'steps': 23439, 'loss/train': 2.3782050609588623} +03/04/2022 16:59:35 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/04/2022 16:59:40 - INFO - codeparrot_training - Step 23440: {'lr': 0.00047455356660093886, 'samples': 12001792, 'steps': 23440, 'loss/train': 1.8602378368377686} +03/04/2022 16:59:44 - INFO - codeparrot_training - Step 23441: {'lr': 0.0004745512339290432, 'samples': 12002304, 'steps': 23441, 'loss/train': 1.1343469619750977} +03/04/2022 16:59:44 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 16:59:49 - INFO - codeparrot_training - Step 23442: {'lr': 0.00047454890115596824, 'samples': 12002816, 'steps': 23442, 'loss/train': 1.3163840770721436} +03/04/2022 16:59:52 - INFO - codeparrot_training - Step 23443: {'lr': 0.00047454656828171504, 'samples': 12003328, 'steps': 23443, 'loss/train': 1.6837388277053833} +03/04/2022 16:59:52 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/04/2022 16:59:57 - INFO - codeparrot_training - Step 23444: {'lr': 0.0004745442353062846, 'samples': 12003840, 'steps': 23444, 'loss/train': 1.6450188159942627} +03/04/2022 17:00:00 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 17:00:03 - INFO - codeparrot_training - Step 23445: {'lr': 0.000474541902229678, 'samples': 12004352, 'steps': 23445, 'loss/train': 1.9965230226516724} +03/04/2022 17:00:06 - INFO - codeparrot_training - Step 23446: {'lr': 0.0004745395690518963, 'samples': 12004864, 'steps': 23446, 'loss/train': 2.395139217376709} +03/04/2022 17:00:09 - INFO - codeparrot_training - Step 23447: {'lr': 0.0004745372357729405, 'samples': 12005376, 'steps': 23447, 'loss/train': 1.1257965564727783} +03/04/2022 17:00:10 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 17:00:14 - INFO - codeparrot_training - Step 23448: {'lr': 0.0004745349023928117, 'samples': 12005888, 'steps': 23448, 'loss/train': 1.3181939125061035} +03/04/2022 17:00:18 - INFO - codeparrot_training - Step 23449: {'lr': 0.000474532568911511, 'samples': 12006400, 'steps': 23449, 'loss/train': 1.7687578201293945} +03/04/2022 17:00:18 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/04/2022 17:00:23 - INFO - codeparrot_training - Step 23450: {'lr': 0.00047453023532903927, 'samples': 12006912, 'steps': 23450, 'loss/train': 0.8469099402427673} +03/04/2022 17:00:26 - INFO - codeparrot_training - Step 23451: {'lr': 0.00047452790164539775, 'samples': 12007424, 'steps': 23451, 'loss/train': 1.7994046211242676} +03/04/2022 17:00:27 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 17:00:31 - INFO - codeparrot_training - Step 23452: {'lr': 0.00047452556786058744, 'samples': 12007936, 'steps': 23452, 'loss/train': 3.012195348739624} +03/04/2022 17:00:35 - INFO - codeparrot_training - Step 23453: {'lr': 0.0004745232339746094, 'samples': 12008448, 'steps': 23453, 'loss/train': 1.2046812772750854} +03/04/2022 17:00:35 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 17:00:40 - INFO - codeparrot_training - Step 23454: {'lr': 0.00047452089998746463, 'samples': 12008960, 'steps': 23454, 'loss/train': 1.482425570487976} +03/04/2022 17:00:43 - INFO - codeparrot_training - Step 23455: {'lr': 0.0004745185658991541, 'samples': 12009472, 'steps': 23455, 'loss/train': 1.8183541297912598} +03/04/2022 17:00:44 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 17:00:49 - INFO - codeparrot_training - Step 23456: {'lr': 0.0004745162317096791, 'samples': 12009984, 'steps': 23456, 'loss/train': 2.0668609142303467} +03/04/2022 17:00:52 - INFO - codeparrot_training - Step 23457: {'lr': 0.0004745138974190405, 'samples': 12010496, 'steps': 23457, 'loss/train': 1.8630073070526123} +03/04/2022 17:00:53 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 17:00:57 - INFO - codeparrot_training - Step 23458: {'lr': 0.0004745115630272394, 'samples': 12011008, 'steps': 23458, 'loss/train': 2.25524640083313} +03/04/2022 17:01:00 - INFO - codeparrot_training - Step 23459: {'lr': 0.00047450922853427686, 'samples': 12011520, 'steps': 23459, 'loss/train': 2.0050432682037354} +03/04/2022 17:01:01 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/04/2022 17:01:06 - INFO - codeparrot_training - Step 23460: {'lr': 0.0004745068939401539, 'samples': 12012032, 'steps': 23460, 'loss/train': 1.227063536643982} +03/04/2022 17:01:09 - INFO - codeparrot_training - Step 23461: {'lr': 0.0004745045592448717, 'samples': 12012544, 'steps': 23461, 'loss/train': 1.941652536392212} +03/04/2022 17:01:10 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 17:01:14 - INFO - codeparrot_training - Step 23462: {'lr': 0.00047450222444843105, 'samples': 12013056, 'steps': 23462, 'loss/train': 1.8349345922470093} +03/04/2022 17:01:17 - INFO - codeparrot_training - Step 23463: {'lr': 0.0004744998895508333, 'samples': 12013568, 'steps': 23463, 'loss/train': 2.3058278560638428} +03/04/2022 17:01:18 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 17:01:22 - INFO - codeparrot_training - Step 23464: {'lr': 0.0004744975545520793, 'samples': 12014080, 'steps': 23464, 'loss/train': 1.762920618057251} +03/04/2022 17:01:25 - INFO - codeparrot_training - Step 23465: {'lr': 0.00047449521945217016, 'samples': 12014592, 'steps': 23465, 'loss/train': 2.111161708831787} +03/04/2022 17:01:26 - INFO - codeparrot_training - Skipping example with length 158 (seq_length=1024) +03/04/2022 17:01:31 - INFO - codeparrot_training - Step 23466: {'lr': 0.00047449288425110693, 'samples': 12015104, 'steps': 23466, 'loss/train': 1.8523049354553223} +03/04/2022 17:01:34 - INFO - codeparrot_training - Step 23467: {'lr': 0.00047449054894889073, 'samples': 12015616, 'steps': 23467, 'loss/train': 2.217637062072754} +03/04/2022 17:01:35 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 17:01:39 - INFO - codeparrot_training - Step 23468: {'lr': 0.00047448821354552253, 'samples': 12016128, 'steps': 23468, 'loss/train': 1.8780640363693237} +03/04/2022 17:01:42 - INFO - codeparrot_training - Step 23469: {'lr': 0.0004744858780410034, 'samples': 12016640, 'steps': 23469, 'loss/train': 1.4454163312911987} +03/04/2022 17:01:43 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 17:01:48 - INFO - codeparrot_training - Step 23470: {'lr': 0.0004744835424353344, 'samples': 12017152, 'steps': 23470, 'loss/train': 1.566304087638855} +03/04/2022 17:01:51 - INFO - codeparrot_training - Step 23471: {'lr': 0.00047448120672851653, 'samples': 12017664, 'steps': 23471, 'loss/train': 1.8868640661239624} +03/04/2022 17:01:51 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 17:01:56 - INFO - codeparrot_training - Step 23472: {'lr': 0.0004744788709205509, 'samples': 12018176, 'steps': 23472, 'loss/train': 2.040874481201172} +03/04/2022 17:01:59 - INFO - codeparrot_training - Step 23473: {'lr': 0.0004744765350114386, 'samples': 12018688, 'steps': 23473, 'loss/train': 1.7099310159683228} +03/04/2022 17:01:59 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/04/2022 17:02:04 - INFO - codeparrot_training - Step 23474: {'lr': 0.00047447419900118067, 'samples': 12019200, 'steps': 23474, 'loss/train': 1.608444094657898} +03/04/2022 17:02:07 - INFO - codeparrot_training - Step 23475: {'lr': 0.00047447186288977804, 'samples': 12019712, 'steps': 23475, 'loss/train': 2.0884017944335938} +03/04/2022 17:02:08 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/04/2022 17:02:13 - INFO - codeparrot_training - Step 23476: {'lr': 0.0004744695266772319, 'samples': 12020224, 'steps': 23476, 'loss/train': 1.5034143924713135} +03/04/2022 17:02:16 - INFO - codeparrot_training - Step 23477: {'lr': 0.00047446719036354324, 'samples': 12020736, 'steps': 23477, 'loss/train': 1.3621755838394165} +03/04/2022 17:02:16 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 17:02:21 - INFO - codeparrot_training - Step 23478: {'lr': 0.0004744648539487132, 'samples': 12021248, 'steps': 23478, 'loss/train': 1.4267070293426514} +03/04/2022 17:02:24 - INFO - codeparrot_training - Step 23479: {'lr': 0.00047446251743274263, 'samples': 12021760, 'steps': 23479, 'loss/train': 1.8810547590255737} +03/04/2022 17:02:24 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/04/2022 17:02:30 - INFO - codeparrot_training - Step 23480: {'lr': 0.0004744601808156328, 'samples': 12022272, 'steps': 23480, 'loss/train': 0.9832499027252197} +03/04/2022 17:02:33 - INFO - codeparrot_training - Step 23481: {'lr': 0.00047445784409738467, 'samples': 12022784, 'steps': 23481, 'loss/train': 1.813855767250061} +03/04/2022 17:02:33 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 17:02:38 - INFO - codeparrot_training - Step 23482: {'lr': 0.0004744555072779993, 'samples': 12023296, 'steps': 23482, 'loss/train': 1.9193674325942993} +03/04/2022 17:02:41 - INFO - codeparrot_training - Step 23483: {'lr': 0.0004744531703574777, 'samples': 12023808, 'steps': 23483, 'loss/train': 1.9211030006408691} +03/04/2022 17:02:41 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 17:02:46 - INFO - codeparrot_training - Step 23484: {'lr': 0.00047445083333582104, 'samples': 12024320, 'steps': 23484, 'loss/train': 1.236321210861206} +03/04/2022 17:02:49 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 17:02:52 - INFO - codeparrot_training - Step 23485: {'lr': 0.00047444849621303023, 'samples': 12024832, 'steps': 23485, 'loss/train': 2.413282632827759} +03/04/2022 17:02:55 - INFO - codeparrot_training - Step 23486: {'lr': 0.00047444615898910644, 'samples': 12025344, 'steps': 23486, 'loss/train': 1.1261612176895142} +03/04/2022 17:02:58 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/04/2022 17:03:00 - INFO - codeparrot_training - Step 23487: {'lr': 0.00047444382166405067, 'samples': 12025856, 'steps': 23487, 'loss/train': 1.9765197038650513} +03/04/2022 17:03:03 - INFO - codeparrot_training - Step 23488: {'lr': 0.0004744414842378639, 'samples': 12026368, 'steps': 23488, 'loss/train': 1.2254644632339478} +03/04/2022 17:03:06 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/04/2022 17:03:09 - INFO - codeparrot_training - Step 23489: {'lr': 0.0004744391467105473, 'samples': 12026880, 'steps': 23489, 'loss/train': 1.5679121017456055} +03/04/2022 17:03:12 - INFO - codeparrot_training - Step 23490: {'lr': 0.00047443680908210194, 'samples': 12027392, 'steps': 23490, 'loss/train': 2.325765371322632} +03/04/2022 17:03:15 - INFO - codeparrot_training - Step 23491: {'lr': 0.00047443447135252876, 'samples': 12027904, 'steps': 23491, 'loss/train': 1.0986268520355225} +03/04/2022 17:03:15 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 17:03:20 - INFO - codeparrot_training - Step 23492: {'lr': 0.0004744321335218289, 'samples': 12028416, 'steps': 23492, 'loss/train': 2.4800143241882324} +03/04/2022 17:03:23 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 17:03:26 - INFO - codeparrot_training - Step 23493: {'lr': 0.0004744297955900034, 'samples': 12028928, 'steps': 23493, 'loss/train': 1.912929892539978} +03/04/2022 17:03:29 - INFO - codeparrot_training - Step 23494: {'lr': 0.00047442745755705326, 'samples': 12029440, 'steps': 23494, 'loss/train': 1.6387978792190552} +03/04/2022 17:03:32 - INFO - codeparrot_training - Step 23495: {'lr': 0.00047442511942297953, 'samples': 12029952, 'steps': 23495, 'loss/train': 2.175175189971924} +03/04/2022 17:03:32 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 17:03:37 - INFO - codeparrot_training - Step 23496: {'lr': 0.00047442278118778336, 'samples': 12030464, 'steps': 23496, 'loss/train': 1.7177956104278564} +03/04/2022 17:03:40 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 17:03:42 - INFO - codeparrot_training - Step 23497: {'lr': 0.0004744204428514658, 'samples': 12030976, 'steps': 23497, 'loss/train': 1.0397047996520996} +03/04/2022 17:03:46 - INFO - codeparrot_training - Step 23498: {'lr': 0.00047441810441402777, 'samples': 12031488, 'steps': 23498, 'loss/train': 1.153900146484375} +03/04/2022 17:03:48 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 17:03:51 - INFO - codeparrot_training - Step 23499: {'lr': 0.0004744157658754704, 'samples': 12032000, 'steps': 23499, 'loss/train': 1.704856038093567} +03/04/2022 17:03:54 - INFO - codeparrot_training - Step 23500: {'lr': 0.0004744134272357948, 'samples': 12032512, 'steps': 23500, 'loss/train': 1.55928635597229} +03/04/2022 17:03:56 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 17:03:59 - INFO - codeparrot_training - Step 23501: {'lr': 0.0004744110884950019, 'samples': 12033024, 'steps': 23501, 'loss/train': 2.0780394077301025} +03/04/2022 17:04:02 - INFO - codeparrot_training - Step 23502: {'lr': 0.00047440874965309286, 'samples': 12033536, 'steps': 23502, 'loss/train': 1.1428110599517822} +03/04/2022 17:04:05 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 17:04:08 - INFO - codeparrot_training - Step 23503: {'lr': 0.00047440641071006874, 'samples': 12034048, 'steps': 23503, 'loss/train': 1.0073317289352417} +03/04/2022 17:04:11 - INFO - codeparrot_training - Step 23504: {'lr': 0.00047440407166593056, 'samples': 12034560, 'steps': 23504, 'loss/train': 1.7784956693649292} +03/04/2022 17:04:13 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 17:04:16 - INFO - codeparrot_training - Step 23505: {'lr': 0.0004744017325206793, 'samples': 12035072, 'steps': 23505, 'loss/train': 2.369910478591919} +03/04/2022 17:04:19 - INFO - codeparrot_training - Step 23506: {'lr': 0.00047439939327431613, 'samples': 12035584, 'steps': 23506, 'loss/train': 2.3670477867126465} +03/04/2022 17:04:23 - INFO - codeparrot_training - Step 23507: {'lr': 0.0004743970539268421, 'samples': 12036096, 'steps': 23507, 'loss/train': 0.7686943411827087} +03/04/2022 17:04:23 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 17:04:28 - INFO - codeparrot_training - Step 23508: {'lr': 0.00047439471447825813, 'samples': 12036608, 'steps': 23508, 'loss/train': 2.1253182888031006} +03/04/2022 17:04:31 - INFO - codeparrot_training - Step 23509: {'lr': 0.00047439237492856543, 'samples': 12037120, 'steps': 23509, 'loss/train': 2.4260661602020264} +03/04/2022 17:04:31 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 17:04:36 - INFO - codeparrot_training - Step 23510: {'lr': 0.0004743900352777649, 'samples': 12037632, 'steps': 23510, 'loss/train': 2.1083579063415527} +03/04/2022 17:04:39 - INFO - codeparrot_training - Step 23511: {'lr': 0.0004743876955258578, 'samples': 12038144, 'steps': 23511, 'loss/train': 0.2786150872707367} +03/04/2022 17:04:40 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/04/2022 17:04:45 - INFO - codeparrot_training - Step 23512: {'lr': 0.00047438535567284504, 'samples': 12038656, 'steps': 23512, 'loss/train': 2.179335355758667} +03/04/2022 17:04:48 - INFO - codeparrot_training - Step 23513: {'lr': 0.00047438301571872763, 'samples': 12039168, 'steps': 23513, 'loss/train': 2.132192611694336} +03/04/2022 17:04:48 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 17:04:53 - INFO - codeparrot_training - Step 23514: {'lr': 0.00047438067566350675, 'samples': 12039680, 'steps': 23514, 'loss/train': 1.8619375228881836} +03/04/2022 17:04:56 - INFO - codeparrot_training - Step 23515: {'lr': 0.00047437833550718336, 'samples': 12040192, 'steps': 23515, 'loss/train': 2.378436803817749} +03/04/2022 17:04:57 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 17:05:02 - INFO - codeparrot_training - Step 23516: {'lr': 0.0004743759952497586, 'samples': 12040704, 'steps': 23516, 'loss/train': 0.8898215889930725} +03/04/2022 17:05:05 - INFO - codeparrot_training - Step 23517: {'lr': 0.0004743736548912334, 'samples': 12041216, 'steps': 23517, 'loss/train': 1.524056315422058} +03/04/2022 17:05:05 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 17:05:10 - INFO - codeparrot_training - Step 23518: {'lr': 0.00047437131443160897, 'samples': 12041728, 'steps': 23518, 'loss/train': 1.9366751909255981} +03/04/2022 17:05:13 - INFO - codeparrot_training - Step 23519: {'lr': 0.0004743689738708863, 'samples': 12042240, 'steps': 23519, 'loss/train': 1.8843666315078735} +03/04/2022 17:05:14 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 17:05:18 - INFO - codeparrot_training - Step 23520: {'lr': 0.0004743666332090664, 'samples': 12042752, 'steps': 23520, 'loss/train': 2.3018317222595215} +03/04/2022 17:05:22 - INFO - codeparrot_training - Step 23521: {'lr': 0.00047436429244615037, 'samples': 12043264, 'steps': 23521, 'loss/train': 2.0193848609924316} +03/04/2022 17:05:22 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 17:05:27 - INFO - codeparrot_training - Step 23522: {'lr': 0.0004743619515821392, 'samples': 12043776, 'steps': 23522, 'loss/train': 2.4107556343078613} +03/04/2022 17:05:30 - INFO - codeparrot_training - Step 23523: {'lr': 0.00047435961061703403, 'samples': 12044288, 'steps': 23523, 'loss/train': 1.341386079788208} +03/04/2022 17:05:30 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/04/2022 17:05:35 - INFO - codeparrot_training - Step 23524: {'lr': 0.00047435726955083593, 'samples': 12044800, 'steps': 23524, 'loss/train': 2.6173596382141113} +03/04/2022 17:05:39 - INFO - codeparrot_training - Step 23525: {'lr': 0.0004743549283835459, 'samples': 12045312, 'steps': 23525, 'loss/train': 1.7510402202606201} +03/04/2022 17:05:39 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 17:05:44 - INFO - codeparrot_training - Step 23526: {'lr': 0.00047435258711516496, 'samples': 12045824, 'steps': 23526, 'loss/train': 1.5542188882827759} +03/04/2022 17:05:47 - INFO - codeparrot_training - Step 23527: {'lr': 0.0004743502457456942, 'samples': 12046336, 'steps': 23527, 'loss/train': 2.670391798019409} +03/04/2022 17:05:47 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 17:05:52 - INFO - codeparrot_training - Step 23528: {'lr': 0.0004743479042751347, 'samples': 12046848, 'steps': 23528, 'loss/train': 1.172585368156433} +03/04/2022 17:05:55 - INFO - codeparrot_training - Step 23529: {'lr': 0.0004743455627034875, 'samples': 12047360, 'steps': 23529, 'loss/train': 2.2008514404296875} +03/04/2022 17:05:55 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 17:06:01 - INFO - codeparrot_training - Step 23530: {'lr': 0.0004743432210307536, 'samples': 12047872, 'steps': 23530, 'loss/train': 1.9725878238677979} +03/04/2022 17:06:04 - INFO - codeparrot_training - Step 23531: {'lr': 0.00047434087925693415, 'samples': 12048384, 'steps': 23531, 'loss/train': 1.465010166168213} +03/04/2022 17:06:04 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 17:06:09 - INFO - codeparrot_training - Step 23532: {'lr': 0.00047433853738203013, 'samples': 12048896, 'steps': 23532, 'loss/train': 1.5991606712341309} +03/04/2022 17:06:12 - INFO - codeparrot_training - Step 23533: {'lr': 0.00047433619540604264, 'samples': 12049408, 'steps': 23533, 'loss/train': 1.849231481552124} +03/04/2022 17:06:12 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 17:06:18 - INFO - codeparrot_training - Step 23534: {'lr': 0.0004743338533289728, 'samples': 12049920, 'steps': 23534, 'loss/train': 0.566605269908905} +03/04/2022 17:06:21 - INFO - codeparrot_training - Step 23535: {'lr': 0.0004743315111508215, 'samples': 12050432, 'steps': 23535, 'loss/train': 1.4799652099609375} +03/04/2022 17:06:21 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/04/2022 17:06:26 - INFO - codeparrot_training - Step 23536: {'lr': 0.00047432916887158995, 'samples': 12050944, 'steps': 23536, 'loss/train': 2.2299134731292725} +03/04/2022 17:06:30 - INFO - codeparrot_training - Step 23537: {'lr': 0.00047432682649127913, 'samples': 12051456, 'steps': 23537, 'loss/train': 1.3016821146011353} +03/04/2022 17:06:30 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 17:06:35 - INFO - codeparrot_training - Step 23538: {'lr': 0.00047432448400989004, 'samples': 12051968, 'steps': 23538, 'loss/train': 1.8141227960586548} +03/04/2022 17:06:38 - INFO - codeparrot_training - Step 23539: {'lr': 0.0004743221414274238, 'samples': 12052480, 'steps': 23539, 'loss/train': 2.004836320877075} +03/04/2022 17:06:38 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/04/2022 17:06:43 - INFO - codeparrot_training - Step 23540: {'lr': 0.00047431979874388154, 'samples': 12052992, 'steps': 23540, 'loss/train': 2.150920867919922} +03/04/2022 17:06:46 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 17:06:49 - INFO - codeparrot_training - Step 23541: {'lr': 0.0004743174559592642, 'samples': 12053504, 'steps': 23541, 'loss/train': 2.4125759601593018} +03/04/2022 17:06:52 - INFO - codeparrot_training - Step 23542: {'lr': 0.0004743151130735729, 'samples': 12054016, 'steps': 23542, 'loss/train': 1.8427445888519287} +03/04/2022 17:06:55 - INFO - codeparrot_training - Step 23543: {'lr': 0.0004743127700868086, 'samples': 12054528, 'steps': 23543, 'loss/train': 2.0841121673583984} +03/04/2022 17:06:55 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 17:07:00 - INFO - codeparrot_training - Step 23544: {'lr': 0.00047431042699897245, 'samples': 12055040, 'steps': 23544, 'loss/train': 2.097198963165283} +03/04/2022 17:07:04 - INFO - codeparrot_training - Step 23545: {'lr': 0.0004743080838100655, 'samples': 12055552, 'steps': 23545, 'loss/train': 1.3232982158660889} +03/04/2022 17:07:04 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 17:07:09 - INFO - codeparrot_training - Step 23546: {'lr': 0.0004743057405200888, 'samples': 12056064, 'steps': 23546, 'loss/train': 1.6844393014907837} +03/04/2022 17:07:12 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/04/2022 17:07:14 - INFO - codeparrot_training - Step 23547: {'lr': 0.0004743033971290434, 'samples': 12056576, 'steps': 23547, 'loss/train': 2.1217081546783447} +03/04/2022 17:07:18 - INFO - codeparrot_training - Step 23548: {'lr': 0.00047430105363693034, 'samples': 12057088, 'steps': 23548, 'loss/train': 1.9872835874557495} +03/04/2022 17:07:20 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 17:07:23 - INFO - codeparrot_training - Step 23549: {'lr': 0.0004742987100437507, 'samples': 12057600, 'steps': 23549, 'loss/train': 2.7428267002105713} +03/04/2022 17:07:26 - INFO - codeparrot_training - Step 23550: {'lr': 0.00047429636634950545, 'samples': 12058112, 'steps': 23550, 'loss/train': 1.4733387231826782} +03/04/2022 17:07:29 - INFO - codeparrot_training - Step 23551: {'lr': 0.0004742940225541958, 'samples': 12058624, 'steps': 23551, 'loss/train': 2.3202245235443115} +03/04/2022 17:07:29 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 17:07:35 - INFO - codeparrot_training - Step 23552: {'lr': 0.0004742916786578227, 'samples': 12059136, 'steps': 23552, 'loss/train': 2.0435256958007812} +03/04/2022 17:07:38 - INFO - codeparrot_training - Step 23553: {'lr': 0.00047428933466038726, 'samples': 12059648, 'steps': 23553, 'loss/train': 1.517012119293213} +03/04/2022 17:07:38 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 17:07:43 - INFO - codeparrot_training - Step 23554: {'lr': 0.00047428699056189047, 'samples': 12060160, 'steps': 23554, 'loss/train': 0.7259073853492737} +03/04/2022 17:07:46 - INFO - codeparrot_training - Step 23555: {'lr': 0.0004742846463623334, 'samples': 12060672, 'steps': 23555, 'loss/train': 1.9352827072143555} +03/04/2022 17:07:46 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 17:07:52 - INFO - codeparrot_training - Step 23556: {'lr': 0.0004742823020617172, 'samples': 12061184, 'steps': 23556, 'loss/train': 1.9622342586517334} +03/04/2022 17:07:55 - INFO - codeparrot_training - Step 23557: {'lr': 0.0004742799576600427, 'samples': 12061696, 'steps': 23557, 'loss/train': 0.8845088481903076} +03/04/2022 17:07:55 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 17:08:00 - INFO - codeparrot_training - Step 23558: {'lr': 0.00047427761315731133, 'samples': 12062208, 'steps': 23558, 'loss/train': 1.967919111251831} +03/04/2022 17:08:03 - INFO - codeparrot_training - Step 23559: {'lr': 0.0004742752685535238, 'samples': 12062720, 'steps': 23559, 'loss/train': 2.3167214393615723} +03/04/2022 17:08:04 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/04/2022 17:08:09 - INFO - codeparrot_training - Step 23560: {'lr': 0.00047427292384868134, 'samples': 12063232, 'steps': 23560, 'loss/train': 1.4588333368301392} +03/04/2022 17:08:12 - INFO - codeparrot_training - Step 23561: {'lr': 0.0004742705790427849, 'samples': 12063744, 'steps': 23561, 'loss/train': 1.6884890794754028} +03/04/2022 17:08:12 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 17:08:17 - INFO - codeparrot_training - Step 23562: {'lr': 0.00047426823413583563, 'samples': 12064256, 'steps': 23562, 'loss/train': 2.415215253829956} +03/04/2022 17:08:20 - INFO - codeparrot_training - Step 23563: {'lr': 0.0004742658891278346, 'samples': 12064768, 'steps': 23563, 'loss/train': 1.3076276779174805} +03/04/2022 17:08:21 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 17:08:25 - INFO - codeparrot_training - Step 23564: {'lr': 0.0004742635440187828, 'samples': 12065280, 'steps': 23564, 'loss/train': 2.0545005798339844} +03/04/2022 17:08:29 - INFO - codeparrot_training - Step 23565: {'lr': 0.00047426119880868123, 'samples': 12065792, 'steps': 23565, 'loss/train': 1.8653738498687744} +03/04/2022 17:08:29 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 17:08:34 - INFO - codeparrot_training - Step 23566: {'lr': 0.00047425885349753114, 'samples': 12066304, 'steps': 23566, 'loss/train': 1.3180036544799805} +03/04/2022 17:08:37 - INFO - codeparrot_training - Step 23567: {'lr': 0.0004742565080853334, 'samples': 12066816, 'steps': 23567, 'loss/train': 1.856196403503418} +03/04/2022 17:08:37 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/04/2022 17:08:42 - INFO - codeparrot_training - Step 23568: {'lr': 0.00047425416257208916, 'samples': 12067328, 'steps': 23568, 'loss/train': 1.5974044799804688} +03/04/2022 17:08:45 - INFO - codeparrot_training - Step 23569: {'lr': 0.0004742518169577994, 'samples': 12067840, 'steps': 23569, 'loss/train': 2.042630910873413} +03/04/2022 17:08:46 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 17:08:51 - INFO - codeparrot_training - Step 23570: {'lr': 0.0004742494712424653, 'samples': 12068352, 'steps': 23570, 'loss/train': 1.7507457733154297} +03/04/2022 17:08:54 - INFO - codeparrot_training - Step 23571: {'lr': 0.0004742471254260878, 'samples': 12068864, 'steps': 23571, 'loss/train': 2.1467745304107666} +03/04/2022 17:08:54 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 17:08:59 - INFO - codeparrot_training - Step 23572: {'lr': 0.0004742447795086681, 'samples': 12069376, 'steps': 23572, 'loss/train': 1.8383021354675293} +03/04/2022 17:09:02 - INFO - codeparrot_training - Step 23573: {'lr': 0.00047424243349020705, 'samples': 12069888, 'steps': 23573, 'loss/train': 1.8616427183151245} +03/04/2022 17:09:03 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 17:09:08 - INFO - codeparrot_training - Step 23574: {'lr': 0.0004742400873707059, 'samples': 12070400, 'steps': 23574, 'loss/train': 1.8526434898376465} +03/04/2022 17:09:11 - INFO - codeparrot_training - Step 23575: {'lr': 0.0004742377411501656, 'samples': 12070912, 'steps': 23575, 'loss/train': 1.5271366834640503} +03/04/2022 17:09:11 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/04/2022 17:09:16 - INFO - codeparrot_training - Step 23576: {'lr': 0.00047423539482858724, 'samples': 12071424, 'steps': 23576, 'loss/train': 0.7044313549995422} +03/04/2022 17:09:19 - INFO - codeparrot_training - Step 23577: {'lr': 0.0004742330484059718, 'samples': 12071936, 'steps': 23577, 'loss/train': 2.0854175090789795} +03/04/2022 17:09:20 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/04/2022 17:09:25 - INFO - codeparrot_training - Step 23578: {'lr': 0.0004742307018823205, 'samples': 12072448, 'steps': 23578, 'loss/train': 2.156951904296875} +03/04/2022 17:09:28 - INFO - codeparrot_training - Step 23579: {'lr': 0.0004742283552576343, 'samples': 12072960, 'steps': 23579, 'loss/train': 0.9826322793960571} +03/04/2022 17:09:28 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 17:09:33 - INFO - codeparrot_training - Step 23580: {'lr': 0.0004742260085319142, 'samples': 12073472, 'steps': 23580, 'loss/train': 1.7294145822525024} +03/04/2022 17:09:36 - INFO - codeparrot_training - Step 23581: {'lr': 0.0004742236617051614, 'samples': 12073984, 'steps': 23581, 'loss/train': 2.805243968963623} +03/04/2022 17:09:37 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 17:09:41 - INFO - codeparrot_training - Step 23582: {'lr': 0.00047422131477737684, 'samples': 12074496, 'steps': 23582, 'loss/train': 1.8112674951553345} +03/04/2022 17:09:45 - INFO - codeparrot_training - Step 23583: {'lr': 0.00047421896774856156, 'samples': 12075008, 'steps': 23583, 'loss/train': 1.5441052913665771} +03/04/2022 17:09:45 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/04/2022 17:09:50 - INFO - codeparrot_training - Step 23584: {'lr': 0.00047421662061871675, 'samples': 12075520, 'steps': 23584, 'loss/train': 1.4458791017532349} +03/04/2022 17:09:53 - INFO - codeparrot_training - Step 23585: {'lr': 0.0004742142733878433, 'samples': 12076032, 'steps': 23585, 'loss/train': 1.349284291267395} +03/04/2022 17:09:54 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/04/2022 17:09:58 - INFO - codeparrot_training - Step 23586: {'lr': 0.0004742119260559424, 'samples': 12076544, 'steps': 23586, 'loss/train': 1.6640053987503052} +03/04/2022 17:10:01 - INFO - codeparrot_training - Step 23587: {'lr': 0.0004742095786230152, 'samples': 12077056, 'steps': 23587, 'loss/train': 1.9211853742599487} +03/04/2022 17:10:02 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 17:10:07 - INFO - codeparrot_training - Step 23588: {'lr': 0.00047420723108906247, 'samples': 12077568, 'steps': 23588, 'loss/train': 1.4528789520263672} +03/04/2022 17:10:10 - INFO - codeparrot_training - Step 23589: {'lr': 0.0004742048834540855, 'samples': 12078080, 'steps': 23589, 'loss/train': 0.8327783346176147} +03/04/2022 17:10:10 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/04/2022 17:10:15 - INFO - codeparrot_training - Step 23590: {'lr': 0.0004742025357180852, 'samples': 12078592, 'steps': 23590, 'loss/train': 1.0128346681594849} +03/04/2022 17:10:18 - INFO - codeparrot_training - Step 23591: {'lr': 0.00047420018788106274, 'samples': 12079104, 'steps': 23591, 'loss/train': 1.473543643951416} +03/04/2022 17:10:19 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 17:10:24 - INFO - codeparrot_training - Step 23592: {'lr': 0.00047419783994301915, 'samples': 12079616, 'steps': 23592, 'loss/train': 2.0908610820770264} +03/04/2022 17:10:27 - INFO - codeparrot_training - Step 23593: {'lr': 0.0004741954919039554, 'samples': 12080128, 'steps': 23593, 'loss/train': 1.4416587352752686} +03/04/2022 17:10:27 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 17:10:32 - INFO - codeparrot_training - Step 23594: {'lr': 0.0004741931437638727, 'samples': 12080640, 'steps': 23594, 'loss/train': 2.2837343215942383} +03/04/2022 17:10:35 - INFO - codeparrot_training - Step 23595: {'lr': 0.000474190795522772, 'samples': 12081152, 'steps': 23595, 'loss/train': 1.3336920738220215} +03/04/2022 17:10:36 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/04/2022 17:10:41 - INFO - codeparrot_training - Step 23596: {'lr': 0.00047418844718065433, 'samples': 12081664, 'steps': 23596, 'loss/train': 1.7545489072799683} +03/04/2022 17:10:44 - INFO - codeparrot_training - Step 23597: {'lr': 0.0004741860987375209, 'samples': 12082176, 'steps': 23597, 'loss/train': 2.0765738487243652} +03/04/2022 17:10:44 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 17:10:49 - INFO - codeparrot_training - Step 23598: {'lr': 0.00047418375019337263, 'samples': 12082688, 'steps': 23598, 'loss/train': 1.6435551643371582} +03/04/2022 17:10:52 - INFO - codeparrot_training - Step 23599: {'lr': 0.00047418140154821065, 'samples': 12083200, 'steps': 23599, 'loss/train': 1.867525339126587} +03/04/2022 17:10:53 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/04/2022 17:10:57 - INFO - codeparrot_training - Step 23600: {'lr': 0.00047417905280203594, 'samples': 12083712, 'steps': 23600, 'loss/train': 1.9708377122879028} +03/04/2022 17:11:00 - INFO - codeparrot_training - Step 23601: {'lr': 0.00047417670395484963, 'samples': 12084224, 'steps': 23601, 'loss/train': 1.2928062677383423} +03/04/2022 17:11:01 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 17:11:06 - INFO - codeparrot_training - Step 23602: {'lr': 0.0004741743550066527, 'samples': 12084736, 'steps': 23602, 'loss/train': 2.3730242252349854} +03/04/2022 17:11:09 - INFO - codeparrot_training - Step 23603: {'lr': 0.00047417200595744637, 'samples': 12085248, 'steps': 23603, 'loss/train': 1.125169038772583} +03/04/2022 17:11:09 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/04/2022 17:11:14 - INFO - codeparrot_training - Step 23604: {'lr': 0.0004741696568072316, 'samples': 12085760, 'steps': 23604, 'loss/train': 1.5960971117019653} +03/04/2022 17:11:17 - INFO - codeparrot_training - Step 23605: {'lr': 0.00047416730755600936, 'samples': 12086272, 'steps': 23605, 'loss/train': 2.3962724208831787} +03/04/2022 17:11:18 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/04/2022 17:11:23 - INFO - codeparrot_training - Step 23606: {'lr': 0.0004741649582037808, 'samples': 12086784, 'steps': 23606, 'loss/train': 0.5510353446006775} +03/04/2022 17:11:26 - INFO - codeparrot_training - Step 23607: {'lr': 0.000474162608750547, 'samples': 12087296, 'steps': 23607, 'loss/train': 2.0942811965942383} +03/04/2022 17:11:26 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 17:11:31 - INFO - codeparrot_training - Step 23608: {'lr': 0.000474160259196309, 'samples': 12087808, 'steps': 23608, 'loss/train': 1.9427745342254639} +03/04/2022 17:11:34 - INFO - codeparrot_training - Step 23609: {'lr': 0.0004741579095410678, 'samples': 12088320, 'steps': 23609, 'loss/train': 2.088855266571045} +03/04/2022 17:11:34 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 17:11:39 - INFO - codeparrot_training - Step 23610: {'lr': 0.0004741555597848245, 'samples': 12088832, 'steps': 23610, 'loss/train': 2.0565576553344727} +03/04/2022 17:11:43 - INFO - codeparrot_training - Step 23611: {'lr': 0.00047415320992758025, 'samples': 12089344, 'steps': 23611, 'loss/train': 2.374455451965332} +03/04/2022 17:11:43 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 17:11:48 - INFO - codeparrot_training - Step 23612: {'lr': 0.00047415085996933593, 'samples': 12089856, 'steps': 23612, 'loss/train': 1.7058137655258179} +03/04/2022 17:11:51 - INFO - codeparrot_training - Step 23613: {'lr': 0.00047414850991009275, 'samples': 12090368, 'steps': 23613, 'loss/train': 1.9095895290374756} +03/04/2022 17:11:51 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 17:11:56 - INFO - codeparrot_training - Step 23614: {'lr': 0.00047414615974985164, 'samples': 12090880, 'steps': 23614, 'loss/train': 1.8851245641708374} +03/04/2022 17:12:00 - INFO - codeparrot_training - Step 23615: {'lr': 0.0004741438094886138, 'samples': 12091392, 'steps': 23615, 'loss/train': 2.1114790439605713} +03/04/2022 17:12:00 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/04/2022 17:12:06 - INFO - codeparrot_training - Step 23616: {'lr': 0.00047414145912638017, 'samples': 12091904, 'steps': 23616, 'loss/train': 1.9975779056549072} +03/04/2022 17:12:09 - INFO - codeparrot_training - Step 23617: {'lr': 0.00047413910866315193, 'samples': 12092416, 'steps': 23617, 'loss/train': 1.2613639831542969} +03/04/2022 17:12:12 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/04/2022 17:12:14 - INFO - codeparrot_training - Step 23618: {'lr': 0.00047413675809893, 'samples': 12092928, 'steps': 23618, 'loss/train': 2.2748653888702393} +03/04/2022 17:12:17 - INFO - codeparrot_training - Step 23619: {'lr': 0.0004741344074337155, 'samples': 12093440, 'steps': 23619, 'loss/train': 1.9464027881622314} +03/04/2022 17:12:20 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 17:12:22 - INFO - codeparrot_training - Step 23620: {'lr': 0.00047413205666750955, 'samples': 12093952, 'steps': 23620, 'loss/train': 2.141181468963623} +03/04/2022 17:12:26 - INFO - codeparrot_training - Step 23621: {'lr': 0.0004741297058003131, 'samples': 12094464, 'steps': 23621, 'loss/train': 1.5295274257659912} +03/04/2022 17:12:28 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 17:12:31 - INFO - codeparrot_training - Step 23622: {'lr': 0.00047412735483212725, 'samples': 12094976, 'steps': 23622, 'loss/train': 1.7531987428665161} +03/04/2022 17:12:34 - INFO - codeparrot_training - Step 23623: {'lr': 0.0004741250037629531, 'samples': 12095488, 'steps': 23623, 'loss/train': 2.047591209411621} +03/04/2022 17:12:37 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/04/2022 17:12:39 - INFO - codeparrot_training - Step 23624: {'lr': 0.00047412265259279176, 'samples': 12096000, 'steps': 23624, 'loss/train': 2.372868299484253} +03/04/2022 17:12:42 - INFO - codeparrot_training - Step 23625: {'lr': 0.0004741203013216441, 'samples': 12096512, 'steps': 23625, 'loss/train': 2.2884137630462646} +03/04/2022 17:12:45 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/04/2022 17:12:48 - INFO - codeparrot_training - Step 23626: {'lr': 0.0004741179499495113, 'samples': 12097024, 'steps': 23626, 'loss/train': 0.9960039258003235} +03/04/2022 17:12:51 - INFO - codeparrot_training - Step 23627: {'lr': 0.00047411559847639447, 'samples': 12097536, 'steps': 23627, 'loss/train': 1.1575157642364502} +03/04/2022 17:12:54 - INFO - codeparrot_training - Step 23628: {'lr': 0.0004741132469022946, 'samples': 12098048, 'steps': 23628, 'loss/train': 0.948320209980011} +03/04/2022 17:12:54 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 17:12:59 - INFO - codeparrot_training - Step 23629: {'lr': 0.00047411089522721275, 'samples': 12098560, 'steps': 23629, 'loss/train': 2.1054747104644775} +03/04/2022 17:13:02 - INFO - codeparrot_training - Step 23630: {'lr': 0.00047410854345114996, 'samples': 12099072, 'steps': 23630, 'loss/train': 2.1440908908843994} +03/04/2022 17:13:03 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 17:13:08 - INFO - codeparrot_training - Step 23631: {'lr': 0.0004741061915741073, 'samples': 12099584, 'steps': 23631, 'loss/train': 2.049994707107544} +03/04/2022 17:13:11 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 17:13:13 - INFO - codeparrot_training - Step 23632: {'lr': 0.0004741038395960859, 'samples': 12100096, 'steps': 23632, 'loss/train': 1.2414079904556274} +03/04/2022 17:13:16 - INFO - codeparrot_training - Step 23633: {'lr': 0.0004741014875170867, 'samples': 12100608, 'steps': 23633, 'loss/train': 1.8847893476486206} +03/04/2022 17:13:19 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/04/2022 17:13:21 - INFO - codeparrot_training - Step 23634: {'lr': 0.0004740991353371109, 'samples': 12101120, 'steps': 23634, 'loss/train': 2.2249531745910645} +03/04/2022 17:13:25 - INFO - codeparrot_training - Step 23635: {'lr': 0.0004740967830561595, 'samples': 12101632, 'steps': 23635, 'loss/train': 1.290770411491394} +03/04/2022 17:13:27 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 17:13:30 - INFO - codeparrot_training - Step 23636: {'lr': 0.0004740944306742335, 'samples': 12102144, 'steps': 23636, 'loss/train': 2.369663715362549} +03/04/2022 17:13:33 - INFO - codeparrot_training - Step 23637: {'lr': 0.00047409207819133406, 'samples': 12102656, 'steps': 23637, 'loss/train': 2.040771007537842} +03/04/2022 17:13:36 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 17:13:38 - INFO - codeparrot_training - Step 23638: {'lr': 0.0004740897256074621, 'samples': 12103168, 'steps': 23638, 'loss/train': 2.007297992706299} +03/04/2022 17:13:42 - INFO - codeparrot_training - Step 23639: {'lr': 0.00047408737292261883, 'samples': 12103680, 'steps': 23639, 'loss/train': 3.635899305343628} +03/04/2022 17:13:44 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 17:13:47 - INFO - codeparrot_training - Step 23640: {'lr': 0.0004740850201368052, 'samples': 12104192, 'steps': 23640, 'loss/train': 2.0706632137298584} +03/04/2022 17:13:50 - INFO - codeparrot_training - Step 23641: {'lr': 0.00047408266725002234, 'samples': 12104704, 'steps': 23641, 'loss/train': 1.956384539604187} +03/04/2022 17:13:53 - INFO - codeparrot_training - Step 23642: {'lr': 0.00047408031426227136, 'samples': 12105216, 'steps': 23642, 'loss/train': 1.8224693536758423} +03/04/2022 17:13:54 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 17:13:59 - INFO - codeparrot_training - Step 23643: {'lr': 0.0004740779611735532, 'samples': 12105728, 'steps': 23643, 'loss/train': 1.6501497030258179} +03/04/2022 17:14:02 - INFO - codeparrot_training - Step 23644: {'lr': 0.00047407560798386894, 'samples': 12106240, 'steps': 23644, 'loss/train': 2.0401883125305176} +03/04/2022 17:14:02 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 17:14:07 - INFO - codeparrot_training - Step 23645: {'lr': 0.00047407325469321973, 'samples': 12106752, 'steps': 23645, 'loss/train': 1.2853413820266724} +03/04/2022 17:14:10 - INFO - codeparrot_training - Step 23646: {'lr': 0.0004740709013016065, 'samples': 12107264, 'steps': 23646, 'loss/train': 2.1101059913635254} +03/04/2022 17:14:11 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 17:14:16 - INFO - codeparrot_training - Step 23647: {'lr': 0.0004740685478090304, 'samples': 12107776, 'steps': 23647, 'loss/train': 2.25382924079895} +03/04/2022 17:14:19 - INFO - codeparrot_training - Step 23648: {'lr': 0.00047406619421549247, 'samples': 12108288, 'steps': 23648, 'loss/train': 1.7696454524993896} +03/04/2022 17:14:19 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 17:14:24 - INFO - codeparrot_training - Step 23649: {'lr': 0.0004740638405209938, 'samples': 12108800, 'steps': 23649, 'loss/train': 1.336594820022583} +03/04/2022 17:14:27 - INFO - codeparrot_training - Step 23650: {'lr': 0.0004740614867255353, 'samples': 12109312, 'steps': 23650, 'loss/train': 1.204626441001892} +03/04/2022 17:14:28 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 17:14:33 - INFO - codeparrot_training - Step 23651: {'lr': 0.0004740591328291183, 'samples': 12109824, 'steps': 23651, 'loss/train': 1.911062479019165} +03/04/2022 17:14:36 - INFO - codeparrot_training - Step 23652: {'lr': 0.0004740567788317437, 'samples': 12110336, 'steps': 23652, 'loss/train': 1.620890736579895} +03/04/2022 17:14:37 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/04/2022 17:14:41 - INFO - codeparrot_training - Step 23653: {'lr': 0.00047405442473341246, 'samples': 12110848, 'steps': 23653, 'loss/train': 1.7959572076797485} +03/04/2022 17:14:44 - INFO - codeparrot_training - Step 23654: {'lr': 0.0004740520705341259, 'samples': 12111360, 'steps': 23654, 'loss/train': 2.5285186767578125} +03/04/2022 17:14:45 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/04/2022 17:14:50 - INFO - codeparrot_training - Step 23655: {'lr': 0.0004740497162338848, 'samples': 12111872, 'steps': 23655, 'loss/train': 1.4534214735031128} +03/04/2022 17:14:53 - INFO - codeparrot_training - Step 23656: {'lr': 0.00047404736183269045, 'samples': 12112384, 'steps': 23656, 'loss/train': 1.4537698030471802} +03/04/2022 17:14:54 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/04/2022 17:14:58 - INFO - codeparrot_training - Step 23657: {'lr': 0.0004740450073305438, 'samples': 12112896, 'steps': 23657, 'loss/train': 1.4667598009109497} +03/04/2022 17:15:01 - INFO - codeparrot_training - Step 23658: {'lr': 0.00047404265272744586, 'samples': 12113408, 'steps': 23658, 'loss/train': 1.3832734823226929} +03/04/2022 17:15:02 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 17:15:06 - INFO - codeparrot_training - Step 23659: {'lr': 0.0004740402980233978, 'samples': 12113920, 'steps': 23659, 'loss/train': 2.09853196144104} +03/04/2022 17:15:10 - INFO - codeparrot_training - Step 23660: {'lr': 0.00047403794321840064, 'samples': 12114432, 'steps': 23660, 'loss/train': 1.8587846755981445} +03/04/2022 17:15:11 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 17:15:15 - INFO - codeparrot_training - Step 23661: {'lr': 0.0004740355883124555, 'samples': 12114944, 'steps': 23661, 'loss/train': 1.103425145149231} +03/04/2022 17:15:18 - INFO - codeparrot_training - Step 23662: {'lr': 0.0004740332333055633, 'samples': 12115456, 'steps': 23662, 'loss/train': 2.531468629837036} +03/04/2022 17:15:19 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 17:15:23 - INFO - codeparrot_training - Step 23663: {'lr': 0.00047403087819772517, 'samples': 12115968, 'steps': 23663, 'loss/train': 2.1275932788848877} +03/04/2022 17:15:26 - INFO - codeparrot_training - Step 23664: {'lr': 0.0004740285229889423, 'samples': 12116480, 'steps': 23664, 'loss/train': 1.7240160703659058} +03/04/2022 17:15:28 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 17:15:32 - INFO - codeparrot_training - Step 23665: {'lr': 0.0004740261676792155, 'samples': 12116992, 'steps': 23665, 'loss/train': 2.259674310684204} +03/04/2022 17:15:35 - INFO - codeparrot_training - Step 23666: {'lr': 0.00047402381226854606, 'samples': 12117504, 'steps': 23666, 'loss/train': 1.846083164215088} +03/04/2022 17:15:36 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 17:15:40 - INFO - codeparrot_training - Step 23667: {'lr': 0.0004740214567569349, 'samples': 12118016, 'steps': 23667, 'loss/train': 2.290923595428467} +03/04/2022 17:15:44 - INFO - codeparrot_training - Step 23668: {'lr': 0.00047401910114438313, 'samples': 12118528, 'steps': 23668, 'loss/train': 2.701794147491455} +03/04/2022 17:15:45 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/04/2022 17:15:49 - INFO - codeparrot_training - Step 23669: {'lr': 0.0004740167454308918, 'samples': 12119040, 'steps': 23669, 'loss/train': 1.6841286420822144} +03/04/2022 17:15:52 - INFO - codeparrot_training - Step 23670: {'lr': 0.00047401438961646206, 'samples': 12119552, 'steps': 23670, 'loss/train': 2.0812065601348877} +03/04/2022 17:15:53 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 17:15:57 - INFO - codeparrot_training - Step 23671: {'lr': 0.0004740120337010948, 'samples': 12120064, 'steps': 23671, 'loss/train': 2.4943108558654785} +03/04/2022 17:16:01 - INFO - codeparrot_training - Step 23672: {'lr': 0.0004740096776847912, 'samples': 12120576, 'steps': 23672, 'loss/train': 1.3477282524108887} +03/04/2022 17:16:02 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 17:16:06 - INFO - codeparrot_training - Step 23673: {'lr': 0.0004740073215675523, 'samples': 12121088, 'steps': 23673, 'loss/train': 2.1261520385742188} +03/04/2022 17:16:09 - INFO - codeparrot_training - Step 23674: {'lr': 0.00047400496534937914, 'samples': 12121600, 'steps': 23674, 'loss/train': 1.7481192350387573} +03/04/2022 17:16:10 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/04/2022 17:16:14 - INFO - codeparrot_training - Step 23675: {'lr': 0.00047400260903027283, 'samples': 12122112, 'steps': 23675, 'loss/train': 1.3841853141784668} +03/04/2022 17:16:18 - INFO - codeparrot_training - Step 23676: {'lr': 0.0004740002526102344, 'samples': 12122624, 'steps': 23676, 'loss/train': 1.9130616188049316} +03/04/2022 17:16:19 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 17:16:23 - INFO - codeparrot_training - Step 23677: {'lr': 0.0004739978960892649, 'samples': 12123136, 'steps': 23677, 'loss/train': 1.8244049549102783} +03/04/2022 17:16:26 - INFO - codeparrot_training - Step 23678: {'lr': 0.0004739955394673654, 'samples': 12123648, 'steps': 23678, 'loss/train': 2.190603256225586} +03/04/2022 17:16:27 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 17:16:31 - INFO - codeparrot_training - Step 23679: {'lr': 0.000473993182744537, 'samples': 12124160, 'steps': 23679, 'loss/train': 1.7991740703582764} +03/04/2022 17:16:35 - INFO - codeparrot_training - Step 23680: {'lr': 0.0004739908259207807, 'samples': 12124672, 'steps': 23680, 'loss/train': 2.238150119781494} +03/04/2022 17:16:36 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 17:16:40 - INFO - codeparrot_training - Step 23681: {'lr': 0.00047398846899609755, 'samples': 12125184, 'steps': 23681, 'loss/train': 2.4663662910461426} +03/04/2022 17:16:43 - INFO - codeparrot_training - Step 23682: {'lr': 0.0004739861119704887, 'samples': 12125696, 'steps': 23682, 'loss/train': 1.0735864639282227} +03/04/2022 17:16:45 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/04/2022 17:16:48 - INFO - codeparrot_training - Step 23683: {'lr': 0.00047398375484395517, 'samples': 12126208, 'steps': 23683, 'loss/train': 1.6151368618011475} +03/04/2022 17:16:52 - INFO - codeparrot_training - Step 23684: {'lr': 0.00047398139761649794, 'samples': 12126720, 'steps': 23684, 'loss/train': 1.4145499467849731} +03/04/2022 17:16:53 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/04/2022 17:16:57 - INFO - codeparrot_training - Step 23685: {'lr': 0.00047397904028811824, 'samples': 12127232, 'steps': 23685, 'loss/train': 2.20929217338562} +03/04/2022 17:17:00 - INFO - codeparrot_training - Step 23686: {'lr': 0.000473976682858817, 'samples': 12127744, 'steps': 23686, 'loss/train': 2.404446601867676} +03/04/2022 17:17:04 - INFO - codeparrot_training - Step 23687: {'lr': 0.00047397432532859533, 'samples': 12128256, 'steps': 23687, 'loss/train': 1.5859482288360596} +03/04/2022 17:17:04 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 17:17:09 - INFO - codeparrot_training - Step 23688: {'lr': 0.00047397196769745435, 'samples': 12128768, 'steps': 23688, 'loss/train': 2.225498676300049} +03/04/2022 17:17:12 - INFO - codeparrot_training - Step 23689: {'lr': 0.00047396960996539495, 'samples': 12129280, 'steps': 23689, 'loss/train': 1.8858722448349} +03/04/2022 17:17:12 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 17:17:17 - INFO - codeparrot_training - Step 23690: {'lr': 0.00047396725213241835, 'samples': 12129792, 'steps': 23690, 'loss/train': 0.3528008460998535} +03/04/2022 17:17:21 - INFO - codeparrot_training - Step 23691: {'lr': 0.0004739648941985256, 'samples': 12130304, 'steps': 23691, 'loss/train': 2.063058614730835} +03/04/2022 17:17:21 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 17:17:26 - INFO - codeparrot_training - Step 23692: {'lr': 0.00047396253616371767, 'samples': 12130816, 'steps': 23692, 'loss/train': 2.606527090072632} +03/04/2022 17:17:29 - INFO - codeparrot_training - Step 23693: {'lr': 0.00047396017802799566, 'samples': 12131328, 'steps': 23693, 'loss/train': 2.0827784538269043} +03/04/2022 17:17:29 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/04/2022 17:17:34 - INFO - codeparrot_training - Step 23694: {'lr': 0.0004739578197913607, 'samples': 12131840, 'steps': 23694, 'loss/train': 1.569892406463623} +03/04/2022 17:17:37 - INFO - codeparrot_training - Step 23695: {'lr': 0.00047395546145381377, 'samples': 12132352, 'steps': 23695, 'loss/train': 1.2835166454315186} +03/04/2022 17:17:37 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 17:17:43 - INFO - codeparrot_training - Step 23696: {'lr': 0.000473953103015356, 'samples': 12132864, 'steps': 23696, 'loss/train': 2.1140060424804688} +03/04/2022 17:17:46 - INFO - codeparrot_training - Step 23697: {'lr': 0.0004739507444759884, 'samples': 12133376, 'steps': 23697, 'loss/train': 2.259042263031006} +03/04/2022 17:17:46 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 17:17:51 - INFO - codeparrot_training - Step 23698: {'lr': 0.0004739483858357121, 'samples': 12133888, 'steps': 23698, 'loss/train': 1.558167815208435} +03/04/2022 17:17:54 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 17:17:56 - INFO - codeparrot_training - Step 23699: {'lr': 0.00047394602709452806, 'samples': 12134400, 'steps': 23699, 'loss/train': 2.066560745239258} +03/04/2022 17:17:59 - INFO - codeparrot_training - Step 23700: {'lr': 0.0004739436682524373, 'samples': 12134912, 'steps': 23700, 'loss/train': 1.818149209022522} +03/04/2022 17:18:02 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 17:18:05 - INFO - codeparrot_training - Step 23701: {'lr': 0.00047394130930944115, 'samples': 12135424, 'steps': 23701, 'loss/train': 1.6560214757919312} +03/04/2022 17:18:08 - INFO - codeparrot_training - Step 23702: {'lr': 0.0004739389502655404, 'samples': 12135936, 'steps': 23702, 'loss/train': 1.8994088172912598} +03/04/2022 17:18:11 - INFO - codeparrot_training - Step 23703: {'lr': 0.0004739365911207363, 'samples': 12136448, 'steps': 23703, 'loss/train': 2.0355587005615234} +03/04/2022 17:18:11 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/04/2022 17:18:16 - INFO - codeparrot_training - Step 23704: {'lr': 0.0004739342318750297, 'samples': 12136960, 'steps': 23704, 'loss/train': 1.6351745128631592} +03/04/2022 17:18:19 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 17:18:22 - INFO - codeparrot_training - Step 23705: {'lr': 0.00047393187252842183, 'samples': 12137472, 'steps': 23705, 'loss/train': 1.9731441736221313} +03/04/2022 17:18:25 - INFO - codeparrot_training - Step 23706: {'lr': 0.0004739295130809138, 'samples': 12137984, 'steps': 23706, 'loss/train': 2.0211598873138428} +03/04/2022 17:18:28 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/04/2022 17:18:30 - INFO - codeparrot_training - Step 23707: {'lr': 0.0004739271535325065, 'samples': 12138496, 'steps': 23707, 'loss/train': 2.0636277198791504} +03/04/2022 17:18:33 - INFO - codeparrot_training - Step 23708: {'lr': 0.00047392479388320106, 'samples': 12139008, 'steps': 23708, 'loss/train': 2.6288678646087646} +03/04/2022 17:18:36 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/04/2022 17:18:39 - INFO - codeparrot_training - Step 23709: {'lr': 0.0004739224341329987, 'samples': 12139520, 'steps': 23709, 'loss/train': 1.1857538223266602} +03/04/2022 17:18:42 - INFO - codeparrot_training - Step 23710: {'lr': 0.0004739200742819002, 'samples': 12140032, 'steps': 23710, 'loss/train': 1.7754943370819092} +03/04/2022 17:18:45 - INFO - codeparrot_training - Step 23711: {'lr': 0.0004739177143299068, 'samples': 12140544, 'steps': 23711, 'loss/train': 2.097775936126709} +03/04/2022 17:18:45 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/04/2022 17:18:50 - INFO - codeparrot_training - Step 23712: {'lr': 0.00047391535427701966, 'samples': 12141056, 'steps': 23712, 'loss/train': 2.475356340408325} +03/04/2022 17:18:53 - INFO - codeparrot_training - Step 23713: {'lr': 0.0004739129941232396, 'samples': 12141568, 'steps': 23713, 'loss/train': 1.3106184005737305} +03/04/2022 17:18:54 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 17:18:59 - INFO - codeparrot_training - Step 23714: {'lr': 0.0004739106338685678, 'samples': 12142080, 'steps': 23714, 'loss/train': 1.8121132850646973} +03/04/2022 17:19:02 - INFO - codeparrot_training - Step 23715: {'lr': 0.00047390827351300537, 'samples': 12142592, 'steps': 23715, 'loss/train': 1.4897972345352173} +03/04/2022 17:19:02 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 17:19:07 - INFO - codeparrot_training - Step 23716: {'lr': 0.00047390591305655327, 'samples': 12143104, 'steps': 23716, 'loss/train': 1.3802684545516968} +03/04/2022 17:19:10 - INFO - codeparrot_training - Step 23717: {'lr': 0.0004739035524992127, 'samples': 12143616, 'steps': 23717, 'loss/train': 2.931997299194336} +03/04/2022 17:19:11 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 17:19:15 - INFO - codeparrot_training - Step 23718: {'lr': 0.00047390119184098455, 'samples': 12144128, 'steps': 23718, 'loss/train': 2.1765103340148926} +03/04/2022 17:19:19 - INFO - codeparrot_training - Step 23719: {'lr': 0.00047389883108187004, 'samples': 12144640, 'steps': 23719, 'loss/train': 2.733281373977661} +03/04/2022 17:19:19 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 17:19:24 - INFO - codeparrot_training - Step 23720: {'lr': 0.00047389647022187014, 'samples': 12145152, 'steps': 23720, 'loss/train': 1.1348062753677368} +03/04/2022 17:19:27 - INFO - codeparrot_training - Step 23721: {'lr': 0.000473894109260986, 'samples': 12145664, 'steps': 23721, 'loss/train': 1.650702953338623} +03/04/2022 17:19:28 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 17:19:32 - INFO - codeparrot_training - Step 23722: {'lr': 0.00047389174819921856, 'samples': 12146176, 'steps': 23722, 'loss/train': 2.044210195541382} +03/04/2022 17:19:35 - INFO - codeparrot_training - Step 23723: {'lr': 0.000473889387036569, 'samples': 12146688, 'steps': 23723, 'loss/train': 2.248384714126587} +03/04/2022 17:19:36 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 17:19:41 - INFO - codeparrot_training - Step 23724: {'lr': 0.0004738870257730383, 'samples': 12147200, 'steps': 23724, 'loss/train': 0.9605239033699036} +03/04/2022 17:19:44 - INFO - codeparrot_training - Step 23725: {'lr': 0.00047388466440862755, 'samples': 12147712, 'steps': 23725, 'loss/train': 1.6707936525344849} +03/04/2022 17:19:45 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/04/2022 17:19:49 - INFO - codeparrot_training - Step 23726: {'lr': 0.0004738823029433379, 'samples': 12148224, 'steps': 23726, 'loss/train': 2.073342800140381} +03/04/2022 17:19:52 - INFO - codeparrot_training - Step 23727: {'lr': 0.0004738799413771703, 'samples': 12148736, 'steps': 23727, 'loss/train': 1.8634250164031982} +03/04/2022 17:19:53 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/04/2022 17:19:58 - INFO - codeparrot_training - Step 23728: {'lr': 0.0004738775797101258, 'samples': 12149248, 'steps': 23728, 'loss/train': 1.5589054822921753} +03/04/2022 17:20:01 - INFO - codeparrot_training - Step 23729: {'lr': 0.0004738752179422056, 'samples': 12149760, 'steps': 23729, 'loss/train': 2.0648515224456787} +03/04/2022 17:20:01 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 17:20:06 - INFO - codeparrot_training - Step 23730: {'lr': 0.00047387285607341064, 'samples': 12150272, 'steps': 23730, 'loss/train': 1.9939045906066895} +03/04/2022 17:20:09 - INFO - codeparrot_training - Step 23731: {'lr': 0.00047387049410374207, 'samples': 12150784, 'steps': 23731, 'loss/train': 2.070875883102417} +03/04/2022 17:20:10 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 17:20:14 - INFO - codeparrot_training - Step 23732: {'lr': 0.00047386813203320084, 'samples': 12151296, 'steps': 23732, 'loss/train': 1.861603856086731} +03/04/2022 17:20:18 - INFO - codeparrot_training - Step 23733: {'lr': 0.0004738657698617881, 'samples': 12151808, 'steps': 23733, 'loss/train': 1.8075370788574219} +03/04/2022 17:20:18 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/04/2022 17:20:23 - INFO - codeparrot_training - Step 23734: {'lr': 0.00047386340758950494, 'samples': 12152320, 'steps': 23734, 'loss/train': 2.202697992324829} +03/04/2022 17:20:26 - INFO - codeparrot_training - Step 23735: {'lr': 0.0004738610452163523, 'samples': 12152832, 'steps': 23735, 'loss/train': 2.1121022701263428} +03/04/2022 17:20:26 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 17:20:31 - INFO - codeparrot_training - Step 23736: {'lr': 0.00047385868274233144, 'samples': 12153344, 'steps': 23736, 'loss/train': 2.2542190551757812} +03/04/2022 17:20:34 - INFO - codeparrot_training - Step 23737: {'lr': 0.0004738563201674432, 'samples': 12153856, 'steps': 23737, 'loss/train': 2.7913613319396973} +03/04/2022 17:20:35 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 17:20:40 - INFO - codeparrot_training - Step 23738: {'lr': 0.00047385395749168885, 'samples': 12154368, 'steps': 23738, 'loss/train': 2.452979326248169} +03/04/2022 17:20:43 - INFO - codeparrot_training - Step 23739: {'lr': 0.00047385159471506936, 'samples': 12154880, 'steps': 23739, 'loss/train': 1.2685240507125854} +03/04/2022 17:20:44 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 17:20:48 - INFO - codeparrot_training - Step 23740: {'lr': 0.00047384923183758573, 'samples': 12155392, 'steps': 23740, 'loss/train': 1.3219494819641113} +03/04/2022 17:20:52 - INFO - codeparrot_training - Step 23741: {'lr': 0.0004738468688592391, 'samples': 12155904, 'steps': 23741, 'loss/train': 3.1401445865631104} +03/04/2022 17:20:52 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/04/2022 17:20:57 - INFO - codeparrot_training - Step 23742: {'lr': 0.00047384450578003055, 'samples': 12156416, 'steps': 23742, 'loss/train': 1.402923583984375} +03/04/2022 17:21:00 - INFO - codeparrot_training - Step 23743: {'lr': 0.00047384214259996117, 'samples': 12156928, 'steps': 23743, 'loss/train': 0.37630972266197205} +03/04/2022 17:21:00 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/04/2022 17:21:05 - INFO - codeparrot_training - Step 23744: {'lr': 0.0004738397793190319, 'samples': 12157440, 'steps': 23744, 'loss/train': 1.6994844675064087} +03/04/2022 17:21:08 - INFO - codeparrot_training - Step 23745: {'lr': 0.00047383741593724386, 'samples': 12157952, 'steps': 23745, 'loss/train': 1.9833449125289917} +03/04/2022 17:21:09 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 17:21:14 - INFO - codeparrot_training - Step 23746: {'lr': 0.0004738350524545982, 'samples': 12158464, 'steps': 23746, 'loss/train': 1.8134338855743408} +03/04/2022 17:21:17 - INFO - codeparrot_training - Step 23747: {'lr': 0.0004738326888710959, 'samples': 12158976, 'steps': 23747, 'loss/train': 2.431206464767456} +03/04/2022 17:21:17 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 17:21:22 - INFO - codeparrot_training - Step 23748: {'lr': 0.000473830325186738, 'samples': 12159488, 'steps': 23748, 'loss/train': 1.065942645072937} +03/04/2022 17:21:25 - INFO - codeparrot_training - Step 23749: {'lr': 0.0004738279614015257, 'samples': 12160000, 'steps': 23749, 'loss/train': 1.8762257099151611} +03/04/2022 17:21:26 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 17:21:30 - INFO - codeparrot_training - Step 23750: {'lr': 0.0004738255975154599, 'samples': 12160512, 'steps': 23750, 'loss/train': 1.9966421127319336} +03/04/2022 17:21:34 - INFO - codeparrot_training - Step 23751: {'lr': 0.0004738232335285417, 'samples': 12161024, 'steps': 23751, 'loss/train': 1.7481709718704224} +03/04/2022 17:21:34 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 17:21:39 - INFO - codeparrot_training - Step 23752: {'lr': 0.0004738208694407723, 'samples': 12161536, 'steps': 23752, 'loss/train': 1.4975959062576294} +03/04/2022 17:21:42 - INFO - codeparrot_training - Step 23753: {'lr': 0.00047381850525215265, 'samples': 12162048, 'steps': 23753, 'loss/train': 2.1139724254608154} +03/04/2022 17:21:43 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/04/2022 17:21:47 - INFO - codeparrot_training - Step 23754: {'lr': 0.0004738161409626838, 'samples': 12162560, 'steps': 23754, 'loss/train': 1.6707584857940674} +03/04/2022 17:21:50 - INFO - codeparrot_training - Step 23755: {'lr': 0.0004738137765723669, 'samples': 12163072, 'steps': 23755, 'loss/train': 1.4585719108581543} +03/04/2022 17:21:51 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 17:21:56 - INFO - codeparrot_training - Step 23756: {'lr': 0.0004738114120812029, 'samples': 12163584, 'steps': 23756, 'loss/train': 1.399327278137207} +03/04/2022 17:21:59 - INFO - codeparrot_training - Step 23757: {'lr': 0.000473809047489193, 'samples': 12164096, 'steps': 23757, 'loss/train': 1.1605526208877563} +03/04/2022 17:22:00 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 17:22:04 - INFO - codeparrot_training - Step 23758: {'lr': 0.00047380668279633814, 'samples': 12164608, 'steps': 23758, 'loss/train': 1.6233943700790405} +03/04/2022 17:22:07 - INFO - codeparrot_training - Step 23759: {'lr': 0.00047380431800263945, 'samples': 12165120, 'steps': 23759, 'loss/train': 0.8393142819404602} +03/04/2022 17:22:08 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/04/2022 17:22:13 - INFO - codeparrot_training - Step 23760: {'lr': 0.000473801953108098, 'samples': 12165632, 'steps': 23760, 'loss/train': 1.7899975776672363} +03/04/2022 17:22:16 - INFO - codeparrot_training - Step 23761: {'lr': 0.0004737995881127149, 'samples': 12166144, 'steps': 23761, 'loss/train': 1.5440043210983276} +03/04/2022 17:22:17 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 17:22:21 - INFO - codeparrot_training - Step 23762: {'lr': 0.0004737972230164911, 'samples': 12166656, 'steps': 23762, 'loss/train': 2.426323413848877} +03/04/2022 17:22:24 - INFO - codeparrot_training - Step 23763: {'lr': 0.0004737948578194278, 'samples': 12167168, 'steps': 23763, 'loss/train': 0.5034106969833374} +03/04/2022 17:22:25 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 17:22:30 - INFO - codeparrot_training - Step 23764: {'lr': 0.00047379249252152585, 'samples': 12167680, 'steps': 23764, 'loss/train': 2.5461409091949463} +03/04/2022 17:22:33 - INFO - codeparrot_training - Step 23765: {'lr': 0.00047379012712278656, 'samples': 12168192, 'steps': 23765, 'loss/train': 1.6752080917358398} +03/04/2022 17:22:33 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 17:22:38 - INFO - codeparrot_training - Step 23766: {'lr': 0.0004737877616232108, 'samples': 12168704, 'steps': 23766, 'loss/train': 1.714516520500183} +03/04/2022 17:22:41 - INFO - codeparrot_training - Step 23767: {'lr': 0.0004737853960227998, 'samples': 12169216, 'steps': 23767, 'loss/train': 1.1917070150375366} +03/04/2022 17:22:42 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 17:22:46 - INFO - codeparrot_training - Step 23768: {'lr': 0.00047378303032155454, 'samples': 12169728, 'steps': 23768, 'loss/train': 2.0745646953582764} +03/04/2022 17:22:50 - INFO - codeparrot_training - Step 23769: {'lr': 0.0004737806645194761, 'samples': 12170240, 'steps': 23769, 'loss/train': 2.1327240467071533} +03/04/2022 17:22:51 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 17:22:55 - INFO - codeparrot_training - Step 23770: {'lr': 0.00047377829861656556, 'samples': 12170752, 'steps': 23770, 'loss/train': 1.5674903392791748} +03/04/2022 17:22:58 - INFO - codeparrot_training - Step 23771: {'lr': 0.000473775932612824, 'samples': 12171264, 'steps': 23771, 'loss/train': 0.6945176720619202} +03/04/2022 17:22:59 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 17:23:03 - INFO - codeparrot_training - Step 23772: {'lr': 0.00047377356650825245, 'samples': 12171776, 'steps': 23772, 'loss/train': 1.8209755420684814} +03/04/2022 17:23:06 - INFO - codeparrot_training - Step 23773: {'lr': 0.00047377120030285194, 'samples': 12172288, 'steps': 23773, 'loss/train': 2.033637285232544} +03/04/2022 17:23:07 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/04/2022 17:23:12 - INFO - codeparrot_training - Step 23774: {'lr': 0.0004737688339966235, 'samples': 12172800, 'steps': 23774, 'loss/train': 1.318041443824768} +03/04/2022 17:23:15 - INFO - codeparrot_training - Step 23775: {'lr': 0.00047376646758956844, 'samples': 12173312, 'steps': 23775, 'loss/train': 1.529974341392517} +03/04/2022 17:23:16 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 17:23:20 - INFO - codeparrot_training - Step 23776: {'lr': 0.00047376410108168756, 'samples': 12173824, 'steps': 23776, 'loss/train': 1.8632819652557373} +03/04/2022 17:23:23 - INFO - codeparrot_training - Step 23777: {'lr': 0.0004737617344729821, 'samples': 12174336, 'steps': 23777, 'loss/train': 1.1773691177368164} +03/04/2022 17:23:24 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 17:23:29 - INFO - codeparrot_training - Step 23778: {'lr': 0.00047375936776345297, 'samples': 12174848, 'steps': 23778, 'loss/train': 2.519303798675537} +03/04/2022 17:23:32 - INFO - codeparrot_training - Step 23779: {'lr': 0.00047375700095310136, 'samples': 12175360, 'steps': 23779, 'loss/train': 2.1506848335266113} +03/04/2022 17:23:33 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 17:23:37 - INFO - codeparrot_training - Step 23780: {'lr': 0.0004737546340419283, 'samples': 12175872, 'steps': 23780, 'loss/train': 1.461902379989624} +03/04/2022 17:23:40 - INFO - codeparrot_training - Step 23781: {'lr': 0.0004737522670299349, 'samples': 12176384, 'steps': 23781, 'loss/train': 1.752360224723816} +03/04/2022 17:23:41 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 17:23:46 - INFO - codeparrot_training - Step 23782: {'lr': 0.00047374989991712214, 'samples': 12176896, 'steps': 23782, 'loss/train': 2.0341153144836426} +03/04/2022 17:23:49 - INFO - codeparrot_training - Step 23783: {'lr': 0.00047374753270349113, 'samples': 12177408, 'steps': 23783, 'loss/train': 1.2026571035385132} +03/04/2022 17:23:50 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/04/2022 17:23:54 - INFO - codeparrot_training - Step 23784: {'lr': 0.00047374516538904287, 'samples': 12177920, 'steps': 23784, 'loss/train': 2.0054843425750732} +03/04/2022 17:23:57 - INFO - codeparrot_training - Step 23785: {'lr': 0.0004737427979737786, 'samples': 12178432, 'steps': 23785, 'loss/train': 1.8798655271530151} +03/04/2022 17:23:58 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/04/2022 17:24:02 - INFO - codeparrot_training - Step 23786: {'lr': 0.0004737404304576992, 'samples': 12178944, 'steps': 23786, 'loss/train': 1.8634406328201294} +03/04/2022 17:24:06 - INFO - codeparrot_training - Step 23787: {'lr': 0.0004737380628408059, 'samples': 12179456, 'steps': 23787, 'loss/train': 2.1125690937042236} +03/04/2022 17:24:06 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/04/2022 17:24:11 - INFO - codeparrot_training - Step 23788: {'lr': 0.00047373569512309963, 'samples': 12179968, 'steps': 23788, 'loss/train': 1.8610633611679077} +03/04/2022 17:24:14 - INFO - codeparrot_training - Step 23789: {'lr': 0.0004737333273045815, 'samples': 12180480, 'steps': 23789, 'loss/train': 1.2013576030731201} +03/04/2022 17:24:15 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 17:24:19 - INFO - codeparrot_training - Step 23790: {'lr': 0.00047373095938525256, 'samples': 12180992, 'steps': 23790, 'loss/train': 2.0161452293395996} +03/04/2022 17:24:22 - INFO - codeparrot_training - Step 23791: {'lr': 0.0004737285913651139, 'samples': 12181504, 'steps': 23791, 'loss/train': 1.1773390769958496} +03/04/2022 17:24:23 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 17:24:28 - INFO - codeparrot_training - Step 23792: {'lr': 0.0004737262232441667, 'samples': 12182016, 'steps': 23792, 'loss/train': 1.6415743827819824} +03/04/2022 17:24:31 - INFO - codeparrot_training - Step 23793: {'lr': 0.00047372385502241176, 'samples': 12182528, 'steps': 23793, 'loss/train': 1.8930208683013916} +03/04/2022 17:24:31 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 17:24:36 - INFO - codeparrot_training - Step 23794: {'lr': 0.0004737214866998504, 'samples': 12183040, 'steps': 23794, 'loss/train': 1.823832392692566} +03/04/2022 17:24:39 - INFO - codeparrot_training - Step 23795: {'lr': 0.0004737191182764836, 'samples': 12183552, 'steps': 23795, 'loss/train': 2.3182008266448975} +03/04/2022 17:24:40 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/04/2022 17:24:44 - INFO - codeparrot_training - Step 23796: {'lr': 0.0004737167497523124, 'samples': 12184064, 'steps': 23796, 'loss/train': 1.6382757425308228} +03/04/2022 17:24:48 - INFO - codeparrot_training - Step 23797: {'lr': 0.0004737143811273379, 'samples': 12184576, 'steps': 23797, 'loss/train': 1.9215400218963623} +03/04/2022 17:24:48 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 17:24:53 - INFO - codeparrot_training - Step 23798: {'lr': 0.0004737120124015611, 'samples': 12185088, 'steps': 23798, 'loss/train': 1.6340798139572144} +03/04/2022 17:24:56 - INFO - codeparrot_training - Step 23799: {'lr': 0.00047370964357498313, 'samples': 12185600, 'steps': 23799, 'loss/train': 2.1025044918060303} +03/04/2022 17:24:56 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/04/2022 17:25:01 - INFO - codeparrot_training - Step 23800: {'lr': 0.0004737072746476051, 'samples': 12186112, 'steps': 23800, 'loss/train': 1.9751176834106445} +03/04/2022 17:25:04 - INFO - codeparrot_training - Step 23801: {'lr': 0.00047370490561942795, 'samples': 12186624, 'steps': 23801, 'loss/train': 2.1546971797943115} +03/04/2022 17:25:05 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 17:25:10 - INFO - codeparrot_training - Step 23802: {'lr': 0.00047370253649045286, 'samples': 12187136, 'steps': 23802, 'loss/train': 0.9956389665603638} +03/04/2022 17:25:13 - INFO - codeparrot_training - Step 23803: {'lr': 0.00047370016726068086, 'samples': 12187648, 'steps': 23803, 'loss/train': 1.6071703433990479} +03/04/2022 17:25:13 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 17:25:18 - INFO - codeparrot_training - Step 23804: {'lr': 0.000473697797930113, 'samples': 12188160, 'steps': 23804, 'loss/train': 1.7049932479858398} +03/04/2022 17:25:21 - INFO - codeparrot_training - Step 23805: {'lr': 0.00047369542849875037, 'samples': 12188672, 'steps': 23805, 'loss/train': 2.2715654373168945} +03/04/2022 17:25:21 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 17:25:26 - INFO - codeparrot_training - Step 23806: {'lr': 0.0004736930589665941, 'samples': 12189184, 'steps': 23806, 'loss/train': 1.4308083057403564} +03/04/2022 17:25:30 - INFO - codeparrot_training - Step 23807: {'lr': 0.0004736906893336451, 'samples': 12189696, 'steps': 23807, 'loss/train': 2.0052084922790527} +03/04/2022 17:25:30 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 17:25:35 - INFO - codeparrot_training - Step 23808: {'lr': 0.00047368831959990453, 'samples': 12190208, 'steps': 23808, 'loss/train': 1.7973670959472656} +03/04/2022 17:25:38 - INFO - codeparrot_training - Step 23809: {'lr': 0.0004736859497653735, 'samples': 12190720, 'steps': 23809, 'loss/train': 0.4463362991809845} +03/04/2022 17:25:39 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/04/2022 17:25:44 - INFO - codeparrot_training - Step 23810: {'lr': 0.0004736835798300531, 'samples': 12191232, 'steps': 23810, 'loss/train': 1.3963499069213867} +03/04/2022 17:25:47 - INFO - codeparrot_training - Step 23811: {'lr': 0.00047368120979394415, 'samples': 12191744, 'steps': 23811, 'loss/train': 1.2847224473953247} +03/04/2022 17:25:48 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 17:25:52 - INFO - codeparrot_training - Step 23812: {'lr': 0.000473678839657048, 'samples': 12192256, 'steps': 23812, 'loss/train': 1.5554486513137817} +03/04/2022 17:25:56 - INFO - codeparrot_training - Step 23813: {'lr': 0.0004736764694193656, 'samples': 12192768, 'steps': 23813, 'loss/train': 2.592989921569824} +03/04/2022 17:25:57 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/04/2022 17:26:01 - INFO - codeparrot_training - Step 23814: {'lr': 0.0004736740990808981, 'samples': 12193280, 'steps': 23814, 'loss/train': 1.992108702659607} +03/04/2022 17:26:04 - INFO - codeparrot_training - Step 23815: {'lr': 0.0004736717286416464, 'samples': 12193792, 'steps': 23815, 'loss/train': 0.7423959374427795} +03/04/2022 17:26:06 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/04/2022 17:26:09 - INFO - codeparrot_training - Step 23816: {'lr': 0.0004736693581016117, 'samples': 12194304, 'steps': 23816, 'loss/train': 0.14589422941207886} +03/04/2022 17:26:13 - INFO - codeparrot_training - Step 23817: {'lr': 0.00047366698746079507, 'samples': 12194816, 'steps': 23817, 'loss/train': 1.6043869256973267} +03/04/2022 17:26:14 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 17:26:18 - INFO - codeparrot_training - Step 23818: {'lr': 0.0004736646167191975, 'samples': 12195328, 'steps': 23818, 'loss/train': 1.9798797369003296} +03/04/2022 17:26:21 - INFO - codeparrot_training - Step 23819: {'lr': 0.00047366224587682017, 'samples': 12195840, 'steps': 23819, 'loss/train': 2.308527708053589} +03/04/2022 17:26:23 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/04/2022 17:26:26 - INFO - codeparrot_training - Step 23820: {'lr': 0.000473659874933664, 'samples': 12196352, 'steps': 23820, 'loss/train': 1.7681331634521484} +03/04/2022 17:26:30 - INFO - codeparrot_training - Step 23821: {'lr': 0.0004736575038897303, 'samples': 12196864, 'steps': 23821, 'loss/train': 1.3930654525756836} +03/04/2022 17:26:31 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/04/2022 17:26:35 - INFO - codeparrot_training - Step 23822: {'lr': 0.0004736551327450198, 'samples': 12197376, 'steps': 23822, 'loss/train': 1.3321549892425537} +03/04/2022 17:26:38 - INFO - codeparrot_training - Step 23823: {'lr': 0.00047365276149953387, 'samples': 12197888, 'steps': 23823, 'loss/train': 1.489685297012329} +03/04/2022 17:26:39 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 17:26:43 - INFO - codeparrot_training - Step 23824: {'lr': 0.0004736503901532734, 'samples': 12198400, 'steps': 23824, 'loss/train': 1.849236249923706} +03/04/2022 17:26:46 - INFO - codeparrot_training - Step 23825: {'lr': 0.00047364801870623954, 'samples': 12198912, 'steps': 23825, 'loss/train': 1.9351019859313965} +03/04/2022 17:26:48 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 17:26:52 - INFO - codeparrot_training - Step 23826: {'lr': 0.00047364564715843326, 'samples': 12199424, 'steps': 23826, 'loss/train': 2.2503044605255127} +03/04/2022 17:26:55 - INFO - codeparrot_training - Step 23827: {'lr': 0.00047364327550985575, 'samples': 12199936, 'steps': 23827, 'loss/train': 1.9199212789535522} +03/04/2022 17:26:56 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 17:27:01 - INFO - codeparrot_training - Step 23828: {'lr': 0.00047364090376050805, 'samples': 12200448, 'steps': 23828, 'loss/train': 2.0211966037750244} +03/04/2022 17:27:04 - INFO - codeparrot_training - Step 23829: {'lr': 0.0004736385319103912, 'samples': 12200960, 'steps': 23829, 'loss/train': 1.7986865043640137} +03/04/2022 17:27:06 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 17:27:09 - INFO - codeparrot_training - Step 23830: {'lr': 0.00047363615995950624, 'samples': 12201472, 'steps': 23830, 'loss/train': 1.3328986167907715} +03/04/2022 17:27:12 - INFO - codeparrot_training - Step 23831: {'lr': 0.0004736337879078544, 'samples': 12201984, 'steps': 23831, 'loss/train': 1.6971014738082886} +03/04/2022 17:27:14 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/04/2022 17:27:18 - INFO - codeparrot_training - Step 23832: {'lr': 0.0004736314157554365, 'samples': 12202496, 'steps': 23832, 'loss/train': 1.5707414150238037} +03/04/2022 17:27:21 - INFO - codeparrot_training - Step 23833: {'lr': 0.00047362904350225376, 'samples': 12203008, 'steps': 23833, 'loss/train': 0.6342618465423584} +03/04/2022 17:27:23 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 17:27:26 - INFO - codeparrot_training - Step 23834: {'lr': 0.0004736266711483073, 'samples': 12203520, 'steps': 23834, 'loss/train': 1.6019678115844727} +03/04/2022 17:27:29 - INFO - codeparrot_training - Step 23835: {'lr': 0.00047362429869359803, 'samples': 12204032, 'steps': 23835, 'loss/train': 2.322605609893799} +03/04/2022 17:27:31 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 17:27:34 - INFO - codeparrot_training - Step 23836: {'lr': 0.0004736219261381271, 'samples': 12204544, 'steps': 23836, 'loss/train': 0.500464141368866} +03/04/2022 17:27:38 - INFO - codeparrot_training - Step 23837: {'lr': 0.0004736195534818956, 'samples': 12205056, 'steps': 23837, 'loss/train': 1.456661343574524} +03/04/2022 17:27:40 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 17:27:43 - INFO - codeparrot_training - Step 23838: {'lr': 0.00047361718072490457, 'samples': 12205568, 'steps': 23838, 'loss/train': 1.9324203729629517} +03/04/2022 17:27:46 - INFO - codeparrot_training - Step 23839: {'lr': 0.00047361480786715514, 'samples': 12206080, 'steps': 23839, 'loss/train': 1.3988748788833618} +03/04/2022 17:27:48 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/04/2022 17:27:51 - INFO - codeparrot_training - Step 23840: {'lr': 0.00047361243490864826, 'samples': 12206592, 'steps': 23840, 'loss/train': 1.4323186874389648} +03/04/2022 17:27:54 - INFO - codeparrot_training - Step 23841: {'lr': 0.00047361006184938517, 'samples': 12207104, 'steps': 23841, 'loss/train': 1.8228693008422852} +03/04/2022 17:27:56 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 17:28:00 - INFO - codeparrot_training - Step 23842: {'lr': 0.00047360768868936673, 'samples': 12207616, 'steps': 23842, 'loss/train': 1.984322190284729} +03/04/2022 17:28:03 - INFO - codeparrot_training - Step 23843: {'lr': 0.00047360531542859415, 'samples': 12208128, 'steps': 23843, 'loss/train': 1.494211196899414} +03/04/2022 17:28:05 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/04/2022 17:28:08 - INFO - codeparrot_training - Step 23844: {'lr': 0.00047360294206706845, 'samples': 12208640, 'steps': 23844, 'loss/train': 1.333129644393921} +03/04/2022 17:28:11 - INFO - codeparrot_training - Step 23845: {'lr': 0.0004736005686047907, 'samples': 12209152, 'steps': 23845, 'loss/train': 0.9041134715080261} +03/04/2022 17:28:13 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/04/2022 17:28:17 - INFO - codeparrot_training - Step 23846: {'lr': 0.000473598195041762, 'samples': 12209664, 'steps': 23846, 'loss/train': 1.2231093645095825} +03/04/2022 17:28:20 - INFO - codeparrot_training - Step 23847: {'lr': 0.0004735958213779835, 'samples': 12210176, 'steps': 23847, 'loss/train': 0.9231230020523071} +03/04/2022 17:28:22 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 17:28:25 - INFO - codeparrot_training - Step 23848: {'lr': 0.0004735934476134561, 'samples': 12210688, 'steps': 23848, 'loss/train': 0.6269928812980652} +03/04/2022 17:28:28 - INFO - codeparrot_training - Step 23849: {'lr': 0.0004735910737481809, 'samples': 12211200, 'steps': 23849, 'loss/train': 2.0149922370910645} +03/04/2022 17:28:30 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 17:28:34 - INFO - codeparrot_training - Step 23850: {'lr': 0.0004735886997821591, 'samples': 12211712, 'steps': 23850, 'loss/train': 1.9665768146514893} +03/04/2022 17:28:37 - INFO - codeparrot_training - Step 23851: {'lr': 0.00047358632571539163, 'samples': 12212224, 'steps': 23851, 'loss/train': 1.7910634279251099} +03/04/2022 17:28:39 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 17:28:42 - INFO - codeparrot_training - Step 23852: {'lr': 0.0004735839515478796, 'samples': 12212736, 'steps': 23852, 'loss/train': 1.2872527837753296} +03/04/2022 17:28:45 - INFO - codeparrot_training - Step 23853: {'lr': 0.0004735815772796241, 'samples': 12213248, 'steps': 23853, 'loss/train': 2.1632232666015625} +03/04/2022 17:28:47 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/04/2022 17:28:50 - INFO - codeparrot_training - Step 23854: {'lr': 0.0004735792029106262, 'samples': 12213760, 'steps': 23854, 'loss/train': 1.5957587957382202} +03/04/2022 17:28:54 - INFO - codeparrot_training - Step 23855: {'lr': 0.0004735768284408869, 'samples': 12214272, 'steps': 23855, 'loss/train': 2.112071990966797} +03/04/2022 17:28:59 - INFO - codeparrot_training - Step 23856: {'lr': 0.00047357445387040745, 'samples': 12214784, 'steps': 23856, 'loss/train': 1.7045769691467285} +03/04/2022 17:29:02 - INFO - codeparrot_training - Step 23857: {'lr': 0.0004735720791991887, 'samples': 12215296, 'steps': 23857, 'loss/train': 1.773215651512146} +03/04/2022 17:29:03 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/04/2022 17:29:07 - INFO - codeparrot_training - Step 23858: {'lr': 0.00047356970442723184, 'samples': 12215808, 'steps': 23858, 'loss/train': 1.8741148710250854} +03/04/2022 17:29:10 - INFO - codeparrot_training - Step 23859: {'lr': 0.00047356732955453794, 'samples': 12216320, 'steps': 23859, 'loss/train': 0.4049205780029297} +03/04/2022 17:29:12 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 17:29:16 - INFO - codeparrot_training - Step 23860: {'lr': 0.00047356495458110806, 'samples': 12216832, 'steps': 23860, 'loss/train': 1.4999581575393677} +03/04/2022 17:29:19 - INFO - codeparrot_training - Step 23861: {'lr': 0.00047356257950694326, 'samples': 12217344, 'steps': 23861, 'loss/train': 1.5400173664093018} +03/04/2022 17:29:21 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 17:29:24 - INFO - codeparrot_training - Step 23862: {'lr': 0.0004735602043320446, 'samples': 12217856, 'steps': 23862, 'loss/train': 3.077800750732422} +03/04/2022 17:29:27 - INFO - codeparrot_training - Step 23863: {'lr': 0.0004735578290564132, 'samples': 12218368, 'steps': 23863, 'loss/train': 1.6511355638504028} +03/04/2022 17:29:29 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 17:29:33 - INFO - codeparrot_training - Step 23864: {'lr': 0.00047355545368005003, 'samples': 12218880, 'steps': 23864, 'loss/train': 1.6906017065048218} +03/04/2022 17:29:36 - INFO - codeparrot_training - Step 23865: {'lr': 0.00047355307820295625, 'samples': 12219392, 'steps': 23865, 'loss/train': 2.152430772781372} +03/04/2022 17:29:38 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 17:29:41 - INFO - codeparrot_training - Step 23866: {'lr': 0.00047355070262513287, 'samples': 12219904, 'steps': 23866, 'loss/train': 1.952852725982666} +03/04/2022 17:29:44 - INFO - codeparrot_training - Step 23867: {'lr': 0.00047354832694658104, 'samples': 12220416, 'steps': 23867, 'loss/train': 1.8325508832931519} +03/04/2022 17:29:46 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/04/2022 17:29:49 - INFO - codeparrot_training - Step 23868: {'lr': 0.0004735459511673018, 'samples': 12220928, 'steps': 23868, 'loss/train': 2.407017946243286} +03/04/2022 17:29:53 - INFO - codeparrot_training - Step 23869: {'lr': 0.0004735435752872962, 'samples': 12221440, 'steps': 23869, 'loss/train': 1.8293453454971313} +03/04/2022 17:29:55 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 17:29:58 - INFO - codeparrot_training - Step 23870: {'lr': 0.00047354119930656524, 'samples': 12221952, 'steps': 23870, 'loss/train': 1.750207781791687} +03/04/2022 17:30:01 - INFO - codeparrot_training - Step 23871: {'lr': 0.0004735388232251101, 'samples': 12222464, 'steps': 23871, 'loss/train': 2.172213077545166} +03/04/2022 17:30:03 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 17:30:06 - INFO - codeparrot_training - Step 23872: {'lr': 0.00047353644704293185, 'samples': 12222976, 'steps': 23872, 'loss/train': 3.1647231578826904} +03/04/2022 17:30:09 - INFO - codeparrot_training - Step 23873: {'lr': 0.0004735340707600315, 'samples': 12223488, 'steps': 23873, 'loss/train': 1.7137821912765503} +03/04/2022 17:30:11 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 17:30:15 - INFO - codeparrot_training - Step 23874: {'lr': 0.0004735316943764102, 'samples': 12224000, 'steps': 23874, 'loss/train': 0.6968318819999695} +03/04/2022 17:30:18 - INFO - codeparrot_training - Step 23875: {'lr': 0.0004735293178920689, 'samples': 12224512, 'steps': 23875, 'loss/train': 1.5216295719146729} +03/04/2022 17:30:20 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 17:30:23 - INFO - codeparrot_training - Step 23876: {'lr': 0.00047352694130700873, 'samples': 12225024, 'steps': 23876, 'loss/train': 2.2832508087158203} +03/04/2022 17:30:26 - INFO - codeparrot_training - Step 23877: {'lr': 0.00047352456462123086, 'samples': 12225536, 'steps': 23877, 'loss/train': 1.9380419254302979} +03/04/2022 17:30:28 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 17:30:32 - INFO - codeparrot_training - Step 23878: {'lr': 0.00047352218783473614, 'samples': 12226048, 'steps': 23878, 'loss/train': 1.7024190425872803} +03/04/2022 17:30:35 - INFO - codeparrot_training - Step 23879: {'lr': 0.0004735198109475258, 'samples': 12226560, 'steps': 23879, 'loss/train': 0.7260159850120544} +03/04/2022 17:30:37 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 17:30:40 - INFO - codeparrot_training - Step 23880: {'lr': 0.000473517433959601, 'samples': 12227072, 'steps': 23880, 'loss/train': 1.4091675281524658} +03/04/2022 17:30:43 - INFO - codeparrot_training - Step 23881: {'lr': 0.00047351505687096257, 'samples': 12227584, 'steps': 23881, 'loss/train': 0.33322060108184814} +03/04/2022 17:30:45 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/04/2022 17:30:48 - INFO - codeparrot_training - Step 23882: {'lr': 0.00047351267968161176, 'samples': 12228096, 'steps': 23882, 'loss/train': 2.1897242069244385} +03/04/2022 17:30:51 - INFO - codeparrot_training - Step 23883: {'lr': 0.0004735103023915496, 'samples': 12228608, 'steps': 23883, 'loss/train': 2.036616325378418} +03/04/2022 17:30:53 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 17:30:57 - INFO - codeparrot_training - Step 23884: {'lr': 0.0004735079250007771, 'samples': 12229120, 'steps': 23884, 'loss/train': 1.09048593044281} +03/04/2022 17:31:00 - INFO - codeparrot_training - Step 23885: {'lr': 0.00047350554750929543, 'samples': 12229632, 'steps': 23885, 'loss/train': 1.5406955480575562} +03/04/2022 17:31:02 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 17:31:05 - INFO - codeparrot_training - Step 23886: {'lr': 0.0004735031699171055, 'samples': 12230144, 'steps': 23886, 'loss/train': 1.7726279497146606} +03/04/2022 17:31:08 - INFO - codeparrot_training - Step 23887: {'lr': 0.0004735007922242086, 'samples': 12230656, 'steps': 23887, 'loss/train': 2.173550844192505} +03/04/2022 17:31:11 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 17:31:14 - INFO - codeparrot_training - Step 23888: {'lr': 0.0004734984144306057, 'samples': 12231168, 'steps': 23888, 'loss/train': 0.677206814289093} +03/04/2022 17:31:17 - INFO - codeparrot_training - Step 23889: {'lr': 0.0004734960365362978, 'samples': 12231680, 'steps': 23889, 'loss/train': 2.071901559829712} +03/04/2022 17:31:19 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 17:31:22 - INFO - codeparrot_training - Step 23890: {'lr': 0.0004734936585412861, 'samples': 12232192, 'steps': 23890, 'loss/train': 2.3489248752593994} +03/04/2022 17:31:25 - INFO - codeparrot_training - Step 23891: {'lr': 0.00047349128044557153, 'samples': 12232704, 'steps': 23891, 'loss/train': 1.4023581743240356} +03/04/2022 17:31:27 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/04/2022 17:31:31 - INFO - codeparrot_training - Step 23892: {'lr': 0.0004734889022491553, 'samples': 12233216, 'steps': 23892, 'loss/train': 1.9757378101348877} +03/04/2022 17:31:34 - INFO - codeparrot_training - Step 23893: {'lr': 0.0004734865239520384, 'samples': 12233728, 'steps': 23893, 'loss/train': 1.0370274782180786} +03/04/2022 17:31:36 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 17:31:39 - INFO - codeparrot_training - Step 23894: {'lr': 0.0004734841455542219, 'samples': 12234240, 'steps': 23894, 'loss/train': 0.3951893150806427} +03/04/2022 17:31:42 - INFO - codeparrot_training - Step 23895: {'lr': 0.0004734817670557069, 'samples': 12234752, 'steps': 23895, 'loss/train': 2.115211248397827} +03/04/2022 17:31:44 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 17:31:48 - INFO - codeparrot_training - Step 23896: {'lr': 0.00047347938845649447, 'samples': 12235264, 'steps': 23896, 'loss/train': 1.919592261314392} +03/04/2022 17:31:51 - INFO - codeparrot_training - Step 23897: {'lr': 0.0004734770097565857, 'samples': 12235776, 'steps': 23897, 'loss/train': 2.262985944747925} +03/04/2022 17:31:53 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 17:31:56 - INFO - codeparrot_training - Step 23898: {'lr': 0.00047347463095598157, 'samples': 12236288, 'steps': 23898, 'loss/train': 1.5039560794830322} +03/04/2022 17:31:59 - INFO - codeparrot_training - Step 23899: {'lr': 0.00047347225205468323, 'samples': 12236800, 'steps': 23899, 'loss/train': 2.096666097640991} +03/04/2022 17:32:01 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/04/2022 17:32:05 - INFO - codeparrot_training - Step 23900: {'lr': 0.00047346987305269184, 'samples': 12237312, 'steps': 23900, 'loss/train': 2.646045207977295} +03/04/2022 17:32:08 - INFO - codeparrot_training - Step 23901: {'lr': 0.0004734674939500083, 'samples': 12237824, 'steps': 23901, 'loss/train': 1.7790571451187134} +03/04/2022 17:32:09 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 17:32:13 - INFO - codeparrot_training - Step 23902: {'lr': 0.0004734651147466338, 'samples': 12238336, 'steps': 23902, 'loss/train': 1.5829920768737793} +03/04/2022 17:32:16 - INFO - codeparrot_training - Step 23903: {'lr': 0.00047346273544256927, 'samples': 12238848, 'steps': 23903, 'loss/train': 1.910275936126709} +03/04/2022 17:32:18 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/04/2022 17:32:21 - INFO - codeparrot_training - Step 23904: {'lr': 0.00047346035603781597, 'samples': 12239360, 'steps': 23904, 'loss/train': 1.4436780214309692} +03/04/2022 17:32:25 - INFO - codeparrot_training - Step 23905: {'lr': 0.00047345797653237486, 'samples': 12239872, 'steps': 23905, 'loss/train': 1.614989995956421} +03/04/2022 17:32:26 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/04/2022 17:32:30 - INFO - codeparrot_training - Step 23906: {'lr': 0.000473455596926247, 'samples': 12240384, 'steps': 23906, 'loss/train': 0.2401173859834671} +03/04/2022 17:32:33 - INFO - codeparrot_training - Step 23907: {'lr': 0.0004734532172194335, 'samples': 12240896, 'steps': 23907, 'loss/train': 1.7380670309066772} +03/04/2022 17:32:35 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 17:32:38 - INFO - codeparrot_training - Step 23908: {'lr': 0.0004734508374119355, 'samples': 12241408, 'steps': 23908, 'loss/train': 1.709637999534607} +03/04/2022 17:32:42 - INFO - codeparrot_training - Step 23909: {'lr': 0.0004734484575037539, 'samples': 12241920, 'steps': 23909, 'loss/train': 1.2764054536819458} +03/04/2022 17:32:44 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/04/2022 17:32:47 - INFO - codeparrot_training - Step 23910: {'lr': 0.00047344607749489, 'samples': 12242432, 'steps': 23910, 'loss/train': 2.9722962379455566} +03/04/2022 17:32:50 - INFO - codeparrot_training - Step 23911: {'lr': 0.00047344369738534466, 'samples': 12242944, 'steps': 23911, 'loss/train': 2.166679620742798} +03/04/2022 17:32:53 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 17:32:55 - INFO - codeparrot_training - Step 23912: {'lr': 0.000473441317175119, 'samples': 12243456, 'steps': 23912, 'loss/train': 1.763956069946289} +03/04/2022 17:32:59 - INFO - codeparrot_training - Step 23913: {'lr': 0.0004734389368642142, 'samples': 12243968, 'steps': 23913, 'loss/train': 1.8178014755249023} +03/04/2022 17:33:01 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 17:33:04 - INFO - codeparrot_training - Step 23914: {'lr': 0.0004734365564526313, 'samples': 12244480, 'steps': 23914, 'loss/train': 1.9510051012039185} +03/04/2022 17:33:07 - INFO - codeparrot_training - Step 23915: {'lr': 0.00047343417594037117, 'samples': 12244992, 'steps': 23915, 'loss/train': 1.7640701532363892} +03/04/2022 17:33:10 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 17:33:12 - INFO - codeparrot_training - Step 23916: {'lr': 0.00047343179532743516, 'samples': 12245504, 'steps': 23916, 'loss/train': 1.808117389678955} +03/04/2022 17:33:16 - INFO - codeparrot_training - Step 23917: {'lr': 0.00047342941461382427, 'samples': 12246016, 'steps': 23917, 'loss/train': 1.5267900228500366} +03/04/2022 17:33:18 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/04/2022 17:33:21 - INFO - codeparrot_training - Step 23918: {'lr': 0.0004734270337995395, 'samples': 12246528, 'steps': 23918, 'loss/train': 1.9189980030059814} +03/04/2022 17:33:24 - INFO - codeparrot_training - Step 23919: {'lr': 0.0004734246528845819, 'samples': 12247040, 'steps': 23919, 'loss/train': 1.418204665184021} +03/04/2022 17:33:27 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 17:33:29 - INFO - codeparrot_training - Step 23920: {'lr': 0.0004734222718689527, 'samples': 12247552, 'steps': 23920, 'loss/train': 1.419854998588562} +03/04/2022 17:33:32 - INFO - codeparrot_training - Step 23921: {'lr': 0.0004734198907526528, 'samples': 12248064, 'steps': 23921, 'loss/train': 1.628233551979065} +03/04/2022 17:33:36 - INFO - codeparrot_training - Step 23922: {'lr': 0.00047341750953568335, 'samples': 12248576, 'steps': 23922, 'loss/train': 2.187562942504883} +03/04/2022 17:33:36 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 17:33:41 - INFO - codeparrot_training - Step 23923: {'lr': 0.0004734151282180454, 'samples': 12249088, 'steps': 23923, 'loss/train': 1.1269090175628662} +03/04/2022 17:33:44 - INFO - codeparrot_training - Step 23924: {'lr': 0.0004734127467997401, 'samples': 12249600, 'steps': 23924, 'loss/train': 1.4481438398361206} +03/04/2022 17:33:44 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 17:33:50 - INFO - codeparrot_training - Step 23925: {'lr': 0.0004734103652807684, 'samples': 12250112, 'steps': 23925, 'loss/train': 2.157829761505127} +03/04/2022 17:33:53 - INFO - codeparrot_training - Step 23926: {'lr': 0.0004734079836611315, 'samples': 12250624, 'steps': 23926, 'loss/train': 2.034489393234253} +03/04/2022 17:33:53 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 17:33:58 - INFO - codeparrot_training - Step 23927: {'lr': 0.0004734056019408304, 'samples': 12251136, 'steps': 23927, 'loss/train': 1.8642380237579346} +03/04/2022 17:34:01 - INFO - codeparrot_training - Step 23928: {'lr': 0.00047340322011986614, 'samples': 12251648, 'steps': 23928, 'loss/train': 2.344905376434326} +03/04/2022 17:34:01 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 17:34:07 - INFO - codeparrot_training - Step 23929: {'lr': 0.0004734008381982399, 'samples': 12252160, 'steps': 23929, 'loss/train': 1.4171013832092285} +03/04/2022 17:34:10 - INFO - codeparrot_training - Step 23930: {'lr': 0.0004733984561759527, 'samples': 12252672, 'steps': 23930, 'loss/train': 1.8908283710479736} +03/04/2022 17:34:10 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 17:34:15 - INFO - codeparrot_training - Step 23931: {'lr': 0.0004733960740530055, 'samples': 12253184, 'steps': 23931, 'loss/train': 1.677435040473938} +03/04/2022 17:34:18 - INFO - codeparrot_training - Step 23932: {'lr': 0.0004733936918293995, 'samples': 12253696, 'steps': 23932, 'loss/train': 0.3762935698032379} +03/04/2022 17:34:18 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 17:34:24 - INFO - codeparrot_training - Step 23933: {'lr': 0.0004733913095051358, 'samples': 12254208, 'steps': 23933, 'loss/train': 2.461766004562378} +03/04/2022 17:34:27 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 17:34:29 - INFO - codeparrot_training - Step 23934: {'lr': 0.0004733889270802154, 'samples': 12254720, 'steps': 23934, 'loss/train': 1.6262305974960327} +03/04/2022 17:34:32 - INFO - codeparrot_training - Step 23935: {'lr': 0.00047338654455463935, 'samples': 12255232, 'steps': 23935, 'loss/train': 1.2580357789993286} +03/04/2022 17:34:36 - INFO - codeparrot_training - Step 23936: {'lr': 0.00047338416192840887, 'samples': 12255744, 'steps': 23936, 'loss/train': 0.8711478114128113} +03/04/2022 17:34:36 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 17:34:41 - INFO - codeparrot_training - Step 23937: {'lr': 0.0004733817792015249, 'samples': 12256256, 'steps': 23937, 'loss/train': 2.540367364883423} +03/04/2022 17:34:44 - INFO - codeparrot_training - Step 23938: {'lr': 0.00047337939637398855, 'samples': 12256768, 'steps': 23938, 'loss/train': 2.43109393119812} +03/04/2022 17:34:44 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 17:34:49 - INFO - codeparrot_training - Step 23939: {'lr': 0.0004733770134458009, 'samples': 12257280, 'steps': 23939, 'loss/train': 1.8598753213882446} +03/04/2022 17:34:52 - INFO - codeparrot_training - Step 23940: {'lr': 0.0004733746304169629, 'samples': 12257792, 'steps': 23940, 'loss/train': 1.7228165864944458} +03/04/2022 17:34:53 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/04/2022 17:34:58 - INFO - codeparrot_training - Step 23941: {'lr': 0.0004733722472874759, 'samples': 12258304, 'steps': 23941, 'loss/train': 1.557556390762329} +03/04/2022 17:35:01 - INFO - codeparrot_training - Step 23942: {'lr': 0.0004733698640573407, 'samples': 12258816, 'steps': 23942, 'loss/train': 0.5993305444717407} +03/04/2022 17:35:01 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 17:35:06 - INFO - codeparrot_training - Step 23943: {'lr': 0.0004733674807265585, 'samples': 12259328, 'steps': 23943, 'loss/train': 1.4512182474136353} +03/04/2022 17:35:09 - INFO - codeparrot_training - Step 23944: {'lr': 0.0004733650972951304, 'samples': 12259840, 'steps': 23944, 'loss/train': 2.135054349899292} +03/04/2022 17:35:10 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/04/2022 17:35:15 - INFO - codeparrot_training - Step 23945: {'lr': 0.0004733627137630574, 'samples': 12260352, 'steps': 23945, 'loss/train': 2.1388466358184814} +03/04/2022 17:35:18 - INFO - codeparrot_training - Step 23946: {'lr': 0.00047336033013034063, 'samples': 12260864, 'steps': 23946, 'loss/train': 1.8357486724853516} +03/04/2022 17:35:18 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 17:35:23 - INFO - codeparrot_training - Step 23947: {'lr': 0.00047335794639698117, 'samples': 12261376, 'steps': 23947, 'loss/train': 1.9736883640289307} +03/04/2022 17:35:26 - INFO - codeparrot_training - Step 23948: {'lr': 0.00047335556256298, 'samples': 12261888, 'steps': 23948, 'loss/train': 1.2310667037963867} +03/04/2022 17:35:26 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 17:35:32 - INFO - codeparrot_training - Step 23949: {'lr': 0.0004733531786283383, 'samples': 12262400, 'steps': 23949, 'loss/train': 1.3384042978286743} +03/04/2022 17:35:35 - INFO - codeparrot_training - Step 23950: {'lr': 0.0004733507945930571, 'samples': 12262912, 'steps': 23950, 'loss/train': 1.9888116121292114} +03/04/2022 17:35:35 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 17:35:40 - INFO - codeparrot_training - Step 23951: {'lr': 0.0004733484104571375, 'samples': 12263424, 'steps': 23951, 'loss/train': 1.578676700592041} +03/04/2022 17:35:43 - INFO - codeparrot_training - Step 23952: {'lr': 0.0004733460262205805, 'samples': 12263936, 'steps': 23952, 'loss/train': 1.6494693756103516} +03/04/2022 17:35:43 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 17:35:49 - INFO - codeparrot_training - Step 23953: {'lr': 0.00047334364188338725, 'samples': 12264448, 'steps': 23953, 'loss/train': 2.2031571865081787} +03/04/2022 17:35:51 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/04/2022 17:35:54 - INFO - codeparrot_training - Step 23954: {'lr': 0.0004733412574455588, 'samples': 12264960, 'steps': 23954, 'loss/train': 1.836792230606079} +03/04/2022 17:35:57 - INFO - codeparrot_training - Step 23955: {'lr': 0.00047333887290709623, 'samples': 12265472, 'steps': 23955, 'loss/train': 1.553954839706421} +03/04/2022 17:36:00 - INFO - codeparrot_training - Step 23956: {'lr': 0.00047333648826800056, 'samples': 12265984, 'steps': 23956, 'loss/train': 4.7044501304626465} +03/04/2022 17:36:01 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 17:36:06 - INFO - codeparrot_training - Step 23957: {'lr': 0.000473334103528273, 'samples': 12266496, 'steps': 23957, 'loss/train': 1.8536087274551392} +03/04/2022 17:36:09 - INFO - codeparrot_training - Step 23958: {'lr': 0.00047333171868791453, 'samples': 12267008, 'steps': 23958, 'loss/train': 1.9488499164581299} +03/04/2022 17:36:10 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 17:36:14 - INFO - codeparrot_training - Step 23959: {'lr': 0.00047332933374692623, 'samples': 12267520, 'steps': 23959, 'loss/train': 2.128302812576294} +03/04/2022 17:36:17 - INFO - codeparrot_training - Step 23960: {'lr': 0.0004733269487053091, 'samples': 12268032, 'steps': 23960, 'loss/train': 2.0150668621063232} +03/04/2022 17:36:18 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/04/2022 17:36:23 - INFO - codeparrot_training - Step 23961: {'lr': 0.0004733245635630644, 'samples': 12268544, 'steps': 23961, 'loss/train': 0.8445484042167664} +03/04/2022 17:36:26 - INFO - codeparrot_training - Step 23962: {'lr': 0.000473322178320193, 'samples': 12269056, 'steps': 23962, 'loss/train': 2.204127550125122} +03/04/2022 17:36:27 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 17:36:31 - INFO - codeparrot_training - Step 23963: {'lr': 0.0004733197929766961, 'samples': 12269568, 'steps': 23963, 'loss/train': 1.1881022453308105} +03/04/2022 17:36:35 - INFO - codeparrot_training - Step 23964: {'lr': 0.0004733174075325748, 'samples': 12270080, 'steps': 23964, 'loss/train': 1.2383887767791748} +03/04/2022 17:36:37 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 17:36:40 - INFO - codeparrot_training - Step 23965: {'lr': 0.0004733150219878301, 'samples': 12270592, 'steps': 23965, 'loss/train': 1.5078856945037842} +03/04/2022 17:36:43 - INFO - codeparrot_training - Step 23966: {'lr': 0.00047331263634246314, 'samples': 12271104, 'steps': 23966, 'loss/train': 1.5834245681762695} +03/04/2022 17:36:45 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 17:36:48 - INFO - codeparrot_training - Step 23967: {'lr': 0.0004733102505964749, 'samples': 12271616, 'steps': 23967, 'loss/train': 1.9126179218292236} +03/04/2022 17:36:51 - INFO - codeparrot_training - Step 23968: {'lr': 0.00047330786474986645, 'samples': 12272128, 'steps': 23968, 'loss/train': 1.7528331279754639} +03/04/2022 17:36:53 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/04/2022 17:36:57 - INFO - codeparrot_training - Step 23969: {'lr': 0.00047330547880263896, 'samples': 12272640, 'steps': 23969, 'loss/train': 1.6887513399124146} +03/04/2022 17:37:00 - INFO - codeparrot_training - Step 23970: {'lr': 0.00047330309275479354, 'samples': 12273152, 'steps': 23970, 'loss/train': 2.1356794834136963} +03/04/2022 17:37:05 - INFO - codeparrot_training - Step 23971: {'lr': 0.00047330070660633113, 'samples': 12273664, 'steps': 23971, 'loss/train': 2.0459330081939697} +03/04/2022 17:37:08 - INFO - codeparrot_training - Step 23972: {'lr': 0.00047329832035725286, 'samples': 12274176, 'steps': 23972, 'loss/train': 1.8139877319335938} +03/04/2022 17:37:10 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 17:37:13 - INFO - codeparrot_training - Step 23973: {'lr': 0.0004732959340075598, 'samples': 12274688, 'steps': 23973, 'loss/train': 2.2142395973205566} +03/04/2022 17:37:17 - INFO - codeparrot_training - Step 23974: {'lr': 0.0004732935475572531, 'samples': 12275200, 'steps': 23974, 'loss/train': 1.9302705526351929} +03/04/2022 17:37:18 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 17:37:22 - INFO - codeparrot_training - Step 23975: {'lr': 0.00047329116100633373, 'samples': 12275712, 'steps': 23975, 'loss/train': 1.5582859516143799} +03/04/2022 17:37:25 - INFO - codeparrot_training - Step 23976: {'lr': 0.0004732887743548028, 'samples': 12276224, 'steps': 23976, 'loss/train': 1.7522284984588623} +03/04/2022 17:37:28 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 17:37:30 - INFO - codeparrot_training - Step 23977: {'lr': 0.0004732863876026614, 'samples': 12276736, 'steps': 23977, 'loss/train': 2.160885810852051} +03/04/2022 17:37:34 - INFO - codeparrot_training - Step 23978: {'lr': 0.00047328400074991064, 'samples': 12277248, 'steps': 23978, 'loss/train': 0.8982470631599426} +03/04/2022 17:37:36 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 17:37:39 - INFO - codeparrot_training - Step 23979: {'lr': 0.00047328161379655155, 'samples': 12277760, 'steps': 23979, 'loss/train': 2.4329946041107178} +03/04/2022 17:37:42 - INFO - codeparrot_training - Step 23980: {'lr': 0.00047327922674258516, 'samples': 12278272, 'steps': 23980, 'loss/train': 2.042363166809082} +03/04/2022 17:37:44 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 17:37:47 - INFO - codeparrot_training - Step 23981: {'lr': 0.00047327683958801257, 'samples': 12278784, 'steps': 23981, 'loss/train': 2.079195737838745} +03/04/2022 17:37:51 - INFO - codeparrot_training - Step 23982: {'lr': 0.00047327445233283496, 'samples': 12279296, 'steps': 23982, 'loss/train': 1.0602015256881714} +03/04/2022 17:37:53 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 17:37:56 - INFO - codeparrot_training - Step 23983: {'lr': 0.0004732720649770533, 'samples': 12279808, 'steps': 23983, 'loss/train': 1.8659113645553589} +03/04/2022 17:37:59 - INFO - codeparrot_training - Step 23984: {'lr': 0.00047326967752066876, 'samples': 12280320, 'steps': 23984, 'loss/train': 2.1499650478363037} +03/04/2022 17:38:02 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 17:38:05 - INFO - codeparrot_training - Step 23985: {'lr': 0.0004732672899636822, 'samples': 12280832, 'steps': 23985, 'loss/train': 1.7113415002822876} +03/04/2022 17:38:08 - INFO - codeparrot_training - Step 23986: {'lr': 0.00047326490230609495, 'samples': 12281344, 'steps': 23986, 'loss/train': 1.9768426418304443} +03/04/2022 17:38:10 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/04/2022 17:38:13 - INFO - codeparrot_training - Step 23987: {'lr': 0.000473262514547908, 'samples': 12281856, 'steps': 23987, 'loss/train': 1.4057209491729736} +03/04/2022 17:38:16 - INFO - codeparrot_training - Step 23988: {'lr': 0.00047326012668912233, 'samples': 12282368, 'steps': 23988, 'loss/train': 1.2061398029327393} +03/04/2022 17:38:19 - INFO - codeparrot_training - Step 23989: {'lr': 0.0004732577387297391, 'samples': 12282880, 'steps': 23989, 'loss/train': 1.1489900350570679} +03/04/2022 17:38:19 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 17:38:25 - INFO - codeparrot_training - Step 23990: {'lr': 0.00047325535066975946, 'samples': 12283392, 'steps': 23990, 'loss/train': 1.8282450437545776} +03/04/2022 17:38:28 - INFO - codeparrot_training - Step 23991: {'lr': 0.0004732529625091843, 'samples': 12283904, 'steps': 23991, 'loss/train': 1.712887167930603} +03/04/2022 17:38:28 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/04/2022 17:38:33 - INFO - codeparrot_training - Step 23992: {'lr': 0.0004732505742480149, 'samples': 12284416, 'steps': 23992, 'loss/train': 1.0021024942398071} +03/04/2022 17:38:36 - INFO - codeparrot_training - Step 23993: {'lr': 0.00047324818588625214, 'samples': 12284928, 'steps': 23993, 'loss/train': 1.4539430141448975} +03/04/2022 17:38:37 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 17:38:42 - INFO - codeparrot_training - Step 23994: {'lr': 0.0004732457974238972, 'samples': 12285440, 'steps': 23994, 'loss/train': 0.9547393918037415} +03/04/2022 17:38:45 - INFO - codeparrot_training - Step 23995: {'lr': 0.0004732434088609512, 'samples': 12285952, 'steps': 23995, 'loss/train': 1.734820008277893} +03/04/2022 17:38:45 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/04/2022 17:38:50 - INFO - codeparrot_training - Step 23996: {'lr': 0.00047324102019741514, 'samples': 12286464, 'steps': 23996, 'loss/train': 1.3970927000045776} +03/04/2022 17:38:53 - INFO - codeparrot_training - Step 23997: {'lr': 0.00047323863143329016, 'samples': 12286976, 'steps': 23997, 'loss/train': 2.0924859046936035} +03/04/2022 17:38:54 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 17:38:59 - INFO - codeparrot_training - Step 23998: {'lr': 0.00047323624256857724, 'samples': 12287488, 'steps': 23998, 'loss/train': 1.594757080078125} +03/04/2022 17:39:02 - INFO - codeparrot_training - Step 23999: {'lr': 0.0004732338536032775, 'samples': 12288000, 'steps': 23999, 'loss/train': 1.2504795789718628} +03/04/2022 17:39:03 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/04/2022 17:39:07 - INFO - codeparrot_training - Step 24000: {'lr': 0.0004732314645373921, 'samples': 12288512, 'steps': 24000, 'loss/train': 2.314279079437256} +03/04/2022 17:39:11 - INFO - codeparrot_training - Step 24001: {'lr': 0.0004732290753709221, 'samples': 12289024, 'steps': 24001, 'loss/train': 2.141962766647339} +03/04/2022 17:39:13 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/04/2022 17:39:16 - INFO - codeparrot_training - Step 24002: {'lr': 0.0004732266861038684, 'samples': 12289536, 'steps': 24002, 'loss/train': 2.798734188079834} +03/04/2022 17:39:19 - INFO - codeparrot_training - Step 24003: {'lr': 0.0004732242967362322, 'samples': 12290048, 'steps': 24003, 'loss/train': 1.7036367654800415} +03/04/2022 17:39:22 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/04/2022 17:39:24 - INFO - codeparrot_training - Step 24004: {'lr': 0.00047322190726801464, 'samples': 12290560, 'steps': 24004, 'loss/train': 1.954026460647583} +03/04/2022 17:39:27 - INFO - codeparrot_training - Step 24005: {'lr': 0.0004732195176992167, 'samples': 12291072, 'steps': 24005, 'loss/train': 1.6941807270050049} +03/04/2022 17:39:30 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/04/2022 17:39:33 - INFO - codeparrot_training - Step 24006: {'lr': 0.0004732171280298395, 'samples': 12291584, 'steps': 24006, 'loss/train': 1.7170467376708984} +03/04/2022 17:39:36 - INFO - codeparrot_training - Step 24007: {'lr': 0.0004732147382598842, 'samples': 12292096, 'steps': 24007, 'loss/train': 2.024646520614624} +03/04/2022 17:39:38 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 17:39:41 - INFO - codeparrot_training - Step 24008: {'lr': 0.00047321234838935164, 'samples': 12292608, 'steps': 24008, 'loss/train': 2.142948627471924} +03/04/2022 17:39:44 - INFO - codeparrot_training - Step 24009: {'lr': 0.0004732099584182431, 'samples': 12293120, 'steps': 24009, 'loss/train': 2.047590970993042} +03/04/2022 17:39:47 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 17:39:49 - INFO - codeparrot_training - Step 24010: {'lr': 0.00047320756834655955, 'samples': 12293632, 'steps': 24010, 'loss/train': 1.8889671564102173} +03/04/2022 17:39:53 - INFO - codeparrot_training - Step 24011: {'lr': 0.0004732051781743022, 'samples': 12294144, 'steps': 24011, 'loss/train': 2.0373222827911377} +03/04/2022 17:39:55 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 17:39:58 - INFO - codeparrot_training - Step 24012: {'lr': 0.00047320278790147197, 'samples': 12294656, 'steps': 24012, 'loss/train': 1.8597666025161743} +03/04/2022 17:40:01 - INFO - codeparrot_training - Step 24013: {'lr': 0.00047320039752807, 'samples': 12295168, 'steps': 24013, 'loss/train': 1.8789314031600952} +03/04/2022 17:40:04 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 17:40:07 - INFO - codeparrot_training - Step 24014: {'lr': 0.0004731980070540974, 'samples': 12295680, 'steps': 24014, 'loss/train': 1.380476713180542} +03/04/2022 17:40:10 - INFO - codeparrot_training - Step 24015: {'lr': 0.0004731956164795552, 'samples': 12296192, 'steps': 24015, 'loss/train': 2.4772913455963135} +03/04/2022 17:40:13 - INFO - codeparrot_training - Step 24016: {'lr': 0.0004731932258044446, 'samples': 12296704, 'steps': 24016, 'loss/train': 2.029371976852417} +03/04/2022 17:40:15 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/04/2022 17:40:18 - INFO - codeparrot_training - Step 24017: {'lr': 0.00047319083502876647, 'samples': 12297216, 'steps': 24017, 'loss/train': 2.1769299507141113} +03/04/2022 17:40:22 - INFO - codeparrot_training - Step 24018: {'lr': 0.00047318844415252204, 'samples': 12297728, 'steps': 24018, 'loss/train': 2.0114574432373047} +03/04/2022 17:40:23 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/04/2022 17:40:27 - INFO - codeparrot_training - Step 24019: {'lr': 0.00047318605317571227, 'samples': 12298240, 'steps': 24019, 'loss/train': 1.3914356231689453} +03/04/2022 17:40:30 - INFO - codeparrot_training - Step 24020: {'lr': 0.0004731836620983384, 'samples': 12298752, 'steps': 24020, 'loss/train': 2.696396589279175} +03/04/2022 17:40:32 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 17:40:36 - INFO - codeparrot_training - Step 24021: {'lr': 0.00047318127092040144, 'samples': 12299264, 'steps': 24021, 'loss/train': 0.6576046943664551} +03/04/2022 17:40:39 - INFO - codeparrot_training - Step 24022: {'lr': 0.00047317887964190233, 'samples': 12299776, 'steps': 24022, 'loss/train': 2.228574514389038} +03/04/2022 17:40:42 - INFO - codeparrot_training - Step 24023: {'lr': 0.00047317648826284233, 'samples': 12300288, 'steps': 24023, 'loss/train': 2.692107677459717} +03/04/2022 17:40:43 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 17:40:47 - INFO - codeparrot_training - Step 24024: {'lr': 0.0004731740967832224, 'samples': 12300800, 'steps': 24024, 'loss/train': 1.8153022527694702} +03/04/2022 17:40:51 - INFO - codeparrot_training - Step 24025: {'lr': 0.00047317170520304373, 'samples': 12301312, 'steps': 24025, 'loss/train': 2.3017916679382324} +03/04/2022 17:40:52 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 17:40:56 - INFO - codeparrot_training - Step 24026: {'lr': 0.0004731693135223073, 'samples': 12301824, 'steps': 24026, 'loss/train': 1.8597946166992188} +03/04/2022 17:40:59 - INFO - codeparrot_training - Step 24027: {'lr': 0.0004731669217410142, 'samples': 12302336, 'steps': 24027, 'loss/train': 2.1309926509857178} +03/04/2022 17:41:00 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 17:41:04 - INFO - codeparrot_training - Step 24028: {'lr': 0.0004731645298591656, 'samples': 12302848, 'steps': 24028, 'loss/train': 1.2750903367996216} +03/04/2022 17:41:07 - INFO - codeparrot_training - Step 24029: {'lr': 0.0004731621378767624, 'samples': 12303360, 'steps': 24029, 'loss/train': 2.113081932067871} +03/04/2022 17:41:09 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 17:41:13 - INFO - codeparrot_training - Step 24030: {'lr': 0.0004731597457938059, 'samples': 12303872, 'steps': 24030, 'loss/train': 2.7036962509155273} +03/04/2022 17:41:16 - INFO - codeparrot_training - Step 24031: {'lr': 0.000473157353610297, 'samples': 12304384, 'steps': 24031, 'loss/train': 1.3331118822097778} +03/04/2022 17:41:18 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/04/2022 17:41:21 - INFO - codeparrot_training - Step 24032: {'lr': 0.0004731549613262368, 'samples': 12304896, 'steps': 24032, 'loss/train': 1.1536608934402466} +03/04/2022 17:41:25 - INFO - codeparrot_training - Step 24033: {'lr': 0.0004731525689416265, 'samples': 12305408, 'steps': 24033, 'loss/train': 1.8285322189331055} +03/04/2022 17:41:26 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 17:41:30 - INFO - codeparrot_training - Step 24034: {'lr': 0.0004731501764564671, 'samples': 12305920, 'steps': 24034, 'loss/train': 2.0663998126983643} +03/04/2022 17:41:33 - INFO - codeparrot_training - Step 24035: {'lr': 0.00047314778387075963, 'samples': 12306432, 'steps': 24035, 'loss/train': 2.880819320678711} +03/04/2022 17:41:35 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 17:41:38 - INFO - codeparrot_training - Step 24036: {'lr': 0.00047314539118450516, 'samples': 12306944, 'steps': 24036, 'loss/train': 1.9792269468307495} +03/04/2022 17:41:41 - INFO - codeparrot_training - Step 24037: {'lr': 0.0004731429983977049, 'samples': 12307456, 'steps': 24037, 'loss/train': 1.946357250213623} +03/04/2022 17:41:43 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 17:41:47 - INFO - codeparrot_training - Step 24038: {'lr': 0.00047314060551035983, 'samples': 12307968, 'steps': 24038, 'loss/train': 0.681414783000946} +03/04/2022 17:41:50 - INFO - codeparrot_training - Step 24039: {'lr': 0.00047313821252247104, 'samples': 12308480, 'steps': 24039, 'loss/train': 1.6910324096679688} +03/04/2022 17:41:51 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 17:41:55 - INFO - codeparrot_training - Step 24040: {'lr': 0.00047313581943403963, 'samples': 12308992, 'steps': 24040, 'loss/train': 1.865984320640564} +03/04/2022 17:41:58 - INFO - codeparrot_training - Step 24041: {'lr': 0.0004731334262450666, 'samples': 12309504, 'steps': 24041, 'loss/train': 1.7499123811721802} +03/04/2022 17:41:59 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 17:42:04 - INFO - codeparrot_training - Step 24042: {'lr': 0.00047313103295555317, 'samples': 12310016, 'steps': 24042, 'loss/train': 1.863382339477539} +03/04/2022 17:42:07 - INFO - codeparrot_training - Step 24043: {'lr': 0.0004731286395655003, 'samples': 12310528, 'steps': 24043, 'loss/train': 2.0376853942871094} +03/04/2022 17:42:08 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 17:42:12 - INFO - codeparrot_training - Step 24044: {'lr': 0.00047312624607490913, 'samples': 12311040, 'steps': 24044, 'loss/train': 2.291694164276123} +03/04/2022 17:42:16 - INFO - codeparrot_training - Step 24045: {'lr': 0.0004731238524837807, 'samples': 12311552, 'steps': 24045, 'loss/train': 1.5295854806900024} +03/04/2022 17:42:19 - INFO - codeparrot_training - Step 24046: {'lr': 0.00047312145879211607, 'samples': 12312064, 'steps': 24046, 'loss/train': 1.867437720298767} +03/04/2022 17:42:19 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 17:42:24 - INFO - codeparrot_training - Step 24047: {'lr': 0.0004731190649999164, 'samples': 12312576, 'steps': 24047, 'loss/train': 1.4578790664672852} +03/04/2022 17:42:27 - INFO - codeparrot_training - Step 24048: {'lr': 0.0004731166711071827, 'samples': 12313088, 'steps': 24048, 'loss/train': 2.1888656616210938} +03/04/2022 17:42:27 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/04/2022 17:42:33 - INFO - codeparrot_training - Step 24049: {'lr': 0.0004731142771139161, 'samples': 12313600, 'steps': 24049, 'loss/train': 1.564119577407837} +03/04/2022 17:42:36 - INFO - codeparrot_training - Step 24050: {'lr': 0.00047311188302011766, 'samples': 12314112, 'steps': 24050, 'loss/train': 1.344059944152832} +03/04/2022 17:42:36 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/04/2022 17:42:41 - INFO - codeparrot_training - Step 24051: {'lr': 0.00047310948882578843, 'samples': 12314624, 'steps': 24051, 'loss/train': 2.639352560043335} +03/04/2022 17:42:44 - INFO - codeparrot_training - Step 24052: {'lr': 0.0004731070945309295, 'samples': 12315136, 'steps': 24052, 'loss/train': 0.25408416986465454} +03/04/2022 17:42:44 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 17:42:50 - INFO - codeparrot_training - Step 24053: {'lr': 0.00047310470013554195, 'samples': 12315648, 'steps': 24053, 'loss/train': 1.477995753288269} +03/04/2022 17:42:53 - INFO - codeparrot_training - Step 24054: {'lr': 0.0004731023056396269, 'samples': 12316160, 'steps': 24054, 'loss/train': 2.3762307167053223} +03/04/2022 17:42:53 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/04/2022 17:42:58 - INFO - codeparrot_training - Step 24055: {'lr': 0.00047309991104318533, 'samples': 12316672, 'steps': 24055, 'loss/train': 1.9683674573898315} +03/04/2022 17:43:01 - INFO - codeparrot_training - Step 24056: {'lr': 0.00047309751634621845, 'samples': 12317184, 'steps': 24056, 'loss/train': 0.9495123028755188} +03/04/2022 17:43:01 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 17:43:06 - INFO - codeparrot_training - Step 24057: {'lr': 0.0004730951215487272, 'samples': 12317696, 'steps': 24057, 'loss/train': 0.9281378984451294} +03/04/2022 17:43:10 - INFO - codeparrot_training - Step 24058: {'lr': 0.0004730927266507128, 'samples': 12318208, 'steps': 24058, 'loss/train': 1.6143735647201538} +03/04/2022 17:43:10 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 17:43:15 - INFO - codeparrot_training - Step 24059: {'lr': 0.00047309033165217617, 'samples': 12318720, 'steps': 24059, 'loss/train': 2.534825086593628} +03/04/2022 17:43:18 - INFO - codeparrot_training - Step 24060: {'lr': 0.00047308793655311855, 'samples': 12319232, 'steps': 24060, 'loss/train': 1.866559624671936} +03/04/2022 17:43:18 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 17:43:23 - INFO - codeparrot_training - Step 24061: {'lr': 0.000473085541353541, 'samples': 12319744, 'steps': 24061, 'loss/train': 2.032223701477051} +03/04/2022 17:43:26 - INFO - codeparrot_training - Step 24062: {'lr': 0.00047308314605344447, 'samples': 12320256, 'steps': 24062, 'loss/train': 2.103241205215454} +03/04/2022 17:43:27 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 17:43:32 - INFO - codeparrot_training - Step 24063: {'lr': 0.00047308075065283006, 'samples': 12320768, 'steps': 24063, 'loss/train': 2.1359989643096924} +03/04/2022 17:43:35 - INFO - codeparrot_training - Step 24064: {'lr': 0.00047307835515169905, 'samples': 12321280, 'steps': 24064, 'loss/train': 0.9582512378692627} +03/04/2022 17:43:35 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 17:43:40 - INFO - codeparrot_training - Step 24065: {'lr': 0.00047307595955005226, 'samples': 12321792, 'steps': 24065, 'loss/train': 1.7214199304580688} +03/04/2022 17:43:43 - INFO - codeparrot_training - Step 24066: {'lr': 0.000473073563847891, 'samples': 12322304, 'steps': 24066, 'loss/train': 2.2545158863067627} +03/04/2022 17:43:44 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/04/2022 17:43:49 - INFO - codeparrot_training - Step 24067: {'lr': 0.0004730711680452161, 'samples': 12322816, 'steps': 24067, 'loss/train': 1.2077125310897827} +03/04/2022 17:43:52 - INFO - codeparrot_training - Step 24068: {'lr': 0.00047306877214202885, 'samples': 12323328, 'steps': 24068, 'loss/train': 1.7463852167129517} +03/04/2022 17:43:52 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 17:43:57 - INFO - codeparrot_training - Step 24069: {'lr': 0.00047306637613833024, 'samples': 12323840, 'steps': 24069, 'loss/train': 1.29376220703125} +03/04/2022 17:44:00 - INFO - codeparrot_training - Step 24070: {'lr': 0.00047306398003412137, 'samples': 12324352, 'steps': 24070, 'loss/train': 1.8340561389923096} +03/04/2022 17:44:00 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/04/2022 17:44:06 - INFO - codeparrot_training - Step 24071: {'lr': 0.00047306158382940327, 'samples': 12324864, 'steps': 24071, 'loss/train': 1.7877377271652222} +03/04/2022 17:44:09 - INFO - codeparrot_training - Step 24072: {'lr': 0.0004730591875241771, 'samples': 12325376, 'steps': 24072, 'loss/train': 2.0209290981292725} +03/04/2022 17:44:09 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/04/2022 17:44:14 - INFO - codeparrot_training - Step 24073: {'lr': 0.0004730567911184439, 'samples': 12325888, 'steps': 24073, 'loss/train': 1.5094003677368164} +03/04/2022 17:44:17 - INFO - codeparrot_training - Step 24074: {'lr': 0.00047305439461220477, 'samples': 12326400, 'steps': 24074, 'loss/train': 2.001117706298828} +03/04/2022 17:44:18 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/04/2022 17:44:22 - INFO - codeparrot_training - Step 24075: {'lr': 0.00047305199800546077, 'samples': 12326912, 'steps': 24075, 'loss/train': 2.267198085784912} +03/04/2022 17:44:26 - INFO - codeparrot_training - Step 24076: {'lr': 0.00047304960129821295, 'samples': 12327424, 'steps': 24076, 'loss/train': 1.993097186088562} +03/04/2022 17:44:26 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/04/2022 17:44:31 - INFO - codeparrot_training - Step 24077: {'lr': 0.00047304720449046247, 'samples': 12327936, 'steps': 24077, 'loss/train': 1.6710890531539917} +03/04/2022 17:44:34 - INFO - codeparrot_training - Step 24078: {'lr': 0.0004730448075822103, 'samples': 12328448, 'steps': 24078, 'loss/train': 2.0695934295654297} +03/04/2022 17:44:35 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 17:44:39 - INFO - codeparrot_training - Step 24079: {'lr': 0.0004730424105734576, 'samples': 12328960, 'steps': 24079, 'loss/train': 1.1796817779541016} +03/04/2022 17:44:43 - INFO - codeparrot_training - Step 24080: {'lr': 0.00047304001346420543, 'samples': 12329472, 'steps': 24080, 'loss/train': 1.0936622619628906} +03/04/2022 17:44:44 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 17:44:48 - INFO - codeparrot_training - Step 24081: {'lr': 0.0004730376162544549, 'samples': 12329984, 'steps': 24081, 'loss/train': 1.9510287046432495} +03/04/2022 17:44:51 - INFO - codeparrot_training - Step 24082: {'lr': 0.00047303521894420707, 'samples': 12330496, 'steps': 24082, 'loss/train': 1.5031731128692627} +03/04/2022 17:44:53 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 17:44:56 - INFO - codeparrot_training - Step 24083: {'lr': 0.00047303282153346297, 'samples': 12331008, 'steps': 24083, 'loss/train': 2.164016008377075} +03/04/2022 17:45:00 - INFO - codeparrot_training - Step 24084: {'lr': 0.00047303042402222373, 'samples': 12331520, 'steps': 24084, 'loss/train': 1.9378501176834106} +03/04/2022 17:45:01 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/04/2022 17:45:05 - INFO - codeparrot_training - Step 24085: {'lr': 0.00047302802641049045, 'samples': 12332032, 'steps': 24085, 'loss/train': 1.3063926696777344} +03/04/2022 17:45:08 - INFO - codeparrot_training - Step 24086: {'lr': 0.00047302562869826415, 'samples': 12332544, 'steps': 24086, 'loss/train': 2.0311295986175537} +03/04/2022 17:45:10 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 17:45:13 - INFO - codeparrot_training - Step 24087: {'lr': 0.000473023230885546, 'samples': 12333056, 'steps': 24087, 'loss/train': 1.6284961700439453} +03/04/2022 17:45:16 - INFO - codeparrot_training - Step 24088: {'lr': 0.00047302083297233693, 'samples': 12333568, 'steps': 24088, 'loss/train': 2.2828192710876465} +03/04/2022 17:45:18 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 17:45:22 - INFO - codeparrot_training - Step 24089: {'lr': 0.0004730184349586382, 'samples': 12334080, 'steps': 24089, 'loss/train': 1.3438918590545654} +03/04/2022 17:45:25 - INFO - codeparrot_training - Step 24090: {'lr': 0.0004730160368444507, 'samples': 12334592, 'steps': 24090, 'loss/train': 1.990256428718567} +03/04/2022 17:45:26 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/04/2022 17:45:30 - INFO - codeparrot_training - Step 24091: {'lr': 0.00047301363862977574, 'samples': 12335104, 'steps': 24091, 'loss/train': 1.713231086730957} +03/04/2022 17:45:33 - INFO - codeparrot_training - Step 24092: {'lr': 0.00047301124031461425, 'samples': 12335616, 'steps': 24092, 'loss/train': 2.353463888168335} +03/04/2022 17:45:35 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/04/2022 17:45:39 - INFO - codeparrot_training - Step 24093: {'lr': 0.00047300884189896734, 'samples': 12336128, 'steps': 24093, 'loss/train': 1.4828822612762451} +03/04/2022 17:45:42 - INFO - codeparrot_training - Step 24094: {'lr': 0.00047300644338283597, 'samples': 12336640, 'steps': 24094, 'loss/train': 1.365534782409668} +03/04/2022 17:45:43 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 17:45:47 - INFO - codeparrot_training - Step 24095: {'lr': 0.00047300404476622145, 'samples': 12337152, 'steps': 24095, 'loss/train': 1.539475917816162} +03/04/2022 17:45:50 - INFO - codeparrot_training - Step 24096: {'lr': 0.0004730016460491247, 'samples': 12337664, 'steps': 24096, 'loss/train': 1.6724570989608765} +03/04/2022 17:45:52 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 17:45:55 - INFO - codeparrot_training - Step 24097: {'lr': 0.00047299924723154686, 'samples': 12338176, 'steps': 24097, 'loss/train': 1.4161325693130493} +03/04/2022 17:45:59 - INFO - codeparrot_training - Step 24098: {'lr': 0.000472996848313489, 'samples': 12338688, 'steps': 24098, 'loss/train': 1.1850368976593018} +03/04/2022 17:46:00 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 17:46:04 - INFO - codeparrot_training - Step 24099: {'lr': 0.0004729944492949523, 'samples': 12339200, 'steps': 24099, 'loss/train': 2.1958770751953125} +03/04/2022 17:46:07 - INFO - codeparrot_training - Step 24100: {'lr': 0.0004729920501759376, 'samples': 12339712, 'steps': 24100, 'loss/train': 2.2872612476348877} +03/04/2022 17:46:09 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/04/2022 17:46:12 - INFO - codeparrot_training - Step 24101: {'lr': 0.0004729896509564462, 'samples': 12340224, 'steps': 24101, 'loss/train': 1.0160070657730103} +03/04/2022 17:46:16 - INFO - codeparrot_training - Step 24102: {'lr': 0.00047298725163647903, 'samples': 12340736, 'steps': 24102, 'loss/train': 1.6186915636062622} +03/04/2022 17:46:17 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 17:46:21 - INFO - codeparrot_training - Step 24103: {'lr': 0.00047298485221603735, 'samples': 12341248, 'steps': 24103, 'loss/train': 2.277391195297241} +03/04/2022 17:46:24 - INFO - codeparrot_training - Step 24104: {'lr': 0.0004729824526951221, 'samples': 12341760, 'steps': 24104, 'loss/train': 1.1548101902008057} +03/04/2022 17:46:25 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/04/2022 17:46:29 - INFO - codeparrot_training - Step 24105: {'lr': 0.0004729800530737344, 'samples': 12342272, 'steps': 24105, 'loss/train': 1.977251410484314} +03/04/2022 17:46:32 - INFO - codeparrot_training - Step 24106: {'lr': 0.0004729776533518753, 'samples': 12342784, 'steps': 24106, 'loss/train': 1.1591229438781738} +03/04/2022 17:46:34 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 17:46:38 - INFO - codeparrot_training - Step 24107: {'lr': 0.00047297525352954587, 'samples': 12343296, 'steps': 24107, 'loss/train': 1.3035218715667725} +03/04/2022 17:46:41 - INFO - codeparrot_training - Step 24108: {'lr': 0.00047297285360674724, 'samples': 12343808, 'steps': 24108, 'loss/train': 1.6273456811904907} +03/04/2022 17:46:42 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 17:46:46 - INFO - codeparrot_training - Step 24109: {'lr': 0.0004729704535834806, 'samples': 12344320, 'steps': 24109, 'loss/train': 2.817026138305664} +03/04/2022 17:46:49 - INFO - codeparrot_training - Step 24110: {'lr': 0.0004729680534597468, 'samples': 12344832, 'steps': 24110, 'loss/train': 1.8457839488983154} +03/04/2022 17:46:51 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 17:46:55 - INFO - codeparrot_training - Step 24111: {'lr': 0.0004729656532355471, 'samples': 12345344, 'steps': 24111, 'loss/train': 2.2977142333984375} +03/04/2022 17:46:58 - INFO - codeparrot_training - Step 24112: {'lr': 0.00047296325291088247, 'samples': 12345856, 'steps': 24112, 'loss/train': 0.9623086452484131} +03/04/2022 17:46:59 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 17:47:03 - INFO - codeparrot_training - Step 24113: {'lr': 0.00047296085248575405, 'samples': 12346368, 'steps': 24113, 'loss/train': 1.8958494663238525} +03/04/2022 17:47:06 - INFO - codeparrot_training - Step 24114: {'lr': 0.000472958451960163, 'samples': 12346880, 'steps': 24114, 'loss/train': 1.454222559928894} +03/04/2022 17:47:08 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 17:47:11 - INFO - codeparrot_training - Step 24115: {'lr': 0.0004729560513341101, 'samples': 12347392, 'steps': 24115, 'loss/train': 1.0426387786865234} +03/04/2022 17:47:15 - INFO - codeparrot_training - Step 24116: {'lr': 0.0004729536506075969, 'samples': 12347904, 'steps': 24116, 'loss/train': 2.1908011436462402} +03/04/2022 17:47:16 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/04/2022 17:47:20 - INFO - codeparrot_training - Step 24117: {'lr': 0.000472951249780624, 'samples': 12348416, 'steps': 24117, 'loss/train': 1.8156861066818237} +03/04/2022 17:47:23 - INFO - codeparrot_training - Step 24118: {'lr': 0.0004729488488531928, 'samples': 12348928, 'steps': 24118, 'loss/train': 1.1649701595306396} +03/04/2022 17:47:25 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 17:47:28 - INFO - codeparrot_training - Step 24119: {'lr': 0.00047294644782530437, 'samples': 12349440, 'steps': 24119, 'loss/train': 1.635706901550293} +03/04/2022 17:47:31 - INFO - codeparrot_training - Step 24120: {'lr': 0.0004729440466969596, 'samples': 12349952, 'steps': 24120, 'loss/train': 2.102741003036499} +03/04/2022 17:47:33 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 17:47:37 - INFO - codeparrot_training - Step 24121: {'lr': 0.00047294164546815977, 'samples': 12350464, 'steps': 24121, 'loss/train': 1.5973258018493652} +03/04/2022 17:47:40 - INFO - codeparrot_training - Step 24122: {'lr': 0.0004729392441389058, 'samples': 12350976, 'steps': 24122, 'loss/train': 1.2368218898773193} +03/04/2022 17:47:41 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 17:47:45 - INFO - codeparrot_training - Step 24123: {'lr': 0.0004729368427091989, 'samples': 12351488, 'steps': 24123, 'loss/train': 1.6933103799819946} +03/04/2022 17:47:48 - INFO - codeparrot_training - Step 24124: {'lr': 0.0004729344411790401, 'samples': 12352000, 'steps': 24124, 'loss/train': 1.779477596282959} +03/04/2022 17:47:49 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/04/2022 17:47:53 - INFO - codeparrot_training - Step 24125: {'lr': 0.00047293203954843036, 'samples': 12352512, 'steps': 24125, 'loss/train': 2.0349197387695312} +03/04/2022 17:47:57 - INFO - codeparrot_training - Step 24126: {'lr': 0.000472929637817371, 'samples': 12353024, 'steps': 24126, 'loss/train': 1.9012538194656372} +03/04/2022 17:47:58 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 17:48:02 - INFO - codeparrot_training - Step 24127: {'lr': 0.00047292723598586295, 'samples': 12353536, 'steps': 24127, 'loss/train': 1.712430477142334} +03/04/2022 17:48:05 - INFO - codeparrot_training - Step 24128: {'lr': 0.0004729248340539074, 'samples': 12354048, 'steps': 24128, 'loss/train': 1.263621211051941} +03/04/2022 17:48:07 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/04/2022 17:48:11 - INFO - codeparrot_training - Step 24129: {'lr': 0.00047292243202150524, 'samples': 12354560, 'steps': 24129, 'loss/train': 2.3800225257873535} +03/04/2022 17:48:14 - INFO - codeparrot_training - Step 24130: {'lr': 0.00047292002988865773, 'samples': 12355072, 'steps': 24130, 'loss/train': 1.3819352388381958} +03/04/2022 17:48:17 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 17:48:19 - INFO - codeparrot_training - Step 24131: {'lr': 0.0004729176276553659, 'samples': 12355584, 'steps': 24131, 'loss/train': 1.6979447603225708} +03/04/2022 17:48:23 - INFO - codeparrot_training - Step 24132: {'lr': 0.00047291522532163084, 'samples': 12356096, 'steps': 24132, 'loss/train': 2.4203293323516846} +03/04/2022 17:48:26 - INFO - codeparrot_training - Step 24133: {'lr': 0.0004729128228874536, 'samples': 12356608, 'steps': 24133, 'loss/train': 1.396500587463379} +03/04/2022 17:48:26 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 17:48:31 - INFO - codeparrot_training - Step 24134: {'lr': 0.0004729104203528353, 'samples': 12357120, 'steps': 24134, 'loss/train': 1.724676489830017} +03/04/2022 17:48:34 - INFO - codeparrot_training - Step 24135: {'lr': 0.0004729080177177769, 'samples': 12357632, 'steps': 24135, 'loss/train': 1.9641534090042114} +03/04/2022 17:48:35 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/04/2022 17:48:40 - INFO - codeparrot_training - Step 24136: {'lr': 0.0004729056149822797, 'samples': 12358144, 'steps': 24136, 'loss/train': 2.251063585281372} +03/04/2022 17:48:43 - INFO - codeparrot_training - Step 24137: {'lr': 0.0004729032121463447, 'samples': 12358656, 'steps': 24137, 'loss/train': 1.8379000425338745} +03/04/2022 17:48:43 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/04/2022 17:48:48 - INFO - codeparrot_training - Step 24138: {'lr': 0.00047290080920997285, 'samples': 12359168, 'steps': 24138, 'loss/train': 1.8562967777252197} +03/04/2022 17:48:51 - INFO - codeparrot_training - Step 24139: {'lr': 0.0004728984061731654, 'samples': 12359680, 'steps': 24139, 'loss/train': 1.772022008895874} +03/04/2022 17:48:52 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/04/2022 17:48:57 - INFO - codeparrot_training - Step 24140: {'lr': 0.00047289600303592334, 'samples': 12360192, 'steps': 24140, 'loss/train': 2.4628498554229736} +03/04/2022 17:49:00 - INFO - codeparrot_training - Step 24141: {'lr': 0.00047289359979824774, 'samples': 12360704, 'steps': 24141, 'loss/train': 1.763590693473816} +03/04/2022 17:49:01 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/04/2022 17:49:05 - INFO - codeparrot_training - Step 24142: {'lr': 0.0004728911964601398, 'samples': 12361216, 'steps': 24142, 'loss/train': 0.7303640842437744} +03/04/2022 17:49:08 - INFO - codeparrot_training - Step 24143: {'lr': 0.00047288879302160046, 'samples': 12361728, 'steps': 24143, 'loss/train': 2.0565216541290283} +03/04/2022 17:49:09 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/04/2022 17:49:14 - INFO - codeparrot_training - Step 24144: {'lr': 0.000472886389482631, 'samples': 12362240, 'steps': 24144, 'loss/train': 2.1034488677978516} +03/04/2022 17:49:17 - INFO - codeparrot_training - Step 24145: {'lr': 0.00047288398584323225, 'samples': 12362752, 'steps': 24145, 'loss/train': 1.5689754486083984} +03/04/2022 17:49:18 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 17:49:22 - INFO - codeparrot_training - Step 24146: {'lr': 0.0004728815821034055, 'samples': 12363264, 'steps': 24146, 'loss/train': 1.4876484870910645} +03/04/2022 17:49:25 - INFO - codeparrot_training - Step 24147: {'lr': 0.00047287917826315163, 'samples': 12363776, 'steps': 24147, 'loss/train': 2.3330321311950684} +03/04/2022 17:49:26 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/04/2022 17:49:30 - INFO - codeparrot_training - Step 24148: {'lr': 0.00047287677432247187, 'samples': 12364288, 'steps': 24148, 'loss/train': 1.0961991548538208} +03/04/2022 17:49:34 - INFO - codeparrot_training - Step 24149: {'lr': 0.0004728743702813674, 'samples': 12364800, 'steps': 24149, 'loss/train': 2.125617504119873} +03/04/2022 17:49:35 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/04/2022 17:49:39 - INFO - codeparrot_training - Step 24150: {'lr': 0.00047287196613983906, 'samples': 12365312, 'steps': 24150, 'loss/train': 1.4978234767913818} +03/04/2022 17:49:42 - INFO - codeparrot_training - Step 24151: {'lr': 0.00047286956189788803, 'samples': 12365824, 'steps': 24151, 'loss/train': 2.1263692378997803} +03/04/2022 17:49:43 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 17:49:47 - INFO - codeparrot_training - Step 24152: {'lr': 0.0004728671575555155, 'samples': 12366336, 'steps': 24152, 'loss/train': 2.7224557399749756} +03/04/2022 17:49:51 - INFO - codeparrot_training - Step 24153: {'lr': 0.00047286475311272244, 'samples': 12366848, 'steps': 24153, 'loss/train': 1.6270722150802612} +03/04/2022 17:49:52 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 17:49:56 - INFO - codeparrot_training - Step 24154: {'lr': 0.00047286234856950995, 'samples': 12367360, 'steps': 24154, 'loss/train': 0.6083402633666992} +03/04/2022 17:49:59 - INFO - codeparrot_training - Step 24155: {'lr': 0.0004728599439258791, 'samples': 12367872, 'steps': 24155, 'loss/train': 1.332889437675476} +03/04/2022 17:50:01 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 17:50:04 - INFO - codeparrot_training - Step 24156: {'lr': 0.00047285753918183105, 'samples': 12368384, 'steps': 24156, 'loss/train': 2.27038311958313} +03/04/2022 17:50:07 - INFO - codeparrot_training - Step 24157: {'lr': 0.0004728551343373668, 'samples': 12368896, 'steps': 24157, 'loss/train': 0.8916676044464111} +03/04/2022 17:50:09 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/04/2022 17:50:13 - INFO - codeparrot_training - Step 24158: {'lr': 0.0004728527293924875, 'samples': 12369408, 'steps': 24158, 'loss/train': 2.1318705081939697} +03/04/2022 17:50:16 - INFO - codeparrot_training - Step 24159: {'lr': 0.0004728503243471941, 'samples': 12369920, 'steps': 24159, 'loss/train': 1.942136287689209} +03/04/2022 17:50:18 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 17:50:21 - INFO - codeparrot_training - Step 24160: {'lr': 0.00047284791920148786, 'samples': 12370432, 'steps': 24160, 'loss/train': 1.8322309255599976} +03/04/2022 17:50:24 - INFO - codeparrot_training - Step 24161: {'lr': 0.0004728455139553698, 'samples': 12370944, 'steps': 24161, 'loss/train': 2.179103136062622} +03/04/2022 17:50:27 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 17:50:30 - INFO - codeparrot_training - Step 24162: {'lr': 0.00047284310860884097, 'samples': 12371456, 'steps': 24162, 'loss/train': 2.1942903995513916} +03/04/2022 17:50:33 - INFO - codeparrot_training - Step 24163: {'lr': 0.0004728407031619025, 'samples': 12371968, 'steps': 24163, 'loss/train': 1.5318578481674194} +03/04/2022 17:50:35 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/04/2022 17:50:38 - INFO - codeparrot_training - Step 24164: {'lr': 0.00047283829761455545, 'samples': 12372480, 'steps': 24164, 'loss/train': 1.6229345798492432} +03/04/2022 17:50:41 - INFO - codeparrot_training - Step 24165: {'lr': 0.00047283589196680083, 'samples': 12372992, 'steps': 24165, 'loss/train': 1.9523741006851196} +03/04/2022 17:50:44 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 17:50:47 - INFO - codeparrot_training - Step 24166: {'lr': 0.00047283348621863987, 'samples': 12373504, 'steps': 24166, 'loss/train': 2.51261043548584} +03/04/2022 17:50:50 - INFO - codeparrot_training - Step 24167: {'lr': 0.0004728310803700735, 'samples': 12374016, 'steps': 24167, 'loss/train': 1.9374743700027466} +03/04/2022 17:50:52 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 17:50:55 - INFO - codeparrot_training - Step 24168: {'lr': 0.00047282867442110296, 'samples': 12374528, 'steps': 24168, 'loss/train': 2.0913774967193604} +03/04/2022 17:50:58 - INFO - codeparrot_training - Step 24169: {'lr': 0.0004728262683717292, 'samples': 12375040, 'steps': 24169, 'loss/train': 1.3591228723526} +03/04/2022 17:51:01 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 17:51:03 - INFO - codeparrot_training - Step 24170: {'lr': 0.0004728238622219534, 'samples': 12375552, 'steps': 24170, 'loss/train': 1.3195126056671143} +03/04/2022 17:51:07 - INFO - codeparrot_training - Step 24171: {'lr': 0.0004728214559717766, 'samples': 12376064, 'steps': 24171, 'loss/train': 1.9557253122329712} +03/04/2022 17:51:09 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 17:51:12 - INFO - codeparrot_training - Step 24172: {'lr': 0.0004728190496211999, 'samples': 12376576, 'steps': 24172, 'loss/train': 2.031425952911377} +03/04/2022 17:51:15 - INFO - codeparrot_training - Step 24173: {'lr': 0.0004728166431702243, 'samples': 12377088, 'steps': 24173, 'loss/train': 2.1004862785339355} +03/04/2022 17:51:18 - INFO - codeparrot_training - Step 24174: {'lr': 0.0004728142366188511, 'samples': 12377600, 'steps': 24174, 'loss/train': 1.667949914932251} +03/04/2022 17:51:19 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 17:51:24 - INFO - codeparrot_training - Step 24175: {'lr': 0.0004728118299670812, 'samples': 12378112, 'steps': 24175, 'loss/train': 1.277051329612732} +03/04/2022 17:51:27 - INFO - codeparrot_training - Step 24176: {'lr': 0.0004728094232149156, 'samples': 12378624, 'steps': 24176, 'loss/train': 1.111968755722046} +03/04/2022 17:51:27 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 17:51:32 - INFO - codeparrot_training - Step 24177: {'lr': 0.0004728070163623557, 'samples': 12379136, 'steps': 24177, 'loss/train': 2.2291765213012695} +03/04/2022 17:51:35 - INFO - codeparrot_training - Step 24178: {'lr': 0.00047280460940940224, 'samples': 12379648, 'steps': 24178, 'loss/train': 1.8652911186218262} +03/04/2022 17:51:35 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 17:51:40 - INFO - codeparrot_training - Step 24179: {'lr': 0.00047280220235605653, 'samples': 12380160, 'steps': 24179, 'loss/train': 1.82382071018219} +03/04/2022 17:51:44 - INFO - codeparrot_training - Step 24180: {'lr': 0.00047279979520231956, 'samples': 12380672, 'steps': 24180, 'loss/train': 0.9858987331390381} +03/04/2022 17:51:44 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 17:51:49 - INFO - codeparrot_training - Step 24181: {'lr': 0.0004727973879481925, 'samples': 12381184, 'steps': 24181, 'loss/train': 6.967962265014648} +03/04/2022 17:51:52 - INFO - codeparrot_training - Step 24182: {'lr': 0.0004727949805936763, 'samples': 12381696, 'steps': 24182, 'loss/train': 1.5779021978378296} +03/04/2022 17:51:53 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 17:51:57 - INFO - codeparrot_training - Step 24183: {'lr': 0.00047279257313877216, 'samples': 12382208, 'steps': 24183, 'loss/train': 1.7934304475784302} +03/04/2022 17:52:00 - INFO - codeparrot_training - Step 24184: {'lr': 0.00047279016558348107, 'samples': 12382720, 'steps': 24184, 'loss/train': 2.243180513381958} +03/04/2022 17:52:02 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 17:52:06 - INFO - codeparrot_training - Step 24185: {'lr': 0.00047278775792780424, 'samples': 12383232, 'steps': 24185, 'loss/train': 1.7482424974441528} +03/04/2022 17:52:09 - INFO - codeparrot_training - Step 24186: {'lr': 0.00047278535017174266, 'samples': 12383744, 'steps': 24186, 'loss/train': 1.3965650796890259} +03/04/2022 17:52:10 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 17:52:14 - INFO - codeparrot_training - Step 24187: {'lr': 0.00047278294231529745, 'samples': 12384256, 'steps': 24187, 'loss/train': 1.8189219236373901} +03/04/2022 17:52:17 - INFO - codeparrot_training - Step 24188: {'lr': 0.0004727805343584697, 'samples': 12384768, 'steps': 24188, 'loss/train': 2.2225255966186523} +03/04/2022 17:52:19 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 17:52:23 - INFO - codeparrot_training - Step 24189: {'lr': 0.00047277812630126044, 'samples': 12385280, 'steps': 24189, 'loss/train': 0.787157416343689} +03/04/2022 17:52:26 - INFO - codeparrot_training - Step 24190: {'lr': 0.0004727757181436708, 'samples': 12385792, 'steps': 24190, 'loss/train': 1.96137273311615} +03/04/2022 17:52:27 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/04/2022 17:52:31 - INFO - codeparrot_training - Step 24191: {'lr': 0.0004727733098857019, 'samples': 12386304, 'steps': 24191, 'loss/train': 1.1898096799850464} +03/04/2022 17:52:34 - INFO - codeparrot_training - Step 24192: {'lr': 0.0004727709015273547, 'samples': 12386816, 'steps': 24192, 'loss/train': 1.6955680847167969} +03/04/2022 17:52:36 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/04/2022 17:52:40 - INFO - codeparrot_training - Step 24193: {'lr': 0.00047276849306863045, 'samples': 12387328, 'steps': 24193, 'loss/train': 1.356069564819336} +03/04/2022 17:52:43 - INFO - codeparrot_training - Step 24194: {'lr': 0.0004727660845095301, 'samples': 12387840, 'steps': 24194, 'loss/train': 0.5605583190917969} +03/04/2022 17:52:46 - INFO - codeparrot_training - Step 24195: {'lr': 0.0004727636758500548, 'samples': 12388352, 'steps': 24195, 'loss/train': 1.6826062202453613} +03/04/2022 17:52:48 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 17:52:52 - INFO - codeparrot_training - Step 24196: {'lr': 0.0004727612670902057, 'samples': 12388864, 'steps': 24196, 'loss/train': 2.06768536567688} +03/04/2022 17:52:55 - INFO - codeparrot_training - Step 24197: {'lr': 0.0004727588582299837, 'samples': 12389376, 'steps': 24197, 'loss/train': 1.405544400215149} +03/04/2022 17:52:57 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/04/2022 17:53:00 - INFO - codeparrot_training - Step 24198: {'lr': 0.00047275644926939004, 'samples': 12389888, 'steps': 24198, 'loss/train': 2.158219575881958} +03/04/2022 17:53:04 - INFO - codeparrot_training - Step 24199: {'lr': 0.0004727540402084258, 'samples': 12390400, 'steps': 24199, 'loss/train': 1.8030403852462769} +03/04/2022 17:53:06 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 17:53:09 - INFO - codeparrot_training - Step 24200: {'lr': 0.00047275163104709196, 'samples': 12390912, 'steps': 24200, 'loss/train': 2.058849334716797} +03/04/2022 17:53:12 - INFO - codeparrot_training - Step 24201: {'lr': 0.0004727492217853897, 'samples': 12391424, 'steps': 24201, 'loss/train': 1.6077334880828857} +03/04/2022 17:53:14 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/04/2022 17:53:17 - INFO - codeparrot_training - Step 24202: {'lr': 0.0004727468124233201, 'samples': 12391936, 'steps': 24202, 'loss/train': 1.2564737796783447} +03/04/2022 17:53:21 - INFO - codeparrot_training - Step 24203: {'lr': 0.0004727444029608842, 'samples': 12392448, 'steps': 24203, 'loss/train': 1.886802077293396} +03/04/2022 17:53:23 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/04/2022 17:53:26 - INFO - codeparrot_training - Step 24204: {'lr': 0.0004727419933980831, 'samples': 12392960, 'steps': 24204, 'loss/train': 0.9463183283805847} +03/04/2022 17:53:29 - INFO - codeparrot_training - Step 24205: {'lr': 0.00047273958373491795, 'samples': 12393472, 'steps': 24205, 'loss/train': 2.2054271697998047} +03/04/2022 17:53:31 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 17:53:34 - INFO - codeparrot_training - Step 24206: {'lr': 0.0004727371739713897, 'samples': 12393984, 'steps': 24206, 'loss/train': 1.0134239196777344} +03/04/2022 17:53:38 - INFO - codeparrot_training - Step 24207: {'lr': 0.0004727347641074996, 'samples': 12394496, 'steps': 24207, 'loss/train': 1.611152172088623} +03/04/2022 17:53:40 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 17:53:43 - INFO - codeparrot_training - Step 24208: {'lr': 0.0004727323541432486, 'samples': 12395008, 'steps': 24208, 'loss/train': 1.29292893409729} +03/04/2022 17:53:46 - INFO - codeparrot_training - Step 24209: {'lr': 0.0004727299440786378, 'samples': 12395520, 'steps': 24209, 'loss/train': 1.5374947786331177} +03/04/2022 17:53:48 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 17:53:51 - INFO - codeparrot_training - Step 24210: {'lr': 0.0004727275339136684, 'samples': 12396032, 'steps': 24210, 'loss/train': 0.5444936752319336} +03/04/2022 17:53:54 - INFO - codeparrot_training - Step 24211: {'lr': 0.0004727251236483414, 'samples': 12396544, 'steps': 24211, 'loss/train': 2.217684268951416} +03/04/2022 17:53:57 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 17:54:00 - INFO - codeparrot_training - Step 24212: {'lr': 0.0004727227132826579, 'samples': 12397056, 'steps': 24212, 'loss/train': 2.098790407180786} +03/04/2022 17:54:03 - INFO - codeparrot_training - Step 24213: {'lr': 0.00047272030281661894, 'samples': 12397568, 'steps': 24213, 'loss/train': 1.6188281774520874} +03/04/2022 17:54:05 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 17:54:08 - INFO - codeparrot_training - Step 24214: {'lr': 0.0004727178922502257, 'samples': 12398080, 'steps': 24214, 'loss/train': 1.2367664575576782} +03/04/2022 17:54:11 - INFO - codeparrot_training - Step 24215: {'lr': 0.00047271548158347917, 'samples': 12398592, 'steps': 24215, 'loss/train': 1.5913017988204956} +03/04/2022 17:54:14 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/04/2022 17:54:17 - INFO - codeparrot_training - Step 24216: {'lr': 0.00047271307081638047, 'samples': 12399104, 'steps': 24216, 'loss/train': 1.8503285646438599} +03/04/2022 17:54:20 - INFO - codeparrot_training - Step 24217: {'lr': 0.0004727106599489307, 'samples': 12399616, 'steps': 24217, 'loss/train': 2.405428886413574} +03/04/2022 17:54:22 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 17:54:25 - INFO - codeparrot_training - Step 24218: {'lr': 0.000472708248981131, 'samples': 12400128, 'steps': 24218, 'loss/train': 1.2816799879074097} +03/04/2022 17:54:28 - INFO - codeparrot_training - Step 24219: {'lr': 0.0004727058379129824, 'samples': 12400640, 'steps': 24219, 'loss/train': 0.7940981388092041} +03/04/2022 17:54:34 - INFO - codeparrot_training - Step 24220: {'lr': 0.00047270342674448593, 'samples': 12401152, 'steps': 24220, 'loss/train': 1.9487758874893188} +03/04/2022 17:54:37 - INFO - codeparrot_training - Step 24221: {'lr': 0.0004727010154756427, 'samples': 12401664, 'steps': 24221, 'loss/train': 2.4605300426483154} +03/04/2022 17:54:39 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/04/2022 17:54:42 - INFO - codeparrot_training - Step 24222: {'lr': 0.00047269860410645395, 'samples': 12402176, 'steps': 24222, 'loss/train': 1.4327507019042969} +03/04/2022 17:54:45 - INFO - codeparrot_training - Step 24223: {'lr': 0.00047269619263692056, 'samples': 12402688, 'steps': 24223, 'loss/train': 1.8794963359832764} +03/04/2022 17:54:47 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 17:54:51 - INFO - codeparrot_training - Step 24224: {'lr': 0.0004726937810670437, 'samples': 12403200, 'steps': 24224, 'loss/train': 2.1202423572540283} +03/04/2022 17:54:54 - INFO - codeparrot_training - Step 24225: {'lr': 0.00047269136939682445, 'samples': 12403712, 'steps': 24225, 'loss/train': 1.8469197750091553} +03/04/2022 17:54:56 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/04/2022 17:54:59 - INFO - codeparrot_training - Step 24226: {'lr': 0.00047268895762626396, 'samples': 12404224, 'steps': 24226, 'loss/train': 2.1769583225250244} +03/04/2022 17:55:02 - INFO - codeparrot_training - Step 24227: {'lr': 0.00047268654575536326, 'samples': 12404736, 'steps': 24227, 'loss/train': 1.6712318658828735} +03/04/2022 17:55:04 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 17:55:08 - INFO - codeparrot_training - Step 24228: {'lr': 0.0004726841337841234, 'samples': 12405248, 'steps': 24228, 'loss/train': 1.8083689212799072} +03/04/2022 17:55:11 - INFO - codeparrot_training - Step 24229: {'lr': 0.00047268172171254554, 'samples': 12405760, 'steps': 24229, 'loss/train': 1.786656379699707} +03/04/2022 17:55:14 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 17:55:16 - INFO - codeparrot_training - Step 24230: {'lr': 0.00047267930954063064, 'samples': 12406272, 'steps': 24230, 'loss/train': 1.7426141500473022} +03/04/2022 17:55:19 - INFO - codeparrot_training - Step 24231: {'lr': 0.00047267689726838004, 'samples': 12406784, 'steps': 24231, 'loss/train': 2.165428400039673} +03/04/2022 17:55:23 - INFO - codeparrot_training - Step 24232: {'lr': 0.00047267448489579455, 'samples': 12407296, 'steps': 24232, 'loss/train': 1.243091106414795} +03/04/2022 17:55:23 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/04/2022 17:55:28 - INFO - codeparrot_training - Step 24233: {'lr': 0.00047267207242287536, 'samples': 12407808, 'steps': 24233, 'loss/train': 1.6409904956817627} +03/04/2022 17:55:31 - INFO - codeparrot_training - Step 24234: {'lr': 0.0004726696598496236, 'samples': 12408320, 'steps': 24234, 'loss/train': 3.0037033557891846} +03/04/2022 17:55:32 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 17:55:36 - INFO - codeparrot_training - Step 24235: {'lr': 0.0004726672471760404, 'samples': 12408832, 'steps': 24235, 'loss/train': 1.533087968826294} +03/04/2022 17:55:40 - INFO - codeparrot_training - Step 24236: {'lr': 0.0004726648344021267, 'samples': 12409344, 'steps': 24236, 'loss/train': 1.656006932258606} +03/04/2022 17:55:40 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 17:55:45 - INFO - codeparrot_training - Step 24237: {'lr': 0.0004726624215278836, 'samples': 12409856, 'steps': 24237, 'loss/train': 2.3111319541931152} +03/04/2022 17:55:48 - INFO - codeparrot_training - Step 24238: {'lr': 0.0004726600085533124, 'samples': 12410368, 'steps': 24238, 'loss/train': 2.4196877479553223} +03/04/2022 17:55:49 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 17:55:53 - INFO - codeparrot_training - Step 24239: {'lr': 0.0004726575954784139, 'samples': 12410880, 'steps': 24239, 'loss/train': 1.694189190864563} +03/04/2022 17:55:56 - INFO - codeparrot_training - Step 24240: {'lr': 0.0004726551823031894, 'samples': 12411392, 'steps': 24240, 'loss/train': 2.8977043628692627} +03/04/2022 17:55:57 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 17:56:02 - INFO - codeparrot_training - Step 24241: {'lr': 0.0004726527690276399, 'samples': 12411904, 'steps': 24241, 'loss/train': 1.1131377220153809} +03/04/2022 17:56:05 - INFO - codeparrot_training - Step 24242: {'lr': 0.0004726503556517665, 'samples': 12412416, 'steps': 24242, 'loss/train': 1.5966359376907349} +03/04/2022 17:56:05 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 17:56:10 - INFO - codeparrot_training - Step 24243: {'lr': 0.0004726479421755703, 'samples': 12412928, 'steps': 24243, 'loss/train': 1.7151087522506714} +03/04/2022 17:56:13 - INFO - codeparrot_training - Step 24244: {'lr': 0.0004726455285990523, 'samples': 12413440, 'steps': 24244, 'loss/train': 1.148912787437439} +03/04/2022 17:56:14 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 17:56:18 - INFO - codeparrot_training - Step 24245: {'lr': 0.00047264311492221375, 'samples': 12413952, 'steps': 24245, 'loss/train': 1.2204225063323975} +03/04/2022 17:56:22 - INFO - codeparrot_training - Step 24246: {'lr': 0.00047264070114505556, 'samples': 12414464, 'steps': 24246, 'loss/train': 1.3114022016525269} +03/04/2022 17:56:22 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 17:56:27 - INFO - codeparrot_training - Step 24247: {'lr': 0.00047263828726757897, 'samples': 12414976, 'steps': 24247, 'loss/train': 1.5454684495925903} +03/04/2022 17:56:30 - INFO - codeparrot_training - Step 24248: {'lr': 0.00047263587328978495, 'samples': 12415488, 'steps': 24248, 'loss/train': 1.6634445190429688} +03/04/2022 17:56:30 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 17:56:35 - INFO - codeparrot_training - Step 24249: {'lr': 0.00047263345921167473, 'samples': 12416000, 'steps': 24249, 'loss/train': 3.235689640045166} +03/04/2022 17:56:39 - INFO - codeparrot_training - Step 24250: {'lr': 0.00047263104503324926, 'samples': 12416512, 'steps': 24250, 'loss/train': 1.3774714469909668} +03/04/2022 17:56:39 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 17:56:44 - INFO - codeparrot_training - Step 24251: {'lr': 0.00047262863075450966, 'samples': 12417024, 'steps': 24251, 'loss/train': 2.329606533050537} +03/04/2022 17:56:47 - INFO - codeparrot_training - Step 24252: {'lr': 0.0004726262163754571, 'samples': 12417536, 'steps': 24252, 'loss/train': 0.966454803943634} +03/04/2022 17:56:48 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/04/2022 17:56:52 - INFO - codeparrot_training - Step 24253: {'lr': 0.00047262380189609253, 'samples': 12418048, 'steps': 24253, 'loss/train': 1.990532636642456} +03/04/2022 17:56:55 - INFO - codeparrot_training - Step 24254: {'lr': 0.0004726213873164171, 'samples': 12418560, 'steps': 24254, 'loss/train': 1.827122449874878} +03/04/2022 17:56:56 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 17:57:01 - INFO - codeparrot_training - Step 24255: {'lr': 0.00047261897263643196, 'samples': 12419072, 'steps': 24255, 'loss/train': 2.008157968521118} +03/04/2022 17:57:04 - INFO - codeparrot_training - Step 24256: {'lr': 0.0004726165578561381, 'samples': 12419584, 'steps': 24256, 'loss/train': 1.4422613382339478} +03/04/2022 17:57:05 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/04/2022 17:57:09 - INFO - codeparrot_training - Step 24257: {'lr': 0.0004726141429755367, 'samples': 12420096, 'steps': 24257, 'loss/train': 2.193883180618286} +03/04/2022 17:57:12 - INFO - codeparrot_training - Step 24258: {'lr': 0.0004726117279946288, 'samples': 12420608, 'steps': 24258, 'loss/train': 1.925605058670044} +03/04/2022 17:57:13 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 17:57:18 - INFO - codeparrot_training - Step 24259: {'lr': 0.0004726093129134155, 'samples': 12421120, 'steps': 24259, 'loss/train': 1.809482455253601} +03/04/2022 17:57:21 - INFO - codeparrot_training - Step 24260: {'lr': 0.0004726068977318978, 'samples': 12421632, 'steps': 24260, 'loss/train': 2.085036516189575} +03/04/2022 17:57:22 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 17:57:26 - INFO - codeparrot_training - Step 24261: {'lr': 0.0004726044824500769, 'samples': 12422144, 'steps': 24261, 'loss/train': 1.197949767112732} +03/04/2022 17:57:29 - INFO - codeparrot_training - Step 24262: {'lr': 0.0004726020670679538, 'samples': 12422656, 'steps': 24262, 'loss/train': 1.528116226196289} +03/04/2022 17:57:30 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 17:57:35 - INFO - codeparrot_training - Step 24263: {'lr': 0.00047259965158552976, 'samples': 12423168, 'steps': 24263, 'loss/train': 2.322021722793579} +03/04/2022 17:57:38 - INFO - codeparrot_training - Step 24264: {'lr': 0.00047259723600280573, 'samples': 12423680, 'steps': 24264, 'loss/train': 2.0564043521881104} +03/04/2022 17:57:38 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 17:57:43 - INFO - codeparrot_training - Step 24265: {'lr': 0.0004725948203197828, 'samples': 12424192, 'steps': 24265, 'loss/train': 2.2129111289978027} +03/04/2022 17:57:46 - INFO - codeparrot_training - Step 24266: {'lr': 0.0004725924045364621, 'samples': 12424704, 'steps': 24266, 'loss/train': 2.3588967323303223} +03/04/2022 17:57:47 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/04/2022 17:57:51 - INFO - codeparrot_training - Step 24267: {'lr': 0.00047258998865284463, 'samples': 12425216, 'steps': 24267, 'loss/train': 2.0209569931030273} +03/04/2022 17:57:55 - INFO - codeparrot_training - Step 24268: {'lr': 0.0004725875726689316, 'samples': 12425728, 'steps': 24268, 'loss/train': 1.6326833963394165} +03/04/2022 17:57:55 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 17:58:00 - INFO - codeparrot_training - Step 24269: {'lr': 0.000472585156584724, 'samples': 12426240, 'steps': 24269, 'loss/train': 1.5918077230453491} +03/04/2022 17:58:03 - INFO - codeparrot_training - Step 24270: {'lr': 0.00047258274040022305, 'samples': 12426752, 'steps': 24270, 'loss/train': 2.1289052963256836} +03/04/2022 17:58:04 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 17:58:08 - INFO - codeparrot_training - Step 24271: {'lr': 0.0004725803241154297, 'samples': 12427264, 'steps': 24271, 'loss/train': 1.4785586595535278} +03/04/2022 17:58:12 - INFO - codeparrot_training - Step 24272: {'lr': 0.0004725779077303451, 'samples': 12427776, 'steps': 24272, 'loss/train': 2.4786362648010254} +03/04/2022 17:58:13 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 17:58:17 - INFO - codeparrot_training - Step 24273: {'lr': 0.0004725754912449703, 'samples': 12428288, 'steps': 24273, 'loss/train': 1.2445313930511475} +03/04/2022 17:58:20 - INFO - codeparrot_training - Step 24274: {'lr': 0.0004725730746593064, 'samples': 12428800, 'steps': 24274, 'loss/train': 1.2462815046310425} +03/04/2022 17:58:21 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 17:58:25 - INFO - codeparrot_training - Step 24275: {'lr': 0.0004725706579733546, 'samples': 12429312, 'steps': 24275, 'loss/train': 1.4222785234451294} +03/04/2022 17:58:29 - INFO - codeparrot_training - Step 24276: {'lr': 0.00047256824118711583, 'samples': 12429824, 'steps': 24276, 'loss/train': 3.2636332511901855} +03/04/2022 17:58:30 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 17:58:34 - INFO - codeparrot_training - Step 24277: {'lr': 0.00047256582430059126, 'samples': 12430336, 'steps': 24277, 'loss/train': 1.9199249744415283} +03/04/2022 17:58:37 - INFO - codeparrot_training - Step 24278: {'lr': 0.00047256340731378194, 'samples': 12430848, 'steps': 24278, 'loss/train': 1.3929206132888794} +03/04/2022 17:58:38 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/04/2022 17:58:42 - INFO - codeparrot_training - Step 24279: {'lr': 0.00047256099022668896, 'samples': 12431360, 'steps': 24279, 'loss/train': 1.118316411972046} +03/04/2022 17:58:46 - INFO - codeparrot_training - Step 24280: {'lr': 0.00047255857303931347, 'samples': 12431872, 'steps': 24280, 'loss/train': 1.2074640989303589} +03/04/2022 17:58:47 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/04/2022 17:58:51 - INFO - codeparrot_training - Step 24281: {'lr': 0.00047255615575165653, 'samples': 12432384, 'steps': 24281, 'loss/train': 2.0333824157714844} +03/04/2022 17:58:54 - INFO - codeparrot_training - Step 24282: {'lr': 0.0004725537383637193, 'samples': 12432896, 'steps': 24282, 'loss/train': 1.473068118095398} +03/04/2022 17:58:55 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 17:58:59 - INFO - codeparrot_training - Step 24283: {'lr': 0.0004725513208755027, 'samples': 12433408, 'steps': 24283, 'loss/train': 1.9178364276885986} +03/04/2022 17:59:02 - INFO - codeparrot_training - Step 24284: {'lr': 0.0004725489032870079, 'samples': 12433920, 'steps': 24284, 'loss/train': 1.0104693174362183} +03/04/2022 17:59:04 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 17:59:08 - INFO - codeparrot_training - Step 24285: {'lr': 0.000472546485598236, 'samples': 12434432, 'steps': 24285, 'loss/train': 1.3684591054916382} +03/04/2022 17:59:11 - INFO - codeparrot_training - Step 24286: {'lr': 0.0004725440678091881, 'samples': 12434944, 'steps': 24286, 'loss/train': 1.81865394115448} +03/04/2022 17:59:12 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 17:59:16 - INFO - codeparrot_training - Step 24287: {'lr': 0.00047254164991986525, 'samples': 12435456, 'steps': 24287, 'loss/train': 1.9759001731872559} +03/04/2022 17:59:19 - INFO - codeparrot_training - Step 24288: {'lr': 0.0004725392319302686, 'samples': 12435968, 'steps': 24288, 'loss/train': 0.8127661943435669} +03/04/2022 17:59:21 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 17:59:25 - INFO - codeparrot_training - Step 24289: {'lr': 0.0004725368138403992, 'samples': 12436480, 'steps': 24289, 'loss/train': 1.5502636432647705} +03/04/2022 17:59:28 - INFO - codeparrot_training - Step 24290: {'lr': 0.00047253439565025815, 'samples': 12436992, 'steps': 24290, 'loss/train': 2.0844812393188477} +03/04/2022 17:59:29 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 17:59:33 - INFO - codeparrot_training - Step 24291: {'lr': 0.00047253197735984653, 'samples': 12437504, 'steps': 24291, 'loss/train': 1.2805588245391846} +03/04/2022 17:59:36 - INFO - codeparrot_training - Step 24292: {'lr': 0.00047252955896916546, 'samples': 12438016, 'steps': 24292, 'loss/train': 2.0271823406219482} +03/04/2022 17:59:38 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/04/2022 17:59:41 - INFO - codeparrot_training - Step 24293: {'lr': 0.000472527140478216, 'samples': 12438528, 'steps': 24293, 'loss/train': 1.4711685180664062} +03/04/2022 17:59:45 - INFO - codeparrot_training - Step 24294: {'lr': 0.00047252472188699917, 'samples': 12439040, 'steps': 24294, 'loss/train': 1.8819751739501953} +03/04/2022 17:59:46 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 17:59:50 - INFO - codeparrot_training - Step 24295: {'lr': 0.0004725223031955162, 'samples': 12439552, 'steps': 24295, 'loss/train': 1.436058521270752} +03/04/2022 17:59:53 - INFO - codeparrot_training - Step 24296: {'lr': 0.0004725198844037681, 'samples': 12440064, 'steps': 24296, 'loss/train': 1.1211553812026978} +03/04/2022 17:59:55 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 17:59:58 - INFO - codeparrot_training - Step 24297: {'lr': 0.00047251746551175603, 'samples': 12440576, 'steps': 24297, 'loss/train': 2.218895673751831} +03/04/2022 18:00:02 - INFO - codeparrot_training - Step 24298: {'lr': 0.000472515046519481, 'samples': 12441088, 'steps': 24298, 'loss/train': 2.248339891433716} +03/04/2022 18:00:03 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/04/2022 18:00:07 - INFO - codeparrot_training - Step 24299: {'lr': 0.000472512627426944, 'samples': 12441600, 'steps': 24299, 'loss/train': 2.126749038696289} +03/04/2022 18:00:10 - INFO - codeparrot_training - Step 24300: {'lr': 0.0004725102082341464, 'samples': 12442112, 'steps': 24300, 'loss/train': 2.5303127765655518} +03/04/2022 18:00:11 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 18:00:15 - INFO - codeparrot_training - Step 24301: {'lr': 0.00047250778894108905, 'samples': 12442624, 'steps': 24301, 'loss/train': 0.06717956811189651} +03/04/2022 18:00:18 - INFO - codeparrot_training - Step 24302: {'lr': 0.0004725053695477731, 'samples': 12443136, 'steps': 24302, 'loss/train': 1.585740089416504} +03/04/2022 18:00:20 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/04/2022 18:00:24 - INFO - codeparrot_training - Step 24303: {'lr': 0.0004725029500541997, 'samples': 12443648, 'steps': 24303, 'loss/train': 1.6549819707870483} +03/04/2022 18:00:27 - INFO - codeparrot_training - Step 24304: {'lr': 0.00047250053046036996, 'samples': 12444160, 'steps': 24304, 'loss/train': 1.9009531736373901} +03/04/2022 18:00:29 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 18:00:32 - INFO - codeparrot_training - Step 24305: {'lr': 0.00047249811076628483, 'samples': 12444672, 'steps': 24305, 'loss/train': 1.6141051054000854} +03/04/2022 18:00:35 - INFO - codeparrot_training - Step 24306: {'lr': 0.00047249569097194554, 'samples': 12445184, 'steps': 24306, 'loss/train': 1.180796504020691} +03/04/2022 18:00:37 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/04/2022 18:00:41 - INFO - codeparrot_training - Step 24307: {'lr': 0.0004724932710773531, 'samples': 12445696, 'steps': 24307, 'loss/train': 2.142131805419922} +03/04/2022 18:00:44 - INFO - codeparrot_training - Step 24308: {'lr': 0.00047249085108250867, 'samples': 12446208, 'steps': 24308, 'loss/train': 1.8203576803207397} +03/04/2022 18:00:45 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 18:00:49 - INFO - codeparrot_training - Step 24309: {'lr': 0.0004724884309874132, 'samples': 12446720, 'steps': 24309, 'loss/train': 2.0513458251953125} +03/04/2022 18:00:52 - INFO - codeparrot_training - Step 24310: {'lr': 0.00047248601079206797, 'samples': 12447232, 'steps': 24310, 'loss/train': 1.5736480951309204} +03/04/2022 18:00:54 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 18:00:57 - INFO - codeparrot_training - Step 24311: {'lr': 0.0004724835904964739, 'samples': 12447744, 'steps': 24311, 'loss/train': 1.607282280921936} +03/04/2022 18:01:01 - INFO - codeparrot_training - Step 24312: {'lr': 0.0004724811701006322, 'samples': 12448256, 'steps': 24312, 'loss/train': 1.243443250656128} +03/04/2022 18:01:02 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 18:01:06 - INFO - codeparrot_training - Step 24313: {'lr': 0.00047247874960454394, 'samples': 12448768, 'steps': 24313, 'loss/train': 1.4909756183624268} +03/04/2022 18:01:09 - INFO - codeparrot_training - Step 24314: {'lr': 0.0004724763290082102, 'samples': 12449280, 'steps': 24314, 'loss/train': 1.3621405363082886} +03/04/2022 18:01:11 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 18:01:14 - INFO - codeparrot_training - Step 24315: {'lr': 0.000472473908311632, 'samples': 12449792, 'steps': 24315, 'loss/train': 1.7611249685287476} +03/04/2022 18:01:17 - INFO - codeparrot_training - Step 24316: {'lr': 0.0004724714875148105, 'samples': 12450304, 'steps': 24316, 'loss/train': 1.7962604761123657} +03/04/2022 18:01:19 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 18:01:23 - INFO - codeparrot_training - Step 24317: {'lr': 0.0004724690666177468, 'samples': 12450816, 'steps': 24317, 'loss/train': 2.24535870552063} +03/04/2022 18:01:26 - INFO - codeparrot_training - Step 24318: {'lr': 0.00047246664562044193, 'samples': 12451328, 'steps': 24318, 'loss/train': 1.973689317703247} +03/04/2022 18:01:28 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/04/2022 18:01:31 - INFO - codeparrot_training - Step 24319: {'lr': 0.0004724642245228971, 'samples': 12451840, 'steps': 24319, 'loss/train': 1.793555736541748} +03/04/2022 18:01:34 - INFO - codeparrot_training - Step 24320: {'lr': 0.0004724618033251133, 'samples': 12452352, 'steps': 24320, 'loss/train': 1.4529109001159668} +03/04/2022 18:01:37 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/04/2022 18:01:40 - INFO - codeparrot_training - Step 24321: {'lr': 0.0004724593820270916, 'samples': 12452864, 'steps': 24321, 'loss/train': 1.4393378496170044} +03/04/2022 18:01:43 - INFO - codeparrot_training - Step 24322: {'lr': 0.00047245696062883316, 'samples': 12453376, 'steps': 24322, 'loss/train': 2.1024208068847656} +03/04/2022 18:01:45 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 18:01:48 - INFO - codeparrot_training - Step 24323: {'lr': 0.0004724545391303391, 'samples': 12453888, 'steps': 24323, 'loss/train': 2.196434497833252} +03/04/2022 18:01:51 - INFO - codeparrot_training - Step 24324: {'lr': 0.0004724521175316103, 'samples': 12454400, 'steps': 24324, 'loss/train': 1.4231659173965454} +03/04/2022 18:01:54 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 18:01:57 - INFO - codeparrot_training - Step 24325: {'lr': 0.0004724496958326482, 'samples': 12454912, 'steps': 24325, 'loss/train': 1.9584357738494873} +03/04/2022 18:02:00 - INFO - codeparrot_training - Step 24326: {'lr': 0.00047244727403345356, 'samples': 12455424, 'steps': 24326, 'loss/train': 0.1598268300294876} +03/04/2022 18:02:03 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 18:02:05 - INFO - codeparrot_training - Step 24327: {'lr': 0.00047244485213402765, 'samples': 12455936, 'steps': 24327, 'loss/train': 1.1262717247009277} +03/04/2022 18:02:09 - INFO - codeparrot_training - Step 24328: {'lr': 0.0004724424301343716, 'samples': 12456448, 'steps': 24328, 'loss/train': 1.877912998199463} +03/04/2022 18:02:12 - INFO - codeparrot_training - Step 24329: {'lr': 0.00047244000803448635, 'samples': 12456960, 'steps': 24329, 'loss/train': 1.9288427829742432} +03/04/2022 18:02:12 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/04/2022 18:02:17 - INFO - codeparrot_training - Step 24330: {'lr': 0.000472437585834373, 'samples': 12457472, 'steps': 24330, 'loss/train': 1.4592257738113403} +03/04/2022 18:02:20 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 18:02:23 - INFO - codeparrot_training - Step 24331: {'lr': 0.00047243516353403283, 'samples': 12457984, 'steps': 24331, 'loss/train': 0.9039060473442078} +03/04/2022 18:02:26 - INFO - codeparrot_training - Step 24332: {'lr': 0.0004724327411334668, 'samples': 12458496, 'steps': 24332, 'loss/train': 1.7935844659805298} +03/04/2022 18:02:29 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 18:02:31 - INFO - codeparrot_training - Step 24333: {'lr': 0.00047243031863267594, 'samples': 12459008, 'steps': 24333, 'loss/train': 2.445127010345459} +03/04/2022 18:02:34 - INFO - codeparrot_training - Step 24334: {'lr': 0.0004724278960316615, 'samples': 12459520, 'steps': 24334, 'loss/train': 2.669174909591675} +03/04/2022 18:02:37 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 18:02:39 - INFO - codeparrot_training - Step 24335: {'lr': 0.00047242547333042434, 'samples': 12460032, 'steps': 24335, 'loss/train': 1.7123916149139404} +03/04/2022 18:02:43 - INFO - codeparrot_training - Step 24336: {'lr': 0.0004724230505289658, 'samples': 12460544, 'steps': 24336, 'loss/train': 1.8329994678497314} +03/04/2022 18:02:45 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 18:02:48 - INFO - codeparrot_training - Step 24337: {'lr': 0.0004724206276272868, 'samples': 12461056, 'steps': 24337, 'loss/train': 1.385656714439392} +03/04/2022 18:02:51 - INFO - codeparrot_training - Step 24338: {'lr': 0.0004724182046253885, 'samples': 12461568, 'steps': 24338, 'loss/train': 1.7086632251739502} +03/04/2022 18:02:54 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/04/2022 18:02:56 - INFO - codeparrot_training - Step 24339: {'lr': 0.0004724157815232721, 'samples': 12462080, 'steps': 24339, 'loss/train': 2.2320055961608887} +03/04/2022 18:03:00 - INFO - codeparrot_training - Step 24340: {'lr': 0.00047241335832093844, 'samples': 12462592, 'steps': 24340, 'loss/train': 0.9382586479187012} +03/04/2022 18:03:02 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/04/2022 18:03:05 - INFO - codeparrot_training - Step 24341: {'lr': 0.00047241093501838887, 'samples': 12463104, 'steps': 24341, 'loss/train': 2.093116283416748} +03/04/2022 18:03:08 - INFO - codeparrot_training - Step 24342: {'lr': 0.00047240851161562433, 'samples': 12463616, 'steps': 24342, 'loss/train': 2.223428964614868} +03/04/2022 18:03:12 - INFO - codeparrot_training - Step 24343: {'lr': 0.00047240608811264595, 'samples': 12464128, 'steps': 24343, 'loss/train': 0.7209265828132629} +03/04/2022 18:03:14 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 18:03:17 - INFO - codeparrot_training - Step 24344: {'lr': 0.0004724036645094548, 'samples': 12464640, 'steps': 24344, 'loss/train': 1.713396430015564} +03/04/2022 18:03:20 - INFO - codeparrot_training - Step 24345: {'lr': 0.00047240124080605197, 'samples': 12465152, 'steps': 24345, 'loss/train': 1.6939464807510376} +03/04/2022 18:03:22 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 18:03:25 - INFO - codeparrot_training - Step 24346: {'lr': 0.0004723988170024386, 'samples': 12465664, 'steps': 24346, 'loss/train': 1.4689526557922363} +03/04/2022 18:03:28 - INFO - codeparrot_training - Step 24347: {'lr': 0.0004723963930986157, 'samples': 12466176, 'steps': 24347, 'loss/train': 2.147712230682373} +03/04/2022 18:03:31 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 18:03:34 - INFO - codeparrot_training - Step 24348: {'lr': 0.0004723939690945845, 'samples': 12466688, 'steps': 24348, 'loss/train': 2.07883882522583} +03/04/2022 18:03:37 - INFO - codeparrot_training - Step 24349: {'lr': 0.000472391544990346, 'samples': 12467200, 'steps': 24349, 'loss/train': 1.3336700201034546} +03/04/2022 18:03:39 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 18:03:42 - INFO - codeparrot_training - Step 24350: {'lr': 0.0004723891207859012, 'samples': 12467712, 'steps': 24350, 'loss/train': 2.1112406253814697} +03/04/2022 18:03:45 - INFO - codeparrot_training - Step 24351: {'lr': 0.00047238669648125146, 'samples': 12468224, 'steps': 24351, 'loss/train': 1.4390971660614014} +03/04/2022 18:03:48 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/04/2022 18:03:51 - INFO - codeparrot_training - Step 24352: {'lr': 0.00047238427207639755, 'samples': 12468736, 'steps': 24352, 'loss/train': 0.2923624515533447} +03/04/2022 18:03:54 - INFO - codeparrot_training - Step 24353: {'lr': 0.0004723818475713408, 'samples': 12469248, 'steps': 24353, 'loss/train': 0.1767493337392807} +03/04/2022 18:03:56 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 18:03:59 - INFO - codeparrot_training - Step 24354: {'lr': 0.00047237942296608223, 'samples': 12469760, 'steps': 24354, 'loss/train': 1.5050644874572754} +03/04/2022 18:04:02 - INFO - codeparrot_training - Step 24355: {'lr': 0.00047237699826062286, 'samples': 12470272, 'steps': 24355, 'loss/train': 2.0005292892456055} +03/04/2022 18:04:04 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 18:04:08 - INFO - codeparrot_training - Step 24356: {'lr': 0.0004723745734549639, 'samples': 12470784, 'steps': 24356, 'loss/train': 1.1322113275527954} +03/04/2022 18:04:11 - INFO - codeparrot_training - Step 24357: {'lr': 0.0004723721485491064, 'samples': 12471296, 'steps': 24357, 'loss/train': 1.1600359678268433} +03/04/2022 18:04:13 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 18:04:16 - INFO - codeparrot_training - Step 24358: {'lr': 0.0004723697235430514, 'samples': 12471808, 'steps': 24358, 'loss/train': 2.173736333847046} +03/04/2022 18:04:19 - INFO - codeparrot_training - Step 24359: {'lr': 0.0004723672984368, 'samples': 12472320, 'steps': 24359, 'loss/train': 2.0850322246551514} +03/04/2022 18:04:21 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/04/2022 18:04:25 - INFO - codeparrot_training - Step 24360: {'lr': 0.00047236487323035344, 'samples': 12472832, 'steps': 24360, 'loss/train': 1.83031165599823} +03/04/2022 18:04:28 - INFO - codeparrot_training - Step 24361: {'lr': 0.00047236244792371265, 'samples': 12473344, 'steps': 24361, 'loss/train': 1.1164134740829468} +03/04/2022 18:04:30 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 18:04:33 - INFO - codeparrot_training - Step 24362: {'lr': 0.0004723600225168787, 'samples': 12473856, 'steps': 24362, 'loss/train': 2.0817224979400635} +03/04/2022 18:04:37 - INFO - codeparrot_training - Step 24363: {'lr': 0.0004723575970098528, 'samples': 12474368, 'steps': 24363, 'loss/train': 1.3799952268600464} +03/04/2022 18:04:40 - INFO - codeparrot_training - Step 24364: {'lr': 0.00047235517140263605, 'samples': 12474880, 'steps': 24364, 'loss/train': 2.1925430297851562} +03/04/2022 18:04:41 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 18:04:45 - INFO - codeparrot_training - Step 24365: {'lr': 0.00047235274569522946, 'samples': 12475392, 'steps': 24365, 'loss/train': 2.468376874923706} +03/04/2022 18:04:48 - INFO - codeparrot_training - Step 24366: {'lr': 0.0004723503198876341, 'samples': 12475904, 'steps': 24366, 'loss/train': 0.807769775390625} +03/04/2022 18:04:50 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 18:04:54 - INFO - codeparrot_training - Step 24367: {'lr': 0.0004723478939798512, 'samples': 12476416, 'steps': 24367, 'loss/train': 2.205425262451172} +03/04/2022 18:04:57 - INFO - codeparrot_training - Step 24368: {'lr': 0.0004723454679718817, 'samples': 12476928, 'steps': 24368, 'loss/train': 1.4210705757141113} +03/04/2022 18:04:58 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 18:05:02 - INFO - codeparrot_training - Step 24369: {'lr': 0.00047234304186372685, 'samples': 12477440, 'steps': 24369, 'loss/train': 1.8271970748901367} +03/04/2022 18:05:05 - INFO - codeparrot_training - Step 24370: {'lr': 0.00047234061565538753, 'samples': 12477952, 'steps': 24370, 'loss/train': 1.784451961517334} +03/04/2022 18:05:07 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 18:05:11 - INFO - codeparrot_training - Step 24371: {'lr': 0.0004723381893468651, 'samples': 12478464, 'steps': 24371, 'loss/train': 1.1715573072433472} +03/04/2022 18:05:14 - INFO - codeparrot_training - Step 24372: {'lr': 0.00047233576293816045, 'samples': 12478976, 'steps': 24372, 'loss/train': 4.757570266723633} +03/04/2022 18:05:16 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 18:05:19 - INFO - codeparrot_training - Step 24373: {'lr': 0.00047233333642927465, 'samples': 12479488, 'steps': 24373, 'loss/train': 2.2656476497650146} +03/04/2022 18:05:22 - INFO - codeparrot_training - Step 24374: {'lr': 0.000472330909820209, 'samples': 12480000, 'steps': 24374, 'loss/train': 1.0471769571304321} +03/04/2022 18:05:24 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/04/2022 18:05:28 - INFO - codeparrot_training - Step 24375: {'lr': 0.0004723284831109644, 'samples': 12480512, 'steps': 24375, 'loss/train': 2.597299814224243} +03/04/2022 18:05:31 - INFO - codeparrot_training - Step 24376: {'lr': 0.0004723260563015421, 'samples': 12481024, 'steps': 24376, 'loss/train': 1.5757673978805542} +03/04/2022 18:05:32 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 18:05:36 - INFO - codeparrot_training - Step 24377: {'lr': 0.00047232362939194305, 'samples': 12481536, 'steps': 24377, 'loss/train': 1.9884790182113647} +03/04/2022 18:05:39 - INFO - codeparrot_training - Step 24378: {'lr': 0.0004723212023821684, 'samples': 12482048, 'steps': 24378, 'loss/train': 2.1321568489074707} +03/04/2022 18:05:41 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 18:05:45 - INFO - codeparrot_training - Step 24379: {'lr': 0.0004723187752722193, 'samples': 12482560, 'steps': 24379, 'loss/train': 0.2590729594230652} +03/04/2022 18:05:48 - INFO - codeparrot_training - Step 24380: {'lr': 0.00047231634806209675, 'samples': 12483072, 'steps': 24380, 'loss/train': 2.262324571609497} +03/04/2022 18:05:49 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 18:05:53 - INFO - codeparrot_training - Step 24381: {'lr': 0.0004723139207518019, 'samples': 12483584, 'steps': 24381, 'loss/train': 2.645519256591797} +03/04/2022 18:05:56 - INFO - codeparrot_training - Step 24382: {'lr': 0.00047231149334133577, 'samples': 12484096, 'steps': 24382, 'loss/train': 1.8642394542694092} +03/04/2022 18:05:58 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/04/2022 18:06:02 - INFO - codeparrot_training - Step 24383: {'lr': 0.00047230906583069953, 'samples': 12484608, 'steps': 24383, 'loss/train': 2.182098627090454} +03/04/2022 18:06:05 - INFO - codeparrot_training - Step 24384: {'lr': 0.0004723066382198943, 'samples': 12485120, 'steps': 24384, 'loss/train': 1.527432918548584} +03/04/2022 18:06:07 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 18:06:10 - INFO - codeparrot_training - Step 24385: {'lr': 0.00047230421050892116, 'samples': 12485632, 'steps': 24385, 'loss/train': 1.624372959136963} +03/04/2022 18:06:13 - INFO - codeparrot_training - Step 24386: {'lr': 0.00047230178269778105, 'samples': 12486144, 'steps': 24386, 'loss/train': 1.0148608684539795} +03/04/2022 18:06:15 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/04/2022 18:06:19 - INFO - codeparrot_training - Step 24387: {'lr': 0.00047229935478647524, 'samples': 12486656, 'steps': 24387, 'loss/train': 1.683343768119812} +03/04/2022 18:06:22 - INFO - codeparrot_training - Step 24388: {'lr': 0.0004722969267750048, 'samples': 12487168, 'steps': 24388, 'loss/train': 1.9828135967254639} +03/04/2022 18:06:25 - INFO - codeparrot_training - Step 24389: {'lr': 0.0004722944986633708, 'samples': 12487680, 'steps': 24389, 'loss/train': 2.1722311973571777} +03/04/2022 18:06:25 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/04/2022 18:06:30 - INFO - codeparrot_training - Step 24390: {'lr': 0.0004722920704515743, 'samples': 12488192, 'steps': 24390, 'loss/train': 2.1045186519622803} +03/04/2022 18:06:33 - INFO - codeparrot_training - Step 24391: {'lr': 0.00047228964213961647, 'samples': 12488704, 'steps': 24391, 'loss/train': 1.7001378536224365} +03/04/2022 18:06:33 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/04/2022 18:06:39 - INFO - codeparrot_training - Step 24392: {'lr': 0.00047228721372749826, 'samples': 12489216, 'steps': 24392, 'loss/train': 1.915138840675354} +03/04/2022 18:06:42 - INFO - codeparrot_training - Step 24393: {'lr': 0.000472284785215221, 'samples': 12489728, 'steps': 24393, 'loss/train': 2.13015079498291} +03/04/2022 18:06:42 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/04/2022 18:06:47 - INFO - codeparrot_training - Step 24394: {'lr': 0.0004722823566027855, 'samples': 12490240, 'steps': 24394, 'loss/train': 2.219012498855591} +03/04/2022 18:06:50 - INFO - codeparrot_training - Step 24395: {'lr': 0.00047227992789019316, 'samples': 12490752, 'steps': 24395, 'loss/train': 1.7858574390411377} +03/04/2022 18:06:50 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 18:06:56 - INFO - codeparrot_training - Step 24396: {'lr': 0.0004722774990774448, 'samples': 12491264, 'steps': 24396, 'loss/train': 1.7749426364898682} +03/04/2022 18:06:59 - INFO - codeparrot_training - Step 24397: {'lr': 0.00047227507016454163, 'samples': 12491776, 'steps': 24397, 'loss/train': 0.8248541355133057} +03/04/2022 18:06:59 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/04/2022 18:07:04 - INFO - codeparrot_training - Step 24398: {'lr': 0.00047227264115148475, 'samples': 12492288, 'steps': 24398, 'loss/train': 2.2411694526672363} +03/04/2022 18:07:07 - INFO - codeparrot_training - Step 24399: {'lr': 0.00047227021203827523, 'samples': 12492800, 'steps': 24399, 'loss/train': 1.6435426473617554} +03/04/2022 18:07:07 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 18:07:13 - INFO - codeparrot_training - Step 24400: {'lr': 0.0004722677828249142, 'samples': 12493312, 'steps': 24400, 'loss/train': 1.185342788696289} +03/04/2022 18:07:16 - INFO - codeparrot_training - Step 24401: {'lr': 0.0004722653535114028, 'samples': 12493824, 'steps': 24401, 'loss/train': 2.6949777603149414} +03/04/2022 18:07:17 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 18:07:21 - INFO - codeparrot_training - Step 24402: {'lr': 0.00047226292409774205, 'samples': 12494336, 'steps': 24402, 'loss/train': 2.3745765686035156} +03/04/2022 18:07:25 - INFO - codeparrot_training - Step 24403: {'lr': 0.00047226049458393306, 'samples': 12494848, 'steps': 24403, 'loss/train': 1.638043999671936} +03/04/2022 18:07:25 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/04/2022 18:07:30 - INFO - codeparrot_training - Step 24404: {'lr': 0.0004722580649699768, 'samples': 12495360, 'steps': 24404, 'loss/train': 1.4944322109222412} +03/04/2022 18:07:33 - INFO - codeparrot_training - Step 24405: {'lr': 0.00047225563525587463, 'samples': 12495872, 'steps': 24405, 'loss/train': 0.6046652793884277} +03/04/2022 18:07:34 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/04/2022 18:07:38 - INFO - codeparrot_training - Step 24406: {'lr': 0.0004722532054416274, 'samples': 12496384, 'steps': 24406, 'loss/train': 2.2182908058166504} +03/04/2022 18:07:41 - INFO - codeparrot_training - Step 24407: {'lr': 0.0004722507755272364, 'samples': 12496896, 'steps': 24407, 'loss/train': 2.009392261505127} +03/04/2022 18:07:42 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 18:07:47 - INFO - codeparrot_training - Step 24408: {'lr': 0.0004722483455127026, 'samples': 12497408, 'steps': 24408, 'loss/train': 1.5220409631729126} +03/04/2022 18:07:50 - INFO - codeparrot_training - Step 24409: {'lr': 0.000472245915398027, 'samples': 12497920, 'steps': 24409, 'loss/train': 1.907294750213623} +03/04/2022 18:07:51 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 18:07:55 - INFO - codeparrot_training - Step 24410: {'lr': 0.0004722434851832109, 'samples': 12498432, 'steps': 24410, 'loss/train': 1.4560720920562744} +03/04/2022 18:07:58 - INFO - codeparrot_training - Step 24411: {'lr': 0.00047224105486825543, 'samples': 12498944, 'steps': 24411, 'loss/train': 0.20736485719680786} +03/04/2022 18:07:59 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 18:08:04 - INFO - codeparrot_training - Step 24412: {'lr': 0.0004722386244531615, 'samples': 12499456, 'steps': 24412, 'loss/train': 2.3332815170288086} +03/04/2022 18:08:07 - INFO - codeparrot_training - Step 24413: {'lr': 0.0004722361939379302, 'samples': 12499968, 'steps': 24413, 'loss/train': 0.7977463603019714} +03/04/2022 18:08:08 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/04/2022 18:08:12 - INFO - codeparrot_training - Step 24414: {'lr': 0.0004722337633225627, 'samples': 12500480, 'steps': 24414, 'loss/train': 2.2121291160583496} +03/04/2022 18:08:15 - INFO - codeparrot_training - Step 24415: {'lr': 0.0004722313326070602, 'samples': 12500992, 'steps': 24415, 'loss/train': 1.4571318626403809} +03/04/2022 18:08:16 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 18:08:21 - INFO - codeparrot_training - Step 24416: {'lr': 0.00047222890179142365, 'samples': 12501504, 'steps': 24416, 'loss/train': 1.727952241897583} +03/04/2022 18:08:24 - INFO - codeparrot_training - Step 24417: {'lr': 0.00047222647087565413, 'samples': 12502016, 'steps': 24417, 'loss/train': 2.189281463623047} +03/04/2022 18:08:25 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/04/2022 18:08:29 - INFO - codeparrot_training - Step 24418: {'lr': 0.0004722240398597528, 'samples': 12502528, 'steps': 24418, 'loss/train': 1.3159852027893066} +03/04/2022 18:08:32 - INFO - codeparrot_training - Step 24419: {'lr': 0.0004722216087437208, 'samples': 12503040, 'steps': 24419, 'loss/train': 1.853790283203125} +03/04/2022 18:08:33 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 18:08:38 - INFO - codeparrot_training - Step 24420: {'lr': 0.0004722191775275592, 'samples': 12503552, 'steps': 24420, 'loss/train': 2.5647730827331543} +03/04/2022 18:08:41 - INFO - codeparrot_training - Step 24421: {'lr': 0.00047221674621126896, 'samples': 12504064, 'steps': 24421, 'loss/train': 1.9466300010681152} +03/04/2022 18:08:42 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 18:08:46 - INFO - codeparrot_training - Step 24422: {'lr': 0.0004722143147948513, 'samples': 12504576, 'steps': 24422, 'loss/train': 2.532318592071533} +03/04/2022 18:08:49 - INFO - codeparrot_training - Step 24423: {'lr': 0.0004722118832783074, 'samples': 12505088, 'steps': 24423, 'loss/train': 1.0570956468582153} +03/04/2022 18:08:51 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 18:08:55 - INFO - codeparrot_training - Step 24424: {'lr': 0.0004722094516616382, 'samples': 12505600, 'steps': 24424, 'loss/train': 2.0713443756103516} +03/04/2022 18:08:58 - INFO - codeparrot_training - Step 24425: {'lr': 0.0004722070199448448, 'samples': 12506112, 'steps': 24425, 'loss/train': 2.467946767807007} +03/04/2022 18:09:00 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 18:09:03 - INFO - codeparrot_training - Step 24426: {'lr': 0.00047220458812792846, 'samples': 12506624, 'steps': 24426, 'loss/train': 1.4189249277114868} +03/04/2022 18:09:06 - INFO - codeparrot_training - Step 24427: {'lr': 0.00047220215621089005, 'samples': 12507136, 'steps': 24427, 'loss/train': 1.767484188079834} +03/04/2022 18:09:08 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/04/2022 18:09:12 - INFO - codeparrot_training - Step 24428: {'lr': 0.00047219972419373083, 'samples': 12507648, 'steps': 24428, 'loss/train': 1.7900302410125732} +03/04/2022 18:09:15 - INFO - codeparrot_training - Step 24429: {'lr': 0.00047219729207645183, 'samples': 12508160, 'steps': 24429, 'loss/train': 1.5192335844039917} +03/04/2022 18:09:17 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 18:09:20 - INFO - codeparrot_training - Step 24430: {'lr': 0.0004721948598590542, 'samples': 12508672, 'steps': 24430, 'loss/train': 2.370107412338257} +03/04/2022 18:09:23 - INFO - codeparrot_training - Step 24431: {'lr': 0.0004721924275415389, 'samples': 12509184, 'steps': 24431, 'loss/train': 1.8538939952850342} +03/04/2022 18:09:25 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/04/2022 18:09:29 - INFO - codeparrot_training - Step 24432: {'lr': 0.0004721899951239072, 'samples': 12509696, 'steps': 24432, 'loss/train': 2.228358745574951} +03/04/2022 18:09:32 - INFO - codeparrot_training - Step 24433: {'lr': 0.0004721875626061601, 'samples': 12510208, 'steps': 24433, 'loss/train': 1.3693757057189941} +03/04/2022 18:09:35 - INFO - codeparrot_training - Step 24434: {'lr': 0.00047218512998829874, 'samples': 12510720, 'steps': 24434, 'loss/train': 0.5329472422599792} +03/04/2022 18:09:35 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/04/2022 18:09:41 - INFO - codeparrot_training - Step 24435: {'lr': 0.00047218269727032413, 'samples': 12511232, 'steps': 24435, 'loss/train': 1.679201364517212} +03/04/2022 18:09:44 - INFO - codeparrot_training - Step 24436: {'lr': 0.00047218026445223745, 'samples': 12511744, 'steps': 24436, 'loss/train': 1.9591596126556396} +03/04/2022 18:09:44 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 18:09:49 - INFO - codeparrot_training - Step 24437: {'lr': 0.0004721778315340398, 'samples': 12512256, 'steps': 24437, 'loss/train': 1.9117988348007202} +03/04/2022 18:09:52 - INFO - codeparrot_training - Step 24438: {'lr': 0.0004721753985157322, 'samples': 12512768, 'steps': 24438, 'loss/train': 2.1197919845581055} +03/04/2022 18:09:52 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 18:09:57 - INFO - codeparrot_training - Step 24439: {'lr': 0.0004721729653973158, 'samples': 12513280, 'steps': 24439, 'loss/train': 1.2452572584152222} +03/04/2022 18:10:01 - INFO - codeparrot_training - Step 24440: {'lr': 0.0004721705321787917, 'samples': 12513792, 'steps': 24440, 'loss/train': 1.5007168054580688} +03/04/2022 18:10:01 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 18:10:06 - INFO - codeparrot_training - Step 24441: {'lr': 0.00047216809886016097, 'samples': 12514304, 'steps': 24441, 'loss/train': 1.8779655694961548} +03/04/2022 18:10:09 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 18:10:11 - INFO - codeparrot_training - Step 24442: {'lr': 0.0004721656654414248, 'samples': 12514816, 'steps': 24442, 'loss/train': 1.6226814985275269} +03/04/2022 18:10:14 - INFO - codeparrot_training - Step 24443: {'lr': 0.00047216323192258416, 'samples': 12515328, 'steps': 24443, 'loss/train': 0.7962010502815247} +03/04/2022 18:10:18 - INFO - codeparrot_training - Step 24444: {'lr': 0.0004721607983036401, 'samples': 12515840, 'steps': 24444, 'loss/train': 1.578208565711975} +03/04/2022 18:10:18 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 18:10:23 - INFO - codeparrot_training - Step 24445: {'lr': 0.00047215836458459393, 'samples': 12516352, 'steps': 24445, 'loss/train': 1.2195442914962769} +03/04/2022 18:10:26 - INFO - codeparrot_training - Step 24446: {'lr': 0.00047215593076544663, 'samples': 12516864, 'steps': 24446, 'loss/train': 0.40893515944480896} +03/04/2022 18:10:26 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 18:10:31 - INFO - codeparrot_training - Step 24447: {'lr': 0.0004721534968461992, 'samples': 12517376, 'steps': 24447, 'loss/train': 0.2289259433746338} +03/04/2022 18:10:34 - INFO - codeparrot_training - Step 24448: {'lr': 0.00047215106282685296, 'samples': 12517888, 'steps': 24448, 'loss/train': 1.391273021697998} +03/04/2022 18:10:35 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 18:10:40 - INFO - codeparrot_training - Step 24449: {'lr': 0.0004721486287074088, 'samples': 12518400, 'steps': 24449, 'loss/train': 1.8093549013137817} +03/04/2022 18:10:43 - INFO - codeparrot_training - Step 24450: {'lr': 0.0004721461944878679, 'samples': 12518912, 'steps': 24450, 'loss/train': 2.0668952465057373} +03/04/2022 18:10:43 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/04/2022 18:10:48 - INFO - codeparrot_training - Step 24451: {'lr': 0.00047214376016823143, 'samples': 12519424, 'steps': 24451, 'loss/train': 6.699965953826904} +03/04/2022 18:10:51 - INFO - codeparrot_training - Step 24452: {'lr': 0.0004721413257485003, 'samples': 12519936, 'steps': 24452, 'loss/train': 1.061889886856079} +03/04/2022 18:10:53 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 18:10:57 - INFO - codeparrot_training - Step 24453: {'lr': 0.0004721388912286758, 'samples': 12520448, 'steps': 24453, 'loss/train': 1.724984049797058} +03/04/2022 18:11:00 - INFO - codeparrot_training - Step 24454: {'lr': 0.0004721364566087589, 'samples': 12520960, 'steps': 24454, 'loss/train': 1.633524775505066} +03/04/2022 18:11:01 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 18:11:05 - INFO - codeparrot_training - Step 24455: {'lr': 0.00047213402188875077, 'samples': 12521472, 'steps': 24455, 'loss/train': 1.8652018308639526} +03/04/2022 18:11:08 - INFO - codeparrot_training - Step 24456: {'lr': 0.00047213158706865246, 'samples': 12521984, 'steps': 24456, 'loss/train': 1.1079046726226807} +03/04/2022 18:11:10 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/04/2022 18:11:14 - INFO - codeparrot_training - Step 24457: {'lr': 0.000472129152148465, 'samples': 12522496, 'steps': 24457, 'loss/train': 2.1000828742980957} +03/04/2022 18:11:17 - INFO - codeparrot_training - Step 24458: {'lr': 0.0004721267171281897, 'samples': 12523008, 'steps': 24458, 'loss/train': 1.0880120992660522} +03/04/2022 18:11:19 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 18:11:22 - INFO - codeparrot_training - Step 24459: {'lr': 0.00047212428200782744, 'samples': 12523520, 'steps': 24459, 'loss/train': 2.7211813926696777} +03/04/2022 18:11:25 - INFO - codeparrot_training - Step 24460: {'lr': 0.00047212184678737946, 'samples': 12524032, 'steps': 24460, 'loss/train': 0.42921656370162964} +03/04/2022 18:11:28 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 18:11:31 - INFO - codeparrot_training - Step 24461: {'lr': 0.00047211941146684677, 'samples': 12524544, 'steps': 24461, 'loss/train': 1.3911974430084229} +03/04/2022 18:11:34 - INFO - codeparrot_training - Step 24462: {'lr': 0.00047211697604623056, 'samples': 12525056, 'steps': 24462, 'loss/train': 1.533073902130127} +03/04/2022 18:11:37 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 18:11:39 - INFO - codeparrot_training - Step 24463: {'lr': 0.0004721145405255318, 'samples': 12525568, 'steps': 24463, 'loss/train': 1.7363744974136353} +03/04/2022 18:11:42 - INFO - codeparrot_training - Step 24464: {'lr': 0.00047211210490475167, 'samples': 12526080, 'steps': 24464, 'loss/train': 2.428699493408203} +03/04/2022 18:11:45 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/04/2022 18:11:48 - INFO - codeparrot_training - Step 24465: {'lr': 0.0004721096691838913, 'samples': 12526592, 'steps': 24465, 'loss/train': 2.038604736328125} +03/04/2022 18:11:51 - INFO - codeparrot_training - Step 24466: {'lr': 0.00047210723336295167, 'samples': 12527104, 'steps': 24466, 'loss/train': 1.4739770889282227} +03/04/2022 18:11:54 - INFO - codeparrot_training - Step 24467: {'lr': 0.00047210479744193404, 'samples': 12527616, 'steps': 24467, 'loss/train': 1.4840128421783447} +03/04/2022 18:11:55 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 18:12:00 - INFO - codeparrot_training - Step 24468: {'lr': 0.0004721023614208393, 'samples': 12528128, 'steps': 24468, 'loss/train': 2.714526891708374} +03/04/2022 18:12:03 - INFO - codeparrot_training - Step 24469: {'lr': 0.0004720999252996687, 'samples': 12528640, 'steps': 24469, 'loss/train': 2.1996963024139404} +03/04/2022 18:12:04 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 18:12:08 - INFO - codeparrot_training - Step 24470: {'lr': 0.00047209748907842337, 'samples': 12529152, 'steps': 24470, 'loss/train': 1.0092376470565796} +03/04/2022 18:12:11 - INFO - codeparrot_training - Step 24471: {'lr': 0.0004720950527571043, 'samples': 12529664, 'steps': 24471, 'loss/train': 0.25098586082458496} +03/04/2022 18:12:12 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/04/2022 18:12:16 - INFO - codeparrot_training - Step 24472: {'lr': 0.0004720926163357126, 'samples': 12530176, 'steps': 24472, 'loss/train': 1.353118896484375} +03/04/2022 18:12:20 - INFO - codeparrot_training - Step 24473: {'lr': 0.0004720901798142494, 'samples': 12530688, 'steps': 24473, 'loss/train': 0.6609816551208496} +03/04/2022 18:12:20 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 18:12:25 - INFO - codeparrot_training - Step 24474: {'lr': 0.00047208774319271586, 'samples': 12531200, 'steps': 24474, 'loss/train': 1.1361074447631836} +03/04/2022 18:12:28 - INFO - codeparrot_training - Step 24475: {'lr': 0.00047208530647111294, 'samples': 12531712, 'steps': 24475, 'loss/train': 1.7579642534255981} +03/04/2022 18:12:29 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/04/2022 18:12:33 - INFO - codeparrot_training - Step 24476: {'lr': 0.0004720828696494418, 'samples': 12532224, 'steps': 24476, 'loss/train': 2.0598790645599365} +03/04/2022 18:12:37 - INFO - codeparrot_training - Step 24477: {'lr': 0.00047208043272770354, 'samples': 12532736, 'steps': 24477, 'loss/train': 1.8200620412826538} +03/04/2022 18:12:37 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 18:12:42 - INFO - codeparrot_training - Step 24478: {'lr': 0.0004720779957058993, 'samples': 12533248, 'steps': 24478, 'loss/train': 1.9939210414886475} +03/04/2022 18:12:45 - INFO - codeparrot_training - Step 24479: {'lr': 0.0004720755585840302, 'samples': 12533760, 'steps': 24479, 'loss/train': 2.6640143394470215} +03/04/2022 18:12:46 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/04/2022 18:12:50 - INFO - codeparrot_training - Step 24480: {'lr': 0.0004720731213620972, 'samples': 12534272, 'steps': 24480, 'loss/train': 2.259087562561035} +03/04/2022 18:12:53 - INFO - codeparrot_training - Step 24481: {'lr': 0.00047207068404010147, 'samples': 12534784, 'steps': 24481, 'loss/train': 2.4170734882354736} +03/04/2022 18:12:54 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 18:12:59 - INFO - codeparrot_training - Step 24482: {'lr': 0.00047206824661804415, 'samples': 12535296, 'steps': 24482, 'loss/train': 1.2653634548187256} +03/04/2022 18:13:02 - INFO - codeparrot_training - Step 24483: {'lr': 0.0004720658090959263, 'samples': 12535808, 'steps': 24483, 'loss/train': 1.7588878870010376} +03/04/2022 18:13:03 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/04/2022 18:13:07 - INFO - codeparrot_training - Step 24484: {'lr': 0.000472063371473749, 'samples': 12536320, 'steps': 24484, 'loss/train': 1.985325574874878} +03/04/2022 18:13:10 - INFO - codeparrot_training - Step 24485: {'lr': 0.0004720609337515134, 'samples': 12536832, 'steps': 24485, 'loss/train': 1.5193121433258057} +03/04/2022 18:13:12 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 18:13:16 - INFO - codeparrot_training - Step 24486: {'lr': 0.00047205849592922057, 'samples': 12537344, 'steps': 24486, 'loss/train': 1.4992685317993164} +03/04/2022 18:13:19 - INFO - codeparrot_training - Step 24487: {'lr': 0.00047205605800687154, 'samples': 12537856, 'steps': 24487, 'loss/train': 1.8267135620117188} +03/04/2022 18:13:20 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 18:13:24 - INFO - codeparrot_training - Step 24488: {'lr': 0.0004720536199844676, 'samples': 12538368, 'steps': 24488, 'loss/train': 1.7764939069747925} +03/04/2022 18:13:27 - INFO - codeparrot_training - Step 24489: {'lr': 0.00047205118186200963, 'samples': 12538880, 'steps': 24489, 'loss/train': 1.4721856117248535} +03/04/2022 18:13:28 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 18:13:32 - INFO - codeparrot_training - Step 24490: {'lr': 0.00047204874363949886, 'samples': 12539392, 'steps': 24490, 'loss/train': 1.372139573097229} +03/04/2022 18:13:36 - INFO - codeparrot_training - Step 24491: {'lr': 0.00047204630531693634, 'samples': 12539904, 'steps': 24491, 'loss/train': 2.137242555618286} +03/04/2022 18:13:37 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 18:13:41 - INFO - codeparrot_training - Step 24492: {'lr': 0.0004720438668943232, 'samples': 12540416, 'steps': 24492, 'loss/train': 2.2619221210479736} +03/04/2022 18:13:44 - INFO - codeparrot_training - Step 24493: {'lr': 0.0004720414283716605, 'samples': 12540928, 'steps': 24493, 'loss/train': 2.112708330154419} +03/04/2022 18:13:45 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/04/2022 18:13:49 - INFO - codeparrot_training - Step 24494: {'lr': 0.00047203898974894934, 'samples': 12541440, 'steps': 24494, 'loss/train': 2.1385791301727295} +03/04/2022 18:13:52 - INFO - codeparrot_training - Step 24495: {'lr': 0.0004720365510261909, 'samples': 12541952, 'steps': 24495, 'loss/train': 1.9912108182907104} +03/04/2022 18:13:53 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 18:13:58 - INFO - codeparrot_training - Step 24496: {'lr': 0.00047203411220338615, 'samples': 12542464, 'steps': 24496, 'loss/train': 1.3300347328186035} +03/04/2022 18:14:01 - INFO - codeparrot_training - Step 24497: {'lr': 0.00047203167328053634, 'samples': 12542976, 'steps': 24497, 'loss/train': 2.0370845794677734} +03/04/2022 18:14:01 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 18:14:06 - INFO - codeparrot_training - Step 24498: {'lr': 0.0004720292342576423, 'samples': 12543488, 'steps': 24498, 'loss/train': 1.6814428567886353} +03/04/2022 18:14:09 - INFO - codeparrot_training - Step 24499: {'lr': 0.0004720267951347055, 'samples': 12544000, 'steps': 24499, 'loss/train': 2.4289727210998535} +03/04/2022 18:14:10 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/04/2022 18:14:14 - INFO - codeparrot_training - Step 24500: {'lr': 0.00047202435591172677, 'samples': 12544512, 'steps': 24500, 'loss/train': 2.6393795013427734} +03/04/2022 18:14:18 - INFO - codeparrot_training - Step 24501: {'lr': 0.00047202191658870737, 'samples': 12545024, 'steps': 24501, 'loss/train': 1.8278721570968628} +03/04/2022 18:14:18 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/04/2022 18:14:23 - INFO - codeparrot_training - Step 24502: {'lr': 0.00047201947716564826, 'samples': 12545536, 'steps': 24502, 'loss/train': 1.6945443153381348} +03/04/2022 18:14:26 - INFO - codeparrot_training - Step 24503: {'lr': 0.00047201703764255057, 'samples': 12546048, 'steps': 24503, 'loss/train': 1.7766820192337036} +03/04/2022 18:14:27 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 18:14:31 - INFO - codeparrot_training - Step 24504: {'lr': 0.0004720145980194155, 'samples': 12546560, 'steps': 24504, 'loss/train': 2.3230977058410645} +03/04/2022 18:14:35 - INFO - codeparrot_training - Step 24505: {'lr': 0.000472012158296244, 'samples': 12547072, 'steps': 24505, 'loss/train': 1.488804817199707} +03/04/2022 18:14:35 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 18:14:40 - INFO - codeparrot_training - Step 24506: {'lr': 0.0004720097184730373, 'samples': 12547584, 'steps': 24506, 'loss/train': 1.3458027839660645} +03/04/2022 18:14:43 - INFO - codeparrot_training - Step 24507: {'lr': 0.00047200727854979644, 'samples': 12548096, 'steps': 24507, 'loss/train': 2.2157466411590576} +03/04/2022 18:14:44 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 18:14:48 - INFO - codeparrot_training - Step 24508: {'lr': 0.00047200483852652257, 'samples': 12548608, 'steps': 24508, 'loss/train': 1.5014764070510864} +03/04/2022 18:14:52 - INFO - codeparrot_training - Step 24509: {'lr': 0.0004720023984032167, 'samples': 12549120, 'steps': 24509, 'loss/train': 2.3097431659698486} +03/04/2022 18:14:52 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 18:14:57 - INFO - codeparrot_training - Step 24510: {'lr': 0.00047199995817987997, 'samples': 12549632, 'steps': 24510, 'loss/train': 2.1296088695526123} +03/04/2022 18:15:00 - INFO - codeparrot_training - Step 24511: {'lr': 0.00047199751785651346, 'samples': 12550144, 'steps': 24511, 'loss/train': 1.9083112478256226} +03/04/2022 18:15:00 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 18:15:05 - INFO - codeparrot_training - Step 24512: {'lr': 0.0004719950774331183, 'samples': 12550656, 'steps': 24512, 'loss/train': 1.4810632467269897} +03/04/2022 18:15:08 - INFO - codeparrot_training - Step 24513: {'lr': 0.00047199263690969563, 'samples': 12551168, 'steps': 24513, 'loss/train': 2.143918752670288} +03/04/2022 18:15:09 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/04/2022 18:15:14 - INFO - codeparrot_training - Step 24514: {'lr': 0.00047199019628624647, 'samples': 12551680, 'steps': 24514, 'loss/train': 1.8891240358352661} +03/04/2022 18:15:17 - INFO - codeparrot_training - Step 24515: {'lr': 0.00047198775556277195, 'samples': 12552192, 'steps': 24515, 'loss/train': 1.6359902620315552} +03/04/2022 18:15:19 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 18:15:22 - INFO - codeparrot_training - Step 24516: {'lr': 0.0004719853147392732, 'samples': 12552704, 'steps': 24516, 'loss/train': 0.7026808857917786} +03/04/2022 18:15:26 - INFO - codeparrot_training - Step 24517: {'lr': 0.0004719828738157512, 'samples': 12553216, 'steps': 24517, 'loss/train': 2.0743086338043213} +03/04/2022 18:15:27 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 18:15:31 - INFO - codeparrot_training - Step 24518: {'lr': 0.0004719804327922073, 'samples': 12553728, 'steps': 24518, 'loss/train': 1.3966413736343384} +03/04/2022 18:15:34 - INFO - codeparrot_training - Step 24519: {'lr': 0.00047197799166864233, 'samples': 12554240, 'steps': 24519, 'loss/train': 2.173198699951172} +03/04/2022 18:15:36 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 18:15:39 - INFO - codeparrot_training - Step 24520: {'lr': 0.00047197555044505756, 'samples': 12554752, 'steps': 24520, 'loss/train': 1.8667889833450317} +03/04/2022 18:15:43 - INFO - codeparrot_training - Step 24521: {'lr': 0.000471973109121454, 'samples': 12555264, 'steps': 24521, 'loss/train': 3.210848569869995} +03/04/2022 18:15:44 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 18:15:48 - INFO - codeparrot_training - Step 24522: {'lr': 0.00047197066769783284, 'samples': 12555776, 'steps': 24522, 'loss/train': 1.9343254566192627} +03/04/2022 18:15:51 - INFO - codeparrot_training - Step 24523: {'lr': 0.000471968226174195, 'samples': 12556288, 'steps': 24523, 'loss/train': 0.7525582909584045} +03/04/2022 18:15:52 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 18:15:56 - INFO - codeparrot_training - Step 24524: {'lr': 0.00047196578455054175, 'samples': 12556800, 'steps': 24524, 'loss/train': 1.727347731590271} +03/04/2022 18:15:59 - INFO - codeparrot_training - Step 24525: {'lr': 0.00047196334282687414, 'samples': 12557312, 'steps': 24525, 'loss/train': 1.3574981689453125} +03/04/2022 18:16:05 - INFO - codeparrot_training - Step 24526: {'lr': 0.00047196090100319333, 'samples': 12557824, 'steps': 24526, 'loss/train': 1.7813762426376343} +03/04/2022 18:16:08 - INFO - codeparrot_training - Step 24527: {'lr': 0.00047195845907950035, 'samples': 12558336, 'steps': 24527, 'loss/train': 1.7903351783752441} +03/04/2022 18:16:09 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 18:16:13 - INFO - codeparrot_training - Step 24528: {'lr': 0.0004719560170557963, 'samples': 12558848, 'steps': 24528, 'loss/train': 1.6665645837783813} +03/04/2022 18:16:16 - INFO - codeparrot_training - Step 24529: {'lr': 0.0004719535749320823, 'samples': 12559360, 'steps': 24529, 'loss/train': 2.075484275817871} +03/04/2022 18:16:17 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 18:16:21 - INFO - codeparrot_training - Step 24530: {'lr': 0.0004719511327083594, 'samples': 12559872, 'steps': 24530, 'loss/train': 1.9032742977142334} +03/04/2022 18:16:25 - INFO - codeparrot_training - Step 24531: {'lr': 0.0004719486903846288, 'samples': 12560384, 'steps': 24531, 'loss/train': 2.14788818359375} +03/04/2022 18:16:26 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 18:16:30 - INFO - codeparrot_training - Step 24532: {'lr': 0.0004719462479608915, 'samples': 12560896, 'steps': 24532, 'loss/train': 2.316221237182617} +03/04/2022 18:16:33 - INFO - codeparrot_training - Step 24533: {'lr': 0.0004719438054371487, 'samples': 12561408, 'steps': 24533, 'loss/train': 1.7690099477767944} +03/04/2022 18:16:34 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 18:16:38 - INFO - codeparrot_training - Step 24534: {'lr': 0.00047194136281340137, 'samples': 12561920, 'steps': 24534, 'loss/train': 2.424327850341797} +03/04/2022 18:16:41 - INFO - codeparrot_training - Step 24535: {'lr': 0.00047193892008965077, 'samples': 12562432, 'steps': 24535, 'loss/train': 1.5116389989852905} +03/04/2022 18:16:43 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/04/2022 18:16:47 - INFO - codeparrot_training - Step 24536: {'lr': 0.0004719364772658978, 'samples': 12562944, 'steps': 24536, 'loss/train': 2.1700985431671143} +03/04/2022 18:16:50 - INFO - codeparrot_training - Step 24537: {'lr': 0.00047193403434214385, 'samples': 12563456, 'steps': 24537, 'loss/train': 1.4450080394744873} +03/04/2022 18:16:51 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 18:16:55 - INFO - codeparrot_training - Step 24538: {'lr': 0.0004719315913183897, 'samples': 12563968, 'steps': 24538, 'loss/train': 2.150500774383545} +03/04/2022 18:16:58 - INFO - codeparrot_training - Step 24539: {'lr': 0.0004719291481946367, 'samples': 12564480, 'steps': 24539, 'loss/train': 2.2522151470184326} +03/04/2022 18:16:59 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/04/2022 18:17:04 - INFO - codeparrot_training - Step 24540: {'lr': 0.00047192670497088577, 'samples': 12564992, 'steps': 24540, 'loss/train': 2.0248465538024902} +03/04/2022 18:17:07 - INFO - codeparrot_training - Step 24541: {'lr': 0.0004719242616471381, 'samples': 12565504, 'steps': 24541, 'loss/train': 1.4819066524505615} +03/04/2022 18:17:08 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/04/2022 18:17:12 - INFO - codeparrot_training - Step 24542: {'lr': 0.00047192181822339484, 'samples': 12566016, 'steps': 24542, 'loss/train': 2.0245165824890137} +03/04/2022 18:17:15 - INFO - codeparrot_training - Step 24543: {'lr': 0.000471919374699657, 'samples': 12566528, 'steps': 24543, 'loss/train': 1.413270115852356} +03/04/2022 18:17:16 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 18:17:20 - INFO - codeparrot_training - Step 24544: {'lr': 0.0004719169310759257, 'samples': 12567040, 'steps': 24544, 'loss/train': 1.6650365591049194} +03/04/2022 18:17:24 - INFO - codeparrot_training - Step 24545: {'lr': 0.0004719144873522021, 'samples': 12567552, 'steps': 24545, 'loss/train': 1.930375576019287} +03/04/2022 18:17:24 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/04/2022 18:17:29 - INFO - codeparrot_training - Step 24546: {'lr': 0.0004719120435284872, 'samples': 12568064, 'steps': 24546, 'loss/train': 1.8358403444290161} +03/04/2022 18:17:32 - INFO - codeparrot_training - Step 24547: {'lr': 0.0004719095996047822, 'samples': 12568576, 'steps': 24547, 'loss/train': 1.933958888053894} +03/04/2022 18:17:33 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/04/2022 18:17:37 - INFO - codeparrot_training - Step 24548: {'lr': 0.0004719071555810881, 'samples': 12569088, 'steps': 24548, 'loss/train': 1.9388989210128784} +03/04/2022 18:17:41 - INFO - codeparrot_training - Step 24549: {'lr': 0.00047190471145740616, 'samples': 12569600, 'steps': 24549, 'loss/train': 1.7561167478561401} +03/04/2022 18:17:41 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/04/2022 18:17:46 - INFO - codeparrot_training - Step 24550: {'lr': 0.0004719022672337373, 'samples': 12570112, 'steps': 24550, 'loss/train': 1.8279350996017456} +03/04/2022 18:17:49 - INFO - codeparrot_training - Step 24551: {'lr': 0.0004718998229100827, 'samples': 12570624, 'steps': 24551, 'loss/train': 2.385383367538452} +03/04/2022 18:17:50 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 18:17:54 - INFO - codeparrot_training - Step 24552: {'lr': 0.00047189737848644356, 'samples': 12571136, 'steps': 24552, 'loss/train': 1.4607124328613281} +03/04/2022 18:17:58 - INFO - codeparrot_training - Step 24553: {'lr': 0.0004718949339628208, 'samples': 12571648, 'steps': 24553, 'loss/train': 0.7268022894859314} +03/04/2022 18:17:59 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/04/2022 18:18:03 - INFO - codeparrot_training - Step 24554: {'lr': 0.0004718924893392156, 'samples': 12572160, 'steps': 24554, 'loss/train': 2.2649381160736084} +03/04/2022 18:18:06 - INFO - codeparrot_training - Step 24555: {'lr': 0.0004718900446156291, 'samples': 12572672, 'steps': 24555, 'loss/train': 2.1796867847442627} +03/04/2022 18:18:07 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 18:18:11 - INFO - codeparrot_training - Step 24556: {'lr': 0.00047188759979206236, 'samples': 12573184, 'steps': 24556, 'loss/train': 6.590080261230469} +03/04/2022 18:18:15 - INFO - codeparrot_training - Step 24557: {'lr': 0.00047188515486851646, 'samples': 12573696, 'steps': 24557, 'loss/train': 2.23767352104187} +03/04/2022 18:18:16 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 18:18:20 - INFO - codeparrot_training - Step 24558: {'lr': 0.0004718827098449926, 'samples': 12574208, 'steps': 24558, 'loss/train': 1.1052218675613403} +03/04/2022 18:18:23 - INFO - codeparrot_training - Step 24559: {'lr': 0.00047188026472149184, 'samples': 12574720, 'steps': 24559, 'loss/train': 2.023693084716797} +03/04/2022 18:18:25 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 18:18:28 - INFO - codeparrot_training - Step 24560: {'lr': 0.0004718778194980151, 'samples': 12575232, 'steps': 24560, 'loss/train': 2.0866916179656982} +03/04/2022 18:18:32 - INFO - codeparrot_training - Step 24561: {'lr': 0.00047187537417456375, 'samples': 12575744, 'steps': 24561, 'loss/train': 1.3673527240753174} +03/04/2022 18:18:34 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 18:18:37 - INFO - codeparrot_training - Step 24562: {'lr': 0.00047187292875113874, 'samples': 12576256, 'steps': 24562, 'loss/train': 1.8108468055725098} +03/04/2022 18:18:40 - INFO - codeparrot_training - Step 24563: {'lr': 0.0004718704832277413, 'samples': 12576768, 'steps': 24563, 'loss/train': 1.9500696659088135} +03/04/2022 18:18:42 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 18:18:45 - INFO - codeparrot_training - Step 24564: {'lr': 0.0004718680376043724, 'samples': 12577280, 'steps': 24564, 'loss/train': 1.730804443359375} +03/04/2022 18:18:48 - INFO - codeparrot_training - Step 24565: {'lr': 0.00047186559188103314, 'samples': 12577792, 'steps': 24565, 'loss/train': 1.918236494064331} +03/04/2022 18:18:50 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/04/2022 18:18:54 - INFO - codeparrot_training - Step 24566: {'lr': 0.00047186314605772466, 'samples': 12578304, 'steps': 24566, 'loss/train': 2.5635764598846436} +03/04/2022 18:18:57 - INFO - codeparrot_training - Step 24567: {'lr': 0.00047186070013444814, 'samples': 12578816, 'steps': 24567, 'loss/train': 1.6767905950546265} +03/04/2022 18:18:59 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 18:19:03 - INFO - codeparrot_training - Step 24568: {'lr': 0.00047185825411120454, 'samples': 12579328, 'steps': 24568, 'loss/train': 2.050144672393799} +03/04/2022 18:19:06 - INFO - codeparrot_training - Step 24569: {'lr': 0.0004718558079879951, 'samples': 12579840, 'steps': 24569, 'loss/train': 1.763521432876587} +03/04/2022 18:19:09 - INFO - codeparrot_training - Step 24570: {'lr': 0.00047185336176482084, 'samples': 12580352, 'steps': 24570, 'loss/train': 2.0117151737213135} +03/04/2022 18:19:10 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/04/2022 18:19:14 - INFO - codeparrot_training - Step 24571: {'lr': 0.00047185091544168286, 'samples': 12580864, 'steps': 24571, 'loss/train': 1.7698872089385986} +03/04/2022 18:19:17 - INFO - codeparrot_training - Step 24572: {'lr': 0.00047184846901858225, 'samples': 12581376, 'steps': 24572, 'loss/train': 1.6262294054031372} +03/04/2022 18:19:18 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 18:19:23 - INFO - codeparrot_training - Step 24573: {'lr': 0.0004718460224955202, 'samples': 12581888, 'steps': 24573, 'loss/train': 1.5951533317565918} +03/04/2022 18:19:26 - INFO - codeparrot_training - Step 24574: {'lr': 0.0004718435758724977, 'samples': 12582400, 'steps': 24574, 'loss/train': 2.9642019271850586} +03/04/2022 18:19:27 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 18:19:31 - INFO - codeparrot_training - Step 24575: {'lr': 0.000471841129149516, 'samples': 12582912, 'steps': 24575, 'loss/train': 2.0319533348083496} +03/04/2022 18:19:35 - INFO - codeparrot_training - Step 24576: {'lr': 0.000471838682326576, 'samples': 12583424, 'steps': 24576, 'loss/train': 2.546630859375} +03/04/2022 18:19:36 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 18:19:40 - INFO - codeparrot_training - Step 24577: {'lr': 0.000471836235403679, 'samples': 12583936, 'steps': 24577, 'loss/train': 1.4531915187835693} +03/04/2022 18:19:43 - INFO - codeparrot_training - Step 24578: {'lr': 0.000471833788380826, 'samples': 12584448, 'steps': 24578, 'loss/train': 0.9757839441299438} +03/04/2022 18:19:44 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 18:19:48 - INFO - codeparrot_training - Step 24579: {'lr': 0.0004718313412580181, 'samples': 12584960, 'steps': 24579, 'loss/train': 1.4331064224243164} +03/04/2022 18:19:52 - INFO - codeparrot_training - Step 24580: {'lr': 0.0004718288940352564, 'samples': 12585472, 'steps': 24580, 'loss/train': 2.2657902240753174} +03/04/2022 18:19:53 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 18:19:57 - INFO - codeparrot_training - Step 24581: {'lr': 0.00047182644671254207, 'samples': 12585984, 'steps': 24581, 'loss/train': 1.5244977474212646} +03/04/2022 18:20:00 - INFO - codeparrot_training - Step 24582: {'lr': 0.0004718239992898761, 'samples': 12586496, 'steps': 24582, 'loss/train': 1.7095324993133545} +03/04/2022 18:20:01 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/04/2022 18:20:05 - INFO - codeparrot_training - Step 24583: {'lr': 0.00047182155176725974, 'samples': 12587008, 'steps': 24583, 'loss/train': 0.6961734890937805} +03/04/2022 18:20:09 - INFO - codeparrot_training - Step 24584: {'lr': 0.00047181910414469396, 'samples': 12587520, 'steps': 24584, 'loss/train': 2.1303796768188477} +03/04/2022 18:20:10 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 18:20:14 - INFO - codeparrot_training - Step 24585: {'lr': 0.0004718166564221799, 'samples': 12588032, 'steps': 24585, 'loss/train': 1.4709196090698242} +03/04/2022 18:20:17 - INFO - codeparrot_training - Step 24586: {'lr': 0.0004718142085997187, 'samples': 12588544, 'steps': 24586, 'loss/train': 1.9656394720077515} +03/04/2022 18:20:18 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/04/2022 18:20:22 - INFO - codeparrot_training - Step 24587: {'lr': 0.0004718117606773115, 'samples': 12589056, 'steps': 24587, 'loss/train': 1.9627468585968018} +03/04/2022 18:20:26 - INFO - codeparrot_training - Step 24588: {'lr': 0.0004718093126549592, 'samples': 12589568, 'steps': 24588, 'loss/train': 1.5840452909469604} +03/04/2022 18:20:27 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/04/2022 18:20:31 - INFO - codeparrot_training - Step 24589: {'lr': 0.0004718068645326632, 'samples': 12590080, 'steps': 24589, 'loss/train': 1.8670438528060913} +03/04/2022 18:20:34 - INFO - codeparrot_training - Step 24590: {'lr': 0.0004718044163104244, 'samples': 12590592, 'steps': 24590, 'loss/train': 2.1948065757751465} +03/04/2022 18:20:35 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 18:20:39 - INFO - codeparrot_training - Step 24591: {'lr': 0.0004718019679882439, 'samples': 12591104, 'steps': 24591, 'loss/train': 1.3284690380096436} +03/04/2022 18:20:42 - INFO - codeparrot_training - Step 24592: {'lr': 0.0004717995195661229, 'samples': 12591616, 'steps': 24592, 'loss/train': 2.0250327587127686} +03/04/2022 18:20:44 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 18:20:48 - INFO - codeparrot_training - Step 24593: {'lr': 0.00047179707104406243, 'samples': 12592128, 'steps': 24593, 'loss/train': 0.7504532933235168} +03/04/2022 18:20:51 - INFO - codeparrot_training - Step 24594: {'lr': 0.0004717946224220637, 'samples': 12592640, 'steps': 24594, 'loss/train': 2.6074228286743164} +03/04/2022 18:20:52 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 18:20:56 - INFO - codeparrot_training - Step 24595: {'lr': 0.0004717921737001276, 'samples': 12593152, 'steps': 24595, 'loss/train': 2.201826572418213} +03/04/2022 18:20:59 - INFO - codeparrot_training - Step 24596: {'lr': 0.0004717897248782555, 'samples': 12593664, 'steps': 24596, 'loss/train': 3.2422537803649902} +03/04/2022 18:21:01 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 18:21:05 - INFO - codeparrot_training - Step 24597: {'lr': 0.0004717872759564483, 'samples': 12594176, 'steps': 24597, 'loss/train': 1.6248276233673096} +03/04/2022 18:21:08 - INFO - codeparrot_training - Step 24598: {'lr': 0.00047178482693470723, 'samples': 12594688, 'steps': 24598, 'loss/train': 1.4413553476333618} +03/04/2022 18:21:09 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 18:21:13 - INFO - codeparrot_training - Step 24599: {'lr': 0.0004717823778130333, 'samples': 12595200, 'steps': 24599, 'loss/train': 1.9830931425094604} +03/04/2022 18:21:16 - INFO - codeparrot_training - Step 24600: {'lr': 0.0004717799285914276, 'samples': 12595712, 'steps': 24600, 'loss/train': 1.6674185991287231} +03/04/2022 18:21:18 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/04/2022 18:21:22 - INFO - codeparrot_training - Step 24601: {'lr': 0.00047177747926989134, 'samples': 12596224, 'steps': 24601, 'loss/train': 1.9463204145431519} +03/04/2022 18:21:25 - INFO - codeparrot_training - Step 24602: {'lr': 0.00047177502984842556, 'samples': 12596736, 'steps': 24602, 'loss/train': 2.432020664215088} +03/04/2022 18:21:26 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 18:21:30 - INFO - codeparrot_training - Step 24603: {'lr': 0.0004717725803270314, 'samples': 12597248, 'steps': 24603, 'loss/train': 1.981979489326477} +03/04/2022 18:21:33 - INFO - codeparrot_training - Step 24604: {'lr': 0.00047177013070570997, 'samples': 12597760, 'steps': 24604, 'loss/train': 1.765755295753479} +03/04/2022 18:21:35 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 18:21:38 - INFO - codeparrot_training - Step 24605: {'lr': 0.00047176768098446234, 'samples': 12598272, 'steps': 24605, 'loss/train': 1.9796833992004395} +03/04/2022 18:21:42 - INFO - codeparrot_training - Step 24606: {'lr': 0.0004717652311632895, 'samples': 12598784, 'steps': 24606, 'loss/train': 1.2564719915390015} +03/04/2022 18:21:43 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 18:21:47 - INFO - codeparrot_training - Step 24607: {'lr': 0.00047176278124219276, 'samples': 12599296, 'steps': 24607, 'loss/train': 1.5903581380844116} +03/04/2022 18:21:50 - INFO - codeparrot_training - Step 24608: {'lr': 0.0004717603312211731, 'samples': 12599808, 'steps': 24608, 'loss/train': 1.499257206916809} +03/04/2022 18:21:51 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/04/2022 18:21:55 - INFO - codeparrot_training - Step 24609: {'lr': 0.0004717578811002317, 'samples': 12600320, 'steps': 24609, 'loss/train': 2.3004422187805176} +03/04/2022 18:21:58 - INFO - codeparrot_training - Step 24610: {'lr': 0.00047175543087936954, 'samples': 12600832, 'steps': 24610, 'loss/train': 2.0687336921691895} +03/04/2022 18:22:00 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/04/2022 18:22:04 - INFO - codeparrot_training - Step 24611: {'lr': 0.0004717529805585879, 'samples': 12601344, 'steps': 24611, 'loss/train': 2.2662270069122314} +03/04/2022 18:22:07 - INFO - codeparrot_training - Step 24612: {'lr': 0.0004717505301378877, 'samples': 12601856, 'steps': 24612, 'loss/train': 1.3154491186141968} +03/04/2022 18:22:08 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 18:22:12 - INFO - codeparrot_training - Step 24613: {'lr': 0.0004717480796172702, 'samples': 12602368, 'steps': 24613, 'loss/train': 2.4597747325897217} +03/04/2022 18:22:16 - INFO - codeparrot_training - Step 24614: {'lr': 0.00047174562899673645, 'samples': 12602880, 'steps': 24614, 'loss/train': 1.6482360363006592} +03/04/2022 18:22:17 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/04/2022 18:22:21 - INFO - codeparrot_training - Step 24615: {'lr': 0.0004717431782762875, 'samples': 12603392, 'steps': 24615, 'loss/train': 2.3247969150543213} +03/04/2022 18:22:24 - INFO - codeparrot_training - Step 24616: {'lr': 0.0004717407274559245, 'samples': 12603904, 'steps': 24616, 'loss/train': 2.0946590900421143} +03/04/2022 18:22:25 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/04/2022 18:22:29 - INFO - codeparrot_training - Step 24617: {'lr': 0.0004717382765356485, 'samples': 12604416, 'steps': 24617, 'loss/train': 2.04229736328125} +03/04/2022 18:22:33 - INFO - codeparrot_training - Step 24618: {'lr': 0.0004717358255154607, 'samples': 12604928, 'steps': 24618, 'loss/train': 1.5731310844421387} +03/04/2022 18:22:33 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 18:22:38 - INFO - codeparrot_training - Step 24619: {'lr': 0.0004717333743953622, 'samples': 12605440, 'steps': 24619, 'loss/train': 1.8345510959625244} +03/04/2022 18:22:41 - INFO - codeparrot_training - Step 24620: {'lr': 0.00047173092317535404, 'samples': 12605952, 'steps': 24620, 'loss/train': 2.272402763366699} +03/04/2022 18:22:44 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 18:22:47 - INFO - codeparrot_training - Step 24621: {'lr': 0.0004717284718554373, 'samples': 12606464, 'steps': 24621, 'loss/train': 1.4395856857299805} +03/04/2022 18:22:50 - INFO - codeparrot_training - Step 24622: {'lr': 0.00047172602043561317, 'samples': 12606976, 'steps': 24622, 'loss/train': 2.451172351837158} +03/04/2022 18:22:52 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 18:22:55 - INFO - codeparrot_training - Step 24623: {'lr': 0.00047172356891588273, 'samples': 12607488, 'steps': 24623, 'loss/train': 1.952971339225769} +03/04/2022 18:22:58 - INFO - codeparrot_training - Step 24624: {'lr': 0.0004717211172962471, 'samples': 12608000, 'steps': 24624, 'loss/train': 2.029336452484131} +03/04/2022 18:23:01 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/04/2022 18:23:04 - INFO - codeparrot_training - Step 24625: {'lr': 0.0004717186655767073, 'samples': 12608512, 'steps': 24625, 'loss/train': 2.1454498767852783} +03/04/2022 18:23:07 - INFO - codeparrot_training - Step 24626: {'lr': 0.0004717162137572645, 'samples': 12609024, 'steps': 24626, 'loss/train': 2.2643885612487793} +03/04/2022 18:23:09 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 18:23:12 - INFO - codeparrot_training - Step 24627: {'lr': 0.0004717137618379198, 'samples': 12609536, 'steps': 24627, 'loss/train': 1.9836783409118652} +03/04/2022 18:23:15 - INFO - codeparrot_training - Step 24628: {'lr': 0.0004717113098186743, 'samples': 12610048, 'steps': 24628, 'loss/train': 3.6853578090667725} +03/04/2022 18:23:18 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 18:23:20 - INFO - codeparrot_training - Step 24629: {'lr': 0.00047170885769952907, 'samples': 12610560, 'steps': 24629, 'loss/train': 1.6440081596374512} +03/04/2022 18:23:24 - INFO - codeparrot_training - Step 24630: {'lr': 0.00047170640548048525, 'samples': 12611072, 'steps': 24630, 'loss/train': 2.314687490463257} +03/04/2022 18:23:26 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 18:23:29 - INFO - codeparrot_training - Step 24631: {'lr': 0.000471703953161544, 'samples': 12611584, 'steps': 24631, 'loss/train': 1.898622989654541} +03/04/2022 18:23:32 - INFO - codeparrot_training - Step 24632: {'lr': 0.00047170150074270635, 'samples': 12612096, 'steps': 24632, 'loss/train': 1.4048856496810913} +03/04/2022 18:23:34 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 18:23:38 - INFO - codeparrot_training - Step 24633: {'lr': 0.0004716990482239735, 'samples': 12612608, 'steps': 24633, 'loss/train': 1.1435734033584595} +03/04/2022 18:23:41 - INFO - codeparrot_training - Step 24634: {'lr': 0.0004716965956053463, 'samples': 12613120, 'steps': 24634, 'loss/train': 2.3474020957946777} +03/04/2022 18:23:44 - INFO - codeparrot_training - Step 24635: {'lr': 0.00047169414288682616, 'samples': 12613632, 'steps': 24635, 'loss/train': 1.4966769218444824} +03/04/2022 18:23:45 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/04/2022 18:23:49 - INFO - codeparrot_training - Step 24636: {'lr': 0.0004716916900684141, 'samples': 12614144, 'steps': 24636, 'loss/train': 2.193981170654297} +03/04/2022 18:23:52 - INFO - codeparrot_training - Step 24637: {'lr': 0.00047168923715011103, 'samples': 12614656, 'steps': 24637, 'loss/train': 1.9065353870391846} +03/04/2022 18:23:53 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/04/2022 18:23:58 - INFO - codeparrot_training - Step 24638: {'lr': 0.00047168678413191833, 'samples': 12615168, 'steps': 24638, 'loss/train': 3.219130754470825} +03/04/2022 18:24:01 - INFO - codeparrot_training - Step 24639: {'lr': 0.00047168433101383694, 'samples': 12615680, 'steps': 24639, 'loss/train': 1.8705313205718994} +03/04/2022 18:24:02 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 18:24:06 - INFO - codeparrot_training - Step 24640: {'lr': 0.000471681877795868, 'samples': 12616192, 'steps': 24640, 'loss/train': 1.8961694240570068} +03/04/2022 18:24:09 - INFO - codeparrot_training - Step 24641: {'lr': 0.0004716794244780127, 'samples': 12616704, 'steps': 24641, 'loss/train': 1.523282766342163} +03/04/2022 18:24:10 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 18:24:15 - INFO - codeparrot_training - Step 24642: {'lr': 0.0004716769710602721, 'samples': 12617216, 'steps': 24642, 'loss/train': 2.1076977252960205} +03/04/2022 18:24:18 - INFO - codeparrot_training - Step 24643: {'lr': 0.00047167451754264714, 'samples': 12617728, 'steps': 24643, 'loss/train': 2.709289073944092} +03/04/2022 18:24:19 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 18:24:23 - INFO - codeparrot_training - Step 24644: {'lr': 0.0004716720639251392, 'samples': 12618240, 'steps': 24644, 'loss/train': 1.3856135606765747} +03/04/2022 18:24:26 - INFO - codeparrot_training - Step 24645: {'lr': 0.0004716696102077491, 'samples': 12618752, 'steps': 24645, 'loss/train': 1.8420995473861694} +03/04/2022 18:24:28 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 18:24:32 - INFO - codeparrot_training - Step 24646: {'lr': 0.0004716671563904782, 'samples': 12619264, 'steps': 24646, 'loss/train': 1.2377257347106934} +03/04/2022 18:24:35 - INFO - codeparrot_training - Step 24647: {'lr': 0.0004716647024733275, 'samples': 12619776, 'steps': 24647, 'loss/train': 2.076728343963623} +03/04/2022 18:24:36 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 18:24:40 - INFO - codeparrot_training - Step 24648: {'lr': 0.00047166224845629804, 'samples': 12620288, 'steps': 24648, 'loss/train': 1.1040862798690796} +03/04/2022 18:24:43 - INFO - codeparrot_training - Step 24649: {'lr': 0.000471659794339391, 'samples': 12620800, 'steps': 24649, 'loss/train': 1.9048970937728882} +03/04/2022 18:24:45 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 18:24:49 - INFO - codeparrot_training - Step 24650: {'lr': 0.00047165734012260754, 'samples': 12621312, 'steps': 24650, 'loss/train': 1.1207002401351929} +03/04/2022 18:24:52 - INFO - codeparrot_training - Step 24651: {'lr': 0.0004716548858059486, 'samples': 12621824, 'steps': 24651, 'loss/train': 1.5597668886184692} +03/04/2022 18:24:53 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 18:24:57 - INFO - codeparrot_training - Step 24652: {'lr': 0.0004716524313894155, 'samples': 12622336, 'steps': 24652, 'loss/train': 2.1996593475341797} +03/04/2022 18:25:00 - INFO - codeparrot_training - Step 24653: {'lr': 0.0004716499768730092, 'samples': 12622848, 'steps': 24653, 'loss/train': 1.3493784666061401} +03/04/2022 18:25:02 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/04/2022 18:25:05 - INFO - codeparrot_training - Step 24654: {'lr': 0.0004716475222567308, 'samples': 12623360, 'steps': 24654, 'loss/train': 2.2032954692840576} +03/04/2022 18:25:09 - INFO - codeparrot_training - Step 24655: {'lr': 0.0004716450675405815, 'samples': 12623872, 'steps': 24655, 'loss/train': 1.7622028589248657} +03/04/2022 18:25:10 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 18:25:14 - INFO - codeparrot_training - Step 24656: {'lr': 0.0004716426127245623, 'samples': 12624384, 'steps': 24656, 'loss/train': 1.983569860458374} +03/04/2022 18:25:17 - INFO - codeparrot_training - Step 24657: {'lr': 0.00047164015780867444, 'samples': 12624896, 'steps': 24657, 'loss/train': 2.214242696762085} +03/04/2022 18:25:18 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 18:25:22 - INFO - codeparrot_training - Step 24658: {'lr': 0.0004716377027929189, 'samples': 12625408, 'steps': 24658, 'loss/train': 1.776280164718628} +03/04/2022 18:25:25 - INFO - codeparrot_training - Step 24659: {'lr': 0.00047163524767729684, 'samples': 12625920, 'steps': 24659, 'loss/train': 1.9588526487350464} +03/04/2022 18:25:27 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 18:25:31 - INFO - codeparrot_training - Step 24660: {'lr': 0.0004716327924618093, 'samples': 12626432, 'steps': 24660, 'loss/train': 1.6434866189956665} +03/04/2022 18:25:34 - INFO - codeparrot_training - Step 24661: {'lr': 0.0004716303371464575, 'samples': 12626944, 'steps': 24661, 'loss/train': 1.8649039268493652} +03/04/2022 18:25:35 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/04/2022 18:25:39 - INFO - codeparrot_training - Step 24662: {'lr': 0.0004716278817312425, 'samples': 12627456, 'steps': 24662, 'loss/train': 1.211341381072998} +03/04/2022 18:25:42 - INFO - codeparrot_training - Step 24663: {'lr': 0.0004716254262161653, 'samples': 12627968, 'steps': 24663, 'loss/train': 1.6941208839416504} +03/04/2022 18:25:44 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 18:25:48 - INFO - codeparrot_training - Step 24664: {'lr': 0.00047162297060122726, 'samples': 12628480, 'steps': 24664, 'loss/train': 1.5189270973205566} +03/04/2022 18:25:51 - INFO - codeparrot_training - Step 24665: {'lr': 0.0004716205148864292, 'samples': 12628992, 'steps': 24665, 'loss/train': 1.6657893657684326} +03/04/2022 18:25:52 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 18:25:56 - INFO - codeparrot_training - Step 24666: {'lr': 0.0004716180590717724, 'samples': 12629504, 'steps': 24666, 'loss/train': 1.8152621984481812} +03/04/2022 18:25:59 - INFO - codeparrot_training - Step 24667: {'lr': 0.0004716156031572579, 'samples': 12630016, 'steps': 24667, 'loss/train': 2.112004041671753} +03/04/2022 18:26:00 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 18:26:05 - INFO - codeparrot_training - Step 24668: {'lr': 0.00047161314714288697, 'samples': 12630528, 'steps': 24668, 'loss/train': 1.8357746601104736} +03/04/2022 18:26:08 - INFO - codeparrot_training - Step 24669: {'lr': 0.00047161069102866037, 'samples': 12631040, 'steps': 24669, 'loss/train': 2.348203420639038} +03/04/2022 18:26:09 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 18:26:13 - INFO - codeparrot_training - Step 24670: {'lr': 0.00047160823481457955, 'samples': 12631552, 'steps': 24670, 'loss/train': 1.9957668781280518} +03/04/2022 18:26:16 - INFO - codeparrot_training - Step 24671: {'lr': 0.0004716057785006454, 'samples': 12632064, 'steps': 24671, 'loss/train': 1.6927039623260498} +03/04/2022 18:26:17 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 18:26:21 - INFO - codeparrot_training - Step 24672: {'lr': 0.00047160332208685915, 'samples': 12632576, 'steps': 24672, 'loss/train': 2.0702621936798096} +03/04/2022 18:26:25 - INFO - codeparrot_training - Step 24673: {'lr': 0.00047160086557322185, 'samples': 12633088, 'steps': 24673, 'loss/train': 1.5426883697509766} +03/04/2022 18:26:26 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 18:26:30 - INFO - codeparrot_training - Step 24674: {'lr': 0.0004715984089597346, 'samples': 12633600, 'steps': 24674, 'loss/train': 2.240640640258789} +03/04/2022 18:26:33 - INFO - codeparrot_training - Step 24675: {'lr': 0.00047159595224639854, 'samples': 12634112, 'steps': 24675, 'loss/train': 0.9570732116699219} +03/04/2022 18:26:34 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 18:26:38 - INFO - codeparrot_training - Step 24676: {'lr': 0.00047159349543321477, 'samples': 12634624, 'steps': 24676, 'loss/train': 1.171364665031433} +03/04/2022 18:26:42 - INFO - codeparrot_training - Step 24677: {'lr': 0.00047159103852018443, 'samples': 12635136, 'steps': 24677, 'loss/train': 1.5713319778442383} +03/04/2022 18:26:43 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 18:26:47 - INFO - codeparrot_training - Step 24678: {'lr': 0.00047158858150730856, 'samples': 12635648, 'steps': 24678, 'loss/train': 1.747634768486023} +03/04/2022 18:26:50 - INFO - codeparrot_training - Step 24679: {'lr': 0.00047158612439458824, 'samples': 12636160, 'steps': 24679, 'loss/train': 2.0623903274536133} +03/04/2022 18:26:52 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/04/2022 18:26:55 - INFO - codeparrot_training - Step 24680: {'lr': 0.00047158366718202466, 'samples': 12636672, 'steps': 24680, 'loss/train': 2.718082904815674} +03/04/2022 18:26:59 - INFO - codeparrot_training - Step 24681: {'lr': 0.00047158120986961897, 'samples': 12637184, 'steps': 24681, 'loss/train': 2.000596523284912} +03/04/2022 18:27:00 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 18:27:04 - INFO - codeparrot_training - Step 24682: {'lr': 0.00047157875245737213, 'samples': 12637696, 'steps': 24682, 'loss/train': 1.8995782136917114} +03/04/2022 18:27:07 - INFO - codeparrot_training - Step 24683: {'lr': 0.0004715762949452853, 'samples': 12638208, 'steps': 24683, 'loss/train': 2.354377031326294} +03/04/2022 18:27:09 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 18:27:12 - INFO - codeparrot_training - Step 24684: {'lr': 0.0004715738373333597, 'samples': 12638720, 'steps': 24684, 'loss/train': 1.7715439796447754} +03/04/2022 18:27:16 - INFO - codeparrot_training - Step 24685: {'lr': 0.00047157137962159626, 'samples': 12639232, 'steps': 24685, 'loss/train': 1.3711684942245483} +03/04/2022 18:27:17 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/04/2022 18:27:21 - INFO - codeparrot_training - Step 24686: {'lr': 0.00047156892180999624, 'samples': 12639744, 'steps': 24686, 'loss/train': 1.1598052978515625} +03/04/2022 18:27:24 - INFO - codeparrot_training - Step 24687: {'lr': 0.0004715664638985606, 'samples': 12640256, 'steps': 24687, 'loss/train': 1.291690707206726} +03/04/2022 18:27:26 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/04/2022 18:27:29 - INFO - codeparrot_training - Step 24688: {'lr': 0.00047156400588729066, 'samples': 12640768, 'steps': 24688, 'loss/train': 1.5799421072006226} +03/04/2022 18:27:32 - INFO - codeparrot_training - Step 24689: {'lr': 0.0004715615477761873, 'samples': 12641280, 'steps': 24689, 'loss/train': 2.627638101577759} +03/04/2022 18:27:34 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 18:27:38 - INFO - codeparrot_training - Step 24690: {'lr': 0.00047155908956525173, 'samples': 12641792, 'steps': 24690, 'loss/train': 2.178576946258545} +03/04/2022 18:27:41 - INFO - codeparrot_training - Step 24691: {'lr': 0.00047155663125448514, 'samples': 12642304, 'steps': 24691, 'loss/train': 1.528391718864441} +03/04/2022 18:27:43 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/04/2022 18:27:46 - INFO - codeparrot_training - Step 24692: {'lr': 0.00047155417284388846, 'samples': 12642816, 'steps': 24692, 'loss/train': 1.4242124557495117} +03/04/2022 18:27:49 - INFO - codeparrot_training - Step 24693: {'lr': 0.0004715517143334629, 'samples': 12643328, 'steps': 24693, 'loss/train': 1.6316503286361694} +03/04/2022 18:27:51 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 18:27:54 - INFO - codeparrot_training - Step 24694: {'lr': 0.00047154925572320957, 'samples': 12643840, 'steps': 24694, 'loss/train': 1.9165469408035278} +03/04/2022 18:27:58 - INFO - codeparrot_training - Step 24695: {'lr': 0.00047154679701312953, 'samples': 12644352, 'steps': 24695, 'loss/train': 1.4554461240768433} +03/04/2022 18:28:00 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 18:28:03 - INFO - codeparrot_training - Step 24696: {'lr': 0.00047154433820322395, 'samples': 12644864, 'steps': 24696, 'loss/train': 2.001314878463745} +03/04/2022 18:28:06 - INFO - codeparrot_training - Step 24697: {'lr': 0.0004715418792934939, 'samples': 12645376, 'steps': 24697, 'loss/train': 1.549970269203186} +03/04/2022 18:28:08 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/04/2022 18:28:11 - INFO - codeparrot_training - Step 24698: {'lr': 0.00047153942028394056, 'samples': 12645888, 'steps': 24698, 'loss/train': 1.726121425628662} +03/04/2022 18:28:15 - INFO - codeparrot_training - Step 24699: {'lr': 0.0004715369611745649, 'samples': 12646400, 'steps': 24699, 'loss/train': 1.913517713546753} +03/04/2022 18:28:17 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/04/2022 18:28:20 - INFO - codeparrot_training - Step 24700: {'lr': 0.00047153450196536816, 'samples': 12646912, 'steps': 24700, 'loss/train': 1.3226584196090698} +03/04/2022 18:28:23 - INFO - codeparrot_training - Step 24701: {'lr': 0.00047153204265635136, 'samples': 12647424, 'steps': 24701, 'loss/train': 1.125030279159546} +03/04/2022 18:28:26 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/04/2022 18:28:28 - INFO - codeparrot_training - Step 24702: {'lr': 0.0004715295832475156, 'samples': 12647936, 'steps': 24702, 'loss/train': 1.5114527940750122} +03/04/2022 18:28:32 - INFO - codeparrot_training - Step 24703: {'lr': 0.0004715271237388621, 'samples': 12648448, 'steps': 24703, 'loss/train': 1.9711768627166748} +03/04/2022 18:28:34 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 18:28:37 - INFO - codeparrot_training - Step 24704: {'lr': 0.00047152466413039187, 'samples': 12648960, 'steps': 24704, 'loss/train': 2.4147050380706787} +03/04/2022 18:28:40 - INFO - codeparrot_training - Step 24705: {'lr': 0.000471522204422106, 'samples': 12649472, 'steps': 24705, 'loss/train': 1.3595484495162964} +03/04/2022 18:28:43 - INFO - codeparrot_training - Step 24706: {'lr': 0.0004715197446140057, 'samples': 12649984, 'steps': 24706, 'loss/train': 1.8579267263412476} +03/04/2022 18:28:44 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 18:28:49 - INFO - codeparrot_training - Step 24707: {'lr': 0.000471517284706092, 'samples': 12650496, 'steps': 24707, 'loss/train': 2.4519073963165283} +03/04/2022 18:28:52 - INFO - codeparrot_training - Step 24708: {'lr': 0.0004715148246983661, 'samples': 12651008, 'steps': 24708, 'loss/train': 2.1708130836486816} +03/04/2022 18:28:53 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/04/2022 18:28:57 - INFO - codeparrot_training - Step 24709: {'lr': 0.000471512364590829, 'samples': 12651520, 'steps': 24709, 'loss/train': 1.411179542541504} +03/04/2022 18:29:00 - INFO - codeparrot_training - Step 24710: {'lr': 0.0004715099043834818, 'samples': 12652032, 'steps': 24710, 'loss/train': 2.5634896755218506} +03/04/2022 18:29:01 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 18:29:06 - INFO - codeparrot_training - Step 24711: {'lr': 0.00047150744407632565, 'samples': 12652544, 'steps': 24711, 'loss/train': 3.6621053218841553} +03/04/2022 18:29:09 - INFO - codeparrot_training - Step 24712: {'lr': 0.00047150498366936165, 'samples': 12653056, 'steps': 24712, 'loss/train': 1.6211223602294922} +03/04/2022 18:29:09 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 18:29:14 - INFO - codeparrot_training - Step 24713: {'lr': 0.000471502523162591, 'samples': 12653568, 'steps': 24713, 'loss/train': 0.3041895031929016} +03/04/2022 18:29:17 - INFO - codeparrot_training - Step 24714: {'lr': 0.00047150006255601475, 'samples': 12654080, 'steps': 24714, 'loss/train': 1.7482783794403076} +03/04/2022 18:29:18 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 18:29:23 - INFO - codeparrot_training - Step 24715: {'lr': 0.00047149760184963385, 'samples': 12654592, 'steps': 24715, 'loss/train': 1.5121724605560303} +03/04/2022 18:29:26 - INFO - codeparrot_training - Step 24716: {'lr': 0.0004714951410434497, 'samples': 12655104, 'steps': 24716, 'loss/train': 1.9706952571868896} +03/04/2022 18:29:26 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 18:29:31 - INFO - codeparrot_training - Step 24717: {'lr': 0.00047149268013746317, 'samples': 12655616, 'steps': 24717, 'loss/train': 2.383165121078491} +03/04/2022 18:29:34 - INFO - codeparrot_training - Step 24718: {'lr': 0.00047149021913167545, 'samples': 12656128, 'steps': 24718, 'loss/train': 2.336829900741577} +03/04/2022 18:29:34 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 18:29:39 - INFO - codeparrot_training - Step 24719: {'lr': 0.0004714877580260877, 'samples': 12656640, 'steps': 24719, 'loss/train': 1.8537455797195435} +03/04/2022 18:29:43 - INFO - codeparrot_training - Step 24720: {'lr': 0.00047148529682070094, 'samples': 12657152, 'steps': 24720, 'loss/train': 0.9557305574417114} +03/04/2022 18:29:43 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 18:29:48 - INFO - codeparrot_training - Step 24721: {'lr': 0.00047148283551551643, 'samples': 12657664, 'steps': 24721, 'loss/train': 1.1513121128082275} +03/04/2022 18:29:51 - INFO - codeparrot_training - Step 24722: {'lr': 0.000471480374110535, 'samples': 12658176, 'steps': 24722, 'loss/train': 1.4778661727905273} +03/04/2022 18:29:51 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 18:29:56 - INFO - codeparrot_training - Step 24723: {'lr': 0.00047147791260575804, 'samples': 12658688, 'steps': 24723, 'loss/train': 1.6362143754959106} +03/04/2022 18:30:00 - INFO - codeparrot_training - Step 24724: {'lr': 0.0004714754510011866, 'samples': 12659200, 'steps': 24724, 'loss/train': 1.64474356174469} +03/04/2022 18:30:00 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 18:30:05 - INFO - codeparrot_training - Step 24725: {'lr': 0.0004714729892968216, 'samples': 12659712, 'steps': 24725, 'loss/train': 0.701042115688324} +03/04/2022 18:30:08 - INFO - codeparrot_training - Step 24726: {'lr': 0.0004714705274926644, 'samples': 12660224, 'steps': 24726, 'loss/train': 1.1750068664550781} +03/04/2022 18:30:08 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 18:30:13 - INFO - codeparrot_training - Step 24727: {'lr': 0.00047146806558871594, 'samples': 12660736, 'steps': 24727, 'loss/train': 2.2862913608551025} +03/04/2022 18:30:17 - INFO - codeparrot_training - Step 24728: {'lr': 0.0004714656035849774, 'samples': 12661248, 'steps': 24728, 'loss/train': 2.1631500720977783} +03/04/2022 18:30:17 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/04/2022 18:30:22 - INFO - codeparrot_training - Step 24729: {'lr': 0.00047146314148144986, 'samples': 12661760, 'steps': 24729, 'loss/train': 2.0542526245117188} +03/04/2022 18:30:25 - INFO - codeparrot_training - Step 24730: {'lr': 0.00047146067927813454, 'samples': 12662272, 'steps': 24730, 'loss/train': 2.438232421875} +03/04/2022 18:30:25 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 18:30:30 - INFO - codeparrot_training - Step 24731: {'lr': 0.00047145821697503235, 'samples': 12662784, 'steps': 24731, 'loss/train': 2.171314239501953} +03/04/2022 18:30:33 - INFO - codeparrot_training - Step 24732: {'lr': 0.00047145575457214453, 'samples': 12663296, 'steps': 24732, 'loss/train': 1.537041187286377} +03/04/2022 18:30:34 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/04/2022 18:30:39 - INFO - codeparrot_training - Step 24733: {'lr': 0.00047145329206947216, 'samples': 12663808, 'steps': 24733, 'loss/train': 2.600062131881714} +03/04/2022 18:30:42 - INFO - codeparrot_training - Step 24734: {'lr': 0.0004714508294670164, 'samples': 12664320, 'steps': 24734, 'loss/train': 1.532106876373291} +03/04/2022 18:30:42 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 18:30:47 - INFO - codeparrot_training - Step 24735: {'lr': 0.00047144836676477823, 'samples': 12664832, 'steps': 24735, 'loss/train': 2.280266284942627} +03/04/2022 18:30:50 - INFO - codeparrot_training - Step 24736: {'lr': 0.00047144590396275895, 'samples': 12665344, 'steps': 24736, 'loss/train': 1.6883699893951416} +03/04/2022 18:30:51 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 18:30:56 - INFO - codeparrot_training - Step 24737: {'lr': 0.0004714434410609595, 'samples': 12665856, 'steps': 24737, 'loss/train': 2.0127952098846436} +03/04/2022 18:30:59 - INFO - codeparrot_training - Step 24738: {'lr': 0.00047144097805938104, 'samples': 12666368, 'steps': 24738, 'loss/train': 1.7633246183395386} +03/04/2022 18:30:59 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 18:31:04 - INFO - codeparrot_training - Step 24739: {'lr': 0.0004714385149580247, 'samples': 12666880, 'steps': 24739, 'loss/train': 2.41957426071167} +03/04/2022 18:31:07 - INFO - codeparrot_training - Step 24740: {'lr': 0.0004714360517568916, 'samples': 12667392, 'steps': 24740, 'loss/train': 2.0245862007141113} +03/04/2022 18:31:08 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 18:31:13 - INFO - codeparrot_training - Step 24741: {'lr': 0.00047143358845598283, 'samples': 12667904, 'steps': 24741, 'loss/train': 2.1678361892700195} +03/04/2022 18:31:16 - INFO - codeparrot_training - Step 24742: {'lr': 0.0004714311250552995, 'samples': 12668416, 'steps': 24742, 'loss/train': 2.006564140319824} +03/04/2022 18:31:17 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 18:31:21 - INFO - codeparrot_training - Step 24743: {'lr': 0.0004714286615548427, 'samples': 12668928, 'steps': 24743, 'loss/train': 1.7313655614852905} +03/04/2022 18:31:24 - INFO - codeparrot_training - Step 24744: {'lr': 0.00047142619795461363, 'samples': 12669440, 'steps': 24744, 'loss/train': 1.4713009595870972} +03/04/2022 18:31:25 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 18:31:29 - INFO - codeparrot_training - Step 24745: {'lr': 0.0004714237342546133, 'samples': 12669952, 'steps': 24745, 'loss/train': 1.4249142408370972} +03/04/2022 18:31:33 - INFO - codeparrot_training - Step 24746: {'lr': 0.0004714212704548428, 'samples': 12670464, 'steps': 24746, 'loss/train': 1.8464289903640747} +03/04/2022 18:31:33 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 18:31:38 - INFO - codeparrot_training - Step 24747: {'lr': 0.0004714188065553033, 'samples': 12670976, 'steps': 24747, 'loss/train': 1.1678587198257446} +03/04/2022 18:31:41 - INFO - codeparrot_training - Step 24748: {'lr': 0.000471416342555996, 'samples': 12671488, 'steps': 24748, 'loss/train': 1.7508043050765991} +03/04/2022 18:31:42 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/04/2022 18:31:46 - INFO - codeparrot_training - Step 24749: {'lr': 0.00047141387845692174, 'samples': 12672000, 'steps': 24749, 'loss/train': 1.5101207494735718} +03/04/2022 18:31:50 - INFO - codeparrot_training - Step 24750: {'lr': 0.0004714114142580819, 'samples': 12672512, 'steps': 24750, 'loss/train': 2.2803940773010254} +03/04/2022 18:31:50 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/04/2022 18:31:55 - INFO - codeparrot_training - Step 24751: {'lr': 0.00047140894995947755, 'samples': 12673024, 'steps': 24751, 'loss/train': 1.4348726272583008} +03/04/2022 18:31:58 - INFO - codeparrot_training - Step 24752: {'lr': 0.00047140648556110966, 'samples': 12673536, 'steps': 24752, 'loss/train': 1.384469985961914} +03/04/2022 18:31:58 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 18:32:03 - INFO - codeparrot_training - Step 24753: {'lr': 0.00047140402106297946, 'samples': 12674048, 'steps': 24753, 'loss/train': 1.2430760860443115} +03/04/2022 18:32:06 - INFO - codeparrot_training - Step 24754: {'lr': 0.000471401556465088, 'samples': 12674560, 'steps': 24754, 'loss/train': 1.639783263206482} +03/04/2022 18:32:07 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/04/2022 18:32:12 - INFO - codeparrot_training - Step 24755: {'lr': 0.00047139909176743643, 'samples': 12675072, 'steps': 24755, 'loss/train': 2.177448272705078} +03/04/2022 18:32:15 - INFO - codeparrot_training - Step 24756: {'lr': 0.0004713966269700259, 'samples': 12675584, 'steps': 24756, 'loss/train': 1.2752490043640137} +03/04/2022 18:32:15 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 18:32:20 - INFO - codeparrot_training - Step 24757: {'lr': 0.0004713941620728574, 'samples': 12676096, 'steps': 24757, 'loss/train': 2.029435873031616} +03/04/2022 18:32:23 - INFO - codeparrot_training - Step 24758: {'lr': 0.0004713916970759321, 'samples': 12676608, 'steps': 24758, 'loss/train': 1.7979013919830322} +03/04/2022 18:32:24 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 18:32:29 - INFO - codeparrot_training - Step 24759: {'lr': 0.00047138923197925114, 'samples': 12677120, 'steps': 24759, 'loss/train': 1.868674397468567} +03/04/2022 18:32:32 - INFO - codeparrot_training - Step 24760: {'lr': 0.00047138676678281564, 'samples': 12677632, 'steps': 24760, 'loss/train': 3.1697590351104736} +03/04/2022 18:32:32 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 18:32:37 - INFO - codeparrot_training - Step 24761: {'lr': 0.00047138430148662666, 'samples': 12678144, 'steps': 24761, 'loss/train': 0.5748162865638733} +03/04/2022 18:32:40 - INFO - codeparrot_training - Step 24762: {'lr': 0.0004713818360906853, 'samples': 12678656, 'steps': 24762, 'loss/train': 2.116304874420166} +03/04/2022 18:32:41 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 18:32:45 - INFO - codeparrot_training - Step 24763: {'lr': 0.0004713793705949927, 'samples': 12679168, 'steps': 24763, 'loss/train': 1.9361690282821655} +03/04/2022 18:32:49 - INFO - codeparrot_training - Step 24764: {'lr': 0.00047137690499955, 'samples': 12679680, 'steps': 24764, 'loss/train': 1.661600112915039} +03/04/2022 18:32:49 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/04/2022 18:32:54 - INFO - codeparrot_training - Step 24765: {'lr': 0.0004713744393043583, 'samples': 12680192, 'steps': 24765, 'loss/train': 1.9061942100524902} +03/04/2022 18:32:57 - INFO - codeparrot_training - Step 24766: {'lr': 0.00047137197350941864, 'samples': 12680704, 'steps': 24766, 'loss/train': 2.067296266555786} +03/04/2022 18:32:58 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/04/2022 18:33:02 - INFO - codeparrot_training - Step 24767: {'lr': 0.0004713695076147322, 'samples': 12681216, 'steps': 24767, 'loss/train': 1.8959370851516724} +03/04/2022 18:33:06 - INFO - codeparrot_training - Step 24768: {'lr': 0.0004713670416203001, 'samples': 12681728, 'steps': 24768, 'loss/train': 1.6699947118759155} +03/04/2022 18:33:06 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/04/2022 18:33:11 - INFO - codeparrot_training - Step 24769: {'lr': 0.00047136457552612344, 'samples': 12682240, 'steps': 24769, 'loss/train': 1.8406429290771484} +03/04/2022 18:33:14 - INFO - codeparrot_training - Step 24770: {'lr': 0.00047136210933220325, 'samples': 12682752, 'steps': 24770, 'loss/train': 1.5311241149902344} +03/04/2022 18:33:14 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 18:33:19 - INFO - codeparrot_training - Step 24771: {'lr': 0.0004713596430385408, 'samples': 12683264, 'steps': 24771, 'loss/train': 1.4714120626449585} +03/04/2022 18:33:22 - INFO - codeparrot_training - Step 24772: {'lr': 0.00047135717664513704, 'samples': 12683776, 'steps': 24772, 'loss/train': 2.10063099861145} +03/04/2022 18:33:23 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/04/2022 18:33:28 - INFO - codeparrot_training - Step 24773: {'lr': 0.00047135471015199315, 'samples': 12684288, 'steps': 24773, 'loss/train': 1.682602882385254} +03/04/2022 18:33:31 - INFO - codeparrot_training - Step 24774: {'lr': 0.00047135224355911035, 'samples': 12684800, 'steps': 24774, 'loss/train': 1.9194884300231934} +03/04/2022 18:33:32 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/04/2022 18:33:36 - INFO - codeparrot_training - Step 24775: {'lr': 0.0004713497768664895, 'samples': 12685312, 'steps': 24775, 'loss/train': 1.8973873853683472} +03/04/2022 18:33:39 - INFO - codeparrot_training - Step 24776: {'lr': 0.00047134731007413195, 'samples': 12685824, 'steps': 24776, 'loss/train': 1.981945276260376} +03/04/2022 18:33:40 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 18:33:45 - INFO - codeparrot_training - Step 24777: {'lr': 0.0004713448431820387, 'samples': 12686336, 'steps': 24777, 'loss/train': 2.4914448261260986} +03/04/2022 18:33:48 - INFO - codeparrot_training - Step 24778: {'lr': 0.00047134237619021085, 'samples': 12686848, 'steps': 24778, 'loss/train': 0.3672856092453003} +03/04/2022 18:33:49 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/04/2022 18:33:53 - INFO - codeparrot_training - Step 24779: {'lr': 0.00047133990909864953, 'samples': 12687360, 'steps': 24779, 'loss/train': 1.419208288192749} +03/04/2022 18:33:56 - INFO - codeparrot_training - Step 24780: {'lr': 0.0004713374419073559, 'samples': 12687872, 'steps': 24780, 'loss/train': 2.0719738006591797} +03/04/2022 18:33:57 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 18:34:01 - INFO - codeparrot_training - Step 24781: {'lr': 0.000471334974616331, 'samples': 12688384, 'steps': 24781, 'loss/train': 1.7094753980636597} +03/04/2022 18:34:05 - INFO - codeparrot_training - Step 24782: {'lr': 0.0004713325072255761, 'samples': 12688896, 'steps': 24782, 'loss/train': 2.5775630474090576} +03/04/2022 18:34:05 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 18:34:10 - INFO - codeparrot_training - Step 24783: {'lr': 0.000471330039735092, 'samples': 12689408, 'steps': 24783, 'loss/train': 1.6372050046920776} +03/04/2022 18:34:13 - INFO - codeparrot_training - Step 24784: {'lr': 0.0004713275721448801, 'samples': 12689920, 'steps': 24784, 'loss/train': 2.311481475830078} +03/04/2022 18:34:14 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 18:34:19 - INFO - codeparrot_training - Step 24785: {'lr': 0.0004713251044549414, 'samples': 12690432, 'steps': 24785, 'loss/train': 1.3840956687927246} +03/04/2022 18:34:22 - INFO - codeparrot_training - Step 24786: {'lr': 0.000471322636665277, 'samples': 12690944, 'steps': 24786, 'loss/train': 1.8654656410217285} +03/04/2022 18:34:23 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 18:34:27 - INFO - codeparrot_training - Step 24787: {'lr': 0.0004713201687758881, 'samples': 12691456, 'steps': 24787, 'loss/train': 3.042464017868042} +03/04/2022 18:34:30 - INFO - codeparrot_training - Step 24788: {'lr': 0.00047131770078677574, 'samples': 12691968, 'steps': 24788, 'loss/train': 1.2435691356658936} +03/04/2022 18:34:31 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/04/2022 18:34:35 - INFO - codeparrot_training - Step 24789: {'lr': 0.000471315232697941, 'samples': 12692480, 'steps': 24789, 'loss/train': 1.8597811460494995} +03/04/2022 18:34:38 - INFO - codeparrot_training - Step 24790: {'lr': 0.000471312764509385, 'samples': 12692992, 'steps': 24790, 'loss/train': 1.4346798658370972} +03/04/2022 18:34:39 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 18:34:44 - INFO - codeparrot_training - Step 24791: {'lr': 0.0004713102962211089, 'samples': 12693504, 'steps': 24791, 'loss/train': 2.1289052963256836} +03/04/2022 18:34:47 - INFO - codeparrot_training - Step 24792: {'lr': 0.0004713078278331138, 'samples': 12694016, 'steps': 24792, 'loss/train': 1.555801510810852} +03/04/2022 18:34:47 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/04/2022 18:34:52 - INFO - codeparrot_training - Step 24793: {'lr': 0.00047130535934540086, 'samples': 12694528, 'steps': 24793, 'loss/train': 2.58398175239563} +03/04/2022 18:34:55 - INFO - codeparrot_training - Step 24794: {'lr': 0.00047130289075797107, 'samples': 12695040, 'steps': 24794, 'loss/train': 1.2993934154510498} +03/04/2022 18:34:56 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 18:35:01 - INFO - codeparrot_training - Step 24795: {'lr': 0.0004713004220708257, 'samples': 12695552, 'steps': 24795, 'loss/train': 1.974942922592163} +03/04/2022 18:35:04 - INFO - codeparrot_training - Step 24796: {'lr': 0.0004712979532839656, 'samples': 12696064, 'steps': 24796, 'loss/train': 1.9970390796661377} +03/04/2022 18:35:06 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 18:35:09 - INFO - codeparrot_training - Step 24797: {'lr': 0.00047129548439739225, 'samples': 12696576, 'steps': 24797, 'loss/train': 1.8911539316177368} +03/04/2022 18:35:12 - INFO - codeparrot_training - Step 24798: {'lr': 0.0004712930154111065, 'samples': 12697088, 'steps': 24798, 'loss/train': 1.6669784784317017} +03/04/2022 18:35:14 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/04/2022 18:35:18 - INFO - codeparrot_training - Step 24799: {'lr': 0.00047129054632510947, 'samples': 12697600, 'steps': 24799, 'loss/train': 1.5699328184127808} +03/04/2022 18:35:21 - INFO - codeparrot_training - Step 24800: {'lr': 0.00047128807713940244, 'samples': 12698112, 'steps': 24800, 'loss/train': 2.102475166320801} +03/04/2022 18:35:22 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/04/2022 18:35:26 - INFO - codeparrot_training - Step 24801: {'lr': 0.00047128560785398633, 'samples': 12698624, 'steps': 24801, 'loss/train': 3.260636568069458} +03/04/2022 18:35:29 - INFO - codeparrot_training - Step 24802: {'lr': 0.0004712831384688624, 'samples': 12699136, 'steps': 24802, 'loss/train': 0.6887727975845337} +03/04/2022 18:35:31 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 18:35:34 - INFO - codeparrot_training - Step 24803: {'lr': 0.00047128066898403166, 'samples': 12699648, 'steps': 24803, 'loss/train': 0.8184208273887634} +03/04/2022 18:35:38 - INFO - codeparrot_training - Step 24804: {'lr': 0.00047127819939949534, 'samples': 12700160, 'steps': 24804, 'loss/train': 1.3844348192214966} +03/04/2022 18:35:39 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/04/2022 18:35:43 - INFO - codeparrot_training - Step 24805: {'lr': 0.00047127572971525437, 'samples': 12700672, 'steps': 24805, 'loss/train': 1.5843253135681152} +03/04/2022 18:35:46 - INFO - codeparrot_training - Step 24806: {'lr': 0.00047127325993131006, 'samples': 12701184, 'steps': 24806, 'loss/train': 0.9158748984336853} +03/04/2022 18:35:48 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/04/2022 18:35:51 - INFO - codeparrot_training - Step 24807: {'lr': 0.0004712707900476634, 'samples': 12701696, 'steps': 24807, 'loss/train': 1.7368192672729492} +03/04/2022 18:35:54 - INFO - codeparrot_training - Step 24808: {'lr': 0.00047126832006431555, 'samples': 12702208, 'steps': 24808, 'loss/train': 1.935489296913147} +03/04/2022 18:35:56 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 18:36:00 - INFO - codeparrot_training - Step 24809: {'lr': 0.00047126584998126756, 'samples': 12702720, 'steps': 24809, 'loss/train': 1.3985446691513062} +03/04/2022 18:36:03 - INFO - codeparrot_training - Step 24810: {'lr': 0.0004712633797985206, 'samples': 12703232, 'steps': 24810, 'loss/train': 1.9078701734542847} +03/04/2022 18:36:04 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 18:36:08 - INFO - codeparrot_training - Step 24811: {'lr': 0.0004712609095160758, 'samples': 12703744, 'steps': 24811, 'loss/train': 1.8915894031524658} +03/04/2022 18:36:11 - INFO - codeparrot_training - Step 24812: {'lr': 0.0004712584391339343, 'samples': 12704256, 'steps': 24812, 'loss/train': 1.4928852319717407} +03/04/2022 18:36:12 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 18:36:17 - INFO - codeparrot_training - Step 24813: {'lr': 0.0004712559686520971, 'samples': 12704768, 'steps': 24813, 'loss/train': 1.1476874351501465} +03/04/2022 18:36:20 - INFO - codeparrot_training - Step 24814: {'lr': 0.0004712534980705654, 'samples': 12705280, 'steps': 24814, 'loss/train': 2.1341991424560547} +03/04/2022 18:36:21 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 18:36:25 - INFO - codeparrot_training - Step 24815: {'lr': 0.0004712510273893402, 'samples': 12705792, 'steps': 24815, 'loss/train': 1.3223248720169067} +03/04/2022 18:36:28 - INFO - codeparrot_training - Step 24816: {'lr': 0.00047124855660842283, 'samples': 12706304, 'steps': 24816, 'loss/train': 2.0045816898345947} +03/04/2022 18:36:29 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 18:36:33 - INFO - codeparrot_training - Step 24817: {'lr': 0.00047124608572781426, 'samples': 12706816, 'steps': 24817, 'loss/train': 1.4444947242736816} +03/04/2022 18:36:36 - INFO - codeparrot_training - Step 24818: {'lr': 0.0004712436147475155, 'samples': 12707328, 'steps': 24818, 'loss/train': 1.4498720169067383} +03/04/2022 18:36:37 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 18:36:42 - INFO - codeparrot_training - Step 24819: {'lr': 0.0004712411436675279, 'samples': 12707840, 'steps': 24819, 'loss/train': 1.1109795570373535} +03/04/2022 18:36:45 - INFO - codeparrot_training - Step 24820: {'lr': 0.0004712386724878524, 'samples': 12708352, 'steps': 24820, 'loss/train': 2.0805931091308594} +03/04/2022 18:36:46 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 18:36:50 - INFO - codeparrot_training - Step 24821: {'lr': 0.0004712362012084902, 'samples': 12708864, 'steps': 24821, 'loss/train': 1.9600995779037476} +03/04/2022 18:36:53 - INFO - codeparrot_training - Step 24822: {'lr': 0.00047123372982944237, 'samples': 12709376, 'steps': 24822, 'loss/train': 2.2254559993743896} +03/04/2022 18:36:54 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/04/2022 18:36:59 - INFO - codeparrot_training - Step 24823: {'lr': 0.00047123125835071004, 'samples': 12709888, 'steps': 24823, 'loss/train': 1.095399022102356} +03/04/2022 18:37:02 - INFO - codeparrot_training - Step 24824: {'lr': 0.00047122878677229426, 'samples': 12710400, 'steps': 24824, 'loss/train': 2.186128616333008} +03/04/2022 18:37:02 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 18:37:07 - INFO - codeparrot_training - Step 24825: {'lr': 0.0004712263150941962, 'samples': 12710912, 'steps': 24825, 'loss/train': 2.0428526401519775} +03/04/2022 18:37:10 - INFO - codeparrot_training - Step 24826: {'lr': 0.0004712238433164171, 'samples': 12711424, 'steps': 24826, 'loss/train': 1.5557576417922974} +03/04/2022 18:37:11 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 18:37:15 - INFO - codeparrot_training - Step 24827: {'lr': 0.00047122137143895785, 'samples': 12711936, 'steps': 24827, 'loss/train': 1.5509964227676392} +03/04/2022 18:37:19 - INFO - codeparrot_training - Step 24828: {'lr': 0.0004712188994618197, 'samples': 12712448, 'steps': 24828, 'loss/train': 1.858122706413269} +03/04/2022 18:37:19 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 18:37:24 - INFO - codeparrot_training - Step 24829: {'lr': 0.0004712164273850037, 'samples': 12712960, 'steps': 24829, 'loss/train': 1.96244478225708} +03/04/2022 18:37:27 - INFO - codeparrot_training - Step 24830: {'lr': 0.00047121395520851103, 'samples': 12713472, 'steps': 24830, 'loss/train': 2.172309398651123} +03/04/2022 18:37:28 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/04/2022 18:37:32 - INFO - codeparrot_training - Step 24831: {'lr': 0.00047121148293234274, 'samples': 12713984, 'steps': 24831, 'loss/train': 2.1327712535858154} +03/04/2022 18:37:36 - INFO - codeparrot_training - Step 24832: {'lr': 0.00047120901055649995, 'samples': 12714496, 'steps': 24832, 'loss/train': 2.144022226333618} +03/04/2022 18:37:36 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 18:37:41 - INFO - codeparrot_training - Step 24833: {'lr': 0.0004712065380809838, 'samples': 12715008, 'steps': 24833, 'loss/train': 1.895795226097107} +03/04/2022 18:37:44 - INFO - codeparrot_training - Step 24834: {'lr': 0.0004712040655057954, 'samples': 12715520, 'steps': 24834, 'loss/train': 1.3786791563034058} +03/04/2022 18:37:45 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 18:37:49 - INFO - codeparrot_training - Step 24835: {'lr': 0.0004712015928309359, 'samples': 12716032, 'steps': 24835, 'loss/train': 2.516359329223633} +03/04/2022 18:37:52 - INFO - codeparrot_training - Step 24836: {'lr': 0.0004711991200564064, 'samples': 12716544, 'steps': 24836, 'loss/train': 0.6548260450363159} +03/04/2022 18:37:53 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 18:37:58 - INFO - codeparrot_training - Step 24837: {'lr': 0.0004711966471822079, 'samples': 12717056, 'steps': 24837, 'loss/train': 1.8074663877487183} +03/04/2022 18:38:01 - INFO - codeparrot_training - Step 24838: {'lr': 0.00047119417420834163, 'samples': 12717568, 'steps': 24838, 'loss/train': 1.9456706047058105} +03/04/2022 18:38:02 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/04/2022 18:38:06 - INFO - codeparrot_training - Step 24839: {'lr': 0.00047119170113480867, 'samples': 12718080, 'steps': 24839, 'loss/train': 1.8541808128356934} +03/04/2022 18:38:09 - INFO - codeparrot_training - Step 24840: {'lr': 0.00047118922796161026, 'samples': 12718592, 'steps': 24840, 'loss/train': 1.629652738571167} +03/04/2022 18:38:10 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/04/2022 18:38:15 - INFO - codeparrot_training - Step 24841: {'lr': 0.00047118675468874727, 'samples': 12719104, 'steps': 24841, 'loss/train': 0.9782308340072632} +03/04/2022 18:38:18 - INFO - codeparrot_training - Step 24842: {'lr': 0.00047118428131622095, 'samples': 12719616, 'steps': 24842, 'loss/train': 6.431194305419922} +03/04/2022 18:38:20 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 18:38:23 - INFO - codeparrot_training - Step 24843: {'lr': 0.00047118180784403243, 'samples': 12720128, 'steps': 24843, 'loss/train': 0.2310839593410492} +03/04/2022 18:38:26 - INFO - codeparrot_training - Step 24844: {'lr': 0.0004711793342721828, 'samples': 12720640, 'steps': 24844, 'loss/train': 1.244694709777832} +03/04/2022 18:38:28 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 18:38:32 - INFO - codeparrot_training - Step 24845: {'lr': 0.00047117686060067315, 'samples': 12721152, 'steps': 24845, 'loss/train': 1.4048770666122437} +03/04/2022 18:38:35 - INFO - codeparrot_training - Step 24846: {'lr': 0.00047117438682950467, 'samples': 12721664, 'steps': 24846, 'loss/train': 2.539346218109131} +03/04/2022 18:38:36 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 18:38:40 - INFO - codeparrot_training - Step 24847: {'lr': 0.0004711719129586784, 'samples': 12722176, 'steps': 24847, 'loss/train': 1.7890695333480835} +03/04/2022 18:38:43 - INFO - codeparrot_training - Step 24848: {'lr': 0.0004711694389881955, 'samples': 12722688, 'steps': 24848, 'loss/train': 1.1161158084869385} +03/04/2022 18:38:46 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 18:38:49 - INFO - codeparrot_training - Step 24849: {'lr': 0.000471166964918057, 'samples': 12723200, 'steps': 24849, 'loss/train': 1.636245608329773} +03/04/2022 18:38:52 - INFO - codeparrot_training - Step 24850: {'lr': 0.0004711644907482641, 'samples': 12723712, 'steps': 24850, 'loss/train': 2.0334742069244385} +03/04/2022 18:38:54 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/04/2022 18:38:57 - INFO - codeparrot_training - Step 24851: {'lr': 0.00047116201647881794, 'samples': 12724224, 'steps': 24851, 'loss/train': 1.5915987491607666} +03/04/2022 18:39:00 - INFO - codeparrot_training - Step 24852: {'lr': 0.00047115954210971955, 'samples': 12724736, 'steps': 24852, 'loss/train': 1.8039389848709106} +03/04/2022 18:39:03 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/04/2022 18:39:06 - INFO - codeparrot_training - Step 24853: {'lr': 0.0004711570676409701, 'samples': 12725248, 'steps': 24853, 'loss/train': 2.2020270824432373} +03/04/2022 18:39:09 - INFO - codeparrot_training - Step 24854: {'lr': 0.0004711545930725707, 'samples': 12725760, 'steps': 24854, 'loss/train': 2.2233572006225586} +03/04/2022 18:39:12 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 18:39:14 - INFO - codeparrot_training - Step 24855: {'lr': 0.0004711521184045224, 'samples': 12726272, 'steps': 24855, 'loss/train': 2.358600378036499} +03/04/2022 18:39:17 - INFO - codeparrot_training - Step 24856: {'lr': 0.0004711496436368264, 'samples': 12726784, 'steps': 24856, 'loss/train': 2.116020679473877} +03/04/2022 18:39:20 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 18:39:23 - INFO - codeparrot_training - Step 24857: {'lr': 0.00047114716876948384, 'samples': 12727296, 'steps': 24857, 'loss/train': 1.5870885848999023} +03/04/2022 18:39:26 - INFO - codeparrot_training - Step 24858: {'lr': 0.0004711446938024957, 'samples': 12727808, 'steps': 24858, 'loss/train': 1.6376358270645142} +03/04/2022 18:39:29 - INFO - codeparrot_training - Step 24859: {'lr': 0.00047114221873586316, 'samples': 12728320, 'steps': 24859, 'loss/train': 1.9628387689590454} +03/04/2022 18:39:30 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 18:39:34 - INFO - codeparrot_training - Step 24860: {'lr': 0.00047113974356958744, 'samples': 12728832, 'steps': 24860, 'loss/train': 1.893445611000061} +03/04/2022 18:39:38 - INFO - codeparrot_training - Step 24861: {'lr': 0.0004711372683036695, 'samples': 12729344, 'steps': 24861, 'loss/train': 2.0230894088745117} +03/04/2022 18:39:38 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 18:39:43 - INFO - codeparrot_training - Step 24862: {'lr': 0.0004711347929381105, 'samples': 12729856, 'steps': 24862, 'loss/train': 1.976881742477417} +03/04/2022 18:39:46 - INFO - codeparrot_training - Step 24863: {'lr': 0.00047113231747291165, 'samples': 12730368, 'steps': 24863, 'loss/train': 2.106827974319458} +03/04/2022 18:39:47 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 18:39:51 - INFO - codeparrot_training - Step 24864: {'lr': 0.0004711298419080739, 'samples': 12730880, 'steps': 24864, 'loss/train': 1.6346181631088257} +03/04/2022 18:39:54 - INFO - codeparrot_training - Step 24865: {'lr': 0.00047112736624359855, 'samples': 12731392, 'steps': 24865, 'loss/train': 1.6802377700805664} +03/04/2022 18:39:55 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/04/2022 18:40:00 - INFO - codeparrot_training - Step 24866: {'lr': 0.00047112489047948655, 'samples': 12731904, 'steps': 24866, 'loss/train': 1.6255707740783691} +03/04/2022 18:40:03 - INFO - codeparrot_training - Step 24867: {'lr': 0.00047112241461573913, 'samples': 12732416, 'steps': 24867, 'loss/train': 1.9285180568695068} +03/04/2022 18:40:03 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/04/2022 18:40:08 - INFO - codeparrot_training - Step 24868: {'lr': 0.0004711199386523573, 'samples': 12732928, 'steps': 24868, 'loss/train': 1.7694237232208252} +03/04/2022 18:40:11 - INFO - codeparrot_training - Step 24869: {'lr': 0.0004711174625893423, 'samples': 12733440, 'steps': 24869, 'loss/train': 1.6412372589111328} +03/04/2022 18:40:12 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 18:40:17 - INFO - codeparrot_training - Step 24870: {'lr': 0.00047111498642669517, 'samples': 12733952, 'steps': 24870, 'loss/train': 2.040531635284424} +03/04/2022 18:40:20 - INFO - codeparrot_training - Step 24871: {'lr': 0.00047111251016441704, 'samples': 12734464, 'steps': 24871, 'loss/train': 1.301876187324524} +03/04/2022 18:40:20 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 18:40:25 - INFO - codeparrot_training - Step 24872: {'lr': 0.0004711100338025089, 'samples': 12734976, 'steps': 24872, 'loss/train': 3.1480305194854736} +03/04/2022 18:40:28 - INFO - codeparrot_training - Step 24873: {'lr': 0.00047110755734097216, 'samples': 12735488, 'steps': 24873, 'loss/train': 1.8286571502685547} +03/04/2022 18:40:29 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 18:40:34 - INFO - codeparrot_training - Step 24874: {'lr': 0.00047110508077980774, 'samples': 12736000, 'steps': 24874, 'loss/train': 2.0031323432922363} +03/04/2022 18:40:37 - INFO - codeparrot_training - Step 24875: {'lr': 0.00047110260411901674, 'samples': 12736512, 'steps': 24875, 'loss/train': 1.9324493408203125} +03/04/2022 18:40:37 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/04/2022 18:40:42 - INFO - codeparrot_training - Step 24876: {'lr': 0.0004711001273586003, 'samples': 12737024, 'steps': 24876, 'loss/train': 2.104465961456299} +03/04/2022 18:40:45 - INFO - codeparrot_training - Step 24877: {'lr': 0.0004710976504985596, 'samples': 12737536, 'steps': 24877, 'loss/train': 1.1745017766952515} +03/04/2022 18:40:46 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 18:40:50 - INFO - codeparrot_training - Step 24878: {'lr': 0.00047109517353889575, 'samples': 12738048, 'steps': 24878, 'loss/train': 2.057974100112915} +03/04/2022 18:40:54 - INFO - codeparrot_training - Step 24879: {'lr': 0.0004710926964796097, 'samples': 12738560, 'steps': 24879, 'loss/train': 1.3076591491699219} +03/04/2022 18:40:55 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 18:40:59 - INFO - codeparrot_training - Step 24880: {'lr': 0.00047109021932070284, 'samples': 12739072, 'steps': 24880, 'loss/train': 0.7329766154289246} +03/04/2022 18:41:02 - INFO - codeparrot_training - Step 24881: {'lr': 0.00047108774206217605, 'samples': 12739584, 'steps': 24881, 'loss/train': 1.622717022895813} +03/04/2022 18:41:03 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/04/2022 18:41:07 - INFO - codeparrot_training - Step 24882: {'lr': 0.00047108526470403055, 'samples': 12740096, 'steps': 24882, 'loss/train': 1.7440543174743652} +03/04/2022 18:41:11 - INFO - codeparrot_training - Step 24883: {'lr': 0.0004710827872462674, 'samples': 12740608, 'steps': 24883, 'loss/train': 1.5303852558135986} +03/04/2022 18:41:12 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/04/2022 18:41:16 - INFO - codeparrot_training - Step 24884: {'lr': 0.00047108030968888784, 'samples': 12741120, 'steps': 24884, 'loss/train': 1.5286173820495605} +03/04/2022 18:41:19 - INFO - codeparrot_training - Step 24885: {'lr': 0.00047107783203189285, 'samples': 12741632, 'steps': 24885, 'loss/train': 1.8543621301651} +03/04/2022 18:41:20 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 18:41:24 - INFO - codeparrot_training - Step 24886: {'lr': 0.0004710753542752836, 'samples': 12742144, 'steps': 24886, 'loss/train': 1.5102683305740356} +03/04/2022 18:41:27 - INFO - codeparrot_training - Step 24887: {'lr': 0.0004710728764190612, 'samples': 12742656, 'steps': 24887, 'loss/train': 1.116586446762085} +03/04/2022 18:41:28 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 18:41:33 - INFO - codeparrot_training - Step 24888: {'lr': 0.0004710703984632268, 'samples': 12743168, 'steps': 24888, 'loss/train': 1.1865496635437012} +03/04/2022 18:41:36 - INFO - codeparrot_training - Step 24889: {'lr': 0.0004710679204077815, 'samples': 12743680, 'steps': 24889, 'loss/train': 2.4639081954956055} +03/04/2022 18:41:37 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 18:41:41 - INFO - codeparrot_training - Step 24890: {'lr': 0.0004710654422527264, 'samples': 12744192, 'steps': 24890, 'loss/train': 2.2100818157196045} +03/04/2022 18:41:44 - INFO - codeparrot_training - Step 24891: {'lr': 0.0004710629639980626, 'samples': 12744704, 'steps': 24891, 'loss/train': 0.8656883835792542} +03/04/2022 18:41:45 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/04/2022 18:41:49 - INFO - codeparrot_training - Step 24892: {'lr': 0.0004710604856437912, 'samples': 12745216, 'steps': 24892, 'loss/train': 2.1469385623931885} +03/04/2022 18:41:53 - INFO - codeparrot_training - Step 24893: {'lr': 0.00047105800718991343, 'samples': 12745728, 'steps': 24893, 'loss/train': 1.8515512943267822} +03/04/2022 18:41:54 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 18:41:58 - INFO - codeparrot_training - Step 24894: {'lr': 0.0004710555286364303, 'samples': 12746240, 'steps': 24894, 'loss/train': 2.153735876083374} +03/04/2022 18:42:01 - INFO - codeparrot_training - Step 24895: {'lr': 0.000471053049983343, 'samples': 12746752, 'steps': 24895, 'loss/train': 2.042614459991455} +03/04/2022 18:42:02 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 18:42:06 - INFO - codeparrot_training - Step 24896: {'lr': 0.0004710505712306526, 'samples': 12747264, 'steps': 24896, 'loss/train': 2.0345306396484375} +03/04/2022 18:42:09 - INFO - codeparrot_training - Step 24897: {'lr': 0.00047104809237836023, 'samples': 12747776, 'steps': 24897, 'loss/train': 1.5434627532958984} +03/04/2022 18:42:11 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/04/2022 18:42:15 - INFO - codeparrot_training - Step 24898: {'lr': 0.0004710456134264669, 'samples': 12748288, 'steps': 24898, 'loss/train': 2.387312412261963} +03/04/2022 18:42:18 - INFO - codeparrot_training - Step 24899: {'lr': 0.0004710431343749739, 'samples': 12748800, 'steps': 24899, 'loss/train': 1.9605172872543335} +03/04/2022 18:42:19 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 18:42:24 - INFO - codeparrot_training - Step 24900: {'lr': 0.0004710406552238823, 'samples': 12749312, 'steps': 24900, 'loss/train': 0.6529803276062012} +03/04/2022 18:42:27 - INFO - codeparrot_training - Step 24901: {'lr': 0.0004710381759731932, 'samples': 12749824, 'steps': 24901, 'loss/train': 0.7979986071586609} +03/04/2022 18:42:29 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/04/2022 18:42:32 - INFO - codeparrot_training - Step 24902: {'lr': 0.0004710356966229077, 'samples': 12750336, 'steps': 24902, 'loss/train': 2.3305885791778564} +03/04/2022 18:42:35 - INFO - codeparrot_training - Step 24903: {'lr': 0.00047103321717302684, 'samples': 12750848, 'steps': 24903, 'loss/train': 2.0249645709991455} +03/04/2022 18:42:37 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/04/2022 18:42:40 - INFO - codeparrot_training - Step 24904: {'lr': 0.00047103073762355186, 'samples': 12751360, 'steps': 24904, 'loss/train': 2.050994634628296} +03/04/2022 18:42:44 - INFO - codeparrot_training - Step 24905: {'lr': 0.0004710282579744839, 'samples': 12751872, 'steps': 24905, 'loss/train': 1.5681874752044678} +03/04/2022 18:42:45 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 18:42:49 - INFO - codeparrot_training - Step 24906: {'lr': 0.000471025778225824, 'samples': 12752384, 'steps': 24906, 'loss/train': 1.983225703239441} +03/04/2022 18:42:52 - INFO - codeparrot_training - Step 24907: {'lr': 0.0004710232983775733, 'samples': 12752896, 'steps': 24907, 'loss/train': 2.021562099456787} +03/04/2022 18:42:54 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/04/2022 18:42:57 - INFO - codeparrot_training - Step 24908: {'lr': 0.0004710208184297329, 'samples': 12753408, 'steps': 24908, 'loss/train': 1.7951819896697998} +03/04/2022 18:43:01 - INFO - codeparrot_training - Step 24909: {'lr': 0.0004710183383823039, 'samples': 12753920, 'steps': 24909, 'loss/train': 1.7496154308319092} +03/04/2022 18:43:03 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 18:43:06 - INFO - codeparrot_training - Step 24910: {'lr': 0.00047101585823528745, 'samples': 12754432, 'steps': 24910, 'loss/train': 2.1086714267730713} +03/04/2022 18:43:09 - INFO - codeparrot_training - Step 24911: {'lr': 0.0004710133779886847, 'samples': 12754944, 'steps': 24911, 'loss/train': 1.8396482467651367} +03/04/2022 18:43:12 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 18:43:14 - INFO - codeparrot_training - Step 24912: {'lr': 0.00047101089764249674, 'samples': 12755456, 'steps': 24912, 'loss/train': 1.2248332500457764} +03/04/2022 18:43:18 - INFO - codeparrot_training - Step 24913: {'lr': 0.0004710084171967246, 'samples': 12755968, 'steps': 24913, 'loss/train': 2.089012384414673} +03/04/2022 18:43:20 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 18:43:23 - INFO - codeparrot_training - Step 24914: {'lr': 0.00047100593665136946, 'samples': 12756480, 'steps': 24914, 'loss/train': 1.5919601917266846} +03/04/2022 18:43:26 - INFO - codeparrot_training - Step 24915: {'lr': 0.0004710034560064326, 'samples': 12756992, 'steps': 24915, 'loss/train': 2.4778003692626953} +03/04/2022 18:43:28 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 18:43:31 - INFO - codeparrot_training - Step 24916: {'lr': 0.00047100097526191486, 'samples': 12757504, 'steps': 24916, 'loss/train': 0.6893563866615295} +03/04/2022 18:43:35 - INFO - codeparrot_training - Step 24917: {'lr': 0.0004709984944178176, 'samples': 12758016, 'steps': 24917, 'loss/train': 2.0988693237304688} +03/04/2022 18:43:37 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 18:43:40 - INFO - codeparrot_training - Step 24918: {'lr': 0.0004709960134741418, 'samples': 12758528, 'steps': 24918, 'loss/train': 1.8802374601364136} +03/04/2022 18:43:43 - INFO - codeparrot_training - Step 24919: {'lr': 0.00047099353243088856, 'samples': 12759040, 'steps': 24919, 'loss/train': 2.2433252334594727} +03/04/2022 18:43:46 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 18:43:48 - INFO - codeparrot_training - Step 24920: {'lr': 0.00047099105128805906, 'samples': 12759552, 'steps': 24920, 'loss/train': 2.100476026535034} +03/04/2022 18:43:52 - INFO - codeparrot_training - Step 24921: {'lr': 0.00047098857004565444, 'samples': 12760064, 'steps': 24921, 'loss/train': 2.0219154357910156} +03/04/2022 18:43:54 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 18:43:57 - INFO - codeparrot_training - Step 24922: {'lr': 0.00047098608870367576, 'samples': 12760576, 'steps': 24922, 'loss/train': 3.877866744995117} +03/04/2022 18:44:00 - INFO - codeparrot_training - Step 24923: {'lr': 0.00047098360726212406, 'samples': 12761088, 'steps': 24923, 'loss/train': 1.6774747371673584} +03/04/2022 18:44:03 - INFO - codeparrot_training - Step 24924: {'lr': 0.0004709811257210007, 'samples': 12761600, 'steps': 24924, 'loss/train': 1.8757973909378052} +03/04/2022 18:44:03 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 18:44:09 - INFO - codeparrot_training - Step 24925: {'lr': 0.0004709786440803066, 'samples': 12762112, 'steps': 24925, 'loss/train': 2.5808777809143066} +03/04/2022 18:44:12 - INFO - codeparrot_training - Step 24926: {'lr': 0.00047097616234004295, 'samples': 12762624, 'steps': 24926, 'loss/train': 1.5634788274765015} +03/04/2022 18:44:12 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 18:44:17 - INFO - codeparrot_training - Step 24927: {'lr': 0.00047097368050021083, 'samples': 12763136, 'steps': 24927, 'loss/train': 1.775482416152954} +03/04/2022 18:44:20 - INFO - codeparrot_training - Step 24928: {'lr': 0.0004709711985608114, 'samples': 12763648, 'steps': 24928, 'loss/train': 1.88935387134552} +03/04/2022 18:44:20 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 18:44:26 - INFO - codeparrot_training - Step 24929: {'lr': 0.0004709687165218457, 'samples': 12764160, 'steps': 24929, 'loss/train': 2.3144657611846924} +03/04/2022 18:44:29 - INFO - codeparrot_training - Step 24930: {'lr': 0.00047096623438331497, 'samples': 12764672, 'steps': 24930, 'loss/train': 1.6460819244384766} +03/04/2022 18:44:29 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 18:44:34 - INFO - codeparrot_training - Step 24931: {'lr': 0.00047096375214522026, 'samples': 12765184, 'steps': 24931, 'loss/train': 2.142514705657959} +03/04/2022 18:44:37 - INFO - codeparrot_training - Step 24932: {'lr': 0.0004709612698075627, 'samples': 12765696, 'steps': 24932, 'loss/train': 2.4411206245422363} +03/04/2022 18:44:38 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/04/2022 18:44:43 - INFO - codeparrot_training - Step 24933: {'lr': 0.00047095878737034335, 'samples': 12766208, 'steps': 24933, 'loss/train': 1.6176233291625977} +03/04/2022 18:44:46 - INFO - codeparrot_training - Step 24934: {'lr': 0.00047095630483356336, 'samples': 12766720, 'steps': 24934, 'loss/train': 1.9376760721206665} +03/04/2022 18:44:46 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 18:44:51 - INFO - codeparrot_training - Step 24935: {'lr': 0.00047095382219722396, 'samples': 12767232, 'steps': 24935, 'loss/train': 2.4250974655151367} +03/04/2022 18:44:54 - INFO - codeparrot_training - Step 24936: {'lr': 0.0004709513394613261, 'samples': 12767744, 'steps': 24936, 'loss/train': 1.857292652130127} +03/04/2022 18:44:55 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 18:45:00 - INFO - codeparrot_training - Step 24937: {'lr': 0.00047094885662587104, 'samples': 12768256, 'steps': 24937, 'loss/train': 2.092085599899292} +03/04/2022 18:45:03 - INFO - codeparrot_training - Step 24938: {'lr': 0.0004709463736908598, 'samples': 12768768, 'steps': 24938, 'loss/train': 1.8080271482467651} +03/04/2022 18:45:03 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 18:45:08 - INFO - codeparrot_training - Step 24939: {'lr': 0.0004709438906562935, 'samples': 12769280, 'steps': 24939, 'loss/train': 1.7955389022827148} +03/04/2022 18:45:11 - INFO - codeparrot_training - Step 24940: {'lr': 0.0004709414075221734, 'samples': 12769792, 'steps': 24940, 'loss/train': 1.6947675943374634} +03/04/2022 18:45:12 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/04/2022 18:45:17 - INFO - codeparrot_training - Step 24941: {'lr': 0.0004709389242885004, 'samples': 12770304, 'steps': 24941, 'loss/train': 2.4031200408935547} +03/04/2022 18:45:20 - INFO - codeparrot_training - Step 24942: {'lr': 0.00047093644095527574, 'samples': 12770816, 'steps': 24942, 'loss/train': 1.7674018144607544} +03/04/2022 18:45:20 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 18:45:25 - INFO - codeparrot_training - Step 24943: {'lr': 0.00047093395752250056, 'samples': 12771328, 'steps': 24943, 'loss/train': 2.4696271419525146} +03/04/2022 18:45:28 - INFO - codeparrot_training - Step 24944: {'lr': 0.000470931473990176, 'samples': 12771840, 'steps': 24944, 'loss/train': 0.6401808261871338} +03/04/2022 18:45:33 - INFO - codeparrot_training - Step 24945: {'lr': 0.00047092899035830303, 'samples': 12772352, 'steps': 24945, 'loss/train': 2.658026933670044} +03/04/2022 18:45:37 - INFO - codeparrot_training - Step 24946: {'lr': 0.00047092650662688295, 'samples': 12772864, 'steps': 24946, 'loss/train': 2.1546244621276855} +03/04/2022 18:45:37 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/04/2022 18:45:42 - INFO - codeparrot_training - Step 24947: {'lr': 0.00047092402279591674, 'samples': 12773376, 'steps': 24947, 'loss/train': 2.378253698348999} +03/04/2022 18:45:45 - INFO - codeparrot_training - Step 24948: {'lr': 0.00047092153886540554, 'samples': 12773888, 'steps': 24948, 'loss/train': 3.363250970840454} +03/04/2022 18:45:46 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 18:45:50 - INFO - codeparrot_training - Step 24949: {'lr': 0.0004709190548353506, 'samples': 12774400, 'steps': 24949, 'loss/train': 1.3287534713745117} +03/04/2022 18:45:53 - INFO - codeparrot_training - Step 24950: {'lr': 0.0004709165707057529, 'samples': 12774912, 'steps': 24950, 'loss/train': 2.2605483531951904} +03/04/2022 18:45:54 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/04/2022 18:45:59 - INFO - codeparrot_training - Step 24951: {'lr': 0.0004709140864766136, 'samples': 12775424, 'steps': 24951, 'loss/train': 1.9181729555130005} +03/04/2022 18:46:02 - INFO - codeparrot_training - Step 24952: {'lr': 0.0004709116021479338, 'samples': 12775936, 'steps': 24952, 'loss/train': 1.3643338680267334} +03/04/2022 18:46:02 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 18:46:07 - INFO - codeparrot_training - Step 24953: {'lr': 0.00047090911771971466, 'samples': 12776448, 'steps': 24953, 'loss/train': 2.471200466156006} +03/04/2022 18:46:10 - INFO - codeparrot_training - Step 24954: {'lr': 0.0004709066331919573, 'samples': 12776960, 'steps': 24954, 'loss/train': 1.9177130460739136} +03/04/2022 18:46:10 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 18:46:16 - INFO - codeparrot_training - Step 24955: {'lr': 0.0004709041485646628, 'samples': 12777472, 'steps': 24955, 'loss/train': 1.465484619140625} +03/04/2022 18:46:19 - INFO - codeparrot_training - Step 24956: {'lr': 0.0004709016638378323, 'samples': 12777984, 'steps': 24956, 'loss/train': 1.728784203529358} +03/04/2022 18:46:19 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 18:46:24 - INFO - codeparrot_training - Step 24957: {'lr': 0.00047089917901146694, 'samples': 12778496, 'steps': 24957, 'loss/train': 1.9586820602416992} +03/04/2022 18:46:27 - INFO - codeparrot_training - Step 24958: {'lr': 0.0004708966940855678, 'samples': 12779008, 'steps': 24958, 'loss/train': 1.8313367366790771} +03/04/2022 18:46:27 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 18:46:32 - INFO - codeparrot_training - Step 24959: {'lr': 0.00047089420906013603, 'samples': 12779520, 'steps': 24959, 'loss/train': 1.7662988901138306} +03/04/2022 18:46:36 - INFO - codeparrot_training - Step 24960: {'lr': 0.0004708917239351727, 'samples': 12780032, 'steps': 24960, 'loss/train': 2.4338669776916504} +03/04/2022 18:46:36 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/04/2022 18:46:41 - INFO - codeparrot_training - Step 24961: {'lr': 0.000470889238710679, 'samples': 12780544, 'steps': 24961, 'loss/train': 1.1666676998138428} +03/04/2022 18:46:44 - INFO - codeparrot_training - Step 24962: {'lr': 0.00047088675338665596, 'samples': 12781056, 'steps': 24962, 'loss/train': 1.6253352165222168} +03/04/2022 18:46:45 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/04/2022 18:46:49 - INFO - codeparrot_training - Step 24963: {'lr': 0.00047088426796310486, 'samples': 12781568, 'steps': 24963, 'loss/train': 2.147866725921631} +03/04/2022 18:46:53 - INFO - codeparrot_training - Step 24964: {'lr': 0.00047088178244002665, 'samples': 12782080, 'steps': 24964, 'loss/train': 2.0557682514190674} +03/04/2022 18:46:53 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 18:46:58 - INFO - codeparrot_training - Step 24965: {'lr': 0.00047087929681742253, 'samples': 12782592, 'steps': 24965, 'loss/train': 2.175708532333374} +03/04/2022 18:47:01 - INFO - codeparrot_training - Step 24966: {'lr': 0.00047087681109529364, 'samples': 12783104, 'steps': 24966, 'loss/train': 2.0590152740478516} +03/04/2022 18:47:01 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 18:47:06 - INFO - codeparrot_training - Step 24967: {'lr': 0.00047087432527364106, 'samples': 12783616, 'steps': 24967, 'loss/train': 1.9796541929244995} +03/04/2022 18:47:10 - INFO - codeparrot_training - Step 24968: {'lr': 0.0004708718393524659, 'samples': 12784128, 'steps': 24968, 'loss/train': 2.414903402328491} +03/04/2022 18:47:10 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 18:47:15 - INFO - codeparrot_training - Step 24969: {'lr': 0.0004708693533317693, 'samples': 12784640, 'steps': 24969, 'loss/train': 2.6535918712615967} +03/04/2022 18:47:18 - INFO - codeparrot_training - Step 24970: {'lr': 0.00047086686721155237, 'samples': 12785152, 'steps': 24970, 'loss/train': 1.1157567501068115} +03/04/2022 18:47:18 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/04/2022 18:47:23 - INFO - codeparrot_training - Step 24971: {'lr': 0.00047086438099181615, 'samples': 12785664, 'steps': 24971, 'loss/train': 2.202526807785034} +03/04/2022 18:47:26 - INFO - codeparrot_training - Step 24972: {'lr': 0.00047086189467256194, 'samples': 12786176, 'steps': 24972, 'loss/train': 0.7043659687042236} +03/04/2022 18:47:27 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 18:47:32 - INFO - codeparrot_training - Step 24973: {'lr': 0.0004708594082537908, 'samples': 12786688, 'steps': 24973, 'loss/train': 1.529456615447998} +03/04/2022 18:47:35 - INFO - codeparrot_training - Step 24974: {'lr': 0.00047085692173550375, 'samples': 12787200, 'steps': 24974, 'loss/train': 2.5783801078796387} +03/04/2022 18:47:35 - INFO - codeparrot_training - Skipping example with length 553 (seq_length=1024) +03/04/2022 18:47:40 - INFO - codeparrot_training - Step 24975: {'lr': 0.00047085443511770206, 'samples': 12787712, 'steps': 24975, 'loss/train': 2.3002161979675293} +03/04/2022 18:47:43 - INFO - codeparrot_training - Step 24976: {'lr': 0.0004708519484003867, 'samples': 12788224, 'steps': 24976, 'loss/train': 1.3256142139434814} +03/04/2022 18:47:44 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 18:47:48 - INFO - codeparrot_training - Step 24977: {'lr': 0.0004708494615835589, 'samples': 12788736, 'steps': 24977, 'loss/train': 0.8662199378013611} +03/04/2022 18:47:52 - INFO - codeparrot_training - Step 24978: {'lr': 0.00047084697466721973, 'samples': 12789248, 'steps': 24978, 'loss/train': 1.232911229133606} +03/04/2022 18:47:52 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 18:47:57 - INFO - codeparrot_training - Step 24979: {'lr': 0.0004708444876513703, 'samples': 12789760, 'steps': 24979, 'loss/train': 1.8080540895462036} +03/04/2022 18:48:00 - INFO - codeparrot_training - Step 24980: {'lr': 0.0004708420005360118, 'samples': 12790272, 'steps': 24980, 'loss/train': 1.7607859373092651} +03/04/2022 18:48:01 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 18:48:05 - INFO - codeparrot_training - Step 24981: {'lr': 0.0004708395133211452, 'samples': 12790784, 'steps': 24981, 'loss/train': 2.2153525352478027} +03/04/2022 18:48:09 - INFO - codeparrot_training - Step 24982: {'lr': 0.0004708370260067718, 'samples': 12791296, 'steps': 24982, 'loss/train': 1.7659032344818115} +03/04/2022 18:48:09 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 18:48:14 - INFO - codeparrot_training - Step 24983: {'lr': 0.00047083453859289267, 'samples': 12791808, 'steps': 24983, 'loss/train': 2.293233633041382} +03/04/2022 18:48:17 - INFO - codeparrot_training - Step 24984: {'lr': 0.00047083205107950886, 'samples': 12792320, 'steps': 24984, 'loss/train': 2.0944364070892334} +03/04/2022 18:48:17 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 18:48:22 - INFO - codeparrot_training - Step 24985: {'lr': 0.00047082956346662153, 'samples': 12792832, 'steps': 24985, 'loss/train': 2.52630615234375} +03/04/2022 18:48:25 - INFO - codeparrot_training - Step 24986: {'lr': 0.00047082707575423177, 'samples': 12793344, 'steps': 24986, 'loss/train': 1.5145107507705688} +03/04/2022 18:48:26 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 18:48:31 - INFO - codeparrot_training - Step 24987: {'lr': 0.00047082458794234087, 'samples': 12793856, 'steps': 24987, 'loss/train': 1.5130829811096191} +03/04/2022 18:48:34 - INFO - codeparrot_training - Step 24988: {'lr': 0.0004708221000309497, 'samples': 12794368, 'steps': 24988, 'loss/train': 0.8722270131111145} +03/04/2022 18:48:34 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 18:48:39 - INFO - codeparrot_training - Step 24989: {'lr': 0.0004708196120200595, 'samples': 12794880, 'steps': 24989, 'loss/train': 1.739190936088562} +03/04/2022 18:48:42 - INFO - codeparrot_training - Step 24990: {'lr': 0.0004708171239096715, 'samples': 12795392, 'steps': 24990, 'loss/train': 2.555328845977783} +03/04/2022 18:48:48 - INFO - codeparrot_training - Step 24991: {'lr': 0.00047081463569978655, 'samples': 12795904, 'steps': 24991, 'loss/train': 1.9925888776779175} +03/04/2022 18:48:51 - INFO - codeparrot_training - Step 24992: {'lr': 0.00047081214739040606, 'samples': 12796416, 'steps': 24992, 'loss/train': 2.0597944259643555} +03/04/2022 18:48:51 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 18:48:57 - INFO - codeparrot_training - Step 24993: {'lr': 0.000470809658981531, 'samples': 12796928, 'steps': 24993, 'loss/train': 2.0660297870635986} +03/04/2022 18:49:00 - INFO - codeparrot_training - Step 24994: {'lr': 0.00047080717047316245, 'samples': 12797440, 'steps': 24994, 'loss/train': 1.3143672943115234} +03/04/2022 18:49:03 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/04/2022 18:49:05 - INFO - codeparrot_training - Step 24995: {'lr': 0.0004708046818653017, 'samples': 12797952, 'steps': 24995, 'loss/train': 1.9656524658203125} +03/04/2022 18:49:08 - INFO - codeparrot_training - Step 24996: {'lr': 0.0004708021931579497, 'samples': 12798464, 'steps': 24996, 'loss/train': 2.3586835861206055} +03/04/2022 18:49:11 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 18:49:14 - INFO - codeparrot_training - Step 24997: {'lr': 0.00047079970435110765, 'samples': 12798976, 'steps': 24997, 'loss/train': 1.260233998298645} +03/04/2022 18:49:17 - INFO - codeparrot_training - Step 24998: {'lr': 0.0004707972154447766, 'samples': 12799488, 'steps': 24998, 'loss/train': 1.7081503868103027} +03/04/2022 18:49:20 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 18:49:22 - INFO - codeparrot_training - Step 24999: {'lr': 0.00047079472643895784, 'samples': 12800000, 'steps': 24999, 'loss/train': 1.3369230031967163} +03/04/2022 18:49:22 - INFO - codeparrot_training - Evaluating and saving model checkpoint