diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -17403,3 +17403,1009 @@ Use FP16 precision: False 02/24/2022 23:34:53 - INFO - codeparrot_training - Step 16998: {'lr': 0.00038891976684431395, 'samples': 8703488, 'steps': 16998, 'loss/train': 2.391925096511841} 02/24/2022 23:34:59 - INFO - codeparrot_training - Step 16999: {'lr': 0.0003889061628471222, 'samples': 8704000, 'steps': 16999, 'loss/train': 3.4161200523376465} 02/24/2022 23:34:59 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 23:35:16 - WARNING - huggingface_hub.repository - Several commits (17) will be pushed upstream. +02/24/2022 23:35:16 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 23:35:49 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 704f602..4fad2a5 floral-grass-11 -> floral-grass-11 + +02/24/2022 23:35:54 - INFO - codeparrot_training - Step 17000: {'lr': 0.00038889255825490053, 'samples': 8704512, 'steps': 17000, 'loss/train': 1.384848713874817} +02/24/2022 23:35:59 - INFO - codeparrot_training - Step 17001: {'lr': 0.0003888789530677073, 'samples': 8705024, 'steps': 17001, 'loss/train': 4.113182067871094} +02/24/2022 23:36:03 - INFO - codeparrot_training - Step 17002: {'lr': 0.00038886534728560073, 'samples': 8705536, 'steps': 17002, 'loss/train': 2.253518581390381} +02/24/2022 23:36:08 - INFO - codeparrot_training - Step 17003: {'lr': 0.0003888517409086391, 'samples': 8706048, 'steps': 17003, 'loss/train': 1.5202544927597046} +02/24/2022 23:36:12 - INFO - codeparrot_training - Step 17004: {'lr': 0.0003888381339368807, 'samples': 8706560, 'steps': 17004, 'loss/train': 1.6984606981277466} +02/24/2022 23:36:17 - INFO - codeparrot_training - Step 17005: {'lr': 0.00038882452637038377, 'samples': 8707072, 'steps': 17005, 'loss/train': 2.1926732063293457} +02/24/2022 23:36:21 - INFO - codeparrot_training - Step 17006: {'lr': 0.00038881091820920676, 'samples': 8707584, 'steps': 17006, 'loss/train': 1.8134039640426636} +02/24/2022 23:36:26 - INFO - codeparrot_training - Step 17007: {'lr': 0.00038879730945340775, 'samples': 8708096, 'steps': 17007, 'loss/train': 2.5462334156036377} +02/24/2022 23:36:30 - INFO - codeparrot_training - Step 17008: {'lr': 0.0003887837001030452, 'samples': 8708608, 'steps': 17008, 'loss/train': 2.75063157081604} +02/24/2022 23:36:35 - INFO - codeparrot_training - Step 17009: {'lr': 0.00038877009015817734, 'samples': 8709120, 'steps': 17009, 'loss/train': 1.5253517627716064} +02/24/2022 23:36:39 - INFO - codeparrot_training - Step 17010: {'lr': 0.0003887564796188625, 'samples': 8709632, 'steps': 17010, 'loss/train': 2.22629451751709} +02/24/2022 23:36:45 - INFO - codeparrot_training - Step 17011: {'lr': 0.0003887428684851589, 'samples': 8710144, 'steps': 17011, 'loss/train': 2.340531826019287} +02/24/2022 23:36:49 - INFO - codeparrot_training - Step 17012: {'lr': 0.00038872925675712493, 'samples': 8710656, 'steps': 17012, 'loss/train': 1.8950774669647217} +02/24/2022 23:36:54 - INFO - codeparrot_training - Step 17013: {'lr': 0.00038871564443481886, 'samples': 8711168, 'steps': 17013, 'loss/train': 2.8091981410980225} +02/24/2022 23:36:58 - INFO - codeparrot_training - Step 17014: {'lr': 0.0003887020315182991, 'samples': 8711680, 'steps': 17014, 'loss/train': 1.9716668128967285} +02/24/2022 23:37:03 - INFO - codeparrot_training - Step 17015: {'lr': 0.0003886884180076238, 'samples': 8712192, 'steps': 17015, 'loss/train': 1.541662573814392} +02/24/2022 23:37:07 - INFO - codeparrot_training - Step 17016: {'lr': 0.0003886748039028514, 'samples': 8712704, 'steps': 17016, 'loss/train': 2.596200942993164} +02/24/2022 23:37:12 - INFO - codeparrot_training - Step 17017: {'lr': 0.00038866118920404013, 'samples': 8713216, 'steps': 17017, 'loss/train': 2.6591274738311768} +02/24/2022 23:37:16 - INFO - codeparrot_training - Step 17018: {'lr': 0.0003886475739112484, 'samples': 8713728, 'steps': 17018, 'loss/train': 1.9131158590316772} +02/24/2022 23:37:21 - INFO - codeparrot_training - Step 17019: {'lr': 0.0003886339580245344, 'samples': 8714240, 'steps': 17019, 'loss/train': 1.7564442157745361} +02/24/2022 23:37:25 - INFO - codeparrot_training - Step 17020: {'lr': 0.00038862034154395664, 'samples': 8714752, 'steps': 17020, 'loss/train': 1.9179941415786743} +02/24/2022 23:37:31 - INFO - codeparrot_training - Step 17021: {'lr': 0.00038860672446957336, 'samples': 8715264, 'steps': 17021, 'loss/train': 2.935488700866699} +02/24/2022 23:37:35 - INFO - codeparrot_training - Step 17022: {'lr': 0.00038859310680144276, 'samples': 8715776, 'steps': 17022, 'loss/train': 1.9156081676483154} +02/24/2022 23:37:40 - INFO - codeparrot_training - Step 17023: {'lr': 0.0003885794885396234, 'samples': 8716288, 'steps': 17023, 'loss/train': 2.3114750385284424} +02/24/2022 23:37:44 - INFO - codeparrot_training - Step 17024: {'lr': 0.00038856586968417353, 'samples': 8716800, 'steps': 17024, 'loss/train': 2.5909228324890137} +02/24/2022 23:37:49 - INFO - codeparrot_training - Step 17025: {'lr': 0.0003885522502351514, 'samples': 8717312, 'steps': 17025, 'loss/train': 5.457999229431152} +02/24/2022 23:37:53 - INFO - codeparrot_training - Step 17026: {'lr': 0.0003885386301926155, 'samples': 8717824, 'steps': 17026, 'loss/train': 2.549170970916748} +02/24/2022 23:37:58 - INFO - codeparrot_training - Step 17027: {'lr': 0.00038852500955662407, 'samples': 8718336, 'steps': 17027, 'loss/train': 1.4599891901016235} +02/24/2022 23:38:02 - INFO - codeparrot_training - Step 17028: {'lr': 0.0003885113883272355, 'samples': 8718848, 'steps': 17028, 'loss/train': 2.0304415225982666} +02/24/2022 23:38:07 - INFO - codeparrot_training - Step 17029: {'lr': 0.0003884977665045081, 'samples': 8719360, 'steps': 17029, 'loss/train': 1.807279109954834} +02/24/2022 23:38:11 - INFO - codeparrot_training - Step 17030: {'lr': 0.0003884841440885003, 'samples': 8719872, 'steps': 17030, 'loss/train': 0.4575875699520111} +02/24/2022 23:38:17 - INFO - codeparrot_training - Step 17031: {'lr': 0.0003884705210792703, 'samples': 8720384, 'steps': 17031, 'loss/train': 1.2923752069473267} +02/24/2022 23:38:20 - INFO - codeparrot_training - Step 17032: {'lr': 0.00038845689747687664, 'samples': 8720896, 'steps': 17032, 'loss/train': 2.4116322994232178} +02/24/2022 23:38:26 - INFO - codeparrot_training - Step 17033: {'lr': 0.0003884432732813776, 'samples': 8721408, 'steps': 17033, 'loss/train': 1.3514584302902222} +02/24/2022 23:38:29 - INFO - codeparrot_training - Step 17034: {'lr': 0.00038842964849283146, 'samples': 8721920, 'steps': 17034, 'loss/train': 2.2514805793762207} +02/24/2022 23:38:35 - INFO - codeparrot_training - Step 17035: {'lr': 0.0003884160231112968, 'samples': 8722432, 'steps': 17035, 'loss/train': 1.9595551490783691} +02/24/2022 23:38:38 - INFO - codeparrot_training - Step 17036: {'lr': 0.00038840239713683165, 'samples': 8722944, 'steps': 17036, 'loss/train': 0.9615404009819031} +02/24/2022 23:38:44 - INFO - codeparrot_training - Step 17037: {'lr': 0.00038838877056949475, 'samples': 8723456, 'steps': 17037, 'loss/train': 1.247662901878357} +02/24/2022 23:38:47 - INFO - codeparrot_training - Step 17038: {'lr': 0.00038837514340934424, 'samples': 8723968, 'steps': 17038, 'loss/train': 1.8019756078720093} +02/24/2022 23:38:53 - INFO - codeparrot_training - Step 17039: {'lr': 0.0003883615156564385, 'samples': 8724480, 'steps': 17039, 'loss/train': 0.6995099186897278} +02/24/2022 23:38:56 - INFO - codeparrot_training - Step 17040: {'lr': 0.000388347887310836, 'samples': 8724992, 'steps': 17040, 'loss/train': 2.377166986465454} +02/24/2022 23:39:04 - INFO - codeparrot_training - Step 17041: {'lr': 0.0003883342583725952, 'samples': 8725504, 'steps': 17041, 'loss/train': 2.044576406478882} +02/24/2022 23:39:07 - INFO - codeparrot_training - Step 17042: {'lr': 0.0003883206288417742, 'samples': 8726016, 'steps': 17042, 'loss/train': 1.3499064445495605} +02/24/2022 23:39:13 - INFO - codeparrot_training - Step 17043: {'lr': 0.0003883069987184316, 'samples': 8726528, 'steps': 17043, 'loss/train': 1.1370898485183716} +02/24/2022 23:39:16 - INFO - codeparrot_training - Step 17044: {'lr': 0.0003882933680026257, 'samples': 8727040, 'steps': 17044, 'loss/train': 1.7531712055206299} +02/24/2022 23:39:22 - INFO - codeparrot_training - Step 17045: {'lr': 0.000388279736694415, 'samples': 8727552, 'steps': 17045, 'loss/train': 1.5097105503082275} +02/24/2022 23:39:25 - INFO - codeparrot_training - Step 17046: {'lr': 0.00038826610479385774, 'samples': 8728064, 'steps': 17046, 'loss/train': 1.8655685186386108} +02/24/2022 23:39:31 - INFO - codeparrot_training - Step 17047: {'lr': 0.00038825247230101244, 'samples': 8728576, 'steps': 17047, 'loss/train': 1.5133527517318726} +02/24/2022 23:39:37 - INFO - codeparrot_training - Step 17048: {'lr': 0.0003882388392159375, 'samples': 8729088, 'steps': 17048, 'loss/train': 2.1465346813201904} +02/24/2022 23:39:40 - INFO - codeparrot_training - Step 17049: {'lr': 0.0003882252055386912, 'samples': 8729600, 'steps': 17049, 'loss/train': 2.474968433380127} +02/24/2022 23:39:43 - INFO - codeparrot_training - Step 17050: {'lr': 0.00038821157126933204, 'samples': 8730112, 'steps': 17050, 'loss/train': 2.2572433948516846} +02/24/2022 23:39:51 - INFO - codeparrot_training - Step 17051: {'lr': 0.00038819793640791834, 'samples': 8730624, 'steps': 17051, 'loss/train': 1.7960113286972046} +02/24/2022 23:39:54 - INFO - codeparrot_training - Step 17052: {'lr': 0.0003881843009545086, 'samples': 8731136, 'steps': 17052, 'loss/train': 1.1132287979125977} +02/24/2022 23:40:00 - INFO - codeparrot_training - Step 17053: {'lr': 0.0003881706649091612, 'samples': 8731648, 'steps': 17053, 'loss/train': 0.7072966694831848} +02/24/2022 23:40:06 - INFO - codeparrot_training - Step 17054: {'lr': 0.0003881570282719346, 'samples': 8732160, 'steps': 17054, 'loss/train': 2.306298017501831} +02/24/2022 23:40:09 - INFO - codeparrot_training - Step 17055: {'lr': 0.00038814339104288706, 'samples': 8732672, 'steps': 17055, 'loss/train': 2.1409566402435303} +02/24/2022 23:40:15 - INFO - codeparrot_training - Step 17056: {'lr': 0.00038812975322207713, 'samples': 8733184, 'steps': 17056, 'loss/train': 1.8545455932617188} +02/24/2022 23:40:18 - INFO - codeparrot_training - Step 17057: {'lr': 0.0003881161148095632, 'samples': 8733696, 'steps': 17057, 'loss/train': 1.762295126914978} +02/24/2022 23:40:24 - INFO - codeparrot_training - Step 17058: {'lr': 0.0003881024758054037, 'samples': 8734208, 'steps': 17058, 'loss/train': 2.726966381072998} +02/24/2022 23:40:27 - INFO - codeparrot_training - Step 17059: {'lr': 0.00038808883620965705, 'samples': 8734720, 'steps': 17059, 'loss/train': 2.3146941661834717} +02/24/2022 23:40:33 - INFO - codeparrot_training - Step 17060: {'lr': 0.00038807519602238174, 'samples': 8735232, 'steps': 17060, 'loss/train': 1.8632471561431885} +02/24/2022 23:40:36 - INFO - codeparrot_training - Step 17061: {'lr': 0.00038806155524363594, 'samples': 8735744, 'steps': 17061, 'loss/train': 1.590744972229004} +02/24/2022 23:40:43 - INFO - codeparrot_training - Step 17062: {'lr': 0.00038804791387347844, 'samples': 8736256, 'steps': 17062, 'loss/train': 0.5118932723999023} +02/24/2022 23:40:47 - INFO - codeparrot_training - Step 17063: {'lr': 0.0003880342719119675, 'samples': 8736768, 'steps': 17063, 'loss/train': 2.611489772796631} +02/24/2022 23:40:52 - INFO - codeparrot_training - Step 17064: {'lr': 0.0003880206293591615, 'samples': 8737280, 'steps': 17064, 'loss/train': 1.9074097871780396} +02/24/2022 23:40:56 - INFO - codeparrot_training - Step 17065: {'lr': 0.000388006986215119, 'samples': 8737792, 'steps': 17065, 'loss/train': 1.3274916410446167} +02/24/2022 23:41:01 - INFO - codeparrot_training - Step 17066: {'lr': 0.0003879933424798984, 'samples': 8738304, 'steps': 17066, 'loss/train': 0.15926577150821686} +02/24/2022 23:41:05 - INFO - codeparrot_training - Step 17067: {'lr': 0.0003879796981535582, 'samples': 8738816, 'steps': 17067, 'loss/train': 2.1513419151306152} +02/24/2022 23:41:10 - INFO - codeparrot_training - Step 17068: {'lr': 0.00038796605323615664, 'samples': 8739328, 'steps': 17068, 'loss/train': 2.6258888244628906} +02/24/2022 23:41:14 - INFO - codeparrot_training - Step 17069: {'lr': 0.00038795240772775244, 'samples': 8739840, 'steps': 17069, 'loss/train': 1.524785041809082} +02/24/2022 23:41:19 - INFO - codeparrot_training - Step 17070: {'lr': 0.0003879387616284038, 'samples': 8740352, 'steps': 17070, 'loss/train': 2.2197866439819336} +02/24/2022 23:41:23 - INFO - codeparrot_training - Step 17071: {'lr': 0.0003879251149381694, 'samples': 8740864, 'steps': 17071, 'loss/train': 2.068557024002075} +02/24/2022 23:41:30 - INFO - codeparrot_training - Step 17072: {'lr': 0.0003879114676571076, 'samples': 8741376, 'steps': 17072, 'loss/train': 2.4184749126434326} +02/24/2022 23:41:34 - INFO - codeparrot_training - Step 17073: {'lr': 0.00038789781978527683, 'samples': 8741888, 'steps': 17073, 'loss/train': 2.5602409839630127} +02/24/2022 23:41:39 - INFO - codeparrot_training - Step 17074: {'lr': 0.0003878841713227356, 'samples': 8742400, 'steps': 17074, 'loss/train': 2.6567413806915283} +02/24/2022 23:41:43 - INFO - codeparrot_training - Step 17075: {'lr': 0.00038787052226954235, 'samples': 8742912, 'steps': 17075, 'loss/train': 2.5525169372558594} +02/24/2022 23:41:48 - INFO - codeparrot_training - Step 17076: {'lr': 0.0003878568726257556, 'samples': 8743424, 'steps': 17076, 'loss/train': 4.4807233810424805} +02/24/2022 23:41:52 - INFO - codeparrot_training - Step 17077: {'lr': 0.0003878432223914338, 'samples': 8743936, 'steps': 17077, 'loss/train': 2.2350618839263916} +02/24/2022 23:41:57 - INFO - codeparrot_training - Step 17078: {'lr': 0.00038782957156663535, 'samples': 8744448, 'steps': 17078, 'loss/train': 2.6714046001434326} +02/24/2022 23:42:01 - INFO - codeparrot_training - Step 17079: {'lr': 0.0003878159201514188, 'samples': 8744960, 'steps': 17079, 'loss/train': 2.422719717025757} +02/24/2022 23:42:07 - INFO - codeparrot_training - Step 17080: {'lr': 0.00038780226814584263, 'samples': 8745472, 'steps': 17080, 'loss/train': 1.8320813179016113} +02/24/2022 23:42:10 - INFO - codeparrot_training - Step 17081: {'lr': 0.00038778861554996524, 'samples': 8745984, 'steps': 17081, 'loss/train': 1.4904323816299438} +02/24/2022 23:42:16 - INFO - codeparrot_training - Step 17082: {'lr': 0.00038777496236384526, 'samples': 8746496, 'steps': 17082, 'loss/train': 1.8217389583587646} +02/24/2022 23:42:19 - INFO - codeparrot_training - Step 17083: {'lr': 0.000387761308587541, 'samples': 8747008, 'steps': 17083, 'loss/train': 2.243881940841675} +02/24/2022 23:42:25 - INFO - codeparrot_training - Step 17084: {'lr': 0.0003877476542211111, 'samples': 8747520, 'steps': 17084, 'loss/train': 0.8803138136863708} +02/24/2022 23:42:28 - INFO - codeparrot_training - Step 17085: {'lr': 0.00038773399926461395, 'samples': 8748032, 'steps': 17085, 'loss/train': 2.692265510559082} +02/24/2022 23:42:34 - INFO - codeparrot_training - Step 17086: {'lr': 0.0003877203437181081, 'samples': 8748544, 'steps': 17086, 'loss/train': 2.32560133934021} +02/24/2022 23:42:37 - INFO - codeparrot_training - Step 17087: {'lr': 0.0003877066875816521, 'samples': 8749056, 'steps': 17087, 'loss/train': 1.8611980676651} +02/24/2022 23:42:45 - INFO - codeparrot_training - Step 17088: {'lr': 0.00038769303085530425, 'samples': 8749568, 'steps': 17088, 'loss/train': 1.200783610343933} +02/24/2022 23:42:48 - INFO - codeparrot_training - Step 17089: {'lr': 0.0003876793735391233, 'samples': 8750080, 'steps': 17089, 'loss/train': 2.1668903827667236} +02/24/2022 23:42:54 - INFO - codeparrot_training - Step 17090: {'lr': 0.00038766571563316756, 'samples': 8750592, 'steps': 17090, 'loss/train': 1.193395733833313} +02/24/2022 23:42:57 - INFO - codeparrot_training - Step 17091: {'lr': 0.00038765205713749563, 'samples': 8751104, 'steps': 17091, 'loss/train': 1.4541212320327759} +02/24/2022 23:43:03 - INFO - codeparrot_training - Step 17092: {'lr': 0.0003876383980521659, 'samples': 8751616, 'steps': 17092, 'loss/train': 1.65485680103302} +02/24/2022 23:43:06 - INFO - codeparrot_training - Step 17093: {'lr': 0.0003876247383772371, 'samples': 8752128, 'steps': 17093, 'loss/train': 0.4849706292152405} +02/24/2022 23:43:12 - INFO - codeparrot_training - Step 17094: {'lr': 0.00038761107811276756, 'samples': 8752640, 'steps': 17094, 'loss/train': 2.5173490047454834} +02/24/2022 23:43:15 - INFO - codeparrot_training - Step 17095: {'lr': 0.00038759741725881593, 'samples': 8753152, 'steps': 17095, 'loss/train': 3.143068552017212} +02/24/2022 23:43:21 - INFO - codeparrot_training - Step 17096: {'lr': 0.0003875837558154406, 'samples': 8753664, 'steps': 17096, 'loss/train': 2.195749044418335} +02/24/2022 23:43:24 - INFO - codeparrot_training - Step 17097: {'lr': 0.00038757009378270014, 'samples': 8754176, 'steps': 17097, 'loss/train': 1.4201284646987915} +02/24/2022 23:43:32 - INFO - codeparrot_training - Step 17098: {'lr': 0.0003875564311606531, 'samples': 8754688, 'steps': 17098, 'loss/train': 0.7694094181060791} +02/24/2022 23:43:35 - INFO - codeparrot_training - Step 17099: {'lr': 0.000387542767949358, 'samples': 8755200, 'steps': 17099, 'loss/train': 1.7876856327056885} +02/24/2022 23:43:41 - INFO - codeparrot_training - Step 17100: {'lr': 0.0003875291041488734, 'samples': 8755712, 'steps': 17100, 'loss/train': 1.1753270626068115} +02/24/2022 23:43:44 - INFO - codeparrot_training - Step 17101: {'lr': 0.00038751543975925766, 'samples': 8756224, 'steps': 17101, 'loss/train': 2.3657734394073486} +02/24/2022 23:43:50 - INFO - codeparrot_training - Step 17102: {'lr': 0.00038750177478056956, 'samples': 8756736, 'steps': 17102, 'loss/train': 1.3064559698104858} +02/24/2022 23:43:53 - INFO - codeparrot_training - Step 17103: {'lr': 0.0003874881092128675, 'samples': 8757248, 'steps': 17103, 'loss/train': 2.9817073345184326} +02/24/2022 23:43:59 - INFO - codeparrot_training - Step 17104: {'lr': 0.00038747444305621, 'samples': 8757760, 'steps': 17104, 'loss/train': 0.7996066808700562} +02/24/2022 23:44:02 - INFO - codeparrot_training - Step 17105: {'lr': 0.0003874607763106556, 'samples': 8758272, 'steps': 17105, 'loss/train': 2.700169801712036} +02/24/2022 23:44:08 - INFO - codeparrot_training - Step 17106: {'lr': 0.00038744710897626293, 'samples': 8758784, 'steps': 17106, 'loss/train': 2.0943641662597656} +02/24/2022 23:44:11 - INFO - codeparrot_training - Step 17107: {'lr': 0.00038743344105309055, 'samples': 8759296, 'steps': 17107, 'loss/train': 2.6027300357818604} +02/24/2022 23:44:19 - INFO - codeparrot_training - Step 17108: {'lr': 0.0003874197725411969, 'samples': 8759808, 'steps': 17108, 'loss/train': 1.9064092636108398} +02/24/2022 23:44:22 - INFO - codeparrot_training - Step 17109: {'lr': 0.0003874061034406405, 'samples': 8760320, 'steps': 17109, 'loss/train': 1.881370186805725} +02/24/2022 23:44:28 - INFO - codeparrot_training - Step 17110: {'lr': 0.00038739243375148, 'samples': 8760832, 'steps': 17110, 'loss/train': 2.559566020965576} +02/24/2022 23:44:31 - INFO - codeparrot_training - Step 17111: {'lr': 0.0003873787634737741, 'samples': 8761344, 'steps': 17111, 'loss/train': 2.025768995285034} +02/24/2022 23:44:37 - INFO - codeparrot_training - Step 17112: {'lr': 0.00038736509260758103, 'samples': 8761856, 'steps': 17112, 'loss/train': 1.2163435220718384} +02/24/2022 23:44:40 - INFO - codeparrot_training - Step 17113: {'lr': 0.00038735142115295965, 'samples': 8762368, 'steps': 17113, 'loss/train': 1.903591513633728} +02/24/2022 23:44:46 - INFO - codeparrot_training - Step 17114: {'lr': 0.00038733774910996825, 'samples': 8762880, 'steps': 17114, 'loss/train': 1.4680233001708984} +02/24/2022 23:44:49 - INFO - codeparrot_training - Step 17115: {'lr': 0.00038732407647866567, 'samples': 8763392, 'steps': 17115, 'loss/train': 2.2477405071258545} +02/24/2022 23:44:55 - INFO - codeparrot_training - Step 17116: {'lr': 0.00038731040325911027, 'samples': 8763904, 'steps': 17116, 'loss/train': 1.5594722032546997} +02/24/2022 23:44:58 - INFO - codeparrot_training - Step 17117: {'lr': 0.0003872967294513608, 'samples': 8764416, 'steps': 17117, 'loss/train': 1.8007839918136597} +02/24/2022 23:45:05 - INFO - codeparrot_training - Step 17118: {'lr': 0.0003872830550554757, 'samples': 8764928, 'steps': 17118, 'loss/train': 2.6001248359680176} +02/24/2022 23:45:09 - INFO - codeparrot_training - Step 17119: {'lr': 0.0003872693800715135, 'samples': 8765440, 'steps': 17119, 'loss/train': 2.1997592449188232} +02/24/2022 23:45:14 - INFO - codeparrot_training - Step 17120: {'lr': 0.00038725570449953296, 'samples': 8765952, 'steps': 17120, 'loss/train': 2.052170991897583} +02/24/2022 23:45:18 - INFO - codeparrot_training - Step 17121: {'lr': 0.00038724202833959254, 'samples': 8766464, 'steps': 17121, 'loss/train': 2.8242053985595703} +02/24/2022 23:45:23 - INFO - codeparrot_training - Step 17122: {'lr': 0.00038722835159175087, 'samples': 8766976, 'steps': 17122, 'loss/train': 2.0248873233795166} +02/24/2022 23:45:27 - INFO - codeparrot_training - Step 17123: {'lr': 0.00038721467425606644, 'samples': 8767488, 'steps': 17123, 'loss/train': 1.7550607919692993} +02/24/2022 23:45:32 - INFO - codeparrot_training - Step 17124: {'lr': 0.000387200996332598, 'samples': 8768000, 'steps': 17124, 'loss/train': 1.693726897239685} +02/24/2022 23:45:36 - INFO - codeparrot_training - Step 17125: {'lr': 0.000387187317821404, 'samples': 8768512, 'steps': 17125, 'loss/train': 1.760815143585205} +02/24/2022 23:45:41 - INFO - codeparrot_training - Step 17126: {'lr': 0.0003871736387225431, 'samples': 8769024, 'steps': 17126, 'loss/train': 3.0384926795959473} +02/24/2022 23:45:45 - INFO - codeparrot_training - Step 17127: {'lr': 0.0003871599590360739, 'samples': 8769536, 'steps': 17127, 'loss/train': 2.5729176998138428} +02/24/2022 23:45:52 - INFO - codeparrot_training - Step 17128: {'lr': 0.000387146278762055, 'samples': 8770048, 'steps': 17128, 'loss/train': 1.2496367692947388} +02/24/2022 23:45:56 - INFO - codeparrot_training - Step 17129: {'lr': 0.000387132597900545, 'samples': 8770560, 'steps': 17129, 'loss/train': 2.1538803577423096} +02/24/2022 23:46:01 - INFO - codeparrot_training - Step 17130: {'lr': 0.0003871189164516025, 'samples': 8771072, 'steps': 17130, 'loss/train': 2.379394769668579} +02/24/2022 23:46:05 - INFO - codeparrot_training - Step 17131: {'lr': 0.000387105234415286, 'samples': 8771584, 'steps': 17131, 'loss/train': 1.665564775466919} +02/24/2022 23:46:10 - INFO - codeparrot_training - Step 17132: {'lr': 0.00038709155179165436, 'samples': 8772096, 'steps': 17132, 'loss/train': 2.272392749786377} +02/24/2022 23:46:14 - INFO - codeparrot_training - Step 17133: {'lr': 0.000387077868580766, 'samples': 8772608, 'steps': 17133, 'loss/train': 0.39942196011543274} +02/24/2022 23:46:19 - INFO - codeparrot_training - Step 17134: {'lr': 0.00038706418478267945, 'samples': 8773120, 'steps': 17134, 'loss/train': 1.7531652450561523} +02/24/2022 23:46:23 - INFO - codeparrot_training - Step 17135: {'lr': 0.0003870505003974536, 'samples': 8773632, 'steps': 17135, 'loss/train': 1.9936140775680542} +02/24/2022 23:46:28 - INFO - codeparrot_training - Step 17136: {'lr': 0.0003870368154251469, 'samples': 8774144, 'steps': 17136, 'loss/train': 1.8988162279129028} +02/24/2022 23:46:32 - INFO - codeparrot_training - Step 17137: {'lr': 0.000387023129865818, 'samples': 8774656, 'steps': 17137, 'loss/train': 2.8043172359466553} +02/24/2022 23:46:37 - INFO - codeparrot_training - Step 17138: {'lr': 0.00038700944371952543, 'samples': 8775168, 'steps': 17138, 'loss/train': 1.4487859010696411} +02/24/2022 23:46:41 - INFO - codeparrot_training - Step 17139: {'lr': 0.00038699575698632806, 'samples': 8775680, 'steps': 17139, 'loss/train': 3.4096124172210693} +02/24/2022 23:46:46 - INFO - codeparrot_training - Step 17140: {'lr': 0.00038698206966628426, 'samples': 8776192, 'steps': 17140, 'loss/train': 2.037593364715576} +02/24/2022 23:46:50 - INFO - codeparrot_training - Step 17141: {'lr': 0.00038696838175945284, 'samples': 8776704, 'steps': 17141, 'loss/train': 2.4670138359069824} +02/24/2022 23:46:55 - INFO - codeparrot_training - Step 17142: {'lr': 0.0003869546932658923, 'samples': 8777216, 'steps': 17142, 'loss/train': 1.7054975032806396} +02/24/2022 23:46:59 - INFO - codeparrot_training - Step 17143: {'lr': 0.0003869410041856614, 'samples': 8777728, 'steps': 17143, 'loss/train': 2.2802484035491943} +02/24/2022 23:47:04 - INFO - codeparrot_training - Step 17144: {'lr': 0.0003869273145188186, 'samples': 8778240, 'steps': 17144, 'loss/train': 2.6949515342712402} +02/24/2022 23:47:08 - INFO - codeparrot_training - Step 17145: {'lr': 0.00038691362426542273, 'samples': 8778752, 'steps': 17145, 'loss/train': 3.853553056716919} +02/24/2022 23:47:15 - INFO - codeparrot_training - Step 17146: {'lr': 0.0003868999334255324, 'samples': 8779264, 'steps': 17146, 'loss/train': 1.9217404127120972} +02/24/2022 23:47:19 - INFO - codeparrot_training - Step 17147: {'lr': 0.00038688624199920623, 'samples': 8779776, 'steps': 17147, 'loss/train': 2.070007085800171} +02/24/2022 23:47:24 - INFO - codeparrot_training - Step 17148: {'lr': 0.0003868725499865029, 'samples': 8780288, 'steps': 17148, 'loss/train': 2.3876490592956543} +02/24/2022 23:47:27 - INFO - codeparrot_training - Step 17149: {'lr': 0.00038685885738748096, 'samples': 8780800, 'steps': 17149, 'loss/train': 2.0504651069641113} +02/24/2022 23:47:33 - INFO - codeparrot_training - Step 17150: {'lr': 0.0003868451642021992, 'samples': 8781312, 'steps': 17150, 'loss/train': 2.0499277114868164} +02/24/2022 23:47:37 - INFO - codeparrot_training - Step 17151: {'lr': 0.0003868314704307161, 'samples': 8781824, 'steps': 17151, 'loss/train': 2.0177595615386963} +02/24/2022 23:47:43 - INFO - codeparrot_training - Step 17152: {'lr': 0.0003868177760730905, 'samples': 8782336, 'steps': 17152, 'loss/train': 0.8068851232528687} +02/24/2022 23:47:46 - INFO - codeparrot_training - Step 17153: {'lr': 0.00038680408112938097, 'samples': 8782848, 'steps': 17153, 'loss/train': 1.987796664237976} +02/24/2022 23:47:53 - INFO - codeparrot_training - Step 17154: {'lr': 0.00038679038559964626, 'samples': 8783360, 'steps': 17154, 'loss/train': 1.763417363166809} +02/24/2022 23:47:57 - INFO - codeparrot_training - Step 17155: {'lr': 0.0003867766894839449, 'samples': 8783872, 'steps': 17155, 'loss/train': 1.6264234781265259} +02/24/2022 23:48:03 - INFO - codeparrot_training - Step 17156: {'lr': 0.0003867629927823357, 'samples': 8784384, 'steps': 17156, 'loss/train': 2.31601619720459} +02/24/2022 23:48:06 - INFO - codeparrot_training - Step 17157: {'lr': 0.00038674929549487714, 'samples': 8784896, 'steps': 17157, 'loss/train': 2.436521053314209} +02/24/2022 23:48:12 - INFO - codeparrot_training - Step 17158: {'lr': 0.00038673559762162816, 'samples': 8785408, 'steps': 17158, 'loss/train': 2.391249418258667} +02/24/2022 23:48:15 - INFO - codeparrot_training - Step 17159: {'lr': 0.0003867218991626472, 'samples': 8785920, 'steps': 17159, 'loss/train': 1.0393623113632202} +02/24/2022 23:48:21 - INFO - codeparrot_training - Step 17160: {'lr': 0.0003867082001179931, 'samples': 8786432, 'steps': 17160, 'loss/train': 2.1306588649749756} +02/24/2022 23:48:24 - INFO - codeparrot_training - Step 17161: {'lr': 0.0003866945004877245, 'samples': 8786944, 'steps': 17161, 'loss/train': 3.0967583656311035} +02/24/2022 23:48:30 - INFO - codeparrot_training - Step 17162: {'lr': 0.0003866808002719, 'samples': 8787456, 'steps': 17162, 'loss/train': 1.4777824878692627} +02/24/2022 23:48:33 - INFO - codeparrot_training - Step 17163: {'lr': 0.00038666709947057836, 'samples': 8787968, 'steps': 17163, 'loss/train': 2.7453489303588867} +02/24/2022 23:48:41 - INFO - codeparrot_training - Step 17164: {'lr': 0.0003866533980838183, 'samples': 8788480, 'steps': 17164, 'loss/train': 2.041107654571533} +02/24/2022 23:48:44 - INFO - codeparrot_training - Step 17165: {'lr': 0.0003866396961116785, 'samples': 8788992, 'steps': 17165, 'loss/train': 2.743147850036621} +02/24/2022 23:48:50 - INFO - codeparrot_training - Step 17166: {'lr': 0.00038662599355421756, 'samples': 8789504, 'steps': 17166, 'loss/train': 2.681450843811035} +02/24/2022 23:48:53 - INFO - codeparrot_training - Step 17167: {'lr': 0.00038661229041149427, 'samples': 8790016, 'steps': 17167, 'loss/train': 2.6153838634490967} +02/24/2022 23:48:59 - INFO - codeparrot_training - Step 17168: {'lr': 0.0003865985866835673, 'samples': 8790528, 'steps': 17168, 'loss/train': 2.2887461185455322} +02/24/2022 23:49:02 - INFO - codeparrot_training - Step 17169: {'lr': 0.0003865848823704954, 'samples': 8791040, 'steps': 17169, 'loss/train': 2.4888033866882324} +02/24/2022 23:49:08 - INFO - codeparrot_training - Step 17170: {'lr': 0.00038657117747233717, 'samples': 8791552, 'steps': 17170, 'loss/train': 1.12760329246521} +02/24/2022 23:49:11 - INFO - codeparrot_training - Step 17171: {'lr': 0.00038655747198915137, 'samples': 8792064, 'steps': 17171, 'loss/train': 3.0158510208129883} +02/24/2022 23:49:17 - INFO - codeparrot_training - Step 17172: {'lr': 0.0003865437659209968, 'samples': 8792576, 'steps': 17172, 'loss/train': 1.4976574182510376} +02/24/2022 23:49:20 - INFO - codeparrot_training - Step 17173: {'lr': 0.00038653005926793203, 'samples': 8793088, 'steps': 17173, 'loss/train': 1.0372263193130493} +02/24/2022 23:49:26 - INFO - codeparrot_training - Step 17174: {'lr': 0.0003865163520300159, 'samples': 8793600, 'steps': 17174, 'loss/train': 2.25982666015625} +02/24/2022 23:49:29 - INFO - codeparrot_training - Step 17175: {'lr': 0.00038650264420730707, 'samples': 8794112, 'steps': 17175, 'loss/train': 2.554713010787964} +02/24/2022 23:49:37 - INFO - codeparrot_training - Step 17176: {'lr': 0.00038648893579986424, 'samples': 8794624, 'steps': 17176, 'loss/train': 1.747277855873108} +02/24/2022 23:49:40 - INFO - codeparrot_training - Step 17177: {'lr': 0.00038647522680774603, 'samples': 8795136, 'steps': 17177, 'loss/train': 0.9550808072090149} +02/24/2022 23:49:46 - INFO - codeparrot_training - Step 17178: {'lr': 0.0003864615172310115, 'samples': 8795648, 'steps': 17178, 'loss/train': 2.0351920127868652} +02/24/2022 23:49:49 - INFO - codeparrot_training - Step 17179: {'lr': 0.000386447807069719, 'samples': 8796160, 'steps': 17179, 'loss/train': 3.6719768047332764} +02/24/2022 23:49:55 - INFO - codeparrot_training - Step 17180: {'lr': 0.0003864340963239275, 'samples': 8796672, 'steps': 17180, 'loss/train': 1.9507302045822144} +02/24/2022 23:49:58 - INFO - codeparrot_training - Step 17181: {'lr': 0.00038642038499369556, 'samples': 8797184, 'steps': 17181, 'loss/train': 1.3756699562072754} +02/24/2022 23:50:04 - INFO - codeparrot_training - Step 17182: {'lr': 0.0003864066730790821, 'samples': 8797696, 'steps': 17182, 'loss/train': 1.7046701908111572} +02/24/2022 23:50:07 - INFO - codeparrot_training - Step 17183: {'lr': 0.00038639296058014575, 'samples': 8798208, 'steps': 17183, 'loss/train': 1.9142571687698364} +02/24/2022 23:50:12 - INFO - codeparrot_training - Step 17184: {'lr': 0.0003863792474969453, 'samples': 8798720, 'steps': 17184, 'loss/train': 1.423899531364441} +02/24/2022 23:50:16 - INFO - codeparrot_training - Step 17185: {'lr': 0.00038636553382953944, 'samples': 8799232, 'steps': 17185, 'loss/train': 1.5073208808898926} +02/24/2022 23:50:21 - INFO - codeparrot_training - Step 17186: {'lr': 0.00038635181957798686, 'samples': 8799744, 'steps': 17186, 'loss/train': 1.9099687337875366} +02/24/2022 23:50:25 - INFO - codeparrot_training - Step 17187: {'lr': 0.00038633810474234643, 'samples': 8800256, 'steps': 17187, 'loss/train': 2.5165624618530273} +02/24/2022 23:50:30 - INFO - codeparrot_training - Step 17188: {'lr': 0.00038632438932267686, 'samples': 8800768, 'steps': 17188, 'loss/train': 3.1032791137695312} +02/24/2022 23:50:34 - INFO - codeparrot_training - Step 17189: {'lr': 0.0003863106733190369, 'samples': 8801280, 'steps': 17189, 'loss/train': 2.5249416828155518} +02/24/2022 23:50:41 - INFO - codeparrot_training - Step 17190: {'lr': 0.0003862969567314852, 'samples': 8801792, 'steps': 17190, 'loss/train': 0.4238697290420532} +02/24/2022 23:50:45 - INFO - codeparrot_training - Step 17191: {'lr': 0.0003862832395600808, 'samples': 8802304, 'steps': 17191, 'loss/train': 0.6999640464782715} +02/24/2022 23:50:50 - INFO - codeparrot_training - Step 17192: {'lr': 0.0003862695218048822, 'samples': 8802816, 'steps': 17192, 'loss/train': 1.8220654726028442} +02/24/2022 23:50:54 - INFO - codeparrot_training - Step 17193: {'lr': 0.00038625580346594824, 'samples': 8803328, 'steps': 17193, 'loss/train': 6.060995578765869} +02/24/2022 23:50:59 - INFO - codeparrot_training - Step 17194: {'lr': 0.00038624208454333763, 'samples': 8803840, 'steps': 17194, 'loss/train': 2.0100319385528564} +02/24/2022 23:51:03 - INFO - codeparrot_training - Step 17195: {'lr': 0.00038622836503710917, 'samples': 8804352, 'steps': 17195, 'loss/train': 1.8906749486923218} +02/24/2022 23:51:09 - INFO - codeparrot_training - Step 17196: {'lr': 0.00038621464494732174, 'samples': 8804864, 'steps': 17196, 'loss/train': 3.2008378505706787} +02/24/2022 23:51:12 - INFO - codeparrot_training - Step 17197: {'lr': 0.00038620092427403395, 'samples': 8805376, 'steps': 17197, 'loss/train': 1.7259007692337036} +02/24/2022 23:51:18 - INFO - codeparrot_training - Step 17198: {'lr': 0.0003861872030173047, 'samples': 8805888, 'steps': 17198, 'loss/train': 2.322706699371338} +02/24/2022 23:51:21 - INFO - codeparrot_training - Step 17199: {'lr': 0.0003861734811771928, 'samples': 8806400, 'steps': 17199, 'loss/train': 1.5288407802581787} +02/24/2022 23:51:27 - INFO - codeparrot_training - Step 17200: {'lr': 0.00038615975875375683, 'samples': 8806912, 'steps': 17200, 'loss/train': 1.030848503112793} +02/24/2022 23:51:30 - INFO - codeparrot_training - Step 17201: {'lr': 0.0003861460357470556, 'samples': 8807424, 'steps': 17201, 'loss/train': 1.8166879415512085} +02/24/2022 23:51:38 - INFO - codeparrot_training - Step 17202: {'lr': 0.0003861323121571482, 'samples': 8807936, 'steps': 17202, 'loss/train': 1.5446628332138062} +02/24/2022 23:51:41 - INFO - codeparrot_training - Step 17203: {'lr': 0.0003861185879840931, 'samples': 8808448, 'steps': 17203, 'loss/train': 1.6572048664093018} +02/24/2022 23:51:47 - INFO - codeparrot_training - Step 17204: {'lr': 0.00038610486322794915, 'samples': 8808960, 'steps': 17204, 'loss/train': 0.9908636212348938} +02/24/2022 23:51:51 - INFO - codeparrot_training - Step 17205: {'lr': 0.0003860911378887752, 'samples': 8809472, 'steps': 17205, 'loss/train': 2.543142080307007} +02/24/2022 23:51:56 - INFO - codeparrot_training - Step 17206: {'lr': 0.00038607741196663005, 'samples': 8809984, 'steps': 17206, 'loss/train': 1.8947288990020752} +02/24/2022 23:52:00 - INFO - codeparrot_training - Step 17207: {'lr': 0.0003860636854615725, 'samples': 8810496, 'steps': 17207, 'loss/train': 1.871793508529663} +02/24/2022 23:52:05 - INFO - codeparrot_training - Step 17208: {'lr': 0.0003860499583736613, 'samples': 8811008, 'steps': 17208, 'loss/train': 2.360804796218872} +02/24/2022 23:52:09 - INFO - codeparrot_training - Step 17209: {'lr': 0.00038603623070295536, 'samples': 8811520, 'steps': 17209, 'loss/train': 1.8559503555297852} +02/24/2022 23:52:14 - INFO - codeparrot_training - Step 17210: {'lr': 0.0003860225024495133, 'samples': 8812032, 'steps': 17210, 'loss/train': 2.4367282390594482} +02/24/2022 23:52:18 - INFO - codeparrot_training - Step 17211: {'lr': 0.000386008773613394, 'samples': 8812544, 'steps': 17211, 'loss/train': 2.1229898929595947} +02/24/2022 23:52:24 - INFO - codeparrot_training - Step 17212: {'lr': 0.0003859950441946564, 'samples': 8813056, 'steps': 17212, 'loss/train': 2.0907034873962402} +02/24/2022 23:52:27 - INFO - codeparrot_training - Step 17213: {'lr': 0.0003859813141933592, 'samples': 8813568, 'steps': 17213, 'loss/train': 2.2270755767822266} +02/24/2022 23:52:33 - INFO - codeparrot_training - Step 17214: {'lr': 0.0003859675836095612, 'samples': 8814080, 'steps': 17214, 'loss/train': 0.8479047417640686} +02/24/2022 23:52:36 - INFO - codeparrot_training - Step 17215: {'lr': 0.00038595385244332125, 'samples': 8814592, 'steps': 17215, 'loss/train': 2.7664647102355957} +02/24/2022 23:52:42 - INFO - codeparrot_training - Step 17216: {'lr': 0.00038594012069469814, 'samples': 8815104, 'steps': 17216, 'loss/train': 1.1028177738189697} +02/24/2022 23:52:45 - INFO - codeparrot_training - Step 17217: {'lr': 0.00038592638836375075, 'samples': 8815616, 'steps': 17217, 'loss/train': 3.339601993560791} +02/24/2022 23:52:51 - INFO - codeparrot_training - Step 17218: {'lr': 0.0003859126554505379, 'samples': 8816128, 'steps': 17218, 'loss/train': 2.159816265106201} +02/24/2022 23:52:54 - INFO - codeparrot_training - Step 17219: {'lr': 0.00038589892195511834, 'samples': 8816640, 'steps': 17219, 'loss/train': 4.061238765716553} +02/24/2022 23:53:00 - INFO - codeparrot_training - Step 17220: {'lr': 0.00038588518787755096, 'samples': 8817152, 'steps': 17220, 'loss/train': 1.8575063943862915} +02/24/2022 23:53:03 - INFO - codeparrot_training - Step 17221: {'lr': 0.00038587145321789456, 'samples': 8817664, 'steps': 17221, 'loss/train': 1.722158670425415} +02/24/2022 23:53:09 - INFO - codeparrot_training - Step 17222: {'lr': 0.00038585771797620803, 'samples': 8818176, 'steps': 17222, 'loss/train': 1.7295022010803223} +02/24/2022 23:53:13 - INFO - codeparrot_training - Step 17223: {'lr': 0.00038584398215255023, 'samples': 8818688, 'steps': 17223, 'loss/train': 1.8057955503463745} +02/24/2022 23:53:18 - INFO - codeparrot_training - Step 17224: {'lr': 0.0003858302457469799, 'samples': 8819200, 'steps': 17224, 'loss/train': 1.9161033630371094} +02/24/2022 23:53:22 - INFO - codeparrot_training - Step 17225: {'lr': 0.0003858165087595559, 'samples': 8819712, 'steps': 17225, 'loss/train': 1.9712824821472168} +02/24/2022 23:53:27 - INFO - codeparrot_training - Step 17226: {'lr': 0.00038580277119033715, 'samples': 8820224, 'steps': 17226, 'loss/train': 2.2184431552886963} +02/24/2022 23:53:31 - INFO - codeparrot_training - Step 17227: {'lr': 0.0003857890330393824, 'samples': 8820736, 'steps': 17227, 'loss/train': 1.8530975580215454} +02/24/2022 23:53:36 - INFO - codeparrot_training - Step 17228: {'lr': 0.0003857752943067506, 'samples': 8821248, 'steps': 17228, 'loss/train': 1.5442469120025635} +02/24/2022 23:53:40 - INFO - codeparrot_training - Step 17229: {'lr': 0.00038576155499250056, 'samples': 8821760, 'steps': 17229, 'loss/train': 2.025756359100342} +02/24/2022 23:53:45 - INFO - codeparrot_training - Step 17230: {'lr': 0.000385747815096691, 'samples': 8822272, 'steps': 17230, 'loss/train': 2.134267568588257} +02/24/2022 23:53:49 - INFO - codeparrot_training - Step 17231: {'lr': 0.00038573407461938103, 'samples': 8822784, 'steps': 17231, 'loss/train': 1.656584620475769} +02/24/2022 23:53:54 - INFO - codeparrot_training - Step 17232: {'lr': 0.0003857203335606294, 'samples': 8823296, 'steps': 17232, 'loss/train': 1.0241409540176392} +02/24/2022 23:53:58 - INFO - codeparrot_training - Step 17233: {'lr': 0.00038570659192049497, 'samples': 8823808, 'steps': 17233, 'loss/train': 1.7235249280929565} +02/24/2022 23:54:03 - INFO - codeparrot_training - Step 17234: {'lr': 0.0003856928496990364, 'samples': 8824320, 'steps': 17234, 'loss/train': 1.845015048980713} +02/24/2022 23:54:07 - INFO - codeparrot_training - Step 17235: {'lr': 0.000385679106896313, 'samples': 8824832, 'steps': 17235, 'loss/train': 2.526581048965454} +02/24/2022 23:54:13 - INFO - codeparrot_training - Step 17236: {'lr': 0.0003856653635123832, 'samples': 8825344, 'steps': 17236, 'loss/train': 2.620913028717041} +02/24/2022 23:54:17 - INFO - codeparrot_training - Step 17237: {'lr': 0.0003856516195473062, 'samples': 8825856, 'steps': 17237, 'loss/train': 1.7650483846664429} +02/24/2022 23:54:22 - INFO - codeparrot_training - Step 17238: {'lr': 0.0003856378750011407, 'samples': 8826368, 'steps': 17238, 'loss/train': 1.261871099472046} +02/24/2022 23:54:26 - INFO - codeparrot_training - Step 17239: {'lr': 0.0003856241298739456, 'samples': 8826880, 'steps': 17239, 'loss/train': 1.746508240699768} +02/24/2022 23:54:31 - INFO - codeparrot_training - Step 17240: {'lr': 0.0003856103841657797, 'samples': 8827392, 'steps': 17240, 'loss/train': 2.340733766555786} +02/24/2022 23:54:35 - INFO - codeparrot_training - Step 17241: {'lr': 0.0003855966378767021, 'samples': 8827904, 'steps': 17241, 'loss/train': 2.799269676208496} +02/24/2022 23:54:40 - INFO - codeparrot_training - Step 17242: {'lr': 0.00038558289100677144, 'samples': 8828416, 'steps': 17242, 'loss/train': 2.2604362964630127} +02/24/2022 23:54:44 - INFO - codeparrot_training - Step 17243: {'lr': 0.00038556914355604676, 'samples': 8828928, 'steps': 17243, 'loss/train': 2.04171085357666} +02/24/2022 23:54:49 - INFO - codeparrot_training - Step 17244: {'lr': 0.0003855553955245871, 'samples': 8829440, 'steps': 17244, 'loss/train': 2.540919542312622} +02/24/2022 23:54:53 - INFO - codeparrot_training - Step 17245: {'lr': 0.00038554164691245095, 'samples': 8829952, 'steps': 17245, 'loss/train': 1.5962390899658203} +02/24/2022 23:55:00 - INFO - codeparrot_training - Step 17246: {'lr': 0.00038552789771969755, 'samples': 8830464, 'steps': 17246, 'loss/train': 1.262508511543274} +02/24/2022 23:55:03 - INFO - codeparrot_training - Step 17247: {'lr': 0.00038551414794638555, 'samples': 8830976, 'steps': 17247, 'loss/train': 1.760473608970642} +02/24/2022 23:55:08 - INFO - codeparrot_training - Step 17248: {'lr': 0.00038550039759257404, 'samples': 8831488, 'steps': 17248, 'loss/train': 2.559582233428955} +02/24/2022 23:55:12 - INFO - codeparrot_training - Step 17249: {'lr': 0.0003854866466583219, 'samples': 8832000, 'steps': 17249, 'loss/train': 1.8721861839294434} +02/24/2022 23:55:17 - INFO - codeparrot_training - Step 17250: {'lr': 0.00038547289514368795, 'samples': 8832512, 'steps': 17250, 'loss/train': 2.282911539077759} +02/24/2022 23:55:21 - INFO - codeparrot_training - Step 17251: {'lr': 0.00038545914304873117, 'samples': 8833024, 'steps': 17251, 'loss/train': 2.229508638381958} +02/24/2022 23:55:26 - INFO - codeparrot_training - Step 17252: {'lr': 0.00038544539037351037, 'samples': 8833536, 'steps': 17252, 'loss/train': 2.107180118560791} +02/24/2022 23:55:30 - INFO - codeparrot_training - Step 17253: {'lr': 0.00038543163711808457, 'samples': 8834048, 'steps': 17253, 'loss/train': 2.538442850112915} +02/24/2022 23:55:35 - INFO - codeparrot_training - Step 17254: {'lr': 0.0003854178832825126, 'samples': 8834560, 'steps': 17254, 'loss/train': 2.3268327713012695} +02/24/2022 23:55:39 - INFO - codeparrot_training - Step 17255: {'lr': 0.0003854041288668534, 'samples': 8835072, 'steps': 17255, 'loss/train': 1.1370352506637573} +02/24/2022 23:55:45 - INFO - codeparrot_training - Step 17256: {'lr': 0.00038539037387116595, 'samples': 8835584, 'steps': 17256, 'loss/train': 1.4813798666000366} +02/24/2022 23:55:48 - INFO - codeparrot_training - Step 17257: {'lr': 0.0003853766182955092, 'samples': 8836096, 'steps': 17257, 'loss/train': 0.5485183000564575} +02/24/2022 23:55:55 - INFO - codeparrot_training - Step 17258: {'lr': 0.0003853628621399419, 'samples': 8836608, 'steps': 17258, 'loss/train': 1.8450720310211182} +02/24/2022 23:55:59 - INFO - codeparrot_training - Step 17259: {'lr': 0.00038534910540452305, 'samples': 8837120, 'steps': 17259, 'loss/train': 0.7206200361251831} +02/24/2022 23:56:04 - INFO - codeparrot_training - Step 17260: {'lr': 0.0003853353480893117, 'samples': 8837632, 'steps': 17260, 'loss/train': 2.4634640216827393} +02/24/2022 23:56:08 - INFO - codeparrot_training - Step 17261: {'lr': 0.0003853215901943667, 'samples': 8838144, 'steps': 17261, 'loss/train': 2.0845487117767334} +02/24/2022 23:56:13 - INFO - codeparrot_training - Step 17262: {'lr': 0.00038530783171974694, 'samples': 8838656, 'steps': 17262, 'loss/train': 2.1178362369537354} +02/24/2022 23:56:17 - INFO - codeparrot_training - Step 17263: {'lr': 0.0003852940726655114, 'samples': 8839168, 'steps': 17263, 'loss/train': 2.995131492614746} +02/24/2022 23:56:22 - INFO - codeparrot_training - Step 17264: {'lr': 0.000385280313031719, 'samples': 8839680, 'steps': 17264, 'loss/train': 2.0326082706451416} +02/24/2022 23:56:26 - INFO - codeparrot_training - Step 17265: {'lr': 0.0003852665528184287, 'samples': 8840192, 'steps': 17265, 'loss/train': 1.9007039070129395} +02/24/2022 23:56:31 - INFO - codeparrot_training - Step 17266: {'lr': 0.0003852527920256994, 'samples': 8840704, 'steps': 17266, 'loss/train': 1.9658913612365723} +02/24/2022 23:56:35 - INFO - codeparrot_training - Step 17267: {'lr': 0.00038523903065359013, 'samples': 8841216, 'steps': 17267, 'loss/train': 2.657074213027954} +02/24/2022 23:56:40 - INFO - codeparrot_training - Step 17268: {'lr': 0.0003852252687021598, 'samples': 8841728, 'steps': 17268, 'loss/train': 2.217015027999878} +02/24/2022 23:56:44 - INFO - codeparrot_training - Step 17269: {'lr': 0.00038521150617146737, 'samples': 8842240, 'steps': 17269, 'loss/train': 0.7021298408508301} +02/24/2022 23:56:50 - INFO - codeparrot_training - Step 17270: {'lr': 0.00038519774306157174, 'samples': 8842752, 'steps': 17270, 'loss/train': 2.22515869140625} +02/24/2022 23:56:53 - INFO - codeparrot_training - Step 17271: {'lr': 0.00038518397937253195, 'samples': 8843264, 'steps': 17271, 'loss/train': 0.89049232006073} +02/24/2022 23:56:59 - INFO - codeparrot_training - Step 17272: {'lr': 0.00038517021510440694, 'samples': 8843776, 'steps': 17272, 'loss/train': 2.688854217529297} +02/24/2022 23:57:02 - INFO - codeparrot_training - Step 17273: {'lr': 0.0003851564502572556, 'samples': 8844288, 'steps': 17273, 'loss/train': 1.1446787118911743} +02/24/2022 23:57:08 - INFO - codeparrot_training - Step 17274: {'lr': 0.00038514268483113694, 'samples': 8844800, 'steps': 17274, 'loss/train': 1.6221375465393066} +02/24/2022 23:57:12 - INFO - codeparrot_training - Step 17275: {'lr': 0.00038512891882610997, 'samples': 8845312, 'steps': 17275, 'loss/train': 2.0164897441864014} +02/24/2022 23:57:18 - INFO - codeparrot_training - Step 17276: {'lr': 0.0003851151522422336, 'samples': 8845824, 'steps': 17276, 'loss/train': 2.2144687175750732} +02/24/2022 23:57:21 - INFO - codeparrot_training - Step 17277: {'lr': 0.0003851013850795668, 'samples': 8846336, 'steps': 17277, 'loss/train': 2.7005553245544434} +02/24/2022 23:57:27 - INFO - codeparrot_training - Step 17278: {'lr': 0.00038508761733816864, 'samples': 8846848, 'steps': 17278, 'loss/train': 1.0483784675598145} +02/24/2022 23:57:30 - INFO - codeparrot_training - Step 17279: {'lr': 0.00038507384901809795, 'samples': 8847360, 'steps': 17279, 'loss/train': 2.0545334815979004} +02/24/2022 23:57:36 - INFO - codeparrot_training - Step 17280: {'lr': 0.00038506008011941376, 'samples': 8847872, 'steps': 17280, 'loss/train': 1.1230887174606323} +02/24/2022 23:57:39 - INFO - codeparrot_training - Step 17281: {'lr': 0.0003850463106421751, 'samples': 8848384, 'steps': 17281, 'loss/train': 1.8626708984375} +02/24/2022 23:57:45 - INFO - codeparrot_training - Step 17282: {'lr': 0.000385032540586441, 'samples': 8848896, 'steps': 17282, 'loss/train': 1.9536232948303223} +02/24/2022 23:57:48 - INFO - codeparrot_training - Step 17283: {'lr': 0.00038501876995227023, 'samples': 8849408, 'steps': 17283, 'loss/train': 2.213552236557007} +02/24/2022 23:57:54 - INFO - codeparrot_training - Step 17284: {'lr': 0.00038500499873972204, 'samples': 8849920, 'steps': 17284, 'loss/train': 1.6882789134979248} +02/24/2022 23:57:58 - INFO - codeparrot_training - Step 17285: {'lr': 0.0003849912269488552, 'samples': 8850432, 'steps': 17285, 'loss/train': 2.7493436336517334} +02/24/2022 23:58:03 - INFO - codeparrot_training - Step 17286: {'lr': 0.000384977454579729, 'samples': 8850944, 'steps': 17286, 'loss/train': 0.14187151193618774} +02/24/2022 23:58:07 - INFO - codeparrot_training - Step 17287: {'lr': 0.00038496368163240215, 'samples': 8851456, 'steps': 17287, 'loss/train': 1.7262598276138306} +02/24/2022 23:58:12 - INFO - codeparrot_training - Step 17288: {'lr': 0.00038494990810693366, 'samples': 8851968, 'steps': 17288, 'loss/train': 1.8332463502883911} +02/24/2022 23:58:15 - INFO - codeparrot_training - Step 17289: {'lr': 0.00038493613400338267, 'samples': 8852480, 'steps': 17289, 'loss/train': 2.5230519771575928} +02/24/2022 23:58:21 - INFO - codeparrot_training - Step 17290: {'lr': 0.0003849223593218082, 'samples': 8852992, 'steps': 17290, 'loss/train': 1.3553800582885742} +02/24/2022 23:58:24 - INFO - codeparrot_training - Step 17291: {'lr': 0.00038490858406226903, 'samples': 8853504, 'steps': 17291, 'loss/train': 2.525913953781128} +02/24/2022 23:58:30 - INFO - codeparrot_training - Step 17292: {'lr': 0.00038489480822482446, 'samples': 8854016, 'steps': 17292, 'loss/train': 1.7875498533248901} +02/24/2022 23:58:33 - INFO - codeparrot_training - Step 17293: {'lr': 0.00038488103180953326, 'samples': 8854528, 'steps': 17293, 'loss/train': 2.5612146854400635} +02/24/2022 23:58:40 - INFO - codeparrot_training - Step 17294: {'lr': 0.00038486725481645467, 'samples': 8855040, 'steps': 17294, 'loss/train': 1.9052436351776123} +02/24/2022 23:58:43 - INFO - codeparrot_training - Step 17295: {'lr': 0.00038485347724564746, 'samples': 8855552, 'steps': 17295, 'loss/train': 1.891305685043335} +02/24/2022 23:58:49 - INFO - codeparrot_training - Step 17296: {'lr': 0.0003848396990971709, 'samples': 8856064, 'steps': 17296, 'loss/train': 1.0746748447418213} +02/24/2022 23:58:52 - INFO - codeparrot_training - Step 17297: {'lr': 0.00038482592037108375, 'samples': 8856576, 'steps': 17297, 'loss/train': 2.259819269180298} +02/24/2022 23:58:58 - INFO - codeparrot_training - Step 17298: {'lr': 0.0003848121410674453, 'samples': 8857088, 'steps': 17298, 'loss/train': 1.4291638135910034} +02/24/2022 23:59:01 - INFO - codeparrot_training - Step 17299: {'lr': 0.0003847983611863144, 'samples': 8857600, 'steps': 17299, 'loss/train': 1.225612998008728} +02/24/2022 23:59:07 - INFO - codeparrot_training - Step 17300: {'lr': 0.0003847845807277501, 'samples': 8858112, 'steps': 17300, 'loss/train': 0.6676669120788574} +02/24/2022 23:59:10 - INFO - codeparrot_training - Step 17301: {'lr': 0.00038477079969181146, 'samples': 8858624, 'steps': 17301, 'loss/train': 2.7663023471832275} +02/24/2022 23:59:16 - INFO - codeparrot_training - Step 17302: {'lr': 0.00038475701807855753, 'samples': 8859136, 'steps': 17302, 'loss/train': 2.323258638381958} +02/24/2022 23:59:19 - INFO - codeparrot_training - Step 17303: {'lr': 0.00038474323588804727, 'samples': 8859648, 'steps': 17303, 'loss/train': 0.9643365144729614} +02/24/2022 23:59:26 - INFO - codeparrot_training - Step 17304: {'lr': 0.0003847294531203398, 'samples': 8860160, 'steps': 17304, 'loss/train': 2.4043641090393066} +02/24/2022 23:59:29 - INFO - codeparrot_training - Step 17305: {'lr': 0.0003847156697754942, 'samples': 8860672, 'steps': 17305, 'loss/train': 3.8596770763397217} +02/24/2022 23:59:35 - INFO - codeparrot_training - Step 17306: {'lr': 0.00038470188585356936, 'samples': 8861184, 'steps': 17306, 'loss/train': 1.4697163105010986} +02/24/2022 23:59:38 - INFO - codeparrot_training - Step 17307: {'lr': 0.00038468810135462445, 'samples': 8861696, 'steps': 17307, 'loss/train': 1.7332180738449097} +02/24/2022 23:59:44 - INFO - codeparrot_training - Step 17308: {'lr': 0.00038467431627871844, 'samples': 8862208, 'steps': 17308, 'loss/train': 1.3113980293273926} +02/24/2022 23:59:47 - INFO - codeparrot_training - Step 17309: {'lr': 0.0003846605306259105, 'samples': 8862720, 'steps': 17309, 'loss/train': 1.7961474657058716} +02/24/2022 23:59:53 - INFO - codeparrot_training - Step 17310: {'lr': 0.0003846467443962596, 'samples': 8863232, 'steps': 17310, 'loss/train': 2.2187306880950928} +02/24/2022 23:59:56 - INFO - codeparrot_training - Step 17311: {'lr': 0.00038463295758982475, 'samples': 8863744, 'steps': 17311, 'loss/train': 2.2249069213867188} +02/25/2022 00:00:02 - INFO - codeparrot_training - Step 17312: {'lr': 0.00038461917020666506, 'samples': 8864256, 'steps': 17312, 'loss/train': 1.267568588256836} +02/25/2022 00:00:05 - INFO - codeparrot_training - Step 17313: {'lr': 0.0003846053822468396, 'samples': 8864768, 'steps': 17313, 'loss/train': 1.2431365251541138} +02/25/2022 00:00:12 - INFO - codeparrot_training - Step 17314: {'lr': 0.00038459159371040743, 'samples': 8865280, 'steps': 17314, 'loss/train': 2.431715488433838} +02/25/2022 00:00:15 - INFO - codeparrot_training - Step 17315: {'lr': 0.0003845778045974276, 'samples': 8865792, 'steps': 17315, 'loss/train': 1.4574394226074219} +02/25/2022 00:00:21 - INFO - codeparrot_training - Step 17316: {'lr': 0.0003845640149079592, 'samples': 8866304, 'steps': 17316, 'loss/train': 1.674900770187378} +02/25/2022 00:00:24 - INFO - codeparrot_training - Step 17317: {'lr': 0.0003845502246420613, 'samples': 8866816, 'steps': 17317, 'loss/train': 1.8590947389602661} +02/25/2022 00:00:29 - INFO - codeparrot_training - Step 17318: {'lr': 0.00038453643379979295, 'samples': 8867328, 'steps': 17318, 'loss/train': 2.256387710571289} +02/25/2022 00:00:33 - INFO - codeparrot_training - Step 17319: {'lr': 0.00038452264238121326, 'samples': 8867840, 'steps': 17319, 'loss/train': 2.5044546127319336} +02/25/2022 00:00:39 - INFO - codeparrot_training - Step 17320: {'lr': 0.0003845088503863813, 'samples': 8868352, 'steps': 17320, 'loss/train': 1.5753520727157593} +02/25/2022 00:00:42 - INFO - codeparrot_training - Step 17321: {'lr': 0.0003844950578153561, 'samples': 8868864, 'steps': 17321, 'loss/train': 2.375957489013672} +02/25/2022 00:00:48 - INFO - codeparrot_training - Step 17322: {'lr': 0.00038448126466819675, 'samples': 8869376, 'steps': 17322, 'loss/train': 2.416022300720215} +02/25/2022 00:00:51 - INFO - codeparrot_training - Step 17323: {'lr': 0.00038446747094496243, 'samples': 8869888, 'steps': 17323, 'loss/train': 2.419193744659424} +02/25/2022 00:00:57 - INFO - codeparrot_training - Step 17324: {'lr': 0.00038445367664571216, 'samples': 8870400, 'steps': 17324, 'loss/train': 1.1529008150100708} +02/25/2022 00:01:00 - INFO - codeparrot_training - Step 17325: {'lr': 0.000384439881770505, 'samples': 8870912, 'steps': 17325, 'loss/train': 2.1947786808013916} +02/25/2022 00:01:06 - INFO - codeparrot_training - Step 17326: {'lr': 0.0003844260863194001, 'samples': 8871424, 'steps': 17326, 'loss/train': 0.752173662185669} +02/25/2022 00:01:09 - INFO - codeparrot_training - Step 17327: {'lr': 0.0003844122902924565, 'samples': 8871936, 'steps': 17327, 'loss/train': 2.003032684326172} +02/25/2022 00:01:15 - INFO - codeparrot_training - Step 17328: {'lr': 0.00038439849368973334, 'samples': 8872448, 'steps': 17328, 'loss/train': 1.9385327100753784} +02/25/2022 00:01:21 - INFO - codeparrot_training - Step 17329: {'lr': 0.0003843846965112897, 'samples': 8872960, 'steps': 17329, 'loss/train': 2.169112205505371} +02/25/2022 00:01:25 - INFO - codeparrot_training - Step 17330: {'lr': 0.0003843708987571847, 'samples': 8873472, 'steps': 17330, 'loss/train': 1.393505573272705} +02/25/2022 00:01:30 - INFO - codeparrot_training - Step 17331: {'lr': 0.0003843571004274775, 'samples': 8873984, 'steps': 17331, 'loss/train': 2.15645432472229} +02/25/2022 00:01:33 - INFO - codeparrot_training - Step 17332: {'lr': 0.0003843433015222271, 'samples': 8874496, 'steps': 17332, 'loss/train': 1.316759467124939} +02/25/2022 00:01:39 - INFO - codeparrot_training - Step 17333: {'lr': 0.0003843295020414926, 'samples': 8875008, 'steps': 17333, 'loss/train': 1.4930756092071533} +02/25/2022 00:01:43 - INFO - codeparrot_training - Step 17334: {'lr': 0.0003843157019853332, 'samples': 8875520, 'steps': 17334, 'loss/train': 1.4754931926727295} +02/25/2022 00:01:48 - INFO - codeparrot_training - Step 17335: {'lr': 0.00038430190135380803, 'samples': 8876032, 'steps': 17335, 'loss/train': 1.6922804117202759} +02/25/2022 00:01:52 - INFO - codeparrot_training - Step 17336: {'lr': 0.00038428810014697615, 'samples': 8876544, 'steps': 17336, 'loss/train': 1.763075351715088} +02/25/2022 00:01:57 - INFO - codeparrot_training - Step 17337: {'lr': 0.00038427429836489663, 'samples': 8877056, 'steps': 17337, 'loss/train': 2.5658202171325684} +02/25/2022 00:02:01 - INFO - codeparrot_training - Step 17338: {'lr': 0.00038426049600762867, 'samples': 8877568, 'steps': 17338, 'loss/train': 1.1538020372390747} +02/25/2022 00:02:07 - INFO - codeparrot_training - Step 17339: {'lr': 0.00038424669307523135, 'samples': 8878080, 'steps': 17339, 'loss/train': 1.071851134300232} +02/25/2022 00:02:11 - INFO - codeparrot_training - Step 17340: {'lr': 0.00038423288956776394, 'samples': 8878592, 'steps': 17340, 'loss/train': 1.8985893726348877} +02/25/2022 00:02:16 - INFO - codeparrot_training - Step 17341: {'lr': 0.00038421908548528534, 'samples': 8879104, 'steps': 17341, 'loss/train': 2.143930673599243} +02/25/2022 00:02:20 - INFO - codeparrot_training - Step 17342: {'lr': 0.0003842052808278549, 'samples': 8879616, 'steps': 17342, 'loss/train': 1.532324194908142} +02/25/2022 00:02:25 - INFO - codeparrot_training - Step 17343: {'lr': 0.0003841914755955315, 'samples': 8880128, 'steps': 17343, 'loss/train': 1.4769550561904907} +02/25/2022 00:02:29 - INFO - codeparrot_training - Step 17344: {'lr': 0.00038417766978837453, 'samples': 8880640, 'steps': 17344, 'loss/train': 1.7774338722229004} +02/25/2022 00:02:34 - INFO - codeparrot_training - Step 17345: {'lr': 0.00038416386340644305, 'samples': 8881152, 'steps': 17345, 'loss/train': 2.018763303756714} +02/25/2022 00:02:38 - INFO - codeparrot_training - Step 17346: {'lr': 0.00038415005644979616, 'samples': 8881664, 'steps': 17346, 'loss/train': 1.8801765441894531} +02/25/2022 00:02:43 - INFO - codeparrot_training - Step 17347: {'lr': 0.00038413624891849295, 'samples': 8882176, 'steps': 17347, 'loss/train': 1.8801686763763428} +02/25/2022 00:02:47 - INFO - codeparrot_training - Step 17348: {'lr': 0.00038412244081259273, 'samples': 8882688, 'steps': 17348, 'loss/train': 2.8764495849609375} +02/25/2022 00:02:53 - INFO - codeparrot_training - Step 17349: {'lr': 0.00038410863213215454, 'samples': 8883200, 'steps': 17349, 'loss/train': 2.4866943359375} +02/25/2022 00:02:56 - INFO - codeparrot_training - Step 17350: {'lr': 0.0003840948228772376, 'samples': 8883712, 'steps': 17350, 'loss/train': 1.2575478553771973} +02/25/2022 00:03:02 - INFO - codeparrot_training - Step 17351: {'lr': 0.00038408101304790096, 'samples': 8884224, 'steps': 17351, 'loss/train': 2.2058801651000977} +02/25/2022 00:03:06 - INFO - codeparrot_training - Step 17352: {'lr': 0.0003840672026442038, 'samples': 8884736, 'steps': 17352, 'loss/train': 2.506976842880249} +02/25/2022 00:03:09 - INFO - codeparrot_training - Step 17353: {'lr': 0.0003840533916662054, 'samples': 8885248, 'steps': 17353, 'loss/train': 2.8243627548217773} +02/25/2022 00:03:15 - INFO - codeparrot_training - Step 17354: {'lr': 0.00038403958011396476, 'samples': 8885760, 'steps': 17354, 'loss/train': 1.2112480401992798} +02/25/2022 00:03:18 - INFO - codeparrot_training - Step 17355: {'lr': 0.0003840257679875412, 'samples': 8886272, 'steps': 17355, 'loss/train': 2.1621241569519043} +02/25/2022 00:03:24 - INFO - codeparrot_training - Step 17356: {'lr': 0.00038401195528699374, 'samples': 8886784, 'steps': 17356, 'loss/train': 1.3151633739471436} +02/25/2022 00:03:27 - INFO - codeparrot_training - Step 17357: {'lr': 0.0003839981420123817, 'samples': 8887296, 'steps': 17357, 'loss/train': 2.6348929405212402} +02/25/2022 00:03:33 - INFO - codeparrot_training - Step 17358: {'lr': 0.00038398432816376404, 'samples': 8887808, 'steps': 17358, 'loss/train': 2.2323763370513916} +02/25/2022 00:03:36 - INFO - codeparrot_training - Step 17359: {'lr': 0.00038397051374120016, 'samples': 8888320, 'steps': 17359, 'loss/train': 2.9336204528808594} +02/25/2022 00:03:43 - INFO - codeparrot_training - Step 17360: {'lr': 0.00038395669874474915, 'samples': 8888832, 'steps': 17360, 'loss/train': 2.6371617317199707} +02/25/2022 00:03:46 - INFO - codeparrot_training - Step 17361: {'lr': 0.0003839428831744702, 'samples': 8889344, 'steps': 17361, 'loss/train': 2.2495625019073486} +02/25/2022 00:03:52 - INFO - codeparrot_training - Step 17362: {'lr': 0.0003839290670304224, 'samples': 8889856, 'steps': 17362, 'loss/train': 1.7084813117980957} +02/25/2022 00:03:55 - INFO - codeparrot_training - Step 17363: {'lr': 0.00038391525031266494, 'samples': 8890368, 'steps': 17363, 'loss/train': 1.162110447883606} +02/25/2022 00:04:01 - INFO - codeparrot_training - Step 17364: {'lr': 0.0003839014330212572, 'samples': 8890880, 'steps': 17364, 'loss/train': 1.8352888822555542} +02/25/2022 00:04:04 - INFO - codeparrot_training - Step 17365: {'lr': 0.00038388761515625815, 'samples': 8891392, 'steps': 17365, 'loss/train': 2.446359634399414} +02/25/2022 00:04:10 - INFO - codeparrot_training - Step 17366: {'lr': 0.0003838737967177271, 'samples': 8891904, 'steps': 17366, 'loss/train': 2.0594189167022705} +02/25/2022 00:04:13 - INFO - codeparrot_training - Step 17367: {'lr': 0.00038385997770572336, 'samples': 8892416, 'steps': 17367, 'loss/train': 2.2992804050445557} +02/25/2022 00:04:19 - INFO - codeparrot_training - Step 17368: {'lr': 0.0003838461581203058, 'samples': 8892928, 'steps': 17368, 'loss/train': 1.9460521936416626} +02/25/2022 00:04:22 - INFO - codeparrot_training - Step 17369: {'lr': 0.00038383233796153383, 'samples': 8893440, 'steps': 17369, 'loss/train': 1.9894386529922485} +02/25/2022 00:04:28 - INFO - codeparrot_training - Step 17370: {'lr': 0.00038381851722946663, 'samples': 8893952, 'steps': 17370, 'loss/train': 1.7741245031356812} +02/25/2022 00:04:31 - INFO - codeparrot_training - Step 17371: {'lr': 0.00038380469592416347, 'samples': 8894464, 'steps': 17371, 'loss/train': 0.7290230393409729} +02/25/2022 00:04:37 - INFO - codeparrot_training - Step 17372: {'lr': 0.00038379087404568333, 'samples': 8894976, 'steps': 17372, 'loss/train': 1.2639176845550537} +02/25/2022 00:04:40 - INFO - codeparrot_training - Step 17373: {'lr': 0.0003837770515940857, 'samples': 8895488, 'steps': 17373, 'loss/train': 2.028148889541626} +02/25/2022 00:04:46 - INFO - codeparrot_training - Step 17374: {'lr': 0.0003837632285694296, 'samples': 8896000, 'steps': 17374, 'loss/train': 2.300626516342163} +02/25/2022 00:04:49 - INFO - codeparrot_training - Step 17375: {'lr': 0.00038374940497177434, 'samples': 8896512, 'steps': 17375, 'loss/train': 2.2144341468811035} +02/25/2022 00:04:56 - INFO - codeparrot_training - Step 17376: {'lr': 0.000383735580801179, 'samples': 8897024, 'steps': 17376, 'loss/train': 1.6073862314224243} +02/25/2022 00:04:59 - INFO - codeparrot_training - Step 17377: {'lr': 0.00038372175605770305, 'samples': 8897536, 'steps': 17377, 'loss/train': 2.0909602642059326} +02/25/2022 00:05:05 - INFO - codeparrot_training - Step 17378: {'lr': 0.00038370793074140545, 'samples': 8898048, 'steps': 17378, 'loss/train': 1.5658361911773682} +02/25/2022 00:05:08 - INFO - codeparrot_training - Step 17379: {'lr': 0.00038369410485234557, 'samples': 8898560, 'steps': 17379, 'loss/train': 2.0146656036376953} +02/25/2022 00:05:14 - INFO - codeparrot_training - Step 17380: {'lr': 0.0003836802783905826, 'samples': 8899072, 'steps': 17380, 'loss/train': 2.606670618057251} +02/25/2022 00:05:17 - INFO - codeparrot_training - Step 17381: {'lr': 0.0003836664513561758, 'samples': 8899584, 'steps': 17381, 'loss/train': 1.9620325565338135} +02/25/2022 00:05:23 - INFO - codeparrot_training - Step 17382: {'lr': 0.0003836526237491843, 'samples': 8900096, 'steps': 17382, 'loss/train': 2.1051924228668213} +02/25/2022 00:05:26 - INFO - codeparrot_training - Step 17383: {'lr': 0.0003836387955696674, 'samples': 8900608, 'steps': 17383, 'loss/train': 2.5000431537628174} +02/25/2022 00:05:32 - INFO - codeparrot_training - Step 17384: {'lr': 0.00038362496681768434, 'samples': 8901120, 'steps': 17384, 'loss/train': 1.8268605470657349} +02/25/2022 00:05:35 - INFO - codeparrot_training - Step 17385: {'lr': 0.00038361113749329443, 'samples': 8901632, 'steps': 17385, 'loss/train': 1.5343314409255981} +02/25/2022 00:05:41 - INFO - codeparrot_training - Step 17386: {'lr': 0.00038359730759655674, 'samples': 8902144, 'steps': 17386, 'loss/train': 2.431920289993286} +02/25/2022 00:05:44 - INFO - codeparrot_training - Step 17387: {'lr': 0.00038358347712753063, 'samples': 8902656, 'steps': 17387, 'loss/train': 1.6329838037490845} +02/25/2022 00:05:51 - INFO - codeparrot_training - Step 17388: {'lr': 0.0003835696460862753, 'samples': 8903168, 'steps': 17388, 'loss/train': 1.812713861465454} +02/25/2022 00:05:55 - INFO - codeparrot_training - Step 17389: {'lr': 0.00038355581447285005, 'samples': 8903680, 'steps': 17389, 'loss/train': 1.0038838386535645} +02/25/2022 00:06:00 - INFO - codeparrot_training - Step 17390: {'lr': 0.00038354198228731414, 'samples': 8904192, 'steps': 17390, 'loss/train': 2.625652313232422} +02/25/2022 00:06:04 - INFO - codeparrot_training - Step 17391: {'lr': 0.0003835281495297267, 'samples': 8904704, 'steps': 17391, 'loss/train': 1.3525059223175049} +02/25/2022 00:06:09 - INFO - codeparrot_training - Step 17392: {'lr': 0.0003835143162001472, 'samples': 8905216, 'steps': 17392, 'loss/train': 1.672836184501648} +02/25/2022 00:06:13 - INFO - codeparrot_training - Step 17393: {'lr': 0.0003835004822986346, 'samples': 8905728, 'steps': 17393, 'loss/train': 1.8405547142028809} +02/25/2022 00:06:18 - INFO - codeparrot_training - Step 17394: {'lr': 0.00038348664782524846, 'samples': 8906240, 'steps': 17394, 'loss/train': 1.8644921779632568} +02/25/2022 00:06:22 - INFO - codeparrot_training - Step 17395: {'lr': 0.00038347281278004774, 'samples': 8906752, 'steps': 17395, 'loss/train': 2.2251172065734863} +02/25/2022 00:06:27 - INFO - codeparrot_training - Step 17396: {'lr': 0.0003834589771630921, 'samples': 8907264, 'steps': 17396, 'loss/train': 2.490211248397827} +02/25/2022 00:06:30 - INFO - codeparrot_training - Step 17397: {'lr': 0.0003834451409744404, 'samples': 8907776, 'steps': 17397, 'loss/train': 1.9458705186843872} +02/25/2022 00:06:37 - INFO - codeparrot_training - Step 17398: {'lr': 0.0003834313042141522, 'samples': 8908288, 'steps': 17398, 'loss/train': 2.2974541187286377} +02/25/2022 00:06:41 - INFO - codeparrot_training - Step 17399: {'lr': 0.0003834174668822865, 'samples': 8908800, 'steps': 17399, 'loss/train': 1.4726758003234863} +02/25/2022 00:06:46 - INFO - codeparrot_training - Step 17400: {'lr': 0.0003834036289789029, 'samples': 8909312, 'steps': 17400, 'loss/train': 2.2040772438049316} +02/25/2022 00:06:50 - INFO - codeparrot_training - Step 17401: {'lr': 0.0003833897905040604, 'samples': 8909824, 'steps': 17401, 'loss/train': 1.7959519624710083} +02/25/2022 00:06:55 - INFO - codeparrot_training - Step 17402: {'lr': 0.00038337595145781844, 'samples': 8910336, 'steps': 17402, 'loss/train': 1.9410135746002197} +02/25/2022 00:06:59 - INFO - codeparrot_training - Step 17403: {'lr': 0.00038336211184023634, 'samples': 8910848, 'steps': 17403, 'loss/train': 2.2259018421173096} +02/25/2022 00:07:04 - INFO - codeparrot_training - Step 17404: {'lr': 0.0003833482716513732, 'samples': 8911360, 'steps': 17404, 'loss/train': 2.1858718395233154} +02/25/2022 00:07:08 - INFO - codeparrot_training - Step 17405: {'lr': 0.0003833344308912885, 'samples': 8911872, 'steps': 17405, 'loss/train': 1.7019981145858765} +02/25/2022 00:07:13 - INFO - codeparrot_training - Step 17406: {'lr': 0.00038332058956004134, 'samples': 8912384, 'steps': 17406, 'loss/train': 1.642392873764038} +02/25/2022 00:07:17 - INFO - codeparrot_training - Step 17407: {'lr': 0.0003833067476576911, 'samples': 8912896, 'steps': 17407, 'loss/train': 0.7289206981658936} +02/25/2022 00:07:24 - INFO - codeparrot_training - Step 17408: {'lr': 0.0003832929051842972, 'samples': 8913408, 'steps': 17408, 'loss/train': 2.099308729171753} +02/25/2022 00:07:27 - INFO - codeparrot_training - Step 17409: {'lr': 0.0003832790621399187, 'samples': 8913920, 'steps': 17409, 'loss/train': 2.12369966506958} +02/25/2022 00:07:33 - INFO - codeparrot_training - Step 17410: {'lr': 0.00038326521852461505, 'samples': 8914432, 'steps': 17410, 'loss/train': 1.924633264541626} +02/25/2022 00:07:36 - INFO - codeparrot_training - Step 17411: {'lr': 0.0003832513743384456, 'samples': 8914944, 'steps': 17411, 'loss/train': 1.7784247398376465} +02/25/2022 00:07:42 - INFO - codeparrot_training - Step 17412: {'lr': 0.0003832375295814695, 'samples': 8915456, 'steps': 17412, 'loss/train': 1.3785922527313232} +02/25/2022 00:07:45 - INFO - codeparrot_training - Step 17413: {'lr': 0.0003832236842537461, 'samples': 8915968, 'steps': 17413, 'loss/train': 2.2821872234344482} +02/25/2022 00:07:51 - INFO - codeparrot_training - Step 17414: {'lr': 0.0003832098383553347, 'samples': 8916480, 'steps': 17414, 'loss/train': 2.455449342727661} +02/25/2022 00:07:54 - INFO - codeparrot_training - Step 17415: {'lr': 0.00038319599188629485, 'samples': 8916992, 'steps': 17415, 'loss/train': 1.7601726055145264} +02/25/2022 00:08:00 - INFO - codeparrot_training - Step 17416: {'lr': 0.00038318214484668557, 'samples': 8917504, 'steps': 17416, 'loss/train': 1.5574798583984375} +02/25/2022 00:08:05 - INFO - codeparrot_training - Step 17417: {'lr': 0.0003831682972365662, 'samples': 8918016, 'steps': 17417, 'loss/train': 2.0122427940368652} +02/25/2022 00:08:09 - INFO - codeparrot_training - Step 17418: {'lr': 0.0003831544490559962, 'samples': 8918528, 'steps': 17418, 'loss/train': 1.056150197982788} +02/25/2022 00:08:14 - INFO - codeparrot_training - Step 17419: {'lr': 0.00038314060030503476, 'samples': 8919040, 'steps': 17419, 'loss/train': 2.1905977725982666} +02/25/2022 00:08:18 - INFO - codeparrot_training - Step 17420: {'lr': 0.00038312675098374136, 'samples': 8919552, 'steps': 17420, 'loss/train': 1.6523983478546143} +02/25/2022 00:08:24 - INFO - codeparrot_training - Step 17421: {'lr': 0.0003831129010921751, 'samples': 8920064, 'steps': 17421, 'loss/train': 2.0863749980926514} +02/25/2022 00:08:28 - INFO - codeparrot_training - Step 17422: {'lr': 0.0003830990506303956, 'samples': 8920576, 'steps': 17422, 'loss/train': 1.2508740425109863} +02/25/2022 00:08:31 - INFO - codeparrot_training - Step 17423: {'lr': 0.0003830851995984619, 'samples': 8921088, 'steps': 17423, 'loss/train': 0.18153704702854156} +02/25/2022 00:08:36 - INFO - codeparrot_training - Step 17424: {'lr': 0.0003830713479964335, 'samples': 8921600, 'steps': 17424, 'loss/train': 2.6745219230651855} +02/25/2022 00:08:42 - INFO - codeparrot_training - Step 17425: {'lr': 0.0003830574958243697, 'samples': 8922112, 'steps': 17425, 'loss/train': 1.4275271892547607} +02/25/2022 00:08:46 - INFO - codeparrot_training - Step 17426: {'lr': 0.00038304364308232986, 'samples': 8922624, 'steps': 17426, 'loss/train': 1.6835452318191528} +02/25/2022 00:08:49 - INFO - codeparrot_training - Step 17427: {'lr': 0.0003830297897703733, 'samples': 8923136, 'steps': 17427, 'loss/train': 2.2621264457702637} +02/25/2022 00:08:55 - INFO - codeparrot_training - Step 17428: {'lr': 0.0003830159358885593, 'samples': 8923648, 'steps': 17428, 'loss/train': 2.3754281997680664} +02/25/2022 00:09:00 - INFO - codeparrot_training - Step 17429: {'lr': 0.00038300208143694737, 'samples': 8924160, 'steps': 17429, 'loss/train': 4.128789901733398} +02/25/2022 00:09:04 - INFO - codeparrot_training - Step 17430: {'lr': 0.00038298822641559673, 'samples': 8924672, 'steps': 17430, 'loss/train': 2.1815717220306396} +02/25/2022 00:09:07 - INFO - codeparrot_training - Step 17431: {'lr': 0.0003829743708245667, 'samples': 8925184, 'steps': 17431, 'loss/train': 2.9297618865966797} +02/25/2022 00:09:13 - INFO - codeparrot_training - Step 17432: {'lr': 0.0003829605146639167, 'samples': 8925696, 'steps': 17432, 'loss/train': 2.4314677715301514} +02/25/2022 00:09:19 - INFO - codeparrot_training - Step 17433: {'lr': 0.0003829466579337061, 'samples': 8926208, 'steps': 17433, 'loss/train': 0.6870249509811401} +02/25/2022 00:09:22 - INFO - codeparrot_training - Step 17434: {'lr': 0.00038293280063399427, 'samples': 8926720, 'steps': 17434, 'loss/train': 0.9593690037727356} +02/25/2022 00:09:28 - INFO - codeparrot_training - Step 17435: {'lr': 0.00038291894276484053, 'samples': 8927232, 'steps': 17435, 'loss/train': 1.496435284614563} +02/25/2022 00:09:31 - INFO - codeparrot_training - Step 17436: {'lr': 0.0003829050843263041, 'samples': 8927744, 'steps': 17436, 'loss/train': 1.831271767616272} +02/25/2022 00:09:37 - INFO - codeparrot_training - Step 17437: {'lr': 0.0003828912253184446, 'samples': 8928256, 'steps': 17437, 'loss/train': 1.7486910820007324} +02/25/2022 00:09:40 - INFO - codeparrot_training - Step 17438: {'lr': 0.0003828773657413213, 'samples': 8928768, 'steps': 17438, 'loss/train': 2.1680681705474854} +02/25/2022 00:09:46 - INFO - codeparrot_training - Step 17439: {'lr': 0.0003828635055949935, 'samples': 8929280, 'steps': 17439, 'loss/train': 1.663074016571045} +02/25/2022 00:09:49 - INFO - codeparrot_training - Step 17440: {'lr': 0.0003828496448795207, 'samples': 8929792, 'steps': 17440, 'loss/train': 1.912155270576477} +02/25/2022 00:09:55 - INFO - codeparrot_training - Step 17441: {'lr': 0.0003828357835949622, 'samples': 8930304, 'steps': 17441, 'loss/train': 1.4286575317382812} +02/25/2022 00:09:58 - INFO - codeparrot_training - Step 17442: {'lr': 0.00038282192174137744, 'samples': 8930816, 'steps': 17442, 'loss/train': 1.6696593761444092} +02/25/2022 00:10:05 - INFO - codeparrot_training - Step 17443: {'lr': 0.00038280805931882557, 'samples': 8931328, 'steps': 17443, 'loss/train': 0.6226693391799927} +02/25/2022 00:10:08 - INFO - codeparrot_training - Step 17444: {'lr': 0.0003827941963273663, 'samples': 8931840, 'steps': 17444, 'loss/train': 1.0359865427017212} +02/25/2022 00:10:14 - INFO - codeparrot_training - Step 17445: {'lr': 0.00038278033276705875, 'samples': 8932352, 'steps': 17445, 'loss/train': 2.0010087490081787} +02/25/2022 00:10:17 - INFO - codeparrot_training - Step 17446: {'lr': 0.0003827664686379625, 'samples': 8932864, 'steps': 17446, 'loss/train': 2.163616180419922} +02/25/2022 00:10:23 - INFO - codeparrot_training - Step 17447: {'lr': 0.00038275260394013676, 'samples': 8933376, 'steps': 17447, 'loss/train': 1.1750496625900269} +02/25/2022 00:10:26 - INFO - codeparrot_training - Step 17448: {'lr': 0.0003827387386736411, 'samples': 8933888, 'steps': 17448, 'loss/train': 0.9757341146469116} +02/25/2022 00:10:32 - INFO - codeparrot_training - Step 17449: {'lr': 0.0003827248728385349, 'samples': 8934400, 'steps': 17449, 'loss/train': 1.9917367696762085} +02/25/2022 00:10:35 - INFO - codeparrot_training - Step 17450: {'lr': 0.0003827110064348773, 'samples': 8934912, 'steps': 17450, 'loss/train': 2.0245180130004883} +02/25/2022 00:10:41 - INFO - codeparrot_training - Step 17451: {'lr': 0.000382697139462728, 'samples': 8935424, 'steps': 17451, 'loss/train': 1.805024266242981} +02/25/2022 00:10:44 - INFO - codeparrot_training - Step 17452: {'lr': 0.00038268327192214635, 'samples': 8935936, 'steps': 17452, 'loss/train': 2.43996524810791} +02/25/2022 00:10:51 - INFO - codeparrot_training - Step 17453: {'lr': 0.0003826694038131916, 'samples': 8936448, 'steps': 17453, 'loss/train': 1.3299962282180786} +02/25/2022 00:10:54 - INFO - codeparrot_training - Step 17454: {'lr': 0.00038265553513592334, 'samples': 8936960, 'steps': 17454, 'loss/train': 1.845405101776123} +02/25/2022 00:10:59 - INFO - codeparrot_training - Step 17455: {'lr': 0.00038264166589040084, 'samples': 8937472, 'steps': 17455, 'loss/train': 1.8641937971115112} +02/25/2022 00:11:03 - INFO - codeparrot_training - Step 17456: {'lr': 0.00038262779607668354, 'samples': 8937984, 'steps': 17456, 'loss/train': 3.083961009979248} +02/25/2022 00:11:09 - INFO - codeparrot_training - Step 17457: {'lr': 0.00038261392569483087, 'samples': 8938496, 'steps': 17457, 'loss/train': 3.5749051570892334} +02/25/2022 00:11:12 - INFO - codeparrot_training - Step 17458: {'lr': 0.0003826000547449023, 'samples': 8939008, 'steps': 17458, 'loss/train': 3.5318398475646973} +02/25/2022 00:11:18 - INFO - codeparrot_training - Step 17459: {'lr': 0.0003825861832269571, 'samples': 8939520, 'steps': 17459, 'loss/train': 1.3561949729919434} +02/25/2022 00:11:21 - INFO - codeparrot_training - Step 17460: {'lr': 0.00038257231114105495, 'samples': 8940032, 'steps': 17460, 'loss/train': 2.32259202003479} +02/25/2022 00:11:27 - INFO - codeparrot_training - Step 17461: {'lr': 0.00038255843848725504, 'samples': 8940544, 'steps': 17461, 'loss/train': 2.3007588386535645} +02/25/2022 00:11:30 - INFO - codeparrot_training - Step 17462: {'lr': 0.0003825445652656169, 'samples': 8941056, 'steps': 17462, 'loss/train': 6.096596717834473} +02/25/2022 00:11:36 - INFO - codeparrot_training - Step 17463: {'lr': 0.00038253069147619977, 'samples': 8941568, 'steps': 17463, 'loss/train': 0.38452979922294617} +02/25/2022 00:11:39 - INFO - codeparrot_training - Step 17464: {'lr': 0.00038251681711906345, 'samples': 8942080, 'steps': 17464, 'loss/train': 1.534771203994751} +02/25/2022 00:11:45 - INFO - codeparrot_training - Step 17465: {'lr': 0.00038250294219426706, 'samples': 8942592, 'steps': 17465, 'loss/train': 1.817731261253357} +02/25/2022 00:11:48 - INFO - codeparrot_training - Step 17466: {'lr': 0.00038248906670187017, 'samples': 8943104, 'steps': 17466, 'loss/train': 2.0167102813720703} +02/25/2022 00:11:53 - INFO - codeparrot_training - Step 17467: {'lr': 0.00038247519064193216, 'samples': 8943616, 'steps': 17467, 'loss/train': 2.3912782669067383} +02/25/2022 00:11:57 - INFO - codeparrot_training - Step 17468: {'lr': 0.0003824613140145125, 'samples': 8944128, 'steps': 17468, 'loss/train': 1.724741816520691} +02/25/2022 00:12:03 - INFO - codeparrot_training - Step 17469: {'lr': 0.00038244743681967066, 'samples': 8944640, 'steps': 17469, 'loss/train': 2.1507270336151123} +02/25/2022 00:12:07 - INFO - codeparrot_training - Step 17470: {'lr': 0.000382433559057466, 'samples': 8945152, 'steps': 17470, 'loss/train': 2.123882532119751} +02/25/2022 00:12:12 - INFO - codeparrot_training - Step 17471: {'lr': 0.00038241968072795805, 'samples': 8945664, 'steps': 17471, 'loss/train': 1.9180289506912231} +02/25/2022 00:12:16 - INFO - codeparrot_training - Step 17472: {'lr': 0.00038240580183120624, 'samples': 8946176, 'steps': 17472, 'loss/train': 2.4886069297790527} +02/25/2022 00:12:21 - INFO - codeparrot_training - Step 17473: {'lr': 0.0003823919223672701, 'samples': 8946688, 'steps': 17473, 'loss/train': 1.4735954999923706} +02/25/2022 00:12:25 - INFO - codeparrot_training - Step 17474: {'lr': 0.00038237804233620887, 'samples': 8947200, 'steps': 17474, 'loss/train': 0.1073988825082779} +02/25/2022 00:12:30 - INFO - codeparrot_training - Step 17475: {'lr': 0.0003823641617380823, 'samples': 8947712, 'steps': 17475, 'loss/train': 2.577849864959717} +02/25/2022 00:12:34 - INFO - codeparrot_training - Step 17476: {'lr': 0.00038235028057294953, 'samples': 8948224, 'steps': 17476, 'loss/train': 2.058762788772583} +02/25/2022 00:12:39 - INFO - codeparrot_training - Step 17477: {'lr': 0.0003823363988408703, 'samples': 8948736, 'steps': 17477, 'loss/train': 2.0384023189544678} +02/25/2022 00:12:43 - INFO - codeparrot_training - Step 17478: {'lr': 0.00038232251654190386, 'samples': 8949248, 'steps': 17478, 'loss/train': 0.9737243056297302} +02/25/2022 00:12:49 - INFO - codeparrot_training - Step 17479: {'lr': 0.0003823086336761099, 'samples': 8949760, 'steps': 17479, 'loss/train': 0.8889243602752686} +02/25/2022 00:12:52 - INFO - codeparrot_training - Step 17480: {'lr': 0.00038229475024354766, 'samples': 8950272, 'steps': 17480, 'loss/train': 2.84980845451355} +02/25/2022 00:12:58 - INFO - codeparrot_training - Step 17481: {'lr': 0.00038228086624427675, 'samples': 8950784, 'steps': 17481, 'loss/train': 2.54441499710083} +02/25/2022 00:13:01 - INFO - codeparrot_training - Step 17482: {'lr': 0.0003822669816783566, 'samples': 8951296, 'steps': 17482, 'loss/train': 2.1469645500183105} +02/25/2022 00:13:07 - INFO - codeparrot_training - Step 17483: {'lr': 0.0003822530965458467, 'samples': 8951808, 'steps': 17483, 'loss/train': 2.187218427658081} +02/25/2022 00:13:10 - INFO - codeparrot_training - Step 17484: {'lr': 0.0003822392108468066, 'samples': 8952320, 'steps': 17484, 'loss/train': 1.8644400835037231} +02/25/2022 00:13:16 - INFO - codeparrot_training - Step 17485: {'lr': 0.00038222532458129563, 'samples': 8952832, 'steps': 17485, 'loss/train': 1.0419584512710571} +02/25/2022 00:13:19 - INFO - codeparrot_training - Step 17486: {'lr': 0.0003822114377493734, 'samples': 8953344, 'steps': 17486, 'loss/train': 1.8946095705032349} +02/25/2022 00:13:25 - INFO - codeparrot_training - Step 17487: {'lr': 0.0003821975503510993, 'samples': 8953856, 'steps': 17487, 'loss/train': 2.175102710723877} +02/25/2022 00:13:28 - INFO - codeparrot_training - Step 17488: {'lr': 0.0003821836623865329, 'samples': 8954368, 'steps': 17488, 'loss/train': 2.0778934955596924} +02/25/2022 00:13:35 - INFO - codeparrot_training - Step 17489: {'lr': 0.0003821697738557337, 'samples': 8954880, 'steps': 17489, 'loss/train': 1.0143626928329468} +02/25/2022 00:13:38 - INFO - codeparrot_training - Step 17490: {'lr': 0.00038215588475876117, 'samples': 8955392, 'steps': 17490, 'loss/train': 0.8945921063423157} +02/25/2022 00:13:44 - INFO - codeparrot_training - Step 17491: {'lr': 0.0003821419950956747, 'samples': 8955904, 'steps': 17491, 'loss/train': 2.351945638656616} +02/25/2022 00:13:48 - INFO - codeparrot_training - Step 17492: {'lr': 0.00038212810486653394, 'samples': 8956416, 'steps': 17492, 'loss/train': 1.6855456829071045} +02/25/2022 00:13:53 - INFO - codeparrot_training - Step 17493: {'lr': 0.0003821142140713983, 'samples': 8956928, 'steps': 17493, 'loss/train': 2.2223989963531494} +02/25/2022 00:13:57 - INFO - codeparrot_training - Step 17494: {'lr': 0.0003821003227103274, 'samples': 8957440, 'steps': 17494, 'loss/train': 2.366349697113037} +02/25/2022 00:14:02 - INFO - codeparrot_training - Step 17495: {'lr': 0.00038208643078338055, 'samples': 8957952, 'steps': 17495, 'loss/train': 1.8043476343154907} +02/25/2022 00:14:06 - INFO - codeparrot_training - Step 17496: {'lr': 0.0003820725382906175, 'samples': 8958464, 'steps': 17496, 'loss/train': 0.7203257083892822} +02/25/2022 00:14:11 - INFO - codeparrot_training - Step 17497: {'lr': 0.0003820586452320975, 'samples': 8958976, 'steps': 17497, 'loss/train': 1.6372029781341553} +02/25/2022 00:14:15 - INFO - codeparrot_training - Step 17498: {'lr': 0.0003820447516078803, 'samples': 8959488, 'steps': 17498, 'loss/train': 1.2810819149017334} +02/25/2022 00:14:21 - INFO - codeparrot_training - Step 17499: {'lr': 0.0003820308574180253, 'samples': 8960000, 'steps': 17499, 'loss/train': 1.4078881740570068} +02/25/2022 00:14:25 - INFO - codeparrot_training - Step 17500: {'lr': 0.000382016962662592, 'samples': 8960512, 'steps': 17500, 'loss/train': 1.9532833099365234} +02/25/2022 00:14:30 - INFO - codeparrot_training - Step 17501: {'lr': 0.0003820030673416399, 'samples': 8961024, 'steps': 17501, 'loss/train': 2.0017523765563965} +02/25/2022 00:14:34 - INFO - codeparrot_training - Step 17502: {'lr': 0.0003819891714552287, 'samples': 8961536, 'steps': 17502, 'loss/train': 2.565995693206787} +02/25/2022 00:14:39 - INFO - codeparrot_training - Step 17503: {'lr': 0.00038197527500341777, 'samples': 8962048, 'steps': 17503, 'loss/train': 1.6449755430221558} +02/25/2022 00:14:43 - INFO - codeparrot_training - Step 17504: {'lr': 0.00038196137798626663, 'samples': 8962560, 'steps': 17504, 'loss/train': 1.8359822034835815} +02/25/2022 00:14:48 - INFO - codeparrot_training - Step 17505: {'lr': 0.00038194748040383487, 'samples': 8963072, 'steps': 17505, 'loss/train': 2.4941248893737793} +02/25/2022 00:14:52 - INFO - codeparrot_training - Step 17506: {'lr': 0.00038193358225618195, 'samples': 8963584, 'steps': 17506, 'loss/train': 3.04447865486145} +02/25/2022 00:14:57 - INFO - codeparrot_training - Step 17507: {'lr': 0.0003819196835433675, 'samples': 8964096, 'steps': 17507, 'loss/train': 1.7993943691253662} +02/25/2022 00:15:01 - INFO - codeparrot_training - Step 17508: {'lr': 0.000381905784265451, 'samples': 8964608, 'steps': 17508, 'loss/train': 1.528943657875061} +02/25/2022 00:15:08 - INFO - codeparrot_training - Step 17509: {'lr': 0.000381891884422492, 'samples': 8965120, 'steps': 17509, 'loss/train': 1.7222402095794678} +02/25/2022 00:15:11 - INFO - codeparrot_training - Step 17510: {'lr': 0.0003818779840145501, 'samples': 8965632, 'steps': 17510, 'loss/train': 2.2193057537078857} +02/25/2022 00:15:16 - INFO - codeparrot_training - Step 17511: {'lr': 0.00038186408304168474, 'samples': 8966144, 'steps': 17511, 'loss/train': 1.5934960842132568} +02/25/2022 00:15:20 - INFO - codeparrot_training - Step 17512: {'lr': 0.00038185018150395557, 'samples': 8966656, 'steps': 17512, 'loss/train': 1.2100094556808472} +02/25/2022 00:15:25 - INFO - codeparrot_training - Step 17513: {'lr': 0.000381836279401422, 'samples': 8967168, 'steps': 17513, 'loss/train': 2.0431087017059326} +02/25/2022 00:15:29 - INFO - codeparrot_training - Step 17514: {'lr': 0.00038182237673414375, 'samples': 8967680, 'steps': 17514, 'loss/train': 1.8947263956069946} +02/25/2022 00:15:35 - INFO - codeparrot_training - Step 17515: {'lr': 0.0003818084735021803, 'samples': 8968192, 'steps': 17515, 'loss/train': 1.9881848096847534} +02/25/2022 00:15:38 - INFO - codeparrot_training - Step 17516: {'lr': 0.00038179456970559116, 'samples': 8968704, 'steps': 17516, 'loss/train': 1.73069167137146} +02/25/2022 00:15:44 - INFO - codeparrot_training - Step 17517: {'lr': 0.00038178066534443587, 'samples': 8969216, 'steps': 17517, 'loss/train': 1.9151536226272583} +02/25/2022 00:15:47 - INFO - codeparrot_training - Step 17518: {'lr': 0.00038176676041877424, 'samples': 8969728, 'steps': 17518, 'loss/train': 2.415066957473755} +02/25/2022 00:15:53 - INFO - codeparrot_training - Step 17519: {'lr': 0.0003817528549286655, 'samples': 8970240, 'steps': 17519, 'loss/train': 2.235966920852661} +02/25/2022 00:15:56 - INFO - codeparrot_training - Step 17520: {'lr': 0.00038173894887416946, 'samples': 8970752, 'steps': 17520, 'loss/train': 1.8851059675216675} +02/25/2022 00:16:02 - INFO - codeparrot_training - Step 17521: {'lr': 0.0003817250422553455, 'samples': 8971264, 'steps': 17521, 'loss/train': 2.310234546661377} +02/25/2022 00:16:05 - INFO - codeparrot_training - Step 17522: {'lr': 0.0003817111350722533, 'samples': 8971776, 'steps': 17522, 'loss/train': 2.544466495513916} +02/25/2022 00:16:11 - INFO - codeparrot_training - Step 17523: {'lr': 0.0003816972273249525, 'samples': 8972288, 'steps': 17523, 'loss/train': 3.050013303756714} +02/25/2022 00:16:14 - INFO - codeparrot_training - Step 17524: {'lr': 0.00038168331901350253, 'samples': 8972800, 'steps': 17524, 'loss/train': 1.6691980361938477} +02/25/2022 00:16:20 - INFO - codeparrot_training - Step 17525: {'lr': 0.0003816694101379631, 'samples': 8973312, 'steps': 17525, 'loss/train': 1.4608827829360962} +02/25/2022 00:16:24 - INFO - codeparrot_training - Step 17526: {'lr': 0.0003816555006983936, 'samples': 8973824, 'steps': 17526, 'loss/train': 1.6421570777893066} +02/25/2022 00:16:29 - INFO - codeparrot_training - Step 17527: {'lr': 0.0003816415906948538, 'samples': 8974336, 'steps': 17527, 'loss/train': 2.1995041370391846} +02/25/2022 00:16:33 - INFO - codeparrot_training - Step 17528: {'lr': 0.00038162768012740323, 'samples': 8974848, 'steps': 17528, 'loss/train': 1.6848971843719482} +02/25/2022 00:16:38 - INFO - codeparrot_training - Step 17529: {'lr': 0.00038161376899610154, 'samples': 8975360, 'steps': 17529, 'loss/train': 3.63096284866333} +02/25/2022 00:16:42 - INFO - codeparrot_training - Step 17530: {'lr': 0.0003815998573010082, 'samples': 8975872, 'steps': 17530, 'loss/train': 2.397806167602539} +02/25/2022 00:16:47 - INFO - codeparrot_training - Step 17531: {'lr': 0.0003815859450421829, 'samples': 8976384, 'steps': 17531, 'loss/train': 3.6741068363189697} +02/25/2022 00:16:51 - INFO - codeparrot_training - Step 17532: {'lr': 0.00038157203221968514, 'samples': 8976896, 'steps': 17532, 'loss/train': 2.158233165740967} +02/25/2022 00:16:57 - INFO - codeparrot_training - Step 17533: {'lr': 0.00038155811883357454, 'samples': 8977408, 'steps': 17533, 'loss/train': 1.203019380569458} +02/25/2022 00:17:00 - INFO - codeparrot_training - Step 17534: {'lr': 0.0003815442048839108, 'samples': 8977920, 'steps': 17534, 'loss/train': 1.6776306629180908} +02/25/2022 00:17:06 - INFO - codeparrot_training - Step 17535: {'lr': 0.0003815302903707534, 'samples': 8978432, 'steps': 17535, 'loss/train': 2.1549835205078125} +02/25/2022 00:17:10 - INFO - codeparrot_training - Step 17536: {'lr': 0.0003815163752941621, 'samples': 8978944, 'steps': 17536, 'loss/train': 1.0324739217758179} +02/25/2022 00:17:15 - INFO - codeparrot_training - Step 17537: {'lr': 0.00038150245965419636, 'samples': 8979456, 'steps': 17537, 'loss/train': 1.4928556680679321} +02/25/2022 00:17:19 - INFO - codeparrot_training - Step 17538: {'lr': 0.0003814885434509158, 'samples': 8979968, 'steps': 17538, 'loss/train': 1.694128394126892} +02/25/2022 00:17:24 - INFO - codeparrot_training - Step 17539: {'lr': 0.0003814746266843801, 'samples': 8980480, 'steps': 17539, 'loss/train': 2.3358724117279053} +02/25/2022 00:17:28 - INFO - codeparrot_training - Step 17540: {'lr': 0.0003814607093546489, 'samples': 8980992, 'steps': 17540, 'loss/train': 0.5446189641952515} +02/25/2022 00:17:33 - INFO - codeparrot_training - Step 17541: {'lr': 0.00038144679146178166, 'samples': 8981504, 'steps': 17541, 'loss/train': 2.15185546875} +02/25/2022 00:17:37 - INFO - codeparrot_training - Step 17542: {'lr': 0.00038143287300583816, 'samples': 8982016, 'steps': 17542, 'loss/train': 1.3598071336746216} +02/25/2022 00:17:42 - INFO - codeparrot_training - Step 17543: {'lr': 0.00038141895398687806, 'samples': 8982528, 'steps': 17543, 'loss/train': 2.9764785766601562} +02/25/2022 00:17:46 - INFO - codeparrot_training - Step 17544: {'lr': 0.0003814050344049608, 'samples': 8983040, 'steps': 17544, 'loss/train': 1.0933226346969604} +02/25/2022 00:17:52 - INFO - codeparrot_training - Step 17545: {'lr': 0.00038139111426014607, 'samples': 8983552, 'steps': 17545, 'loss/train': 1.916902780532837} +02/25/2022 00:17:55 - INFO - codeparrot_training - Step 17546: {'lr': 0.00038137719355249355, 'samples': 8984064, 'steps': 17546, 'loss/train': 1.3877387046813965} +02/25/2022 00:18:01 - INFO - codeparrot_training - Step 17547: {'lr': 0.00038136327228206285, 'samples': 8984576, 'steps': 17547, 'loss/train': 1.461612343788147} +02/25/2022 00:18:04 - INFO - codeparrot_training - Step 17548: {'lr': 0.0003813493504489136, 'samples': 8985088, 'steps': 17548, 'loss/train': 2.3623855113983154} +02/25/2022 00:18:10 - INFO - codeparrot_training - Step 17549: {'lr': 0.0003813354280531055, 'samples': 8985600, 'steps': 17549, 'loss/train': 1.20237135887146} +02/25/2022 00:18:13 - INFO - codeparrot_training - Step 17550: {'lr': 0.00038132150509469806, 'samples': 8986112, 'steps': 17550, 'loss/train': 2.5042812824249268} +02/25/2022 00:18:19 - INFO - codeparrot_training - Step 17551: {'lr': 0.000381307581573751, 'samples': 8986624, 'steps': 17551, 'loss/train': 1.842660665512085} +02/25/2022 00:18:22 - INFO - codeparrot_training - Step 17552: {'lr': 0.00038129365749032395, 'samples': 8987136, 'steps': 17552, 'loss/train': 3.0439553260803223} +02/25/2022 00:18:28 - INFO - codeparrot_training - Step 17553: {'lr': 0.0003812797328444766, 'samples': 8987648, 'steps': 17553, 'loss/train': 2.318154811859131} +02/25/2022 00:18:31 - INFO - codeparrot_training - Step 17554: {'lr': 0.0003812658076362685, 'samples': 8988160, 'steps': 17554, 'loss/train': 2.430816411972046} +02/25/2022 00:18:37 - INFO - codeparrot_training - Step 17555: {'lr': 0.00038125188186575944, 'samples': 8988672, 'steps': 17555, 'loss/train': 2.5937442779541016} +02/25/2022 00:18:40 - INFO - codeparrot_training - Step 17556: {'lr': 0.00038123795553300893, 'samples': 8989184, 'steps': 17556, 'loss/train': 1.7834750413894653} +02/25/2022 00:18:46 - INFO - codeparrot_training - Step 17557: {'lr': 0.0003812240286380767, 'samples': 8989696, 'steps': 17557, 'loss/train': 1.567370891571045} +02/25/2022 00:18:50 - INFO - codeparrot_training - Step 17558: {'lr': 0.0003812101011810224, 'samples': 8990208, 'steps': 17558, 'loss/train': 2.460343360900879} +02/25/2022 00:18:58 - INFO - codeparrot_training - Step 17559: {'lr': 0.0003811961731619057, 'samples': 8990720, 'steps': 17559, 'loss/train': 2.5448851585388184} +02/25/2022 00:19:01 - INFO - codeparrot_training - Step 17560: {'lr': 0.0003811822445807863, 'samples': 8991232, 'steps': 17560, 'loss/train': 1.2904130220413208} +02/25/2022 00:19:07 - INFO - codeparrot_training - Step 17561: {'lr': 0.00038116831543772377, 'samples': 8991744, 'steps': 17561, 'loss/train': 2.130690097808838} +02/25/2022 00:19:10 - INFO - codeparrot_training - Step 17562: {'lr': 0.00038115438573277784, 'samples': 8992256, 'steps': 17562, 'loss/train': 2.2204508781433105} +02/25/2022 00:19:16 - INFO - codeparrot_training - Step 17563: {'lr': 0.0003811404554660082, 'samples': 8992768, 'steps': 17563, 'loss/train': 2.2739343643188477} +02/25/2022 00:19:19 - INFO - codeparrot_training - Step 17564: {'lr': 0.00038112652463747444, 'samples': 8993280, 'steps': 17564, 'loss/train': 2.316910982131958} +02/25/2022 00:19:25 - INFO - codeparrot_training - Step 17565: {'lr': 0.00038111259324723624, 'samples': 8993792, 'steps': 17565, 'loss/train': 1.7901859283447266} +02/25/2022 00:19:28 - INFO - codeparrot_training - Step 17566: {'lr': 0.0003810986612953534, 'samples': 8994304, 'steps': 17566, 'loss/train': 1.536866307258606} +02/25/2022 00:19:34 - INFO - codeparrot_training - Step 17567: {'lr': 0.0003810847287818855, 'samples': 8994816, 'steps': 17567, 'loss/train': 2.2681477069854736} +02/25/2022 00:19:37 - INFO - codeparrot_training - Step 17568: {'lr': 0.0003810707957068923, 'samples': 8995328, 'steps': 17568, 'loss/train': 2.4871232509613037} +02/25/2022 00:19:41 - INFO - codeparrot_training - Step 17569: {'lr': 0.0003810568620704334, 'samples': 8995840, 'steps': 17569, 'loss/train': 2.3494536876678467} +02/25/2022 00:19:48 - INFO - codeparrot_training - Step 17570: {'lr': 0.00038104292787256844, 'samples': 8996352, 'steps': 17570, 'loss/train': 2.346104860305786} +02/25/2022 00:19:52 - INFO - codeparrot_training - Step 17571: {'lr': 0.0003810289931133573, 'samples': 8996864, 'steps': 17571, 'loss/train': 2.085747241973877} +02/25/2022 00:19:57 - INFO - codeparrot_training - Step 17572: {'lr': 0.0003810150577928595, 'samples': 8997376, 'steps': 17572, 'loss/train': 1.8231298923492432} +02/25/2022 00:20:03 - INFO - codeparrot_training - Step 17573: {'lr': 0.0003810011219111348, 'samples': 8997888, 'steps': 17573, 'loss/train': 2.469087600708008} +02/25/2022 00:20:06 - INFO - codeparrot_training - Step 17574: {'lr': 0.00038098718546824287, 'samples': 8998400, 'steps': 17574, 'loss/train': 1.6692852973937988} +02/25/2022 00:20:10 - INFO - codeparrot_training - Step 17575: {'lr': 0.00038097324846424354, 'samples': 8998912, 'steps': 17575, 'loss/train': 2.06733775138855} +02/25/2022 00:20:15 - INFO - codeparrot_training - Step 17576: {'lr': 0.0003809593108991962, 'samples': 8999424, 'steps': 17576, 'loss/train': 1.8917266130447388} +02/25/2022 00:20:19 - INFO - codeparrot_training - Step 17577: {'lr': 0.0003809453727731609, 'samples': 8999936, 'steps': 17577, 'loss/train': 1.5943111181259155} +02/25/2022 00:20:24 - INFO - codeparrot_training - Step 17578: {'lr': 0.00038093143408619726, 'samples': 9000448, 'steps': 17578, 'loss/train': 1.589393973350525} +02/25/2022 00:20:28 - INFO - codeparrot_training - Step 17579: {'lr': 0.0003809174948383648, 'samples': 9000960, 'steps': 17579, 'loss/train': 2.8796637058258057} +02/25/2022 00:20:36 - INFO - codeparrot_training - Step 17580: {'lr': 0.0003809035550297234, 'samples': 9001472, 'steps': 17580, 'loss/train': 1.3443183898925781} +02/25/2022 00:20:39 - INFO - codeparrot_training - Step 17581: {'lr': 0.00038088961466033276, 'samples': 9001984, 'steps': 17581, 'loss/train': 2.3200275897979736} +02/25/2022 00:20:45 - INFO - codeparrot_training - Step 17582: {'lr': 0.00038087567373025255, 'samples': 9002496, 'steps': 17582, 'loss/train': 2.038069486618042} +02/25/2022 00:20:48 - INFO - codeparrot_training - Step 17583: {'lr': 0.0003808617322395425, 'samples': 9003008, 'steps': 17583, 'loss/train': 1.5535337924957275} +02/25/2022 00:20:54 - INFO - codeparrot_training - Step 17584: {'lr': 0.00038084779018826245, 'samples': 9003520, 'steps': 17584, 'loss/train': 1.5001914501190186} +02/25/2022 00:20:57 - INFO - codeparrot_training - Step 17585: {'lr': 0.00038083384757647186, 'samples': 9004032, 'steps': 17585, 'loss/train': 2.75010085105896} +02/25/2022 00:21:03 - INFO - codeparrot_training - Step 17586: {'lr': 0.0003808199044042308, 'samples': 9004544, 'steps': 17586, 'loss/train': 2.0771737098693848} +02/25/2022 00:21:06 - INFO - codeparrot_training - Step 17587: {'lr': 0.00038080596067159865, 'samples': 9005056, 'steps': 17587, 'loss/train': 2.6117703914642334} +02/25/2022 00:21:12 - INFO - codeparrot_training - Step 17588: {'lr': 0.0003807920163786353, 'samples': 9005568, 'steps': 17588, 'loss/train': 1.8987400531768799} +02/25/2022 00:21:15 - INFO - codeparrot_training - Step 17589: {'lr': 0.0003807780715254006, 'samples': 9006080, 'steps': 17589, 'loss/train': 1.979446291923523} +02/25/2022 00:21:21 - INFO - codeparrot_training - Step 17590: {'lr': 0.000380764126111954, 'samples': 9006592, 'steps': 17590, 'loss/train': 2.4507508277893066} +02/25/2022 00:21:24 - INFO - codeparrot_training - Step 17591: {'lr': 0.0003807501801383555, 'samples': 9007104, 'steps': 17591, 'loss/train': 2.1036577224731445} +02/25/2022 00:21:32 - INFO - codeparrot_training - Step 17592: {'lr': 0.0003807362336046648, 'samples': 9007616, 'steps': 17592, 'loss/train': 2.216003656387329} +02/25/2022 00:21:35 - INFO - codeparrot_training - Step 17593: {'lr': 0.00038072228651094155, 'samples': 9008128, 'steps': 17593, 'loss/train': 1.699267029762268} +02/25/2022 00:21:41 - INFO - codeparrot_training - Step 17594: {'lr': 0.0003807083388572455, 'samples': 9008640, 'steps': 17594, 'loss/train': 2.1328063011169434} +02/25/2022 00:21:44 - INFO - codeparrot_training - Step 17595: {'lr': 0.0003806943906436364, 'samples': 9009152, 'steps': 17595, 'loss/train': 2.195122003555298} +02/25/2022 00:21:50 - INFO - codeparrot_training - Step 17596: {'lr': 0.0003806804418701741, 'samples': 9009664, 'steps': 17596, 'loss/train': 1.8058260679244995} +02/25/2022 00:21:53 - INFO - codeparrot_training - Step 17597: {'lr': 0.0003806664925369183, 'samples': 9010176, 'steps': 17597, 'loss/train': 1.5706242322921753} +02/25/2022 00:21:59 - INFO - codeparrot_training - Step 17598: {'lr': 0.0003806525426439287, 'samples': 9010688, 'steps': 17598, 'loss/train': 1.4891386032104492} +02/25/2022 00:22:02 - INFO - codeparrot_training - Step 17599: {'lr': 0.00038063859219126514, 'samples': 9011200, 'steps': 17599, 'loss/train': 1.4190083742141724} +02/25/2022 00:22:09 - INFO - codeparrot_training - Step 17600: {'lr': 0.0003806246411789872, 'samples': 9011712, 'steps': 17600, 'loss/train': 1.6553376913070679} +02/25/2022 00:22:12 - INFO - codeparrot_training - Step 17601: {'lr': 0.00038061068960715494, 'samples': 9012224, 'steps': 17601, 'loss/train': 1.555208683013916} +02/25/2022 00:22:16 - INFO - codeparrot_training - Step 17602: {'lr': 0.00038059673747582783, 'samples': 9012736, 'steps': 17602, 'loss/train': 0.843410849571228} +02/25/2022 00:22:21 - INFO - codeparrot_training - Step 17603: {'lr': 0.00038058278478506584, 'samples': 9013248, 'steps': 17603, 'loss/train': 0.4604855477809906} +02/25/2022 00:22:25 - INFO - codeparrot_training - Step 17604: {'lr': 0.0003805688315349286, 'samples': 9013760, 'steps': 17604, 'loss/train': 1.3503074645996094} +02/25/2022 00:22:30 - INFO - codeparrot_training - Step 17605: {'lr': 0.00038055487772547603, 'samples': 9014272, 'steps': 17605, 'loss/train': 1.3851650953292847} +02/25/2022 00:22:34 - INFO - codeparrot_training - Step 17606: {'lr': 0.00038054092335676774, 'samples': 9014784, 'steps': 17606, 'loss/train': 2.7930288314819336} +02/25/2022 00:22:41 - INFO - codeparrot_training - Step 17607: {'lr': 0.00038052696842886364, 'samples': 9015296, 'steps': 17607, 'loss/train': 1.7360361814498901} +02/25/2022 00:22:45 - INFO - codeparrot_training - Step 17608: {'lr': 0.0003805130129418235, 'samples': 9015808, 'steps': 17608, 'loss/train': 1.829670786857605} +02/25/2022 00:22:50 - INFO - codeparrot_training - Step 17609: {'lr': 0.00038049905689570697, 'samples': 9016320, 'steps': 17609, 'loss/train': 2.2178890705108643} +02/25/2022 00:22:54 - INFO - codeparrot_training - Step 17610: {'lr': 0.00038048510029057393, 'samples': 9016832, 'steps': 17610, 'loss/train': 1.4339778423309326} +02/25/2022 00:22:59 - INFO - codeparrot_training - Step 17611: {'lr': 0.00038047114312648414, 'samples': 9017344, 'steps': 17611, 'loss/train': 2.061182975769043} +02/25/2022 00:23:05 - INFO - codeparrot_training - Step 17612: {'lr': 0.0003804571854034975, 'samples': 9017856, 'steps': 17612, 'loss/train': 1.550879716873169} +02/25/2022 00:23:08 - INFO - codeparrot_training - Step 17613: {'lr': 0.0003804432271216736, 'samples': 9018368, 'steps': 17613, 'loss/train': 1.9707456827163696} +02/25/2022 00:23:12 - INFO - codeparrot_training - Step 17614: {'lr': 0.0003804292682810724, 'samples': 9018880, 'steps': 17614, 'loss/train': 2.291849136352539} +02/25/2022 00:23:17 - INFO - codeparrot_training - Step 17615: {'lr': 0.00038041530888175356, 'samples': 9019392, 'steps': 17615, 'loss/train': 2.577650785446167} +02/25/2022 00:23:21 - INFO - codeparrot_training - Step 17616: {'lr': 0.00038040134892377696, 'samples': 9019904, 'steps': 17616, 'loss/train': 2.0950605869293213} +02/25/2022 00:23:28 - INFO - codeparrot_training - Step 17617: {'lr': 0.00038038738840720244, 'samples': 9020416, 'steps': 17617, 'loss/train': 2.4005303382873535} +02/25/2022 00:23:32 - INFO - codeparrot_training - Step 17618: {'lr': 0.0003803734273320897, 'samples': 9020928, 'steps': 17618, 'loss/train': 1.9828367233276367} +02/25/2022 00:23:37 - INFO - codeparrot_training - Step 17619: {'lr': 0.0003803594656984986, 'samples': 9021440, 'steps': 17619, 'loss/train': 2.0146870613098145} +02/25/2022 00:23:41 - INFO - codeparrot_training - Step 17620: {'lr': 0.000380345503506489, 'samples': 9021952, 'steps': 17620, 'loss/train': 2.2994463443756104} +02/25/2022 00:23:46 - INFO - codeparrot_training - Step 17621: {'lr': 0.00038033154075612063, 'samples': 9022464, 'steps': 17621, 'loss/train': 2.2996325492858887} +02/25/2022 00:23:50 - INFO - codeparrot_training - Step 17622: {'lr': 0.00038031757744745327, 'samples': 9022976, 'steps': 17622, 'loss/train': 2.190804958343506} +02/25/2022 00:23:55 - INFO - codeparrot_training - Step 17623: {'lr': 0.0003803036135805469, 'samples': 9023488, 'steps': 17623, 'loss/train': 2.294494152069092} +02/25/2022 00:23:59 - INFO - codeparrot_training - Step 17624: {'lr': 0.00038028964915546107, 'samples': 9024000, 'steps': 17624, 'loss/train': 1.5395901203155518} +02/25/2022 00:24:04 - INFO - codeparrot_training - Step 17625: {'lr': 0.00038027568417225586, 'samples': 9024512, 'steps': 17625, 'loss/train': 1.272788643836975} +02/25/2022 00:24:08 - INFO - codeparrot_training - Step 17626: {'lr': 0.00038026171863099093, 'samples': 9025024, 'steps': 17626, 'loss/train': 1.6162992715835571} +02/25/2022 00:24:15 - INFO - codeparrot_training - Step 17627: {'lr': 0.0003802477525317263, 'samples': 9025536, 'steps': 17627, 'loss/train': 2.5885162353515625} +02/25/2022 00:24:19 - INFO - codeparrot_training - Step 17628: {'lr': 0.00038023378587452144, 'samples': 9026048, 'steps': 17628, 'loss/train': 0.10160930454730988} +02/25/2022 00:24:24 - INFO - codeparrot_training - Step 17629: {'lr': 0.0003802198186594366, 'samples': 9026560, 'steps': 17629, 'loss/train': 2.385128974914551} +02/25/2022 00:24:27 - INFO - codeparrot_training - Step 17630: {'lr': 0.00038020585088653126, 'samples': 9027072, 'steps': 17630, 'loss/train': 1.407119631767273} +02/25/2022 00:24:33 - INFO - codeparrot_training - Step 17631: {'lr': 0.00038019188255586546, 'samples': 9027584, 'steps': 17631, 'loss/train': 1.7080103158950806} +02/25/2022 00:24:39 - INFO - codeparrot_training - Step 17632: {'lr': 0.00038017791366749896, 'samples': 9028096, 'steps': 17632, 'loss/train': 1.6353788375854492} +02/25/2022 00:24:42 - INFO - codeparrot_training - Step 17633: {'lr': 0.0003801639442214916, 'samples': 9028608, 'steps': 17633, 'loss/train': 1.9614137411117554} +02/25/2022 00:24:46 - INFO - codeparrot_training - Step 17634: {'lr': 0.0003801499742179033, 'samples': 9029120, 'steps': 17634, 'loss/train': 2.6898295879364014} +02/25/2022 00:24:51 - INFO - codeparrot_training - Step 17635: {'lr': 0.0003801360036567938, 'samples': 9029632, 'steps': 17635, 'loss/train': 1.8056670427322388} +02/25/2022 00:24:55 - INFO - codeparrot_training - Step 17636: {'lr': 0.000380122032538223, 'samples': 9030144, 'steps': 17636, 'loss/train': 2.6003236770629883} +02/25/2022 00:25:00 - INFO - codeparrot_training - Step 17637: {'lr': 0.0003801080608622507, 'samples': 9030656, 'steps': 17637, 'loss/train': 0.978050172328949} +02/25/2022 00:25:04 - INFO - codeparrot_training - Step 17638: {'lr': 0.0003800940886289368, 'samples': 9031168, 'steps': 17638, 'loss/train': 1.6452912092208862} +02/25/2022 00:25:11 - INFO - codeparrot_training - Step 17639: {'lr': 0.0003800801158383411, 'samples': 9031680, 'steps': 17639, 'loss/train': 1.1491564512252808} +02/25/2022 00:25:15 - INFO - codeparrot_training - Step 17640: {'lr': 0.00038006614249052353, 'samples': 9032192, 'steps': 17640, 'loss/train': 2.4591915607452393} +02/25/2022 00:25:20 - INFO - codeparrot_training - Step 17641: {'lr': 0.0003800521685855439, 'samples': 9032704, 'steps': 17641, 'loss/train': 0.5061256289482117} +02/25/2022 00:25:24 - INFO - codeparrot_training - Step 17642: {'lr': 0.000380038194123462, 'samples': 9033216, 'steps': 17642, 'loss/train': 2.544386148452759} +02/25/2022 00:25:29 - INFO - codeparrot_training - Step 17643: {'lr': 0.0003800242191043379, 'samples': 9033728, 'steps': 17643, 'loss/train': 3.2017595767974854} +02/25/2022 00:25:33 - INFO - codeparrot_training - Step 17644: {'lr': 0.00038001024352823123, 'samples': 9034240, 'steps': 17644, 'loss/train': 2.2780935764312744} +02/25/2022 00:25:38 - INFO - codeparrot_training - Step 17645: {'lr': 0.00037999626739520197, 'samples': 9034752, 'steps': 17645, 'loss/train': 2.3560986518859863} +02/25/2022 00:25:42 - INFO - codeparrot_training - Step 17646: {'lr': 0.00037998229070531, 'samples': 9035264, 'steps': 17646, 'loss/train': 1.8259999752044678} +02/25/2022 00:25:47 - INFO - codeparrot_training - Step 17647: {'lr': 0.0003799683134586152, 'samples': 9035776, 'steps': 17647, 'loss/train': 2.271980047225952} +02/25/2022 00:25:51 - INFO - codeparrot_training - Step 17648: {'lr': 0.0003799543356551773, 'samples': 9036288, 'steps': 17648, 'loss/train': 1.0147898197174072} +02/25/2022 00:25:56 - INFO - codeparrot_training - Step 17649: {'lr': 0.0003799403572950565, 'samples': 9036800, 'steps': 17649, 'loss/train': 1.4735788106918335} +02/25/2022 00:26:00 - INFO - codeparrot_training - Step 17650: {'lr': 0.00037992637837831235, 'samples': 9037312, 'steps': 17650, 'loss/train': 1.616865873336792} +02/25/2022 00:26:05 - INFO - codeparrot_training - Step 17651: {'lr': 0.00037991239890500483, 'samples': 9037824, 'steps': 17651, 'loss/train': 0.1368354707956314} +02/25/2022 00:26:09 - INFO - codeparrot_training - Step 17652: {'lr': 0.00037989841887519385, 'samples': 9038336, 'steps': 17652, 'loss/train': 1.9762451648712158} +02/25/2022 00:26:14 - INFO - codeparrot_training - Step 17653: {'lr': 0.00037988443828893936, 'samples': 9038848, 'steps': 17653, 'loss/train': 2.2848517894744873} +02/25/2022 00:26:18 - INFO - codeparrot_training - Step 17654: {'lr': 0.0003798704571463011, 'samples': 9039360, 'steps': 17654, 'loss/train': 2.704603433609009} +02/25/2022 00:26:25 - INFO - codeparrot_training - Step 17655: {'lr': 0.00037985647544733903, 'samples': 9039872, 'steps': 17655, 'loss/train': 2.0447399616241455} +02/25/2022 00:26:29 - INFO - codeparrot_training - Step 17656: {'lr': 0.0003798424931921131, 'samples': 9040384, 'steps': 17656, 'loss/train': 1.9364594221115112} +02/25/2022 00:26:34 - INFO - codeparrot_training - Step 17657: {'lr': 0.0003798285103806831, 'samples': 9040896, 'steps': 17657, 'loss/train': 1.139986515045166} +02/25/2022 00:26:38 - INFO - codeparrot_training - Step 17658: {'lr': 0.0003798145270131091, 'samples': 9041408, 'steps': 17658, 'loss/train': 0.7799100279808044} +02/25/2022 00:26:43 - INFO - codeparrot_training - Step 17659: {'lr': 0.00037980054308945076, 'samples': 9041920, 'steps': 17659, 'loss/train': 2.321681261062622} +02/25/2022 00:26:47 - INFO - codeparrot_training - Step 17660: {'lr': 0.00037978655860976826, 'samples': 9042432, 'steps': 17660, 'loss/train': 1.8950034379959106} +02/25/2022 00:26:52 - INFO - codeparrot_training - Step 17661: {'lr': 0.0003797725735741212, 'samples': 9042944, 'steps': 17661, 'loss/train': 1.894861102104187} +02/25/2022 00:26:59 - INFO - codeparrot_training - Step 17662: {'lr': 0.0003797585879825698, 'samples': 9043456, 'steps': 17662, 'loss/train': 1.2853599786758423} +02/25/2022 00:27:03 - INFO - codeparrot_training - Step 17663: {'lr': 0.00037974460183517366, 'samples': 9043968, 'steps': 17663, 'loss/train': 1.0120782852172852} +02/25/2022 00:27:08 - INFO - codeparrot_training - Step 17664: {'lr': 0.0003797306151319929, 'samples': 9044480, 'steps': 17664, 'loss/train': 1.5664876699447632} +02/25/2022 00:27:12 - INFO - codeparrot_training - Step 17665: {'lr': 0.00037971662787308734, 'samples': 9044992, 'steps': 17665, 'loss/train': 0.9982131719589233} +02/25/2022 00:27:18 - INFO - codeparrot_training - Step 17666: {'lr': 0.00037970264005851703, 'samples': 9045504, 'steps': 17666, 'loss/train': 3.467337131500244} +02/25/2022 00:27:22 - INFO - codeparrot_training - Step 17667: {'lr': 0.0003796886516883418, 'samples': 9046016, 'steps': 17667, 'loss/train': 1.6415878534317017} +02/25/2022 00:27:25 - INFO - codeparrot_training - Step 17668: {'lr': 0.0003796746627626214, 'samples': 9046528, 'steps': 17668, 'loss/train': 3.6356019973754883} +02/25/2022 00:27:31 - INFO - codeparrot_training - Step 17669: {'lr': 0.00037966067328141606, 'samples': 9047040, 'steps': 17669, 'loss/train': 2.9348771572113037} +02/25/2022 00:27:34 - INFO - codeparrot_training - Step 17670: {'lr': 0.0003796466832447856, 'samples': 9047552, 'steps': 17670, 'loss/train': 2.1387054920196533} +02/25/2022 00:27:40 - INFO - codeparrot_training - Step 17671: {'lr': 0.00037963269265278986, 'samples': 9048064, 'steps': 17671, 'loss/train': 2.7863388061523438} +02/25/2022 00:27:43 - INFO - codeparrot_training - Step 17672: {'lr': 0.0003796187015054888, 'samples': 9048576, 'steps': 17672, 'loss/train': 1.904708981513977} +02/25/2022 00:27:51 - INFO - codeparrot_training - Step 17673: {'lr': 0.0003796047098029424, 'samples': 9049088, 'steps': 17673, 'loss/train': 1.4503318071365356} +02/25/2022 00:27:54 - INFO - codeparrot_training - Step 17674: {'lr': 0.0003795907175452106, 'samples': 9049600, 'steps': 17674, 'loss/train': 0.6155263185501099} +02/25/2022 00:28:00 - INFO - codeparrot_training - Step 17675: {'lr': 0.0003795767247323533, 'samples': 9050112, 'steps': 17675, 'loss/train': 2.0112037658691406} +02/25/2022 00:28:03 - INFO - codeparrot_training - Step 17676: {'lr': 0.00037956273136443056, 'samples': 9050624, 'steps': 17676, 'loss/train': 1.6704438924789429} +02/25/2022 00:28:09 - INFO - codeparrot_training - Step 17677: {'lr': 0.000379548737441502, 'samples': 9051136, 'steps': 17677, 'loss/train': 3.0951290130615234} +02/25/2022 00:28:12 - INFO - codeparrot_training - Step 17678: {'lr': 0.00037953474296362796, 'samples': 9051648, 'steps': 17678, 'loss/train': 2.1795482635498047} +02/25/2022 00:28:18 - INFO - codeparrot_training - Step 17679: {'lr': 0.0003795207479308681, 'samples': 9052160, 'steps': 17679, 'loss/train': 2.0755298137664795} +02/25/2022 00:28:21 - INFO - codeparrot_training - Step 17680: {'lr': 0.00037950675234328256, 'samples': 9052672, 'steps': 17680, 'loss/train': 2.460949182510376} +02/25/2022 00:28:27 - INFO - codeparrot_training - Step 17681: {'lr': 0.00037949275620093124, 'samples': 9053184, 'steps': 17681, 'loss/train': 0.4283615052700043} +02/25/2022 00:28:30 - INFO - codeparrot_training - Step 17682: {'lr': 0.000379478759503874, 'samples': 9053696, 'steps': 17682, 'loss/train': 1.1838616132736206} +02/25/2022 00:28:36 - INFO - codeparrot_training - Step 17683: {'lr': 0.00037946476225217087, 'samples': 9054208, 'steps': 17683, 'loss/train': 0.9548600912094116} +02/25/2022 00:28:39 - INFO - codeparrot_training - Step 17684: {'lr': 0.0003794507644458819, 'samples': 9054720, 'steps': 17684, 'loss/train': 1.411368727684021} +02/25/2022 00:28:46 - INFO - codeparrot_training - Step 17685: {'lr': 0.00037943676608506683, 'samples': 9055232, 'steps': 17685, 'loss/train': 1.7294789552688599} +02/25/2022 00:28:50 - INFO - codeparrot_training - Step 17686: {'lr': 0.00037942276716978584, 'samples': 9055744, 'steps': 17686, 'loss/train': 1.638486385345459} +02/25/2022 00:28:55 - INFO - codeparrot_training - Step 17687: {'lr': 0.0003794087677000988, 'samples': 9056256, 'steps': 17687, 'loss/train': 2.057386636734009} +02/25/2022 00:28:59 - INFO - codeparrot_training - Step 17688: {'lr': 0.0003793947676760657, 'samples': 9056768, 'steps': 17688, 'loss/train': 1.859421730041504} +02/25/2022 00:29:04 - INFO - codeparrot_training - Step 17689: {'lr': 0.00037938076709774645, 'samples': 9057280, 'steps': 17689, 'loss/train': 2.4088358879089355} +02/25/2022 00:29:08 - INFO - codeparrot_training - Step 17690: {'lr': 0.0003793667659652011, 'samples': 9057792, 'steps': 17690, 'loss/train': 2.4362845420837402} +02/25/2022 00:29:13 - INFO - codeparrot_training - Step 17691: {'lr': 0.0003793527642784896, 'samples': 9058304, 'steps': 17691, 'loss/train': 2.44637131690979} +02/25/2022 00:29:17 - INFO - codeparrot_training - Step 17692: {'lr': 0.0003793387620376719, 'samples': 9058816, 'steps': 17692, 'loss/train': 1.7292999029159546} +02/25/2022 00:29:22 - INFO - codeparrot_training - Step 17693: {'lr': 0.0003793247592428081, 'samples': 9059328, 'steps': 17693, 'loss/train': 1.4550867080688477} +02/25/2022 00:29:26 - INFO - codeparrot_training - Step 17694: {'lr': 0.00037931075589395805, 'samples': 9059840, 'steps': 17694, 'loss/train': 1.7441051006317139} +02/25/2022 00:29:31 - INFO - codeparrot_training - Step 17695: {'lr': 0.00037929675199118183, 'samples': 9060352, 'steps': 17695, 'loss/train': 2.029517889022827} +02/25/2022 00:29:35 - INFO - codeparrot_training - Step 17696: {'lr': 0.0003792827475345393, 'samples': 9060864, 'steps': 17696, 'loss/train': 1.2710902690887451} +02/25/2022 00:29:40 - INFO - codeparrot_training - Step 17697: {'lr': 0.0003792687425240906, 'samples': 9061376, 'steps': 17697, 'loss/train': 1.5868641138076782} +02/25/2022 00:29:44 - INFO - codeparrot_training - Step 17698: {'lr': 0.0003792547369598956, 'samples': 9061888, 'steps': 17698, 'loss/train': 1.3753433227539062} +02/25/2022 00:29:51 - INFO - codeparrot_training - Step 17699: {'lr': 0.0003792407308420144, 'samples': 9062400, 'steps': 17699, 'loss/train': 1.7627010345458984} +02/25/2022 00:29:55 - INFO - codeparrot_training - Step 17700: {'lr': 0.00037922672417050685, 'samples': 9062912, 'steps': 17700, 'loss/train': 1.804733395576477} +02/25/2022 00:30:00 - INFO - codeparrot_training - Step 17701: {'lr': 0.00037921271694543317, 'samples': 9063424, 'steps': 17701, 'loss/train': 1.8018553256988525} +02/25/2022 00:30:04 - INFO - codeparrot_training - Step 17702: {'lr': 0.0003791987091668532, 'samples': 9063936, 'steps': 17702, 'loss/train': 1.9875469207763672} +02/25/2022 00:30:09 - INFO - codeparrot_training - Step 17703: {'lr': 0.00037918470083482693, 'samples': 9064448, 'steps': 17703, 'loss/train': 1.7927709817886353} +02/25/2022 00:30:13 - INFO - codeparrot_training - Step 17704: {'lr': 0.0003791706919494145, 'samples': 9064960, 'steps': 17704, 'loss/train': 2.337580680847168} +02/25/2022 00:30:18 - INFO - codeparrot_training - Step 17705: {'lr': 0.0003791566825106758, 'samples': 9065472, 'steps': 17705, 'loss/train': 2.726628065109253} +02/25/2022 00:30:22 - INFO - codeparrot_training - Step 17706: {'lr': 0.0003791426725186709, 'samples': 9065984, 'steps': 17706, 'loss/train': 2.2806732654571533} +02/25/2022 00:30:27 - INFO - codeparrot_training - Step 17707: {'lr': 0.0003791286619734597, 'samples': 9066496, 'steps': 17707, 'loss/train': 1.6257905960083008} +02/25/2022 00:30:31 - INFO - codeparrot_training - Step 17708: {'lr': 0.0003791146508751025, 'samples': 9067008, 'steps': 17708, 'loss/train': 0.14708131551742554} +02/25/2022 00:30:37 - INFO - codeparrot_training - Step 17709: {'lr': 0.00037910063922365903, 'samples': 9067520, 'steps': 17709, 'loss/train': 1.544282078742981} +02/25/2022 00:30:40 - INFO - codeparrot_training - Step 17710: {'lr': 0.00037908662701918944, 'samples': 9068032, 'steps': 17710, 'loss/train': 2.2766592502593994} +02/25/2022 00:30:43 - INFO - codeparrot_training - Step 17711: {'lr': 0.00037907261426175365, 'samples': 9068544, 'steps': 17711, 'loss/train': 2.3601431846618652} +02/25/2022 00:30:51 - INFO - codeparrot_training - Step 17712: {'lr': 0.0003790586009514119, 'samples': 9069056, 'steps': 17712, 'loss/train': 1.7010282278060913} +02/25/2022 00:30:54 - INFO - codeparrot_training - Step 17713: {'lr': 0.000379044587088224, 'samples': 9069568, 'steps': 17713, 'loss/train': 2.3169925212860107} +02/25/2022 00:31:00 - INFO - codeparrot_training - Step 17714: {'lr': 0.0003790305726722501, 'samples': 9070080, 'steps': 17714, 'loss/train': 1.6872769594192505} +02/25/2022 00:31:06 - INFO - codeparrot_training - Step 17715: {'lr': 0.00037901655770355015, 'samples': 9070592, 'steps': 17715, 'loss/train': 1.6401950120925903} +02/25/2022 00:31:09 - INFO - codeparrot_training - Step 17716: {'lr': 0.0003790025421821843, 'samples': 9071104, 'steps': 17716, 'loss/train': 2.0675594806671143} +02/25/2022 00:31:12 - INFO - codeparrot_training - Step 17717: {'lr': 0.0003789885261082124, 'samples': 9071616, 'steps': 17717, 'loss/train': 1.3518098592758179} +02/25/2022 00:31:18 - INFO - codeparrot_training - Step 17718: {'lr': 0.00037897450948169476, 'samples': 9072128, 'steps': 17718, 'loss/train': 2.6437594890594482} +02/25/2022 00:31:21 - INFO - codeparrot_training - Step 17719: {'lr': 0.0003789604923026912, 'samples': 9072640, 'steps': 17719, 'loss/train': 1.2582480907440186} +02/25/2022 00:31:27 - INFO - codeparrot_training - Step 17720: {'lr': 0.00037894647457126186, 'samples': 9073152, 'steps': 17720, 'loss/train': 1.8962912559509277} +02/25/2022 00:31:30 - INFO - codeparrot_training - Step 17721: {'lr': 0.0003789324562874668, 'samples': 9073664, 'steps': 17721, 'loss/train': 2.2809383869171143} +02/25/2022 00:31:38 - INFO - codeparrot_training - Step 17722: {'lr': 0.000378918437451366, 'samples': 9074176, 'steps': 17722, 'loss/train': 2.389406681060791} +02/25/2022 00:31:41 - INFO - codeparrot_training - Step 17723: {'lr': 0.00037890441806301954, 'samples': 9074688, 'steps': 17723, 'loss/train': 2.9667856693267822} +02/25/2022 00:31:47 - INFO - codeparrot_training - Step 17724: {'lr': 0.0003788903981224875, 'samples': 9075200, 'steps': 17724, 'loss/train': 1.3824249505996704} +02/25/2022 00:31:50 - INFO - codeparrot_training - Step 17725: {'lr': 0.00037887637762982996, 'samples': 9075712, 'steps': 17725, 'loss/train': 1.8328057527542114} +02/25/2022 00:31:56 - INFO - codeparrot_training - Step 17726: {'lr': 0.0003788623565851068, 'samples': 9076224, 'steps': 17726, 'loss/train': 1.9919962882995605} +02/25/2022 00:31:59 - INFO - codeparrot_training - Step 17727: {'lr': 0.00037884833498837833, 'samples': 9076736, 'steps': 17727, 'loss/train': 2.372586965560913} +02/25/2022 00:32:05 - INFO - codeparrot_training - Step 17728: {'lr': 0.00037883431283970454, 'samples': 9077248, 'steps': 17728, 'loss/train': 2.5369796752929688} +02/25/2022 00:32:08 - INFO - codeparrot_training - Step 17729: {'lr': 0.00037882029013914544, 'samples': 9077760, 'steps': 17729, 'loss/train': 2.93064022064209} +02/25/2022 00:32:14 - INFO - codeparrot_training - Step 17730: {'lr': 0.0003788062668867611, 'samples': 9078272, 'steps': 17730, 'loss/train': 2.5228629112243652} +02/25/2022 00:32:17 - INFO - codeparrot_training - Step 17731: {'lr': 0.00037879224308261163, 'samples': 9078784, 'steps': 17731, 'loss/train': 2.4482505321502686} +02/25/2022 00:32:23 - INFO - codeparrot_training - Step 17732: {'lr': 0.00037877821872675705, 'samples': 9079296, 'steps': 17732, 'loss/train': 2.4996213912963867} +02/25/2022 00:32:26 - INFO - codeparrot_training - Step 17733: {'lr': 0.0003787641938192575, 'samples': 9079808, 'steps': 17733, 'loss/train': 3.121630907058716} +02/25/2022 00:32:32 - INFO - codeparrot_training - Step 17734: {'lr': 0.00037875016836017304, 'samples': 9080320, 'steps': 17734, 'loss/train': 1.9725698232650757} +02/25/2022 00:32:36 - INFO - codeparrot_training - Step 17735: {'lr': 0.0003787361423495637, 'samples': 9080832, 'steps': 17735, 'loss/train': 1.4404319524765015} +02/25/2022 00:32:42 - INFO - codeparrot_training - Step 17736: {'lr': 0.0003787221157874897, 'samples': 9081344, 'steps': 17736, 'loss/train': 2.0056955814361572} +02/25/2022 00:32:45 - INFO - codeparrot_training - Step 17737: {'lr': 0.00037870808867401085, 'samples': 9081856, 'steps': 17737, 'loss/train': 1.2769489288330078} +02/25/2022 00:32:51 - INFO - codeparrot_training - Step 17738: {'lr': 0.00037869406100918756, 'samples': 9082368, 'steps': 17738, 'loss/train': 2.929872512817383} +02/25/2022 00:32:54 - INFO - codeparrot_training - Step 17739: {'lr': 0.0003786800327930797, 'samples': 9082880, 'steps': 17739, 'loss/train': 1.3697381019592285} +02/25/2022 00:33:00 - INFO - codeparrot_training - Step 17740: {'lr': 0.0003786660040257475, 'samples': 9083392, 'steps': 17740, 'loss/train': 3.458914279937744} +02/25/2022 00:33:03 - INFO - codeparrot_training - Step 17741: {'lr': 0.00037865197470725103, 'samples': 9083904, 'steps': 17741, 'loss/train': 2.1405208110809326} +02/25/2022 00:33:09 - INFO - codeparrot_training - Step 17742: {'lr': 0.0003786379448376503, 'samples': 9084416, 'steps': 17742, 'loss/train': 1.8302637338638306} +02/25/2022 00:33:12 - INFO - codeparrot_training - Step 17743: {'lr': 0.0003786239144170055, 'samples': 9084928, 'steps': 17743, 'loss/train': 2.5693111419677734} +02/25/2022 00:33:18 - INFO - codeparrot_training - Step 17744: {'lr': 0.0003786098834453766, 'samples': 9085440, 'steps': 17744, 'loss/train': 2.4527623653411865} +02/25/2022 00:33:21 - INFO - codeparrot_training - Step 17745: {'lr': 0.00037859585192282386, 'samples': 9085952, 'steps': 17745, 'loss/train': 2.138601779937744} +02/25/2022 00:33:27 - INFO - codeparrot_training - Step 17746: {'lr': 0.00037858181984940734, 'samples': 9086464, 'steps': 17746, 'loss/train': 2.146455764770508} +02/25/2022 00:33:30 - INFO - codeparrot_training - Step 17747: {'lr': 0.0003785677872251871, 'samples': 9086976, 'steps': 17747, 'loss/train': 2.2206130027770996} +02/25/2022 00:33:36 - INFO - codeparrot_training - Step 17748: {'lr': 0.0003785537540502233, 'samples': 9087488, 'steps': 17748, 'loss/train': 1.8951431512832642} +02/25/2022 00:33:40 - INFO - codeparrot_training - Step 17749: {'lr': 0.0003785397203245761, 'samples': 9088000, 'steps': 17749, 'loss/train': 2.0423741340637207} +02/25/2022 00:33:45 - INFO - codeparrot_training - Step 17750: {'lr': 0.0003785256860483054, 'samples': 9088512, 'steps': 17750, 'loss/train': 2.477142572402954} +02/25/2022 00:33:49 - INFO - codeparrot_training - Step 17751: {'lr': 0.0003785116512214716, 'samples': 9089024, 'steps': 17751, 'loss/train': 2.615281105041504} +02/25/2022 00:33:54 - INFO - codeparrot_training - Step 17752: {'lr': 0.0003784976158441347, 'samples': 9089536, 'steps': 17752, 'loss/train': 1.9404678344726562} +02/25/2022 00:33:58 - INFO - codeparrot_training - Step 17753: {'lr': 0.0003784835799163547, 'samples': 9090048, 'steps': 17753, 'loss/train': 1.0144660472869873} +02/25/2022 00:34:04 - INFO - codeparrot_training - Step 17754: {'lr': 0.00037846954343819195, 'samples': 9090560, 'steps': 17754, 'loss/train': 3.125117778778076} +02/25/2022 00:34:09 - INFO - codeparrot_training - Step 17755: {'lr': 0.00037845550640970636, 'samples': 9091072, 'steps': 17755, 'loss/train': 3.338714599609375} +02/25/2022 00:34:12 - INFO - codeparrot_training - Step 17756: {'lr': 0.0003784414688309583, 'samples': 9091584, 'steps': 17756, 'loss/train': 1.8071317672729492} +02/25/2022 00:34:19 - INFO - codeparrot_training - Step 17757: {'lr': 0.00037842743070200767, 'samples': 9092096, 'steps': 17757, 'loss/train': 4.624802112579346} +02/25/2022 00:34:22 - INFO - codeparrot_training - Step 17758: {'lr': 0.0003784133920229148, 'samples': 9092608, 'steps': 17758, 'loss/train': 1.204598307609558} +02/25/2022 00:34:28 - INFO - codeparrot_training - Step 17759: {'lr': 0.0003783993527937397, 'samples': 9093120, 'steps': 17759, 'loss/train': 2.5813825130462646} +02/25/2022 00:34:31 - INFO - codeparrot_training - Step 17760: {'lr': 0.0003783853130145425, 'samples': 9093632, 'steps': 17760, 'loss/train': 2.3898720741271973} +02/25/2022 00:34:35 - INFO - codeparrot_training - Step 17761: {'lr': 0.0003783712726853835, 'samples': 9094144, 'steps': 17761, 'loss/train': 1.5693180561065674} +02/25/2022 00:34:40 - INFO - codeparrot_training - Step 17762: {'lr': 0.00037835723180632263, 'samples': 9094656, 'steps': 17762, 'loss/train': 1.8705394268035889} +02/25/2022 00:34:44 - INFO - codeparrot_training - Step 17763: {'lr': 0.00037834319037742016, 'samples': 9095168, 'steps': 17763, 'loss/train': 4.796335697174072} +02/25/2022 00:34:49 - INFO - codeparrot_training - Step 17764: {'lr': 0.00037832914839873623, 'samples': 9095680, 'steps': 17764, 'loss/train': 1.8139954805374146} +02/25/2022 00:34:53 - INFO - codeparrot_training - Step 17765: {'lr': 0.0003783151058703309, 'samples': 9096192, 'steps': 17765, 'loss/train': 1.8321171998977661} +02/25/2022 00:34:58 - INFO - codeparrot_training - Step 17766: {'lr': 0.0003783010627922645, 'samples': 9096704, 'steps': 17766, 'loss/train': 1.9763429164886475} +02/25/2022 00:35:02 - INFO - codeparrot_training - Step 17767: {'lr': 0.0003782870191645971, 'samples': 9097216, 'steps': 17767, 'loss/train': 0.9249021410942078} +02/25/2022 00:35:08 - INFO - codeparrot_training - Step 17768: {'lr': 0.0003782729749873887, 'samples': 9097728, 'steps': 17768, 'loss/train': 1.7698930501937866} +02/25/2022 00:35:11 - INFO - codeparrot_training - Step 17769: {'lr': 0.00037825893026069977, 'samples': 9098240, 'steps': 17769, 'loss/train': 0.3880889117717743} +02/25/2022 00:35:17 - INFO - codeparrot_training - Step 17770: {'lr': 0.0003782448849845902, 'samples': 9098752, 'steps': 17770, 'loss/train': 1.4427440166473389} +02/25/2022 00:35:20 - INFO - codeparrot_training - Step 17771: {'lr': 0.0003782308391591203, 'samples': 9099264, 'steps': 17771, 'loss/train': 2.7742958068847656} +02/25/2022 00:35:26 - INFO - codeparrot_training - Step 17772: {'lr': 0.00037821679278435017, 'samples': 9099776, 'steps': 17772, 'loss/train': 2.4256370067596436} +02/25/2022 00:35:30 - INFO - codeparrot_training - Step 17773: {'lr': 0.0003782027458603401, 'samples': 9100288, 'steps': 17773, 'loss/train': 3.8415708541870117} +02/25/2022 00:35:35 - INFO - codeparrot_training - Step 17774: {'lr': 0.0003781886983871501, 'samples': 9100800, 'steps': 17774, 'loss/train': 2.436509132385254} +02/25/2022 00:35:39 - INFO - codeparrot_training - Step 17775: {'lr': 0.00037817465036484043, 'samples': 9101312, 'steps': 17775, 'loss/train': 1.5396196842193604} +02/25/2022 00:35:44 - INFO - codeparrot_training - Step 17776: {'lr': 0.0003781606017934713, 'samples': 9101824, 'steps': 17776, 'loss/train': 1.927518606185913} +02/25/2022 00:35:48 - INFO - codeparrot_training - Step 17777: {'lr': 0.0003781465526731028, 'samples': 9102336, 'steps': 17777, 'loss/train': 2.4948067665100098} +02/25/2022 00:35:53 - INFO - codeparrot_training - Step 17778: {'lr': 0.0003781325030037952, 'samples': 9102848, 'steps': 17778, 'loss/train': 1.948328971862793} +02/25/2022 00:35:57 - INFO - codeparrot_training - Step 17779: {'lr': 0.00037811845278560864, 'samples': 9103360, 'steps': 17779, 'loss/train': 0.8280380964279175} +02/25/2022 00:36:02 - INFO - codeparrot_training - Step 17780: {'lr': 0.0003781044020186033, 'samples': 9103872, 'steps': 17780, 'loss/train': 1.3666096925735474} +02/25/2022 00:36:06 - INFO - codeparrot_training - Step 17781: {'lr': 0.0003780903507028393, 'samples': 9104384, 'steps': 17781, 'loss/train': 0.5460395812988281} +02/25/2022 00:36:12 - INFO - codeparrot_training - Step 17782: {'lr': 0.00037807629883837703, 'samples': 9104896, 'steps': 17782, 'loss/train': 2.9699387550354004} +02/25/2022 00:36:16 - INFO - codeparrot_training - Step 17783: {'lr': 0.00037806224642527653, 'samples': 9105408, 'steps': 17783, 'loss/train': 1.9725273847579956} +02/25/2022 00:36:21 - INFO - codeparrot_training - Step 17784: {'lr': 0.000378048193463598, 'samples': 9105920, 'steps': 17784, 'loss/train': 2.8924503326416016} +02/25/2022 00:36:25 - INFO - codeparrot_training - Step 17785: {'lr': 0.0003780341399534017, 'samples': 9106432, 'steps': 17785, 'loss/train': 2.25705623626709} +02/25/2022 00:36:30 - INFO - codeparrot_training - Step 17786: {'lr': 0.00037802008589474777, 'samples': 9106944, 'steps': 17786, 'loss/train': 2.7668399810791016} +02/25/2022 00:36:33 - INFO - codeparrot_training - Step 17787: {'lr': 0.0003780060312876965, 'samples': 9107456, 'steps': 17787, 'loss/train': 3.03151273727417} +02/25/2022 00:36:39 - INFO - codeparrot_training - Step 17788: {'lr': 0.00037799197613230795, 'samples': 9107968, 'steps': 17788, 'loss/train': 1.6267716884613037} +02/25/2022 00:36:43 - INFO - codeparrot_training - Step 17789: {'lr': 0.00037797792042864247, 'samples': 9108480, 'steps': 17789, 'loss/train': 2.3754568099975586} +02/25/2022 00:36:48 - INFO - codeparrot_training - Step 17790: {'lr': 0.0003779638641767602, 'samples': 9108992, 'steps': 17790, 'loss/train': 1.3895710706710815} +02/25/2022 00:36:52 - INFO - codeparrot_training - Step 17791: {'lr': 0.0003779498073767214, 'samples': 9109504, 'steps': 17791, 'loss/train': 2.1284360885620117} +02/25/2022 00:36:57 - INFO - codeparrot_training - Step 17792: {'lr': 0.00037793575002858625, 'samples': 9110016, 'steps': 17792, 'loss/train': 1.511710524559021} +02/25/2022 00:37:01 - INFO - codeparrot_training - Step 17793: {'lr': 0.00037792169213241494, 'samples': 9110528, 'steps': 17793, 'loss/train': 1.6062425374984741} +02/25/2022 00:37:07 - INFO - codeparrot_training - Step 17794: {'lr': 0.00037790763368826774, 'samples': 9111040, 'steps': 17794, 'loss/train': 0.8982266783714294} +02/25/2022 00:37:11 - INFO - codeparrot_training - Step 17795: {'lr': 0.00037789357469620487, 'samples': 9111552, 'steps': 17795, 'loss/train': 2.3349978923797607} +02/25/2022 00:37:16 - INFO - codeparrot_training - Step 17796: {'lr': 0.0003778795151562865, 'samples': 9112064, 'steps': 17796, 'loss/train': 2.528998851776123} +02/25/2022 00:37:20 - INFO - codeparrot_training - Step 17797: {'lr': 0.00037786545506857295, 'samples': 9112576, 'steps': 17797, 'loss/train': 2.137895107269287} +02/25/2022 00:37:25 - INFO - codeparrot_training - Step 17798: {'lr': 0.0003778513944331243, 'samples': 9113088, 'steps': 17798, 'loss/train': 2.3629095554351807} +02/25/2022 00:37:29 - INFO - codeparrot_training - Step 17799: {'lr': 0.0003778373332500009, 'samples': 9113600, 'steps': 17799, 'loss/train': 2.8258047103881836} +02/25/2022 00:37:34 - INFO - codeparrot_training - Step 17800: {'lr': 0.00037782327151926297, 'samples': 9114112, 'steps': 17800, 'loss/train': 2.040865898132324} +02/25/2022 00:37:38 - INFO - codeparrot_training - Step 17801: {'lr': 0.00037780920924097085, 'samples': 9114624, 'steps': 17801, 'loss/train': 2.445117235183716} +02/25/2022 00:37:43 - INFO - codeparrot_training - Step 17802: {'lr': 0.00037779514641518455, 'samples': 9115136, 'steps': 17802, 'loss/train': 1.2249690294265747} +02/25/2022 00:37:47 - INFO - codeparrot_training - Step 17803: {'lr': 0.0003777810830419644, 'samples': 9115648, 'steps': 17803, 'loss/train': 2.1157913208007812} +02/25/2022 00:37:53 - INFO - codeparrot_training - Step 17804: {'lr': 0.00037776701912137066, 'samples': 9116160, 'steps': 17804, 'loss/train': 2.5650148391723633} +02/25/2022 00:37:56 - INFO - codeparrot_training - Step 17805: {'lr': 0.00037775295465346373, 'samples': 9116672, 'steps': 17805, 'loss/train': 0.37460461258888245} +02/25/2022 00:38:02 - INFO - codeparrot_training - Step 17806: {'lr': 0.0003777388896383035, 'samples': 9117184, 'steps': 17806, 'loss/train': 2.438077688217163} +02/25/2022 00:38:05 - INFO - codeparrot_training - Step 17807: {'lr': 0.00037772482407595056, 'samples': 9117696, 'steps': 17807, 'loss/train': 2.6476752758026123} +02/25/2022 00:38:11 - INFO - codeparrot_training - Step 17808: {'lr': 0.000377710757966465, 'samples': 9118208, 'steps': 17808, 'loss/train': 2.1442501544952393} +02/25/2022 00:38:15 - INFO - codeparrot_training - Step 17809: {'lr': 0.0003776966913099071, 'samples': 9118720, 'steps': 17809, 'loss/train': 2.276533603668213} +02/25/2022 00:38:20 - INFO - codeparrot_training - Step 17810: {'lr': 0.00037768262410633715, 'samples': 9119232, 'steps': 17810, 'loss/train': 1.7017161846160889} +02/25/2022 00:38:24 - INFO - codeparrot_training - Step 17811: {'lr': 0.0003776685563558153, 'samples': 9119744, 'steps': 17811, 'loss/train': 1.5580724477767944} +02/25/2022 00:38:29 - INFO - codeparrot_training - Step 17812: {'lr': 0.00037765448805840196, 'samples': 9120256, 'steps': 17812, 'loss/train': 1.3341552019119263} +02/25/2022 00:38:33 - INFO - codeparrot_training - Step 17813: {'lr': 0.00037764041921415736, 'samples': 9120768, 'steps': 17813, 'loss/train': 2.3488428592681885} +02/25/2022 00:38:39 - INFO - codeparrot_training - Step 17814: {'lr': 0.00037762634982314164, 'samples': 9121280, 'steps': 17814, 'loss/train': 1.8621599674224854} +02/25/2022 00:38:42 - INFO - codeparrot_training - Step 17815: {'lr': 0.00037761227988541523, 'samples': 9121792, 'steps': 17815, 'loss/train': 1.6176320314407349} +02/25/2022 00:38:48 - INFO - codeparrot_training - Step 17816: {'lr': 0.00037759820940103827, 'samples': 9122304, 'steps': 17816, 'loss/train': 2.340167760848999} +02/25/2022 00:38:51 - INFO - codeparrot_training - Step 17817: {'lr': 0.00037758413837007124, 'samples': 9122816, 'steps': 17817, 'loss/train': 0.27927979826927185} +02/25/2022 00:38:57 - INFO - codeparrot_training - Step 17818: {'lr': 0.0003775700667925741, 'samples': 9123328, 'steps': 17818, 'loss/train': 2.227670907974243} +02/25/2022 00:39:00 - INFO - codeparrot_training - Step 17819: {'lr': 0.0003775559946686075, 'samples': 9123840, 'steps': 17819, 'loss/train': 1.4832324981689453} +02/25/2022 00:39:06 - INFO - codeparrot_training - Step 17820: {'lr': 0.00037754192199823135, 'samples': 9124352, 'steps': 17820, 'loss/train': 1.5890132188796997} +02/25/2022 00:39:09 - INFO - codeparrot_training - Step 17821: {'lr': 0.00037752784878150613, 'samples': 9124864, 'steps': 17821, 'loss/train': 1.9960347414016724} +02/25/2022 00:39:15 - INFO - codeparrot_training - Step 17822: {'lr': 0.00037751377501849215, 'samples': 9125376, 'steps': 17822, 'loss/train': 1.1255578994750977} +02/25/2022 00:39:18 - INFO - codeparrot_training - Step 17823: {'lr': 0.0003774997007092496, 'samples': 9125888, 'steps': 17823, 'loss/train': 1.7893080711364746} +02/25/2022 00:39:24 - INFO - codeparrot_training - Step 17824: {'lr': 0.00037748562585383886, 'samples': 9126400, 'steps': 17824, 'loss/train': 2.536302089691162} +02/25/2022 00:39:27 - INFO - codeparrot_training - Step 17825: {'lr': 0.00037747155045232016, 'samples': 9126912, 'steps': 17825, 'loss/train': 2.4392430782318115} +02/25/2022 00:39:33 - INFO - codeparrot_training - Step 17826: {'lr': 0.0003774574745047539, 'samples': 9127424, 'steps': 17826, 'loss/train': 2.0956664085388184} +02/25/2022 00:39:36 - INFO - codeparrot_training - Step 17827: {'lr': 0.0003774433980112001, 'samples': 9127936, 'steps': 17827, 'loss/train': 1.8346408605575562} +02/25/2022 00:39:42 - INFO - codeparrot_training - Step 17828: {'lr': 0.00037742932097171945, 'samples': 9128448, 'steps': 17828, 'loss/train': 2.8361427783966064} +02/25/2022 00:39:45 - INFO - codeparrot_training - Step 17829: {'lr': 0.0003774152433863719, 'samples': 9128960, 'steps': 17829, 'loss/train': 3.5036838054656982} +02/25/2022 00:39:52 - INFO - codeparrot_training - Step 17830: {'lr': 0.000377401165255218, 'samples': 9129472, 'steps': 17830, 'loss/train': 2.1435322761535645} +02/25/2022 00:39:55 - INFO - codeparrot_training - Step 17831: {'lr': 0.0003773870865783179, 'samples': 9129984, 'steps': 17831, 'loss/train': 3.1499199867248535} +02/25/2022 00:40:01 - INFO - codeparrot_training - Step 17832: {'lr': 0.00037737300735573204, 'samples': 9130496, 'steps': 17832, 'loss/train': 2.5886032581329346} +02/25/2022 00:40:06 - INFO - codeparrot_training - Step 17833: {'lr': 0.00037735892758752063, 'samples': 9131008, 'steps': 17833, 'loss/train': 1.7489620447158813} +02/25/2022 00:40:10 - INFO - codeparrot_training - Step 17834: {'lr': 0.000377344847273744, 'samples': 9131520, 'steps': 17834, 'loss/train': 3.011117935180664} +02/25/2022 00:40:13 - INFO - codeparrot_training - Step 17835: {'lr': 0.0003773307664144625, 'samples': 9132032, 'steps': 17835, 'loss/train': 1.3230488300323486} +02/25/2022 00:40:19 - INFO - codeparrot_training - Step 17836: {'lr': 0.00037731668500973637, 'samples': 9132544, 'steps': 17836, 'loss/train': 1.1957640647888184} +02/25/2022 00:40:24 - INFO - codeparrot_training - Step 17837: {'lr': 0.00037730260305962604, 'samples': 9133056, 'steps': 17837, 'loss/train': 2.230207920074463} +02/25/2022 00:40:28 - INFO - codeparrot_training - Step 17838: {'lr': 0.00037728852056419183, 'samples': 9133568, 'steps': 17838, 'loss/train': 2.751715898513794} +02/25/2022 00:40:31 - INFO - codeparrot_training - Step 17839: {'lr': 0.000377274437523494, 'samples': 9134080, 'steps': 17839, 'loss/train': 2.4595115184783936} +02/25/2022 00:40:37 - INFO - codeparrot_training - Step 17840: {'lr': 0.00037726035393759286, 'samples': 9134592, 'steps': 17840, 'loss/train': 0.865892767906189} +02/25/2022 00:40:43 - INFO - codeparrot_training - Step 17841: {'lr': 0.00037724626980654877, 'samples': 9135104, 'steps': 17841, 'loss/train': 2.0343310832977295} +02/25/2022 00:40:46 - INFO - codeparrot_training - Step 17842: {'lr': 0.00037723218513042203, 'samples': 9135616, 'steps': 17842, 'loss/train': 1.599124550819397} +02/25/2022 00:40:52 - INFO - codeparrot_training - Step 17843: {'lr': 0.0003772180999092731, 'samples': 9136128, 'steps': 17843, 'loss/train': 1.4413633346557617} +02/25/2022 00:40:55 - INFO - codeparrot_training - Step 17844: {'lr': 0.00037720401414316213, 'samples': 9136640, 'steps': 17844, 'loss/train': 1.9628946781158447} +02/25/2022 00:41:01 - INFO - codeparrot_training - Step 17845: {'lr': 0.00037718992783214965, 'samples': 9137152, 'steps': 17845, 'loss/train': 2.9506173133850098} +02/25/2022 00:41:04 - INFO - codeparrot_training - Step 17846: {'lr': 0.0003771758409762958, 'samples': 9137664, 'steps': 17846, 'loss/train': 1.4582948684692383} +02/25/2022 00:41:10 - INFO - codeparrot_training - Step 17847: {'lr': 0.0003771617535756611, 'samples': 9138176, 'steps': 17847, 'loss/train': 2.0441431999206543} +02/25/2022 00:41:13 - INFO - codeparrot_training - Step 17848: {'lr': 0.00037714766563030585, 'samples': 9138688, 'steps': 17848, 'loss/train': 1.0176316499710083} +02/25/2022 00:41:19 - INFO - codeparrot_training - Step 17849: {'lr': 0.00037713357714029035, 'samples': 9139200, 'steps': 17849, 'loss/train': 1.4250121116638184} +02/25/2022 00:41:23 - INFO - codeparrot_training - Step 17850: {'lr': 0.000377119488105675, 'samples': 9139712, 'steps': 17850, 'loss/train': 2.64493727684021} +02/25/2022 00:41:28 - INFO - codeparrot_training - Step 17851: {'lr': 0.00037710539852652003, 'samples': 9140224, 'steps': 17851, 'loss/train': 1.5466663837432861} +02/25/2022 00:41:32 - INFO - codeparrot_training - Step 17852: {'lr': 0.00037709130840288605, 'samples': 9140736, 'steps': 17852, 'loss/train': 1.3424402475357056} +02/25/2022 00:41:38 - INFO - codeparrot_training - Step 17853: {'lr': 0.0003770772177348331, 'samples': 9141248, 'steps': 17853, 'loss/train': 2.6895925998687744} +02/25/2022 00:41:41 - INFO - codeparrot_training - Step 17854: {'lr': 0.0003770631265224218, 'samples': 9141760, 'steps': 17854, 'loss/train': 2.4510087966918945} +02/25/2022 00:41:45 - INFO - codeparrot_training - Step 17855: {'lr': 0.0003770490347657124, 'samples': 9142272, 'steps': 17855, 'loss/train': 1.26418936252594} +02/25/2022 00:41:50 - INFO - codeparrot_training - Step 17856: {'lr': 0.00037703494246476524, 'samples': 9142784, 'steps': 17856, 'loss/train': 1.832175374031067} +02/25/2022 00:41:54 - INFO - codeparrot_training - Step 17857: {'lr': 0.00037702084961964075, 'samples': 9143296, 'steps': 17857, 'loss/train': 1.06133234500885} +02/25/2022 00:41:59 - INFO - codeparrot_training - Step 17858: {'lr': 0.00037700675623039925, 'samples': 9143808, 'steps': 17858, 'loss/train': 1.9219799041748047} +02/25/2022 00:42:03 - INFO - codeparrot_training - Step 17859: {'lr': 0.00037699266229710115, 'samples': 9144320, 'steps': 17859, 'loss/train': 2.2191593647003174} +02/25/2022 00:42:09 - INFO - codeparrot_training - Step 17860: {'lr': 0.0003769785678198068, 'samples': 9144832, 'steps': 17860, 'loss/train': 2.746103525161743} +02/25/2022 00:42:13 - INFO - codeparrot_training - Step 17861: {'lr': 0.0003769644727985766, 'samples': 9145344, 'steps': 17861, 'loss/train': 1.5594803094863892} +02/25/2022 00:42:18 - INFO - codeparrot_training - Step 17862: {'lr': 0.00037695037723347094, 'samples': 9145856, 'steps': 17862, 'loss/train': 2.2601146697998047} +02/25/2022 00:42:21 - INFO - codeparrot_training - Step 17863: {'lr': 0.00037693628112455015, 'samples': 9146368, 'steps': 17863, 'loss/train': 2.0484261512756348} +02/25/2022 00:42:27 - INFO - codeparrot_training - Step 17864: {'lr': 0.0003769221844718746, 'samples': 9146880, 'steps': 17864, 'loss/train': 1.8653650283813477} +02/25/2022 00:42:30 - INFO - codeparrot_training - Step 17865: {'lr': 0.00037690808727550477, 'samples': 9147392, 'steps': 17865, 'loss/train': 1.382372498512268} +02/25/2022 00:42:36 - INFO - codeparrot_training - Step 17866: {'lr': 0.0003768939895355009, 'samples': 9147904, 'steps': 17866, 'loss/train': 1.6069279909133911} +02/25/2022 00:42:41 - INFO - codeparrot_training - Step 17867: {'lr': 0.0003768798912519236, 'samples': 9148416, 'steps': 17867, 'loss/train': 2.339858055114746} +02/25/2022 00:42:45 - INFO - codeparrot_training - Step 17868: {'lr': 0.0003768657924248331, 'samples': 9148928, 'steps': 17868, 'loss/train': 1.9556550979614258} +02/25/2022 00:42:50 - INFO - codeparrot_training - Step 17869: {'lr': 0.0003768516930542898, 'samples': 9149440, 'steps': 17869, 'loss/train': 2.3381431102752686} +02/25/2022 00:42:54 - INFO - codeparrot_training - Step 17870: {'lr': 0.00037683759314035414, 'samples': 9149952, 'steps': 17870, 'loss/train': 1.4832879304885864} +02/25/2022 00:42:59 - INFO - codeparrot_training - Step 17871: {'lr': 0.0003768234926830865, 'samples': 9150464, 'steps': 17871, 'loss/train': 1.8632103204727173} +02/25/2022 00:43:03 - INFO - codeparrot_training - Step 17872: {'lr': 0.0003768093916825473, 'samples': 9150976, 'steps': 17872, 'loss/train': 1.5347721576690674} +02/25/2022 00:43:08 - INFO - codeparrot_training - Step 17873: {'lr': 0.00037679529013879686, 'samples': 9151488, 'steps': 17873, 'loss/train': 1.3869832754135132} +02/25/2022 00:43:12 - INFO - codeparrot_training - Step 17874: {'lr': 0.00037678118805189575, 'samples': 9152000, 'steps': 17874, 'loss/train': 4.124237060546875} +02/25/2022 00:43:18 - INFO - codeparrot_training - Step 17875: {'lr': 0.0003767670854219043, 'samples': 9152512, 'steps': 17875, 'loss/train': 2.5834977626800537} +02/25/2022 00:43:22 - INFO - codeparrot_training - Step 17876: {'lr': 0.00037675298224888287, 'samples': 9153024, 'steps': 17876, 'loss/train': 1.7203011512756348} +02/25/2022 00:43:25 - INFO - codeparrot_training - Step 17877: {'lr': 0.0003767388785328919, 'samples': 9153536, 'steps': 17877, 'loss/train': 1.0873969793319702} +02/25/2022 00:43:31 - INFO - codeparrot_training - Step 17878: {'lr': 0.0003767247742739918, 'samples': 9154048, 'steps': 17878, 'loss/train': 2.1951661109924316} +02/25/2022 00:43:34 - INFO - codeparrot_training - Step 17879: {'lr': 0.0003767106694722431, 'samples': 9154560, 'steps': 17879, 'loss/train': 2.0824179649353027} +02/25/2022 00:43:40 - INFO - codeparrot_training - Step 17880: {'lr': 0.000376696564127706, 'samples': 9155072, 'steps': 17880, 'loss/train': 2.2230143547058105} +02/25/2022 00:43:43 - INFO - codeparrot_training - Step 17881: {'lr': 0.0003766824582404411, 'samples': 9155584, 'steps': 17881, 'loss/train': 1.6026670932769775} +02/25/2022 00:43:49 - INFO - codeparrot_training - Step 17882: {'lr': 0.00037666835181050887, 'samples': 9156096, 'steps': 17882, 'loss/train': 1.7113717794418335} +02/25/2022 00:43:52 - INFO - codeparrot_training - Step 17883: {'lr': 0.0003766542448379695, 'samples': 9156608, 'steps': 17883, 'loss/train': 3.091701030731201} +02/25/2022 00:43:58 - INFO - codeparrot_training - Step 17884: {'lr': 0.0003766401373228836, 'samples': 9157120, 'steps': 17884, 'loss/train': 2.518641233444214} +02/25/2022 00:44:01 - INFO - codeparrot_training - Step 17885: {'lr': 0.00037662602926531166, 'samples': 9157632, 'steps': 17885, 'loss/train': 2.174736738204956} +02/25/2022 00:44:08 - INFO - codeparrot_training - Step 17886: {'lr': 0.0003766119206653139, 'samples': 9158144, 'steps': 17886, 'loss/train': 2.477818012237549} +02/25/2022 00:44:11 - INFO - codeparrot_training - Step 17887: {'lr': 0.00037659781152295094, 'samples': 9158656, 'steps': 17887, 'loss/train': 2.4373779296875} +02/25/2022 00:44:17 - INFO - codeparrot_training - Step 17888: {'lr': 0.0003765837018382831, 'samples': 9159168, 'steps': 17888, 'loss/train': 1.989872932434082} +02/25/2022 00:44:20 - INFO - codeparrot_training - Step 17889: {'lr': 0.00037656959161137094, 'samples': 9159680, 'steps': 17889, 'loss/train': 2.429205894470215} +02/25/2022 00:44:26 - INFO - codeparrot_training - Step 17890: {'lr': 0.00037655548084227484, 'samples': 9160192, 'steps': 17890, 'loss/train': 2.52811861038208} +02/25/2022 00:44:29 - INFO - codeparrot_training - Step 17891: {'lr': 0.0003765413695310552, 'samples': 9160704, 'steps': 17891, 'loss/train': 3.137202501296997} +02/25/2022 00:44:35 - INFO - codeparrot_training - Step 17892: {'lr': 0.00037652725767777255, 'samples': 9161216, 'steps': 17892, 'loss/train': 2.6416451930999756} +02/25/2022 00:44:38 - INFO - codeparrot_training - Step 17893: {'lr': 0.00037651314528248724, 'samples': 9161728, 'steps': 17893, 'loss/train': 2.622027635574341} +02/25/2022 00:44:44 - INFO - codeparrot_training - Step 17894: {'lr': 0.00037649903234525996, 'samples': 9162240, 'steps': 17894, 'loss/train': 1.530945897102356} +02/25/2022 00:44:47 - INFO - codeparrot_training - Step 17895: {'lr': 0.00037648491886615077, 'samples': 9162752, 'steps': 17895, 'loss/train': 2.3728630542755127} +02/25/2022 00:44:54 - INFO - codeparrot_training - Step 17896: {'lr': 0.0003764708048452205, 'samples': 9163264, 'steps': 17896, 'loss/train': 1.969873070716858} +02/25/2022 00:44:57 - INFO - codeparrot_training - Step 17897: {'lr': 0.0003764566902825294, 'samples': 9163776, 'steps': 17897, 'loss/train': 1.7541239261627197} +02/25/2022 00:45:03 - INFO - codeparrot_training - Step 17898: {'lr': 0.0003764425751781381, 'samples': 9164288, 'steps': 17898, 'loss/train': 2.7798380851745605} +02/25/2022 00:45:08 - INFO - codeparrot_training - Step 17899: {'lr': 0.0003764284595321068, 'samples': 9164800, 'steps': 17899, 'loss/train': 1.6835497617721558} +02/25/2022 00:45:12 - INFO - codeparrot_training - Step 17900: {'lr': 0.0003764143433444962, 'samples': 9165312, 'steps': 17900, 'loss/train': 0.22605247795581818} +02/25/2022 00:45:15 - INFO - codeparrot_training - Step 17901: {'lr': 0.00037640022661536665, 'samples': 9165824, 'steps': 17901, 'loss/train': 2.232663631439209} +02/25/2022 00:45:22 - INFO - codeparrot_training - Step 17902: {'lr': 0.0003763861093447787, 'samples': 9166336, 'steps': 17902, 'loss/train': 1.1055772304534912} +02/25/2022 00:45:25 - INFO - codeparrot_training - Step 17903: {'lr': 0.0003763719915327928, 'samples': 9166848, 'steps': 17903, 'loss/train': 2.2600111961364746} +02/25/2022 00:45:31 - INFO - codeparrot_training - Step 17904: {'lr': 0.00037635787317946945, 'samples': 9167360, 'steps': 17904, 'loss/train': 1.1961216926574707} +02/25/2022 00:45:34 - INFO - codeparrot_training - Step 17905: {'lr': 0.000376343754284869, 'samples': 9167872, 'steps': 17905, 'loss/train': 3.1459691524505615} +02/25/2022 00:45:38 - INFO - codeparrot_training - Step 17906: {'lr': 0.00037632963484905213, 'samples': 9168384, 'steps': 17906, 'loss/train': 1.6319698095321655} +02/25/2022 00:45:44 - INFO - codeparrot_training - Step 17907: {'lr': 0.0003763155148720791, 'samples': 9168896, 'steps': 17907, 'loss/train': 1.7362861633300781} +02/25/2022 00:45:48 - INFO - codeparrot_training - Step 17908: {'lr': 0.00037630139435401055, 'samples': 9169408, 'steps': 17908, 'loss/train': 2.1172192096710205} +02/25/2022 00:45:53 - INFO - codeparrot_training - Step 17909: {'lr': 0.000376287273294907, 'samples': 9169920, 'steps': 17909, 'loss/train': 0.4598305821418762} +02/25/2022 00:45:57 - INFO - codeparrot_training - Step 17910: {'lr': 0.0003762731516948288, 'samples': 9170432, 'steps': 17910, 'loss/train': 2.358130693435669} +02/25/2022 00:46:02 - INFO - codeparrot_training - Step 17911: {'lr': 0.00037625902955383664, 'samples': 9170944, 'steps': 17911, 'loss/train': 1.4387531280517578} +02/25/2022 00:46:06 - INFO - codeparrot_training - Step 17912: {'lr': 0.0003762449068719907, 'samples': 9171456, 'steps': 17912, 'loss/train': 2.5760319232940674} +02/25/2022 00:46:12 - INFO - codeparrot_training - Step 17913: {'lr': 0.0003762307836493518, 'samples': 9171968, 'steps': 17913, 'loss/train': 1.6828365325927734} +02/25/2022 00:46:15 - INFO - codeparrot_training - Step 17914: {'lr': 0.00037621665988598024, 'samples': 9172480, 'steps': 17914, 'loss/train': 1.8636233806610107} +02/25/2022 00:46:21 - INFO - codeparrot_training - Step 17915: {'lr': 0.0003762025355819366, 'samples': 9172992, 'steps': 17915, 'loss/train': 2.0677895545959473} +02/25/2022 00:46:24 - INFO - codeparrot_training - Step 17916: {'lr': 0.0003761884107372814, 'samples': 9173504, 'steps': 17916, 'loss/train': 2.408090591430664} +02/25/2022 00:46:30 - INFO - codeparrot_training - Step 17917: {'lr': 0.0003761742853520751, 'samples': 9174016, 'steps': 17917, 'loss/train': 1.8922818899154663} +02/25/2022 00:46:34 - INFO - codeparrot_training - Step 17918: {'lr': 0.00037616015942637824, 'samples': 9174528, 'steps': 17918, 'loss/train': 1.2953591346740723} +02/25/2022 00:46:39 - INFO - codeparrot_training - Step 17919: {'lr': 0.0003761460329602513, 'samples': 9175040, 'steps': 17919, 'loss/train': 1.213797688484192} +02/25/2022 00:46:43 - INFO - codeparrot_training - Step 17920: {'lr': 0.0003761319059537548, 'samples': 9175552, 'steps': 17920, 'loss/train': 2.525622606277466} +02/25/2022 00:46:49 - INFO - codeparrot_training - Step 17921: {'lr': 0.0003761177784069493, 'samples': 9176064, 'steps': 17921, 'loss/train': 2.7263717651367188} +02/25/2022 00:46:52 - INFO - codeparrot_training - Step 17922: {'lr': 0.00037610365031989524, 'samples': 9176576, 'steps': 17922, 'loss/train': 1.6838401556015015} +02/25/2022 00:46:58 - INFO - codeparrot_training - Step 17923: {'lr': 0.0003760895216926532, 'samples': 9177088, 'steps': 17923, 'loss/train': 2.4757680892944336} +02/25/2022 00:47:01 - INFO - codeparrot_training - Step 17924: {'lr': 0.0003760753925252838, 'samples': 9177600, 'steps': 17924, 'loss/train': 1.7989904880523682} +02/25/2022 00:47:07 - INFO - codeparrot_training - Step 17925: {'lr': 0.00037606126281784725, 'samples': 9178112, 'steps': 17925, 'loss/train': 1.7875397205352783} +02/25/2022 00:47:10 - INFO - codeparrot_training - Step 17926: {'lr': 0.0003760471325704045, 'samples': 9178624, 'steps': 17926, 'loss/train': 2.589329719543457} +02/25/2022 00:47:16 - INFO - codeparrot_training - Step 17927: {'lr': 0.0003760330017830157, 'samples': 9179136, 'steps': 17927, 'loss/train': 1.5632318258285522} +02/25/2022 00:47:19 - INFO - codeparrot_training - Step 17928: {'lr': 0.00037601887045574155, 'samples': 9179648, 'steps': 17928, 'loss/train': 2.014723300933838} +02/25/2022 00:47:25 - INFO - codeparrot_training - Step 17929: {'lr': 0.0003760047385886426, 'samples': 9180160, 'steps': 17929, 'loss/train': 2.1561315059661865} +02/25/2022 00:47:28 - INFO - codeparrot_training - Step 17930: {'lr': 0.0003759906061817794, 'samples': 9180672, 'steps': 17930, 'loss/train': 2.223832845687866} +02/25/2022 00:47:34 - INFO - codeparrot_training - Step 17931: {'lr': 0.00037597647323521234, 'samples': 9181184, 'steps': 17931, 'loss/train': 2.039451837539673} +02/25/2022 00:47:38 - INFO - codeparrot_training - Step 17932: {'lr': 0.0003759623397490022, 'samples': 9181696, 'steps': 17932, 'loss/train': 1.986750602722168} +02/25/2022 00:47:44 - INFO - codeparrot_training - Step 17933: {'lr': 0.00037594820572320933, 'samples': 9182208, 'steps': 17933, 'loss/train': 2.7664713859558105} +02/25/2022 00:47:48 - INFO - codeparrot_training - Step 17934: {'lr': 0.0003759340711578944, 'samples': 9182720, 'steps': 17934, 'loss/train': 1.9041202068328857} +02/25/2022 00:47:53 - INFO - codeparrot_training - Step 17935: {'lr': 0.0003759199360531178, 'samples': 9183232, 'steps': 17935, 'loss/train': 1.7459298372268677} +02/25/2022 00:47:57 - INFO - codeparrot_training - Step 17936: {'lr': 0.00037590580040894024, 'samples': 9183744, 'steps': 17936, 'loss/train': 2.398015260696411} +02/25/2022 00:48:02 - INFO - codeparrot_training - Step 17937: {'lr': 0.0003758916642254222, 'samples': 9184256, 'steps': 17937, 'loss/train': 2.96384334564209} +02/25/2022 00:48:05 - INFO - codeparrot_training - Step 17938: {'lr': 0.00037587752750262426, 'samples': 9184768, 'steps': 17938, 'loss/train': 2.4360480308532715} +02/25/2022 00:48:11 - INFO - codeparrot_training - Step 17939: {'lr': 0.00037586339024060696, 'samples': 9185280, 'steps': 17939, 'loss/train': 2.36801815032959} +02/25/2022 00:48:15 - INFO - codeparrot_training - Step 17940: {'lr': 0.0003758492524394308, 'samples': 9185792, 'steps': 17940, 'loss/train': 0.4478470981121063} +02/25/2022 00:48:20 - INFO - codeparrot_training - Step 17941: {'lr': 0.0003758351140991565, 'samples': 9186304, 'steps': 17941, 'loss/train': 1.4435036182403564} +02/25/2022 00:48:24 - INFO - codeparrot_training - Step 17942: {'lr': 0.0003758209752198444, 'samples': 9186816, 'steps': 17942, 'loss/train': 1.9797041416168213} +02/25/2022 00:48:29 - INFO - codeparrot_training - Step 17943: {'lr': 0.0003758068358015553, 'samples': 9187328, 'steps': 17943, 'loss/train': 1.366119146347046} +02/25/2022 00:48:33 - INFO - codeparrot_training - Step 17944: {'lr': 0.0003757926958443496, 'samples': 9187840, 'steps': 17944, 'loss/train': 2.4365100860595703} +02/25/2022 00:48:39 - INFO - codeparrot_training - Step 17945: {'lr': 0.000375778555348288, 'samples': 9188352, 'steps': 17945, 'loss/train': 1.6386650800704956} +02/25/2022 00:48:42 - INFO - codeparrot_training - Step 17946: {'lr': 0.000375764414313431, 'samples': 9188864, 'steps': 17946, 'loss/train': 1.8946306705474854} +02/25/2022 00:48:48 - INFO - codeparrot_training - Step 17947: {'lr': 0.0003757502727398391, 'samples': 9189376, 'steps': 17947, 'loss/train': 2.3807075023651123} +02/25/2022 00:48:51 - INFO - codeparrot_training - Step 17948: {'lr': 0.00037573613062757304, 'samples': 9189888, 'steps': 17948, 'loss/train': 1.8239479064941406} +02/25/2022 00:48:57 - INFO - codeparrot_training - Step 17949: {'lr': 0.0003757219879766933, 'samples': 9190400, 'steps': 17949, 'loss/train': 1.410290002822876} +02/25/2022 00:49:02 - INFO - codeparrot_training - Step 17950: {'lr': 0.00037570784478726057, 'samples': 9190912, 'steps': 17950, 'loss/train': 1.792197346687317} +02/25/2022 00:49:06 - INFO - codeparrot_training - Step 17951: {'lr': 0.00037569370105933523, 'samples': 9191424, 'steps': 17951, 'loss/train': 2.7010374069213867} +02/25/2022 00:49:11 - INFO - codeparrot_training - Step 17952: {'lr': 0.00037567955679297806, 'samples': 9191936, 'steps': 17952, 'loss/train': 1.2868130207061768} +02/25/2022 00:49:15 - INFO - codeparrot_training - Step 17953: {'lr': 0.0003756654119882496, 'samples': 9192448, 'steps': 17953, 'loss/train': 2.4192488193511963} +02/25/2022 00:49:18 - INFO - codeparrot_training - Step 17954: {'lr': 0.0003756512666452103, 'samples': 9192960, 'steps': 17954, 'loss/train': 1.679328203201294} +02/25/2022 00:49:25 - INFO - codeparrot_training - Step 17955: {'lr': 0.0003756371207639209, 'samples': 9193472, 'steps': 17955, 'loss/train': 1.953917145729065} +02/25/2022 00:49:28 - INFO - codeparrot_training - Step 17956: {'lr': 0.00037562297434444203, 'samples': 9193984, 'steps': 17956, 'loss/train': 2.0183377265930176} +02/25/2022 00:49:34 - INFO - codeparrot_training - Step 17957: {'lr': 0.0003756088273868342, 'samples': 9194496, 'steps': 17957, 'loss/train': 2.3318753242492676} +02/25/2022 00:49:37 - INFO - codeparrot_training - Step 17958: {'lr': 0.00037559467989115806, 'samples': 9195008, 'steps': 17958, 'loss/train': 1.780102252960205} +02/25/2022 00:49:43 - INFO - codeparrot_training - Step 17959: {'lr': 0.00037558053185747416, 'samples': 9195520, 'steps': 17959, 'loss/train': 2.6560707092285156} +02/25/2022 00:49:48 - INFO - codeparrot_training - Step 17960: {'lr': 0.00037556638328584314, 'samples': 9196032, 'steps': 17960, 'loss/train': 1.8643437623977661} +02/25/2022 00:49:52 - INFO - codeparrot_training - Step 17961: {'lr': 0.00037555223417632565, 'samples': 9196544, 'steps': 17961, 'loss/train': 1.6963603496551514} +02/25/2022 00:49:57 - INFO - codeparrot_training - Step 17962: {'lr': 0.0003755380845289822, 'samples': 9197056, 'steps': 17962, 'loss/train': 1.8083080053329468} +02/25/2022 00:50:01 - INFO - codeparrot_training - Step 17963: {'lr': 0.0003755239343438735, 'samples': 9197568, 'steps': 17963, 'loss/train': 2.3275163173675537} +02/25/2022 00:50:07 - INFO - codeparrot_training - Step 17964: {'lr': 0.00037550978362106, 'samples': 9198080, 'steps': 17964, 'loss/train': 1.2991315126419067} +02/25/2022 00:50:10 - INFO - codeparrot_training - Step 17965: {'lr': 0.0003754956323606026, 'samples': 9198592, 'steps': 17965, 'loss/train': 1.908624291419983} +02/25/2022 00:50:16 - INFO - codeparrot_training - Step 17966: {'lr': 0.0003754814805625617, 'samples': 9199104, 'steps': 17966, 'loss/train': 2.5921530723571777} +02/25/2022 00:50:20 - INFO - codeparrot_training - Step 17967: {'lr': 0.00037546732822699803, 'samples': 9199616, 'steps': 17967, 'loss/train': 2.1310462951660156} +02/25/2022 00:50:25 - INFO - codeparrot_training - Step 17968: {'lr': 0.0003754531753539721, 'samples': 9200128, 'steps': 17968, 'loss/train': 1.7575037479400635} +02/25/2022 00:50:29 - INFO - codeparrot_training - Step 17969: {'lr': 0.0003754390219435446, 'samples': 9200640, 'steps': 17969, 'loss/train': 2.1864471435546875} +02/25/2022 00:50:34 - INFO - codeparrot_training - Step 17970: {'lr': 0.00037542486799577624, 'samples': 9201152, 'steps': 17970, 'loss/train': 0.6265259981155396} +02/25/2022 00:50:38 - INFO - codeparrot_training - Step 17971: {'lr': 0.00037541071351072746, 'samples': 9201664, 'steps': 17971, 'loss/train': 1.3466262817382812} +02/25/2022 00:50:43 - INFO - codeparrot_training - Step 17972: {'lr': 0.0003753965584884591, 'samples': 9202176, 'steps': 17972, 'loss/train': 1.4059536457061768} +02/25/2022 00:50:47 - INFO - codeparrot_training - Step 17973: {'lr': 0.00037538240292903167, 'samples': 9202688, 'steps': 17973, 'loss/train': 1.6436214447021484} +02/25/2022 00:50:52 - INFO - codeparrot_training - Step 17974: {'lr': 0.0003753682468325059, 'samples': 9203200, 'steps': 17974, 'loss/train': 3.1578054428100586} +02/25/2022 00:50:56 - INFO - codeparrot_training - Step 17975: {'lr': 0.0003753540901989422, 'samples': 9203712, 'steps': 17975, 'loss/train': 1.153119683265686} +02/25/2022 00:51:01 - INFO - codeparrot_training - Step 17976: {'lr': 0.00037533993302840153, 'samples': 9204224, 'steps': 17976, 'loss/train': 1.9482473134994507} +02/25/2022 00:51:05 - INFO - codeparrot_training - Step 17977: {'lr': 0.00037532577532094436, 'samples': 9204736, 'steps': 17977, 'loss/train': 2.373196840286255} +02/25/2022 00:51:11 - INFO - codeparrot_training - Step 17978: {'lr': 0.00037531161707663136, 'samples': 9205248, 'steps': 17978, 'loss/train': 2.9937961101531982} +02/25/2022 00:51:14 - INFO - codeparrot_training - Step 17979: {'lr': 0.0003752974582955232, 'samples': 9205760, 'steps': 17979, 'loss/train': 1.9035017490386963} +02/25/2022 00:51:20 - INFO - codeparrot_training - Step 17980: {'lr': 0.0003752832989776804, 'samples': 9206272, 'steps': 17980, 'loss/train': 2.5906925201416016} +02/25/2022 00:51:23 - INFO - codeparrot_training - Step 17981: {'lr': 0.0003752691391231639, 'samples': 9206784, 'steps': 17981, 'loss/train': 2.4567105770111084} +02/25/2022 00:51:29 - INFO - codeparrot_training - Step 17982: {'lr': 0.00037525497873203405, 'samples': 9207296, 'steps': 17982, 'loss/train': 2.204061985015869} +02/25/2022 00:51:32 - INFO - codeparrot_training - Step 17983: {'lr': 0.0003752408178043518, 'samples': 9207808, 'steps': 17983, 'loss/train': 1.612808108329773} +02/25/2022 00:51:38 - INFO - codeparrot_training - Step 17984: {'lr': 0.0003752266563401775, 'samples': 9208320, 'steps': 17984, 'loss/train': 1.8292522430419922} +02/25/2022 00:51:42 - INFO - codeparrot_training - Step 17985: {'lr': 0.00037521249433957203, 'samples': 9208832, 'steps': 17985, 'loss/train': 1.753325343132019} +02/25/2022 00:51:47 - INFO - codeparrot_training - Step 17986: {'lr': 0.000375198331802596, 'samples': 9209344, 'steps': 17986, 'loss/train': 0.6351202130317688} +02/25/2022 00:51:50 - INFO - codeparrot_training - Step 17987: {'lr': 0.00037518416872931007, 'samples': 9209856, 'steps': 17987, 'loss/train': 2.168978691101074} +02/25/2022 00:51:56 - INFO - codeparrot_training - Step 17988: {'lr': 0.00037517000511977486, 'samples': 9210368, 'steps': 17988, 'loss/train': 1.2864890098571777} +02/25/2022 00:51:59 - INFO - codeparrot_training - Step 17989: {'lr': 0.00037515584097405115, 'samples': 9210880, 'steps': 17989, 'loss/train': 3.4598395824432373} +02/25/2022 00:52:06 - INFO - codeparrot_training - Step 17990: {'lr': 0.00037514167629219955, 'samples': 9211392, 'steps': 17990, 'loss/train': 1.6512317657470703} +02/25/2022 00:52:09 - INFO - codeparrot_training - Step 17991: {'lr': 0.0003751275110742807, 'samples': 9211904, 'steps': 17991, 'loss/train': 2.2843728065490723} +02/25/2022 00:52:15 - INFO - codeparrot_training - Step 17992: {'lr': 0.00037511334532035537, 'samples': 9212416, 'steps': 17992, 'loss/train': 0.32034048438072205} +02/25/2022 00:52:18 - INFO - codeparrot_training - Step 17993: {'lr': 0.00037509917903048417, 'samples': 9212928, 'steps': 17993, 'loss/train': 2.830625295639038} +02/25/2022 00:52:24 - INFO - codeparrot_training - Step 17994: {'lr': 0.00037508501220472783, 'samples': 9213440, 'steps': 17994, 'loss/train': 3.263338565826416} +02/25/2022 00:52:27 - INFO - codeparrot_training - Step 17995: {'lr': 0.000375070844843147, 'samples': 9213952, 'steps': 17995, 'loss/train': 0.6421509981155396} +02/25/2022 00:52:33 - INFO - codeparrot_training - Step 17996: {'lr': 0.00037505667694580244, 'samples': 9214464, 'steps': 17996, 'loss/train': 2.5737671852111816} +02/25/2022 00:52:36 - INFO - codeparrot_training - Step 17997: {'lr': 0.00037504250851275466, 'samples': 9214976, 'steps': 17997, 'loss/train': 1.7033635377883911} +02/25/2022 00:52:42 - INFO - codeparrot_training - Step 17998: {'lr': 0.0003750283395440647, 'samples': 9215488, 'steps': 17998, 'loss/train': 1.7467429637908936} +02/25/2022 00:52:45 - INFO - codeparrot_training - Step 17999: {'lr': 0.0003750141700397928, 'samples': 9216000, 'steps': 17999, 'loss/train': 1.3792665004730225} +02/25/2022 00:52:45 - INFO - codeparrot_training - Evaluating and saving model checkpoint