diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -18409,3 +18409,1009 @@ Use FP16 precision: False 02/25/2022 00:52:42 - INFO - codeparrot_training - Step 17998: {'lr': 0.0003750283395440647, 'samples': 9215488, 'steps': 17998, 'loss/train': 1.7467429637908936} 02/25/2022 00:52:45 - INFO - codeparrot_training - Step 17999: {'lr': 0.0003750141700397928, 'samples': 9216000, 'steps': 17999, 'loss/train': 1.3792665004730225} 02/25/2022 00:52:45 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 00:53:02 - WARNING - huggingface_hub.repository - Several commits (18) will be pushed upstream. +02/25/2022 00:53:02 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 00:53:36 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 4fad2a5..4d2b52a floral-grass-11 -> floral-grass-11 + +02/25/2022 00:53:44 - INFO - codeparrot_training - Step 18000: {'lr': 0.000375, 'samples': 9216512, 'steps': 18000, 'loss/train': 2.864016532897949} +02/25/2022 00:53:47 - INFO - codeparrot_training - Step 18001: {'lr': 0.0003749858294247469, 'samples': 9217024, 'steps': 18001, 'loss/train': 0.5618553757667542} +02/25/2022 00:53:53 - INFO - codeparrot_training - Step 18002: {'lr': 0.0003749716583140942, 'samples': 9217536, 'steps': 18002, 'loss/train': 1.8189059495925903} +02/25/2022 00:53:56 - INFO - codeparrot_training - Step 18003: {'lr': 0.00037495748666810255, 'samples': 9218048, 'steps': 18003, 'loss/train': 2.4676153659820557} +02/25/2022 00:54:02 - INFO - codeparrot_training - Step 18004: {'lr': 0.0003749433144868327, 'samples': 9218560, 'steps': 18004, 'loss/train': 2.285482406616211} +02/25/2022 00:54:05 - INFO - codeparrot_training - Step 18005: {'lr': 0.00037492914177034544, 'samples': 9219072, 'steps': 18005, 'loss/train': 0.5833332538604736} +02/25/2022 00:54:11 - INFO - codeparrot_training - Step 18006: {'lr': 0.00037491496851870134, 'samples': 9219584, 'steps': 18006, 'loss/train': 2.044358491897583} +02/25/2022 00:54:14 - INFO - codeparrot_training - Step 18007: {'lr': 0.0003749007947319612, 'samples': 9220096, 'steps': 18007, 'loss/train': 1.815633773803711} +02/25/2022 00:54:20 - INFO - codeparrot_training - Step 18008: {'lr': 0.00037488662041018574, 'samples': 9220608, 'steps': 18008, 'loss/train': 1.6437115669250488} +02/25/2022 00:54:23 - INFO - codeparrot_training - Step 18009: {'lr': 0.0003748724455534356, 'samples': 9221120, 'steps': 18009, 'loss/train': 1.8296072483062744} +02/25/2022 00:54:30 - INFO - codeparrot_training - Step 18010: {'lr': 0.0003748582701617716, 'samples': 9221632, 'steps': 18010, 'loss/train': 0.8397614359855652} +02/25/2022 00:54:33 - INFO - codeparrot_training - Step 18011: {'lr': 0.00037484409423525446, 'samples': 9222144, 'steps': 18011, 'loss/train': 2.213873863220215} +02/25/2022 00:54:39 - INFO - codeparrot_training - Step 18012: {'lr': 0.00037482991777394476, 'samples': 9222656, 'steps': 18012, 'loss/train': 1.686563491821289} +02/25/2022 00:54:42 - INFO - codeparrot_training - Step 18013: {'lr': 0.00037481574077790334, 'samples': 9223168, 'steps': 18013, 'loss/train': 2.7907283306121826} +02/25/2022 00:54:48 - INFO - codeparrot_training - Step 18014: {'lr': 0.00037480156324719093, 'samples': 9223680, 'steps': 18014, 'loss/train': 1.6545108556747437} +02/25/2022 00:54:51 - INFO - codeparrot_training - Step 18015: {'lr': 0.00037478738518186835, 'samples': 9224192, 'steps': 18015, 'loss/train': 2.19451904296875} +02/25/2022 00:54:56 - INFO - codeparrot_training - Step 18016: {'lr': 0.00037477320658199615, 'samples': 9224704, 'steps': 18016, 'loss/train': 1.6612696647644043} +02/25/2022 00:55:00 - INFO - codeparrot_training - Step 18017: {'lr': 0.0003747590274476351, 'samples': 9225216, 'steps': 18017, 'loss/train': 1.947067141532898} +02/25/2022 00:55:05 - INFO - codeparrot_training - Step 18018: {'lr': 0.0003747448477788461, 'samples': 9225728, 'steps': 18018, 'loss/train': 1.3791682720184326} +02/25/2022 00:55:09 - INFO - codeparrot_training - Step 18019: {'lr': 0.0003747306675756898, 'samples': 9226240, 'steps': 18019, 'loss/train': 2.568746328353882} +02/25/2022 00:55:15 - INFO - codeparrot_training - Step 18020: {'lr': 0.00037471648683822683, 'samples': 9226752, 'steps': 18020, 'loss/train': 2.169994831085205} +02/25/2022 00:55:18 - INFO - codeparrot_training - Step 18021: {'lr': 0.00037470230556651814, 'samples': 9227264, 'steps': 18021, 'loss/train': 2.9739251136779785} +02/25/2022 00:55:23 - INFO - codeparrot_training - Step 18022: {'lr': 0.00037468812376062423, 'samples': 9227776, 'steps': 18022, 'loss/train': 0.4943702220916748} +02/25/2022 00:55:27 - INFO - codeparrot_training - Step 18023: {'lr': 0.00037467394142060614, 'samples': 9228288, 'steps': 18023, 'loss/train': 0.43165501952171326} +02/25/2022 00:55:32 - INFO - codeparrot_training - Step 18024: {'lr': 0.0003746597585465243, 'samples': 9228800, 'steps': 18024, 'loss/train': 2.010676145553589} +02/25/2022 00:55:36 - INFO - codeparrot_training - Step 18025: {'lr': 0.00037464557513843975, 'samples': 9229312, 'steps': 18025, 'loss/train': 1.948112964630127} +02/25/2022 00:55:42 - INFO - codeparrot_training - Step 18026: {'lr': 0.0003746313911964132, 'samples': 9229824, 'steps': 18026, 'loss/train': 1.3395642042160034} +02/25/2022 00:55:46 - INFO - codeparrot_training - Step 18027: {'lr': 0.00037461720672050524, 'samples': 9230336, 'steps': 18027, 'loss/train': 0.8749566078186035} +02/25/2022 00:55:51 - INFO - codeparrot_training - Step 18028: {'lr': 0.0003746030217107768, 'samples': 9230848, 'steps': 18028, 'loss/train': 2.4887499809265137} +02/25/2022 00:55:55 - INFO - codeparrot_training - Step 18029: {'lr': 0.0003745888361672885, 'samples': 9231360, 'steps': 18029, 'loss/train': 0.6735855340957642} +02/25/2022 00:56:00 - INFO - codeparrot_training - Step 18030: {'lr': 0.00037457465009010124, 'samples': 9231872, 'steps': 18030, 'loss/train': 2.5045032501220703} +02/25/2022 00:56:04 - INFO - codeparrot_training - Step 18031: {'lr': 0.00037456046347927576, 'samples': 9232384, 'steps': 18031, 'loss/train': 2.466980457305908} +02/25/2022 00:56:09 - INFO - codeparrot_training - Step 18032: {'lr': 0.0003745462763348727, 'samples': 9232896, 'steps': 18032, 'loss/train': 1.8540087938308716} +02/25/2022 00:56:13 - INFO - codeparrot_training - Step 18033: {'lr': 0.00037453208865695305, 'samples': 9233408, 'steps': 18033, 'loss/train': 1.4267619848251343} +02/25/2022 00:56:18 - INFO - codeparrot_training - Step 18034: {'lr': 0.0003745179004455774, 'samples': 9233920, 'steps': 18034, 'loss/train': 1.6516650915145874} +02/25/2022 00:56:22 - INFO - codeparrot_training - Step 18035: {'lr': 0.00037450371170080673, 'samples': 9234432, 'steps': 18035, 'loss/train': 2.1614670753479004} +02/25/2022 00:56:28 - INFO - codeparrot_training - Step 18036: {'lr': 0.00037448952242270155, 'samples': 9234944, 'steps': 18036, 'loss/train': 2.139948844909668} +02/25/2022 00:56:31 - INFO - codeparrot_training - Step 18037: {'lr': 0.0003744753326113229, 'samples': 9235456, 'steps': 18037, 'loss/train': 2.0563716888427734} +02/25/2022 00:56:37 - INFO - codeparrot_training - Step 18038: {'lr': 0.00037446114226673136, 'samples': 9235968, 'steps': 18038, 'loss/train': 1.928979754447937} +02/25/2022 00:56:42 - INFO - codeparrot_training - Step 18039: {'lr': 0.00037444695138898784, 'samples': 9236480, 'steps': 18039, 'loss/train': 3.129032850265503} +02/25/2022 00:56:46 - INFO - codeparrot_training - Step 18040: {'lr': 0.00037443275997815306, 'samples': 9236992, 'steps': 18040, 'loss/train': 1.6235578060150146} +02/25/2022 00:56:51 - INFO - codeparrot_training - Step 18041: {'lr': 0.00037441856803428785, 'samples': 9237504, 'steps': 18041, 'loss/train': 1.7870166301727295} +02/25/2022 00:56:55 - INFO - codeparrot_training - Step 18042: {'lr': 0.0003744043755574531, 'samples': 9238016, 'steps': 18042, 'loss/train': 2.188650608062744} +02/25/2022 00:57:00 - INFO - codeparrot_training - Step 18043: {'lr': 0.0003743901825477094, 'samples': 9238528, 'steps': 18043, 'loss/train': 1.6643199920654297} +02/25/2022 00:57:04 - INFO - codeparrot_training - Step 18044: {'lr': 0.0003743759890051177, 'samples': 9239040, 'steps': 18044, 'loss/train': 0.06709722429513931} +02/25/2022 00:57:10 - INFO - codeparrot_training - Step 18045: {'lr': 0.00037436179492973876, 'samples': 9239552, 'steps': 18045, 'loss/train': 1.8383413553237915} +02/25/2022 00:57:14 - INFO - codeparrot_training - Step 18046: {'lr': 0.0003743476003216334, 'samples': 9240064, 'steps': 18046, 'loss/train': 1.3675178289413452} +02/25/2022 00:57:19 - INFO - codeparrot_training - Step 18047: {'lr': 0.00037433340518086235, 'samples': 9240576, 'steps': 18047, 'loss/train': 2.4452977180480957} +02/25/2022 00:57:22 - INFO - codeparrot_training - Step 18048: {'lr': 0.0003743192095074866, 'samples': 9241088, 'steps': 18048, 'loss/train': 0.7475156188011169} +02/25/2022 00:57:28 - INFO - codeparrot_training - Step 18049: {'lr': 0.0003743050133015666, 'samples': 9241600, 'steps': 18049, 'loss/train': 1.7063541412353516} +02/25/2022 00:57:31 - INFO - codeparrot_training - Step 18050: {'lr': 0.0003742908165631636, 'samples': 9242112, 'steps': 18050, 'loss/train': 1.7238842248916626} +02/25/2022 00:57:37 - INFO - codeparrot_training - Step 18051: {'lr': 0.00037427661929233816, 'samples': 9242624, 'steps': 18051, 'loss/train': 1.30217707157135} +02/25/2022 00:57:40 - INFO - codeparrot_training - Step 18052: {'lr': 0.00037426242148915113, 'samples': 9243136, 'steps': 18052, 'loss/train': 2.37275767326355} +02/25/2022 00:57:46 - INFO - codeparrot_training - Step 18053: {'lr': 0.0003742482231536633, 'samples': 9243648, 'steps': 18053, 'loss/train': 2.9769768714904785} +02/25/2022 00:57:50 - INFO - codeparrot_training - Step 18054: {'lr': 0.00037423402428593555, 'samples': 9244160, 'steps': 18054, 'loss/train': 2.0993216037750244} +02/25/2022 00:57:56 - INFO - codeparrot_training - Step 18055: {'lr': 0.00037421982488602875, 'samples': 9244672, 'steps': 18055, 'loss/train': 0.5246959328651428} +02/25/2022 00:57:59 - INFO - codeparrot_training - Step 18056: {'lr': 0.0003742056249540036, 'samples': 9245184, 'steps': 18056, 'loss/train': 0.6965020895004272} +02/25/2022 00:58:05 - INFO - codeparrot_training - Step 18057: {'lr': 0.00037419142448992103, 'samples': 9245696, 'steps': 18057, 'loss/train': 1.2578572034835815} +02/25/2022 00:58:08 - INFO - codeparrot_training - Step 18058: {'lr': 0.0003741772234938418, 'samples': 9246208, 'steps': 18058, 'loss/train': 1.4348853826522827} +02/25/2022 00:58:14 - INFO - codeparrot_training - Step 18059: {'lr': 0.00037416302196582684, 'samples': 9246720, 'steps': 18059, 'loss/train': 2.3590095043182373} +02/25/2022 00:58:17 - INFO - codeparrot_training - Step 18060: {'lr': 0.00037414881990593686, 'samples': 9247232, 'steps': 18060, 'loss/train': 2.1980643272399902} +02/25/2022 00:58:23 - INFO - codeparrot_training - Step 18061: {'lr': 0.0003741346173142328, 'samples': 9247744, 'steps': 18061, 'loss/train': 1.9618237018585205} +02/25/2022 00:58:26 - INFO - codeparrot_training - Step 18062: {'lr': 0.0003741204141907754, 'samples': 9248256, 'steps': 18062, 'loss/train': 1.7716025114059448} +02/25/2022 00:58:32 - INFO - codeparrot_training - Step 18063: {'lr': 0.00037410621053562563, 'samples': 9248768, 'steps': 18063, 'loss/train': 1.532081127166748} +02/25/2022 00:58:35 - INFO - codeparrot_training - Step 18064: {'lr': 0.0003740920063488442, 'samples': 9249280, 'steps': 18064, 'loss/train': 0.5840127468109131} +02/25/2022 00:58:41 - INFO - codeparrot_training - Step 18065: {'lr': 0.0003740778016304921, 'samples': 9249792, 'steps': 18065, 'loss/train': 2.6977975368499756} +02/25/2022 00:58:44 - INFO - codeparrot_training - Step 18066: {'lr': 0.00037406359638063005, 'samples': 9250304, 'steps': 18066, 'loss/train': 1.6146961450576782} +02/25/2022 00:58:50 - INFO - codeparrot_training - Step 18067: {'lr': 0.00037404939059931884, 'samples': 9250816, 'steps': 18067, 'loss/train': 2.1225221157073975} +02/25/2022 00:58:53 - INFO - codeparrot_training - Step 18068: {'lr': 0.0003740351842866196, 'samples': 9251328, 'steps': 18068, 'loss/train': 1.1155716180801392} +02/25/2022 00:58:59 - INFO - codeparrot_training - Step 18069: {'lr': 0.0003740209774425929, 'samples': 9251840, 'steps': 18069, 'loss/train': 1.7514163255691528} +02/25/2022 00:59:02 - INFO - codeparrot_training - Step 18070: {'lr': 0.00037400677006729977, 'samples': 9252352, 'steps': 18070, 'loss/train': 2.209989547729492} +02/25/2022 00:59:09 - INFO - codeparrot_training - Step 18071: {'lr': 0.000373992562160801, 'samples': 9252864, 'steps': 18071, 'loss/train': 2.072781562805176} +02/25/2022 00:59:12 - INFO - codeparrot_training - Step 18072: {'lr': 0.0003739783537231575, 'samples': 9253376, 'steps': 18072, 'loss/train': 3.9846749305725098} +02/25/2022 00:59:18 - INFO - codeparrot_training - Step 18073: {'lr': 0.0003739641447544301, 'samples': 9253888, 'steps': 18073, 'loss/train': 0.84217768907547} +02/25/2022 00:59:21 - INFO - codeparrot_training - Step 18074: {'lr': 0.0003739499352546796, 'samples': 9254400, 'steps': 18074, 'loss/train': 2.2811882495880127} +02/25/2022 00:59:27 - INFO - codeparrot_training - Step 18075: {'lr': 0.00037393572522396697, 'samples': 9254912, 'steps': 18075, 'loss/train': 9.352462768554688} +02/25/2022 00:59:30 - INFO - codeparrot_training - Step 18076: {'lr': 0.00037392151466235303, 'samples': 9255424, 'steps': 18076, 'loss/train': 2.6928279399871826} +02/25/2022 00:59:36 - INFO - codeparrot_training - Step 18077: {'lr': 0.00037390730356989864, 'samples': 9255936, 'steps': 18077, 'loss/train': 1.6681896448135376} +02/25/2022 00:59:39 - INFO - codeparrot_training - Step 18078: {'lr': 0.0003738930919466648, 'samples': 9256448, 'steps': 18078, 'loss/train': 2.294260025024414} +02/25/2022 00:59:45 - INFO - codeparrot_training - Step 18079: {'lr': 0.0003738788797927122, 'samples': 9256960, 'steps': 18079, 'loss/train': 1.343259334564209} +02/25/2022 00:59:48 - INFO - codeparrot_training - Step 18080: {'lr': 0.0003738646671081019, 'samples': 9257472, 'steps': 18080, 'loss/train': 1.635291576385498} +02/25/2022 00:59:54 - INFO - codeparrot_training - Step 18081: {'lr': 0.0003738504538928947, 'samples': 9257984, 'steps': 18081, 'loss/train': 1.811547040939331} +02/25/2022 00:59:57 - INFO - codeparrot_training - Step 18082: {'lr': 0.0003738362401471514, 'samples': 9258496, 'steps': 18082, 'loss/train': 1.9558956623077393} +02/25/2022 01:00:03 - INFO - codeparrot_training - Step 18083: {'lr': 0.00037382202587093305, 'samples': 9259008, 'steps': 18083, 'loss/train': 1.6220651865005493} +02/25/2022 01:00:06 - INFO - codeparrot_training - Step 18084: {'lr': 0.0003738078110643004, 'samples': 9259520, 'steps': 18084, 'loss/train': 1.7357509136199951} +02/25/2022 01:00:19 - INFO - codeparrot_training - Step 18085: {'lr': 0.0003737935957273144, 'samples': 9260032, 'steps': 18085, 'loss/train': 1.1062289476394653} +02/25/2022 01:00:22 - INFO - codeparrot_training - Step 18086: {'lr': 0.000373779379860036, 'samples': 9260544, 'steps': 18086, 'loss/train': 2.552191734313965} +02/25/2022 01:00:28 - INFO - codeparrot_training - Step 18087: {'lr': 0.000373765163462526, 'samples': 9261056, 'steps': 18087, 'loss/train': 1.418000340461731} +02/25/2022 01:00:31 - INFO - codeparrot_training - Step 18088: {'lr': 0.0003737509465348453, 'samples': 9261568, 'steps': 18088, 'loss/train': 1.6852855682373047} +02/25/2022 01:00:36 - INFO - codeparrot_training - Step 18089: {'lr': 0.0003737367290770549, 'samples': 9262080, 'steps': 18089, 'loss/train': 1.9217792749404907} +02/25/2022 01:00:40 - INFO - codeparrot_training - Step 18090: {'lr': 0.0003737225110892156, 'samples': 9262592, 'steps': 18090, 'loss/train': 0.3693554401397705} +02/25/2022 01:00:46 - INFO - codeparrot_training - Step 18091: {'lr': 0.0003737082925713884, 'samples': 9263104, 'steps': 18091, 'loss/train': 3.2785496711730957} +02/25/2022 01:00:49 - INFO - codeparrot_training - Step 18092: {'lr': 0.00037369407352363417, 'samples': 9263616, 'steps': 18092, 'loss/train': 1.668378472328186} +02/25/2022 01:00:55 - INFO - codeparrot_training - Step 18093: {'lr': 0.0003736798539460138, 'samples': 9264128, 'steps': 18093, 'loss/train': 2.0754849910736084} +02/25/2022 01:00:58 - INFO - codeparrot_training - Step 18094: {'lr': 0.00037366563383858814, 'samples': 9264640, 'steps': 18094, 'loss/train': 1.7872614860534668} +02/25/2022 01:01:06 - INFO - codeparrot_training - Step 18095: {'lr': 0.0003736514132014182, 'samples': 9265152, 'steps': 18095, 'loss/train': 1.4480342864990234} +02/25/2022 01:01:09 - INFO - codeparrot_training - Step 18096: {'lr': 0.0003736371920345649, 'samples': 9265664, 'steps': 18096, 'loss/train': 2.033862829208374} +02/25/2022 01:01:15 - INFO - codeparrot_training - Step 18097: {'lr': 0.0003736229703380891, 'samples': 9266176, 'steps': 18097, 'loss/train': 1.2324012517929077} +02/25/2022 01:01:18 - INFO - codeparrot_training - Step 18098: {'lr': 0.0003736087481120518, 'samples': 9266688, 'steps': 18098, 'loss/train': 2.1741888523101807} +02/25/2022 01:01:24 - INFO - codeparrot_training - Step 18099: {'lr': 0.0003735945253565138, 'samples': 9267200, 'steps': 18099, 'loss/train': 1.3801395893096924} +02/25/2022 01:01:27 - INFO - codeparrot_training - Step 18100: {'lr': 0.00037358030207153617, 'samples': 9267712, 'steps': 18100, 'loss/train': 2.210556983947754} +02/25/2022 01:01:33 - INFO - codeparrot_training - Step 18101: {'lr': 0.0003735660782571797, 'samples': 9268224, 'steps': 18101, 'loss/train': 2.1449480056762695} +02/25/2022 01:01:36 - INFO - codeparrot_training - Step 18102: {'lr': 0.00037355185391350546, 'samples': 9268736, 'steps': 18102, 'loss/train': 1.519236445426941} +02/25/2022 01:01:42 - INFO - codeparrot_training - Step 18103: {'lr': 0.00037353762904057425, 'samples': 9269248, 'steps': 18103, 'loss/train': 2.09230899810791} +02/25/2022 01:01:45 - INFO - codeparrot_training - Step 18104: {'lr': 0.00037352340363844706, 'samples': 9269760, 'steps': 18104, 'loss/train': 1.587156057357788} +02/25/2022 01:01:51 - INFO - codeparrot_training - Step 18105: {'lr': 0.0003735091777071849, 'samples': 9270272, 'steps': 18105, 'loss/train': 1.9107987880706787} +02/25/2022 01:01:54 - INFO - codeparrot_training - Step 18106: {'lr': 0.0003734949512468486, 'samples': 9270784, 'steps': 18106, 'loss/train': 1.8355761766433716} +02/25/2022 01:02:02 - INFO - codeparrot_training - Step 18107: {'lr': 0.0003734807242574991, 'samples': 9271296, 'steps': 18107, 'loss/train': 2.575145721435547} +02/25/2022 01:02:05 - INFO - codeparrot_training - Step 18108: {'lr': 0.0003734664967391975, 'samples': 9271808, 'steps': 18108, 'loss/train': 2.6048264503479004} +02/25/2022 01:02:10 - INFO - codeparrot_training - Step 18109: {'lr': 0.0003734522686920045, 'samples': 9272320, 'steps': 18109, 'loss/train': 1.0448098182678223} +02/25/2022 01:02:14 - INFO - codeparrot_training - Step 18110: {'lr': 0.00037343804011598125, 'samples': 9272832, 'steps': 18110, 'loss/train': 2.380887508392334} +02/25/2022 01:02:20 - INFO - codeparrot_training - Step 18111: {'lr': 0.0003734238110111886, 'samples': 9273344, 'steps': 18111, 'loss/train': 2.5484042167663574} +02/25/2022 01:02:23 - INFO - codeparrot_training - Step 18112: {'lr': 0.0003734095813776876, 'samples': 9273856, 'steps': 18112, 'loss/train': 1.769220232963562} +02/25/2022 01:02:28 - INFO - codeparrot_training - Step 18113: {'lr': 0.00037339535121553907, 'samples': 9274368, 'steps': 18113, 'loss/train': 1.68701171875} +02/25/2022 01:02:32 - INFO - codeparrot_training - Step 18114: {'lr': 0.0003733811205248041, 'samples': 9274880, 'steps': 18114, 'loss/train': 1.1715240478515625} +02/25/2022 01:02:37 - INFO - codeparrot_training - Step 18115: {'lr': 0.0003733668893055435, 'samples': 9275392, 'steps': 18115, 'loss/train': 1.523885726928711} +02/25/2022 01:02:41 - INFO - codeparrot_training - Step 18116: {'lr': 0.00037335265755781844, 'samples': 9275904, 'steps': 18116, 'loss/train': 2.677492618560791} +02/25/2022 01:02:49 - INFO - codeparrot_training - Step 18117: {'lr': 0.0003733384252816897, 'samples': 9276416, 'steps': 18117, 'loss/train': 2.5146453380584717} +02/25/2022 01:02:52 - INFO - codeparrot_training - Step 18118: {'lr': 0.0003733241924772183, 'samples': 9276928, 'steps': 18118, 'loss/train': 2.061568260192871} +02/25/2022 01:02:58 - INFO - codeparrot_training - Step 18119: {'lr': 0.0003733099591444652, 'samples': 9277440, 'steps': 18119, 'loss/train': 2.324390172958374} +02/25/2022 01:03:01 - INFO - codeparrot_training - Step 18120: {'lr': 0.0003732957252834914, 'samples': 9277952, 'steps': 18120, 'loss/train': 2.0387895107269287} +02/25/2022 01:03:07 - INFO - codeparrot_training - Step 18121: {'lr': 0.0003732814908943579, 'samples': 9278464, 'steps': 18121, 'loss/train': 2.0450620651245117} +02/25/2022 01:03:10 - INFO - codeparrot_training - Step 18122: {'lr': 0.0003732672559771256, 'samples': 9278976, 'steps': 18122, 'loss/train': 2.033639907836914} +02/25/2022 01:03:16 - INFO - codeparrot_training - Step 18123: {'lr': 0.00037325302053185555, 'samples': 9279488, 'steps': 18123, 'loss/train': 1.8048869371414185} +02/25/2022 01:03:19 - INFO - codeparrot_training - Step 18124: {'lr': 0.0003732387845586087, 'samples': 9280000, 'steps': 18124, 'loss/train': 2.6541860103607178} +02/25/2022 01:03:25 - INFO - codeparrot_training - Step 18125: {'lr': 0.00037322454805744607, 'samples': 9280512, 'steps': 18125, 'loss/train': 1.932218313217163} +02/25/2022 01:03:28 - INFO - codeparrot_training - Step 18126: {'lr': 0.0003732103110284285, 'samples': 9281024, 'steps': 18126, 'loss/train': 2.185729503631592} +02/25/2022 01:03:34 - INFO - codeparrot_training - Step 18127: {'lr': 0.00037319607347161715, 'samples': 9281536, 'steps': 18127, 'loss/train': 2.0502755641937256} +02/25/2022 01:03:37 - INFO - codeparrot_training - Step 18128: {'lr': 0.0003731818353870729, 'samples': 9282048, 'steps': 18128, 'loss/train': 1.6255725622177124} +02/25/2022 01:03:43 - INFO - codeparrot_training - Step 18129: {'lr': 0.0003731675967748568, 'samples': 9282560, 'steps': 18129, 'loss/train': 1.9743597507476807} +02/25/2022 01:03:46 - INFO - codeparrot_training - Step 18130: {'lr': 0.0003731533576350298, 'samples': 9283072, 'steps': 18130, 'loss/train': 2.2525885105133057} +02/25/2022 01:03:53 - INFO - codeparrot_training - Step 18131: {'lr': 0.000373139117967653, 'samples': 9283584, 'steps': 18131, 'loss/train': 1.9093345403671265} +02/25/2022 01:03:57 - INFO - codeparrot_training - Step 18132: {'lr': 0.00037312487777278725, 'samples': 9284096, 'steps': 18132, 'loss/train': 1.726583480834961} +02/25/2022 01:04:03 - INFO - codeparrot_training - Step 18133: {'lr': 0.00037311063705049364, 'samples': 9284608, 'steps': 18133, 'loss/train': 1.5134916305541992} +02/25/2022 01:04:06 - INFO - codeparrot_training - Step 18134: {'lr': 0.00037309639580083314, 'samples': 9285120, 'steps': 18134, 'loss/train': 0.827488899230957} +02/25/2022 01:04:12 - INFO - codeparrot_training - Step 18135: {'lr': 0.0003730821540238668, 'samples': 9285632, 'steps': 18135, 'loss/train': 2.3526413440704346} +02/25/2022 01:04:15 - INFO - codeparrot_training - Step 18136: {'lr': 0.0003730679117196556, 'samples': 9286144, 'steps': 18136, 'loss/train': 1.6611604690551758} +02/25/2022 01:04:21 - INFO - codeparrot_training - Step 18137: {'lr': 0.0003730536688882605, 'samples': 9286656, 'steps': 18137, 'loss/train': 2.226381778717041} +02/25/2022 01:04:25 - INFO - codeparrot_training - Step 18138: {'lr': 0.00037303942552974255, 'samples': 9287168, 'steps': 18138, 'loss/train': 2.1728363037109375} +02/25/2022 01:04:30 - INFO - codeparrot_training - Step 18139: {'lr': 0.0003730251816441628, 'samples': 9287680, 'steps': 18139, 'loss/train': 1.7336833477020264} +02/25/2022 01:04:34 - INFO - codeparrot_training - Step 18140: {'lr': 0.00037301093723158223, 'samples': 9288192, 'steps': 18140, 'loss/train': 1.9051004648208618} +02/25/2022 01:04:39 - INFO - codeparrot_training - Step 18141: {'lr': 0.0003729966922920619, 'samples': 9288704, 'steps': 18141, 'loss/train': 2.0433247089385986} +02/25/2022 01:04:43 - INFO - codeparrot_training - Step 18142: {'lr': 0.00037298244682566273, 'samples': 9289216, 'steps': 18142, 'loss/train': 3.299739360809326} +02/25/2022 01:04:50 - INFO - codeparrot_training - Step 18143: {'lr': 0.0003729682008324459, 'samples': 9289728, 'steps': 18143, 'loss/train': 2.7595059871673584} +02/25/2022 01:04:53 - INFO - codeparrot_training - Step 18144: {'lr': 0.00037295395431247223, 'samples': 9290240, 'steps': 18144, 'loss/train': 1.1415661573410034} +02/25/2022 01:04:59 - INFO - codeparrot_training - Step 18145: {'lr': 0.000372939707265803, 'samples': 9290752, 'steps': 18145, 'loss/train': 1.5083061456680298} +02/25/2022 01:05:02 - INFO - codeparrot_training - Step 18146: {'lr': 0.000372925459692499, 'samples': 9291264, 'steps': 18146, 'loss/train': 1.9867349863052368} +02/25/2022 01:05:08 - INFO - codeparrot_training - Step 18147: {'lr': 0.0003729112115926214, 'samples': 9291776, 'steps': 18147, 'loss/train': 2.9592525959014893} +02/25/2022 01:05:12 - INFO - codeparrot_training - Step 18148: {'lr': 0.00037289696296623117, 'samples': 9292288, 'steps': 18148, 'loss/train': 2.8230485916137695} +02/25/2022 01:05:17 - INFO - codeparrot_training - Step 18149: {'lr': 0.00037288271381338936, 'samples': 9292800, 'steps': 18149, 'loss/train': 2.882969856262207} +02/25/2022 01:05:20 - INFO - codeparrot_training - Step 18150: {'lr': 0.0003728684641341571, 'samples': 9293312, 'steps': 18150, 'loss/train': 2.1075592041015625} +02/25/2022 01:05:26 - INFO - codeparrot_training - Step 18151: {'lr': 0.00037285421392859526, 'samples': 9293824, 'steps': 18151, 'loss/train': 0.2469644844532013} +02/25/2022 01:05:29 - INFO - codeparrot_training - Step 18152: {'lr': 0.00037283996319676505, 'samples': 9294336, 'steps': 18152, 'loss/train': 2.240318536758423} +02/25/2022 01:05:37 - INFO - codeparrot_training - Step 18153: {'lr': 0.0003728257119387274, 'samples': 9294848, 'steps': 18153, 'loss/train': 1.770466685295105} +02/25/2022 01:05:42 - INFO - codeparrot_training - Step 18154: {'lr': 0.0003728114601545435, 'samples': 9295360, 'steps': 18154, 'loss/train': 2.044111967086792} +02/25/2022 01:05:46 - INFO - codeparrot_training - Step 18155: {'lr': 0.0003727972078442742, 'samples': 9295872, 'steps': 18155, 'loss/train': 2.1879515647888184} +02/25/2022 01:05:49 - INFO - codeparrot_training - Step 18156: {'lr': 0.0003727829550079806, 'samples': 9296384, 'steps': 18156, 'loss/train': 1.160942792892456} +02/25/2022 01:05:55 - INFO - codeparrot_training - Step 18157: {'lr': 0.00037276870164572394, 'samples': 9296896, 'steps': 18157, 'loss/train': 2.1044564247131348} +02/25/2022 01:06:00 - INFO - codeparrot_training - Step 18158: {'lr': 0.00037275444775756506, 'samples': 9297408, 'steps': 18158, 'loss/train': 1.6638935804367065} +02/25/2022 01:06:04 - INFO - codeparrot_training - Step 18159: {'lr': 0.00037274019334356516, 'samples': 9297920, 'steps': 18159, 'loss/train': 2.2887117862701416} +02/25/2022 01:06:07 - INFO - codeparrot_training - Step 18160: {'lr': 0.0003727259384037852, 'samples': 9298432, 'steps': 18160, 'loss/train': 2.5881030559539795} +02/25/2022 01:06:13 - INFO - codeparrot_training - Step 18161: {'lr': 0.0003727116829382864, 'samples': 9298944, 'steps': 18161, 'loss/train': 2.3937418460845947} +02/25/2022 01:06:16 - INFO - codeparrot_training - Step 18162: {'lr': 0.00037269742694712963, 'samples': 9299456, 'steps': 18162, 'loss/train': 2.2580037117004395} +02/25/2022 01:06:24 - INFO - codeparrot_training - Step 18163: {'lr': 0.000372683170430376, 'samples': 9299968, 'steps': 18163, 'loss/train': 2.270491361618042} +02/25/2022 01:06:27 - INFO - codeparrot_training - Step 18164: {'lr': 0.00037266891338808667, 'samples': 9300480, 'steps': 18164, 'loss/train': 1.6043137311935425} +02/25/2022 01:06:32 - INFO - codeparrot_training - Step 18165: {'lr': 0.0003726546558203227, 'samples': 9300992, 'steps': 18165, 'loss/train': 3.3150887489318848} +02/25/2022 01:06:36 - INFO - codeparrot_training - Step 18166: {'lr': 0.0003726403977271451, 'samples': 9301504, 'steps': 18166, 'loss/train': 1.5193564891815186} +02/25/2022 01:06:41 - INFO - codeparrot_training - Step 18167: {'lr': 0.00037262613910861485, 'samples': 9302016, 'steps': 18167, 'loss/train': 0.18542303144931793} +02/25/2022 01:06:45 - INFO - codeparrot_training - Step 18168: {'lr': 0.0003726118799647933, 'samples': 9302528, 'steps': 18168, 'loss/train': 2.282599687576294} +02/25/2022 01:06:50 - INFO - codeparrot_training - Step 18169: {'lr': 0.0003725976202957414, 'samples': 9303040, 'steps': 18169, 'loss/train': 1.3491321802139282} +02/25/2022 01:06:54 - INFO - codeparrot_training - Step 18170: {'lr': 0.0003725833601015202, 'samples': 9303552, 'steps': 18170, 'loss/train': 1.3623782396316528} +02/25/2022 01:06:59 - INFO - codeparrot_training - Step 18171: {'lr': 0.0003725690993821908, 'samples': 9304064, 'steps': 18171, 'loss/train': 2.0347962379455566} +02/25/2022 01:07:03 - INFO - codeparrot_training - Step 18172: {'lr': 0.00037255483813781434, 'samples': 9304576, 'steps': 18172, 'loss/train': 1.257857084274292} +02/25/2022 01:07:08 - INFO - codeparrot_training - Step 18173: {'lr': 0.00037254057636845186, 'samples': 9305088, 'steps': 18173, 'loss/train': 2.6665821075439453} +02/25/2022 01:07:12 - INFO - codeparrot_training - Step 18174: {'lr': 0.0003725263140741644, 'samples': 9305600, 'steps': 18174, 'loss/train': 1.9697328805923462} +02/25/2022 01:07:17 - INFO - codeparrot_training - Step 18175: {'lr': 0.0003725120512550131, 'samples': 9306112, 'steps': 18175, 'loss/train': 1.4675523042678833} +02/25/2022 01:07:23 - INFO - codeparrot_training - Step 18176: {'lr': 0.00037249778791105916, 'samples': 9306624, 'steps': 18176, 'loss/train': 1.2928705215454102} +02/25/2022 01:07:26 - INFO - codeparrot_training - Step 18177: {'lr': 0.0003724835240423636, 'samples': 9307136, 'steps': 18177, 'loss/train': 2.392090320587158} +02/25/2022 01:07:34 - INFO - codeparrot_training - Step 18178: {'lr': 0.0003724692596489874, 'samples': 9307648, 'steps': 18178, 'loss/train': 2.3078391551971436} +02/25/2022 01:07:37 - INFO - codeparrot_training - Step 18179: {'lr': 0.0003724549947309919, 'samples': 9308160, 'steps': 18179, 'loss/train': 0.6930814385414124} +02/25/2022 01:07:43 - INFO - codeparrot_training - Step 18180: {'lr': 0.00037244072928843805, 'samples': 9308672, 'steps': 18180, 'loss/train': 1.8631397485733032} +02/25/2022 01:07:46 - INFO - codeparrot_training - Step 18181: {'lr': 0.000372426463321387, 'samples': 9309184, 'steps': 18181, 'loss/train': 2.373523712158203} +02/25/2022 01:07:51 - INFO - codeparrot_training - Step 18182: {'lr': 0.0003724121968298998, 'samples': 9309696, 'steps': 18182, 'loss/train': 1.6055680513381958} +02/25/2022 01:07:55 - INFO - codeparrot_training - Step 18183: {'lr': 0.0003723979298140377, 'samples': 9310208, 'steps': 18183, 'loss/train': 2.60477352142334} +02/25/2022 01:08:00 - INFO - codeparrot_training - Step 18184: {'lr': 0.0003723836622738617, 'samples': 9310720, 'steps': 18184, 'loss/train': 0.5717832446098328} +02/25/2022 01:08:04 - INFO - codeparrot_training - Step 18185: {'lr': 0.00037236939420943294, 'samples': 9311232, 'steps': 18185, 'loss/train': 1.2912105321884155} +02/25/2022 01:08:09 - INFO - codeparrot_training - Step 18186: {'lr': 0.0003723551256208125, 'samples': 9311744, 'steps': 18186, 'loss/train': 1.1674050092697144} +02/25/2022 01:08:13 - INFO - codeparrot_training - Step 18187: {'lr': 0.0003723408565080616, 'samples': 9312256, 'steps': 18187, 'loss/train': 1.5088348388671875} +02/25/2022 01:08:20 - INFO - codeparrot_training - Step 18188: {'lr': 0.00037232658687124135, 'samples': 9312768, 'steps': 18188, 'loss/train': 1.5856822729110718} +02/25/2022 01:08:24 - INFO - codeparrot_training - Step 18189: {'lr': 0.0003723123167104128, 'samples': 9313280, 'steps': 18189, 'loss/train': 2.3326053619384766} +02/25/2022 01:08:30 - INFO - codeparrot_training - Step 18190: {'lr': 0.0003722980460256372, 'samples': 9313792, 'steps': 18190, 'loss/train': 0.7577613592147827} +02/25/2022 01:08:33 - INFO - codeparrot_training - Step 18191: {'lr': 0.00037228377481697555, 'samples': 9314304, 'steps': 18191, 'loss/train': 1.4120705127716064} +02/25/2022 01:08:39 - INFO - codeparrot_training - Step 18192: {'lr': 0.0003722695030844891, 'samples': 9314816, 'steps': 18192, 'loss/train': 3.4470698833465576} +02/25/2022 01:08:42 - INFO - codeparrot_training - Step 18193: {'lr': 0.0003722552308282388, 'samples': 9315328, 'steps': 18193, 'loss/train': 2.4086358547210693} +02/25/2022 01:08:48 - INFO - codeparrot_training - Step 18194: {'lr': 0.000372240958048286, 'samples': 9315840, 'steps': 18194, 'loss/train': 1.9647432565689087} +02/25/2022 01:08:51 - INFO - codeparrot_training - Step 18195: {'lr': 0.0003722266847446918, 'samples': 9316352, 'steps': 18195, 'loss/train': 2.35617733001709} +02/25/2022 01:08:57 - INFO - codeparrot_training - Step 18196: {'lr': 0.00037221241091751716, 'samples': 9316864, 'steps': 18196, 'loss/train': 2.1925320625305176} +02/25/2022 01:09:00 - INFO - codeparrot_training - Step 18197: {'lr': 0.00037219813656682346, 'samples': 9317376, 'steps': 18197, 'loss/train': 1.495530605316162} +02/25/2022 01:09:07 - INFO - codeparrot_training - Step 18198: {'lr': 0.0003721838616926717, 'samples': 9317888, 'steps': 18198, 'loss/train': 1.1121848821640015} +02/25/2022 01:09:11 - INFO - codeparrot_training - Step 18199: {'lr': 0.0003721695862951231, 'samples': 9318400, 'steps': 18199, 'loss/train': 2.4066126346588135} +02/25/2022 01:09:16 - INFO - codeparrot_training - Step 18200: {'lr': 0.0003721553103742388, 'samples': 9318912, 'steps': 18200, 'loss/train': 2.2910492420196533} +02/25/2022 01:09:20 - INFO - codeparrot_training - Step 18201: {'lr': 0.0003721410339300799, 'samples': 9319424, 'steps': 18201, 'loss/train': 1.93354332447052} +02/25/2022 01:09:26 - INFO - codeparrot_training - Step 18202: {'lr': 0.0003721267569627076, 'samples': 9319936, 'steps': 18202, 'loss/train': 2.36238169670105} +02/25/2022 01:09:29 - INFO - codeparrot_training - Step 18203: {'lr': 0.00037211247947218306, 'samples': 9320448, 'steps': 18203, 'loss/train': 3.8021886348724365} +02/25/2022 01:09:35 - INFO - codeparrot_training - Step 18204: {'lr': 0.0003720982014585674, 'samples': 9320960, 'steps': 18204, 'loss/train': 2.211372137069702} +02/25/2022 01:09:38 - INFO - codeparrot_training - Step 18205: {'lr': 0.0003720839229219218, 'samples': 9321472, 'steps': 18205, 'loss/train': 2.2768352031707764} +02/25/2022 01:09:44 - INFO - codeparrot_training - Step 18206: {'lr': 0.00037206964386230754, 'samples': 9321984, 'steps': 18206, 'loss/train': 1.9894641637802124} +02/25/2022 01:09:47 - INFO - codeparrot_training - Step 18207: {'lr': 0.00037205536427978563, 'samples': 9322496, 'steps': 18207, 'loss/train': 1.787400722503662} +02/25/2022 01:09:54 - INFO - codeparrot_training - Step 18208: {'lr': 0.0003720410841744173, 'samples': 9323008, 'steps': 18208, 'loss/train': 1.9226490259170532} +02/25/2022 01:09:58 - INFO - codeparrot_training - Step 18209: {'lr': 0.0003720268035462637, 'samples': 9323520, 'steps': 18209, 'loss/train': 2.5580832958221436} +02/25/2022 01:10:04 - INFO - codeparrot_training - Step 18210: {'lr': 0.000372012522395386, 'samples': 9324032, 'steps': 18210, 'loss/train': 1.062124252319336} +02/25/2022 01:10:07 - INFO - codeparrot_training - Step 18211: {'lr': 0.00037199824072184546, 'samples': 9324544, 'steps': 18211, 'loss/train': 3.8143019676208496} +02/25/2022 01:10:13 - INFO - codeparrot_training - Step 18212: {'lr': 0.0003719839585257032, 'samples': 9325056, 'steps': 18212, 'loss/train': 2.7102842330932617} +02/25/2022 01:10:16 - INFO - codeparrot_training - Step 18213: {'lr': 0.00037196967580702036, 'samples': 9325568, 'steps': 18213, 'loss/train': 1.4733316898345947} +02/25/2022 01:10:22 - INFO - codeparrot_training - Step 18214: {'lr': 0.0003719553925658581, 'samples': 9326080, 'steps': 18214, 'loss/train': 1.295449137687683} +02/25/2022 01:10:25 - INFO - codeparrot_training - Step 18215: {'lr': 0.00037194110880227777, 'samples': 9326592, 'steps': 18215, 'loss/train': 1.6882466077804565} +02/25/2022 01:10:31 - INFO - codeparrot_training - Step 18216: {'lr': 0.0003719268245163404, 'samples': 9327104, 'steps': 18216, 'loss/train': 1.218934178352356} +02/25/2022 01:10:34 - INFO - codeparrot_training - Step 18217: {'lr': 0.0003719125397081072, 'samples': 9327616, 'steps': 18217, 'loss/train': 0.9610633254051208} +02/25/2022 01:10:40 - INFO - codeparrot_training - Step 18218: {'lr': 0.00037189825437763946, 'samples': 9328128, 'steps': 18218, 'loss/train': 2.1672823429107666} +02/25/2022 01:10:43 - INFO - codeparrot_training - Step 18219: {'lr': 0.0003718839685249983, 'samples': 9328640, 'steps': 18219, 'loss/train': 1.6228522062301636} +02/25/2022 01:10:49 - INFO - codeparrot_training - Step 18220: {'lr': 0.0003718696821502449, 'samples': 9329152, 'steps': 18220, 'loss/train': 2.0922420024871826} +02/25/2022 01:10:52 - INFO - codeparrot_training - Step 18221: {'lr': 0.0003718553952534405, 'samples': 9329664, 'steps': 18221, 'loss/train': 2.2527365684509277} +02/25/2022 01:10:58 - INFO - codeparrot_training - Step 18222: {'lr': 0.0003718411078346462, 'samples': 9330176, 'steps': 18222, 'loss/train': 1.614151120185852} +02/25/2022 01:11:01 - INFO - codeparrot_training - Step 18223: {'lr': 0.0003718268198939234, 'samples': 9330688, 'steps': 18223, 'loss/train': 1.6068544387817383} +02/25/2022 01:11:09 - INFO - codeparrot_training - Step 18224: {'lr': 0.0003718125314313331, 'samples': 9331200, 'steps': 18224, 'loss/train': 1.7785314321517944} +02/25/2022 01:11:12 - INFO - codeparrot_training - Step 18225: {'lr': 0.0003717982424469366, 'samples': 9331712, 'steps': 18225, 'loss/train': 3.118506908416748} +02/25/2022 01:11:18 - INFO - codeparrot_training - Step 18226: {'lr': 0.00037178395294079516, 'samples': 9332224, 'steps': 18226, 'loss/train': 1.8392834663391113} +02/25/2022 01:11:21 - INFO - codeparrot_training - Step 18227: {'lr': 0.0003717696629129699, 'samples': 9332736, 'steps': 18227, 'loss/train': 3.2869040966033936} +02/25/2022 01:11:27 - INFO - codeparrot_training - Step 18228: {'lr': 0.00037175537236352205, 'samples': 9333248, 'steps': 18228, 'loss/train': 1.5508891344070435} +02/25/2022 01:11:30 - INFO - codeparrot_training - Step 18229: {'lr': 0.0003717410812925129, 'samples': 9333760, 'steps': 18229, 'loss/train': 2.510763168334961} +02/25/2022 01:11:36 - INFO - codeparrot_training - Step 18230: {'lr': 0.0003717267897000036, 'samples': 9334272, 'steps': 18230, 'loss/train': 0.9270935654640198} +02/25/2022 01:11:39 - INFO - codeparrot_training - Step 18231: {'lr': 0.0003717124975860554, 'samples': 9334784, 'steps': 18231, 'loss/train': 1.5932364463806152} +02/25/2022 01:11:45 - INFO - codeparrot_training - Step 18232: {'lr': 0.00037169820495072935, 'samples': 9335296, 'steps': 18232, 'loss/train': 1.8895373344421387} +02/25/2022 01:11:48 - INFO - codeparrot_training - Step 18233: {'lr': 0.000371683911794087, 'samples': 9335808, 'steps': 18233, 'loss/train': 2.0789709091186523} +02/25/2022 01:11:56 - INFO - codeparrot_training - Step 18234: {'lr': 0.0003716696181161894, 'samples': 9336320, 'steps': 18234, 'loss/train': 2.076809883117676} +02/25/2022 01:11:59 - INFO - codeparrot_training - Step 18235: {'lr': 0.00037165532391709777, 'samples': 9336832, 'steps': 18235, 'loss/train': 1.8170396089553833} +02/25/2022 01:12:04 - INFO - codeparrot_training - Step 18236: {'lr': 0.00037164102919687335, 'samples': 9337344, 'steps': 18236, 'loss/train': 1.6767241954803467} +02/25/2022 01:12:08 - INFO - codeparrot_training - Step 18237: {'lr': 0.00037162673395557737, 'samples': 9337856, 'steps': 18237, 'loss/train': 2.6790950298309326} +02/25/2022 01:12:13 - INFO - codeparrot_training - Step 18238: {'lr': 0.0003716124381932711, 'samples': 9338368, 'steps': 18238, 'loss/train': 2.849553108215332} +02/25/2022 01:12:17 - INFO - codeparrot_training - Step 18239: {'lr': 0.00037159814191001586, 'samples': 9338880, 'steps': 18239, 'loss/train': 2.4096310138702393} +02/25/2022 01:12:23 - INFO - codeparrot_training - Step 18240: {'lr': 0.00037158384510587263, 'samples': 9339392, 'steps': 18240, 'loss/train': 1.4861860275268555} +02/25/2022 01:12:28 - INFO - codeparrot_training - Step 18241: {'lr': 0.0003715695477809029, 'samples': 9339904, 'steps': 18241, 'loss/train': 1.0371453762054443} +02/25/2022 01:12:31 - INFO - codeparrot_training - Step 18242: {'lr': 0.0003715552499351678, 'samples': 9340416, 'steps': 18242, 'loss/train': 2.462574005126953} +02/25/2022 01:12:38 - INFO - codeparrot_training - Step 18243: {'lr': 0.0003715409515687286, 'samples': 9340928, 'steps': 18243, 'loss/train': 1.9747743606567383} +02/25/2022 01:12:41 - INFO - codeparrot_training - Step 18244: {'lr': 0.00037152665268164664, 'samples': 9341440, 'steps': 18244, 'loss/train': 1.7964118719100952} +02/25/2022 01:12:45 - INFO - codeparrot_training - Step 18245: {'lr': 0.00037151235327398304, 'samples': 9341952, 'steps': 18245, 'loss/train': 1.9131324291229248} +02/25/2022 01:12:50 - INFO - codeparrot_training - Step 18246: {'lr': 0.000371498053345799, 'samples': 9342464, 'steps': 18246, 'loss/train': 3.0158257484436035} +02/25/2022 01:12:54 - INFO - codeparrot_training - Step 18247: {'lr': 0.000371483752897156, 'samples': 9342976, 'steps': 18247, 'loss/train': 2.997561454772949} +02/25/2022 01:12:59 - INFO - codeparrot_training - Step 18248: {'lr': 0.00037146945192811513, 'samples': 9343488, 'steps': 18248, 'loss/train': 1.6555982828140259} +02/25/2022 01:13:06 - INFO - codeparrot_training - Step 18249: {'lr': 0.0003714551504387378, 'samples': 9344000, 'steps': 18249, 'loss/train': 2.153865337371826} +02/25/2022 01:13:09 - INFO - codeparrot_training - Step 18250: {'lr': 0.000371440848429085, 'samples': 9344512, 'steps': 18250, 'loss/train': 1.8725228309631348} +02/25/2022 01:13:12 - INFO - codeparrot_training - Step 18251: {'lr': 0.0003714265458992183, 'samples': 9345024, 'steps': 18251, 'loss/train': 1.5715473890304565} +02/25/2022 01:13:18 - INFO - codeparrot_training - Step 18252: {'lr': 0.00037141224284919876, 'samples': 9345536, 'steps': 18252, 'loss/train': 2.6121826171875} +02/25/2022 01:13:21 - INFO - codeparrot_training - Step 18253: {'lr': 0.0003713979392790878, 'samples': 9346048, 'steps': 18253, 'loss/train': 1.0052279233932495} +02/25/2022 01:13:28 - INFO - codeparrot_training - Step 18254: {'lr': 0.0003713836351889465, 'samples': 9346560, 'steps': 18254, 'loss/train': 0.39789798855781555} +02/25/2022 01:13:31 - INFO - codeparrot_training - Step 18255: {'lr': 0.00037136933057883636, 'samples': 9347072, 'steps': 18255, 'loss/train': 3.3356668949127197} +02/25/2022 01:13:37 - INFO - codeparrot_training - Step 18256: {'lr': 0.0003713550254488185, 'samples': 9347584, 'steps': 18256, 'loss/train': 1.932038426399231} +02/25/2022 01:13:40 - INFO - codeparrot_training - Step 18257: {'lr': 0.0003713407197989543, 'samples': 9348096, 'steps': 18257, 'loss/train': 1.9442864656448364} +02/25/2022 01:13:46 - INFO - codeparrot_training - Step 18258: {'lr': 0.0003713264136293049, 'samples': 9348608, 'steps': 18258, 'loss/train': 1.2026679515838623} +02/25/2022 01:13:49 - INFO - codeparrot_training - Step 18259: {'lr': 0.0003713121069399317, 'samples': 9349120, 'steps': 18259, 'loss/train': 1.7753469944000244} +02/25/2022 01:13:55 - INFO - codeparrot_training - Step 18260: {'lr': 0.00037129779973089596, 'samples': 9349632, 'steps': 18260, 'loss/train': 2.330617904663086} +02/25/2022 01:13:58 - INFO - codeparrot_training - Step 18261: {'lr': 0.00037128349200225895, 'samples': 9350144, 'steps': 18261, 'loss/train': 1.6819695234298706} +02/25/2022 01:14:04 - INFO - codeparrot_training - Step 18262: {'lr': 0.000371269183754082, 'samples': 9350656, 'steps': 18262, 'loss/train': 2.2887349128723145} +02/25/2022 01:14:07 - INFO - codeparrot_training - Step 18263: {'lr': 0.00037125487498642636, 'samples': 9351168, 'steps': 18263, 'loss/train': 2.0672101974487305} +02/25/2022 01:14:13 - INFO - codeparrot_training - Step 18264: {'lr': 0.00037124056569935336, 'samples': 9351680, 'steps': 18264, 'loss/train': 2.353395938873291} +02/25/2022 01:14:16 - INFO - codeparrot_training - Step 18265: {'lr': 0.00037122625589292425, 'samples': 9352192, 'steps': 18265, 'loss/train': 2.8991599082946777} +02/25/2022 01:14:22 - INFO - codeparrot_training - Step 18266: {'lr': 0.0003712119455672004, 'samples': 9352704, 'steps': 18266, 'loss/train': 1.459247350692749} +02/25/2022 01:14:25 - INFO - codeparrot_training - Step 18267: {'lr': 0.000371197634722243, 'samples': 9353216, 'steps': 18267, 'loss/train': 1.7257615327835083} +02/25/2022 01:14:31 - INFO - codeparrot_training - Step 18268: {'lr': 0.0003711833233581134, 'samples': 9353728, 'steps': 18268, 'loss/train': 1.954116940498352} +02/25/2022 01:14:34 - INFO - codeparrot_training - Step 18269: {'lr': 0.000371169011474873, 'samples': 9354240, 'steps': 18269, 'loss/train': 1.8782039880752563} +02/25/2022 01:14:41 - INFO - codeparrot_training - Step 18270: {'lr': 0.00037115469907258303, 'samples': 9354752, 'steps': 18270, 'loss/train': 2.1509058475494385} +02/25/2022 01:14:44 - INFO - codeparrot_training - Step 18271: {'lr': 0.0003711403861513047, 'samples': 9355264, 'steps': 18271, 'loss/train': 1.794556975364685} +02/25/2022 01:14:50 - INFO - codeparrot_training - Step 18272: {'lr': 0.0003711260727110995, 'samples': 9355776, 'steps': 18272, 'loss/train': 1.8774168491363525} +02/25/2022 01:14:53 - INFO - codeparrot_training - Step 18273: {'lr': 0.00037111175875202863, 'samples': 9356288, 'steps': 18273, 'loss/train': 2.1876180171966553} +02/25/2022 01:14:59 - INFO - codeparrot_training - Step 18274: {'lr': 0.00037109744427415346, 'samples': 9356800, 'steps': 18274, 'loss/train': 1.2828644514083862} +02/25/2022 01:15:02 - INFO - codeparrot_training - Step 18275: {'lr': 0.0003710831292775353, 'samples': 9357312, 'steps': 18275, 'loss/train': 2.9989209175109863} +02/25/2022 01:15:08 - INFO - codeparrot_training - Step 18276: {'lr': 0.00037106881376223544, 'samples': 9357824, 'steps': 18276, 'loss/train': 1.2367514371871948} +02/25/2022 01:15:11 - INFO - codeparrot_training - Step 18277: {'lr': 0.00037105449772831527, 'samples': 9358336, 'steps': 18277, 'loss/train': 1.881469964981079} +02/25/2022 01:15:17 - INFO - codeparrot_training - Step 18278: {'lr': 0.00037104018117583605, 'samples': 9358848, 'steps': 18278, 'loss/train': 1.8572330474853516} +02/25/2022 01:15:20 - INFO - codeparrot_training - Step 18279: {'lr': 0.00037102586410485915, 'samples': 9359360, 'steps': 18279, 'loss/train': 2.6141300201416016} +02/25/2022 01:15:27 - INFO - codeparrot_training - Step 18280: {'lr': 0.00037101154651544583, 'samples': 9359872, 'steps': 18280, 'loss/train': 2.3476433753967285} +02/25/2022 01:15:30 - INFO - codeparrot_training - Step 18281: {'lr': 0.0003709972284076575, 'samples': 9360384, 'steps': 18281, 'loss/train': 2.8311078548431396} +02/25/2022 01:15:36 - INFO - codeparrot_training - Step 18282: {'lr': 0.0003709829097815555, 'samples': 9360896, 'steps': 18282, 'loss/train': 2.3941707611083984} +02/25/2022 01:15:39 - INFO - codeparrot_training - Step 18283: {'lr': 0.00037096859063720104, 'samples': 9361408, 'steps': 18283, 'loss/train': 2.813333749771118} +02/25/2022 01:15:45 - INFO - codeparrot_training - Step 18284: {'lr': 0.00037095427097465564, 'samples': 9361920, 'steps': 18284, 'loss/train': 2.8890011310577393} +02/25/2022 01:15:48 - INFO - codeparrot_training - Step 18285: {'lr': 0.0003709399507939805, 'samples': 9362432, 'steps': 18285, 'loss/train': 2.1595945358276367} +02/25/2022 01:15:54 - INFO - codeparrot_training - Step 18286: {'lr': 0.00037092563009523703, 'samples': 9362944, 'steps': 18286, 'loss/train': 0.4117693603038788} +02/25/2022 01:15:57 - INFO - codeparrot_training - Step 18287: {'lr': 0.0003709113088784865, 'samples': 9363456, 'steps': 18287, 'loss/train': 1.4524484872817993} +02/25/2022 01:16:03 - INFO - codeparrot_training - Step 18288: {'lr': 0.0003708969871437904, 'samples': 9363968, 'steps': 18288, 'loss/train': 1.7061798572540283} +02/25/2022 01:16:06 - INFO - codeparrot_training - Step 18289: {'lr': 0.00037088266489120996, 'samples': 9364480, 'steps': 18289, 'loss/train': 1.9754153490066528} +02/25/2022 01:16:12 - INFO - codeparrot_training - Step 18290: {'lr': 0.0003708683421208066, 'samples': 9364992, 'steps': 18290, 'loss/train': 2.352123975753784} +02/25/2022 01:16:15 - INFO - codeparrot_training - Step 18291: {'lr': 0.0003708540188326416, 'samples': 9365504, 'steps': 18291, 'loss/train': 2.7301297187805176} +02/25/2022 01:16:22 - INFO - codeparrot_training - Step 18292: {'lr': 0.0003708396950267764, 'samples': 9366016, 'steps': 18292, 'loss/train': 3.0772812366485596} +02/25/2022 01:16:26 - INFO - codeparrot_training - Step 18293: {'lr': 0.00037082537070327225, 'samples': 9366528, 'steps': 18293, 'loss/train': 1.3117905855178833} +02/25/2022 01:16:31 - INFO - codeparrot_training - Step 18294: {'lr': 0.0003708110458621906, 'samples': 9367040, 'steps': 18294, 'loss/train': 1.9054096937179565} +02/25/2022 01:16:35 - INFO - codeparrot_training - Step 18295: {'lr': 0.00037079672050359283, 'samples': 9367552, 'steps': 18295, 'loss/train': 1.9621068239212036} +02/25/2022 01:16:40 - INFO - codeparrot_training - Step 18296: {'lr': 0.00037078239462754023, 'samples': 9368064, 'steps': 18296, 'loss/train': 2.882730007171631} +02/25/2022 01:16:44 - INFO - codeparrot_training - Step 18297: {'lr': 0.00037076806823409426, 'samples': 9368576, 'steps': 18297, 'loss/train': 1.7667450904846191} +02/25/2022 01:16:49 - INFO - codeparrot_training - Step 18298: {'lr': 0.00037075374132331613, 'samples': 9369088, 'steps': 18298, 'loss/train': 1.369690179824829} +02/25/2022 01:16:53 - INFO - codeparrot_training - Step 18299: {'lr': 0.0003707394138952674, 'samples': 9369600, 'steps': 18299, 'loss/train': 1.5599039793014526} +02/25/2022 01:16:58 - INFO - codeparrot_training - Step 18300: {'lr': 0.0003707250859500093, 'samples': 9370112, 'steps': 18300, 'loss/train': 2.365187644958496} +02/25/2022 01:17:02 - INFO - codeparrot_training - Step 18301: {'lr': 0.00037071075748760336, 'samples': 9370624, 'steps': 18301, 'loss/train': 1.4861847162246704} +02/25/2022 01:17:08 - INFO - codeparrot_training - Step 18302: {'lr': 0.0003706964285081108, 'samples': 9371136, 'steps': 18302, 'loss/train': 2.0958995819091797} +02/25/2022 01:17:12 - INFO - codeparrot_training - Step 18303: {'lr': 0.0003706820990115931, 'samples': 9371648, 'steps': 18303, 'loss/train': 1.6913419961929321} +02/25/2022 01:17:17 - INFO - codeparrot_training - Step 18304: {'lr': 0.00037066776899811153, 'samples': 9372160, 'steps': 18304, 'loss/train': 3.7470314502716064} +02/25/2022 01:17:21 - INFO - codeparrot_training - Step 18305: {'lr': 0.00037065343846772765, 'samples': 9372672, 'steps': 18305, 'loss/train': 2.057295083999634} +02/25/2022 01:17:26 - INFO - codeparrot_training - Step 18306: {'lr': 0.0003706391074205027, 'samples': 9373184, 'steps': 18306, 'loss/train': 1.244897723197937} +02/25/2022 01:17:29 - INFO - codeparrot_training - Step 18307: {'lr': 0.00037062477585649814, 'samples': 9373696, 'steps': 18307, 'loss/train': 1.9053668975830078} +02/25/2022 01:17:35 - INFO - codeparrot_training - Step 18308: {'lr': 0.00037061044377577535, 'samples': 9374208, 'steps': 18308, 'loss/train': 1.5779110193252563} +02/25/2022 01:17:39 - INFO - codeparrot_training - Step 18309: {'lr': 0.00037059611117839565, 'samples': 9374720, 'steps': 18309, 'loss/train': 1.312774896621704} +02/25/2022 01:17:44 - INFO - codeparrot_training - Step 18310: {'lr': 0.0003705817780644206, 'samples': 9375232, 'steps': 18310, 'loss/train': 1.9697589874267578} +02/25/2022 01:17:47 - INFO - codeparrot_training - Step 18311: {'lr': 0.0003705674444339114, 'samples': 9375744, 'steps': 18311, 'loss/train': 0.5874767899513245} +02/25/2022 01:17:53 - INFO - codeparrot_training - Step 18312: {'lr': 0.0003705531102869297, 'samples': 9376256, 'steps': 18312, 'loss/train': 1.3228329420089722} +02/25/2022 01:17:57 - INFO - codeparrot_training - Step 18313: {'lr': 0.0003705387756235366, 'samples': 9376768, 'steps': 18313, 'loss/train': 2.041604518890381} +02/25/2022 01:18:02 - INFO - codeparrot_training - Step 18314: {'lr': 0.00037052444044379375, 'samples': 9377280, 'steps': 18314, 'loss/train': 1.872889518737793} +02/25/2022 01:18:06 - INFO - codeparrot_training - Step 18315: {'lr': 0.00037051010474776244, 'samples': 9377792, 'steps': 18315, 'loss/train': 1.7747011184692383} +02/25/2022 01:18:11 - INFO - codeparrot_training - Step 18316: {'lr': 0.0003704957685355041, 'samples': 9378304, 'steps': 18316, 'loss/train': 1.9528049230575562} +02/25/2022 01:18:15 - INFO - codeparrot_training - Step 18317: {'lr': 0.00037048143180708014, 'samples': 9378816, 'steps': 18317, 'loss/train': 2.45226788520813} +02/25/2022 01:18:21 - INFO - codeparrot_training - Step 18318: {'lr': 0.000370467094562552, 'samples': 9379328, 'steps': 18318, 'loss/train': 0.18193577229976654} +02/25/2022 01:18:24 - INFO - codeparrot_training - Step 18319: {'lr': 0.0003704527568019811, 'samples': 9379840, 'steps': 18319, 'loss/train': 3.0419232845306396} +02/25/2022 01:18:30 - INFO - codeparrot_training - Step 18320: {'lr': 0.0003704384185254288, 'samples': 9380352, 'steps': 18320, 'loss/train': 2.0626776218414307} +02/25/2022 01:18:33 - INFO - codeparrot_training - Step 18321: {'lr': 0.0003704240797329566, 'samples': 9380864, 'steps': 18321, 'loss/train': 2.1452231407165527} +02/25/2022 01:18:39 - INFO - codeparrot_training - Step 18322: {'lr': 0.00037040974042462584, 'samples': 9381376, 'steps': 18322, 'loss/train': 2.517463207244873} +02/25/2022 01:18:43 - INFO - codeparrot_training - Step 18323: {'lr': 0.000370395400600498, 'samples': 9381888, 'steps': 18323, 'loss/train': 1.62734854221344} +02/25/2022 01:18:48 - INFO - codeparrot_training - Step 18324: {'lr': 0.00037038106026063457, 'samples': 9382400, 'steps': 18324, 'loss/train': 2.8580076694488525} +02/25/2022 01:18:52 - INFO - codeparrot_training - Step 18325: {'lr': 0.0003703667194050968, 'samples': 9382912, 'steps': 18325, 'loss/train': 2.0689048767089844} +02/25/2022 01:18:57 - INFO - codeparrot_training - Step 18326: {'lr': 0.0003703523780339463, 'samples': 9383424, 'steps': 18326, 'loss/train': 1.462218165397644} +02/25/2022 01:19:00 - INFO - codeparrot_training - Step 18327: {'lr': 0.0003703380361472444, 'samples': 9383936, 'steps': 18327, 'loss/train': 1.5697070360183716} +02/25/2022 01:19:07 - INFO - codeparrot_training - Step 18328: {'lr': 0.00037032369374505255, 'samples': 9384448, 'steps': 18328, 'loss/train': 1.4214327335357666} +02/25/2022 01:19:11 - INFO - codeparrot_training - Step 18329: {'lr': 0.0003703093508274322, 'samples': 9384960, 'steps': 18329, 'loss/train': 1.856069803237915} +02/25/2022 01:19:16 - INFO - codeparrot_training - Step 18330: {'lr': 0.0003702950073944448, 'samples': 9385472, 'steps': 18330, 'loss/train': 1.7260596752166748} +02/25/2022 01:19:20 - INFO - codeparrot_training - Step 18331: {'lr': 0.00037028066344615176, 'samples': 9385984, 'steps': 18331, 'loss/train': 1.7860007286071777} +02/25/2022 01:19:25 - INFO - codeparrot_training - Step 18332: {'lr': 0.0003702663189826146, 'samples': 9386496, 'steps': 18332, 'loss/train': 2.374366521835327} +02/25/2022 01:19:29 - INFO - codeparrot_training - Step 18333: {'lr': 0.00037025197400389467, 'samples': 9387008, 'steps': 18333, 'loss/train': 2.837503671646118} +02/25/2022 01:19:34 - INFO - codeparrot_training - Step 18334: {'lr': 0.0003702376285100535, 'samples': 9387520, 'steps': 18334, 'loss/train': 1.8662089109420776} +02/25/2022 01:19:38 - INFO - codeparrot_training - Step 18335: {'lr': 0.00037022328250115244, 'samples': 9388032, 'steps': 18335, 'loss/train': 1.944802165031433} +02/25/2022 01:19:43 - INFO - codeparrot_training - Step 18336: {'lr': 0.00037020893597725313, 'samples': 9388544, 'steps': 18336, 'loss/train': 1.6886168718338013} +02/25/2022 01:19:47 - INFO - codeparrot_training - Step 18337: {'lr': 0.0003701945889384168, 'samples': 9389056, 'steps': 18337, 'loss/train': 1.4969483613967896} +02/25/2022 01:19:54 - INFO - codeparrot_training - Step 18338: {'lr': 0.00037018024138470515, 'samples': 9389568, 'steps': 18338, 'loss/train': 2.8870134353637695} +02/25/2022 01:19:57 - INFO - codeparrot_training - Step 18339: {'lr': 0.0003701658933161794, 'samples': 9390080, 'steps': 18339, 'loss/train': 2.0723161697387695} +02/25/2022 01:20:02 - INFO - codeparrot_training - Step 18340: {'lr': 0.00037015154473290113, 'samples': 9390592, 'steps': 18340, 'loss/train': 0.9998683333396912} +02/25/2022 01:20:06 - INFO - codeparrot_training - Step 18341: {'lr': 0.0003701371956349318, 'samples': 9391104, 'steps': 18341, 'loss/train': 1.3940317630767822} +02/25/2022 01:20:11 - INFO - codeparrot_training - Step 18342: {'lr': 0.00037012284602233294, 'samples': 9391616, 'steps': 18342, 'loss/train': 1.5923874378204346} +02/25/2022 01:20:15 - INFO - codeparrot_training - Step 18343: {'lr': 0.0003701084958951659, 'samples': 9392128, 'steps': 18343, 'loss/train': 1.4979335069656372} +02/25/2022 01:20:21 - INFO - codeparrot_training - Step 18344: {'lr': 0.0003700941452534922, 'samples': 9392640, 'steps': 18344, 'loss/train': 1.7824156284332275} +02/25/2022 01:20:24 - INFO - codeparrot_training - Step 18345: {'lr': 0.00037007979409737324, 'samples': 9393152, 'steps': 18345, 'loss/train': 1.1498560905456543} +02/25/2022 01:20:30 - INFO - codeparrot_training - Step 18346: {'lr': 0.0003700654424268707, 'samples': 9393664, 'steps': 18346, 'loss/train': 2.7380874156951904} +02/25/2022 01:20:36 - INFO - codeparrot_training - Step 18347: {'lr': 0.00037005109024204586, 'samples': 9394176, 'steps': 18347, 'loss/train': 2.328641176223755} +02/25/2022 01:20:39 - INFO - codeparrot_training - Step 18348: {'lr': 0.00037003673754296026, 'samples': 9394688, 'steps': 18348, 'loss/train': 1.8855458498001099} +02/25/2022 01:20:45 - INFO - codeparrot_training - Step 18349: {'lr': 0.00037002238432967547, 'samples': 9395200, 'steps': 18349, 'loss/train': 3.376471757888794} +02/25/2022 01:20:48 - INFO - codeparrot_training - Step 18350: {'lr': 0.0003700080306022528, 'samples': 9395712, 'steps': 18350, 'loss/train': 2.581509828567505} +02/25/2022 01:20:54 - INFO - codeparrot_training - Step 18351: {'lr': 0.00036999367636075386, 'samples': 9396224, 'steps': 18351, 'loss/train': 2.2691421508789062} +02/25/2022 01:20:58 - INFO - codeparrot_training - Step 18352: {'lr': 0.00036997932160524015, 'samples': 9396736, 'steps': 18352, 'loss/train': 1.8684520721435547} +02/25/2022 01:21:03 - INFO - codeparrot_training - Step 18353: {'lr': 0.00036996496633577314, 'samples': 9397248, 'steps': 18353, 'loss/train': 2.008300304412842} +02/25/2022 01:21:06 - INFO - codeparrot_training - Step 18354: {'lr': 0.00036995061055241426, 'samples': 9397760, 'steps': 18354, 'loss/train': 1.93635094165802} +02/25/2022 01:21:12 - INFO - codeparrot_training - Step 18355: {'lr': 0.000369936254255225, 'samples': 9398272, 'steps': 18355, 'loss/train': 1.8770588636398315} +02/25/2022 01:21:15 - INFO - codeparrot_training - Step 18356: {'lr': 0.000369921897444267, 'samples': 9398784, 'steps': 18356, 'loss/train': 2.5583128929138184} +02/25/2022 01:21:21 - INFO - codeparrot_training - Step 18357: {'lr': 0.00036990754011960165, 'samples': 9399296, 'steps': 18357, 'loss/train': 1.3393158912658691} +02/25/2022 01:21:25 - INFO - codeparrot_training - Step 18358: {'lr': 0.0003698931822812905, 'samples': 9399808, 'steps': 18358, 'loss/train': 0.09090162068605423} +02/25/2022 01:21:30 - INFO - codeparrot_training - Step 18359: {'lr': 0.000369878823929395, 'samples': 9400320, 'steps': 18359, 'loss/train': 1.9868463277816772} +02/25/2022 01:21:34 - INFO - codeparrot_training - Step 18360: {'lr': 0.00036986446506397666, 'samples': 9400832, 'steps': 18360, 'loss/train': 1.0877060890197754} +02/25/2022 01:21:39 - INFO - codeparrot_training - Step 18361: {'lr': 0.00036985010568509703, 'samples': 9401344, 'steps': 18361, 'loss/train': 1.9403364658355713} +02/25/2022 01:21:43 - INFO - codeparrot_training - Step 18362: {'lr': 0.00036983574579281764, 'samples': 9401856, 'steps': 18362, 'loss/train': 3.4670326709747314} +02/25/2022 01:21:49 - INFO - codeparrot_training - Step 18363: {'lr': 0.0003698213853871999, 'samples': 9402368, 'steps': 18363, 'loss/train': 2.404047727584839} +02/25/2022 01:21:52 - INFO - codeparrot_training - Step 18364: {'lr': 0.00036980702446830547, 'samples': 9402880, 'steps': 18364, 'loss/train': 1.217385172843933} +02/25/2022 01:21:58 - INFO - codeparrot_training - Step 18365: {'lr': 0.0003697926630361957, 'samples': 9403392, 'steps': 18365, 'loss/train': 2.240957498550415} +02/25/2022 01:22:01 - INFO - codeparrot_training - Step 18366: {'lr': 0.00036977830109093227, 'samples': 9403904, 'steps': 18366, 'loss/train': 2.3599369525909424} +02/25/2022 01:22:07 - INFO - codeparrot_training - Step 18367: {'lr': 0.0003697639386325766, 'samples': 9404416, 'steps': 18367, 'loss/train': 2.1313180923461914} +02/25/2022 01:22:10 - INFO - codeparrot_training - Step 18368: {'lr': 0.00036974957566119027, 'samples': 9404928, 'steps': 18368, 'loss/train': 2.2007174491882324} +02/25/2022 01:22:16 - INFO - codeparrot_training - Step 18369: {'lr': 0.00036973521217683475, 'samples': 9405440, 'steps': 18369, 'loss/train': 1.2404251098632812} +02/25/2022 01:22:19 - INFO - codeparrot_training - Step 18370: {'lr': 0.00036972084817957164, 'samples': 9405952, 'steps': 18370, 'loss/train': 1.6137281656265259} +02/25/2022 01:22:25 - INFO - codeparrot_training - Step 18371: {'lr': 0.0003697064836694624, 'samples': 9406464, 'steps': 18371, 'loss/train': 1.1886402368545532} +02/25/2022 01:22:28 - INFO - codeparrot_training - Step 18372: {'lr': 0.0003696921186465686, 'samples': 9406976, 'steps': 18372, 'loss/train': 1.5630158185958862} +02/25/2022 01:22:34 - INFO - codeparrot_training - Step 18373: {'lr': 0.00036967775311095186, 'samples': 9407488, 'steps': 18373, 'loss/train': 1.7254512310028076} +02/25/2022 01:22:38 - INFO - codeparrot_training - Step 18374: {'lr': 0.00036966338706267347, 'samples': 9408000, 'steps': 18374, 'loss/train': 1.644241213798523} +02/25/2022 01:22:43 - INFO - codeparrot_training - Step 18375: {'lr': 0.0003696490205017953, 'samples': 9408512, 'steps': 18375, 'loss/train': 1.9731807708740234} +02/25/2022 01:22:47 - INFO - codeparrot_training - Step 18376: {'lr': 0.00036963465342837855, 'samples': 9409024, 'steps': 18376, 'loss/train': 1.5577452182769775} +02/25/2022 01:22:52 - INFO - codeparrot_training - Step 18377: {'lr': 0.000369620285842485, 'samples': 9409536, 'steps': 18377, 'loss/train': 2.6976330280303955} +02/25/2022 01:22:56 - INFO - codeparrot_training - Step 18378: {'lr': 0.00036960591774417613, 'samples': 9410048, 'steps': 18378, 'loss/train': 2.6651177406311035} +02/25/2022 01:23:01 - INFO - codeparrot_training - Step 18379: {'lr': 0.00036959154913351357, 'samples': 9410560, 'steps': 18379, 'loss/train': 2.071716547012329} +02/25/2022 01:23:05 - INFO - codeparrot_training - Step 18380: {'lr': 0.0003695771800105586, 'samples': 9411072, 'steps': 18380, 'loss/train': 1.6388473510742188} +02/25/2022 01:23:11 - INFO - codeparrot_training - Step 18381: {'lr': 0.00036956281037537307, 'samples': 9411584, 'steps': 18381, 'loss/train': 2.0668752193450928} +02/25/2022 01:23:15 - INFO - codeparrot_training - Step 18382: {'lr': 0.00036954844022801846, 'samples': 9412096, 'steps': 18382, 'loss/train': 1.8403310775756836} +02/25/2022 01:23:20 - INFO - codeparrot_training - Step 18383: {'lr': 0.00036953406956855624, 'samples': 9412608, 'steps': 18383, 'loss/train': 2.3804545402526855} +02/25/2022 01:23:24 - INFO - codeparrot_training - Step 18384: {'lr': 0.0003695196983970481, 'samples': 9413120, 'steps': 18384, 'loss/train': 1.3367730379104614} +02/25/2022 01:23:29 - INFO - codeparrot_training - Step 18385: {'lr': 0.0003695053267135554, 'samples': 9413632, 'steps': 18385, 'loss/train': 2.2116525173187256} +02/25/2022 01:23:33 - INFO - codeparrot_training - Step 18386: {'lr': 0.00036949095451813997, 'samples': 9414144, 'steps': 18386, 'loss/train': 2.4977450370788574} +02/25/2022 01:23:39 - INFO - codeparrot_training - Step 18387: {'lr': 0.0003694765818108631, 'samples': 9414656, 'steps': 18387, 'loss/train': 2.2690670490264893} +02/25/2022 01:23:42 - INFO - codeparrot_training - Step 18388: {'lr': 0.00036946220859178656, 'samples': 9415168, 'steps': 18388, 'loss/train': 1.9999747276306152} +02/25/2022 01:23:48 - INFO - codeparrot_training - Step 18389: {'lr': 0.0003694478348609718, 'samples': 9415680, 'steps': 18389, 'loss/train': 1.8512916564941406} +02/25/2022 01:23:51 - INFO - codeparrot_training - Step 18390: {'lr': 0.00036943346061848054, 'samples': 9416192, 'steps': 18390, 'loss/train': 2.158099889755249} +02/25/2022 01:23:57 - INFO - codeparrot_training - Step 18391: {'lr': 0.00036941908586437416, 'samples': 9416704, 'steps': 18391, 'loss/train': 2.0486855506896973} +02/25/2022 01:24:00 - INFO - codeparrot_training - Step 18392: {'lr': 0.0003694047105987144, 'samples': 9417216, 'steps': 18392, 'loss/train': 1.4348971843719482} +02/25/2022 01:24:07 - INFO - codeparrot_training - Step 18393: {'lr': 0.00036939033482156277, 'samples': 9417728, 'steps': 18393, 'loss/train': 2.4456098079681396} +02/25/2022 01:24:10 - INFO - codeparrot_training - Step 18394: {'lr': 0.00036937595853298076, 'samples': 9418240, 'steps': 18394, 'loss/train': 4.3987884521484375} +02/25/2022 01:24:16 - INFO - codeparrot_training - Step 18395: {'lr': 0.0003693615817330302, 'samples': 9418752, 'steps': 18395, 'loss/train': 1.0911381244659424} +02/25/2022 01:24:19 - INFO - codeparrot_training - Step 18396: {'lr': 0.00036934720442177244, 'samples': 9419264, 'steps': 18396, 'loss/train': 2.5088391304016113} +02/25/2022 01:24:25 - INFO - codeparrot_training - Step 18397: {'lr': 0.0003693328265992692, 'samples': 9419776, 'steps': 18397, 'loss/train': 0.30967798829078674} +02/25/2022 01:24:28 - INFO - codeparrot_training - Step 18398: {'lr': 0.000369318448265582, 'samples': 9420288, 'steps': 18398, 'loss/train': 1.6470991373062134} +02/25/2022 01:24:34 - INFO - codeparrot_training - Step 18399: {'lr': 0.00036930406942077245, 'samples': 9420800, 'steps': 18399, 'loss/train': 0.20155486464500427} +02/25/2022 01:24:37 - INFO - codeparrot_training - Step 18400: {'lr': 0.0003692896900649021, 'samples': 9421312, 'steps': 18400, 'loss/train': 2.0221967697143555} +02/25/2022 01:24:43 - INFO - codeparrot_training - Step 18401: {'lr': 0.0003692753101980327, 'samples': 9421824, 'steps': 18401, 'loss/train': 2.021521806716919} +02/25/2022 01:24:46 - INFO - codeparrot_training - Step 18402: {'lr': 0.00036926092982022564, 'samples': 9422336, 'steps': 18402, 'loss/train': 2.6775269508361816} +02/25/2022 01:24:52 - INFO - codeparrot_training - Step 18403: {'lr': 0.0003692465489315427, 'samples': 9422848, 'steps': 18403, 'loss/train': 2.4501376152038574} +02/25/2022 01:24:55 - INFO - codeparrot_training - Step 18404: {'lr': 0.00036923216753204536, 'samples': 9423360, 'steps': 18404, 'loss/train': 0.5973928570747375} +02/25/2022 01:25:01 - INFO - codeparrot_training - Step 18405: {'lr': 0.0003692177856217953, 'samples': 9423872, 'steps': 18405, 'loss/train': 1.571923017501831} +02/25/2022 01:25:05 - INFO - codeparrot_training - Step 18406: {'lr': 0.00036920340320085413, 'samples': 9424384, 'steps': 18406, 'loss/train': 1.652537226676941} +02/25/2022 01:25:10 - INFO - codeparrot_training - Step 18407: {'lr': 0.00036918902026928334, 'samples': 9424896, 'steps': 18407, 'loss/train': 1.7127047777175903} +02/25/2022 01:25:14 - INFO - codeparrot_training - Step 18408: {'lr': 0.00036917463682714473, 'samples': 9425408, 'steps': 18408, 'loss/train': 2.864731788635254} +02/25/2022 01:25:20 - INFO - codeparrot_training - Step 18409: {'lr': 0.00036916025287449976, 'samples': 9425920, 'steps': 18409, 'loss/train': 2.3754286766052246} +02/25/2022 01:25:23 - INFO - codeparrot_training - Step 18410: {'lr': 0.0003691458684114102, 'samples': 9426432, 'steps': 18410, 'loss/train': 1.3476039171218872} +02/25/2022 01:25:29 - INFO - codeparrot_training - Step 18411: {'lr': 0.00036913148343793744, 'samples': 9426944, 'steps': 18411, 'loss/train': 1.0442968606948853} +02/25/2022 01:25:32 - INFO - codeparrot_training - Step 18412: {'lr': 0.00036911709795414336, 'samples': 9427456, 'steps': 18412, 'loss/train': 0.8767587542533875} +02/25/2022 01:25:38 - INFO - codeparrot_training - Step 18413: {'lr': 0.00036910271196008936, 'samples': 9427968, 'steps': 18413, 'loss/train': 7.2651214599609375} +02/25/2022 01:25:41 - INFO - codeparrot_training - Step 18414: {'lr': 0.0003690883254558372, 'samples': 9428480, 'steps': 18414, 'loss/train': 2.196528434753418} +02/25/2022 01:25:47 - INFO - codeparrot_training - Step 18415: {'lr': 0.0003690739384414485, 'samples': 9428992, 'steps': 18415, 'loss/train': 3.6021289825439453} +02/25/2022 01:25:51 - INFO - codeparrot_training - Step 18416: {'lr': 0.0003690595509169848, 'samples': 9429504, 'steps': 18416, 'loss/train': 1.0051133632659912} +02/25/2022 01:25:56 - INFO - codeparrot_training - Step 18417: {'lr': 0.00036904516288250786, 'samples': 9430016, 'steps': 18417, 'loss/train': 2.189079761505127} +02/25/2022 01:26:00 - INFO - codeparrot_training - Step 18418: {'lr': 0.0003690307743380791, 'samples': 9430528, 'steps': 18418, 'loss/train': 2.0821285247802734} +02/25/2022 01:26:06 - INFO - codeparrot_training - Step 18419: {'lr': 0.00036901638528376047, 'samples': 9431040, 'steps': 18419, 'loss/train': 1.8279738426208496} +02/25/2022 01:26:09 - INFO - codeparrot_training - Step 18420: {'lr': 0.00036900199571961336, 'samples': 9431552, 'steps': 18420, 'loss/train': 2.6134769916534424} +02/25/2022 01:26:15 - INFO - codeparrot_training - Step 18421: {'lr': 0.0003689876056456995, 'samples': 9432064, 'steps': 18421, 'loss/train': 1.9416747093200684} +02/25/2022 01:26:18 - INFO - codeparrot_training - Step 18422: {'lr': 0.0003689732150620805, 'samples': 9432576, 'steps': 18422, 'loss/train': 2.1659059524536133} +02/25/2022 01:26:24 - INFO - codeparrot_training - Step 18423: {'lr': 0.00036895882396881805, 'samples': 9433088, 'steps': 18423, 'loss/train': 2.4592130184173584} +02/25/2022 01:26:27 - INFO - codeparrot_training - Step 18424: {'lr': 0.0003689444323659737, 'samples': 9433600, 'steps': 18424, 'loss/train': 1.4516977071762085} +02/25/2022 01:26:33 - INFO - codeparrot_training - Step 18425: {'lr': 0.00036893004025360926, 'samples': 9434112, 'steps': 18425, 'loss/train': 1.8877735137939453} +02/25/2022 01:26:36 - INFO - codeparrot_training - Step 18426: {'lr': 0.0003689156476317862, 'samples': 9434624, 'steps': 18426, 'loss/train': 3.7378692626953125} +02/25/2022 01:26:42 - INFO - codeparrot_training - Step 18427: {'lr': 0.0003689012545005664, 'samples': 9435136, 'steps': 18427, 'loss/train': 1.6135536432266235} +02/25/2022 01:26:45 - INFO - codeparrot_training - Step 18428: {'lr': 0.0003688868608600113, 'samples': 9435648, 'steps': 18428, 'loss/train': 2.551117181777954} +02/25/2022 01:26:52 - INFO - codeparrot_training - Step 18429: {'lr': 0.0003688724667101826, 'samples': 9436160, 'steps': 18429, 'loss/train': 2.1527068614959717} +02/25/2022 01:26:55 - INFO - codeparrot_training - Step 18430: {'lr': 0.0003688580720511421, 'samples': 9436672, 'steps': 18430, 'loss/train': 1.1708322763442993} +02/25/2022 01:27:01 - INFO - codeparrot_training - Step 18431: {'lr': 0.0003688436768829512, 'samples': 9437184, 'steps': 18431, 'loss/train': 1.2201554775238037} +02/25/2022 01:27:04 - INFO - codeparrot_training - Step 18432: {'lr': 0.0003688292812056719, 'samples': 9437696, 'steps': 18432, 'loss/train': 2.417375326156616} +02/25/2022 01:27:10 - INFO - codeparrot_training - Step 18433: {'lr': 0.00036881488501936554, 'samples': 9438208, 'steps': 18433, 'loss/train': 0.9687504172325134} +02/25/2022 01:27:13 - INFO - codeparrot_training - Step 18434: {'lr': 0.00036880048832409407, 'samples': 9438720, 'steps': 18434, 'loss/train': 2.5395543575286865} +02/25/2022 01:27:19 - INFO - codeparrot_training - Step 18435: {'lr': 0.000368786091119919, 'samples': 9439232, 'steps': 18435, 'loss/train': 2.1375091075897217} +02/25/2022 01:27:22 - INFO - codeparrot_training - Step 18436: {'lr': 0.00036877169340690204, 'samples': 9439744, 'steps': 18436, 'loss/train': 2.1693179607391357} +02/25/2022 01:27:28 - INFO - codeparrot_training - Step 18437: {'lr': 0.0003687572951851048, 'samples': 9440256, 'steps': 18437, 'loss/train': 2.2370991706848145} +02/25/2022 01:27:31 - INFO - codeparrot_training - Step 18438: {'lr': 0.0003687428964545891, 'samples': 9440768, 'steps': 18438, 'loss/train': 1.4605731964111328} +02/25/2022 01:27:37 - INFO - codeparrot_training - Step 18439: {'lr': 0.00036872849721541643, 'samples': 9441280, 'steps': 18439, 'loss/train': 3.647575855255127} +02/25/2022 01:27:41 - INFO - codeparrot_training - Step 18440: {'lr': 0.0003687140974676486, 'samples': 9441792, 'steps': 18440, 'loss/train': 1.9040979146957397} +02/25/2022 01:27:46 - INFO - codeparrot_training - Step 18441: {'lr': 0.00036869969721134736, 'samples': 9442304, 'steps': 18441, 'loss/train': 1.2172081470489502} +02/25/2022 01:27:50 - INFO - codeparrot_training - Step 18442: {'lr': 0.0003686852964465742, 'samples': 9442816, 'steps': 18442, 'loss/train': 1.8815768957138062} +02/25/2022 01:27:55 - INFO - codeparrot_training - Step 18443: {'lr': 0.000368670895173391, 'samples': 9443328, 'steps': 18443, 'loss/train': 2.245439291000366} +02/25/2022 01:27:59 - INFO - codeparrot_training - Step 18444: {'lr': 0.00036865649339185935, 'samples': 9443840, 'steps': 18444, 'loss/train': 2.360513925552368} +02/25/2022 01:28:04 - INFO - codeparrot_training - Step 18445: {'lr': 0.000368642091102041, 'samples': 9444352, 'steps': 18445, 'loss/train': 1.7992020845413208} +02/25/2022 01:28:08 - INFO - codeparrot_training - Step 18446: {'lr': 0.0003686276883039975, 'samples': 9444864, 'steps': 18446, 'loss/train': 0.591636598110199} +02/25/2022 01:28:13 - INFO - codeparrot_training - Step 18447: {'lr': 0.0003686132849977908, 'samples': 9445376, 'steps': 18447, 'loss/train': 2.6358649730682373} +02/25/2022 01:28:17 - INFO - codeparrot_training - Step 18448: {'lr': 0.0003685988811834823, 'samples': 9445888, 'steps': 18448, 'loss/train': 1.8494524955749512} +02/25/2022 01:28:22 - INFO - codeparrot_training - Step 18449: {'lr': 0.00036858447686113395, 'samples': 9446400, 'steps': 18449, 'loss/train': 1.716161847114563} +02/25/2022 01:28:26 - INFO - codeparrot_training - Step 18450: {'lr': 0.0003685700720308073, 'samples': 9446912, 'steps': 18450, 'loss/train': 2.587435483932495} +02/25/2022 01:28:32 - INFO - codeparrot_training - Step 18451: {'lr': 0.0003685556666925641, 'samples': 9447424, 'steps': 18451, 'loss/train': 1.9122740030288696} +02/25/2022 01:28:37 - INFO - codeparrot_training - Step 18452: {'lr': 0.0003685412608464661, 'samples': 9447936, 'steps': 18452, 'loss/train': 1.8353451490402222} +02/25/2022 01:28:41 - INFO - codeparrot_training - Step 18453: {'lr': 0.00036852685449257505, 'samples': 9448448, 'steps': 18453, 'loss/train': 2.0929436683654785} +02/25/2022 01:28:46 - INFO - codeparrot_training - Step 18454: {'lr': 0.00036851244763095247, 'samples': 9448960, 'steps': 18454, 'loss/train': 2.102295398712158} +02/25/2022 01:28:50 - INFO - codeparrot_training - Step 18455: {'lr': 0.0003684980402616603, 'samples': 9449472, 'steps': 18455, 'loss/train': 2.165572166442871} +02/25/2022 01:28:55 - INFO - codeparrot_training - Step 18456: {'lr': 0.0003684836323847601, 'samples': 9449984, 'steps': 18456, 'loss/train': 1.3640385866165161} +02/25/2022 01:28:59 - INFO - codeparrot_training - Step 18457: {'lr': 0.0003684692240003137, 'samples': 9450496, 'steps': 18457, 'loss/train': 1.8594518899917603} +02/25/2022 01:29:04 - INFO - codeparrot_training - Step 18458: {'lr': 0.00036845481510838264, 'samples': 9451008, 'steps': 18458, 'loss/train': 2.5686378479003906} +02/25/2022 01:29:08 - INFO - codeparrot_training - Step 18459: {'lr': 0.00036844040570902886, 'samples': 9451520, 'steps': 18459, 'loss/train': 1.8522651195526123} +02/25/2022 01:29:14 - INFO - codeparrot_training - Step 18460: {'lr': 0.00036842599580231395, 'samples': 9452032, 'steps': 18460, 'loss/train': 2.4291305541992188} +02/25/2022 01:29:18 - INFO - codeparrot_training - Step 18461: {'lr': 0.0003684115853882997, 'samples': 9452544, 'steps': 18461, 'loss/train': 1.8021272420883179} +02/25/2022 01:29:23 - INFO - codeparrot_training - Step 18462: {'lr': 0.00036839717446704787, 'samples': 9453056, 'steps': 18462, 'loss/train': 2.070101737976074} +02/25/2022 01:29:27 - INFO - codeparrot_training - Step 18463: {'lr': 0.00036838276303862, 'samples': 9453568, 'steps': 18463, 'loss/train': 1.5154380798339844} +02/25/2022 01:29:32 - INFO - codeparrot_training - Step 18464: {'lr': 0.00036836835110307803, 'samples': 9454080, 'steps': 18464, 'loss/train': 2.5555546283721924} +02/25/2022 01:29:36 - INFO - codeparrot_training - Step 18465: {'lr': 0.0003683539386604837, 'samples': 9454592, 'steps': 18465, 'loss/train': 2.0288686752319336} +02/25/2022 01:29:41 - INFO - codeparrot_training - Step 18466: {'lr': 0.00036833952571089856, 'samples': 9455104, 'steps': 18466, 'loss/train': 2.535541296005249} +02/25/2022 01:29:45 - INFO - codeparrot_training - Step 18467: {'lr': 0.0003683251122543846, 'samples': 9455616, 'steps': 18467, 'loss/train': 2.0063090324401855} +02/25/2022 01:29:50 - INFO - codeparrot_training - Step 18468: {'lr': 0.0003683106982910033, 'samples': 9456128, 'steps': 18468, 'loss/train': 1.337662696838379} +02/25/2022 01:29:54 - INFO - codeparrot_training - Step 18469: {'lr': 0.0003682962838208166, 'samples': 9456640, 'steps': 18469, 'loss/train': 2.7879881858825684} +02/25/2022 01:29:59 - INFO - codeparrot_training - Step 18470: {'lr': 0.0003682818688438862, 'samples': 9457152, 'steps': 18470, 'loss/train': 2.137911319732666} +02/25/2022 01:30:03 - INFO - codeparrot_training - Step 18471: {'lr': 0.00036826745336027383, 'samples': 9457664, 'steps': 18471, 'loss/train': 1.447295904159546} +02/25/2022 01:30:08 - INFO - codeparrot_training - Step 18472: {'lr': 0.0003682530373700412, 'samples': 9458176, 'steps': 18472, 'loss/train': 0.09690675139427185} +02/25/2022 01:30:12 - INFO - codeparrot_training - Step 18473: {'lr': 0.00036823862087325017, 'samples': 9458688, 'steps': 18473, 'loss/train': 2.516244411468506} +02/25/2022 01:30:17 - INFO - codeparrot_training - Step 18474: {'lr': 0.00036822420386996237, 'samples': 9459200, 'steps': 18474, 'loss/train': 3.8018407821655273} +02/25/2022 01:30:21 - INFO - codeparrot_training - Step 18475: {'lr': 0.0003682097863602397, 'samples': 9459712, 'steps': 18475, 'loss/train': 1.629761815071106} +02/25/2022 01:30:27 - INFO - codeparrot_training - Step 18476: {'lr': 0.00036819536834414374, 'samples': 9460224, 'steps': 18476, 'loss/train': 1.3174259662628174} +02/25/2022 01:30:30 - INFO - codeparrot_training - Step 18477: {'lr': 0.0003681809498217364, 'samples': 9460736, 'steps': 18477, 'loss/train': 1.6340336799621582} +02/25/2022 01:30:36 - INFO - codeparrot_training - Step 18478: {'lr': 0.0003681665307930794, 'samples': 9461248, 'steps': 18478, 'loss/train': 2.07969069480896} +02/25/2022 01:30:39 - INFO - codeparrot_training - Step 18479: {'lr': 0.0003681521112582345, 'samples': 9461760, 'steps': 18479, 'loss/train': 2.468186140060425} +02/25/2022 01:30:45 - INFO - codeparrot_training - Step 18480: {'lr': 0.00036813769121726354, 'samples': 9462272, 'steps': 18480, 'loss/train': 1.7095630168914795} +02/25/2022 01:30:48 - INFO - codeparrot_training - Step 18481: {'lr': 0.00036812327067022813, 'samples': 9462784, 'steps': 18481, 'loss/train': 1.8921030759811401} +02/25/2022 01:30:54 - INFO - codeparrot_training - Step 18482: {'lr': 0.00036810884961719015, 'samples': 9463296, 'steps': 18482, 'loss/train': 3.1167805194854736} +02/25/2022 01:30:57 - INFO - codeparrot_training - Step 18483: {'lr': 0.0003680944280582114, 'samples': 9463808, 'steps': 18483, 'loss/train': 8.680535316467285} +02/25/2022 01:31:03 - INFO - codeparrot_training - Step 18484: {'lr': 0.0003680800059933536, 'samples': 9464320, 'steps': 18484, 'loss/train': 1.7377374172210693} +02/25/2022 01:31:06 - INFO - codeparrot_training - Step 18485: {'lr': 0.00036806558342267854, 'samples': 9464832, 'steps': 18485, 'loss/train': 1.5186407566070557} +02/25/2022 01:31:13 - INFO - codeparrot_training - Step 18486: {'lr': 0.0003680511603462481, 'samples': 9465344, 'steps': 18486, 'loss/train': 2.3167595863342285} +02/25/2022 01:31:16 - INFO - codeparrot_training - Step 18487: {'lr': 0.00036803673676412386, 'samples': 9465856, 'steps': 18487, 'loss/train': 1.307106375694275} +02/25/2022 01:31:22 - INFO - codeparrot_training - Step 18488: {'lr': 0.00036802231267636773, 'samples': 9466368, 'steps': 18488, 'loss/train': 1.9781852960586548} +02/25/2022 01:31:25 - INFO - codeparrot_training - Step 18489: {'lr': 0.0003680078880830415, 'samples': 9466880, 'steps': 18489, 'loss/train': 1.6012712717056274} +02/25/2022 01:31:31 - INFO - codeparrot_training - Step 18490: {'lr': 0.000367993462984207, 'samples': 9467392, 'steps': 18490, 'loss/train': 2.742845058441162} +02/25/2022 01:31:34 - INFO - codeparrot_training - Step 18491: {'lr': 0.0003679790373799259, 'samples': 9467904, 'steps': 18491, 'loss/train': 1.0837022066116333} +02/25/2022 01:31:40 - INFO - codeparrot_training - Step 18492: {'lr': 0.0003679646112702601, 'samples': 9468416, 'steps': 18492, 'loss/train': 2.023545026779175} +02/25/2022 01:31:43 - INFO - codeparrot_training - Step 18493: {'lr': 0.0003679501846552714, 'samples': 9468928, 'steps': 18493, 'loss/train': 1.9390811920166016} +02/25/2022 01:31:49 - INFO - codeparrot_training - Step 18494: {'lr': 0.00036793575753502153, 'samples': 9469440, 'steps': 18494, 'loss/train': 1.4034349918365479} +02/25/2022 01:31:52 - INFO - codeparrot_training - Step 18495: {'lr': 0.0003679213299095723, 'samples': 9469952, 'steps': 18495, 'loss/train': 0.9205752015113831} +02/25/2022 01:31:58 - INFO - codeparrot_training - Step 18496: {'lr': 0.00036790690177898556, 'samples': 9470464, 'steps': 18496, 'loss/train': 1.2389694452285767} +02/25/2022 01:32:02 - INFO - codeparrot_training - Step 18497: {'lr': 0.00036789247314332306, 'samples': 9470976, 'steps': 18497, 'loss/train': 0.17057999968528748} +02/25/2022 01:32:07 - INFO - codeparrot_training - Step 18498: {'lr': 0.00036787804400264666, 'samples': 9471488, 'steps': 18498, 'loss/train': 2.3652663230895996} +02/25/2022 01:32:11 - INFO - codeparrot_training - Step 18499: {'lr': 0.00036786361435701823, 'samples': 9472000, 'steps': 18499, 'loss/train': 0.6023238301277161} +02/25/2022 01:32:16 - INFO - codeparrot_training - Step 18500: {'lr': 0.0003678491842064995, 'samples': 9472512, 'steps': 18500, 'loss/train': 2.1793487071990967} +02/25/2022 01:32:20 - INFO - codeparrot_training - Step 18501: {'lr': 0.00036783475355115213, 'samples': 9473024, 'steps': 18501, 'loss/train': 1.4218807220458984} +02/25/2022 01:32:25 - INFO - codeparrot_training - Step 18502: {'lr': 0.0003678203223910382, 'samples': 9473536, 'steps': 18502, 'loss/train': 2.8462727069854736} +02/25/2022 01:32:29 - INFO - codeparrot_training - Step 18503: {'lr': 0.0003678058907262194, 'samples': 9474048, 'steps': 18503, 'loss/train': 1.7098971605300903} +02/25/2022 01:32:34 - INFO - codeparrot_training - Step 18504: {'lr': 0.00036779145855675763, 'samples': 9474560, 'steps': 18504, 'loss/train': 1.4468417167663574} +02/25/2022 01:32:38 - INFO - codeparrot_training - Step 18505: {'lr': 0.00036777702588271455, 'samples': 9475072, 'steps': 18505, 'loss/train': 2.733323574066162} +02/25/2022 01:32:44 - INFO - codeparrot_training - Step 18506: {'lr': 0.0003677625927041522, 'samples': 9475584, 'steps': 18506, 'loss/train': 1.9343047142028809} +02/25/2022 01:32:47 - INFO - codeparrot_training - Step 18507: {'lr': 0.0003677481590211322, 'samples': 9476096, 'steps': 18507, 'loss/train': 2.8910818099975586} +02/25/2022 01:32:53 - INFO - codeparrot_training - Step 18508: {'lr': 0.0003677337248337165, 'samples': 9476608, 'steps': 18508, 'loss/train': 2.4169318675994873} +02/25/2022 01:32:56 - INFO - codeparrot_training - Step 18509: {'lr': 0.0003677192901419669, 'samples': 9477120, 'steps': 18509, 'loss/train': 2.179732322692871} +02/25/2022 01:33:02 - INFO - codeparrot_training - Step 18510: {'lr': 0.0003677048549459453, 'samples': 9477632, 'steps': 18510, 'loss/train': 0.9500709176063538} +02/25/2022 01:33:07 - INFO - codeparrot_training - Step 18511: {'lr': 0.00036769041924571345, 'samples': 9478144, 'steps': 18511, 'loss/train': 2.284907817840576} +02/25/2022 01:33:11 - INFO - codeparrot_training - Step 18512: {'lr': 0.0003676759830413332, 'samples': 9478656, 'steps': 18512, 'loss/train': 2.174438238143921} +02/25/2022 01:33:16 - INFO - codeparrot_training - Step 18513: {'lr': 0.00036766154633286635, 'samples': 9479168, 'steps': 18513, 'loss/train': 1.991797685623169} +02/25/2022 01:33:20 - INFO - codeparrot_training - Step 18514: {'lr': 0.00036764710912037487, 'samples': 9479680, 'steps': 18514, 'loss/train': 2.8928022384643555} +02/25/2022 01:33:26 - INFO - codeparrot_training - Step 18515: {'lr': 0.00036763267140392053, 'samples': 9480192, 'steps': 18515, 'loss/train': 2.113471746444702} +02/25/2022 01:33:29 - INFO - codeparrot_training - Step 18516: {'lr': 0.0003676182331835651, 'samples': 9480704, 'steps': 18516, 'loss/train': 1.1642417907714844} +02/25/2022 01:33:35 - INFO - codeparrot_training - Step 18517: {'lr': 0.00036760379445937067, 'samples': 9481216, 'steps': 18517, 'loss/train': 1.633929967880249} +02/25/2022 01:33:38 - INFO - codeparrot_training - Step 18518: {'lr': 0.0003675893552313988, 'samples': 9481728, 'steps': 18518, 'loss/train': 1.928324580192566} +02/25/2022 01:33:44 - INFO - codeparrot_training - Step 18519: {'lr': 0.0003675749154997115, 'samples': 9482240, 'steps': 18519, 'loss/train': 1.4640511274337769} +02/25/2022 01:33:47 - INFO - codeparrot_training - Step 18520: {'lr': 0.00036756047526437057, 'samples': 9482752, 'steps': 18520, 'loss/train': 1.706226110458374} +02/25/2022 01:33:53 - INFO - codeparrot_training - Step 18521: {'lr': 0.00036754603452543796, 'samples': 9483264, 'steps': 18521, 'loss/train': 1.2723559141159058} +02/25/2022 01:33:57 - INFO - codeparrot_training - Step 18522: {'lr': 0.00036753159328297536, 'samples': 9483776, 'steps': 18522, 'loss/train': 2.3758411407470703} +02/25/2022 01:34:02 - INFO - codeparrot_training - Step 18523: {'lr': 0.00036751715153704483, 'samples': 9484288, 'steps': 18523, 'loss/train': 1.993325114250183} +02/25/2022 01:34:06 - INFO - codeparrot_training - Step 18524: {'lr': 0.0003675027092877081, 'samples': 9484800, 'steps': 18524, 'loss/train': 1.9763778448104858} +02/25/2022 01:34:11 - INFO - codeparrot_training - Step 18525: {'lr': 0.0003674882665350271, 'samples': 9485312, 'steps': 18525, 'loss/train': 1.1987676620483398} +02/25/2022 01:34:15 - INFO - codeparrot_training - Step 18526: {'lr': 0.0003674738232790636, 'samples': 9485824, 'steps': 18526, 'loss/train': 0.5050768852233887} +02/25/2022 01:34:20 - INFO - codeparrot_training - Step 18527: {'lr': 0.0003674593795198796, 'samples': 9486336, 'steps': 18527, 'loss/train': 2.358412265777588} +02/25/2022 01:34:24 - INFO - codeparrot_training - Step 18528: {'lr': 0.00036744493525753697, 'samples': 9486848, 'steps': 18528, 'loss/train': 0.9244344234466553} +02/25/2022 01:34:29 - INFO - codeparrot_training - Step 18529: {'lr': 0.00036743049049209743, 'samples': 9487360, 'steps': 18529, 'loss/train': 1.5023391246795654} +02/25/2022 01:34:33 - INFO - codeparrot_training - Step 18530: {'lr': 0.00036741604522362304, 'samples': 9487872, 'steps': 18530, 'loss/train': 2.2764010429382324} +02/25/2022 01:34:39 - INFO - codeparrot_training - Step 18531: {'lr': 0.00036740159945217556, 'samples': 9488384, 'steps': 18531, 'loss/train': 1.776261568069458} +02/25/2022 01:34:42 - INFO - codeparrot_training - Step 18532: {'lr': 0.0003673871531778169, 'samples': 9488896, 'steps': 18532, 'loss/train': 1.4440553188323975} +02/25/2022 01:34:48 - INFO - codeparrot_training - Step 18533: {'lr': 0.00036737270640060894, 'samples': 9489408, 'steps': 18533, 'loss/train': 2.702498197555542} +02/25/2022 01:34:51 - INFO - codeparrot_training - Step 18534: {'lr': 0.0003673582591206136, 'samples': 9489920, 'steps': 18534, 'loss/train': 2.5179083347320557} +02/25/2022 01:34:57 - INFO - codeparrot_training - Step 18535: {'lr': 0.00036734381133789277, 'samples': 9490432, 'steps': 18535, 'loss/train': 2.2687923908233643} +02/25/2022 01:35:00 - INFO - codeparrot_training - Step 18536: {'lr': 0.00036732936305250826, 'samples': 9490944, 'steps': 18536, 'loss/train': 2.0512826442718506} +02/25/2022 01:35:06 - INFO - codeparrot_training - Step 18537: {'lr': 0.00036731491426452204, 'samples': 9491456, 'steps': 18537, 'loss/train': 1.6194995641708374} +02/25/2022 01:35:09 - INFO - codeparrot_training - Step 18538: {'lr': 0.00036730046497399587, 'samples': 9491968, 'steps': 18538, 'loss/train': 1.5482797622680664} +02/25/2022 01:35:15 - INFO - codeparrot_training - Step 18539: {'lr': 0.0003672860151809919, 'samples': 9492480, 'steps': 18539, 'loss/train': 0.4164883494377136} +02/25/2022 01:35:19 - INFO - codeparrot_training - Step 18540: {'lr': 0.0003672715648855718, 'samples': 9492992, 'steps': 18540, 'loss/train': 2.125943660736084} +02/25/2022 01:35:25 - INFO - codeparrot_training - Step 18541: {'lr': 0.00036725711408779765, 'samples': 9493504, 'steps': 18541, 'loss/train': 2.3170721530914307} +02/25/2022 01:35:28 - INFO - codeparrot_training - Step 18542: {'lr': 0.0003672426627877312, 'samples': 9494016, 'steps': 18542, 'loss/train': 1.6657222509384155} +02/25/2022 01:35:34 - INFO - codeparrot_training - Step 18543: {'lr': 0.0003672282109854344, 'samples': 9494528, 'steps': 18543, 'loss/train': 2.4740726947784424} +02/25/2022 01:35:37 - INFO - codeparrot_training - Step 18544: {'lr': 0.00036721375868096925, 'samples': 9495040, 'steps': 18544, 'loss/train': 1.5595194101333618} +02/25/2022 01:35:43 - INFO - codeparrot_training - Step 18545: {'lr': 0.00036719930587439744, 'samples': 9495552, 'steps': 18545, 'loss/train': 2.0873467922210693} +02/25/2022 01:35:46 - INFO - codeparrot_training - Step 18546: {'lr': 0.00036718485256578116, 'samples': 9496064, 'steps': 18546, 'loss/train': 2.391516923904419} +02/25/2022 01:35:52 - INFO - codeparrot_training - Step 18547: {'lr': 0.00036717039875518203, 'samples': 9496576, 'steps': 18547, 'loss/train': 2.4634275436401367} +02/25/2022 01:35:56 - INFO - codeparrot_training - Step 18548: {'lr': 0.00036715594444266224, 'samples': 9497088, 'steps': 18548, 'loss/train': 2.5728814601898193} +02/25/2022 01:36:01 - INFO - codeparrot_training - Step 18549: {'lr': 0.00036714148962828353, 'samples': 9497600, 'steps': 18549, 'loss/train': 2.1516098976135254} +02/25/2022 01:36:04 - INFO - codeparrot_training - Step 18550: {'lr': 0.0003671270343121079, 'samples': 9498112, 'steps': 18550, 'loss/train': 0.10505091398954391} +02/25/2022 01:36:10 - INFO - codeparrot_training - Step 18551: {'lr': 0.0003671125784941972, 'samples': 9498624, 'steps': 18551, 'loss/train': 2.427187204360962} +02/25/2022 01:36:14 - INFO - codeparrot_training - Step 18552: {'lr': 0.00036709812217461347, 'samples': 9499136, 'steps': 18552, 'loss/train': 1.4499229192733765} +02/25/2022 01:36:17 - INFO - codeparrot_training - Step 18553: {'lr': 0.0003670836653534185, 'samples': 9499648, 'steps': 18553, 'loss/train': 2.1627843379974365} +02/25/2022 01:36:23 - INFO - codeparrot_training - Step 18554: {'lr': 0.0003670692080306743, 'samples': 9500160, 'steps': 18554, 'loss/train': 1.0516340732574463} +02/25/2022 01:36:27 - INFO - codeparrot_training - Step 18555: {'lr': 0.0003670547502064429, 'samples': 9500672, 'steps': 18555, 'loss/train': 2.418111562728882} +02/25/2022 01:36:32 - INFO - codeparrot_training - Step 18556: {'lr': 0.000367040291880786, 'samples': 9501184, 'steps': 18556, 'loss/train': 1.2454153299331665} +02/25/2022 01:36:36 - INFO - codeparrot_training - Step 18557: {'lr': 0.0003670258330537656, 'samples': 9501696, 'steps': 18557, 'loss/train': 1.701690673828125} +02/25/2022 01:36:41 - INFO - codeparrot_training - Step 18558: {'lr': 0.0003670113737254438, 'samples': 9502208, 'steps': 18558, 'loss/train': 1.7004296779632568} +02/25/2022 01:36:45 - INFO - codeparrot_training - Step 18559: {'lr': 0.0003669969138958824, 'samples': 9502720, 'steps': 18559, 'loss/train': 2.023686170578003} +02/25/2022 01:36:50 - INFO - codeparrot_training - Step 18560: {'lr': 0.00036698245356514336, 'samples': 9503232, 'steps': 18560, 'loss/train': 2.2197930812835693} +02/25/2022 01:36:56 - INFO - codeparrot_training - Step 18561: {'lr': 0.00036696799273328864, 'samples': 9503744, 'steps': 18561, 'loss/train': 0.8763425946235657} +02/25/2022 01:36:59 - INFO - codeparrot_training - Step 18562: {'lr': 0.0003669535314003802, 'samples': 9504256, 'steps': 18562, 'loss/train': 1.7204899787902832} +02/25/2022 01:37:05 - INFO - codeparrot_training - Step 18563: {'lr': 0.00036693906956647996, 'samples': 9504768, 'steps': 18563, 'loss/train': 2.2132744789123535} +02/25/2022 01:37:08 - INFO - codeparrot_training - Step 18564: {'lr': 0.0003669246072316498, 'samples': 9505280, 'steps': 18564, 'loss/train': 1.6493988037109375} +02/25/2022 01:37:14 - INFO - codeparrot_training - Step 18565: {'lr': 0.00036691014439595187, 'samples': 9505792, 'steps': 18565, 'loss/train': 2.3527889251708984} +02/25/2022 01:37:17 - INFO - codeparrot_training - Step 18566: {'lr': 0.00036689568105944794, 'samples': 9506304, 'steps': 18566, 'loss/train': 1.448954463005066} +02/25/2022 01:37:24 - INFO - codeparrot_training - Step 18567: {'lr': 0.0003668812172222001, 'samples': 9506816, 'steps': 18567, 'loss/train': 3.2489798069000244} +02/25/2022 01:37:28 - INFO - codeparrot_training - Step 18568: {'lr': 0.0003668667528842702, 'samples': 9507328, 'steps': 18568, 'loss/train': 2.65628981590271} +02/25/2022 01:37:33 - INFO - codeparrot_training - Step 18569: {'lr': 0.0003668522880457202, 'samples': 9507840, 'steps': 18569, 'loss/train': 1.5596976280212402} +02/25/2022 01:37:37 - INFO - codeparrot_training - Step 18570: {'lr': 0.0003668378227066121, 'samples': 9508352, 'steps': 18570, 'loss/train': 2.1314170360565186} +02/25/2022 01:37:40 - INFO - codeparrot_training - Step 18571: {'lr': 0.00036682335686700796, 'samples': 9508864, 'steps': 18571, 'loss/train': 0.7917835116386414} +02/25/2022 01:37:46 - INFO - codeparrot_training - Step 18572: {'lr': 0.00036680889052696954, 'samples': 9509376, 'steps': 18572, 'loss/train': 2.388180732727051} +02/25/2022 01:37:52 - INFO - codeparrot_training - Step 18573: {'lr': 0.00036679442368655897, 'samples': 9509888, 'steps': 18573, 'loss/train': 1.961667776107788} +02/25/2022 01:37:55 - INFO - codeparrot_training - Step 18574: {'lr': 0.00036677995634583815, 'samples': 9510400, 'steps': 18574, 'loss/train': 1.1142741441726685} +02/25/2022 01:38:01 - INFO - codeparrot_training - Step 18575: {'lr': 0.0003667654885048691, 'samples': 9510912, 'steps': 18575, 'loss/train': 1.5314449071884155} +02/25/2022 01:38:04 - INFO - codeparrot_training - Step 18576: {'lr': 0.00036675102016371386, 'samples': 9511424, 'steps': 18576, 'loss/train': 1.826163411140442} +02/25/2022 01:38:11 - INFO - codeparrot_training - Step 18577: {'lr': 0.0003667365513224342, 'samples': 9511936, 'steps': 18577, 'loss/train': 1.1742926836013794} +02/25/2022 01:38:14 - INFO - codeparrot_training - Step 18578: {'lr': 0.0003667220819810923, 'samples': 9512448, 'steps': 18578, 'loss/train': 1.7007622718811035} +02/25/2022 01:38:20 - INFO - codeparrot_training - Step 18579: {'lr': 0.00036670761213975, 'samples': 9512960, 'steps': 18579, 'loss/train': 2.1866090297698975} +02/25/2022 01:38:23 - INFO - codeparrot_training - Step 18580: {'lr': 0.0003666931417984694, 'samples': 9513472, 'steps': 18580, 'loss/train': 1.435978651046753} +02/25/2022 01:38:29 - INFO - codeparrot_training - Step 18581: {'lr': 0.00036667867095731244, 'samples': 9513984, 'steps': 18581, 'loss/train': 2.2813663482666016} +02/25/2022 01:38:32 - INFO - codeparrot_training - Step 18582: {'lr': 0.0003666641996163411, 'samples': 9514496, 'steps': 18582, 'loss/train': 0.24506095051765442} +02/25/2022 01:38:38 - INFO - codeparrot_training - Step 18583: {'lr': 0.0003666497277756173, 'samples': 9515008, 'steps': 18583, 'loss/train': 2.7871553897857666} +02/25/2022 01:38:42 - INFO - codeparrot_training - Step 18584: {'lr': 0.0003666352554352032, 'samples': 9515520, 'steps': 18584, 'loss/train': 1.028181552886963} +02/25/2022 01:38:45 - INFO - codeparrot_training - Step 18585: {'lr': 0.0003666207825951606, 'samples': 9516032, 'steps': 18585, 'loss/train': 2.455007553100586} +02/25/2022 01:38:51 - INFO - codeparrot_training - Step 18586: {'lr': 0.00036660630925555173, 'samples': 9516544, 'steps': 18586, 'loss/train': 2.088752508163452} +02/25/2022 01:38:54 - INFO - codeparrot_training - Step 18587: {'lr': 0.0003665918354164384, 'samples': 9517056, 'steps': 18587, 'loss/train': 2.1828858852386475} +02/25/2022 01:39:00 - INFO - codeparrot_training - Step 18588: {'lr': 0.00036657736107788264, 'samples': 9517568, 'steps': 18588, 'loss/train': 1.4636602401733398} +02/25/2022 01:39:03 - INFO - codeparrot_training - Step 18589: {'lr': 0.00036656288623994647, 'samples': 9518080, 'steps': 18589, 'loss/train': 1.9326659440994263} +02/25/2022 01:39:09 - INFO - codeparrot_training - Step 18590: {'lr': 0.000366548410902692, 'samples': 9518592, 'steps': 18590, 'loss/train': 1.6341596841812134} +02/25/2022 01:39:13 - INFO - codeparrot_training - Step 18591: {'lr': 0.00036653393506618106, 'samples': 9519104, 'steps': 18591, 'loss/train': 1.962467074394226} +02/25/2022 01:39:18 - INFO - codeparrot_training - Step 18592: {'lr': 0.00036651945873047574, 'samples': 9519616, 'steps': 18592, 'loss/train': 2.0787315368652344} +02/25/2022 01:39:22 - INFO - codeparrot_training - Step 18593: {'lr': 0.0003665049818956381, 'samples': 9520128, 'steps': 18593, 'loss/train': 2.3415849208831787} +02/25/2022 01:39:27 - INFO - codeparrot_training - Step 18594: {'lr': 0.0003664905045617301, 'samples': 9520640, 'steps': 18594, 'loss/train': 2.2822210788726807} +02/25/2022 01:39:31 - INFO - codeparrot_training - Step 18595: {'lr': 0.0003664760267288138, 'samples': 9521152, 'steps': 18595, 'loss/train': 3.195042371749878} +02/25/2022 01:39:36 - INFO - codeparrot_training - Step 18596: {'lr': 0.0003664615483969511, 'samples': 9521664, 'steps': 18596, 'loss/train': 1.5851309299468994} +02/25/2022 01:39:40 - INFO - codeparrot_training - Step 18597: {'lr': 0.0003664470695662042, 'samples': 9522176, 'steps': 18597, 'loss/train': 0.9775142669677734} +02/25/2022 01:39:45 - INFO - codeparrot_training - Step 18598: {'lr': 0.000366432590236635, 'samples': 9522688, 'steps': 18598, 'loss/train': 1.7166187763214111} +02/25/2022 01:39:49 - INFO - codeparrot_training - Step 18599: {'lr': 0.0003664181104083055, 'samples': 9523200, 'steps': 18599, 'loss/train': 1.5581121444702148} +02/25/2022 01:39:54 - INFO - codeparrot_training - Step 18600: {'lr': 0.00036640363008127785, 'samples': 9523712, 'steps': 18600, 'loss/train': 2.07365345954895} +02/25/2022 01:39:58 - INFO - codeparrot_training - Step 18601: {'lr': 0.000366389149255614, 'samples': 9524224, 'steps': 18601, 'loss/train': 1.609239935874939} +02/25/2022 01:40:06 - INFO - codeparrot_training - Step 18602: {'lr': 0.00036637466793137605, 'samples': 9524736, 'steps': 18602, 'loss/train': 1.6889331340789795} +02/25/2022 01:40:09 - INFO - codeparrot_training - Step 18603: {'lr': 0.0003663601861086259, 'samples': 9525248, 'steps': 18603, 'loss/train': 1.559266209602356} +02/25/2022 01:40:15 - INFO - codeparrot_training - Step 18604: {'lr': 0.00036634570378742565, 'samples': 9525760, 'steps': 18604, 'loss/train': 2.1429872512817383} +02/25/2022 01:40:18 - INFO - codeparrot_training - Step 18605: {'lr': 0.00036633122096783736, 'samples': 9526272, 'steps': 18605, 'loss/train': 1.1589276790618896} +02/25/2022 01:40:24 - INFO - codeparrot_training - Step 18606: {'lr': 0.00036631673764992307, 'samples': 9526784, 'steps': 18606, 'loss/train': 2.046861410140991} +02/25/2022 01:40:27 - INFO - codeparrot_training - Step 18607: {'lr': 0.00036630225383374476, 'samples': 9527296, 'steps': 18607, 'loss/train': 1.6538593769073486} +02/25/2022 01:40:33 - INFO - codeparrot_training - Step 18608: {'lr': 0.0003662877695193646, 'samples': 9527808, 'steps': 18608, 'loss/train': 1.6244672536849976} +02/25/2022 01:40:36 - INFO - codeparrot_training - Step 18609: {'lr': 0.0003662732847068445, 'samples': 9528320, 'steps': 18609, 'loss/train': 1.192000389099121} +02/25/2022 01:40:42 - INFO - codeparrot_training - Step 18610: {'lr': 0.00036625879939624663, 'samples': 9528832, 'steps': 18610, 'loss/train': 2.6161937713623047} +02/25/2022 01:40:45 - INFO - codeparrot_training - Step 18611: {'lr': 0.000366244313587633, 'samples': 9529344, 'steps': 18611, 'loss/train': 2.635939359664917} +02/25/2022 01:40:53 - INFO - codeparrot_training - Step 18612: {'lr': 0.0003662298272810655, 'samples': 9529856, 'steps': 18612, 'loss/train': 1.6780717372894287} +02/25/2022 01:40:56 - INFO - codeparrot_training - Step 18613: {'lr': 0.00036621534047660647, 'samples': 9530368, 'steps': 18613, 'loss/train': 2.310574769973755} +02/25/2022 01:41:02 - INFO - codeparrot_training - Step 18614: {'lr': 0.00036620085317431777, 'samples': 9530880, 'steps': 18614, 'loss/train': 1.6441854238510132} +02/25/2022 01:41:05 - INFO - codeparrot_training - Step 18615: {'lr': 0.0003661863653742615, 'samples': 9531392, 'steps': 18615, 'loss/train': 1.5445668697357178} +02/25/2022 01:41:11 - INFO - codeparrot_training - Step 18616: {'lr': 0.0003661718770764998, 'samples': 9531904, 'steps': 18616, 'loss/train': 3.113262414932251} +02/25/2022 01:41:14 - INFO - codeparrot_training - Step 18617: {'lr': 0.00036615738828109465, 'samples': 9532416, 'steps': 18617, 'loss/train': 2.387874126434326} +02/25/2022 01:41:20 - INFO - codeparrot_training - Step 18618: {'lr': 0.00036614289898810804, 'samples': 9532928, 'steps': 18618, 'loss/train': 1.5805346965789795} +02/25/2022 01:41:23 - INFO - codeparrot_training - Step 18619: {'lr': 0.00036612840919760225, 'samples': 9533440, 'steps': 18619, 'loss/train': 1.964694857597351} +02/25/2022 01:41:29 - INFO - codeparrot_training - Step 18620: {'lr': 0.00036611391890963913, 'samples': 9533952, 'steps': 18620, 'loss/train': 1.001628041267395} +02/25/2022 01:41:32 - INFO - codeparrot_training - Step 18621: {'lr': 0.00036609942812428087, 'samples': 9534464, 'steps': 18621, 'loss/train': 0.9977004528045654} +02/25/2022 01:41:40 - INFO - codeparrot_training - Step 18622: {'lr': 0.00036608493684158963, 'samples': 9534976, 'steps': 18622, 'loss/train': 1.417203426361084} +02/25/2022 01:41:43 - INFO - codeparrot_training - Step 18623: {'lr': 0.0003660704450616272, 'samples': 9535488, 'steps': 18623, 'loss/train': 2.5538036823272705} +02/25/2022 01:41:49 - INFO - codeparrot_training - Step 18624: {'lr': 0.00036605595278445605, 'samples': 9536000, 'steps': 18624, 'loss/train': 1.2743014097213745} +02/25/2022 01:41:52 - INFO - codeparrot_training - Step 18625: {'lr': 0.0003660414600101379, 'samples': 9536512, 'steps': 18625, 'loss/train': 0.45932769775390625} +02/25/2022 01:41:58 - INFO - codeparrot_training - Step 18626: {'lr': 0.00036602696673873505, 'samples': 9537024, 'steps': 18626, 'loss/train': 1.9580533504486084} +02/25/2022 01:42:01 - INFO - codeparrot_training - Step 18627: {'lr': 0.00036601247297030943, 'samples': 9537536, 'steps': 18627, 'loss/train': 2.022444248199463} +02/25/2022 01:42:07 - INFO - codeparrot_training - Step 18628: {'lr': 0.00036599797870492327, 'samples': 9538048, 'steps': 18628, 'loss/train': 2.1271891593933105} +02/25/2022 01:42:10 - INFO - codeparrot_training - Step 18629: {'lr': 0.0003659834839426387, 'samples': 9538560, 'steps': 18629, 'loss/train': 2.4939234256744385} +02/25/2022 01:42:16 - INFO - codeparrot_training - Step 18630: {'lr': 0.0003659689886835176, 'samples': 9539072, 'steps': 18630, 'loss/train': 1.5407270193099976} +02/25/2022 01:42:23 - INFO - codeparrot_training - Step 18631: {'lr': 0.00036595449292762215, 'samples': 9539584, 'steps': 18631, 'loss/train': 1.8431187868118286} +02/25/2022 01:42:26 - INFO - codeparrot_training - Step 18632: {'lr': 0.00036593999667501457, 'samples': 9540096, 'steps': 18632, 'loss/train': 1.1904419660568237} +02/25/2022 01:42:32 - INFO - codeparrot_training - Step 18633: {'lr': 0.0003659254999257568, 'samples': 9540608, 'steps': 18633, 'loss/train': 1.2419469356536865} +02/25/2022 01:42:35 - INFO - codeparrot_training - Step 18634: {'lr': 0.000365911002679911, 'samples': 9541120, 'steps': 18634, 'loss/train': 1.5306427478790283} +02/25/2022 01:42:41 - INFO - codeparrot_training - Step 18635: {'lr': 0.00036589650493753937, 'samples': 9541632, 'steps': 18635, 'loss/train': 2.0469346046447754} +02/25/2022 01:42:44 - INFO - codeparrot_training - Step 18636: {'lr': 0.00036588200669870376, 'samples': 9542144, 'steps': 18636, 'loss/train': 1.7330089807510376} +02/25/2022 01:42:50 - INFO - codeparrot_training - Step 18637: {'lr': 0.0003658675079634665, 'samples': 9542656, 'steps': 18637, 'loss/train': 2.1543710231781006} +02/25/2022 01:42:53 - INFO - codeparrot_training - Step 18638: {'lr': 0.0003658530087318896, 'samples': 9543168, 'steps': 18638, 'loss/train': 1.717761754989624} +02/25/2022 01:42:59 - INFO - codeparrot_training - Step 18639: {'lr': 0.00036583850900403527, 'samples': 9543680, 'steps': 18639, 'loss/train': 1.5691272020339966} +02/25/2022 01:43:02 - INFO - codeparrot_training - Step 18640: {'lr': 0.00036582400877996547, 'samples': 9544192, 'steps': 18640, 'loss/train': 2.1716856956481934} +02/25/2022 01:43:08 - INFO - codeparrot_training - Step 18641: {'lr': 0.0003658095080597424, 'samples': 9544704, 'steps': 18641, 'loss/train': 1.2038847208023071} +02/25/2022 01:43:11 - INFO - codeparrot_training - Step 18642: {'lr': 0.0003657950068434282, 'samples': 9545216, 'steps': 18642, 'loss/train': 2.194905996322632} +02/25/2022 01:43:17 - INFO - codeparrot_training - Step 18643: {'lr': 0.000365780505131085, 'samples': 9545728, 'steps': 18643, 'loss/train': 2.553783893585205} +02/25/2022 01:43:20 - INFO - codeparrot_training - Step 18644: {'lr': 0.00036576600292277477, 'samples': 9546240, 'steps': 18644, 'loss/train': 1.8347636461257935} +02/25/2022 01:43:26 - INFO - codeparrot_training - Step 18645: {'lr': 0.00036575150021855987, 'samples': 9546752, 'steps': 18645, 'loss/train': 1.0521267652511597} +02/25/2022 01:43:29 - INFO - codeparrot_training - Step 18646: {'lr': 0.00036573699701850223, 'samples': 9547264, 'steps': 18646, 'loss/train': 1.9159833192825317} +02/25/2022 01:43:36 - INFO - codeparrot_training - Step 18647: {'lr': 0.000365722493322664, 'samples': 9547776, 'steps': 18647, 'loss/train': 1.199291706085205} +02/25/2022 01:43:39 - INFO - codeparrot_training - Step 18648: {'lr': 0.0003657079891311075, 'samples': 9548288, 'steps': 18648, 'loss/train': 1.877380609512329} +02/25/2022 01:43:45 - INFO - codeparrot_training - Step 18649: {'lr': 0.00036569348444389456, 'samples': 9548800, 'steps': 18649, 'loss/train': 2.6515560150146484} +02/25/2022 01:43:48 - INFO - codeparrot_training - Step 18650: {'lr': 0.00036567897926108756, 'samples': 9549312, 'steps': 18650, 'loss/train': 3.0750679969787598} +02/25/2022 01:43:54 - INFO - codeparrot_training - Step 18651: {'lr': 0.00036566447358274846, 'samples': 9549824, 'steps': 18651, 'loss/train': 1.3747057914733887} +02/25/2022 01:43:58 - INFO - codeparrot_training - Step 18652: {'lr': 0.0003656499674089396, 'samples': 9550336, 'steps': 18652, 'loss/train': 2.389620304107666} +02/25/2022 01:44:03 - INFO - codeparrot_training - Step 18653: {'lr': 0.0003656354607397229, 'samples': 9550848, 'steps': 18653, 'loss/train': 0.7566519975662231} +02/25/2022 01:44:07 - INFO - codeparrot_training - Step 18654: {'lr': 0.00036562095357516066, 'samples': 9551360, 'steps': 18654, 'loss/train': 0.7429487705230713} +02/25/2022 01:44:12 - INFO - codeparrot_training - Step 18655: {'lr': 0.00036560644591531496, 'samples': 9551872, 'steps': 18655, 'loss/train': 1.5984597206115723} +02/25/2022 01:44:16 - INFO - codeparrot_training - Step 18656: {'lr': 0.00036559193776024794, 'samples': 9552384, 'steps': 18656, 'loss/train': 2.789930582046509} +02/25/2022 01:44:22 - INFO - codeparrot_training - Step 18657: {'lr': 0.0003655774291100218, 'samples': 9552896, 'steps': 18657, 'loss/train': 0.9962130784988403} +02/25/2022 01:44:25 - INFO - codeparrot_training - Step 18658: {'lr': 0.0003655629199646986, 'samples': 9553408, 'steps': 18658, 'loss/train': 1.7426908016204834} +02/25/2022 01:44:31 - INFO - codeparrot_training - Step 18659: {'lr': 0.00036554841032434063, 'samples': 9553920, 'steps': 18659, 'loss/train': 3.1881673336029053} +02/25/2022 01:44:35 - INFO - codeparrot_training - Step 18660: {'lr': 0.00036553390018900984, 'samples': 9554432, 'steps': 18660, 'loss/train': 1.6364705562591553} +02/25/2022 01:44:40 - INFO - codeparrot_training - Step 18661: {'lr': 0.0003655193895587686, 'samples': 9554944, 'steps': 18661, 'loss/train': 1.835903525352478} +02/25/2022 01:44:44 - INFO - codeparrot_training - Step 18662: {'lr': 0.000365504878433679, 'samples': 9555456, 'steps': 18662, 'loss/train': 2.0200867652893066} +02/25/2022 01:44:49 - INFO - codeparrot_training - Step 18663: {'lr': 0.00036549036681380307, 'samples': 9555968, 'steps': 18663, 'loss/train': 0.8442659378051758} +02/25/2022 01:44:53 - INFO - codeparrot_training - Step 18664: {'lr': 0.00036547585469920316, 'samples': 9556480, 'steps': 18664, 'loss/train': 1.6322911977767944} +02/25/2022 01:44:59 - INFO - codeparrot_training - Step 18665: {'lr': 0.00036546134208994137, 'samples': 9556992, 'steps': 18665, 'loss/train': 0.6029117703437805} +02/25/2022 01:45:02 - INFO - codeparrot_training - Step 18666: {'lr': 0.00036544682898607977, 'samples': 9557504, 'steps': 18666, 'loss/train': 0.2979816198348999} +02/25/2022 01:45:06 - INFO - codeparrot_training - Step 18667: {'lr': 0.00036543231538768066, 'samples': 9558016, 'steps': 18667, 'loss/train': 1.5411999225616455} +02/25/2022 01:45:12 - INFO - codeparrot_training - Step 18668: {'lr': 0.00036541780129480616, 'samples': 9558528, 'steps': 18668, 'loss/train': 2.4821763038635254} +02/25/2022 01:45:16 - INFO - codeparrot_training - Step 18669: {'lr': 0.0003654032867075185, 'samples': 9559040, 'steps': 18669, 'loss/train': 2.9620797634124756} +02/25/2022 01:45:21 - INFO - codeparrot_training - Step 18670: {'lr': 0.00036538877162587975, 'samples': 9559552, 'steps': 18670, 'loss/train': 1.7412275075912476} +02/25/2022 01:45:25 - INFO - codeparrot_training - Step 18671: {'lr': 0.00036537425604995214, 'samples': 9560064, 'steps': 18671, 'loss/train': 2.2659592628479004} +02/25/2022 01:45:30 - INFO - codeparrot_training - Step 18672: {'lr': 0.00036535973997979787, 'samples': 9560576, 'steps': 18672, 'loss/train': 1.7775391340255737} +02/25/2022 01:45:33 - INFO - codeparrot_training - Step 18673: {'lr': 0.0003653452234154791, 'samples': 9561088, 'steps': 18673, 'loss/train': 1.4945180416107178} +02/25/2022 01:45:39 - INFO - codeparrot_training - Step 18674: {'lr': 0.000365330706357058, 'samples': 9561600, 'steps': 18674, 'loss/train': 1.8599547147750854} +02/25/2022 01:45:43 - INFO - codeparrot_training - Step 18675: {'lr': 0.0003653161888045968, 'samples': 9562112, 'steps': 18675, 'loss/train': 1.500205636024475} +02/25/2022 01:45:48 - INFO - codeparrot_training - Step 18676: {'lr': 0.0003653016707581577, 'samples': 9562624, 'steps': 18676, 'loss/train': 2.0400757789611816} +02/25/2022 01:45:52 - INFO - codeparrot_training - Step 18677: {'lr': 0.00036528715221780276, 'samples': 9563136, 'steps': 18677, 'loss/train': 1.7568780183792114} +02/25/2022 01:45:57 - INFO - codeparrot_training - Step 18678: {'lr': 0.0003652726331835944, 'samples': 9563648, 'steps': 18678, 'loss/train': 2.6606838703155518} +02/25/2022 01:46:01 - INFO - codeparrot_training - Step 18679: {'lr': 0.00036525811365559457, 'samples': 9564160, 'steps': 18679, 'loss/train': 2.54681134223938} +02/25/2022 01:46:07 - INFO - codeparrot_training - Step 18680: {'lr': 0.0003652435936338656, 'samples': 9564672, 'steps': 18680, 'loss/train': 1.964200735092163} +02/25/2022 01:46:10 - INFO - codeparrot_training - Step 18681: {'lr': 0.0003652290731184697, 'samples': 9565184, 'steps': 18681, 'loss/train': 1.8075731992721558} +02/25/2022 01:46:16 - INFO - codeparrot_training - Step 18682: {'lr': 0.000365214552109469, 'samples': 9565696, 'steps': 18682, 'loss/train': 1.778092861175537} +02/25/2022 01:46:19 - INFO - codeparrot_training - Step 18683: {'lr': 0.0003652000306069258, 'samples': 9566208, 'steps': 18683, 'loss/train': 2.2482798099517822} +02/25/2022 01:46:25 - INFO - codeparrot_training - Step 18684: {'lr': 0.00036518550861090217, 'samples': 9566720, 'steps': 18684, 'loss/train': 2.2633185386657715} +02/25/2022 01:46:29 - INFO - codeparrot_training - Step 18685: {'lr': 0.0003651709861214605, 'samples': 9567232, 'steps': 18685, 'loss/train': 1.3000870943069458} +02/25/2022 01:46:34 - INFO - codeparrot_training - Step 18686: {'lr': 0.0003651564631386628, 'samples': 9567744, 'steps': 18686, 'loss/train': 2.1072256565093994} +02/25/2022 01:46:38 - INFO - codeparrot_training - Step 18687: {'lr': 0.0003651419396625714, 'samples': 9568256, 'steps': 18687, 'loss/train': 2.890693426132202} +02/25/2022 01:46:43 - INFO - codeparrot_training - Step 18688: {'lr': 0.0003651274156932485, 'samples': 9568768, 'steps': 18688, 'loss/train': 1.5545525550842285} +02/25/2022 01:46:47 - INFO - codeparrot_training - Step 18689: {'lr': 0.00036511289123075636, 'samples': 9569280, 'steps': 18689, 'loss/train': 1.9959642887115479} +02/25/2022 01:46:52 - INFO - codeparrot_training - Step 18690: {'lr': 0.0003650983662751571, 'samples': 9569792, 'steps': 18690, 'loss/train': 2.338176727294922} +02/25/2022 01:46:56 - INFO - codeparrot_training - Step 18691: {'lr': 0.000365083840826513, 'samples': 9570304, 'steps': 18691, 'loss/train': 0.6471648812294006} +02/25/2022 01:47:01 - INFO - codeparrot_training - Step 18692: {'lr': 0.00036506931488488627, 'samples': 9570816, 'steps': 18692, 'loss/train': 2.605759620666504} +02/25/2022 01:47:05 - INFO - codeparrot_training - Step 18693: {'lr': 0.0003650547884503391, 'samples': 9571328, 'steps': 18693, 'loss/train': 1.1564210653305054} +02/25/2022 01:47:11 - INFO - codeparrot_training - Step 18694: {'lr': 0.0003650402615229338, 'samples': 9571840, 'steps': 18694, 'loss/train': 1.6388384103775024} +02/25/2022 01:47:15 - INFO - codeparrot_training - Step 18695: {'lr': 0.00036502573410273243, 'samples': 9572352, 'steps': 18695, 'loss/train': 2.0362765789031982} +02/25/2022 01:47:20 - INFO - codeparrot_training - Step 18696: {'lr': 0.0003650112061897975, 'samples': 9572864, 'steps': 18696, 'loss/train': 2.113410234451294} +02/25/2022 01:47:24 - INFO - codeparrot_training - Step 18697: {'lr': 0.000364996677784191, 'samples': 9573376, 'steps': 18697, 'loss/train': 2.0579826831817627} +02/25/2022 01:47:29 - INFO - codeparrot_training - Step 18698: {'lr': 0.00036498214888597524, 'samples': 9573888, 'steps': 18698, 'loss/train': 2.890291452407837} +02/25/2022 01:47:33 - INFO - codeparrot_training - Step 18699: {'lr': 0.0003649676194952125, 'samples': 9574400, 'steps': 18699, 'loss/train': 2.3442039489746094} +02/25/2022 01:47:38 - INFO - codeparrot_training - Step 18700: {'lr': 0.000364953089611965, 'samples': 9574912, 'steps': 18700, 'loss/train': 2.574342727661133} +02/25/2022 01:47:42 - INFO - codeparrot_training - Step 18701: {'lr': 0.00036493855923629495, 'samples': 9575424, 'steps': 18701, 'loss/train': 3.9268972873687744} +02/25/2022 01:47:47 - INFO - codeparrot_training - Step 18702: {'lr': 0.00036492402836826453, 'samples': 9575936, 'steps': 18702, 'loss/train': 2.2675607204437256} +02/25/2022 01:47:54 - INFO - codeparrot_training - Step 18703: {'lr': 0.0003649094970079362, 'samples': 9576448, 'steps': 18703, 'loss/train': 2.112107992172241} +02/25/2022 01:47:57 - INFO - codeparrot_training - Step 18704: {'lr': 0.000364894965155372, 'samples': 9576960, 'steps': 18704, 'loss/train': 0.6963909864425659} +02/25/2022 01:48:03 - INFO - codeparrot_training - Step 18705: {'lr': 0.0003648804328106342, 'samples': 9577472, 'steps': 18705, 'loss/train': 2.042991876602173} +02/25/2022 01:48:07 - INFO - codeparrot_training - Step 18706: {'lr': 0.0003648658999737852, 'samples': 9577984, 'steps': 18706, 'loss/train': 1.4525494575500488} +02/25/2022 01:48:12 - INFO - codeparrot_training - Step 18707: {'lr': 0.0003648513666448871, 'samples': 9578496, 'steps': 18707, 'loss/train': 2.257282018661499} +02/25/2022 01:48:16 - INFO - codeparrot_training - Step 18708: {'lr': 0.0003648368328240022, 'samples': 9579008, 'steps': 18708, 'loss/train': 2.449894428253174} +02/25/2022 01:48:19 - INFO - codeparrot_training - Step 18709: {'lr': 0.00036482229851119287, 'samples': 9579520, 'steps': 18709, 'loss/train': 2.3471951484680176} +02/25/2022 01:48:25 - INFO - codeparrot_training - Step 18710: {'lr': 0.0003648077637065212, 'samples': 9580032, 'steps': 18710, 'loss/train': 2.30021071434021} +02/25/2022 01:48:28 - INFO - codeparrot_training - Step 18711: {'lr': 0.00036479322841004953, 'samples': 9580544, 'steps': 18711, 'loss/train': 1.9611283540725708} +02/25/2022 01:48:34 - INFO - codeparrot_training - Step 18712: {'lr': 0.0003647786926218401, 'samples': 9581056, 'steps': 18712, 'loss/train': 2.7539775371551514} +02/25/2022 01:48:37 - INFO - codeparrot_training - Step 18713: {'lr': 0.00036476415634195523, 'samples': 9581568, 'steps': 18713, 'loss/train': 0.16106773912906647} +02/25/2022 01:48:43 - INFO - codeparrot_training - Step 18714: {'lr': 0.00036474961957045715, 'samples': 9582080, 'steps': 18714, 'loss/train': 2.0612494945526123} +02/25/2022 01:48:47 - INFO - codeparrot_training - Step 18715: {'lr': 0.00036473508230740816, 'samples': 9582592, 'steps': 18715, 'loss/train': 2.2212347984313965} +02/25/2022 01:48:52 - INFO - codeparrot_training - Step 18716: {'lr': 0.00036472054455287053, 'samples': 9583104, 'steps': 18716, 'loss/train': 1.5926774740219116} +02/25/2022 01:48:56 - INFO - codeparrot_training - Step 18717: {'lr': 0.0003647060063069064, 'samples': 9583616, 'steps': 18717, 'loss/train': 1.8369320631027222} +02/25/2022 01:49:01 - INFO - codeparrot_training - Step 18718: {'lr': 0.0003646914675695783, 'samples': 9584128, 'steps': 18718, 'loss/train': 1.7059699296951294} +02/25/2022 01:49:05 - INFO - codeparrot_training - Step 18719: {'lr': 0.0003646769283409483, 'samples': 9584640, 'steps': 18719, 'loss/train': 2.582383155822754} +02/25/2022 01:49:11 - INFO - codeparrot_training - Step 18720: {'lr': 0.0003646623886210788, 'samples': 9585152, 'steps': 18720, 'loss/train': 2.0679750442504883} +02/25/2022 01:49:14 - INFO - codeparrot_training - Step 18721: {'lr': 0.00036464784841003196, 'samples': 9585664, 'steps': 18721, 'loss/train': 1.9153096675872803} +02/25/2022 01:49:20 - INFO - codeparrot_training - Step 18722: {'lr': 0.0003646333077078702, 'samples': 9586176, 'steps': 18722, 'loss/train': 1.320304274559021} +02/25/2022 01:49:23 - INFO - codeparrot_training - Step 18723: {'lr': 0.0003646187665146557, 'samples': 9586688, 'steps': 18723, 'loss/train': 2.1479573249816895} +02/25/2022 01:49:29 - INFO - codeparrot_training - Step 18724: {'lr': 0.00036460422483045084, 'samples': 9587200, 'steps': 18724, 'loss/train': 2.007450819015503} +02/25/2022 01:49:32 - INFO - codeparrot_training - Step 18725: {'lr': 0.0003645896826553178, 'samples': 9587712, 'steps': 18725, 'loss/train': 0.10037413239479065} +02/25/2022 01:49:38 - INFO - codeparrot_training - Step 18726: {'lr': 0.0003645751399893191, 'samples': 9588224, 'steps': 18726, 'loss/train': 2.3800787925720215} +02/25/2022 01:49:42 - INFO - codeparrot_training - Step 18727: {'lr': 0.0003645605968325167, 'samples': 9588736, 'steps': 18727, 'loss/train': 2.0359084606170654} +02/25/2022 01:49:47 - INFO - codeparrot_training - Step 18728: {'lr': 0.00036454605318497323, 'samples': 9589248, 'steps': 18728, 'loss/train': 2.33123779296875} +02/25/2022 01:49:51 - INFO - codeparrot_training - Step 18729: {'lr': 0.00036453150904675074, 'samples': 9589760, 'steps': 18729, 'loss/train': 1.9773380756378174} +02/25/2022 01:49:56 - INFO - codeparrot_training - Step 18730: {'lr': 0.00036451696441791164, 'samples': 9590272, 'steps': 18730, 'loss/train': 1.75650954246521} +02/25/2022 01:50:00 - INFO - codeparrot_training - Step 18731: {'lr': 0.0003645024192985183, 'samples': 9590784, 'steps': 18731, 'loss/train': 1.3827403783798218} +02/25/2022 01:50:05 - INFO - codeparrot_training - Step 18732: {'lr': 0.0003644878736886329, 'samples': 9591296, 'steps': 18732, 'loss/train': 0.9074762463569641} +02/25/2022 01:50:09 - INFO - codeparrot_training - Step 18733: {'lr': 0.0003644733275883179, 'samples': 9591808, 'steps': 18733, 'loss/train': 2.2755424976348877} +02/25/2022 01:50:14 - INFO - codeparrot_training - Step 18734: {'lr': 0.00036445878099763534, 'samples': 9592320, 'steps': 18734, 'loss/train': 1.992601990699768} +02/25/2022 01:50:18 - INFO - codeparrot_training - Step 18735: {'lr': 0.00036444423391664783, 'samples': 9592832, 'steps': 18735, 'loss/train': 1.757514476776123} +02/25/2022 01:50:23 - INFO - codeparrot_training - Step 18736: {'lr': 0.0003644296863454175, 'samples': 9593344, 'steps': 18736, 'loss/train': 2.7851083278656006} +02/25/2022 01:50:27 - INFO - codeparrot_training - Step 18737: {'lr': 0.0003644151382840068, 'samples': 9593856, 'steps': 18737, 'loss/train': 1.7062655687332153} +02/25/2022 01:50:32 - INFO - codeparrot_training - Step 18738: {'lr': 0.00036440058973247793, 'samples': 9594368, 'steps': 18738, 'loss/train': 1.3448009490966797} +02/25/2022 01:50:36 - INFO - codeparrot_training - Step 18739: {'lr': 0.0003643860406908933, 'samples': 9594880, 'steps': 18739, 'loss/train': 1.472324252128601} +02/25/2022 01:50:41 - INFO - codeparrot_training - Step 18740: {'lr': 0.0003643714911593151, 'samples': 9595392, 'steps': 18740, 'loss/train': 2.843435764312744} +02/25/2022 01:50:45 - INFO - codeparrot_training - Step 18741: {'lr': 0.00036435694113780585, 'samples': 9595904, 'steps': 18741, 'loss/train': 1.2746641635894775} +02/25/2022 01:50:51 - INFO - codeparrot_training - Step 18742: {'lr': 0.0003643423906264277, 'samples': 9596416, 'steps': 18742, 'loss/train': 1.2105600833892822} +02/25/2022 01:50:54 - INFO - codeparrot_training - Step 18743: {'lr': 0.0003643278396252431, 'samples': 9596928, 'steps': 18743, 'loss/train': 3.263894557952881} +02/25/2022 01:51:00 - INFO - codeparrot_training - Step 18744: {'lr': 0.0003643132881343144, 'samples': 9597440, 'steps': 18744, 'loss/train': 2.2403008937835693} +02/25/2022 01:51:03 - INFO - codeparrot_training - Step 18745: {'lr': 0.00036429873615370374, 'samples': 9597952, 'steps': 18745, 'loss/train': 1.7017784118652344} +02/25/2022 01:51:09 - INFO - codeparrot_training - Step 18746: {'lr': 0.00036428418368347363, 'samples': 9598464, 'steps': 18746, 'loss/train': 1.3000973463058472} +02/25/2022 01:51:12 - INFO - codeparrot_training - Step 18747: {'lr': 0.0003642696307236864, 'samples': 9598976, 'steps': 18747, 'loss/train': 1.8979524374008179} +02/25/2022 01:51:18 - INFO - codeparrot_training - Step 18748: {'lr': 0.0003642550772744044, 'samples': 9599488, 'steps': 18748, 'loss/train': 1.9144237041473389} +02/25/2022 01:51:21 - INFO - codeparrot_training - Step 18749: {'lr': 0.0003642405233356898, 'samples': 9600000, 'steps': 18749, 'loss/train': 1.6354658603668213} +02/25/2022 01:51:27 - INFO - codeparrot_training - Step 18750: {'lr': 0.00036422596890760517, 'samples': 9600512, 'steps': 18750, 'loss/train': 1.7698363065719604} +02/25/2022 01:51:30 - INFO - codeparrot_training - Step 18751: {'lr': 0.00036421141399021274, 'samples': 9601024, 'steps': 18751, 'loss/train': 0.7682331800460815} +02/25/2022 01:51:36 - INFO - codeparrot_training - Step 18752: {'lr': 0.00036419685858357485, 'samples': 9601536, 'steps': 18752, 'loss/train': 2.680948495864868} +02/25/2022 01:51:40 - INFO - codeparrot_training - Step 18753: {'lr': 0.00036418230268775393, 'samples': 9602048, 'steps': 18753, 'loss/train': 1.591759204864502} +02/25/2022 01:51:45 - INFO - codeparrot_training - Step 18754: {'lr': 0.0003641677463028123, 'samples': 9602560, 'steps': 18754, 'loss/train': 2.06766939163208} +02/25/2022 01:51:51 - INFO - codeparrot_training - Step 18755: {'lr': 0.0003641531894288122, 'samples': 9603072, 'steps': 18755, 'loss/train': 1.9107542037963867} +02/25/2022 01:51:54 - INFO - codeparrot_training - Step 18756: {'lr': 0.0003641386320658161, 'samples': 9603584, 'steps': 18756, 'loss/train': 0.9932055473327637} +02/25/2022 01:51:58 - INFO - codeparrot_training - Step 18757: {'lr': 0.00036412407421388646, 'samples': 9604096, 'steps': 18757, 'loss/train': 1.7812520265579224} +02/25/2022 01:52:03 - INFO - codeparrot_training - Step 18758: {'lr': 0.00036410951587308545, 'samples': 9604608, 'steps': 18758, 'loss/train': 2.319784641265869} +02/25/2022 01:52:09 - INFO - codeparrot_training - Step 18759: {'lr': 0.00036409495704347553, 'samples': 9605120, 'steps': 18759, 'loss/train': 3.9322290420532227} +02/25/2022 01:52:12 - INFO - codeparrot_training - Step 18760: {'lr': 0.000364080397725119, 'samples': 9605632, 'steps': 18760, 'loss/train': 1.9735074043273926} +02/25/2022 01:52:18 - INFO - codeparrot_training - Step 18761: {'lr': 0.00036406583791807824, 'samples': 9606144, 'steps': 18761, 'loss/train': 1.642637848854065} +02/25/2022 01:52:22 - INFO - codeparrot_training - Step 18762: {'lr': 0.0003640512776224157, 'samples': 9606656, 'steps': 18762, 'loss/train': 2.916508197784424} +02/25/2022 01:52:28 - INFO - codeparrot_training - Step 18763: {'lr': 0.0003640367168381937, 'samples': 9607168, 'steps': 18763, 'loss/train': 2.194520950317383} +02/25/2022 01:52:31 - INFO - codeparrot_training - Step 18764: {'lr': 0.0003640221555654747, 'samples': 9607680, 'steps': 18764, 'loss/train': 2.134033441543579} +02/25/2022 01:52:34 - INFO - codeparrot_training - Step 18765: {'lr': 0.00036400759380432083, 'samples': 9608192, 'steps': 18765, 'loss/train': 2.271791934967041} +02/25/2022 01:52:40 - INFO - codeparrot_training - Step 18766: {'lr': 0.00036399303155479476, 'samples': 9608704, 'steps': 18766, 'loss/train': 1.747043490409851} +02/25/2022 01:52:44 - INFO - codeparrot_training - Step 18767: {'lr': 0.00036397846881695866, 'samples': 9609216, 'steps': 18767, 'loss/train': 2.0768213272094727} +02/25/2022 01:52:49 - INFO - codeparrot_training - Step 18768: {'lr': 0.00036396390559087505, 'samples': 9609728, 'steps': 18768, 'loss/train': 2.3907644748687744} +02/25/2022 01:52:53 - INFO - codeparrot_training - Step 18769: {'lr': 0.0003639493418766062, 'samples': 9610240, 'steps': 18769, 'loss/train': 1.804693341255188} +02/25/2022 01:52:58 - INFO - codeparrot_training - Step 18770: {'lr': 0.0003639347776742146, 'samples': 9610752, 'steps': 18770, 'loss/train': 2.2344589233398438} +02/25/2022 01:53:02 - INFO - codeparrot_training - Step 18771: {'lr': 0.00036392021298376257, 'samples': 9611264, 'steps': 18771, 'loss/train': 1.1538872718811035} +02/25/2022 01:53:08 - INFO - codeparrot_training - Step 18772: {'lr': 0.0003639056478053125, 'samples': 9611776, 'steps': 18772, 'loss/train': 1.552165150642395} +02/25/2022 01:53:11 - INFO - codeparrot_training - Step 18773: {'lr': 0.0003638910821389268, 'samples': 9612288, 'steps': 18773, 'loss/train': 1.2958184480667114} +02/25/2022 01:53:17 - INFO - codeparrot_training - Step 18774: {'lr': 0.0003638765159846679, 'samples': 9612800, 'steps': 18774, 'loss/train': 1.857056975364685} +02/25/2022 01:53:20 - INFO - codeparrot_training - Step 18775: {'lr': 0.0003638619493425982, 'samples': 9613312, 'steps': 18775, 'loss/train': 0.22676512598991394} +02/25/2022 01:53:26 - INFO - codeparrot_training - Step 18776: {'lr': 0.00036384738221278, 'samples': 9613824, 'steps': 18776, 'loss/train': 1.1256705522537231} +02/25/2022 01:53:30 - INFO - codeparrot_training - Step 18777: {'lr': 0.0003638328145952758, 'samples': 9614336, 'steps': 18777, 'loss/train': 1.938619613647461} +02/25/2022 01:53:35 - INFO - codeparrot_training - Step 18778: {'lr': 0.0003638182464901479, 'samples': 9614848, 'steps': 18778, 'loss/train': 1.5208390951156616} +02/25/2022 01:53:39 - INFO - codeparrot_training - Step 18779: {'lr': 0.00036380367789745887, 'samples': 9615360, 'steps': 18779, 'loss/train': 2.1775331497192383} +02/25/2022 01:53:44 - INFO - codeparrot_training - Step 18780: {'lr': 0.00036378910881727095, 'samples': 9615872, 'steps': 18780, 'loss/train': 1.8853918313980103} +02/25/2022 01:53:48 - INFO - codeparrot_training - Step 18781: {'lr': 0.00036377453924964665, 'samples': 9616384, 'steps': 18781, 'loss/train': 2.315471649169922} +02/25/2022 01:53:53 - INFO - codeparrot_training - Step 18782: {'lr': 0.0003637599691946484, 'samples': 9616896, 'steps': 18782, 'loss/train': 1.3137383460998535} +02/25/2022 01:53:57 - INFO - codeparrot_training - Step 18783: {'lr': 0.00036374539865233847, 'samples': 9617408, 'steps': 18783, 'loss/train': 1.7385649681091309} +02/25/2022 01:54:02 - INFO - codeparrot_training - Step 18784: {'lr': 0.00036373082762277943, 'samples': 9617920, 'steps': 18784, 'loss/train': 2.9578235149383545} +02/25/2022 01:54:06 - INFO - codeparrot_training - Step 18785: {'lr': 0.00036371625610603366, 'samples': 9618432, 'steps': 18785, 'loss/train': 1.29164719581604} +02/25/2022 01:54:11 - INFO - codeparrot_training - Step 18786: {'lr': 0.0003637016841021635, 'samples': 9618944, 'steps': 18786, 'loss/train': 2.966223955154419} +02/25/2022 01:54:15 - INFO - codeparrot_training - Step 18787: {'lr': 0.00036368711161123145, 'samples': 9619456, 'steps': 18787, 'loss/train': 1.7146046161651611} +02/25/2022 01:54:21 - INFO - codeparrot_training - Step 18788: {'lr': 0.0003636725386332999, 'samples': 9619968, 'steps': 18788, 'loss/train': 1.6252731084823608} +02/25/2022 01:54:24 - INFO - codeparrot_training - Step 18789: {'lr': 0.0003636579651684313, 'samples': 9620480, 'steps': 18789, 'loss/train': 2.730056047439575} +02/25/2022 01:54:30 - INFO - codeparrot_training - Step 18790: {'lr': 0.0003636433912166881, 'samples': 9620992, 'steps': 18790, 'loss/train': 3.086652994155884} +02/25/2022 01:54:33 - INFO - codeparrot_training - Step 18791: {'lr': 0.00036362881677813266, 'samples': 9621504, 'steps': 18791, 'loss/train': 0.9975144267082214} +02/25/2022 01:54:39 - INFO - codeparrot_training - Step 18792: {'lr': 0.00036361424185282743, 'samples': 9622016, 'steps': 18792, 'loss/train': 1.7287423610687256} +02/25/2022 01:54:42 - INFO - codeparrot_training - Step 18793: {'lr': 0.00036359966644083485, 'samples': 9622528, 'steps': 18793, 'loss/train': 0.9263642430305481} +02/25/2022 01:54:48 - INFO - codeparrot_training - Step 18794: {'lr': 0.0003635850905422175, 'samples': 9623040, 'steps': 18794, 'loss/train': 1.5517878532409668} +02/25/2022 01:54:51 - INFO - codeparrot_training - Step 18795: {'lr': 0.00036357051415703755, 'samples': 9623552, 'steps': 18795, 'loss/train': 1.856420874595642} +02/25/2022 01:54:57 - INFO - codeparrot_training - Step 18796: {'lr': 0.00036355593728535767, 'samples': 9624064, 'steps': 18796, 'loss/train': 1.5313332080841064} +02/25/2022 01:55:00 - INFO - codeparrot_training - Step 18797: {'lr': 0.00036354135992724013, 'samples': 9624576, 'steps': 18797, 'loss/train': 1.5411643981933594} +02/25/2022 01:55:07 - INFO - codeparrot_training - Step 18798: {'lr': 0.0003635267820827476, 'samples': 9625088, 'steps': 18798, 'loss/train': 0.7101810574531555} +02/25/2022 01:55:10 - INFO - codeparrot_training - Step 18799: {'lr': 0.0003635122037519422, 'samples': 9625600, 'steps': 18799, 'loss/train': 2.0135602951049805} +02/25/2022 01:55:16 - INFO - codeparrot_training - Step 18800: {'lr': 0.00036349762493488667, 'samples': 9626112, 'steps': 18800, 'loss/train': 1.9933468103408813} +02/25/2022 01:55:19 - INFO - codeparrot_training - Step 18801: {'lr': 0.0003634830456316434, 'samples': 9626624, 'steps': 18801, 'loss/train': 1.921012043952942} +02/25/2022 01:55:25 - INFO - codeparrot_training - Step 18802: {'lr': 0.00036346846584227473, 'samples': 9627136, 'steps': 18802, 'loss/train': 2.269662380218506} +02/25/2022 01:55:28 - INFO - codeparrot_training - Step 18803: {'lr': 0.0003634538855668432, 'samples': 9627648, 'steps': 18803, 'loss/train': 1.8927403688430786} +02/25/2022 01:55:34 - INFO - codeparrot_training - Step 18804: {'lr': 0.00036343930480541123, 'samples': 9628160, 'steps': 18804, 'loss/train': 2.0032622814178467} +02/25/2022 01:55:37 - INFO - codeparrot_training - Step 18805: {'lr': 0.0003634247235580413, 'samples': 9628672, 'steps': 18805, 'loss/train': 2.2543907165527344} +02/25/2022 01:55:43 - INFO - codeparrot_training - Step 18806: {'lr': 0.0003634101418247959, 'samples': 9629184, 'steps': 18806, 'loss/train': 2.0594019889831543} +02/25/2022 01:55:46 - INFO - codeparrot_training - Step 18807: {'lr': 0.0003633955596057374, 'samples': 9629696, 'steps': 18807, 'loss/train': 3.0865230560302734} +02/25/2022 01:55:52 - INFO - codeparrot_training - Step 18808: {'lr': 0.00036338097690092843, 'samples': 9630208, 'steps': 18808, 'loss/train': 2.2409377098083496} +02/25/2022 01:55:56 - INFO - codeparrot_training - Step 18809: {'lr': 0.0003633663937104313, 'samples': 9630720, 'steps': 18809, 'loss/train': 1.6155309677124023} +02/25/2022 01:56:01 - INFO - codeparrot_training - Step 18810: {'lr': 0.0003633518100343085, 'samples': 9631232, 'steps': 18810, 'loss/train': 4.343753337860107} +02/25/2022 01:56:05 - INFO - codeparrot_training - Step 18811: {'lr': 0.0003633372258726226, 'samples': 9631744, 'steps': 18811, 'loss/train': 2.3139777183532715} +02/25/2022 01:56:11 - INFO - codeparrot_training - Step 18812: {'lr': 0.00036332264122543594, 'samples': 9632256, 'steps': 18812, 'loss/train': 1.5373393297195435} +02/25/2022 01:56:14 - INFO - codeparrot_training - Step 18813: {'lr': 0.0003633080560928111, 'samples': 9632768, 'steps': 18813, 'loss/train': 0.41178491711616516} +02/25/2022 01:56:20 - INFO - codeparrot_training - Step 18814: {'lr': 0.0003632934704748106, 'samples': 9633280, 'steps': 18814, 'loss/train': 1.730994701385498} +02/25/2022 01:56:23 - INFO - codeparrot_training - Step 18815: {'lr': 0.00036327888437149674, 'samples': 9633792, 'steps': 18815, 'loss/train': 2.9415700435638428} +02/25/2022 01:56:28 - INFO - codeparrot_training - Step 18816: {'lr': 0.00036326429778293223, 'samples': 9634304, 'steps': 18816, 'loss/train': 0.736484706401825} +02/25/2022 01:56:32 - INFO - codeparrot_training - Step 18817: {'lr': 0.00036324971070917934, 'samples': 9634816, 'steps': 18817, 'loss/train': 1.6742634773254395} +02/25/2022 01:56:38 - INFO - codeparrot_training - Step 18818: {'lr': 0.00036323512315030067, 'samples': 9635328, 'steps': 18818, 'loss/train': 1.4184633493423462} +02/25/2022 01:56:42 - INFO - codeparrot_training - Step 18819: {'lr': 0.0003632205351063587, 'samples': 9635840, 'steps': 18819, 'loss/train': 2.167532205581665} +02/25/2022 01:56:47 - INFO - codeparrot_training - Step 18820: {'lr': 0.0003632059465774159, 'samples': 9636352, 'steps': 18820, 'loss/train': 2.1325812339782715} +02/25/2022 01:56:50 - INFO - codeparrot_training - Step 18821: {'lr': 0.0003631913575635348, 'samples': 9636864, 'steps': 18821, 'loss/train': 1.9987629652023315} +02/25/2022 01:56:56 - INFO - codeparrot_training - Step 18822: {'lr': 0.00036317676806477784, 'samples': 9637376, 'steps': 18822, 'loss/train': 2.838286876678467} +02/25/2022 01:57:00 - INFO - codeparrot_training - Step 18823: {'lr': 0.0003631621780812075, 'samples': 9637888, 'steps': 18823, 'loss/train': 0.4584333300590515} +02/25/2022 01:57:05 - INFO - codeparrot_training - Step 18824: {'lr': 0.00036314758761288643, 'samples': 9638400, 'steps': 18824, 'loss/train': 2.645183801651001} +02/25/2022 01:57:11 - INFO - codeparrot_training - Step 18825: {'lr': 0.0003631329966598769, 'samples': 9638912, 'steps': 18825, 'loss/train': 2.121828079223633} +02/25/2022 01:57:14 - INFO - codeparrot_training - Step 18826: {'lr': 0.0003631184052222416, 'samples': 9639424, 'steps': 18826, 'loss/train': 0.2151978760957718} +02/25/2022 01:57:20 - INFO - codeparrot_training - Step 18827: {'lr': 0.00036310381330004296, 'samples': 9639936, 'steps': 18827, 'loss/train': 1.7303944826126099} +02/25/2022 01:57:23 - INFO - codeparrot_training - Step 18828: {'lr': 0.0003630892208933435, 'samples': 9640448, 'steps': 18828, 'loss/train': 0.845332145690918} +02/25/2022 01:57:29 - INFO - codeparrot_training - Step 18829: {'lr': 0.00036307462800220575, 'samples': 9640960, 'steps': 18829, 'loss/train': 2.6933765411376953} +02/25/2022 01:57:32 - INFO - codeparrot_training - Step 18830: {'lr': 0.0003630600346266922, 'samples': 9641472, 'steps': 18830, 'loss/train': 2.6756937503814697} +02/25/2022 01:57:38 - INFO - codeparrot_training - Step 18831: {'lr': 0.00036304544076686527, 'samples': 9641984, 'steps': 18831, 'loss/train': 1.8550463914871216} +02/25/2022 01:57:41 - INFO - codeparrot_training - Step 18832: {'lr': 0.0003630308464227877, 'samples': 9642496, 'steps': 18832, 'loss/train': 1.7160649299621582} +02/25/2022 01:57:48 - INFO - codeparrot_training - Step 18833: {'lr': 0.0003630162515945218, 'samples': 9643008, 'steps': 18833, 'loss/train': 0.8779038190841675} +02/25/2022 01:57:51 - INFO - codeparrot_training - Step 18834: {'lr': 0.00036300165628213015, 'samples': 9643520, 'steps': 18834, 'loss/train': 0.9629799723625183} +02/25/2022 01:57:57 - INFO - codeparrot_training - Step 18835: {'lr': 0.0003629870604856754, 'samples': 9644032, 'steps': 18835, 'loss/train': 1.703185796737671} +02/25/2022 01:58:00 - INFO - codeparrot_training - Step 18836: {'lr': 0.0003629724642052198, 'samples': 9644544, 'steps': 18836, 'loss/train': 1.658734679222107} +02/25/2022 01:58:06 - INFO - codeparrot_training - Step 18837: {'lr': 0.00036295786744082616, 'samples': 9645056, 'steps': 18837, 'loss/train': 3.145209312438965} +02/25/2022 01:58:09 - INFO - codeparrot_training - Step 18838: {'lr': 0.0003629432701925568, 'samples': 9645568, 'steps': 18838, 'loss/train': 1.8590718507766724} +02/25/2022 01:58:15 - INFO - codeparrot_training - Step 18839: {'lr': 0.0003629286724604744, 'samples': 9646080, 'steps': 18839, 'loss/train': 1.4280321598052979} +02/25/2022 01:58:18 - INFO - codeparrot_training - Step 18840: {'lr': 0.0003629140742446414, 'samples': 9646592, 'steps': 18840, 'loss/train': 1.3024110794067383} +02/25/2022 01:58:23 - INFO - codeparrot_training - Step 18841: {'lr': 0.00036289947554512034, 'samples': 9647104, 'steps': 18841, 'loss/train': 2.747363328933716} +02/25/2022 01:58:27 - INFO - codeparrot_training - Step 18842: {'lr': 0.0003628848763619738, 'samples': 9647616, 'steps': 18842, 'loss/train': 1.8926026821136475} +02/25/2022 01:58:33 - INFO - codeparrot_training - Step 18843: {'lr': 0.0003628702766952643, 'samples': 9648128, 'steps': 18843, 'loss/train': 2.5643370151519775} +02/25/2022 01:58:37 - INFO - codeparrot_training - Step 18844: {'lr': 0.00036285567654505433, 'samples': 9648640, 'steps': 18844, 'loss/train': 2.6005680561065674} +02/25/2022 01:58:42 - INFO - codeparrot_training - Step 18845: {'lr': 0.00036284107591140653, 'samples': 9649152, 'steps': 18845, 'loss/train': 2.3802216053009033} +02/25/2022 01:58:46 - INFO - codeparrot_training - Step 18846: {'lr': 0.0003628264747943834, 'samples': 9649664, 'steps': 18846, 'loss/train': 1.682621955871582} +02/25/2022 01:58:51 - INFO - codeparrot_training - Step 18847: {'lr': 0.0003628118731940475, 'samples': 9650176, 'steps': 18847, 'loss/train': 2.3060736656188965} +02/25/2022 01:58:55 - INFO - codeparrot_training - Step 18848: {'lr': 0.00036279727111046127, 'samples': 9650688, 'steps': 18848, 'loss/train': 2.185255765914917} +02/25/2022 01:59:00 - INFO - codeparrot_training - Step 18849: {'lr': 0.0003627826685436874, 'samples': 9651200, 'steps': 18849, 'loss/train': 2.854978084564209} +02/25/2022 01:59:04 - INFO - codeparrot_training - Step 18850: {'lr': 0.00036276806549378836, 'samples': 9651712, 'steps': 18850, 'loss/train': 1.5781532526016235} +02/25/2022 01:59:09 - INFO - codeparrot_training - Step 18851: {'lr': 0.0003627534619608268, 'samples': 9652224, 'steps': 18851, 'loss/train': 2.168938636779785} +02/25/2022 01:59:13 - INFO - codeparrot_training - Step 18852: {'lr': 0.00036273885794486514, 'samples': 9652736, 'steps': 18852, 'loss/train': 1.8750176429748535} +02/25/2022 01:59:19 - INFO - codeparrot_training - Step 18853: {'lr': 0.00036272425344596607, 'samples': 9653248, 'steps': 18853, 'loss/train': 2.0590384006500244} +02/25/2022 01:59:23 - INFO - codeparrot_training - Step 18854: {'lr': 0.000362709648464192, 'samples': 9653760, 'steps': 18854, 'loss/train': 1.1000386476516724} +02/25/2022 01:59:28 - INFO - codeparrot_training - Step 18855: {'lr': 0.00036269504299960573, 'samples': 9654272, 'steps': 18855, 'loss/train': 2.4076638221740723} +02/25/2022 01:59:32 - INFO - codeparrot_training - Step 18856: {'lr': 0.00036268043705226953, 'samples': 9654784, 'steps': 18856, 'loss/train': 2.177995204925537} +02/25/2022 01:59:37 - INFO - codeparrot_training - Step 18857: {'lr': 0.00036266583062224625, 'samples': 9655296, 'steps': 18857, 'loss/train': 2.24255108833313} +02/25/2022 01:59:41 - INFO - codeparrot_training - Step 18858: {'lr': 0.0003626512237095982, 'samples': 9655808, 'steps': 18858, 'loss/train': 2.230933427810669} +02/25/2022 01:59:46 - INFO - codeparrot_training - Step 18859: {'lr': 0.00036263661631438814, 'samples': 9656320, 'steps': 18859, 'loss/train': 2.467299699783325} +02/25/2022 01:59:50 - INFO - codeparrot_training - Step 18860: {'lr': 0.0003626220084366786, 'samples': 9656832, 'steps': 18860, 'loss/train': 1.9779781103134155} +02/25/2022 01:59:55 - INFO - codeparrot_training - Step 18861: {'lr': 0.00036260740007653216, 'samples': 9657344, 'steps': 18861, 'loss/train': 1.9783228635787964} +02/25/2022 01:59:59 - INFO - codeparrot_training - Step 18862: {'lr': 0.00036259279123401127, 'samples': 9657856, 'steps': 18862, 'loss/train': 3.0171501636505127} +02/25/2022 02:00:05 - INFO - codeparrot_training - Step 18863: {'lr': 0.0003625781819091787, 'samples': 9658368, 'steps': 18863, 'loss/train': 1.9603748321533203} +02/25/2022 02:00:08 - INFO - codeparrot_training - Step 18864: {'lr': 0.0003625635721020969, 'samples': 9658880, 'steps': 18864, 'loss/train': 2.323228120803833} +02/25/2022 02:00:14 - INFO - codeparrot_training - Step 18865: {'lr': 0.00036254896181282846, 'samples': 9659392, 'steps': 18865, 'loss/train': 0.31502848863601685} +02/25/2022 02:00:17 - INFO - codeparrot_training - Step 18866: {'lr': 0.0003625343510414362, 'samples': 9659904, 'steps': 18866, 'loss/train': 1.4574376344680786} +02/25/2022 02:00:23 - INFO - codeparrot_training - Step 18867: {'lr': 0.0003625197397879823, 'samples': 9660416, 'steps': 18867, 'loss/train': 2.1850383281707764} +02/25/2022 02:00:26 - INFO - codeparrot_training - Step 18868: {'lr': 0.0003625051280525297, 'samples': 9660928, 'steps': 18868, 'loss/train': 2.6404757499694824} +02/25/2022 02:00:30 - INFO - codeparrot_training - Step 18869: {'lr': 0.00036249051583514075, 'samples': 9661440, 'steps': 18869, 'loss/train': 2.3283097743988037} +02/25/2022 02:00:36 - INFO - codeparrot_training - Step 18870: {'lr': 0.00036247590313587827, 'samples': 9661952, 'steps': 18870, 'loss/train': 2.741659641265869} +02/25/2022 02:00:39 - INFO - codeparrot_training - Step 18871: {'lr': 0.0003624612899548046, 'samples': 9662464, 'steps': 18871, 'loss/train': 2.1134397983551025} +02/25/2022 02:00:45 - INFO - codeparrot_training - Step 18872: {'lr': 0.0003624466762919826, 'samples': 9662976, 'steps': 18872, 'loss/train': 1.6623711585998535} +02/25/2022 02:00:48 - INFO - codeparrot_training - Step 18873: {'lr': 0.00036243206214747466, 'samples': 9663488, 'steps': 18873, 'loss/train': 3.236616373062134} +02/25/2022 02:00:54 - INFO - codeparrot_training - Step 18874: {'lr': 0.00036241744752134347, 'samples': 9664000, 'steps': 18874, 'loss/train': 0.1615999937057495} +02/25/2022 02:00:57 - INFO - codeparrot_training - Step 18875: {'lr': 0.0003624028324136517, 'samples': 9664512, 'steps': 18875, 'loss/train': 2.288236379623413} +02/25/2022 02:01:03 - INFO - codeparrot_training - Step 18876: {'lr': 0.00036238821682446176, 'samples': 9665024, 'steps': 18876, 'loss/train': 1.4840154647827148} +02/25/2022 02:01:06 - INFO - codeparrot_training - Step 18877: {'lr': 0.0003623736007538365, 'samples': 9665536, 'steps': 18877, 'loss/train': 2.1744518280029297} +02/25/2022 02:01:11 - INFO - codeparrot_training - Step 18878: {'lr': 0.00036235898420183837, 'samples': 9666048, 'steps': 18878, 'loss/train': 2.1867895126342773} +02/25/2022 02:01:18 - INFO - codeparrot_training - Step 18879: {'lr': 0.0003623443671685301, 'samples': 9666560, 'steps': 18879, 'loss/train': 1.3891382217407227} +02/25/2022 02:01:21 - INFO - codeparrot_training - Step 18880: {'lr': 0.00036232974965397414, 'samples': 9667072, 'steps': 18880, 'loss/train': 1.3203290700912476} +02/25/2022 02:01:25 - INFO - codeparrot_training - Step 18881: {'lr': 0.00036231513165823324, 'samples': 9667584, 'steps': 18881, 'loss/train': 2.8153724670410156} +02/25/2022 02:01:30 - INFO - codeparrot_training - Step 18882: {'lr': 0.00036230051318137, 'samples': 9668096, 'steps': 18882, 'loss/train': 1.3842017650604248} +02/25/2022 02:01:34 - INFO - codeparrot_training - Step 18883: {'lr': 0.00036228589422344703, 'samples': 9668608, 'steps': 18883, 'loss/train': 0.4513271749019623} +02/25/2022 02:01:39 - INFO - codeparrot_training - Step 18884: {'lr': 0.0003622712747845269, 'samples': 9669120, 'steps': 18884, 'loss/train': 1.695831298828125} +02/25/2022 02:01:43 - INFO - codeparrot_training - Step 18885: {'lr': 0.0003622566548646723, 'samples': 9669632, 'steps': 18885, 'loss/train': 1.5047125816345215} +02/25/2022 02:01:48 - INFO - codeparrot_training - Step 18886: {'lr': 0.00036224203446394584, 'samples': 9670144, 'steps': 18886, 'loss/train': 3.071146011352539} +02/25/2022 02:01:54 - INFO - codeparrot_training - Step 18887: {'lr': 0.00036222741358241014, 'samples': 9670656, 'steps': 18887, 'loss/train': 2.0389113426208496} +02/25/2022 02:01:57 - INFO - codeparrot_training - Step 18888: {'lr': 0.0003622127922201278, 'samples': 9671168, 'steps': 18888, 'loss/train': 1.9491676092147827} +02/25/2022 02:02:01 - INFO - codeparrot_training - Step 18889: {'lr': 0.0003621981703771616, 'samples': 9671680, 'steps': 18889, 'loss/train': 2.0626060962677} +02/25/2022 02:02:07 - INFO - codeparrot_training - Step 18890: {'lr': 0.0003621835480535739, 'samples': 9672192, 'steps': 18890, 'loss/train': 1.7543541193008423} +02/25/2022 02:02:13 - INFO - codeparrot_training - Step 18891: {'lr': 0.00036216892524942764, 'samples': 9672704, 'steps': 18891, 'loss/train': 1.1323871612548828} +02/25/2022 02:02:16 - INFO - codeparrot_training - Step 18892: {'lr': 0.00036215430196478526, 'samples': 9673216, 'steps': 18892, 'loss/train': 1.2245393991470337} +02/25/2022 02:02:20 - INFO - codeparrot_training - Step 18893: {'lr': 0.0003621396781997095, 'samples': 9673728, 'steps': 18893, 'loss/train': 2.807162046432495} +02/25/2022 02:02:25 - INFO - codeparrot_training - Step 18894: {'lr': 0.00036212505395426297, 'samples': 9674240, 'steps': 18894, 'loss/train': 2.5276408195495605} +02/25/2022 02:02:29 - INFO - codeparrot_training - Step 18895: {'lr': 0.0003621104292285082, 'samples': 9674752, 'steps': 18895, 'loss/train': 2.152820587158203} +02/25/2022 02:02:34 - INFO - codeparrot_training - Step 18896: {'lr': 0.0003620958040225081, 'samples': 9675264, 'steps': 18896, 'loss/train': 2.154420852661133} +02/25/2022 02:02:38 - INFO - codeparrot_training - Step 18897: {'lr': 0.0003620811783363251, 'samples': 9675776, 'steps': 18897, 'loss/train': 0.9838770627975464} +02/25/2022 02:02:43 - INFO - codeparrot_training - Step 18898: {'lr': 0.0003620665521700219, 'samples': 9676288, 'steps': 18898, 'loss/train': 1.6690455675125122} +02/25/2022 02:02:47 - INFO - codeparrot_training - Step 18899: {'lr': 0.00036205192552366124, 'samples': 9676800, 'steps': 18899, 'loss/train': 2.0534627437591553} +02/25/2022 02:02:53 - INFO - codeparrot_training - Step 18900: {'lr': 0.0003620372983973057, 'samples': 9677312, 'steps': 18900, 'loss/train': 1.8415592908859253} +02/25/2022 02:02:56 - INFO - codeparrot_training - Step 18901: {'lr': 0.00036202267079101793, 'samples': 9677824, 'steps': 18901, 'loss/train': 2.4564619064331055} +02/25/2022 02:03:02 - INFO - codeparrot_training - Step 18902: {'lr': 0.0003620080427048605, 'samples': 9678336, 'steps': 18902, 'loss/train': 2.430135726928711} +02/25/2022 02:03:05 - INFO - codeparrot_training - Step 18903: {'lr': 0.00036199341413889637, 'samples': 9678848, 'steps': 18903, 'loss/train': 0.8723113536834717} +02/25/2022 02:03:11 - INFO - codeparrot_training - Step 18904: {'lr': 0.00036197878509318794, 'samples': 9679360, 'steps': 18904, 'loss/train': 2.141354560852051} +02/25/2022 02:03:14 - INFO - codeparrot_training - Step 18905: {'lr': 0.00036196415556779795, 'samples': 9679872, 'steps': 18905, 'loss/train': 1.7737194299697876} +02/25/2022 02:03:20 - INFO - codeparrot_training - Step 18906: {'lr': 0.00036194952556278906, 'samples': 9680384, 'steps': 18906, 'loss/train': 2.0249686241149902} +02/25/2022 02:03:23 - INFO - codeparrot_training - Step 18907: {'lr': 0.00036193489507822395, 'samples': 9680896, 'steps': 18907, 'loss/train': 2.179361343383789} +02/25/2022 02:03:29 - INFO - codeparrot_training - Step 18908: {'lr': 0.0003619202641141652, 'samples': 9681408, 'steps': 18908, 'loss/train': 1.6962487697601318} +02/25/2022 02:03:32 - INFO - codeparrot_training - Step 18909: {'lr': 0.00036190563267067576, 'samples': 9681920, 'steps': 18909, 'loss/train': 3.1392674446105957} +02/25/2022 02:03:39 - INFO - codeparrot_training - Step 18910: {'lr': 0.000361891000747818, 'samples': 9682432, 'steps': 18910, 'loss/train': 2.121316432952881} +02/25/2022 02:03:42 - INFO - codeparrot_training - Step 18911: {'lr': 0.0003618763683456548, 'samples': 9682944, 'steps': 18911, 'loss/train': 1.4225636720657349} +02/25/2022 02:03:48 - INFO - codeparrot_training - Step 18912: {'lr': 0.00036186173546424874, 'samples': 9683456, 'steps': 18912, 'loss/train': 0.2675212323665619} +02/25/2022 02:03:51 - INFO - codeparrot_training - Step 18913: {'lr': 0.00036184710210366243, 'samples': 9683968, 'steps': 18913, 'loss/train': 2.4865174293518066} +02/25/2022 02:03:56 - INFO - codeparrot_training - Step 18914: {'lr': 0.0003618324682639588, 'samples': 9684480, 'steps': 18914, 'loss/train': 1.8758796453475952} +02/25/2022 02:04:00 - INFO - codeparrot_training - Step 18915: {'lr': 0.0003618178339452003, 'samples': 9684992, 'steps': 18915, 'loss/train': 1.406438946723938} +02/25/2022 02:04:05 - INFO - codeparrot_training - Step 18916: {'lr': 0.00036180319914744973, 'samples': 9685504, 'steps': 18916, 'loss/train': 2.1090404987335205} +02/25/2022 02:04:09 - INFO - codeparrot_training - Step 18917: {'lr': 0.00036178856387076967, 'samples': 9686016, 'steps': 18917, 'loss/train': 2.6613447666168213} +02/25/2022 02:04:15 - INFO - codeparrot_training - Step 18918: {'lr': 0.00036177392811522304, 'samples': 9686528, 'steps': 18918, 'loss/train': 3.106773614883423} +02/25/2022 02:04:18 - INFO - codeparrot_training - Step 18919: {'lr': 0.0003617592918808723, 'samples': 9687040, 'steps': 18919, 'loss/train': 1.9209779500961304} +02/25/2022 02:04:24 - INFO - codeparrot_training - Step 18920: {'lr': 0.0003617446551677803, 'samples': 9687552, 'steps': 18920, 'loss/train': 1.841469645500183} +02/25/2022 02:04:27 - INFO - codeparrot_training - Step 18921: {'lr': 0.0003617300179760096, 'samples': 9688064, 'steps': 18921, 'loss/train': 1.5022215843200684} +02/25/2022 02:04:33 - INFO - codeparrot_training - Step 18922: {'lr': 0.0003617153803056231, 'samples': 9688576, 'steps': 18922, 'loss/train': 2.4341156482696533} +02/25/2022 02:04:36 - INFO - codeparrot_training - Step 18923: {'lr': 0.0003617007421566833, 'samples': 9689088, 'steps': 18923, 'loss/train': 2.744959831237793} +02/25/2022 02:04:42 - INFO - codeparrot_training - Step 18924: {'lr': 0.00036168610352925294, 'samples': 9689600, 'steps': 18924, 'loss/train': 1.898758053779602} +02/25/2022 02:04:45 - INFO - codeparrot_training - Step 18925: {'lr': 0.00036167146442339486, 'samples': 9690112, 'steps': 18925, 'loss/train': 0.7830764055252075} +02/25/2022 02:04:51 - INFO - codeparrot_training - Step 18926: {'lr': 0.00036165682483917165, 'samples': 9690624, 'steps': 18926, 'loss/train': 2.000889778137207} +02/25/2022 02:04:55 - INFO - codeparrot_training - Step 18927: {'lr': 0.0003616421847766461, 'samples': 9691136, 'steps': 18927, 'loss/train': 1.683374285697937} +02/25/2022 02:05:00 - INFO - codeparrot_training - Step 18928: {'lr': 0.00036162754423588086, 'samples': 9691648, 'steps': 18928, 'loss/train': 1.7564724683761597} +02/25/2022 02:05:04 - INFO - codeparrot_training - Step 18929: {'lr': 0.0003616129032169387, 'samples': 9692160, 'steps': 18929, 'loss/train': 1.1115750074386597} +02/25/2022 02:05:09 - INFO - codeparrot_training - Step 18930: {'lr': 0.00036159826171988223, 'samples': 9692672, 'steps': 18930, 'loss/train': 2.705695152282715} +02/25/2022 02:05:13 - INFO - codeparrot_training - Step 18931: {'lr': 0.00036158361974477434, 'samples': 9693184, 'steps': 18931, 'loss/train': 1.909696102142334} +02/25/2022 02:05:18 - INFO - codeparrot_training - Step 18932: {'lr': 0.0003615689772916776, 'samples': 9693696, 'steps': 18932, 'loss/train': 1.8387887477874756} +02/25/2022 02:05:22 - INFO - codeparrot_training - Step 18933: {'lr': 0.00036155433436065477, 'samples': 9694208, 'steps': 18933, 'loss/train': 2.6957552433013916} +02/25/2022 02:05:27 - INFO - codeparrot_training - Step 18934: {'lr': 0.0003615396909517686, 'samples': 9694720, 'steps': 18934, 'loss/train': 2.0331976413726807} +02/25/2022 02:05:31 - INFO - codeparrot_training - Step 18935: {'lr': 0.00036152504706508183, 'samples': 9695232, 'steps': 18935, 'loss/train': 1.6742535829544067} +02/25/2022 02:05:36 - INFO - codeparrot_training - Step 18936: {'lr': 0.00036151040270065716, 'samples': 9695744, 'steps': 18936, 'loss/train': 1.4416239261627197} +02/25/2022 02:05:40 - INFO - codeparrot_training - Step 18937: {'lr': 0.00036149575785855736, 'samples': 9696256, 'steps': 18937, 'loss/train': 1.6289310455322266} +02/25/2022 02:05:46 - INFO - codeparrot_training - Step 18938: {'lr': 0.00036148111253884514, 'samples': 9696768, 'steps': 18938, 'loss/train': 2.012981653213501} +02/25/2022 02:05:49 - INFO - codeparrot_training - Step 18939: {'lr': 0.00036146646674158317, 'samples': 9697280, 'steps': 18939, 'loss/train': 1.5505435466766357} +02/25/2022 02:05:55 - INFO - codeparrot_training - Step 18940: {'lr': 0.00036145182046683433, 'samples': 9697792, 'steps': 18940, 'loss/train': 3.1853725910186768} +02/25/2022 02:05:58 - INFO - codeparrot_training - Step 18941: {'lr': 0.0003614371737146612, 'samples': 9698304, 'steps': 18941, 'loss/train': 2.1582465171813965} +02/25/2022 02:06:04 - INFO - codeparrot_training - Step 18942: {'lr': 0.0003614225264851266, 'samples': 9698816, 'steps': 18942, 'loss/train': 1.6931086778640747} +02/25/2022 02:06:08 - INFO - codeparrot_training - Step 18943: {'lr': 0.00036140787877829336, 'samples': 9699328, 'steps': 18943, 'loss/train': 2.1213903427124023} +02/25/2022 02:06:13 - INFO - codeparrot_training - Step 18944: {'lr': 0.0003613932305942241, 'samples': 9699840, 'steps': 18944, 'loss/train': 2.4540419578552246} +02/25/2022 02:06:17 - INFO - codeparrot_training - Step 18945: {'lr': 0.0003613785819329816, 'samples': 9700352, 'steps': 18945, 'loss/train': 1.9778475761413574} +02/25/2022 02:06:22 - INFO - codeparrot_training - Step 18946: {'lr': 0.00036136393279462866, 'samples': 9700864, 'steps': 18946, 'loss/train': 1.6620349884033203} +02/25/2022 02:06:26 - INFO - codeparrot_training - Step 18947: {'lr': 0.00036134928317922796, 'samples': 9701376, 'steps': 18947, 'loss/train': 3.143666982650757} +02/25/2022 02:06:32 - INFO - codeparrot_training - Step 18948: {'lr': 0.0003613346330868423, 'samples': 9701888, 'steps': 18948, 'loss/train': 1.8337793350219727} +02/25/2022 02:06:35 - INFO - codeparrot_training - Step 18949: {'lr': 0.0003613199825175344, 'samples': 9702400, 'steps': 18949, 'loss/train': 2.0417895317077637} +02/25/2022 02:06:41 - INFO - codeparrot_training - Step 18950: {'lr': 0.0003613053314713671, 'samples': 9702912, 'steps': 18950, 'loss/train': 2.463352918624878} +02/25/2022 02:06:44 - INFO - codeparrot_training - Step 18951: {'lr': 0.00036129067994840303, 'samples': 9703424, 'steps': 18951, 'loss/train': 1.6299381256103516} +02/25/2022 02:06:50 - INFO - codeparrot_training - Step 18952: {'lr': 0.0003612760279487051, 'samples': 9703936, 'steps': 18952, 'loss/train': 1.932807445526123} +02/25/2022 02:06:54 - INFO - codeparrot_training - Step 18953: {'lr': 0.00036126137547233593, 'samples': 9704448, 'steps': 18953, 'loss/train': 3.009141683578491} +02/25/2022 02:06:59 - INFO - codeparrot_training - Step 18954: {'lr': 0.00036124672251935843, 'samples': 9704960, 'steps': 18954, 'loss/train': 1.8990607261657715} +02/25/2022 02:07:03 - INFO - codeparrot_training - Step 18955: {'lr': 0.00036123206908983527, 'samples': 9705472, 'steps': 18955, 'loss/train': 0.758811354637146} +02/25/2022 02:07:09 - INFO - codeparrot_training - Step 18956: {'lr': 0.00036121741518382915, 'samples': 9705984, 'steps': 18956, 'loss/train': 2.3784446716308594} +02/25/2022 02:07:12 - INFO - codeparrot_training - Step 18957: {'lr': 0.0003612027608014031, 'samples': 9706496, 'steps': 18957, 'loss/train': 0.8154066205024719} +02/25/2022 02:07:18 - INFO - codeparrot_training - Step 18958: {'lr': 0.0003611881059426196, 'samples': 9707008, 'steps': 18958, 'loss/train': 1.6953154802322388} +02/25/2022 02:07:22 - INFO - codeparrot_training - Step 18959: {'lr': 0.00036117345060754163, 'samples': 9707520, 'steps': 18959, 'loss/train': 0.1523435413837433} +02/25/2022 02:07:28 - INFO - codeparrot_training - Step 18960: {'lr': 0.00036115879479623185, 'samples': 9708032, 'steps': 18960, 'loss/train': 0.30938780307769775} +02/25/2022 02:07:31 - INFO - codeparrot_training - Step 18961: {'lr': 0.00036114413850875317, 'samples': 9708544, 'steps': 18961, 'loss/train': 0.3409813344478607} +02/25/2022 02:07:37 - INFO - codeparrot_training - Step 18962: {'lr': 0.00036112948174516824, 'samples': 9709056, 'steps': 18962, 'loss/train': 2.7156331539154053} +02/25/2022 02:07:40 - INFO - codeparrot_training - Step 18963: {'lr': 0.0003611148245055399, 'samples': 9709568, 'steps': 18963, 'loss/train': 1.6761538982391357} +02/25/2022 02:07:45 - INFO - codeparrot_training - Step 18964: {'lr': 0.000361100166789931, 'samples': 9710080, 'steps': 18964, 'loss/train': 2.658154010772705} +02/25/2022 02:07:49 - INFO - codeparrot_training - Step 18965: {'lr': 0.0003610855085984041, 'samples': 9710592, 'steps': 18965, 'loss/train': 2.2412421703338623} +02/25/2022 02:07:54 - INFO - codeparrot_training - Step 18966: {'lr': 0.0003610708499310223, 'samples': 9711104, 'steps': 18966, 'loss/train': 0.1282891035079956} +02/25/2022 02:07:58 - INFO - codeparrot_training - Step 18967: {'lr': 0.0003610561907878482, 'samples': 9711616, 'steps': 18967, 'loss/train': 1.6763033866882324} +02/25/2022 02:08:03 - INFO - codeparrot_training - Step 18968: {'lr': 0.00036104153116894465, 'samples': 9712128, 'steps': 18968, 'loss/train': 3.7002174854278564} +02/25/2022 02:08:07 - INFO - codeparrot_training - Step 18969: {'lr': 0.00036102687107437444, 'samples': 9712640, 'steps': 18969, 'loss/train': 1.2943414449691772} +02/25/2022 02:08:12 - INFO - codeparrot_training - Step 18970: {'lr': 0.0003610122105042004, 'samples': 9713152, 'steps': 18970, 'loss/train': 2.2200047969818115} +02/25/2022 02:08:16 - INFO - codeparrot_training - Step 18971: {'lr': 0.0003609975494584852, 'samples': 9713664, 'steps': 18971, 'loss/train': 1.8420921564102173} +02/25/2022 02:08:23 - INFO - codeparrot_training - Step 18972: {'lr': 0.0003609828879372918, 'samples': 9714176, 'steps': 18972, 'loss/train': 2.4288551807403564} +02/25/2022 02:08:26 - INFO - codeparrot_training - Step 18973: {'lr': 0.000360968225940683, 'samples': 9714688, 'steps': 18973, 'loss/train': 3.2310967445373535} +02/25/2022 02:08:32 - INFO - codeparrot_training - Step 18974: {'lr': 0.00036095356346872156, 'samples': 9715200, 'steps': 18974, 'loss/train': 1.9115612506866455} +02/25/2022 02:08:35 - INFO - codeparrot_training - Step 18975: {'lr': 0.0003609389005214702, 'samples': 9715712, 'steps': 18975, 'loss/train': 3.7304508686065674} +02/25/2022 02:08:41 - INFO - codeparrot_training - Step 18976: {'lr': 0.0003609242370989919, 'samples': 9716224, 'steps': 18976, 'loss/train': 2.1039819717407227} +02/25/2022 02:08:44 - INFO - codeparrot_training - Step 18977: {'lr': 0.0003609095732013493, 'samples': 9716736, 'steps': 18977, 'loss/train': 2.0949344635009766} +02/25/2022 02:08:50 - INFO - codeparrot_training - Step 18978: {'lr': 0.0003608949088286054, 'samples': 9717248, 'steps': 18978, 'loss/train': 1.1355477571487427} +02/25/2022 02:08:53 - INFO - codeparrot_training - Step 18979: {'lr': 0.00036088024398082285, 'samples': 9717760, 'steps': 18979, 'loss/train': 2.464592218399048} +02/25/2022 02:08:59 - INFO - codeparrot_training - Step 18980: {'lr': 0.00036086557865806464, 'samples': 9718272, 'steps': 18980, 'loss/train': 2.1163060665130615} +02/25/2022 02:09:02 - INFO - codeparrot_training - Step 18981: {'lr': 0.0003608509128603934, 'samples': 9718784, 'steps': 18981, 'loss/train': 2.080054521560669} +02/25/2022 02:09:09 - INFO - codeparrot_training - Step 18982: {'lr': 0.0003608362465878721, 'samples': 9719296, 'steps': 18982, 'loss/train': 2.635371446609497} +02/25/2022 02:09:12 - INFO - codeparrot_training - Step 18983: {'lr': 0.0003608215798405635, 'samples': 9719808, 'steps': 18983, 'loss/train': 1.5709997415542603} +02/25/2022 02:09:18 - INFO - codeparrot_training - Step 18984: {'lr': 0.0003608069126185304, 'samples': 9720320, 'steps': 18984, 'loss/train': 2.0796685218811035} +02/25/2022 02:09:21 - INFO - codeparrot_training - Step 18985: {'lr': 0.0003607922449218357, 'samples': 9720832, 'steps': 18985, 'loss/train': 1.9137529134750366} +02/25/2022 02:09:27 - INFO - codeparrot_training - Step 18986: {'lr': 0.00036077757675054224, 'samples': 9721344, 'steps': 18986, 'loss/train': 1.7957603931427002} +02/25/2022 02:09:30 - INFO - codeparrot_training - Step 18987: {'lr': 0.0003607629081047128, 'samples': 9721856, 'steps': 18987, 'loss/train': 1.5062298774719238} +02/25/2022 02:09:36 - INFO - codeparrot_training - Step 18988: {'lr': 0.0003607482389844102, 'samples': 9722368, 'steps': 18988, 'loss/train': 1.6406534910202026} +02/25/2022 02:09:39 - INFO - codeparrot_training - Step 18989: {'lr': 0.0003607335693896973, 'samples': 9722880, 'steps': 18989, 'loss/train': 1.2689883708953857} +02/25/2022 02:09:45 - INFO - codeparrot_training - Step 18990: {'lr': 0.00036071889932063706, 'samples': 9723392, 'steps': 18990, 'loss/train': 2.2161219120025635} +02/25/2022 02:09:49 - INFO - codeparrot_training - Step 18991: {'lr': 0.0003607042287772921, 'samples': 9723904, 'steps': 18991, 'loss/train': 1.3956328630447388} +02/25/2022 02:09:54 - INFO - codeparrot_training - Step 18992: {'lr': 0.0003606895577597254, 'samples': 9724416, 'steps': 18992, 'loss/train': 2.0705862045288086} +02/25/2022 02:09:58 - INFO - codeparrot_training - Step 18993: {'lr': 0.0003606748862679998, 'samples': 9724928, 'steps': 18993, 'loss/train': 1.8988370895385742} +02/25/2022 02:10:04 - INFO - codeparrot_training - Step 18994: {'lr': 0.00036066021430217817, 'samples': 9725440, 'steps': 18994, 'loss/train': 2.5313243865966797} +02/25/2022 02:10:07 - INFO - codeparrot_training - Step 18995: {'lr': 0.00036064554186232323, 'samples': 9725952, 'steps': 18995, 'loss/train': 2.888190269470215} +02/25/2022 02:10:13 - INFO - codeparrot_training - Step 18996: {'lr': 0.0003606308689484979, 'samples': 9726464, 'steps': 18996, 'loss/train': 2.065453290939331} +02/25/2022 02:10:16 - INFO - codeparrot_training - Step 18997: {'lr': 0.0003606161955607652, 'samples': 9726976, 'steps': 18997, 'loss/train': 1.7177544832229614} +02/25/2022 02:10:22 - INFO - codeparrot_training - Step 18998: {'lr': 0.0003606015216991877, 'samples': 9727488, 'steps': 18998, 'loss/train': 2.8631699085235596} +02/25/2022 02:10:25 - INFO - codeparrot_training - Step 18999: {'lr': 0.0003605868473638285, 'samples': 9728000, 'steps': 18999, 'loss/train': 1.1513526439666748} +02/25/2022 02:10:25 - INFO - codeparrot_training - Evaluating and saving model checkpoint