diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -21427,3 +21427,1009 @@ Use FP16 precision: False 02/25/2022 04:46:23 - INFO - codeparrot_training - Step 20998: {'lr': 0.0003303908538532534, 'samples': 10751488, 'steps': 20998, 'loss/train': 2.365709066390991} 02/25/2022 04:46:27 - INFO - codeparrot_training - Step 20999: {'lr': 0.00033037536026167313, 'samples': 10752000, 'steps': 20999, 'loss/train': 2.366279363632202} 02/25/2022 04:46:27 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 04:46:44 - WARNING - huggingface_hub.repository - Several commits (21) will be pushed upstream. +02/25/2022 04:46:44 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 04:47:19 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + ad14ca4..1098822 floral-grass-11 -> floral-grass-11 + +02/25/2022 04:47:26 - INFO - codeparrot_training - Step 21000: {'lr': 0.0003303598663257904, 'samples': 10752512, 'steps': 21000, 'loss/train': 2.4860072135925293} +02/25/2022 04:47:29 - INFO - codeparrot_training - Step 21001: {'lr': 0.00033034437204567145, 'samples': 10753024, 'steps': 21001, 'loss/train': 1.9799772500991821} +02/25/2022 04:47:35 - INFO - codeparrot_training - Step 21002: {'lr': 0.00033032887742138285, 'samples': 10753536, 'steps': 21002, 'loss/train': 1.8810853958129883} +02/25/2022 04:47:38 - INFO - codeparrot_training - Step 21003: {'lr': 0.0003303133824529907, 'samples': 10754048, 'steps': 21003, 'loss/train': 2.583627939224243} +02/25/2022 04:47:44 - INFO - codeparrot_training - Step 21004: {'lr': 0.00033029788714056165, 'samples': 10754560, 'steps': 21004, 'loss/train': 2.34503436088562} +02/25/2022 04:47:47 - INFO - codeparrot_training - Step 21005: {'lr': 0.0003302823914841618, 'samples': 10755072, 'steps': 21005, 'loss/train': 0.7265483736991882} +02/25/2022 04:47:54 - INFO - codeparrot_training - Step 21006: {'lr': 0.00033026689548385776, 'samples': 10755584, 'steps': 21006, 'loss/train': 0.3984239399433136} +02/25/2022 04:47:58 - INFO - codeparrot_training - Step 21007: {'lr': 0.00033025139913971585, 'samples': 10756096, 'steps': 21007, 'loss/train': 1.0154517889022827} +02/25/2022 04:48:03 - INFO - codeparrot_training - Step 21008: {'lr': 0.00033023590245180237, 'samples': 10756608, 'steps': 21008, 'loss/train': 2.5615317821502686} +02/25/2022 04:48:07 - INFO - codeparrot_training - Step 21009: {'lr': 0.00033022040542018385, 'samples': 10757120, 'steps': 21009, 'loss/train': 1.0869808197021484} +02/25/2022 04:48:12 - INFO - codeparrot_training - Step 21010: {'lr': 0.0003302049080449265, 'samples': 10757632, 'steps': 21010, 'loss/train': 3.8830811977386475} +02/25/2022 04:48:16 - INFO - codeparrot_training - Step 21011: {'lr': 0.0003301894103260968, 'samples': 10758144, 'steps': 21011, 'loss/train': 1.8186558485031128} +02/25/2022 04:48:21 - INFO - codeparrot_training - Step 21012: {'lr': 0.0003301739122637611, 'samples': 10758656, 'steps': 21012, 'loss/train': 1.922775387763977} +02/25/2022 04:48:25 - INFO - codeparrot_training - Step 21013: {'lr': 0.00033015841385798596, 'samples': 10759168, 'steps': 21013, 'loss/train': 1.9774383306503296} +02/25/2022 04:48:30 - INFO - codeparrot_training - Step 21014: {'lr': 0.00033014291510883746, 'samples': 10759680, 'steps': 21014, 'loss/train': 1.828243613243103} +02/25/2022 04:48:34 - INFO - codeparrot_training - Step 21015: {'lr': 0.0003301274160163823, 'samples': 10760192, 'steps': 21015, 'loss/train': 2.2705297470092773} +02/25/2022 04:48:41 - INFO - codeparrot_training - Step 21016: {'lr': 0.00033011191658068663, 'samples': 10760704, 'steps': 21016, 'loss/train': 2.7779653072357178} +02/25/2022 04:48:45 - INFO - codeparrot_training - Step 21017: {'lr': 0.00033009641680181696, 'samples': 10761216, 'steps': 21017, 'loss/train': 0.4270228445529938} +02/25/2022 04:48:50 - INFO - codeparrot_training - Step 21018: {'lr': 0.00033008091667983974, 'samples': 10761728, 'steps': 21018, 'loss/train': 2.462270975112915} +02/25/2022 04:48:54 - INFO - codeparrot_training - Step 21019: {'lr': 0.0003300654162148213, 'samples': 10762240, 'steps': 21019, 'loss/train': 1.0128071308135986} +02/25/2022 04:48:59 - INFO - codeparrot_training - Step 21020: {'lr': 0.00033004991540682793, 'samples': 10762752, 'steps': 21020, 'loss/train': 0.6379898190498352} +02/25/2022 04:49:03 - INFO - codeparrot_training - Step 21021: {'lr': 0.00033003441425592627, 'samples': 10763264, 'steps': 21021, 'loss/train': 1.2960740327835083} +02/25/2022 04:49:08 - INFO - codeparrot_training - Step 21022: {'lr': 0.00033001891276218247, 'samples': 10763776, 'steps': 21022, 'loss/train': 1.744156002998352} +02/25/2022 04:49:12 - INFO - codeparrot_training - Step 21023: {'lr': 0.0003300034109256632, 'samples': 10764288, 'steps': 21023, 'loss/train': 1.010110855102539} +02/25/2022 04:49:17 - INFO - codeparrot_training - Step 21024: {'lr': 0.00032998790874643456, 'samples': 10764800, 'steps': 21024, 'loss/train': 0.2765105366706848} +02/25/2022 04:49:21 - INFO - codeparrot_training - Step 21025: {'lr': 0.00032997240622456326, 'samples': 10765312, 'steps': 21025, 'loss/train': 1.4589354991912842} +02/25/2022 04:49:28 - INFO - codeparrot_training - Step 21026: {'lr': 0.0003299569033601155, 'samples': 10765824, 'steps': 21026, 'loss/train': 1.5648595094680786} +02/25/2022 04:49:32 - INFO - codeparrot_training - Step 21027: {'lr': 0.0003299414001531578, 'samples': 10766336, 'steps': 21027, 'loss/train': 2.5697457790374756} +02/25/2022 04:49:37 - INFO - codeparrot_training - Step 21028: {'lr': 0.0003299258966037565, 'samples': 10766848, 'steps': 21028, 'loss/train': 2.44779896736145} +02/25/2022 04:49:41 - INFO - codeparrot_training - Step 21029: {'lr': 0.000329910392711978, 'samples': 10767360, 'steps': 21029, 'loss/train': 1.4116836786270142} +02/25/2022 04:49:46 - INFO - codeparrot_training - Step 21030: {'lr': 0.0003298948884778887, 'samples': 10767872, 'steps': 21030, 'loss/train': 2.0573225021362305} +02/25/2022 04:49:50 - INFO - codeparrot_training - Step 21031: {'lr': 0.00032987938390155523, 'samples': 10768384, 'steps': 21031, 'loss/train': 1.0688015222549438} +02/25/2022 04:49:55 - INFO - codeparrot_training - Step 21032: {'lr': 0.00032986387898304375, 'samples': 10768896, 'steps': 21032, 'loss/train': 1.879749059677124} +02/25/2022 04:49:59 - INFO - codeparrot_training - Step 21033: {'lr': 0.00032984837372242084, 'samples': 10769408, 'steps': 21033, 'loss/train': 2.6197383403778076} +02/25/2022 04:50:04 - INFO - codeparrot_training - Step 21034: {'lr': 0.0003298328681197528, 'samples': 10769920, 'steps': 21034, 'loss/train': 1.3640557527542114} +02/25/2022 04:50:08 - INFO - codeparrot_training - Step 21035: {'lr': 0.00032981736217510603, 'samples': 10770432, 'steps': 21035, 'loss/train': 0.510646402835846} +02/25/2022 04:50:13 - INFO - codeparrot_training - Step 21036: {'lr': 0.0003298018558885471, 'samples': 10770944, 'steps': 21036, 'loss/train': 2.5600621700286865} +02/25/2022 04:50:17 - INFO - codeparrot_training - Step 21037: {'lr': 0.0003297863492601424, 'samples': 10771456, 'steps': 21037, 'loss/train': 1.5862576961517334} +02/25/2022 04:50:24 - INFO - codeparrot_training - Step 21038: {'lr': 0.0003297708422899582, 'samples': 10771968, 'steps': 21038, 'loss/train': 2.7320468425750732} +02/25/2022 04:50:27 - INFO - codeparrot_training - Step 21039: {'lr': 0.0003297553349780612, 'samples': 10772480, 'steps': 21039, 'loss/train': 1.9368572235107422} +02/25/2022 04:50:33 - INFO - codeparrot_training - Step 21040: {'lr': 0.0003297398273245175, 'samples': 10772992, 'steps': 21040, 'loss/train': 1.3581600189208984} +02/25/2022 04:50:37 - INFO - codeparrot_training - Step 21041: {'lr': 0.0003297243193293938, 'samples': 10773504, 'steps': 21041, 'loss/train': 1.6091434955596924} +02/25/2022 04:50:42 - INFO - codeparrot_training - Step 21042: {'lr': 0.0003297088109927564, 'samples': 10774016, 'steps': 21042, 'loss/train': 1.7612583637237549} +02/25/2022 04:50:46 - INFO - codeparrot_training - Step 21043: {'lr': 0.00032969330231467177, 'samples': 10774528, 'steps': 21043, 'loss/train': 2.51531720161438} +02/25/2022 04:50:51 - INFO - codeparrot_training - Step 21044: {'lr': 0.0003296777932952064, 'samples': 10775040, 'steps': 21044, 'loss/train': 1.769242525100708} +02/25/2022 04:50:55 - INFO - codeparrot_training - Step 21045: {'lr': 0.0003296622839344265, 'samples': 10775552, 'steps': 21045, 'loss/train': 1.7496014833450317} +02/25/2022 04:51:01 - INFO - codeparrot_training - Step 21046: {'lr': 0.00032964677423239885, 'samples': 10776064, 'steps': 21046, 'loss/train': 0.8651546239852905} +02/25/2022 04:51:04 - INFO - codeparrot_training - Step 21047: {'lr': 0.0003296312641891896, 'samples': 10776576, 'steps': 21047, 'loss/train': 2.37147855758667} +02/25/2022 04:51:10 - INFO - codeparrot_training - Step 21048: {'lr': 0.0003296157538048654, 'samples': 10777088, 'steps': 21048, 'loss/train': 2.2832863330841064} +02/25/2022 04:51:13 - INFO - codeparrot_training - Step 21049: {'lr': 0.0003296002430794925, 'samples': 10777600, 'steps': 21049, 'loss/train': 1.270258903503418} +02/25/2022 04:51:18 - INFO - codeparrot_training - Step 21050: {'lr': 0.00032958473201313745, 'samples': 10778112, 'steps': 21050, 'loss/train': 2.1549997329711914} +02/25/2022 04:51:22 - INFO - codeparrot_training - Step 21051: {'lr': 0.0003295692206058667, 'samples': 10778624, 'steps': 21051, 'loss/train': 2.7490174770355225} +02/25/2022 04:51:29 - INFO - codeparrot_training - Step 21052: {'lr': 0.00032955370885774665, 'samples': 10779136, 'steps': 21052, 'loss/train': 1.6762350797653198} +02/25/2022 04:51:33 - INFO - codeparrot_training - Step 21053: {'lr': 0.0003295381967688438, 'samples': 10779648, 'steps': 21053, 'loss/train': 1.6603776216506958} +02/25/2022 04:51:38 - INFO - codeparrot_training - Step 21054: {'lr': 0.0003295226843392245, 'samples': 10780160, 'steps': 21054, 'loss/train': 2.773618459701538} +02/25/2022 04:51:42 - INFO - codeparrot_training - Step 21055: {'lr': 0.0003295071715689554, 'samples': 10780672, 'steps': 21055, 'loss/train': 1.3425158262252808} +02/25/2022 04:51:47 - INFO - codeparrot_training - Step 21056: {'lr': 0.0003294916584581027, 'samples': 10781184, 'steps': 21056, 'loss/train': 1.8481996059417725} +02/25/2022 04:51:51 - INFO - codeparrot_training - Step 21057: {'lr': 0.00032947614500673306, 'samples': 10781696, 'steps': 21057, 'loss/train': 1.491181492805481} +02/25/2022 04:51:56 - INFO - codeparrot_training - Step 21058: {'lr': 0.0003294606312149128, 'samples': 10782208, 'steps': 21058, 'loss/train': 1.9579530954360962} +02/25/2022 04:52:00 - INFO - codeparrot_training - Step 21059: {'lr': 0.00032944511708270853, 'samples': 10782720, 'steps': 21059, 'loss/train': 2.503943681716919} +02/25/2022 04:52:05 - INFO - codeparrot_training - Step 21060: {'lr': 0.00032942960261018653, 'samples': 10783232, 'steps': 21060, 'loss/train': 3.124640464782715} +02/25/2022 04:52:09 - INFO - codeparrot_training - Step 21061: {'lr': 0.0003294140877974133, 'samples': 10783744, 'steps': 21061, 'loss/train': 1.4339001178741455} +02/25/2022 04:52:14 - INFO - codeparrot_training - Step 21062: {'lr': 0.00032939857264445535, 'samples': 10784256, 'steps': 21062, 'loss/train': 2.0752813816070557} +02/25/2022 04:52:18 - INFO - codeparrot_training - Step 21063: {'lr': 0.00032938305715137916, 'samples': 10784768, 'steps': 21063, 'loss/train': 1.7039581537246704} +02/25/2022 04:52:25 - INFO - codeparrot_training - Step 21064: {'lr': 0.00032936754131825113, 'samples': 10785280, 'steps': 21064, 'loss/train': 1.726387619972229} +02/25/2022 04:52:29 - INFO - codeparrot_training - Step 21065: {'lr': 0.0003293520251451378, 'samples': 10785792, 'steps': 21065, 'loss/train': 2.150524377822876} +02/25/2022 04:52:34 - INFO - codeparrot_training - Step 21066: {'lr': 0.00032933650863210557, 'samples': 10786304, 'steps': 21066, 'loss/train': 3.0293538570404053} +02/25/2022 04:52:38 - INFO - codeparrot_training - Step 21067: {'lr': 0.00032932099177922095, 'samples': 10786816, 'steps': 21067, 'loss/train': 2.0431742668151855} +02/25/2022 04:52:43 - INFO - codeparrot_training - Step 21068: {'lr': 0.00032930547458655035, 'samples': 10787328, 'steps': 21068, 'loss/train': 2.278754949569702} +02/25/2022 04:52:47 - INFO - codeparrot_training - Step 21069: {'lr': 0.0003292899570541603, 'samples': 10787840, 'steps': 21069, 'loss/train': 1.818558692932129} +02/25/2022 04:52:52 - INFO - codeparrot_training - Step 21070: {'lr': 0.00032927443918211717, 'samples': 10788352, 'steps': 21070, 'loss/train': 1.162871241569519} +02/25/2022 04:52:56 - INFO - codeparrot_training - Step 21071: {'lr': 0.0003292589209704876, 'samples': 10788864, 'steps': 21071, 'loss/train': 2.547092914581299} +02/25/2022 04:53:01 - INFO - codeparrot_training - Step 21072: {'lr': 0.000329243402419338, 'samples': 10789376, 'steps': 21072, 'loss/train': 1.8579257726669312} +02/25/2022 04:53:05 - INFO - codeparrot_training - Step 21073: {'lr': 0.00032922788352873474, 'samples': 10789888, 'steps': 21073, 'loss/train': 2.371066093444824} +02/25/2022 04:53:12 - INFO - codeparrot_training - Step 21074: {'lr': 0.00032921236429874444, 'samples': 10790400, 'steps': 21074, 'loss/train': 2.1397194862365723} +02/25/2022 04:53:15 - INFO - codeparrot_training - Step 21075: {'lr': 0.0003291968447294335, 'samples': 10790912, 'steps': 21075, 'loss/train': 1.8102643489837646} +02/25/2022 04:53:21 - INFO - codeparrot_training - Step 21076: {'lr': 0.0003291813248208685, 'samples': 10791424, 'steps': 21076, 'loss/train': 1.8460663557052612} +02/25/2022 04:53:24 - INFO - codeparrot_training - Step 21077: {'lr': 0.00032916580457311573, 'samples': 10791936, 'steps': 21077, 'loss/train': 3.097266674041748} +02/25/2022 04:53:30 - INFO - codeparrot_training - Step 21078: {'lr': 0.00032915028398624186, 'samples': 10792448, 'steps': 21078, 'loss/train': 1.4640593528747559} +02/25/2022 04:53:33 - INFO - codeparrot_training - Step 21079: {'lr': 0.0003291347630603133, 'samples': 10792960, 'steps': 21079, 'loss/train': 2.1472299098968506} +02/25/2022 04:53:39 - INFO - codeparrot_training - Step 21080: {'lr': 0.00032911924179539653, 'samples': 10793472, 'steps': 21080, 'loss/train': 2.6353187561035156} +02/25/2022 04:53:42 - INFO - codeparrot_training - Step 21081: {'lr': 0.0003291037201915581, 'samples': 10793984, 'steps': 21081, 'loss/train': 0.8202171921730042} +02/25/2022 04:53:48 - INFO - codeparrot_training - Step 21082: {'lr': 0.0003290881982488644, 'samples': 10794496, 'steps': 21082, 'loss/train': 3.2697031497955322} +02/25/2022 04:53:51 - INFO - codeparrot_training - Step 21083: {'lr': 0.000329072675967382, 'samples': 10795008, 'steps': 21083, 'loss/train': 1.1767582893371582} +02/25/2022 04:53:59 - INFO - codeparrot_training - Step 21084: {'lr': 0.00032905715334717735, 'samples': 10795520, 'steps': 21084, 'loss/train': 2.0095412731170654} +02/25/2022 04:54:02 - INFO - codeparrot_training - Step 21085: {'lr': 0.000329041630388317, 'samples': 10796032, 'steps': 21085, 'loss/train': 2.0435166358947754} +02/25/2022 04:54:08 - INFO - codeparrot_training - Step 21086: {'lr': 0.00032902610709086727, 'samples': 10796544, 'steps': 21086, 'loss/train': 8.830096244812012} +02/25/2022 04:54:11 - INFO - codeparrot_training - Step 21087: {'lr': 0.00032901058345489494, 'samples': 10797056, 'steps': 21087, 'loss/train': 1.9922130107879639} +02/25/2022 04:54:17 - INFO - codeparrot_training - Step 21088: {'lr': 0.00032899505948046624, 'samples': 10797568, 'steps': 21088, 'loss/train': 2.054474353790283} +02/25/2022 04:54:20 - INFO - codeparrot_training - Step 21089: {'lr': 0.0003289795351676479, 'samples': 10798080, 'steps': 21089, 'loss/train': 2.3793280124664307} +02/25/2022 04:54:26 - INFO - codeparrot_training - Step 21090: {'lr': 0.0003289640105165063, 'samples': 10798592, 'steps': 21090, 'loss/train': 2.679232358932495} +02/25/2022 04:54:29 - INFO - codeparrot_training - Step 21091: {'lr': 0.0003289484855271078, 'samples': 10799104, 'steps': 21091, 'loss/train': 1.937221884727478} +02/25/2022 04:54:35 - INFO - codeparrot_training - Step 21092: {'lr': 0.0003289329601995192, 'samples': 10799616, 'steps': 21092, 'loss/train': 1.3778191804885864} +02/25/2022 04:54:38 - INFO - codeparrot_training - Step 21093: {'lr': 0.00032891743453380685, 'samples': 10800128, 'steps': 21093, 'loss/train': 1.8153339624404907} +02/25/2022 04:54:46 - INFO - codeparrot_training - Step 21094: {'lr': 0.00032890190853003727, 'samples': 10800640, 'steps': 21094, 'loss/train': 2.862828254699707} +02/25/2022 04:54:49 - INFO - codeparrot_training - Step 21095: {'lr': 0.0003288863821882769, 'samples': 10801152, 'steps': 21095, 'loss/train': 2.6834182739257812} +02/25/2022 04:54:55 - INFO - codeparrot_training - Step 21096: {'lr': 0.0003288708555085924, 'samples': 10801664, 'steps': 21096, 'loss/train': 1.8154313564300537} +02/25/2022 04:54:58 - INFO - codeparrot_training - Step 21097: {'lr': 0.00032885532849105014, 'samples': 10802176, 'steps': 21097, 'loss/train': 1.644252896308899} +02/25/2022 04:55:04 - INFO - codeparrot_training - Step 21098: {'lr': 0.0003288398011357168, 'samples': 10802688, 'steps': 21098, 'loss/train': 2.0015244483947754} +02/25/2022 04:55:07 - INFO - codeparrot_training - Step 21099: {'lr': 0.0003288242734426586, 'samples': 10803200, 'steps': 21099, 'loss/train': 2.689619779586792} +02/25/2022 04:55:13 - INFO - codeparrot_training - Step 21100: {'lr': 0.00032880874541194244, 'samples': 10803712, 'steps': 21100, 'loss/train': 1.8975166082382202} +02/25/2022 04:55:16 - INFO - codeparrot_training - Step 21101: {'lr': 0.0003287932170436345, 'samples': 10804224, 'steps': 21101, 'loss/train': 2.0911428928375244} +02/25/2022 04:55:22 - INFO - codeparrot_training - Step 21102: {'lr': 0.00032877768833780146, 'samples': 10804736, 'steps': 21102, 'loss/train': 2.481397867202759} +02/25/2022 04:55:25 - INFO - codeparrot_training - Step 21103: {'lr': 0.0003287621592945099, 'samples': 10805248, 'steps': 21103, 'loss/train': 1.8134071826934814} +02/25/2022 04:55:31 - INFO - codeparrot_training - Step 21104: {'lr': 0.0003287466299138262, 'samples': 10805760, 'steps': 21104, 'loss/train': 2.027007818222046} +02/25/2022 04:55:34 - INFO - codeparrot_training - Step 21105: {'lr': 0.000328731100195817, 'samples': 10806272, 'steps': 21105, 'loss/train': 1.9264589548110962} +02/25/2022 04:55:40 - INFO - codeparrot_training - Step 21106: {'lr': 0.00032871557014054864, 'samples': 10806784, 'steps': 21106, 'loss/train': 1.8219751119613647} +02/25/2022 04:55:43 - INFO - codeparrot_training - Step 21107: {'lr': 0.00032870003974808787, 'samples': 10807296, 'steps': 21107, 'loss/train': 1.6427385807037354} +02/25/2022 04:55:50 - INFO - codeparrot_training - Step 21108: {'lr': 0.0003286845090185011, 'samples': 10807808, 'steps': 21108, 'loss/train': 1.62216055393219} +02/25/2022 04:55:54 - INFO - codeparrot_training - Step 21109: {'lr': 0.0003286689779518549, 'samples': 10808320, 'steps': 21109, 'loss/train': 2.5330448150634766} +02/25/2022 04:55:59 - INFO - codeparrot_training - Step 21110: {'lr': 0.00032865344654821576, 'samples': 10808832, 'steps': 21110, 'loss/train': 1.924042820930481} +02/25/2022 04:56:03 - INFO - codeparrot_training - Step 21111: {'lr': 0.0003286379148076502, 'samples': 10809344, 'steps': 21111, 'loss/train': 2.2303214073181152} +02/25/2022 04:56:08 - INFO - codeparrot_training - Step 21112: {'lr': 0.00032862238273022483, 'samples': 10809856, 'steps': 21112, 'loss/train': 2.805931568145752} +02/25/2022 04:56:12 - INFO - codeparrot_training - Step 21113: {'lr': 0.0003286068503160061, 'samples': 10810368, 'steps': 21113, 'loss/train': 1.8496837615966797} +02/25/2022 04:56:17 - INFO - codeparrot_training - Step 21114: {'lr': 0.0003285913175650606, 'samples': 10810880, 'steps': 21114, 'loss/train': 1.8455113172531128} +02/25/2022 04:56:21 - INFO - codeparrot_training - Step 21115: {'lr': 0.00032857578447745484, 'samples': 10811392, 'steps': 21115, 'loss/train': 0.6516101956367493} +02/25/2022 04:56:26 - INFO - codeparrot_training - Step 21116: {'lr': 0.00032856025105325537, 'samples': 10811904, 'steps': 21116, 'loss/train': 1.9704476594924927} +02/25/2022 04:56:30 - INFO - codeparrot_training - Step 21117: {'lr': 0.00032854471729252876, 'samples': 10812416, 'steps': 21117, 'loss/train': 0.9231125116348267} +02/25/2022 04:56:35 - INFO - codeparrot_training - Step 21118: {'lr': 0.00032852918319534153, 'samples': 10812928, 'steps': 21118, 'loss/train': 2.7022061347961426} +02/25/2022 04:56:39 - INFO - codeparrot_training - Step 21119: {'lr': 0.00032851364876176014, 'samples': 10813440, 'steps': 21119, 'loss/train': 1.6050589084625244} +02/25/2022 04:56:46 - INFO - codeparrot_training - Step 21120: {'lr': 0.00032849811399185126, 'samples': 10813952, 'steps': 21120, 'loss/train': 1.2908967733383179} +02/25/2022 04:56:50 - INFO - codeparrot_training - Step 21121: {'lr': 0.0003284825788856814, 'samples': 10814464, 'steps': 21121, 'loss/train': 2.0518288612365723} +02/25/2022 04:56:55 - INFO - codeparrot_training - Step 21122: {'lr': 0.00032846704344331707, 'samples': 10814976, 'steps': 21122, 'loss/train': 1.7153793573379517} +02/25/2022 04:56:59 - INFO - codeparrot_training - Step 21123: {'lr': 0.00032845150766482484, 'samples': 10815488, 'steps': 21123, 'loss/train': 2.383584499359131} +02/25/2022 04:57:04 - INFO - codeparrot_training - Step 21124: {'lr': 0.0003284359715502713, 'samples': 10816000, 'steps': 21124, 'loss/train': 2.0794625282287598} +02/25/2022 04:57:08 - INFO - codeparrot_training - Step 21125: {'lr': 0.0003284204350997229, 'samples': 10816512, 'steps': 21125, 'loss/train': 2.3251895904541016} +02/25/2022 04:57:13 - INFO - codeparrot_training - Step 21126: {'lr': 0.0003284048983132463, 'samples': 10817024, 'steps': 21126, 'loss/train': 1.4006412029266357} +02/25/2022 04:57:17 - INFO - codeparrot_training - Step 21127: {'lr': 0.000328389361190908, 'samples': 10817536, 'steps': 21127, 'loss/train': 3.051767587661743} +02/25/2022 04:57:22 - INFO - codeparrot_training - Step 21128: {'lr': 0.0003283738237327745, 'samples': 10818048, 'steps': 21128, 'loss/train': 2.5093495845794678} +02/25/2022 04:57:26 - INFO - codeparrot_training - Step 21129: {'lr': 0.0003283582859389125, 'samples': 10818560, 'steps': 21129, 'loss/train': 1.7888586521148682} +02/25/2022 04:57:33 - INFO - codeparrot_training - Step 21130: {'lr': 0.0003283427478093885, 'samples': 10819072, 'steps': 21130, 'loss/train': 2.1881496906280518} +02/25/2022 04:57:36 - INFO - codeparrot_training - Step 21131: {'lr': 0.0003283272093442691, 'samples': 10819584, 'steps': 21131, 'loss/train': 2.4616143703460693} +02/25/2022 04:57:42 - INFO - codeparrot_training - Step 21132: {'lr': 0.00032831167054362065, 'samples': 10820096, 'steps': 21132, 'loss/train': 1.0961674451828003} +02/25/2022 04:57:45 - INFO - codeparrot_training - Step 21133: {'lr': 0.00032829613140751004, 'samples': 10820608, 'steps': 21133, 'loss/train': 1.1025972366333008} +02/25/2022 04:57:51 - INFO - codeparrot_training - Step 21134: {'lr': 0.0003282805919360035, 'samples': 10821120, 'steps': 21134, 'loss/train': 2.0721688270568848} +02/25/2022 04:57:54 - INFO - codeparrot_training - Step 21135: {'lr': 0.0003282650521291679, 'samples': 10821632, 'steps': 21135, 'loss/train': 1.9505776166915894} +02/25/2022 04:58:00 - INFO - codeparrot_training - Step 21136: {'lr': 0.0003282495119870695, 'samples': 10822144, 'steps': 21136, 'loss/train': 1.6411024332046509} +02/25/2022 04:58:04 - INFO - codeparrot_training - Step 21137: {'lr': 0.00032823397150977523, 'samples': 10822656, 'steps': 21137, 'loss/train': 1.9987449645996094} +02/25/2022 04:58:09 - INFO - codeparrot_training - Step 21138: {'lr': 0.00032821843069735134, 'samples': 10823168, 'steps': 21138, 'loss/train': 1.6433777809143066} +02/25/2022 04:58:13 - INFO - codeparrot_training - Step 21139: {'lr': 0.00032820288954986453, 'samples': 10823680, 'steps': 21139, 'loss/train': 1.414487600326538} +02/25/2022 04:58:20 - INFO - codeparrot_training - Step 21140: {'lr': 0.00032818734806738147, 'samples': 10824192, 'steps': 21140, 'loss/train': 1.5155943632125854} +02/25/2022 04:58:23 - INFO - codeparrot_training - Step 21141: {'lr': 0.0003281718062499686, 'samples': 10824704, 'steps': 21141, 'loss/train': 3.074453353881836} +02/25/2022 04:58:29 - INFO - codeparrot_training - Step 21142: {'lr': 0.0003281562640976925, 'samples': 10825216, 'steps': 21142, 'loss/train': 2.4185543060302734} +02/25/2022 04:58:32 - INFO - codeparrot_training - Step 21143: {'lr': 0.0003281407216106198, 'samples': 10825728, 'steps': 21143, 'loss/train': 1.5754749774932861} +02/25/2022 04:58:38 - INFO - codeparrot_training - Step 21144: {'lr': 0.0003281251787888171, 'samples': 10826240, 'steps': 21144, 'loss/train': 2.1936450004577637} +02/25/2022 04:58:41 - INFO - codeparrot_training - Step 21145: {'lr': 0.00032810963563235083, 'samples': 10826752, 'steps': 21145, 'loss/train': 0.7120290398597717} +02/25/2022 04:58:47 - INFO - codeparrot_training - Step 21146: {'lr': 0.00032809409214128784, 'samples': 10827264, 'steps': 21146, 'loss/train': 1.374009132385254} +02/25/2022 04:58:50 - INFO - codeparrot_training - Step 21147: {'lr': 0.0003280785483156944, 'samples': 10827776, 'steps': 21147, 'loss/train': 2.015887975692749} +02/25/2022 04:58:56 - INFO - codeparrot_training - Step 21148: {'lr': 0.0003280630041556374, 'samples': 10828288, 'steps': 21148, 'loss/train': 2.152256488800049} +02/25/2022 04:58:59 - INFO - codeparrot_training - Step 21149: {'lr': 0.0003280474596611832, 'samples': 10828800, 'steps': 21149, 'loss/train': 1.2193974256515503} +02/25/2022 04:59:05 - INFO - codeparrot_training - Step 21150: {'lr': 0.0003280319148323985, 'samples': 10829312, 'steps': 21150, 'loss/train': 1.5585376024246216} +02/25/2022 04:59:08 - INFO - codeparrot_training - Step 21151: {'lr': 0.00032801636966934975, 'samples': 10829824, 'steps': 21151, 'loss/train': 2.268585681915283} +02/25/2022 04:59:14 - INFO - codeparrot_training - Step 21152: {'lr': 0.0003280008241721038, 'samples': 10830336, 'steps': 21152, 'loss/train': 2.3331964015960693} +02/25/2022 04:59:17 - INFO - codeparrot_training - Step 21153: {'lr': 0.0003279852783407269, 'samples': 10830848, 'steps': 21153, 'loss/train': 0.49396517872810364} +02/25/2022 04:59:23 - INFO - codeparrot_training - Step 21154: {'lr': 0.00032796973217528595, 'samples': 10831360, 'steps': 21154, 'loss/train': 1.787941575050354} +02/25/2022 04:59:26 - INFO - codeparrot_training - Step 21155: {'lr': 0.00032795418567584746, 'samples': 10831872, 'steps': 21155, 'loss/train': 1.124595046043396} +02/25/2022 04:59:34 - INFO - codeparrot_training - Step 21156: {'lr': 0.00032793863884247794, 'samples': 10832384, 'steps': 21156, 'loss/train': 1.5419896841049194} +02/25/2022 04:59:37 - INFO - codeparrot_training - Step 21157: {'lr': 0.00032792309167524404, 'samples': 10832896, 'steps': 21157, 'loss/train': 8.86019229888916} +02/25/2022 04:59:43 - INFO - codeparrot_training - Step 21158: {'lr': 0.00032790754417421237, 'samples': 10833408, 'steps': 21158, 'loss/train': 2.938297748565674} +02/25/2022 04:59:46 - INFO - codeparrot_training - Step 21159: {'lr': 0.00032789199633944955, 'samples': 10833920, 'steps': 21159, 'loss/train': 2.118612289428711} +02/25/2022 04:59:52 - INFO - codeparrot_training - Step 21160: {'lr': 0.00032787644817102205, 'samples': 10834432, 'steps': 21160, 'loss/train': 1.8754611015319824} +02/25/2022 04:59:55 - INFO - codeparrot_training - Step 21161: {'lr': 0.00032786089966899666, 'samples': 10834944, 'steps': 21161, 'loss/train': 1.1040573120117188} +02/25/2022 05:00:01 - INFO - codeparrot_training - Step 21162: {'lr': 0.0003278453508334399, 'samples': 10835456, 'steps': 21162, 'loss/train': 1.3849550485610962} +02/25/2022 05:00:04 - INFO - codeparrot_training - Step 21163: {'lr': 0.00032782980166441836, 'samples': 10835968, 'steps': 21163, 'loss/train': 1.6671080589294434} +02/25/2022 05:00:10 - INFO - codeparrot_training - Step 21164: {'lr': 0.00032781425216199864, 'samples': 10836480, 'steps': 21164, 'loss/train': 0.5320336222648621} +02/25/2022 05:00:17 - INFO - codeparrot_training - Step 21165: {'lr': 0.00032779870232624737, 'samples': 10836992, 'steps': 21165, 'loss/train': 2.697252035140991} +02/25/2022 05:00:20 - INFO - codeparrot_training - Step 21166: {'lr': 0.0003277831521572312, 'samples': 10837504, 'steps': 21166, 'loss/train': 2.136936664581299} +02/25/2022 05:00:26 - INFO - codeparrot_training - Step 21167: {'lr': 0.00032776760165501663, 'samples': 10838016, 'steps': 21167, 'loss/train': 2.4128663539886475} +02/25/2022 05:00:29 - INFO - codeparrot_training - Step 21168: {'lr': 0.00032775205081967047, 'samples': 10838528, 'steps': 21168, 'loss/train': 2.4195454120635986} +02/25/2022 05:00:35 - INFO - codeparrot_training - Step 21169: {'lr': 0.00032773649965125914, 'samples': 10839040, 'steps': 21169, 'loss/train': 2.199857234954834} +02/25/2022 05:00:38 - INFO - codeparrot_training - Step 21170: {'lr': 0.0003277209481498493, 'samples': 10839552, 'steps': 21170, 'loss/train': 2.2646963596343994} +02/25/2022 05:00:44 - INFO - codeparrot_training - Step 21171: {'lr': 0.00032770539631550767, 'samples': 10840064, 'steps': 21171, 'loss/train': 2.768000364303589} +02/25/2022 05:00:47 - INFO - codeparrot_training - Step 21172: {'lr': 0.0003276898441483008, 'samples': 10840576, 'steps': 21172, 'loss/train': 1.887044906616211} +02/25/2022 05:00:53 - INFO - codeparrot_training - Step 21173: {'lr': 0.0003276742916482952, 'samples': 10841088, 'steps': 21173, 'loss/train': 2.0410633087158203} +02/25/2022 05:00:56 - INFO - codeparrot_training - Step 21174: {'lr': 0.00032765873881555765, 'samples': 10841600, 'steps': 21174, 'loss/train': 1.6916298866271973} +02/25/2022 05:01:03 - INFO - codeparrot_training - Step 21175: {'lr': 0.0003276431856501548, 'samples': 10842112, 'steps': 21175, 'loss/train': 1.9030319452285767} +02/25/2022 05:01:07 - INFO - codeparrot_training - Step 21176: {'lr': 0.0003276276321521531, 'samples': 10842624, 'steps': 21176, 'loss/train': 2.1897199153900146} +02/25/2022 05:01:12 - INFO - codeparrot_training - Step 21177: {'lr': 0.00032761207832161934, 'samples': 10843136, 'steps': 21177, 'loss/train': 1.475611925125122} +02/25/2022 05:01:16 - INFO - codeparrot_training - Step 21178: {'lr': 0.00032759652415862003, 'samples': 10843648, 'steps': 21178, 'loss/train': 2.7832348346710205} +02/25/2022 05:01:21 - INFO - codeparrot_training - Step 21179: {'lr': 0.00032758096966322185, 'samples': 10844160, 'steps': 21179, 'loss/train': 2.833794355392456} +02/25/2022 05:01:25 - INFO - codeparrot_training - Step 21180: {'lr': 0.00032756541483549146, 'samples': 10844672, 'steps': 21180, 'loss/train': 1.7616722583770752} +02/25/2022 05:01:30 - INFO - codeparrot_training - Step 21181: {'lr': 0.00032754985967549545, 'samples': 10845184, 'steps': 21181, 'loss/train': 2.2143542766571045} +02/25/2022 05:01:34 - INFO - codeparrot_training - Step 21182: {'lr': 0.0003275343041833005, 'samples': 10845696, 'steps': 21182, 'loss/train': 2.070802688598633} +02/25/2022 05:01:39 - INFO - codeparrot_training - Step 21183: {'lr': 0.00032751874835897316, 'samples': 10846208, 'steps': 21183, 'loss/train': 1.9777973890304565} +02/25/2022 05:01:43 - INFO - codeparrot_training - Step 21184: {'lr': 0.0003275031922025801, 'samples': 10846720, 'steps': 21184, 'loss/train': 1.5701086521148682} +02/25/2022 05:01:48 - INFO - codeparrot_training - Step 21185: {'lr': 0.00032748763571418805, 'samples': 10847232, 'steps': 21185, 'loss/train': 1.8671663999557495} +02/25/2022 05:01:52 - INFO - codeparrot_training - Step 21186: {'lr': 0.00032747207889386354, 'samples': 10847744, 'steps': 21186, 'loss/train': 1.347330093383789} +02/25/2022 05:01:59 - INFO - codeparrot_training - Step 21187: {'lr': 0.00032745652174167325, 'samples': 10848256, 'steps': 21187, 'loss/train': 1.8753383159637451} +02/25/2022 05:02:02 - INFO - codeparrot_training - Step 21188: {'lr': 0.00032744096425768376, 'samples': 10848768, 'steps': 21188, 'loss/train': 0.994610071182251} +02/25/2022 05:02:08 - INFO - codeparrot_training - Step 21189: {'lr': 0.00032742540644196185, 'samples': 10849280, 'steps': 21189, 'loss/train': 1.4592911005020142} +02/25/2022 05:02:11 - INFO - codeparrot_training - Step 21190: {'lr': 0.00032740984829457404, 'samples': 10849792, 'steps': 21190, 'loss/train': 1.7477768659591675} +02/25/2022 05:02:17 - INFO - codeparrot_training - Step 21191: {'lr': 0.00032739428981558706, 'samples': 10850304, 'steps': 21191, 'loss/train': 1.7833974361419678} +02/25/2022 05:02:20 - INFO - codeparrot_training - Step 21192: {'lr': 0.0003273787310050675, 'samples': 10850816, 'steps': 21192, 'loss/train': 2.370821237564087} +02/25/2022 05:02:26 - INFO - codeparrot_training - Step 21193: {'lr': 0.00032736317186308193, 'samples': 10851328, 'steps': 21193, 'loss/train': 2.1287100315093994} +02/25/2022 05:02:29 - INFO - codeparrot_training - Step 21194: {'lr': 0.00032734761238969724, 'samples': 10851840, 'steps': 21194, 'loss/train': 1.648510456085205} +02/25/2022 05:02:35 - INFO - codeparrot_training - Step 21195: {'lr': 0.00032733205258497994, 'samples': 10852352, 'steps': 21195, 'loss/train': 2.151329755783081} +02/25/2022 05:02:38 - INFO - codeparrot_training - Step 21196: {'lr': 0.0003273164924489966, 'samples': 10852864, 'steps': 21196, 'loss/train': 2.278211832046509} +02/25/2022 05:02:44 - INFO - codeparrot_training - Step 21197: {'lr': 0.000327300931981814, 'samples': 10853376, 'steps': 21197, 'loss/train': 1.2565044164657593} +02/25/2022 05:02:47 - INFO - codeparrot_training - Step 21198: {'lr': 0.0003272853711834987, 'samples': 10853888, 'steps': 21198, 'loss/train': 1.0751640796661377} +02/25/2022 05:02:53 - INFO - codeparrot_training - Step 21199: {'lr': 0.00032726981005411747, 'samples': 10854400, 'steps': 21199, 'loss/train': 2.5215718746185303} +02/25/2022 05:02:57 - INFO - codeparrot_training - Step 21200: {'lr': 0.00032725424859373687, 'samples': 10854912, 'steps': 21200, 'loss/train': 2.211833953857422} +02/25/2022 05:03:03 - INFO - codeparrot_training - Step 21201: {'lr': 0.0003272386868024236, 'samples': 10855424, 'steps': 21201, 'loss/train': 2.257880210876465} +02/25/2022 05:03:06 - INFO - codeparrot_training - Step 21202: {'lr': 0.00032722312468024434, 'samples': 10855936, 'steps': 21202, 'loss/train': 3.337616205215454} +02/25/2022 05:03:12 - INFO - codeparrot_training - Step 21203: {'lr': 0.00032720756222726576, 'samples': 10856448, 'steps': 21203, 'loss/train': 2.967454195022583} +02/25/2022 05:03:15 - INFO - codeparrot_training - Step 21204: {'lr': 0.0003271919994435545, 'samples': 10856960, 'steps': 21204, 'loss/train': 1.9554553031921387} +02/25/2022 05:03:21 - INFO - codeparrot_training - Step 21205: {'lr': 0.0003271764363291772, 'samples': 10857472, 'steps': 21205, 'loss/train': 1.900269627571106} +02/25/2022 05:03:24 - INFO - codeparrot_training - Step 21206: {'lr': 0.00032716087288420047, 'samples': 10857984, 'steps': 21206, 'loss/train': 1.0107386112213135} +02/25/2022 05:03:30 - INFO - codeparrot_training - Step 21207: {'lr': 0.0003271453091086912, 'samples': 10858496, 'steps': 21207, 'loss/train': 1.9128812551498413} +02/25/2022 05:03:33 - INFO - codeparrot_training - Step 21208: {'lr': 0.0003271297450027159, 'samples': 10859008, 'steps': 21208, 'loss/train': 2.0418624877929688} +02/25/2022 05:03:39 - INFO - codeparrot_training - Step 21209: {'lr': 0.0003271141805663412, 'samples': 10859520, 'steps': 21209, 'loss/train': 2.390519618988037} +02/25/2022 05:03:43 - INFO - codeparrot_training - Step 21210: {'lr': 0.00032709861579963384, 'samples': 10860032, 'steps': 21210, 'loss/train': 2.144798517227173} +02/25/2022 05:03:48 - INFO - codeparrot_training - Step 21211: {'lr': 0.00032708305070266053, 'samples': 10860544, 'steps': 21211, 'loss/train': 1.5633108615875244} +02/25/2022 05:03:52 - INFO - codeparrot_training - Step 21212: {'lr': 0.00032706748527548793, 'samples': 10861056, 'steps': 21212, 'loss/train': 2.3127710819244385} +02/25/2022 05:03:57 - INFO - codeparrot_training - Step 21213: {'lr': 0.0003270519195181826, 'samples': 10861568, 'steps': 21213, 'loss/train': 1.4027512073516846} +02/25/2022 05:04:01 - INFO - codeparrot_training - Step 21214: {'lr': 0.00032703635343081137, 'samples': 10862080, 'steps': 21214, 'loss/train': 2.843304395675659} +02/25/2022 05:04:06 - INFO - codeparrot_training - Step 21215: {'lr': 0.0003270207870134408, 'samples': 10862592, 'steps': 21215, 'loss/train': 1.4319182634353638} +02/25/2022 05:04:10 - INFO - codeparrot_training - Step 21216: {'lr': 0.00032700522026613785, 'samples': 10863104, 'steps': 21216, 'loss/train': 1.7666003704071045} +02/25/2022 05:04:15 - INFO - codeparrot_training - Step 21217: {'lr': 0.00032698965318896876, 'samples': 10863616, 'steps': 21217, 'loss/train': 2.059915542602539} +02/25/2022 05:04:19 - INFO - codeparrot_training - Step 21218: {'lr': 0.00032697408578200054, 'samples': 10864128, 'steps': 21218, 'loss/train': 2.2525713443756104} +02/25/2022 05:04:25 - INFO - codeparrot_training - Step 21219: {'lr': 0.00032695851804529977, 'samples': 10864640, 'steps': 21219, 'loss/train': 1.1876782178878784} +02/25/2022 05:04:28 - INFO - codeparrot_training - Step 21220: {'lr': 0.0003269429499789331, 'samples': 10865152, 'steps': 21220, 'loss/train': 0.6411092877388} +02/25/2022 05:04:34 - INFO - codeparrot_training - Step 21221: {'lr': 0.0003269273815829673, 'samples': 10865664, 'steps': 21221, 'loss/train': 2.443061113357544} +02/25/2022 05:04:37 - INFO - codeparrot_training - Step 21222: {'lr': 0.00032691181285746904, 'samples': 10866176, 'steps': 21222, 'loss/train': 2.3866283893585205} +02/25/2022 05:04:43 - INFO - codeparrot_training - Step 21223: {'lr': 0.000326896243802505, 'samples': 10866688, 'steps': 21223, 'loss/train': 0.8457977175712585} +02/25/2022 05:04:46 - INFO - codeparrot_training - Step 21224: {'lr': 0.0003268806744181418, 'samples': 10867200, 'steps': 21224, 'loss/train': 1.7025806903839111} +02/25/2022 05:04:52 - INFO - codeparrot_training - Step 21225: {'lr': 0.0003268651047044463, 'samples': 10867712, 'steps': 21225, 'loss/train': 1.9069551229476929} +02/25/2022 05:04:55 - INFO - codeparrot_training - Step 21226: {'lr': 0.00032684953466148505, 'samples': 10868224, 'steps': 21226, 'loss/train': 1.2650535106658936} +02/25/2022 05:05:01 - INFO - codeparrot_training - Step 21227: {'lr': 0.00032683396428932483, 'samples': 10868736, 'steps': 21227, 'loss/train': 2.2936995029449463} +02/25/2022 05:05:04 - INFO - codeparrot_training - Step 21228: {'lr': 0.0003268183935880322, 'samples': 10869248, 'steps': 21228, 'loss/train': 2.6150200366973877} +02/25/2022 05:05:11 - INFO - codeparrot_training - Step 21229: {'lr': 0.0003268028225576741, 'samples': 10869760, 'steps': 21229, 'loss/train': 1.4549059867858887} +02/25/2022 05:05:14 - INFO - codeparrot_training - Step 21230: {'lr': 0.00032678725119831696, 'samples': 10870272, 'steps': 21230, 'loss/train': 2.556772470474243} +02/25/2022 05:05:20 - INFO - codeparrot_training - Step 21231: {'lr': 0.0003267716795100278, 'samples': 10870784, 'steps': 21231, 'loss/train': 1.857347846031189} +02/25/2022 05:05:23 - INFO - codeparrot_training - Step 21232: {'lr': 0.000326756107492873, 'samples': 10871296, 'steps': 21232, 'loss/train': 1.5590025186538696} +02/25/2022 05:05:29 - INFO - codeparrot_training - Step 21233: {'lr': 0.00032674053514691946, 'samples': 10871808, 'steps': 21233, 'loss/train': 2.4627904891967773} +02/25/2022 05:05:32 - INFO - codeparrot_training - Step 21234: {'lr': 0.00032672496247223377, 'samples': 10872320, 'steps': 21234, 'loss/train': 0.885090708732605} +02/25/2022 05:05:38 - INFO - codeparrot_training - Step 21235: {'lr': 0.00032670938946888277, 'samples': 10872832, 'steps': 21235, 'loss/train': 3.0869076251983643} +02/25/2022 05:05:43 - INFO - codeparrot_training - Step 21236: {'lr': 0.00032669381613693307, 'samples': 10873344, 'steps': 21236, 'loss/train': 1.5500586032867432} +02/25/2022 05:05:47 - INFO - codeparrot_training - Step 21237: {'lr': 0.0003266782424764515, 'samples': 10873856, 'steps': 21237, 'loss/train': 1.2396222352981567} +02/25/2022 05:05:52 - INFO - codeparrot_training - Step 21238: {'lr': 0.0003266626684875046, 'samples': 10874368, 'steps': 21238, 'loss/train': 3.0672528743743896} +02/25/2022 05:05:56 - INFO - codeparrot_training - Step 21239: {'lr': 0.0003266470941701592, 'samples': 10874880, 'steps': 21239, 'loss/train': 1.9160960912704468} +02/25/2022 05:06:02 - INFO - codeparrot_training - Step 21240: {'lr': 0.00032663151952448194, 'samples': 10875392, 'steps': 21240, 'loss/train': 3.1761932373046875} +02/25/2022 05:06:05 - INFO - codeparrot_training - Step 21241: {'lr': 0.00032661594455053966, 'samples': 10875904, 'steps': 21241, 'loss/train': 2.4112744331359863} +02/25/2022 05:06:08 - INFO - codeparrot_training - Step 21242: {'lr': 0.0003266003692483989, 'samples': 10876416, 'steps': 21242, 'loss/train': 1.534583330154419} +02/25/2022 05:06:15 - INFO - codeparrot_training - Step 21243: {'lr': 0.0003265847936181266, 'samples': 10876928, 'steps': 21243, 'loss/train': 1.4044123888015747} +02/25/2022 05:06:19 - INFO - codeparrot_training - Step 21244: {'lr': 0.0003265692176597893, 'samples': 10877440, 'steps': 21244, 'loss/train': 1.7013826370239258} +02/25/2022 05:06:25 - INFO - codeparrot_training - Step 21245: {'lr': 0.0003265536413734538, 'samples': 10877952, 'steps': 21245, 'loss/train': 1.1996538639068604} +02/25/2022 05:06:28 - INFO - codeparrot_training - Step 21246: {'lr': 0.0003265380647591868, 'samples': 10878464, 'steps': 21246, 'loss/train': 2.779387950897217} +02/25/2022 05:06:34 - INFO - codeparrot_training - Step 21247: {'lr': 0.000326522487817055, 'samples': 10878976, 'steps': 21247, 'loss/train': 1.4230057001113892} +02/25/2022 05:06:37 - INFO - codeparrot_training - Step 21248: {'lr': 0.00032650691054712523, 'samples': 10879488, 'steps': 21248, 'loss/train': 3.7371480464935303} +02/25/2022 05:06:43 - INFO - codeparrot_training - Step 21249: {'lr': 0.0003264913329494641, 'samples': 10880000, 'steps': 21249, 'loss/train': 0.5596840977668762} +02/25/2022 05:06:46 - INFO - codeparrot_training - Step 21250: {'lr': 0.0003264757550241384, 'samples': 10880512, 'steps': 21250, 'loss/train': 0.5821067690849304} +02/25/2022 05:06:52 - INFO - codeparrot_training - Step 21251: {'lr': 0.00032646017677121484, 'samples': 10881024, 'steps': 21251, 'loss/train': 1.5946199893951416} +02/25/2022 05:06:55 - INFO - codeparrot_training - Step 21252: {'lr': 0.00032644459819076017, 'samples': 10881536, 'steps': 21252, 'loss/train': 2.3842592239379883} +02/25/2022 05:07:01 - INFO - codeparrot_training - Step 21253: {'lr': 0.00032642901928284115, 'samples': 10882048, 'steps': 21253, 'loss/train': 0.673209547996521} +02/25/2022 05:07:04 - INFO - codeparrot_training - Step 21254: {'lr': 0.0003264134400475244, 'samples': 10882560, 'steps': 21254, 'loss/train': 2.3186614513397217} +02/25/2022 05:07:11 - INFO - codeparrot_training - Step 21255: {'lr': 0.00032639786048487685, 'samples': 10883072, 'steps': 21255, 'loss/train': 2.7216572761535645} +02/25/2022 05:07:14 - INFO - codeparrot_training - Step 21256: {'lr': 0.000326382280594965, 'samples': 10883584, 'steps': 21256, 'loss/train': 2.0145411491394043} +02/25/2022 05:07:20 - INFO - codeparrot_training - Step 21257: {'lr': 0.00032636670037785583, 'samples': 10884096, 'steps': 21257, 'loss/train': 1.768389344215393} +02/25/2022 05:07:24 - INFO - codeparrot_training - Step 21258: {'lr': 0.00032635111983361586, 'samples': 10884608, 'steps': 21258, 'loss/train': 1.6506032943725586} +02/25/2022 05:07:29 - INFO - codeparrot_training - Step 21259: {'lr': 0.0003263355389623119, 'samples': 10885120, 'steps': 21259, 'loss/train': 1.3988884687423706} +02/25/2022 05:07:33 - INFO - codeparrot_training - Step 21260: {'lr': 0.0003263199577640109, 'samples': 10885632, 'steps': 21260, 'loss/train': 2.0579917430877686} +02/25/2022 05:07:38 - INFO - codeparrot_training - Step 21261: {'lr': 0.00032630437623877936, 'samples': 10886144, 'steps': 21261, 'loss/train': 2.7820398807525635} +02/25/2022 05:07:41 - INFO - codeparrot_training - Step 21262: {'lr': 0.00032628879438668414, 'samples': 10886656, 'steps': 21262, 'loss/train': 2.115137815475464} +02/25/2022 05:07:47 - INFO - codeparrot_training - Step 21263: {'lr': 0.00032627321220779184, 'samples': 10887168, 'steps': 21263, 'loss/train': 1.4926291704177856} +02/25/2022 05:07:50 - INFO - codeparrot_training - Step 21264: {'lr': 0.0003262576297021694, 'samples': 10887680, 'steps': 21264, 'loss/train': 2.3401951789855957} +02/25/2022 05:07:57 - INFO - codeparrot_training - Step 21265: {'lr': 0.00032624204686988343, 'samples': 10888192, 'steps': 21265, 'loss/train': 1.9107630252838135} +02/25/2022 05:08:00 - INFO - codeparrot_training - Step 21266: {'lr': 0.00032622646371100085, 'samples': 10888704, 'steps': 21266, 'loss/train': 1.1766200065612793} +02/25/2022 05:08:06 - INFO - codeparrot_training - Step 21267: {'lr': 0.00032621088022558823, 'samples': 10889216, 'steps': 21267, 'loss/train': 1.8639490604400635} +02/25/2022 05:08:09 - INFO - codeparrot_training - Step 21268: {'lr': 0.00032619529641371236, 'samples': 10889728, 'steps': 21268, 'loss/train': 2.769702434539795} +02/25/2022 05:08:15 - INFO - codeparrot_training - Step 21269: {'lr': 0.0003261797122754401, 'samples': 10890240, 'steps': 21269, 'loss/train': 2.4219839572906494} +02/25/2022 05:08:18 - INFO - codeparrot_training - Step 21270: {'lr': 0.00032616412781083813, 'samples': 10890752, 'steps': 21270, 'loss/train': 1.6763619184494019} +02/25/2022 05:08:24 - INFO - codeparrot_training - Step 21271: {'lr': 0.00032614854301997323, 'samples': 10891264, 'steps': 21271, 'loss/train': 2.125734806060791} +02/25/2022 05:08:27 - INFO - codeparrot_training - Step 21272: {'lr': 0.0003261329579029121, 'samples': 10891776, 'steps': 21272, 'loss/train': 2.938096523284912} +02/25/2022 05:08:33 - INFO - codeparrot_training - Step 21273: {'lr': 0.00032611737245972155, 'samples': 10892288, 'steps': 21273, 'loss/train': 2.756913661956787} +02/25/2022 05:08:36 - INFO - codeparrot_training - Step 21274: {'lr': 0.0003261017866904684, 'samples': 10892800, 'steps': 21274, 'loss/train': 0.14569512009620667} +02/25/2022 05:08:42 - INFO - codeparrot_training - Step 21275: {'lr': 0.0003260862005952193, 'samples': 10893312, 'steps': 21275, 'loss/train': 1.828144907951355} +02/25/2022 05:08:45 - INFO - codeparrot_training - Step 21276: {'lr': 0.00032607061417404113, 'samples': 10893824, 'steps': 21276, 'loss/train': 1.659318208694458} +02/25/2022 05:08:51 - INFO - codeparrot_training - Step 21277: {'lr': 0.0003260550274270007, 'samples': 10894336, 'steps': 21277, 'loss/train': 2.3319449424743652} +02/25/2022 05:08:54 - INFO - codeparrot_training - Step 21278: {'lr': 0.0003260394403541645, 'samples': 10894848, 'steps': 21278, 'loss/train': 1.7328227758407593} +02/25/2022 05:09:01 - INFO - codeparrot_training - Step 21279: {'lr': 0.00032602385295559953, 'samples': 10895360, 'steps': 21279, 'loss/train': 1.6713923215866089} +02/25/2022 05:09:04 - INFO - codeparrot_training - Step 21280: {'lr': 0.00032600826523137254, 'samples': 10895872, 'steps': 21280, 'loss/train': 2.395864963531494} +02/25/2022 05:09:10 - INFO - codeparrot_training - Step 21281: {'lr': 0.00032599267718155036, 'samples': 10896384, 'steps': 21281, 'loss/train': 1.2828642129898071} +02/25/2022 05:09:13 - INFO - codeparrot_training - Step 21282: {'lr': 0.0003259770888061995, 'samples': 10896896, 'steps': 21282, 'loss/train': 1.498377799987793} +02/25/2022 05:09:18 - INFO - codeparrot_training - Step 21283: {'lr': 0.0003259615001053871, 'samples': 10897408, 'steps': 21283, 'loss/train': 2.6201424598693848} +02/25/2022 05:09:22 - INFO - codeparrot_training - Step 21284: {'lr': 0.0003259459110791797, 'samples': 10897920, 'steps': 21284, 'loss/train': 2.3451859951019287} +02/25/2022 05:09:28 - INFO - codeparrot_training - Step 21285: {'lr': 0.0003259303217276441, 'samples': 10898432, 'steps': 21285, 'loss/train': 2.6213128566741943} +02/25/2022 05:09:31 - INFO - codeparrot_training - Step 21286: {'lr': 0.0003259147320508471, 'samples': 10898944, 'steps': 21286, 'loss/train': 2.1762027740478516} +02/25/2022 05:09:37 - INFO - codeparrot_training - Step 21287: {'lr': 0.0003258991420488555, 'samples': 10899456, 'steps': 21287, 'loss/train': 2.7658660411834717} +02/25/2022 05:09:40 - INFO - codeparrot_training - Step 21288: {'lr': 0.0003258835517217361, 'samples': 10899968, 'steps': 21288, 'loss/train': 1.5106842517852783} +02/25/2022 05:09:46 - INFO - codeparrot_training - Step 21289: {'lr': 0.0003258679610695556, 'samples': 10900480, 'steps': 21289, 'loss/train': 2.2157793045043945} +02/25/2022 05:09:49 - INFO - codeparrot_training - Step 21290: {'lr': 0.000325852370092381, 'samples': 10900992, 'steps': 21290, 'loss/train': 1.886971116065979} +02/25/2022 05:09:55 - INFO - codeparrot_training - Step 21291: {'lr': 0.00032583677879027877, 'samples': 10901504, 'steps': 21291, 'loss/train': 0.7840352654457092} +02/25/2022 05:09:59 - INFO - codeparrot_training - Step 21292: {'lr': 0.00032582118716331594, 'samples': 10902016, 'steps': 21292, 'loss/train': 1.838408350944519} +02/25/2022 05:10:04 - INFO - codeparrot_training - Step 21293: {'lr': 0.00032580559521155916, 'samples': 10902528, 'steps': 21293, 'loss/train': 2.238321304321289} +02/25/2022 05:10:08 - INFO - codeparrot_training - Step 21294: {'lr': 0.00032579000293507537, 'samples': 10903040, 'steps': 21294, 'loss/train': 1.330949068069458} +02/25/2022 05:10:13 - INFO - codeparrot_training - Step 21295: {'lr': 0.0003257744103339312, 'samples': 10903552, 'steps': 21295, 'loss/train': 1.9749799966812134} +02/25/2022 05:10:17 - INFO - codeparrot_training - Step 21296: {'lr': 0.00032575881740819353, 'samples': 10904064, 'steps': 21296, 'loss/train': 2.121551275253296} +02/25/2022 05:10:22 - INFO - codeparrot_training - Step 21297: {'lr': 0.0003257432241579291, 'samples': 10904576, 'steps': 21297, 'loss/train': 1.8827018737792969} +02/25/2022 05:10:26 - INFO - codeparrot_training - Step 21298: {'lr': 0.00032572763058320484, 'samples': 10905088, 'steps': 21298, 'loss/train': 2.8160476684570312} +02/25/2022 05:10:31 - INFO - codeparrot_training - Step 21299: {'lr': 0.00032571203668408744, 'samples': 10905600, 'steps': 21299, 'loss/train': 2.4017887115478516} +02/25/2022 05:10:37 - INFO - codeparrot_training - Step 21300: {'lr': 0.0003256964424606437, 'samples': 10906112, 'steps': 21300, 'loss/train': 1.7928662300109863} +02/25/2022 05:10:41 - INFO - codeparrot_training - Step 21301: {'lr': 0.0003256808479129404, 'samples': 10906624, 'steps': 21301, 'loss/train': 2.3589839935302734} +02/25/2022 05:10:46 - INFO - codeparrot_training - Step 21302: {'lr': 0.0003256652530410444, 'samples': 10907136, 'steps': 21302, 'loss/train': 2.0396180152893066} +02/25/2022 05:10:50 - INFO - codeparrot_training - Step 21303: {'lr': 0.00032564965784502255, 'samples': 10907648, 'steps': 21303, 'loss/train': 3.246683120727539} +02/25/2022 05:10:56 - INFO - codeparrot_training - Step 21304: {'lr': 0.0003256340623249415, 'samples': 10908160, 'steps': 21304, 'loss/train': 2.8570749759674072} +02/25/2022 05:10:59 - INFO - codeparrot_training - Step 21305: {'lr': 0.00032561846648086816, 'samples': 10908672, 'steps': 21305, 'loss/train': 3.0938119888305664} +02/25/2022 05:11:02 - INFO - codeparrot_training - Step 21306: {'lr': 0.0003256028703128693, 'samples': 10909184, 'steps': 21306, 'loss/train': 2.3098132610321045} +02/25/2022 05:11:08 - INFO - codeparrot_training - Step 21307: {'lr': 0.0003255872738210118, 'samples': 10909696, 'steps': 21307, 'loss/train': 1.508814811706543} +02/25/2022 05:11:14 - INFO - codeparrot_training - Step 21308: {'lr': 0.0003255716770053624, 'samples': 10910208, 'steps': 21308, 'loss/train': 2.027381420135498} +02/25/2022 05:11:18 - INFO - codeparrot_training - Step 21309: {'lr': 0.0003255560798659879, 'samples': 10910720, 'steps': 21309, 'loss/train': 2.1558380126953125} +02/25/2022 05:11:21 - INFO - codeparrot_training - Step 21310: {'lr': 0.0003255404824029552, 'samples': 10911232, 'steps': 21310, 'loss/train': 2.34118914604187} +02/25/2022 05:11:28 - INFO - codeparrot_training - Step 21311: {'lr': 0.00032552488461633103, 'samples': 10911744, 'steps': 21311, 'loss/train': 2.317178726196289} +02/25/2022 05:11:32 - INFO - codeparrot_training - Step 21312: {'lr': 0.00032550928650618225, 'samples': 10912256, 'steps': 21312, 'loss/train': 2.280752182006836} +02/25/2022 05:11:37 - INFO - codeparrot_training - Step 21313: {'lr': 0.0003254936880725757, 'samples': 10912768, 'steps': 21313, 'loss/train': 1.3541532754898071} +02/25/2022 05:11:40 - INFO - codeparrot_training - Step 21314: {'lr': 0.0003254780893155781, 'samples': 10913280, 'steps': 21314, 'loss/train': 1.4611402750015259} +02/25/2022 05:11:46 - INFO - codeparrot_training - Step 21315: {'lr': 0.00032546249023525636, 'samples': 10913792, 'steps': 21315, 'loss/train': 1.0965029001235962} +02/25/2022 05:11:50 - INFO - codeparrot_training - Step 21316: {'lr': 0.00032544689083167727, 'samples': 10914304, 'steps': 21316, 'loss/train': 2.3084423542022705} +02/25/2022 05:11:55 - INFO - codeparrot_training - Step 21317: {'lr': 0.00032543129110490764, 'samples': 10914816, 'steps': 21317, 'loss/train': 2.16798734664917} +02/25/2022 05:11:58 - INFO - codeparrot_training - Step 21318: {'lr': 0.00032541569105501433, 'samples': 10915328, 'steps': 21318, 'loss/train': 0.34300774335861206} +02/25/2022 05:12:04 - INFO - codeparrot_training - Step 21319: {'lr': 0.0003254000906820642, 'samples': 10915840, 'steps': 21319, 'loss/train': 1.1201236248016357} +02/25/2022 05:12:07 - INFO - codeparrot_training - Step 21320: {'lr': 0.0003253844899861239, 'samples': 10916352, 'steps': 21320, 'loss/train': 0.29911482334136963} +02/25/2022 05:12:13 - INFO - codeparrot_training - Step 21321: {'lr': 0.00032536888896726046, 'samples': 10916864, 'steps': 21321, 'loss/train': 2.0089926719665527} +02/25/2022 05:12:16 - INFO - codeparrot_training - Step 21322: {'lr': 0.00032535328762554064, 'samples': 10917376, 'steps': 21322, 'loss/train': 1.7739598751068115} +02/25/2022 05:12:22 - INFO - codeparrot_training - Step 21323: {'lr': 0.00032533768596103123, 'samples': 10917888, 'steps': 21323, 'loss/train': 1.973223090171814} +02/25/2022 05:12:25 - INFO - codeparrot_training - Step 21324: {'lr': 0.0003253220839737991, 'samples': 10918400, 'steps': 21324, 'loss/train': 2.3111445903778076} +02/25/2022 05:12:32 - INFO - codeparrot_training - Step 21325: {'lr': 0.00032530648166391115, 'samples': 10918912, 'steps': 21325, 'loss/train': 2.7867369651794434} +02/25/2022 05:12:35 - INFO - codeparrot_training - Step 21326: {'lr': 0.00032529087903143407, 'samples': 10919424, 'steps': 21326, 'loss/train': 1.4398127794265747} +02/25/2022 05:12:41 - INFO - codeparrot_training - Step 21327: {'lr': 0.00032527527607643475, 'samples': 10919936, 'steps': 21327, 'loss/train': 1.3045326471328735} +02/25/2022 05:12:44 - INFO - codeparrot_training - Step 21328: {'lr': 0.00032525967279898015, 'samples': 10920448, 'steps': 21328, 'loss/train': 1.9129797220230103} +02/25/2022 05:12:50 - INFO - codeparrot_training - Step 21329: {'lr': 0.0003252440691991369, 'samples': 10920960, 'steps': 21329, 'loss/train': 1.9628323316574097} +02/25/2022 05:12:53 - INFO - codeparrot_training - Step 21330: {'lr': 0.000325228465276972, 'samples': 10921472, 'steps': 21330, 'loss/train': 0.9939577579498291} +02/25/2022 05:12:59 - INFO - codeparrot_training - Step 21331: {'lr': 0.00032521286103255224, 'samples': 10921984, 'steps': 21331, 'loss/train': 2.592625617980957} +02/25/2022 05:13:02 - INFO - codeparrot_training - Step 21332: {'lr': 0.0003251972564659445, 'samples': 10922496, 'steps': 21332, 'loss/train': 0.34064191579818726} +02/25/2022 05:13:08 - INFO - codeparrot_training - Step 21333: {'lr': 0.00032518165157721554, 'samples': 10923008, 'steps': 21333, 'loss/train': 2.357318639755249} +02/25/2022 05:13:12 - INFO - codeparrot_training - Step 21334: {'lr': 0.00032516604636643234, 'samples': 10923520, 'steps': 21334, 'loss/train': 1.158659815788269} +02/25/2022 05:13:15 - INFO - codeparrot_training - Step 21335: {'lr': 0.00032515044083366153, 'samples': 10924032, 'steps': 21335, 'loss/train': 1.301227331161499} +02/25/2022 05:13:20 - INFO - codeparrot_training - Step 21336: {'lr': 0.0003251348349789702, 'samples': 10924544, 'steps': 21336, 'loss/train': 1.601779580116272} +02/25/2022 05:13:24 - INFO - codeparrot_training - Step 21337: {'lr': 0.00032511922880242505, 'samples': 10925056, 'steps': 21337, 'loss/train': 1.9722219705581665} +02/25/2022 05:13:30 - INFO - codeparrot_training - Step 21338: {'lr': 0.00032510362230409295, 'samples': 10925568, 'steps': 21338, 'loss/train': 1.6293399333953857} +02/25/2022 05:13:34 - INFO - codeparrot_training - Step 21339: {'lr': 0.0003250880154840408, 'samples': 10926080, 'steps': 21339, 'loss/train': 2.160531520843506} +02/25/2022 05:13:39 - INFO - codeparrot_training - Step 21340: {'lr': 0.0003250724083423355, 'samples': 10926592, 'steps': 21340, 'loss/train': 1.19654381275177} +02/25/2022 05:13:43 - INFO - codeparrot_training - Step 21341: {'lr': 0.00032505680087904375, 'samples': 10927104, 'steps': 21341, 'loss/train': 0.9483667016029358} +02/25/2022 05:13:49 - INFO - codeparrot_training - Step 21342: {'lr': 0.0003250411930942326, 'samples': 10927616, 'steps': 21342, 'loss/train': 2.331989049911499} +02/25/2022 05:13:52 - INFO - codeparrot_training - Step 21343: {'lr': 0.00032502558498796876, 'samples': 10928128, 'steps': 21343, 'loss/train': 2.6511056423187256} +02/25/2022 05:13:58 - INFO - codeparrot_training - Step 21344: {'lr': 0.00032500997656031907, 'samples': 10928640, 'steps': 21344, 'loss/train': 2.0560503005981445} +02/25/2022 05:14:01 - INFO - codeparrot_training - Step 21345: {'lr': 0.0003249943678113505, 'samples': 10929152, 'steps': 21345, 'loss/train': 1.9853085279464722} +02/25/2022 05:14:07 - INFO - codeparrot_training - Step 21346: {'lr': 0.00032497875874112995, 'samples': 10929664, 'steps': 21346, 'loss/train': 1.886953592300415} +02/25/2022 05:14:10 - INFO - codeparrot_training - Step 21347: {'lr': 0.0003249631493497241, 'samples': 10930176, 'steps': 21347, 'loss/train': 2.412015676498413} +02/25/2022 05:14:16 - INFO - codeparrot_training - Step 21348: {'lr': 0.0003249475396371999, 'samples': 10930688, 'steps': 21348, 'loss/train': 1.7576769590377808} +02/25/2022 05:14:20 - INFO - codeparrot_training - Step 21349: {'lr': 0.00032493192960362437, 'samples': 10931200, 'steps': 21349, 'loss/train': 1.962602972984314} +02/25/2022 05:14:25 - INFO - codeparrot_training - Step 21350: {'lr': 0.00032491631924906416, 'samples': 10931712, 'steps': 21350, 'loss/train': 1.3196920156478882} +02/25/2022 05:14:29 - INFO - codeparrot_training - Step 21351: {'lr': 0.0003249007085735863, 'samples': 10932224, 'steps': 21351, 'loss/train': 2.489952325820923} +02/25/2022 05:14:34 - INFO - codeparrot_training - Step 21352: {'lr': 0.0003248850975772575, 'samples': 10932736, 'steps': 21352, 'loss/train': 1.9956902265548706} +02/25/2022 05:14:38 - INFO - codeparrot_training - Step 21353: {'lr': 0.00032486948626014476, 'samples': 10933248, 'steps': 21353, 'loss/train': 0.5638812780380249} +02/25/2022 05:14:44 - INFO - codeparrot_training - Step 21354: {'lr': 0.00032485387462231484, 'samples': 10933760, 'steps': 21354, 'loss/train': 1.377614140510559} +02/25/2022 05:14:47 - INFO - codeparrot_training - Step 21355: {'lr': 0.0003248382626638348, 'samples': 10934272, 'steps': 21355, 'loss/train': 1.6708141565322876} +02/25/2022 05:14:52 - INFO - codeparrot_training - Step 21356: {'lr': 0.0003248226503847714, 'samples': 10934784, 'steps': 21356, 'loss/train': 1.6795576810836792} +02/25/2022 05:14:56 - INFO - codeparrot_training - Step 21357: {'lr': 0.00032480703778519146, 'samples': 10935296, 'steps': 21357, 'loss/train': 2.426008939743042} +02/25/2022 05:15:02 - INFO - codeparrot_training - Step 21358: {'lr': 0.00032479142486516193, 'samples': 10935808, 'steps': 21358, 'loss/train': 1.647079348564148} +02/25/2022 05:15:06 - INFO - codeparrot_training - Step 21359: {'lr': 0.00032477581162474974, 'samples': 10936320, 'steps': 21359, 'loss/train': 1.9271972179412842} +02/25/2022 05:15:11 - INFO - codeparrot_training - Step 21360: {'lr': 0.0003247601980640217, 'samples': 10936832, 'steps': 21360, 'loss/train': 0.4454154670238495} +02/25/2022 05:15:15 - INFO - codeparrot_training - Step 21361: {'lr': 0.0003247445841830446, 'samples': 10937344, 'steps': 21361, 'loss/train': 2.4724366664886475} +02/25/2022 05:15:20 - INFO - codeparrot_training - Step 21362: {'lr': 0.0003247289699818856, 'samples': 10937856, 'steps': 21362, 'loss/train': 2.876556634902954} +02/25/2022 05:15:24 - INFO - codeparrot_training - Step 21363: {'lr': 0.0003247133554606113, 'samples': 10938368, 'steps': 21363, 'loss/train': 1.9389662742614746} +02/25/2022 05:15:29 - INFO - codeparrot_training - Step 21364: {'lr': 0.0003246977406192888, 'samples': 10938880, 'steps': 21364, 'loss/train': 1.802810549736023} +02/25/2022 05:15:33 - INFO - codeparrot_training - Step 21365: {'lr': 0.00032468212545798484, 'samples': 10939392, 'steps': 21365, 'loss/train': 1.1155353784561157} +02/25/2022 05:15:38 - INFO - codeparrot_training - Step 21366: {'lr': 0.0003246665099767664, 'samples': 10939904, 'steps': 21366, 'loss/train': 2.016072988510132} +02/25/2022 05:15:42 - INFO - codeparrot_training - Step 21367: {'lr': 0.0003246508941757004, 'samples': 10940416, 'steps': 21367, 'loss/train': 1.5841889381408691} +02/25/2022 05:15:47 - INFO - codeparrot_training - Step 21368: {'lr': 0.0003246352780548536, 'samples': 10940928, 'steps': 21368, 'loss/train': 1.494071125984192} +02/25/2022 05:15:51 - INFO - codeparrot_training - Step 21369: {'lr': 0.0003246196616142929, 'samples': 10941440, 'steps': 21369, 'loss/train': 1.6460036039352417} +02/25/2022 05:15:56 - INFO - codeparrot_training - Step 21370: {'lr': 0.0003246040448540854, 'samples': 10941952, 'steps': 21370, 'loss/train': 0.8125658631324768} +02/25/2022 05:16:00 - INFO - codeparrot_training - Step 21371: {'lr': 0.00032458842777429776, 'samples': 10942464, 'steps': 21371, 'loss/train': 1.5591201782226562} +02/25/2022 05:16:05 - INFO - codeparrot_training - Step 21372: {'lr': 0.00032457281037499706, 'samples': 10942976, 'steps': 21372, 'loss/train': 1.009738564491272} +02/25/2022 05:16:09 - INFO - codeparrot_training - Step 21373: {'lr': 0.0003245571926562501, 'samples': 10943488, 'steps': 21373, 'loss/train': 2.5773777961730957} +02/25/2022 05:16:15 - INFO - codeparrot_training - Step 21374: {'lr': 0.0003245415746181237, 'samples': 10944000, 'steps': 21374, 'loss/train': 1.5520761013031006} +02/25/2022 05:16:18 - INFO - codeparrot_training - Step 21375: {'lr': 0.00032452595626068504, 'samples': 10944512, 'steps': 21375, 'loss/train': 2.8511366844177246} +02/25/2022 05:16:24 - INFO - codeparrot_training - Step 21376: {'lr': 0.0003245103375840007, 'samples': 10945024, 'steps': 21376, 'loss/train': 2.3014512062072754} +02/25/2022 05:16:27 - INFO - codeparrot_training - Step 21377: {'lr': 0.0003244947185881378, 'samples': 10945536, 'steps': 21377, 'loss/train': 1.6308921575546265} +02/25/2022 05:16:33 - INFO - codeparrot_training - Step 21378: {'lr': 0.00032447909927316317, 'samples': 10946048, 'steps': 21378, 'loss/train': 1.7439149618148804} +02/25/2022 05:16:36 - INFO - codeparrot_training - Step 21379: {'lr': 0.00032446347963914376, 'samples': 10946560, 'steps': 21379, 'loss/train': 0.8212968707084656} +02/25/2022 05:16:42 - INFO - codeparrot_training - Step 21380: {'lr': 0.0003244478596861464, 'samples': 10947072, 'steps': 21380, 'loss/train': 2.1549930572509766} +02/25/2022 05:16:46 - INFO - codeparrot_training - Step 21381: {'lr': 0.000324432239414238, 'samples': 10947584, 'steps': 21381, 'loss/train': 2.2635746002197266} +02/25/2022 05:16:51 - INFO - codeparrot_training - Step 21382: {'lr': 0.0003244166188234856, 'samples': 10948096, 'steps': 21382, 'loss/train': 0.44461098313331604} +02/25/2022 05:16:55 - INFO - codeparrot_training - Step 21383: {'lr': 0.000324400997913956, 'samples': 10948608, 'steps': 21383, 'loss/train': 1.6025444269180298} +02/25/2022 05:17:01 - INFO - codeparrot_training - Step 21384: {'lr': 0.0003243853766857162, 'samples': 10949120, 'steps': 21384, 'loss/train': 2.54162859916687} +02/25/2022 05:17:05 - INFO - codeparrot_training - Step 21385: {'lr': 0.000324369755138833, 'samples': 10949632, 'steps': 21385, 'loss/train': 1.9391682147979736} +02/25/2022 05:17:10 - INFO - codeparrot_training - Step 21386: {'lr': 0.0003243541332733734, 'samples': 10950144, 'steps': 21386, 'loss/train': 2.18705677986145} +02/25/2022 05:17:14 - INFO - codeparrot_training - Step 21387: {'lr': 0.00032433851108940433, 'samples': 10950656, 'steps': 21387, 'loss/train': 3.22505259513855} +02/25/2022 05:17:19 - INFO - codeparrot_training - Step 21388: {'lr': 0.0003243228885869927, 'samples': 10951168, 'steps': 21388, 'loss/train': 3.6134586334228516} +02/25/2022 05:17:23 - INFO - codeparrot_training - Step 21389: {'lr': 0.0003243072657662054, 'samples': 10951680, 'steps': 21389, 'loss/train': 1.7234821319580078} +02/25/2022 05:17:28 - INFO - codeparrot_training - Step 21390: {'lr': 0.00032429164262710934, 'samples': 10952192, 'steps': 21390, 'loss/train': 2.198101758956909} +02/25/2022 05:17:32 - INFO - codeparrot_training - Step 21391: {'lr': 0.0003242760191697714, 'samples': 10952704, 'steps': 21391, 'loss/train': 0.9394028186798096} +02/25/2022 05:17:37 - INFO - codeparrot_training - Step 21392: {'lr': 0.0003242603953942587, 'samples': 10953216, 'steps': 21392, 'loss/train': 1.871528148651123} +02/25/2022 05:17:41 - INFO - codeparrot_training - Step 21393: {'lr': 0.00032424477130063806, 'samples': 10953728, 'steps': 21393, 'loss/train': 1.598361849784851} +02/25/2022 05:17:48 - INFO - codeparrot_training - Step 21394: {'lr': 0.0003242291468889763, 'samples': 10954240, 'steps': 21394, 'loss/train': 2.190145492553711} +02/25/2022 05:17:51 - INFO - codeparrot_training - Step 21395: {'lr': 0.0003242135221593405, 'samples': 10954752, 'steps': 21395, 'loss/train': 1.4833991527557373} +02/25/2022 05:17:57 - INFO - codeparrot_training - Step 21396: {'lr': 0.0003241978971117976, 'samples': 10955264, 'steps': 21396, 'loss/train': 1.6480028629302979} +02/25/2022 05:18:00 - INFO - codeparrot_training - Step 21397: {'lr': 0.0003241822717464144, 'samples': 10955776, 'steps': 21397, 'loss/train': 1.575247883796692} +02/25/2022 05:18:05 - INFO - codeparrot_training - Step 21398: {'lr': 0.0003241666460632579, 'samples': 10956288, 'steps': 21398, 'loss/train': 1.7289868593215942} +02/25/2022 05:18:09 - INFO - codeparrot_training - Step 21399: {'lr': 0.00032415102006239506, 'samples': 10956800, 'steps': 21399, 'loss/train': 1.049124836921692} +02/25/2022 05:18:15 - INFO - codeparrot_training - Step 21400: {'lr': 0.0003241353937438927, 'samples': 10957312, 'steps': 21400, 'loss/train': 2.3949666023254395} +02/25/2022 05:18:18 - INFO - codeparrot_training - Step 21401: {'lr': 0.000324119767107818, 'samples': 10957824, 'steps': 21401, 'loss/train': 1.219030737876892} +02/25/2022 05:18:23 - INFO - codeparrot_training - Step 21402: {'lr': 0.0003241041401542377, 'samples': 10958336, 'steps': 21402, 'loss/train': 1.8786659240722656} +02/25/2022 05:18:27 - INFO - codeparrot_training - Step 21403: {'lr': 0.0003240885128832188, 'samples': 10958848, 'steps': 21403, 'loss/train': 2.7543208599090576} +02/25/2022 05:18:33 - INFO - codeparrot_training - Step 21404: {'lr': 0.0003240728852948281, 'samples': 10959360, 'steps': 21404, 'loss/train': 1.6549605131149292} +02/25/2022 05:18:37 - INFO - codeparrot_training - Step 21405: {'lr': 0.00032405725738913284, 'samples': 10959872, 'steps': 21405, 'loss/train': 2.694563865661621} +02/25/2022 05:18:42 - INFO - codeparrot_training - Step 21406: {'lr': 0.0003240416291661998, 'samples': 10960384, 'steps': 21406, 'loss/train': 1.2894922494888306} +02/25/2022 05:18:46 - INFO - codeparrot_training - Step 21407: {'lr': 0.0003240260006260959, 'samples': 10960896, 'steps': 21407, 'loss/train': 2.1315720081329346} +02/25/2022 05:18:51 - INFO - codeparrot_training - Step 21408: {'lr': 0.0003240103717688881, 'samples': 10961408, 'steps': 21408, 'loss/train': 0.43893662095069885} +02/25/2022 05:18:55 - INFO - codeparrot_training - Step 21409: {'lr': 0.00032399474259464336, 'samples': 10961920, 'steps': 21409, 'loss/train': 1.3445855379104614} +02/25/2022 05:19:00 - INFO - codeparrot_training - Step 21410: {'lr': 0.0003239791131034287, 'samples': 10962432, 'steps': 21410, 'loss/train': 1.8981529474258423} +02/25/2022 05:19:04 - INFO - codeparrot_training - Step 21411: {'lr': 0.00032396348329531097, 'samples': 10962944, 'steps': 21411, 'loss/train': 2.38852596282959} +02/25/2022 05:19:09 - INFO - codeparrot_training - Step 21412: {'lr': 0.0003239478531703571, 'samples': 10963456, 'steps': 21412, 'loss/train': 2.345404863357544} +02/25/2022 05:19:13 - INFO - codeparrot_training - Step 21413: {'lr': 0.0003239322227286343, 'samples': 10963968, 'steps': 21413, 'loss/train': 2.1669015884399414} +02/25/2022 05:19:18 - INFO - codeparrot_training - Step 21414: {'lr': 0.0003239165919702092, 'samples': 10964480, 'steps': 21414, 'loss/train': 2.0201916694641113} +02/25/2022 05:19:22 - INFO - codeparrot_training - Step 21415: {'lr': 0.0003239009608951489, 'samples': 10964992, 'steps': 21415, 'loss/train': 1.5376311540603638} +02/25/2022 05:19:27 - INFO - codeparrot_training - Step 21416: {'lr': 0.0003238853295035203, 'samples': 10965504, 'steps': 21416, 'loss/train': 2.01336932182312} +02/25/2022 05:19:31 - INFO - codeparrot_training - Step 21417: {'lr': 0.0003238696977953905, 'samples': 10966016, 'steps': 21417, 'loss/train': 2.6084911823272705} +02/25/2022 05:19:36 - INFO - codeparrot_training - Step 21418: {'lr': 0.0003238540657708263, 'samples': 10966528, 'steps': 21418, 'loss/train': 1.6179522275924683} +02/25/2022 05:19:40 - INFO - codeparrot_training - Step 21419: {'lr': 0.0003238384334298948, 'samples': 10967040, 'steps': 21419, 'loss/train': 3.299560308456421} +02/25/2022 05:19:46 - INFO - codeparrot_training - Step 21420: {'lr': 0.0003238228007726628, 'samples': 10967552, 'steps': 21420, 'loss/train': 1.4794039726257324} +02/25/2022 05:19:50 - INFO - codeparrot_training - Step 21421: {'lr': 0.00032380716779919745, 'samples': 10968064, 'steps': 21421, 'loss/train': 1.9403265714645386} +02/25/2022 05:19:55 - INFO - codeparrot_training - Step 21422: {'lr': 0.00032379153450956555, 'samples': 10968576, 'steps': 21422, 'loss/train': 1.8774206638336182} +02/25/2022 05:19:59 - INFO - codeparrot_training - Step 21423: {'lr': 0.0003237759009038342, 'samples': 10969088, 'steps': 21423, 'loss/train': 1.940247654914856} +02/25/2022 05:20:04 - INFO - codeparrot_training - Step 21424: {'lr': 0.0003237602669820703, 'samples': 10969600, 'steps': 21424, 'loss/train': 1.4444103240966797} +02/25/2022 05:20:08 - INFO - codeparrot_training - Step 21425: {'lr': 0.00032374463274434097, 'samples': 10970112, 'steps': 21425, 'loss/train': 2.090341091156006} +02/25/2022 05:20:13 - INFO - codeparrot_training - Step 21426: {'lr': 0.0003237289981907129, 'samples': 10970624, 'steps': 21426, 'loss/train': 1.3708947896957397} +02/25/2022 05:20:17 - INFO - codeparrot_training - Step 21427: {'lr': 0.00032371336332125323, 'samples': 10971136, 'steps': 21427, 'loss/train': 1.4762861728668213} +02/25/2022 05:20:22 - INFO - codeparrot_training - Step 21428: {'lr': 0.0003236977281360289, 'samples': 10971648, 'steps': 21428, 'loss/train': 2.433065891265869} +02/25/2022 05:20:26 - INFO - codeparrot_training - Step 21429: {'lr': 0.00032368209263510694, 'samples': 10972160, 'steps': 21429, 'loss/train': 0.7332336902618408} +02/25/2022 05:20:33 - INFO - codeparrot_training - Step 21430: {'lr': 0.00032366645681855435, 'samples': 10972672, 'steps': 21430, 'loss/train': 2.2203409671783447} +02/25/2022 05:20:36 - INFO - codeparrot_training - Step 21431: {'lr': 0.0003236508206864379, 'samples': 10973184, 'steps': 21431, 'loss/train': 1.782835841178894} +02/25/2022 05:20:42 - INFO - codeparrot_training - Step 21432: {'lr': 0.0003236351842388249, 'samples': 10973696, 'steps': 21432, 'loss/train': 2.293973684310913} +02/25/2022 05:20:45 - INFO - codeparrot_training - Step 21433: {'lr': 0.00032361954747578203, 'samples': 10974208, 'steps': 21433, 'loss/train': 1.5398344993591309} +02/25/2022 05:20:51 - INFO - codeparrot_training - Step 21434: {'lr': 0.00032360391039737646, 'samples': 10974720, 'steps': 21434, 'loss/train': 2.6571176052093506} +02/25/2022 05:20:54 - INFO - codeparrot_training - Step 21435: {'lr': 0.00032358827300367504, 'samples': 10975232, 'steps': 21435, 'loss/train': 1.572434902191162} +02/25/2022 05:21:00 - INFO - codeparrot_training - Step 21436: {'lr': 0.0003235726352947449, 'samples': 10975744, 'steps': 21436, 'loss/train': 1.5795124769210815} +02/25/2022 05:21:03 - INFO - codeparrot_training - Step 21437: {'lr': 0.0003235569972706529, 'samples': 10976256, 'steps': 21437, 'loss/train': 1.3008886575698853} +02/25/2022 05:21:09 - INFO - codeparrot_training - Step 21438: {'lr': 0.00032354135893146614, 'samples': 10976768, 'steps': 21438, 'loss/train': 1.5897101163864136} +02/25/2022 05:21:12 - INFO - codeparrot_training - Step 21439: {'lr': 0.0003235257202772515, 'samples': 10977280, 'steps': 21439, 'loss/train': 1.3608970642089844} +02/25/2022 05:21:19 - INFO - codeparrot_training - Step 21440: {'lr': 0.000323510081308076, 'samples': 10977792, 'steps': 21440, 'loss/train': 2.2303550243377686} +02/25/2022 05:21:22 - INFO - codeparrot_training - Step 21441: {'lr': 0.00032349444202400666, 'samples': 10978304, 'steps': 21441, 'loss/train': 2.004150390625} +02/25/2022 05:21:28 - INFO - codeparrot_training - Step 21442: {'lr': 0.0003234788024251105, 'samples': 10978816, 'steps': 21442, 'loss/train': 1.6814168691635132} +02/25/2022 05:21:33 - INFO - codeparrot_training - Step 21443: {'lr': 0.00032346316251145445, 'samples': 10979328, 'steps': 21443, 'loss/train': 1.9356404542922974} +02/25/2022 05:21:37 - INFO - codeparrot_training - Step 21444: {'lr': 0.0003234475222831056, 'samples': 10979840, 'steps': 21444, 'loss/train': 1.8436837196350098} +02/25/2022 05:21:42 - INFO - codeparrot_training - Step 21445: {'lr': 0.0003234318817401309, 'samples': 10980352, 'steps': 21445, 'loss/train': 1.9931660890579224} +02/25/2022 05:21:46 - INFO - codeparrot_training - Step 21446: {'lr': 0.00032341624088259727, 'samples': 10980864, 'steps': 21446, 'loss/train': 1.9036604166030884} +02/25/2022 05:21:49 - INFO - codeparrot_training - Step 21447: {'lr': 0.0003234005997105718, 'samples': 10981376, 'steps': 21447, 'loss/train': 1.9068318605422974} +02/25/2022 05:21:55 - INFO - codeparrot_training - Step 21448: {'lr': 0.0003233849582241214, 'samples': 10981888, 'steps': 21448, 'loss/train': 2.5308852195739746} +02/25/2022 05:21:58 - INFO - codeparrot_training - Step 21449: {'lr': 0.0003233693164233132, 'samples': 10982400, 'steps': 21449, 'loss/train': 0.7553144097328186} +02/25/2022 05:22:04 - INFO - codeparrot_training - Step 21450: {'lr': 0.00032335367430821416, 'samples': 10982912, 'steps': 21450, 'loss/train': 2.3498120307922363} +02/25/2022 05:22:07 - INFO - codeparrot_training - Step 21451: {'lr': 0.00032333803187889135, 'samples': 10983424, 'steps': 21451, 'loss/train': 2.21490478515625} +02/25/2022 05:22:14 - INFO - codeparrot_training - Step 21452: {'lr': 0.0003233223891354116, 'samples': 10983936, 'steps': 21452, 'loss/train': 1.1874514818191528} +02/25/2022 05:22:18 - INFO - codeparrot_training - Step 21453: {'lr': 0.000323306746077842, 'samples': 10984448, 'steps': 21453, 'loss/train': 1.4692349433898926} +02/25/2022 05:22:23 - INFO - codeparrot_training - Step 21454: {'lr': 0.00032329110270624956, 'samples': 10984960, 'steps': 21454, 'loss/train': 2.027933120727539} +02/25/2022 05:22:27 - INFO - codeparrot_training - Step 21455: {'lr': 0.00032327545902070137, 'samples': 10985472, 'steps': 21455, 'loss/train': 2.1449429988861084} +02/25/2022 05:22:32 - INFO - codeparrot_training - Step 21456: {'lr': 0.00032325981502126435, 'samples': 10985984, 'steps': 21456, 'loss/train': 2.320558786392212} +02/25/2022 05:22:36 - INFO - codeparrot_training - Step 21457: {'lr': 0.0003232441707080056, 'samples': 10986496, 'steps': 21457, 'loss/train': 3.3822484016418457} +02/25/2022 05:22:41 - INFO - codeparrot_training - Step 21458: {'lr': 0.00032322852608099203, 'samples': 10987008, 'steps': 21458, 'loss/train': 0.48733872175216675} +02/25/2022 05:22:45 - INFO - codeparrot_training - Step 21459: {'lr': 0.00032321288114029074, 'samples': 10987520, 'steps': 21459, 'loss/train': 2.1243081092834473} +02/25/2022 05:22:50 - INFO - codeparrot_training - Step 21460: {'lr': 0.00032319723588596875, 'samples': 10988032, 'steps': 21460, 'loss/train': 1.4248861074447632} +02/25/2022 05:22:54 - INFO - codeparrot_training - Step 21461: {'lr': 0.00032318159031809293, 'samples': 10988544, 'steps': 21461, 'loss/train': 1.7653262615203857} +02/25/2022 05:22:59 - INFO - codeparrot_training - Step 21462: {'lr': 0.00032316594443673047, 'samples': 10989056, 'steps': 21462, 'loss/train': 1.8202306032180786} +02/25/2022 05:23:03 - INFO - codeparrot_training - Step 21463: {'lr': 0.0003231502982419483, 'samples': 10989568, 'steps': 21463, 'loss/train': 1.9894782304763794} +02/25/2022 05:23:08 - INFO - codeparrot_training - Step 21464: {'lr': 0.0003231346517338135, 'samples': 10990080, 'steps': 21464, 'loss/train': 1.9325813055038452} +02/25/2022 05:23:12 - INFO - codeparrot_training - Step 21465: {'lr': 0.0003231190049123931, 'samples': 10990592, 'steps': 21465, 'loss/train': 1.9395051002502441} +02/25/2022 05:23:18 - INFO - codeparrot_training - Step 21466: {'lr': 0.00032310335777775413, 'samples': 10991104, 'steps': 21466, 'loss/train': 2.093909502029419} +02/25/2022 05:23:21 - INFO - codeparrot_training - Step 21467: {'lr': 0.00032308771032996353, 'samples': 10991616, 'steps': 21467, 'loss/train': 2.245347499847412} +02/25/2022 05:23:27 - INFO - codeparrot_training - Step 21468: {'lr': 0.0003230720625690884, 'samples': 10992128, 'steps': 21468, 'loss/train': 1.9406821727752686} +02/25/2022 05:23:30 - INFO - codeparrot_training - Step 21469: {'lr': 0.0003230564144951958, 'samples': 10992640, 'steps': 21469, 'loss/train': 2.02470064163208} +02/25/2022 05:23:36 - INFO - codeparrot_training - Step 21470: {'lr': 0.00032304076610835267, 'samples': 10993152, 'steps': 21470, 'loss/train': 0.7104726433753967} +02/25/2022 05:23:42 - INFO - codeparrot_training - Step 21471: {'lr': 0.0003230251174086261, 'samples': 10993664, 'steps': 21471, 'loss/train': 2.2231509685516357} +02/25/2022 05:23:45 - INFO - codeparrot_training - Step 21472: {'lr': 0.0003230094683960831, 'samples': 10994176, 'steps': 21472, 'loss/train': 2.0974111557006836} +02/25/2022 05:23:49 - INFO - codeparrot_training - Step 21473: {'lr': 0.0003229938190707908, 'samples': 10994688, 'steps': 21473, 'loss/train': 2.2401421070098877} +02/25/2022 05:23:54 - INFO - codeparrot_training - Step 21474: {'lr': 0.00032297816943281605, 'samples': 10995200, 'steps': 21474, 'loss/train': 1.2822587490081787} +02/25/2022 05:23:58 - INFO - codeparrot_training - Step 21475: {'lr': 0.00032296251948222605, 'samples': 10995712, 'steps': 21475, 'loss/train': 1.9859706163406372} +02/25/2022 05:24:04 - INFO - codeparrot_training - Step 21476: {'lr': 0.0003229468692190878, 'samples': 10996224, 'steps': 21476, 'loss/train': 1.5835591554641724} +02/25/2022 05:24:07 - INFO - codeparrot_training - Step 21477: {'lr': 0.00032293121864346823, 'samples': 10996736, 'steps': 21477, 'loss/train': 2.6338560581207275} +02/25/2022 05:24:13 - INFO - codeparrot_training - Step 21478: {'lr': 0.00032291556775543463, 'samples': 10997248, 'steps': 21478, 'loss/train': 0.9179028868675232} +02/25/2022 05:24:16 - INFO - codeparrot_training - Step 21479: {'lr': 0.0003228999165550537, 'samples': 10997760, 'steps': 21479, 'loss/train': 9.65449333190918} +02/25/2022 05:24:22 - INFO - codeparrot_training - Step 21480: {'lr': 0.0003228842650423929, 'samples': 10998272, 'steps': 21480, 'loss/train': 2.430394411087036} +02/25/2022 05:24:25 - INFO - codeparrot_training - Step 21481: {'lr': 0.0003228686132175189, 'samples': 10998784, 'steps': 21481, 'loss/train': 2.339261531829834} +02/25/2022 05:24:31 - INFO - codeparrot_training - Step 21482: {'lr': 0.0003228529610804989, 'samples': 10999296, 'steps': 21482, 'loss/train': 1.9135621786117554} +02/25/2022 05:24:34 - INFO - codeparrot_training - Step 21483: {'lr': 0.00032283730863140003, 'samples': 10999808, 'steps': 21483, 'loss/train': 2.3155276775360107} +02/25/2022 05:24:40 - INFO - codeparrot_training - Step 21484: {'lr': 0.0003228216558702892, 'samples': 11000320, 'steps': 21484, 'loss/train': 0.3337833285331726} +02/25/2022 05:24:44 - INFO - codeparrot_training - Step 21485: {'lr': 0.00032280600279723355, 'samples': 11000832, 'steps': 21485, 'loss/train': 2.135376214981079} +02/25/2022 05:24:51 - INFO - codeparrot_training - Step 21486: {'lr': 0.00032279034941230014, 'samples': 11001344, 'steps': 21486, 'loss/train': 2.3484268188476562} +02/25/2022 05:24:54 - INFO - codeparrot_training - Step 21487: {'lr': 0.00032277469571555587, 'samples': 11001856, 'steps': 21487, 'loss/train': 1.3024400472640991} +02/25/2022 05:24:58 - INFO - codeparrot_training - Step 21488: {'lr': 0.0003227590417070679, 'samples': 11002368, 'steps': 21488, 'loss/train': 2.356600522994995} +02/25/2022 05:25:03 - INFO - codeparrot_training - Step 21489: {'lr': 0.00032274338738690344, 'samples': 11002880, 'steps': 21489, 'loss/train': 2.263683319091797} +02/25/2022 05:25:07 - INFO - codeparrot_training - Step 21490: {'lr': 0.00032272773275512933, 'samples': 11003392, 'steps': 21490, 'loss/train': 1.0298773050308228} +02/25/2022 05:25:12 - INFO - codeparrot_training - Step 21491: {'lr': 0.0003227120778118127, 'samples': 11003904, 'steps': 21491, 'loss/train': 2.260049343109131} +02/25/2022 05:25:16 - INFO - codeparrot_training - Step 21492: {'lr': 0.00032269642255702065, 'samples': 11004416, 'steps': 21492, 'loss/train': 0.27687039971351624} +02/25/2022 05:25:21 - INFO - codeparrot_training - Step 21493: {'lr': 0.00032268076699082024, 'samples': 11004928, 'steps': 21493, 'loss/train': 2.0938594341278076} +02/25/2022 05:25:25 - INFO - codeparrot_training - Step 21494: {'lr': 0.0003226651111132784, 'samples': 11005440, 'steps': 21494, 'loss/train': 1.9103293418884277} +02/25/2022 05:25:30 - INFO - codeparrot_training - Step 21495: {'lr': 0.0003226494549244624, 'samples': 11005952, 'steps': 21495, 'loss/train': 2.0065977573394775} +02/25/2022 05:25:34 - INFO - codeparrot_training - Step 21496: {'lr': 0.00032263379842443915, 'samples': 11006464, 'steps': 21496, 'loss/train': 2.1660172939300537} +02/25/2022 05:25:39 - INFO - codeparrot_training - Step 21497: {'lr': 0.0003226181416132758, 'samples': 11006976, 'steps': 21497, 'loss/train': 1.0341888666152954} +02/25/2022 05:25:43 - INFO - codeparrot_training - Step 21498: {'lr': 0.00032260248449103937, 'samples': 11007488, 'steps': 21498, 'loss/train': 1.4313536882400513} +02/25/2022 05:25:50 - INFO - codeparrot_training - Step 21499: {'lr': 0.00032258682705779695, 'samples': 11008000, 'steps': 21499, 'loss/train': 1.1632885932922363} +02/25/2022 05:25:53 - INFO - codeparrot_training - Step 21500: {'lr': 0.00032257116931361555, 'samples': 11008512, 'steps': 21500, 'loss/train': 1.889289379119873} +02/25/2022 05:25:59 - INFO - codeparrot_training - Step 21501: {'lr': 0.0003225555112585624, 'samples': 11009024, 'steps': 21501, 'loss/train': 2.3959970474243164} +02/25/2022 05:26:04 - INFO - codeparrot_training - Step 21502: {'lr': 0.0003225398528927045, 'samples': 11009536, 'steps': 21502, 'loss/train': 1.7308250665664673} +02/25/2022 05:26:08 - INFO - codeparrot_training - Step 21503: {'lr': 0.00032252419421610883, 'samples': 11010048, 'steps': 21503, 'loss/train': 1.6846668720245361} +02/25/2022 05:26:13 - INFO - codeparrot_training - Step 21504: {'lr': 0.0003225085352288426, 'samples': 11010560, 'steps': 21504, 'loss/train': 1.2956887483596802} +02/25/2022 05:26:17 - INFO - codeparrot_training - Step 21505: {'lr': 0.00032249287593097274, 'samples': 11011072, 'steps': 21505, 'loss/train': 0.7393494844436646} +02/25/2022 05:26:22 - INFO - codeparrot_training - Step 21506: {'lr': 0.00032247721632256657, 'samples': 11011584, 'steps': 21506, 'loss/train': 2.6542041301727295} +02/25/2022 05:26:25 - INFO - codeparrot_training - Step 21507: {'lr': 0.0003224615564036908, 'samples': 11012096, 'steps': 21507, 'loss/train': 1.7451930046081543} +02/25/2022 05:26:32 - INFO - codeparrot_training - Step 21508: {'lr': 0.00032244589617441287, 'samples': 11012608, 'steps': 21508, 'loss/train': 0.37942302227020264} +02/25/2022 05:26:35 - INFO - codeparrot_training - Step 21509: {'lr': 0.0003224302356347997, 'samples': 11013120, 'steps': 21509, 'loss/train': 1.7924957275390625} +02/25/2022 05:26:41 - INFO - codeparrot_training - Step 21510: {'lr': 0.0003224145747849185, 'samples': 11013632, 'steps': 21510, 'loss/train': 1.631548285484314} +02/25/2022 05:26:44 - INFO - codeparrot_training - Step 21511: {'lr': 0.0003223989136248361, 'samples': 11014144, 'steps': 21511, 'loss/train': 2.365044355392456} +02/25/2022 05:26:50 - INFO - codeparrot_training - Step 21512: {'lr': 0.0003223832521546198, 'samples': 11014656, 'steps': 21512, 'loss/train': 2.034379005432129} +02/25/2022 05:26:53 - INFO - codeparrot_training - Step 21513: {'lr': 0.0003223675903743366, 'samples': 11015168, 'steps': 21513, 'loss/train': 1.1576682329177856} +02/25/2022 05:26:59 - INFO - codeparrot_training - Step 21514: {'lr': 0.0003223519282840537, 'samples': 11015680, 'steps': 21514, 'loss/train': 2.333731174468994} +02/25/2022 05:27:02 - INFO - codeparrot_training - Step 21515: {'lr': 0.00032233626588383806, 'samples': 11016192, 'steps': 21515, 'loss/train': 1.3007678985595703} +02/25/2022 05:27:08 - INFO - codeparrot_training - Step 21516: {'lr': 0.00032232060317375684, 'samples': 11016704, 'steps': 21516, 'loss/train': 2.1323704719543457} +02/25/2022 05:27:11 - INFO - codeparrot_training - Step 21517: {'lr': 0.00032230494015387715, 'samples': 11017216, 'steps': 21517, 'loss/train': 3.6352202892303467} +02/25/2022 05:27:17 - INFO - codeparrot_training - Step 21518: {'lr': 0.000322289276824266, 'samples': 11017728, 'steps': 21518, 'loss/train': 2.469447612762451} +02/25/2022 05:27:20 - INFO - codeparrot_training - Step 21519: {'lr': 0.0003222736131849906, 'samples': 11018240, 'steps': 21519, 'loss/train': 2.392763614654541} +02/25/2022 05:27:26 - INFO - codeparrot_training - Step 21520: {'lr': 0.0003222579492361179, 'samples': 11018752, 'steps': 21520, 'loss/train': 2.1672446727752686} +02/25/2022 05:27:29 - INFO - codeparrot_training - Step 21521: {'lr': 0.0003222422849777152, 'samples': 11019264, 'steps': 21521, 'loss/train': 1.3211790323257446} +02/25/2022 05:27:36 - INFO - codeparrot_training - Step 21522: {'lr': 0.0003222266204098494, 'samples': 11019776, 'steps': 21522, 'loss/train': 1.3548214435577393} +02/25/2022 05:27:39 - INFO - codeparrot_training - Step 21523: {'lr': 0.0003222109555325877, 'samples': 11020288, 'steps': 21523, 'loss/train': 2.0841355323791504} +02/25/2022 05:27:45 - INFO - codeparrot_training - Step 21524: {'lr': 0.00032219529034599725, 'samples': 11020800, 'steps': 21524, 'loss/train': 1.3415675163269043} +02/25/2022 05:27:48 - INFO - codeparrot_training - Step 21525: {'lr': 0.00032217962485014506, 'samples': 11021312, 'steps': 21525, 'loss/train': 1.6736340522766113} +02/25/2022 05:27:54 - INFO - codeparrot_training - Step 21526: {'lr': 0.0003221639590450983, 'samples': 11021824, 'steps': 21526, 'loss/train': 2.2644736766815186} +02/25/2022 05:27:57 - INFO - codeparrot_training - Step 21527: {'lr': 0.00032214829293092406, 'samples': 11022336, 'steps': 21527, 'loss/train': 1.2068449258804321} +02/25/2022 05:28:03 - INFO - codeparrot_training - Step 21528: {'lr': 0.0003221326265076894, 'samples': 11022848, 'steps': 21528, 'loss/train': 1.7141304016113281} +02/25/2022 05:28:06 - INFO - codeparrot_training - Step 21529: {'lr': 0.00032211695977546153, 'samples': 11023360, 'steps': 21529, 'loss/train': 1.680727243423462} +02/25/2022 05:28:12 - INFO - codeparrot_training - Step 21530: {'lr': 0.0003221012927343075, 'samples': 11023872, 'steps': 21530, 'loss/train': 1.3877259492874146} +02/25/2022 05:28:15 - INFO - codeparrot_training - Step 21531: {'lr': 0.0003220856253842944, 'samples': 11024384, 'steps': 21531, 'loss/train': 2.5571298599243164} +02/25/2022 05:28:21 - INFO - codeparrot_training - Step 21532: {'lr': 0.00032206995772548943, 'samples': 11024896, 'steps': 21532, 'loss/train': 2.1010231971740723} +02/25/2022 05:28:25 - INFO - codeparrot_training - Step 21533: {'lr': 0.00032205428975795955, 'samples': 11025408, 'steps': 21533, 'loss/train': 1.8804799318313599} +02/25/2022 05:28:28 - INFO - codeparrot_training - Step 21534: {'lr': 0.000322038621481772, 'samples': 11025920, 'steps': 21534, 'loss/train': 2.0093066692352295} +02/25/2022 05:28:35 - INFO - codeparrot_training - Step 21535: {'lr': 0.0003220229528969939, 'samples': 11026432, 'steps': 21535, 'loss/train': 1.9713863134384155} +02/25/2022 05:28:40 - INFO - codeparrot_training - Step 21536: {'lr': 0.00032200728400369233, 'samples': 11026944, 'steps': 21536, 'loss/train': 1.868619680404663} +02/25/2022 05:28:44 - INFO - codeparrot_training - Step 21537: {'lr': 0.0003219916148019344, 'samples': 11027456, 'steps': 21537, 'loss/train': 2.5183682441711426} +02/25/2022 05:28:50 - INFO - codeparrot_training - Step 21538: {'lr': 0.0003219759452917872, 'samples': 11027968, 'steps': 21538, 'loss/train': 2.6670279502868652} +02/25/2022 05:28:53 - INFO - codeparrot_training - Step 21539: {'lr': 0.000321960275473318, 'samples': 11028480, 'steps': 21539, 'loss/train': 1.315918207168579} +02/25/2022 05:28:57 - INFO - codeparrot_training - Step 21540: {'lr': 0.0003219446053465938, 'samples': 11028992, 'steps': 21540, 'loss/train': 1.5764955282211304} +02/25/2022 05:29:02 - INFO - codeparrot_training - Step 21541: {'lr': 0.0003219289349116818, 'samples': 11029504, 'steps': 21541, 'loss/train': 2.478363513946533} +02/25/2022 05:29:06 - INFO - codeparrot_training - Step 21542: {'lr': 0.0003219132641686491, 'samples': 11030016, 'steps': 21542, 'loss/train': 3.195913791656494} +02/25/2022 05:29:11 - INFO - codeparrot_training - Step 21543: {'lr': 0.0003218975931175627, 'samples': 11030528, 'steps': 21543, 'loss/train': 2.328716993331909} +02/25/2022 05:29:15 - INFO - codeparrot_training - Step 21544: {'lr': 0.0003218819217584899, 'samples': 11031040, 'steps': 21544, 'loss/train': 1.7310285568237305} +02/25/2022 05:29:21 - INFO - codeparrot_training - Step 21545: {'lr': 0.0003218662500914977, 'samples': 11031552, 'steps': 21545, 'loss/train': 2.249018907546997} +02/25/2022 05:29:25 - INFO - codeparrot_training - Step 21546: {'lr': 0.0003218505781166534, 'samples': 11032064, 'steps': 21546, 'loss/train': 1.839613676071167} +02/25/2022 05:29:30 - INFO - codeparrot_training - Step 21547: {'lr': 0.000321834905834024, 'samples': 11032576, 'steps': 21547, 'loss/train': 0.6879871487617493} +02/25/2022 05:29:34 - INFO - codeparrot_training - Step 21548: {'lr': 0.00032181923324367675, 'samples': 11033088, 'steps': 21548, 'loss/train': 2.6076583862304688} +02/25/2022 05:29:40 - INFO - codeparrot_training - Step 21549: {'lr': 0.0003218035603456786, 'samples': 11033600, 'steps': 21549, 'loss/train': 0.9620224833488464} +02/25/2022 05:29:43 - INFO - codeparrot_training - Step 21550: {'lr': 0.00032178788714009687, 'samples': 11034112, 'steps': 21550, 'loss/train': 2.3495805263519287} +02/25/2022 05:29:48 - INFO - codeparrot_training - Step 21551: {'lr': 0.00032177221362699853, 'samples': 11034624, 'steps': 21551, 'loss/train': 2.0556299686431885} +02/25/2022 05:29:52 - INFO - codeparrot_training - Step 21552: {'lr': 0.0003217565398064509, 'samples': 11035136, 'steps': 21552, 'loss/train': 1.6750190258026123} +02/25/2022 05:29:57 - INFO - codeparrot_training - Step 21553: {'lr': 0.000321740865678521, 'samples': 11035648, 'steps': 21553, 'loss/train': 2.536710500717163} +02/25/2022 05:30:01 - INFO - codeparrot_training - Step 21554: {'lr': 0.00032172519124327607, 'samples': 11036160, 'steps': 21554, 'loss/train': 2.2535555362701416} +02/25/2022 05:30:08 - INFO - codeparrot_training - Step 21555: {'lr': 0.00032170951650078316, 'samples': 11036672, 'steps': 21555, 'loss/train': 1.9997724294662476} +02/25/2022 05:30:11 - INFO - codeparrot_training - Step 21556: {'lr': 0.0003216938414511095, 'samples': 11037184, 'steps': 21556, 'loss/train': 0.8170745372772217} +02/25/2022 05:30:17 - INFO - codeparrot_training - Step 21557: {'lr': 0.0003216781660943221, 'samples': 11037696, 'steps': 21557, 'loss/train': 3.0403549671173096} +02/25/2022 05:30:20 - INFO - codeparrot_training - Step 21558: {'lr': 0.0003216624904304882, 'samples': 11038208, 'steps': 21558, 'loss/train': 0.5783214569091797} +02/25/2022 05:30:26 - INFO - codeparrot_training - Step 21559: {'lr': 0.000321646814459675, 'samples': 11038720, 'steps': 21559, 'loss/train': 1.0909634828567505} +02/25/2022 05:30:30 - INFO - codeparrot_training - Step 21560: {'lr': 0.0003216311381819496, 'samples': 11039232, 'steps': 21560, 'loss/train': 1.8134702444076538} +02/25/2022 05:30:35 - INFO - codeparrot_training - Step 21561: {'lr': 0.00032161546159737917, 'samples': 11039744, 'steps': 21561, 'loss/train': 1.2008908987045288} +02/25/2022 05:30:39 - INFO - codeparrot_training - Step 21562: {'lr': 0.0003215997847060307, 'samples': 11040256, 'steps': 21562, 'loss/train': 1.9453626871109009} +02/25/2022 05:30:44 - INFO - codeparrot_training - Step 21563: {'lr': 0.00032158410750797163, 'samples': 11040768, 'steps': 21563, 'loss/train': 2.7384119033813477} +02/25/2022 05:30:48 - INFO - codeparrot_training - Step 21564: {'lr': 0.000321568430003269, 'samples': 11041280, 'steps': 21564, 'loss/train': 2.250610589981079} +02/25/2022 05:30:53 - INFO - codeparrot_training - Step 21565: {'lr': 0.00032155275219198986, 'samples': 11041792, 'steps': 21565, 'loss/train': 2.0307776927948} +02/25/2022 05:30:57 - INFO - codeparrot_training - Step 21566: {'lr': 0.0003215370740742014, 'samples': 11042304, 'steps': 21566, 'loss/train': 2.0014281272888184} +02/25/2022 05:31:02 - INFO - codeparrot_training - Step 21567: {'lr': 0.00032152139564997097, 'samples': 11042816, 'steps': 21567, 'loss/train': 2.3043441772460938} +02/25/2022 05:31:06 - INFO - codeparrot_training - Step 21568: {'lr': 0.0003215057169193655, 'samples': 11043328, 'steps': 21568, 'loss/train': 1.6961584091186523} +02/25/2022 05:31:12 - INFO - codeparrot_training - Step 21569: {'lr': 0.00032149003788245223, 'samples': 11043840, 'steps': 21569, 'loss/train': 1.4521780014038086} +02/25/2022 05:31:15 - INFO - codeparrot_training - Step 21570: {'lr': 0.0003214743585392984, 'samples': 11044352, 'steps': 21570, 'loss/train': 2.091576337814331} +02/25/2022 05:31:21 - INFO - codeparrot_training - Step 21571: {'lr': 0.0003214586788899711, 'samples': 11044864, 'steps': 21571, 'loss/train': 0.19392921030521393} +02/25/2022 05:31:24 - INFO - codeparrot_training - Step 21572: {'lr': 0.00032144299893453743, 'samples': 11045376, 'steps': 21572, 'loss/train': 2.0229432582855225} +02/25/2022 05:31:30 - INFO - codeparrot_training - Step 21573: {'lr': 0.00032142731867306466, 'samples': 11045888, 'steps': 21573, 'loss/train': 1.1942694187164307} +02/25/2022 05:31:33 - INFO - codeparrot_training - Step 21574: {'lr': 0.00032141163810562, 'samples': 11046400, 'steps': 21574, 'loss/train': 1.114532470703125} +02/25/2022 05:31:39 - INFO - codeparrot_training - Step 21575: {'lr': 0.00032139595723227054, 'samples': 11046912, 'steps': 21575, 'loss/train': 1.5500388145446777} +02/25/2022 05:31:43 - INFO - codeparrot_training - Step 21576: {'lr': 0.0003213802760530835, 'samples': 11047424, 'steps': 21576, 'loss/train': 1.2036854028701782} +02/25/2022 05:31:48 - INFO - codeparrot_training - Step 21577: {'lr': 0.000321364594568126, 'samples': 11047936, 'steps': 21577, 'loss/train': 1.8385207653045654} +02/25/2022 05:31:51 - INFO - codeparrot_training - Step 21578: {'lr': 0.00032134891277746527, 'samples': 11048448, 'steps': 21578, 'loss/train': 1.7082643508911133} +02/25/2022 05:31:58 - INFO - codeparrot_training - Step 21579: {'lr': 0.0003213332306811684, 'samples': 11048960, 'steps': 21579, 'loss/train': 1.6035127639770508} +02/25/2022 05:32:01 - INFO - codeparrot_training - Step 21580: {'lr': 0.0003213175482793026, 'samples': 11049472, 'steps': 21580, 'loss/train': 2.3959131240844727} +02/25/2022 05:32:07 - INFO - codeparrot_training - Step 21581: {'lr': 0.00032130186557193506, 'samples': 11049984, 'steps': 21581, 'loss/train': 2.015110969543457} +02/25/2022 05:32:10 - INFO - codeparrot_training - Step 21582: {'lr': 0.0003212861825591331, 'samples': 11050496, 'steps': 21582, 'loss/train': 1.8352669477462769} +02/25/2022 05:32:16 - INFO - codeparrot_training - Step 21583: {'lr': 0.00032127049924096364, 'samples': 11051008, 'steps': 21583, 'loss/train': 1.3586941957473755} +02/25/2022 05:32:19 - INFO - codeparrot_training - Step 21584: {'lr': 0.00032125481561749405, 'samples': 11051520, 'steps': 21584, 'loss/train': 2.635873556137085} +02/25/2022 05:32:25 - INFO - codeparrot_training - Step 21585: {'lr': 0.00032123913168879146, 'samples': 11052032, 'steps': 21585, 'loss/train': 1.7228937149047852} +02/25/2022 05:32:29 - INFO - codeparrot_training - Step 21586: {'lr': 0.00032122344745492303, 'samples': 11052544, 'steps': 21586, 'loss/train': 2.5237181186676025} +02/25/2022 05:32:34 - INFO - codeparrot_training - Step 21587: {'lr': 0.00032120776291595594, 'samples': 11053056, 'steps': 21587, 'loss/train': 1.6375033855438232} +02/25/2022 05:32:38 - INFO - codeparrot_training - Step 21588: {'lr': 0.00032119207807195747, 'samples': 11053568, 'steps': 21588, 'loss/train': 2.575406551361084} +02/25/2022 05:32:41 - INFO - codeparrot_training - Step 21589: {'lr': 0.0003211763929229947, 'samples': 11054080, 'steps': 21589, 'loss/train': 1.9104794263839722} +02/25/2022 05:32:47 - INFO - codeparrot_training - Step 21590: {'lr': 0.00032116070746913484, 'samples': 11054592, 'steps': 21590, 'loss/train': 1.5984069108963013} +02/25/2022 05:32:50 - INFO - codeparrot_training - Step 21591: {'lr': 0.0003211450217104452, 'samples': 11055104, 'steps': 21591, 'loss/train': 1.8124760389328003} +02/25/2022 05:32:56 - INFO - codeparrot_training - Step 21592: {'lr': 0.00032112933564699275, 'samples': 11055616, 'steps': 21592, 'loss/train': 1.4645565748214722} +02/25/2022 05:33:00 - INFO - codeparrot_training - Step 21593: {'lr': 0.0003211136492788449, 'samples': 11056128, 'steps': 21593, 'loss/train': 2.189774990081787} +02/25/2022 05:33:05 - INFO - codeparrot_training - Step 21594: {'lr': 0.0003210979626060687, 'samples': 11056640, 'steps': 21594, 'loss/train': 2.0911288261413574} +02/25/2022 05:33:09 - INFO - codeparrot_training - Step 21595: {'lr': 0.00032108227562873147, 'samples': 11057152, 'steps': 21595, 'loss/train': 1.86595618724823} +02/25/2022 05:33:14 - INFO - codeparrot_training - Step 21596: {'lr': 0.0003210665883469003, 'samples': 11057664, 'steps': 21596, 'loss/train': 1.6551841497421265} +02/25/2022 05:33:18 - INFO - codeparrot_training - Step 21597: {'lr': 0.0003210509007606424, 'samples': 11058176, 'steps': 21597, 'loss/train': 1.5344195365905762} +02/25/2022 05:33:23 - INFO - codeparrot_training - Step 21598: {'lr': 0.00032103521287002505, 'samples': 11058688, 'steps': 21598, 'loss/train': 1.24522066116333} +02/25/2022 05:33:26 - INFO - codeparrot_training - Step 21599: {'lr': 0.0003210195246751154, 'samples': 11059200, 'steps': 21599, 'loss/train': 1.8943443298339844} +02/25/2022 05:33:32 - INFO - codeparrot_training - Step 21600: {'lr': 0.0003210038361759807, 'samples': 11059712, 'steps': 21600, 'loss/train': 1.5027269124984741} +02/25/2022 05:33:35 - INFO - codeparrot_training - Step 21601: {'lr': 0.000320988147372688, 'samples': 11060224, 'steps': 21601, 'loss/train': 1.9278641939163208} +02/25/2022 05:33:42 - INFO - codeparrot_training - Step 21602: {'lr': 0.00032097245826530476, 'samples': 11060736, 'steps': 21602, 'loss/train': 1.8387091159820557} +02/25/2022 05:33:45 - INFO - codeparrot_training - Step 21603: {'lr': 0.00032095676885389793, 'samples': 11061248, 'steps': 21603, 'loss/train': 2.8980751037597656} +02/25/2022 05:33:51 - INFO - codeparrot_training - Step 21604: {'lr': 0.00032094107913853485, 'samples': 11061760, 'steps': 21604, 'loss/train': 1.0872629880905151} +02/25/2022 05:33:54 - INFO - codeparrot_training - Step 21605: {'lr': 0.00032092538911928276, 'samples': 11062272, 'steps': 21605, 'loss/train': 1.9053674936294556} +02/25/2022 05:34:00 - INFO - codeparrot_training - Step 21606: {'lr': 0.00032090969879620886, 'samples': 11062784, 'steps': 21606, 'loss/train': 1.8309128284454346} +02/25/2022 05:34:03 - INFO - codeparrot_training - Step 21607: {'lr': 0.00032089400816938016, 'samples': 11063296, 'steps': 21607, 'loss/train': 2.072174072265625} +02/25/2022 05:34:09 - INFO - codeparrot_training - Step 21608: {'lr': 0.0003208783172388642, 'samples': 11063808, 'steps': 21608, 'loss/train': 1.8215605020523071} +02/25/2022 05:34:12 - INFO - codeparrot_training - Step 21609: {'lr': 0.000320862626004728, 'samples': 11064320, 'steps': 21609, 'loss/train': 1.2355942726135254} +02/25/2022 05:34:18 - INFO - codeparrot_training - Step 21610: {'lr': 0.00032084693446703875, 'samples': 11064832, 'steps': 21610, 'loss/train': 1.642086386680603} +02/25/2022 05:34:21 - INFO - codeparrot_training - Step 21611: {'lr': 0.00032083124262586384, 'samples': 11065344, 'steps': 21611, 'loss/train': 2.1018474102020264} +02/25/2022 05:34:27 - INFO - codeparrot_training - Step 21612: {'lr': 0.0003208155504812703, 'samples': 11065856, 'steps': 21612, 'loss/train': 2.3018088340759277} +02/25/2022 05:34:30 - INFO - codeparrot_training - Step 21613: {'lr': 0.00032079985803332546, 'samples': 11066368, 'steps': 21613, 'loss/train': 2.333974838256836} +02/25/2022 05:34:36 - INFO - codeparrot_training - Step 21614: {'lr': 0.0003207841652820964, 'samples': 11066880, 'steps': 21614, 'loss/train': 2.717080593109131} +02/25/2022 05:34:39 - INFO - codeparrot_training - Step 21615: {'lr': 0.0003207684722276506, 'samples': 11067392, 'steps': 21615, 'loss/train': 1.194378137588501} +02/25/2022 05:34:46 - INFO - codeparrot_training - Step 21616: {'lr': 0.00032075277887005503, 'samples': 11067904, 'steps': 21616, 'loss/train': 1.5053054094314575} +02/25/2022 05:34:49 - INFO - codeparrot_training - Step 21617: {'lr': 0.0003207370852093771, 'samples': 11068416, 'steps': 21617, 'loss/train': 2.3491132259368896} +02/25/2022 05:34:55 - INFO - codeparrot_training - Step 21618: {'lr': 0.00032072139124568396, 'samples': 11068928, 'steps': 21618, 'loss/train': 1.3829879760742188} +02/25/2022 05:34:58 - INFO - codeparrot_training - Step 21619: {'lr': 0.0003207056969790428, 'samples': 11069440, 'steps': 21619, 'loss/train': 1.9224377870559692} +02/25/2022 05:35:04 - INFO - codeparrot_training - Step 21620: {'lr': 0.0003206900024095208, 'samples': 11069952, 'steps': 21620, 'loss/train': 0.983349621295929} +02/25/2022 05:35:07 - INFO - codeparrot_training - Step 21621: {'lr': 0.0003206743075371854, 'samples': 11070464, 'steps': 21621, 'loss/train': 1.8232043981552124} +02/25/2022 05:35:13 - INFO - codeparrot_training - Step 21622: {'lr': 0.0003206586123621037, 'samples': 11070976, 'steps': 21622, 'loss/train': 1.5382137298583984} +02/25/2022 05:35:17 - INFO - codeparrot_training - Step 21623: {'lr': 0.00032064291688434286, 'samples': 11071488, 'steps': 21623, 'loss/train': 2.208237409591675} +02/25/2022 05:35:22 - INFO - codeparrot_training - Step 21624: {'lr': 0.00032062722110397034, 'samples': 11072000, 'steps': 21624, 'loss/train': 2.0548887252807617} +02/25/2022 05:35:26 - INFO - codeparrot_training - Step 21625: {'lr': 0.0003206115250210531, 'samples': 11072512, 'steps': 21625, 'loss/train': 2.4550108909606934} +02/25/2022 05:35:32 - INFO - codeparrot_training - Step 21626: {'lr': 0.00032059582863565864, 'samples': 11073024, 'steps': 21626, 'loss/train': 1.967165470123291} +02/25/2022 05:35:35 - INFO - codeparrot_training - Step 21627: {'lr': 0.0003205801319478539, 'samples': 11073536, 'steps': 21627, 'loss/train': 2.440612316131592} +02/25/2022 05:35:41 - INFO - codeparrot_training - Step 21628: {'lr': 0.00032056443495770637, 'samples': 11074048, 'steps': 21628, 'loss/train': 2.2766144275665283} +02/25/2022 05:35:44 - INFO - codeparrot_training - Step 21629: {'lr': 0.0003205487376652833, 'samples': 11074560, 'steps': 21629, 'loss/train': 1.8009629249572754} +02/25/2022 05:35:50 - INFO - codeparrot_training - Step 21630: {'lr': 0.0003205330400706517, 'samples': 11075072, 'steps': 21630, 'loss/train': 2.550490140914917} +02/25/2022 05:35:53 - INFO - codeparrot_training - Step 21631: {'lr': 0.000320517342173879, 'samples': 11075584, 'steps': 21631, 'loss/train': 2.653726100921631} +02/25/2022 05:35:59 - INFO - codeparrot_training - Step 21632: {'lr': 0.0003205016439750323, 'samples': 11076096, 'steps': 21632, 'loss/train': 1.3426618576049805} +02/25/2022 05:36:02 - INFO - codeparrot_training - Step 21633: {'lr': 0.00032048594547417916, 'samples': 11076608, 'steps': 21633, 'loss/train': 2.6711585521698} +02/25/2022 05:36:08 - INFO - codeparrot_training - Step 21634: {'lr': 0.00032047024667138644, 'samples': 11077120, 'steps': 21634, 'loss/train': 1.8430368900299072} +02/25/2022 05:36:11 - INFO - codeparrot_training - Step 21635: {'lr': 0.00032045454756672164, 'samples': 11077632, 'steps': 21635, 'loss/train': 1.7331066131591797} +02/25/2022 05:36:17 - INFO - codeparrot_training - Step 21636: {'lr': 0.00032043884816025187, 'samples': 11078144, 'steps': 21636, 'loss/train': 1.765415072441101} +02/25/2022 05:36:20 - INFO - codeparrot_training - Step 21637: {'lr': 0.0003204231484520445, 'samples': 11078656, 'steps': 21637, 'loss/train': 2.5659048557281494} +02/25/2022 05:36:26 - INFO - codeparrot_training - Step 21638: {'lr': 0.0003204074484421667, 'samples': 11079168, 'steps': 21638, 'loss/train': 3.0074050426483154} +02/25/2022 05:36:30 - INFO - codeparrot_training - Step 21639: {'lr': 0.0003203917481306857, 'samples': 11079680, 'steps': 21639, 'loss/train': 2.3378326892852783} +02/25/2022 05:36:35 - INFO - codeparrot_training - Step 21640: {'lr': 0.0003203760475176689, 'samples': 11080192, 'steps': 21640, 'loss/train': 1.4756957292556763} +02/25/2022 05:36:39 - INFO - codeparrot_training - Step 21641: {'lr': 0.00032036034660318344, 'samples': 11080704, 'steps': 21641, 'loss/train': 1.7736973762512207} +02/25/2022 05:36:44 - INFO - codeparrot_training - Step 21642: {'lr': 0.00032034464538729647, 'samples': 11081216, 'steps': 21642, 'loss/train': 2.2646687030792236} +02/25/2022 05:36:48 - INFO - codeparrot_training - Step 21643: {'lr': 0.0003203289438700755, 'samples': 11081728, 'steps': 21643, 'loss/train': 2.2318098545074463} +02/25/2022 05:36:53 - INFO - codeparrot_training - Step 21644: {'lr': 0.0003203132420515876, 'samples': 11082240, 'steps': 21644, 'loss/train': 2.0150458812713623} +02/25/2022 05:36:57 - INFO - codeparrot_training - Step 21645: {'lr': 0.0003202975399319002, 'samples': 11082752, 'steps': 21645, 'loss/train': 2.0222859382629395} +02/25/2022 05:37:02 - INFO - codeparrot_training - Step 21646: {'lr': 0.00032028183751108035, 'samples': 11083264, 'steps': 21646, 'loss/train': 1.8015633821487427} +02/25/2022 05:37:06 - INFO - codeparrot_training - Step 21647: {'lr': 0.00032026613478919547, 'samples': 11083776, 'steps': 21647, 'loss/train': 1.9266560077667236} +02/25/2022 05:37:12 - INFO - codeparrot_training - Step 21648: {'lr': 0.0003202504317663128, 'samples': 11084288, 'steps': 21648, 'loss/train': 0.4452267289161682} +02/25/2022 05:37:16 - INFO - codeparrot_training - Step 21649: {'lr': 0.0003202347284424995, 'samples': 11084800, 'steps': 21649, 'loss/train': 1.8500224351882935} +02/25/2022 05:37:21 - INFO - codeparrot_training - Step 21650: {'lr': 0.00032021902481782304, 'samples': 11085312, 'steps': 21650, 'loss/train': 1.7770010232925415} +02/25/2022 05:37:25 - INFO - codeparrot_training - Step 21651: {'lr': 0.0003202033208923505, 'samples': 11085824, 'steps': 21651, 'loss/train': 1.1370052099227905} +02/25/2022 05:37:30 - INFO - codeparrot_training - Step 21652: {'lr': 0.0003201876166661493, 'samples': 11086336, 'steps': 21652, 'loss/train': 1.2044696807861328} +02/25/2022 05:37:34 - INFO - codeparrot_training - Step 21653: {'lr': 0.00032017191213928653, 'samples': 11086848, 'steps': 21653, 'loss/train': 1.6115232706069946} +02/25/2022 05:37:39 - INFO - codeparrot_training - Step 21654: {'lr': 0.0003201562073118297, 'samples': 11087360, 'steps': 21654, 'loss/train': 1.9590775966644287} +02/25/2022 05:37:43 - INFO - codeparrot_training - Step 21655: {'lr': 0.00032014050218384584, 'samples': 11087872, 'steps': 21655, 'loss/train': 1.4783846139907837} +02/25/2022 05:37:48 - INFO - codeparrot_training - Step 21656: {'lr': 0.0003201247967554024, 'samples': 11088384, 'steps': 21656, 'loss/train': 2.3855648040771484} +02/25/2022 05:37:52 - INFO - codeparrot_training - Step 21657: {'lr': 0.0003201090910265666, 'samples': 11088896, 'steps': 21657, 'loss/train': 0.8637088537216187} +02/25/2022 05:37:57 - INFO - codeparrot_training - Step 21658: {'lr': 0.0003200933849974056, 'samples': 11089408, 'steps': 21658, 'loss/train': 2.0209999084472656} +02/25/2022 05:38:01 - INFO - codeparrot_training - Step 21659: {'lr': 0.000320077678667987, 'samples': 11089920, 'steps': 21659, 'loss/train': 2.6658084392547607} +02/25/2022 05:38:06 - INFO - codeparrot_training - Step 21660: {'lr': 0.00032006197203837775, 'samples': 11090432, 'steps': 21660, 'loss/train': 2.1893503665924072} +02/25/2022 05:38:10 - INFO - codeparrot_training - Step 21661: {'lr': 0.00032004626510864526, 'samples': 11090944, 'steps': 21661, 'loss/train': 1.649872899055481} +02/25/2022 05:38:16 - INFO - codeparrot_training - Step 21662: {'lr': 0.00032003055787885684, 'samples': 11091456, 'steps': 21662, 'loss/train': 0.434133380651474} +02/25/2022 05:38:19 - INFO - codeparrot_training - Step 21663: {'lr': 0.00032001485034907975, 'samples': 11091968, 'steps': 21663, 'loss/train': 1.8698738813400269} +02/25/2022 05:38:25 - INFO - codeparrot_training - Step 21664: {'lr': 0.00031999914251938124, 'samples': 11092480, 'steps': 21664, 'loss/train': 2.088303327560425} +02/25/2022 05:38:29 - INFO - codeparrot_training - Step 21665: {'lr': 0.00031998343438982866, 'samples': 11092992, 'steps': 21665, 'loss/train': 2.7469441890716553} +02/25/2022 05:38:34 - INFO - codeparrot_training - Step 21666: {'lr': 0.0003199677259604893, 'samples': 11093504, 'steps': 21666, 'loss/train': 0.9768959879875183} +02/25/2022 05:38:38 - INFO - codeparrot_training - Step 21667: {'lr': 0.0003199520172314304, 'samples': 11094016, 'steps': 21667, 'loss/train': 2.5853993892669678} +02/25/2022 05:38:44 - INFO - codeparrot_training - Step 21668: {'lr': 0.00031993630820271925, 'samples': 11094528, 'steps': 21668, 'loss/train': 2.009589195251465} +02/25/2022 05:38:48 - INFO - codeparrot_training - Step 21669: {'lr': 0.00031992059887442316, 'samples': 11095040, 'steps': 21669, 'loss/train': 1.9536654949188232} +02/25/2022 05:38:51 - INFO - codeparrot_training - Step 21670: {'lr': 0.00031990488924660943, 'samples': 11095552, 'steps': 21670, 'loss/train': 2.4060659408569336} +02/25/2022 05:38:57 - INFO - codeparrot_training - Step 21671: {'lr': 0.00031988917931934536, 'samples': 11096064, 'steps': 21671, 'loss/train': 2.093985080718994} +02/25/2022 05:39:00 - INFO - codeparrot_training - Step 21672: {'lr': 0.0003198734690926982, 'samples': 11096576, 'steps': 21672, 'loss/train': 2.3026843070983887} +02/25/2022 05:39:06 - INFO - codeparrot_training - Step 21673: {'lr': 0.00031985775856673536, 'samples': 11097088, 'steps': 21673, 'loss/train': 0.8858436346054077} +02/25/2022 05:39:09 - INFO - codeparrot_training - Step 21674: {'lr': 0.000319842047741524, 'samples': 11097600, 'steps': 21674, 'loss/train': 2.1434948444366455} +02/25/2022 05:39:16 - INFO - codeparrot_training - Step 21675: {'lr': 0.0003198263366171315, 'samples': 11098112, 'steps': 21675, 'loss/train': 2.1019387245178223} +02/25/2022 05:39:19 - INFO - codeparrot_training - Step 21676: {'lr': 0.00031981062519362513, 'samples': 11098624, 'steps': 21676, 'loss/train': 2.29005765914917} +02/25/2022 05:39:24 - INFO - codeparrot_training - Step 21677: {'lr': 0.00031979491347107226, 'samples': 11099136, 'steps': 21677, 'loss/train': 2.097097873687744} +02/25/2022 05:39:28 - INFO - codeparrot_training - Step 21678: {'lr': 0.0003197792014495402, 'samples': 11099648, 'steps': 21678, 'loss/train': 2.0289864540100098} +02/25/2022 05:39:34 - INFO - codeparrot_training - Step 21679: {'lr': 0.0003197634891290961, 'samples': 11100160, 'steps': 21679, 'loss/train': 1.9478580951690674} +02/25/2022 05:39:37 - INFO - codeparrot_training - Step 21680: {'lr': 0.00031974777650980735, 'samples': 11100672, 'steps': 21680, 'loss/train': 1.9184279441833496} +02/25/2022 05:39:42 - INFO - codeparrot_training - Step 21681: {'lr': 0.0003197320635917413, 'samples': 11101184, 'steps': 21681, 'loss/train': 1.909900426864624} +02/25/2022 05:39:46 - INFO - codeparrot_training - Step 21682: {'lr': 0.0003197163503749652, 'samples': 11101696, 'steps': 21682, 'loss/train': 1.452235460281372} +02/25/2022 05:39:52 - INFO - codeparrot_training - Step 21683: {'lr': 0.00031970063685954645, 'samples': 11102208, 'steps': 21683, 'loss/train': 1.1591506004333496} +02/25/2022 05:39:55 - INFO - codeparrot_training - Step 21684: {'lr': 0.0003196849230455523, 'samples': 11102720, 'steps': 21684, 'loss/train': 2.5111916065216064} +02/25/2022 05:40:01 - INFO - codeparrot_training - Step 21685: {'lr': 0.0003196692089330501, 'samples': 11103232, 'steps': 21685, 'loss/train': 4.647315502166748} +02/25/2022 05:40:05 - INFO - codeparrot_training - Step 21686: {'lr': 0.000319653494522107, 'samples': 11103744, 'steps': 21686, 'loss/train': 1.4676721096038818} +02/25/2022 05:40:10 - INFO - codeparrot_training - Step 21687: {'lr': 0.00031963777981279057, 'samples': 11104256, 'steps': 21687, 'loss/train': 2.463695764541626} +02/25/2022 05:40:14 - INFO - codeparrot_training - Step 21688: {'lr': 0.00031962206480516794, 'samples': 11104768, 'steps': 21688, 'loss/train': 2.2198917865753174} +02/25/2022 05:40:19 - INFO - codeparrot_training - Step 21689: {'lr': 0.00031960634949930656, 'samples': 11105280, 'steps': 21689, 'loss/train': 3.032831907272339} +02/25/2022 05:40:23 - INFO - codeparrot_training - Step 21690: {'lr': 0.0003195906338952736, 'samples': 11105792, 'steps': 21690, 'loss/train': 1.944473385810852} +02/25/2022 05:40:29 - INFO - codeparrot_training - Step 21691: {'lr': 0.00031957491799313646, 'samples': 11106304, 'steps': 21691, 'loss/train': 1.865497350692749} +02/25/2022 05:40:32 - INFO - codeparrot_training - Step 21692: {'lr': 0.0003195592017929625, 'samples': 11106816, 'steps': 21692, 'loss/train': 1.2514517307281494} +02/25/2022 05:40:38 - INFO - codeparrot_training - Step 21693: {'lr': 0.000319543485294819, 'samples': 11107328, 'steps': 21693, 'loss/train': 2.192732810974121} +02/25/2022 05:40:41 - INFO - codeparrot_training - Step 21694: {'lr': 0.0003195277684987733, 'samples': 11107840, 'steps': 21694, 'loss/train': 1.3806431293487549} +02/25/2022 05:40:47 - INFO - codeparrot_training - Step 21695: {'lr': 0.0003195120514048927, 'samples': 11108352, 'steps': 21695, 'loss/train': 1.8311817646026611} +02/25/2022 05:40:51 - INFO - codeparrot_training - Step 21696: {'lr': 0.00031949633401324464, 'samples': 11108864, 'steps': 21696, 'loss/train': 3.015684127807617} +02/25/2022 05:40:56 - INFO - codeparrot_training - Step 21697: {'lr': 0.00031948061632389624, 'samples': 11109376, 'steps': 21697, 'loss/train': 2.6459567546844482} +02/25/2022 05:41:00 - INFO - codeparrot_training - Step 21698: {'lr': 0.00031946489833691494, 'samples': 11109888, 'steps': 21698, 'loss/train': 2.950221061706543} +02/25/2022 05:41:06 - INFO - codeparrot_training - Step 21699: {'lr': 0.0003194491800523681, 'samples': 11110400, 'steps': 21699, 'loss/train': 8.44400691986084} +02/25/2022 05:41:09 - INFO - codeparrot_training - Step 21700: {'lr': 0.0003194334614703231, 'samples': 11110912, 'steps': 21700, 'loss/train': 1.637054204940796} +02/25/2022 05:41:14 - INFO - codeparrot_training - Step 21701: {'lr': 0.0003194177425908471, 'samples': 11111424, 'steps': 21701, 'loss/train': 2.4237711429595947} +02/25/2022 05:41:18 - INFO - codeparrot_training - Step 21702: {'lr': 0.0003194020234140076, 'samples': 11111936, 'steps': 21702, 'loss/train': 0.9742262363433838} +02/25/2022 05:41:24 - INFO - codeparrot_training - Step 21703: {'lr': 0.00031938630393987176, 'samples': 11112448, 'steps': 21703, 'loss/train': 3.2086498737335205} +02/25/2022 05:41:27 - INFO - codeparrot_training - Step 21704: {'lr': 0.00031937058416850716, 'samples': 11112960, 'steps': 21704, 'loss/train': 2.247377395629883} +02/25/2022 05:41:31 - INFO - codeparrot_training - Step 21705: {'lr': 0.00031935486409998096, 'samples': 11113472, 'steps': 21705, 'loss/train': 2.8686368465423584} +02/25/2022 05:41:37 - INFO - codeparrot_training - Step 21706: {'lr': 0.0003193391437343605, 'samples': 11113984, 'steps': 21706, 'loss/train': 1.051864743232727} +02/25/2022 05:41:40 - INFO - codeparrot_training - Step 21707: {'lr': 0.0003193234230717132, 'samples': 11114496, 'steps': 21707, 'loss/train': 2.027564287185669} +02/25/2022 05:41:46 - INFO - codeparrot_training - Step 21708: {'lr': 0.00031930770211210637, 'samples': 11115008, 'steps': 21708, 'loss/train': 1.927280306816101} +02/25/2022 05:41:49 - INFO - codeparrot_training - Step 21709: {'lr': 0.0003192919808556073, 'samples': 11115520, 'steps': 21709, 'loss/train': 2.390582323074341} +02/25/2022 05:41:57 - INFO - codeparrot_training - Step 21710: {'lr': 0.00031927625930228343, 'samples': 11116032, 'steps': 21710, 'loss/train': 0.09722858667373657} +02/25/2022 05:42:00 - INFO - codeparrot_training - Step 21711: {'lr': 0.00031926053745220213, 'samples': 11116544, 'steps': 21711, 'loss/train': 1.2596288919448853} +02/25/2022 05:42:06 - INFO - codeparrot_training - Step 21712: {'lr': 0.0003192448153054306, 'samples': 11117056, 'steps': 21712, 'loss/train': 1.5834779739379883} +02/25/2022 05:42:09 - INFO - codeparrot_training - Step 21713: {'lr': 0.0003192290928620363, 'samples': 11117568, 'steps': 21713, 'loss/train': 0.8703785538673401} +02/25/2022 05:42:15 - INFO - codeparrot_training - Step 21714: {'lr': 0.0003192133701220865, 'samples': 11118080, 'steps': 21714, 'loss/train': 1.3689699172973633} +02/25/2022 05:42:18 - INFO - codeparrot_training - Step 21715: {'lr': 0.0003191976470856487, 'samples': 11118592, 'steps': 21715, 'loss/train': 0.7610230445861816} +02/25/2022 05:42:24 - INFO - codeparrot_training - Step 21716: {'lr': 0.00031918192375279006, 'samples': 11119104, 'steps': 21716, 'loss/train': 2.0348331928253174} +02/25/2022 05:42:27 - INFO - codeparrot_training - Step 21717: {'lr': 0.00031916620012357804, 'samples': 11119616, 'steps': 21717, 'loss/train': 1.950169324874878} +02/25/2022 05:42:33 - INFO - codeparrot_training - Step 21718: {'lr': 0.00031915047619808, 'samples': 11120128, 'steps': 21718, 'loss/train': 1.8343373537063599} +02/25/2022 05:42:36 - INFO - codeparrot_training - Step 21719: {'lr': 0.0003191347519763633, 'samples': 11120640, 'steps': 21719, 'loss/train': 4.004819869995117} +02/25/2022 05:42:44 - INFO - codeparrot_training - Step 21720: {'lr': 0.00031911902745849525, 'samples': 11121152, 'steps': 21720, 'loss/train': 3.9783334732055664} +02/25/2022 05:42:47 - INFO - codeparrot_training - Step 21721: {'lr': 0.0003191033026445432, 'samples': 11121664, 'steps': 21721, 'loss/train': 0.32352808117866516} +02/25/2022 05:42:53 - INFO - codeparrot_training - Step 21722: {'lr': 0.00031908757753457465, 'samples': 11122176, 'steps': 21722, 'loss/train': 1.5259758234024048} +02/25/2022 05:42:56 - INFO - codeparrot_training - Step 21723: {'lr': 0.00031907185212865673, 'samples': 11122688, 'steps': 21723, 'loss/train': 2.676894426345825} +02/25/2022 05:43:02 - INFO - codeparrot_training - Step 21724: {'lr': 0.000319056126426857, 'samples': 11123200, 'steps': 21724, 'loss/train': 2.128580093383789} +02/25/2022 05:43:05 - INFO - codeparrot_training - Step 21725: {'lr': 0.0003190404004292427, 'samples': 11123712, 'steps': 21725, 'loss/train': 2.2001075744628906} +02/25/2022 05:43:11 - INFO - codeparrot_training - Step 21726: {'lr': 0.00031902467413588134, 'samples': 11124224, 'steps': 21726, 'loss/train': 1.6967453956604004} +02/25/2022 05:43:15 - INFO - codeparrot_training - Step 21727: {'lr': 0.00031900894754684006, 'samples': 11124736, 'steps': 21727, 'loss/train': 1.6169955730438232} +02/25/2022 05:43:20 - INFO - codeparrot_training - Step 21728: {'lr': 0.0003189932206621865, 'samples': 11125248, 'steps': 21728, 'loss/train': 2.5277345180511475} +02/25/2022 05:43:23 - INFO - codeparrot_training - Step 21729: {'lr': 0.00031897749348198777, 'samples': 11125760, 'steps': 21729, 'loss/train': 1.373136281967163} +02/25/2022 05:43:31 - INFO - codeparrot_training - Step 21730: {'lr': 0.0003189617660063114, 'samples': 11126272, 'steps': 21730, 'loss/train': 2.139944553375244} +02/25/2022 05:43:34 - INFO - codeparrot_training - Step 21731: {'lr': 0.0003189460382352248, 'samples': 11126784, 'steps': 21731, 'loss/train': 2.2382009029388428} +02/25/2022 05:43:40 - INFO - codeparrot_training - Step 21732: {'lr': 0.00031893031016879515, 'samples': 11127296, 'steps': 21732, 'loss/train': 2.19052791595459} +02/25/2022 05:43:43 - INFO - codeparrot_training - Step 21733: {'lr': 0.00031891458180709003, 'samples': 11127808, 'steps': 21733, 'loss/train': 1.669534683227539} +02/25/2022 05:43:49 - INFO - codeparrot_training - Step 21734: {'lr': 0.0003188988531501766, 'samples': 11128320, 'steps': 21734, 'loss/train': 2.308239698410034} +02/25/2022 05:43:52 - INFO - codeparrot_training - Step 21735: {'lr': 0.00031888312419812255, 'samples': 11128832, 'steps': 21735, 'loss/train': 1.716054081916809} +02/25/2022 05:43:58 - INFO - codeparrot_training - Step 21736: {'lr': 0.00031886739495099494, 'samples': 11129344, 'steps': 21736, 'loss/train': 0.9718392491340637} +02/25/2022 05:44:01 - INFO - codeparrot_training - Step 21737: {'lr': 0.00031885166540886135, 'samples': 11129856, 'steps': 21737, 'loss/train': 1.8353155851364136} +02/25/2022 05:44:08 - INFO - codeparrot_training - Step 21738: {'lr': 0.000318835935571789, 'samples': 11130368, 'steps': 21738, 'loss/train': 1.8165950775146484} +02/25/2022 05:44:12 - INFO - codeparrot_training - Step 21739: {'lr': 0.0003188202054398454, 'samples': 11130880, 'steps': 21739, 'loss/train': 2.2578961849212646} +02/25/2022 05:44:17 - INFO - codeparrot_training - Step 21740: {'lr': 0.00031880447501309787, 'samples': 11131392, 'steps': 21740, 'loss/train': 2.956209182739258} +02/25/2022 05:44:21 - INFO - codeparrot_training - Step 21741: {'lr': 0.0003187887442916139, 'samples': 11131904, 'steps': 21741, 'loss/train': 0.956165075302124} +02/25/2022 05:44:26 - INFO - codeparrot_training - Step 21742: {'lr': 0.0003187730132754607, 'samples': 11132416, 'steps': 21742, 'loss/train': 1.874299168586731} +02/25/2022 05:44:30 - INFO - codeparrot_training - Step 21743: {'lr': 0.0003187572819647058, 'samples': 11132928, 'steps': 21743, 'loss/train': 3.6773388385772705} +02/25/2022 05:44:35 - INFO - codeparrot_training - Step 21744: {'lr': 0.00031874155035941656, 'samples': 11133440, 'steps': 21744, 'loss/train': 2.2933857440948486} +02/25/2022 05:44:39 - INFO - codeparrot_training - Step 21745: {'lr': 0.00031872581845966024, 'samples': 11133952, 'steps': 21745, 'loss/train': 0.9773945808410645} +02/25/2022 05:44:44 - INFO - codeparrot_training - Step 21746: {'lr': 0.0003187100862655044, 'samples': 11134464, 'steps': 21746, 'loss/train': 2.891629457473755} +02/25/2022 05:44:48 - INFO - codeparrot_training - Step 21747: {'lr': 0.00031869435377701637, 'samples': 11134976, 'steps': 21747, 'loss/train': 1.495321273803711} +02/25/2022 05:44:53 - INFO - codeparrot_training - Step 21748: {'lr': 0.0003186786209942636, 'samples': 11135488, 'steps': 21748, 'loss/train': 1.6084660291671753} +02/25/2022 05:44:57 - INFO - codeparrot_training - Step 21749: {'lr': 0.00031866288791731334, 'samples': 11136000, 'steps': 21749, 'loss/train': 1.2702771425247192} +02/25/2022 05:45:02 - INFO - codeparrot_training - Step 21750: {'lr': 0.0003186471545462331, 'samples': 11136512, 'steps': 21750, 'loss/train': 1.8765714168548584} +02/25/2022 05:45:06 - INFO - codeparrot_training - Step 21751: {'lr': 0.0003186314208810902, 'samples': 11137024, 'steps': 21751, 'loss/train': 1.386092185974121} +02/25/2022 05:45:12 - INFO - codeparrot_training - Step 21752: {'lr': 0.0003186156869219522, 'samples': 11137536, 'steps': 21752, 'loss/train': 1.234541416168213} +02/25/2022 05:45:15 - INFO - codeparrot_training - Step 21753: {'lr': 0.0003185999526688863, 'samples': 11138048, 'steps': 21753, 'loss/train': 2.1140732765197754} +02/25/2022 05:45:21 - INFO - codeparrot_training - Step 21754: {'lr': 0.00031858421812196, 'samples': 11138560, 'steps': 21754, 'loss/train': 1.5866442918777466} +02/25/2022 05:45:24 - INFO - codeparrot_training - Step 21755: {'lr': 0.0003185684832812407, 'samples': 11139072, 'steps': 21755, 'loss/train': 1.4904589653015137} +02/25/2022 05:45:32 - INFO - codeparrot_training - Step 21756: {'lr': 0.00031855274814679576, 'samples': 11139584, 'steps': 21756, 'loss/train': 2.090688467025757} +02/25/2022 05:45:35 - INFO - codeparrot_training - Step 21757: {'lr': 0.0003185370127186926, 'samples': 11140096, 'steps': 21757, 'loss/train': 2.320434331893921} +02/25/2022 05:45:41 - INFO - codeparrot_training - Step 21758: {'lr': 0.00031852127699699874, 'samples': 11140608, 'steps': 21758, 'loss/train': 1.4662401676177979} +02/25/2022 05:45:44 - INFO - codeparrot_training - Step 21759: {'lr': 0.0003185055409817814, 'samples': 11141120, 'steps': 21759, 'loss/train': 3.2484476566314697} +02/25/2022 05:45:50 - INFO - codeparrot_training - Step 21760: {'lr': 0.0003184898046731082, 'samples': 11141632, 'steps': 21760, 'loss/train': 2.333574056625366} +02/25/2022 05:45:53 - INFO - codeparrot_training - Step 21761: {'lr': 0.0003184740680710462, 'samples': 11142144, 'steps': 21761, 'loss/train': 2.1570136547088623} +02/25/2022 05:45:58 - INFO - codeparrot_training - Step 21762: {'lr': 0.00031845833117566326, 'samples': 11142656, 'steps': 21762, 'loss/train': 1.5060921907424927} +02/25/2022 05:46:02 - INFO - codeparrot_training - Step 21763: {'lr': 0.0003184425939870264, 'samples': 11143168, 'steps': 21763, 'loss/train': 2.389431953430176} +02/25/2022 05:46:07 - INFO - codeparrot_training - Step 21764: {'lr': 0.0003184268565052033, 'samples': 11143680, 'steps': 21764, 'loss/train': 1.9260852336883545} +02/25/2022 05:46:11 - INFO - codeparrot_training - Step 21765: {'lr': 0.00031841111873026124, 'samples': 11144192, 'steps': 21765, 'loss/train': 1.991213083267212} +02/25/2022 05:46:18 - INFO - codeparrot_training - Step 21766: {'lr': 0.0003183953806622677, 'samples': 11144704, 'steps': 21766, 'loss/train': 2.0757369995117188} +02/25/2022 05:46:22 - INFO - codeparrot_training - Step 21767: {'lr': 0.00031837964230129004, 'samples': 11145216, 'steps': 21767, 'loss/train': 1.8996134996414185} +02/25/2022 05:46:27 - INFO - codeparrot_training - Step 21768: {'lr': 0.0003183639036473957, 'samples': 11145728, 'steps': 21768, 'loss/train': 2.2683517932891846} +02/25/2022 05:46:30 - INFO - codeparrot_training - Step 21769: {'lr': 0.0003183481647006521, 'samples': 11146240, 'steps': 21769, 'loss/train': 1.8951406478881836} +02/25/2022 05:46:36 - INFO - codeparrot_training - Step 21770: {'lr': 0.0003183324254611267, 'samples': 11146752, 'steps': 21770, 'loss/train': 1.2873823642730713} +02/25/2022 05:46:39 - INFO - codeparrot_training - Step 21771: {'lr': 0.00031831668592888684, 'samples': 11147264, 'steps': 21771, 'loss/train': 1.0109927654266357} +02/25/2022 05:46:45 - INFO - codeparrot_training - Step 21772: {'lr': 0.0003183009461040001, 'samples': 11147776, 'steps': 21772, 'loss/train': 1.9222068786621094} +02/25/2022 05:46:48 - INFO - codeparrot_training - Step 21773: {'lr': 0.0003182852059865337, 'samples': 11148288, 'steps': 21773, 'loss/train': 2.1747031211853027} +02/25/2022 05:46:54 - INFO - codeparrot_training - Step 21774: {'lr': 0.0003182694655765551, 'samples': 11148800, 'steps': 21774, 'loss/train': 1.476946473121643} +02/25/2022 05:46:57 - INFO - codeparrot_training - Step 21775: {'lr': 0.00031825372487413186, 'samples': 11149312, 'steps': 21775, 'loss/train': 2.472576141357422} +02/25/2022 05:47:05 - INFO - codeparrot_training - Step 21776: {'lr': 0.00031823798387933133, 'samples': 11149824, 'steps': 21776, 'loss/train': 2.259316921234131} +02/25/2022 05:47:08 - INFO - codeparrot_training - Step 21777: {'lr': 0.00031822224259222095, 'samples': 11150336, 'steps': 21777, 'loss/train': 2.6426916122436523} +02/25/2022 05:47:14 - INFO - codeparrot_training - Step 21778: {'lr': 0.0003182065010128682, 'samples': 11150848, 'steps': 21778, 'loss/train': 1.7708532810211182} +02/25/2022 05:47:17 - INFO - codeparrot_training - Step 21779: {'lr': 0.0003181907591413403, 'samples': 11151360, 'steps': 21779, 'loss/train': 1.8966010808944702} +02/25/2022 05:47:23 - INFO - codeparrot_training - Step 21780: {'lr': 0.00031817501697770496, 'samples': 11151872, 'steps': 21780, 'loss/train': 1.9514433145523071} +02/25/2022 05:47:26 - INFO - codeparrot_training - Step 21781: {'lr': 0.00031815927452202955, 'samples': 11152384, 'steps': 21781, 'loss/train': 1.747356653213501} +02/25/2022 05:47:32 - INFO - codeparrot_training - Step 21782: {'lr': 0.0003181435317743813, 'samples': 11152896, 'steps': 21782, 'loss/train': 1.8831177949905396} +02/25/2022 05:47:37 - INFO - codeparrot_training - Step 21783: {'lr': 0.00031812778873482796, 'samples': 11153408, 'steps': 21783, 'loss/train': 1.084555983543396} +02/25/2022 05:47:41 - INFO - codeparrot_training - Step 21784: {'lr': 0.00031811204540343666, 'samples': 11153920, 'steps': 21784, 'loss/train': 1.1655285358428955} +02/25/2022 05:47:48 - INFO - codeparrot_training - Step 21785: {'lr': 0.00031809630178027506, 'samples': 11154432, 'steps': 21785, 'loss/train': 2.3911197185516357} +02/25/2022 05:47:51 - INFO - codeparrot_training - Step 21786: {'lr': 0.0003180805578654105, 'samples': 11154944, 'steps': 21786, 'loss/train': 1.0611038208007812} +02/25/2022 05:47:57 - INFO - codeparrot_training - Step 21787: {'lr': 0.0003180648136589105, 'samples': 11155456, 'steps': 21787, 'loss/train': 2.358529567718506} +02/25/2022 05:48:00 - INFO - codeparrot_training - Step 21788: {'lr': 0.00031804906916084235, 'samples': 11155968, 'steps': 21788, 'loss/train': 1.7644219398498535} +02/25/2022 05:48:04 - INFO - codeparrot_training - Step 21789: {'lr': 0.0003180333243712737, 'samples': 11156480, 'steps': 21789, 'loss/train': 1.7161561250686646} +02/25/2022 05:48:09 - INFO - codeparrot_training - Step 21790: {'lr': 0.00031801757929027187, 'samples': 11156992, 'steps': 21790, 'loss/train': 2.701889991760254} +02/25/2022 05:48:13 - INFO - codeparrot_training - Step 21791: {'lr': 0.0003180018339179043, 'samples': 11157504, 'steps': 21791, 'loss/train': 1.7461918592453003} +02/25/2022 05:48:19 - INFO - codeparrot_training - Step 21792: {'lr': 0.00031798608825423847, 'samples': 11158016, 'steps': 21792, 'loss/train': 2.433109998703003} +02/25/2022 05:48:23 - INFO - codeparrot_training - Step 21793: {'lr': 0.0003179703422993418, 'samples': 11158528, 'steps': 21793, 'loss/train': 1.8091360330581665} +02/25/2022 05:48:26 - INFO - codeparrot_training - Step 21794: {'lr': 0.00031795459605328183, 'samples': 11159040, 'steps': 21794, 'loss/train': 1.9812487363815308} +02/25/2022 05:48:32 - INFO - codeparrot_training - Step 21795: {'lr': 0.0003179388495161259, 'samples': 11159552, 'steps': 21795, 'loss/train': 1.5676612854003906} +02/25/2022 05:48:35 - INFO - codeparrot_training - Step 21796: {'lr': 0.00031792310268794155, 'samples': 11160064, 'steps': 21796, 'loss/train': 1.1732467412948608} +02/25/2022 05:48:43 - INFO - codeparrot_training - Step 21797: {'lr': 0.0003179073555687961, 'samples': 11160576, 'steps': 21797, 'loss/train': 1.2560960054397583} +02/25/2022 05:48:46 - INFO - codeparrot_training - Step 21798: {'lr': 0.00031789160815875724, 'samples': 11161088, 'steps': 21798, 'loss/train': 1.4300634860992432} +02/25/2022 05:48:52 - INFO - codeparrot_training - Step 21799: {'lr': 0.0003178758604578922, 'samples': 11161600, 'steps': 21799, 'loss/train': 1.5342192649841309} +02/25/2022 05:48:55 - INFO - codeparrot_training - Step 21800: {'lr': 0.00031786011246626855, 'samples': 11162112, 'steps': 21800, 'loss/train': 2.1601996421813965} +02/25/2022 05:49:01 - INFO - codeparrot_training - Step 21801: {'lr': 0.00031784436418395373, 'samples': 11162624, 'steps': 21801, 'loss/train': 0.8945356607437134} +02/25/2022 05:49:04 - INFO - codeparrot_training - Step 21802: {'lr': 0.0003178286156110152, 'samples': 11163136, 'steps': 21802, 'loss/train': 1.3482789993286133} +02/25/2022 05:49:09 - INFO - codeparrot_training - Step 21803: {'lr': 0.00031781286674752043, 'samples': 11163648, 'steps': 21803, 'loss/train': 2.4322028160095215} +02/25/2022 05:49:13 - INFO - codeparrot_training - Step 21804: {'lr': 0.00031779711759353683, 'samples': 11164160, 'steps': 21804, 'loss/train': 2.3651041984558105} +02/25/2022 05:49:18 - INFO - codeparrot_training - Step 21805: {'lr': 0.00031778136814913195, 'samples': 11164672, 'steps': 21805, 'loss/train': 2.624145030975342} +02/25/2022 05:49:22 - INFO - codeparrot_training - Step 21806: {'lr': 0.0003177656184143732, 'samples': 11165184, 'steps': 21806, 'loss/train': 1.7672096490859985} +02/25/2022 05:49:27 - INFO - codeparrot_training - Step 21807: {'lr': 0.0003177498683893281, 'samples': 11165696, 'steps': 21807, 'loss/train': 1.8487001657485962} +02/25/2022 05:49:31 - INFO - codeparrot_training - Step 21808: {'lr': 0.000317734118074064, 'samples': 11166208, 'steps': 21808, 'loss/train': 1.944106936454773} +02/25/2022 05:49:36 - INFO - codeparrot_training - Step 21809: {'lr': 0.00031771836746864854, 'samples': 11166720, 'steps': 21809, 'loss/train': 2.9717795848846436} +02/25/2022 05:49:40 - INFO - codeparrot_training - Step 21810: {'lr': 0.000317702616573149, 'samples': 11167232, 'steps': 21810, 'loss/train': 2.2071993350982666} +02/25/2022 05:49:45 - INFO - codeparrot_training - Step 21811: {'lr': 0.000317686865387633, 'samples': 11167744, 'steps': 21811, 'loss/train': 2.3968911170959473} +02/25/2022 05:49:53 - INFO - codeparrot_training - Step 21812: {'lr': 0.0003176711139121679, 'samples': 11168256, 'steps': 21812, 'loss/train': 0.9619163870811462} +02/25/2022 05:49:56 - INFO - codeparrot_training - Step 21813: {'lr': 0.00031765536214682134, 'samples': 11168768, 'steps': 21813, 'loss/train': 2.2722325325012207} +02/25/2022 05:50:02 - INFO - codeparrot_training - Step 21814: {'lr': 0.00031763961009166055, 'samples': 11169280, 'steps': 21814, 'loss/train': 1.8562426567077637} +02/25/2022 05:50:05 - INFO - codeparrot_training - Step 21815: {'lr': 0.00031762385774675324, 'samples': 11169792, 'steps': 21815, 'loss/train': 2.3930931091308594} +02/25/2022 05:50:11 - INFO - codeparrot_training - Step 21816: {'lr': 0.0003176081051121668, 'samples': 11170304, 'steps': 21816, 'loss/train': 1.8064790964126587} +02/25/2022 05:50:14 - INFO - codeparrot_training - Step 21817: {'lr': 0.0003175923521879687, 'samples': 11170816, 'steps': 21817, 'loss/train': 1.9096622467041016} +02/25/2022 05:50:20 - INFO - codeparrot_training - Step 21818: {'lr': 0.0003175765989742264, 'samples': 11171328, 'steps': 21818, 'loss/train': 3.021843910217285} +02/25/2022 05:50:23 - INFO - codeparrot_training - Step 21819: {'lr': 0.0003175608454710074, 'samples': 11171840, 'steps': 21819, 'loss/train': 1.4630122184753418} +02/25/2022 05:50:29 - INFO - codeparrot_training - Step 21820: {'lr': 0.00031754509167837927, 'samples': 11172352, 'steps': 21820, 'loss/train': 2.1704537868499756} +02/25/2022 05:50:32 - INFO - codeparrot_training - Step 21821: {'lr': 0.00031752933759640937, 'samples': 11172864, 'steps': 21821, 'loss/train': 1.9772887229919434} +02/25/2022 05:50:39 - INFO - codeparrot_training - Step 21822: {'lr': 0.0003175135832251652, 'samples': 11173376, 'steps': 21822, 'loss/train': 2.621196985244751} +02/25/2022 05:50:43 - INFO - codeparrot_training - Step 21823: {'lr': 0.00031749782856471426, 'samples': 11173888, 'steps': 21823, 'loss/train': 1.4463396072387695} +02/25/2022 05:50:48 - INFO - codeparrot_training - Step 21824: {'lr': 0.00031748207361512415, 'samples': 11174400, 'steps': 21824, 'loss/train': 1.6398717164993286} +02/25/2022 05:50:52 - INFO - codeparrot_training - Step 21825: {'lr': 0.00031746631837646216, 'samples': 11174912, 'steps': 21825, 'loss/train': 1.392532229423523} +02/25/2022 05:50:57 - INFO - codeparrot_training - Step 21826: {'lr': 0.000317450562848796, 'samples': 11175424, 'steps': 21826, 'loss/train': 1.6127662658691406} +02/25/2022 05:51:01 - INFO - codeparrot_training - Step 21827: {'lr': 0.00031743480703219293, 'samples': 11175936, 'steps': 21827, 'loss/train': 1.640582799911499} +02/25/2022 05:51:07 - INFO - codeparrot_training - Step 21828: {'lr': 0.00031741905092672057, 'samples': 11176448, 'steps': 21828, 'loss/train': 2.6265556812286377} +02/25/2022 05:51:10 - INFO - codeparrot_training - Step 21829: {'lr': 0.0003174032945324465, 'samples': 11176960, 'steps': 21829, 'loss/train': 2.2672958374023438} +02/25/2022 05:51:16 - INFO - codeparrot_training - Step 21830: {'lr': 0.00031738753784943803, 'samples': 11177472, 'steps': 21830, 'loss/train': 1.5970215797424316} +02/25/2022 05:51:19 - INFO - codeparrot_training - Step 21831: {'lr': 0.0003173717808777628, 'samples': 11177984, 'steps': 21831, 'loss/train': 1.926913857460022} +02/25/2022 05:51:27 - INFO - codeparrot_training - Step 21832: {'lr': 0.00031735602361748815, 'samples': 11178496, 'steps': 21832, 'loss/train': 3.0766592025756836} +02/25/2022 05:51:30 - INFO - codeparrot_training - Step 21833: {'lr': 0.00031734026606868184, 'samples': 11179008, 'steps': 21833, 'loss/train': 1.5882474184036255} +02/25/2022 05:51:36 - INFO - codeparrot_training - Step 21834: {'lr': 0.0003173245082314111, 'samples': 11179520, 'steps': 21834, 'loss/train': 0.5971845388412476} +02/25/2022 05:51:40 - INFO - codeparrot_training - Step 21835: {'lr': 0.0003173087501057436, 'samples': 11180032, 'steps': 21835, 'loss/train': 2.0837650299072266} +02/25/2022 05:51:43 - INFO - codeparrot_training - Step 21836: {'lr': 0.00031729299169174673, 'samples': 11180544, 'steps': 21836, 'loss/train': 1.2923085689544678} +02/25/2022 05:51:49 - INFO - codeparrot_training - Step 21837: {'lr': 0.0003172772329894882, 'samples': 11181056, 'steps': 21837, 'loss/train': 2.2904369831085205} +02/25/2022 05:51:52 - INFO - codeparrot_training - Step 21838: {'lr': 0.0003172614739990352, 'samples': 11181568, 'steps': 21838, 'loss/train': 1.8894999027252197} +02/25/2022 05:51:58 - INFO - codeparrot_training - Step 21839: {'lr': 0.0003172457147204554, 'samples': 11182080, 'steps': 21839, 'loss/train': 1.8371168375015259} +02/25/2022 05:52:01 - INFO - codeparrot_training - Step 21840: {'lr': 0.0003172299551538164, 'samples': 11182592, 'steps': 21840, 'loss/train': 1.8612170219421387} +02/25/2022 05:52:07 - INFO - codeparrot_training - Step 21841: {'lr': 0.0003172141952991856, 'samples': 11183104, 'steps': 21841, 'loss/train': 1.421238660812378} +02/25/2022 05:52:10 - INFO - codeparrot_training - Step 21842: {'lr': 0.00031719843515663055, 'samples': 11183616, 'steps': 21842, 'loss/train': 1.301273226737976} +02/25/2022 05:52:16 - INFO - codeparrot_training - Step 21843: {'lr': 0.0003171826747262187, 'samples': 11184128, 'steps': 21843, 'loss/train': 1.9404352903366089} +02/25/2022 05:52:19 - INFO - codeparrot_training - Step 21844: {'lr': 0.0003171669140080177, 'samples': 11184640, 'steps': 21844, 'loss/train': 1.9364114999771118} +02/25/2022 05:52:27 - INFO - codeparrot_training - Step 21845: {'lr': 0.00031715115300209477, 'samples': 11185152, 'steps': 21845, 'loss/train': 1.4868065118789673} +02/25/2022 05:52:30 - INFO - codeparrot_training - Step 21846: {'lr': 0.0003171353917085178, 'samples': 11185664, 'steps': 21846, 'loss/train': 1.7475389242172241} +02/25/2022 05:52:36 - INFO - codeparrot_training - Step 21847: {'lr': 0.00031711963012735414, 'samples': 11186176, 'steps': 21847, 'loss/train': 2.692734718322754} +02/25/2022 05:52:39 - INFO - codeparrot_training - Step 21848: {'lr': 0.0003171038682586712, 'samples': 11186688, 'steps': 21848, 'loss/train': 2.5753443241119385} +02/25/2022 05:52:45 - INFO - codeparrot_training - Step 21849: {'lr': 0.0003170881061025366, 'samples': 11187200, 'steps': 21849, 'loss/train': 2.5939724445343018} +02/25/2022 05:52:48 - INFO - codeparrot_training - Step 21850: {'lr': 0.00031707234365901786, 'samples': 11187712, 'steps': 21850, 'loss/train': 1.4615073204040527} +02/25/2022 05:52:54 - INFO - codeparrot_training - Step 21851: {'lr': 0.0003170565809281826, 'samples': 11188224, 'steps': 21851, 'loss/train': 1.6854385137557983} +02/25/2022 05:52:57 - INFO - codeparrot_training - Step 21852: {'lr': 0.0003170408179100981, 'samples': 11188736, 'steps': 21852, 'loss/train': 1.514777421951294} +02/25/2022 05:53:03 - INFO - codeparrot_training - Step 21853: {'lr': 0.0003170250546048321, 'samples': 11189248, 'steps': 21853, 'loss/train': 1.7585700750350952} +02/25/2022 05:53:06 - INFO - codeparrot_training - Step 21854: {'lr': 0.000317009291012452, 'samples': 11189760, 'steps': 21854, 'loss/train': 2.0186338424682617} +02/25/2022 05:53:12 - INFO - codeparrot_training - Step 21855: {'lr': 0.00031699352713302544, 'samples': 11190272, 'steps': 21855, 'loss/train': 2.488243341445923} +02/25/2022 05:53:15 - INFO - codeparrot_training - Step 21856: {'lr': 0.00031697776296661987, 'samples': 11190784, 'steps': 21856, 'loss/train': 1.6587148904800415} +02/25/2022 05:53:21 - INFO - codeparrot_training - Step 21857: {'lr': 0.0003169619985133028, 'samples': 11191296, 'steps': 21857, 'loss/train': 1.4487026929855347} +02/25/2022 05:53:24 - INFO - codeparrot_training - Step 21858: {'lr': 0.0003169462337731418, 'samples': 11191808, 'steps': 21858, 'loss/train': 1.4821003675460815} +02/25/2022 05:53:32 - INFO - codeparrot_training - Step 21859: {'lr': 0.0003169304687462044, 'samples': 11192320, 'steps': 21859, 'loss/train': 1.9078369140625} +02/25/2022 05:53:35 - INFO - codeparrot_training - Step 21860: {'lr': 0.00031691470343255814, 'samples': 11192832, 'steps': 21860, 'loss/train': 2.3228437900543213} +02/25/2022 05:53:41 - INFO - codeparrot_training - Step 21861: {'lr': 0.00031689893783227053, 'samples': 11193344, 'steps': 21861, 'loss/train': 1.3735699653625488} +02/25/2022 05:53:44 - INFO - codeparrot_training - Step 21862: {'lr': 0.00031688317194540904, 'samples': 11193856, 'steps': 21862, 'loss/train': 3.6704025268554688} +02/25/2022 05:53:50 - INFO - codeparrot_training - Step 21863: {'lr': 0.0003168674057720413, 'samples': 11194368, 'steps': 21863, 'loss/train': 1.6292989253997803} +02/25/2022 05:53:53 - INFO - codeparrot_training - Step 21864: {'lr': 0.0003168516393122349, 'samples': 11194880, 'steps': 21864, 'loss/train': 0.9050735831260681} +02/25/2022 05:53:59 - INFO - codeparrot_training - Step 21865: {'lr': 0.0003168358725660573, 'samples': 11195392, 'steps': 21865, 'loss/train': 2.204618215560913} +02/25/2022 05:54:02 - INFO - codeparrot_training - Step 21866: {'lr': 0.000316820105533576, 'samples': 11195904, 'steps': 21866, 'loss/train': 0.9024485349655151} +02/25/2022 05:54:08 - INFO - codeparrot_training - Step 21867: {'lr': 0.0003168043382148586, 'samples': 11196416, 'steps': 21867, 'loss/train': 2.4066126346588135} +02/25/2022 05:54:11 - INFO - codeparrot_training - Step 21868: {'lr': 0.0003167885706099726, 'samples': 11196928, 'steps': 21868, 'loss/train': 1.3682433366775513} +02/25/2022 05:54:17 - INFO - codeparrot_training - Step 21869: {'lr': 0.0003167728027189856, 'samples': 11197440, 'steps': 21869, 'loss/train': 2.008517026901245} +02/25/2022 05:54:20 - INFO - codeparrot_training - Step 21870: {'lr': 0.00031675703454196513, 'samples': 11197952, 'steps': 21870, 'loss/train': 1.7025039196014404} +02/25/2022 05:54:27 - INFO - codeparrot_training - Step 21871: {'lr': 0.00031674126607897867, 'samples': 11198464, 'steps': 21871, 'loss/train': 2.643584728240967} +02/25/2022 05:54:31 - INFO - codeparrot_training - Step 21872: {'lr': 0.00031672549733009395, 'samples': 11198976, 'steps': 21872, 'loss/train': 1.5366795063018799} +02/25/2022 05:54:36 - INFO - codeparrot_training - Step 21873: {'lr': 0.00031670972829537825, 'samples': 11199488, 'steps': 21873, 'loss/train': 2.6930928230285645} +02/25/2022 05:54:40 - INFO - codeparrot_training - Step 21874: {'lr': 0.0003166939589748993, 'samples': 11200000, 'steps': 21874, 'loss/train': 2.4976396560668945} +02/25/2022 05:54:45 - INFO - codeparrot_training - Step 21875: {'lr': 0.0003166781893687246, 'samples': 11200512, 'steps': 21875, 'loss/train': 1.1043554544448853} +02/25/2022 05:54:49 - INFO - codeparrot_training - Step 21876: {'lr': 0.00031666241947692173, 'samples': 11201024, 'steps': 21876, 'loss/train': 1.8815280199050903} +02/25/2022 05:54:54 - INFO - codeparrot_training - Step 21877: {'lr': 0.0003166466492995582, 'samples': 11201536, 'steps': 21877, 'loss/train': 2.5244462490081787} +02/25/2022 05:54:58 - INFO - codeparrot_training - Step 21878: {'lr': 0.0003166308788367016, 'samples': 11202048, 'steps': 21878, 'loss/train': 1.9727551937103271} +02/25/2022 05:55:03 - INFO - codeparrot_training - Step 21879: {'lr': 0.00031661510808841947, 'samples': 11202560, 'steps': 21879, 'loss/train': 1.600012183189392} +02/25/2022 05:55:07 - INFO - codeparrot_training - Step 21880: {'lr': 0.0003165993370547794, 'samples': 11203072, 'steps': 21880, 'loss/train': 1.368313193321228} +02/25/2022 05:55:13 - INFO - codeparrot_training - Step 21881: {'lr': 0.0003165835657358489, 'samples': 11203584, 'steps': 21881, 'loss/train': 1.874405860900879} +02/25/2022 05:55:17 - INFO - codeparrot_training - Step 21882: {'lr': 0.00031656779413169543, 'samples': 11204096, 'steps': 21882, 'loss/train': 2.786487102508545} +02/25/2022 05:55:22 - INFO - codeparrot_training - Step 21883: {'lr': 0.00031655202224238686, 'samples': 11204608, 'steps': 21883, 'loss/train': 2.423970937728882} +02/25/2022 05:55:26 - INFO - codeparrot_training - Step 21884: {'lr': 0.0003165362500679905, 'samples': 11205120, 'steps': 21884, 'loss/train': 0.9125109910964966} +02/25/2022 05:55:31 - INFO - codeparrot_training - Step 21885: {'lr': 0.00031652047760857393, 'samples': 11205632, 'steps': 21885, 'loss/train': 1.6199257373809814} +02/25/2022 05:55:35 - INFO - codeparrot_training - Step 21886: {'lr': 0.0003165047048642047, 'samples': 11206144, 'steps': 21886, 'loss/train': 2.3350679874420166} +02/25/2022 05:55:40 - INFO - codeparrot_training - Step 21887: {'lr': 0.00031648893183495053, 'samples': 11206656, 'steps': 21887, 'loss/train': 1.8431631326675415} +02/25/2022 05:55:44 - INFO - codeparrot_training - Step 21888: {'lr': 0.0003164731585208789, 'samples': 11207168, 'steps': 21888, 'loss/train': 1.8499499559402466} +02/25/2022 05:55:49 - INFO - codeparrot_training - Step 21889: {'lr': 0.00031645738492205736, 'samples': 11207680, 'steps': 21889, 'loss/train': 1.6582543849945068} +02/25/2022 05:55:53 - INFO - codeparrot_training - Step 21890: {'lr': 0.0003164416110385534, 'samples': 11208192, 'steps': 21890, 'loss/train': 2.1935536861419678} +02/25/2022 05:55:59 - INFO - codeparrot_training - Step 21891: {'lr': 0.0003164258368704347, 'samples': 11208704, 'steps': 21891, 'loss/train': 5.17179012298584} +02/25/2022 05:56:03 - INFO - codeparrot_training - Step 21892: {'lr': 0.00031641006241776886, 'samples': 11209216, 'steps': 21892, 'loss/train': 2.0094187259674072} +02/25/2022 05:56:08 - INFO - codeparrot_training - Step 21893: {'lr': 0.0003163942876806234, 'samples': 11209728, 'steps': 21893, 'loss/train': 1.6515058279037476} +02/25/2022 05:56:12 - INFO - codeparrot_training - Step 21894: {'lr': 0.00031637851265906594, 'samples': 11210240, 'steps': 21894, 'loss/train': 1.353257417678833} +02/25/2022 05:56:17 - INFO - codeparrot_training - Step 21895: {'lr': 0.0003163627373531639, 'samples': 11210752, 'steps': 21895, 'loss/train': 2.082242965698242} +02/25/2022 05:56:21 - INFO - codeparrot_training - Step 21896: {'lr': 0.000316346961762985, 'samples': 11211264, 'steps': 21896, 'loss/train': 1.565011978149414} +02/25/2022 05:56:26 - INFO - codeparrot_training - Step 21897: {'lr': 0.00031633118588859677, 'samples': 11211776, 'steps': 21897, 'loss/train': 2.1991031169891357} +02/25/2022 05:56:30 - INFO - codeparrot_training - Step 21898: {'lr': 0.00031631540973006683, 'samples': 11212288, 'steps': 21898, 'loss/train': 1.5421130657196045} +02/25/2022 05:56:35 - INFO - codeparrot_training - Step 21899: {'lr': 0.0003162996332874627, 'samples': 11212800, 'steps': 21899, 'loss/train': 1.5429047346115112} +02/25/2022 05:56:39 - INFO - codeparrot_training - Step 21900: {'lr': 0.000316283856560852, 'samples': 11213312, 'steps': 21900, 'loss/train': 1.492174744606018} +02/25/2022 05:56:45 - INFO - codeparrot_training - Step 21901: {'lr': 0.00031626807955030236, 'samples': 11213824, 'steps': 21901, 'loss/train': 0.4489256739616394} +02/25/2022 05:56:48 - INFO - codeparrot_training - Step 21902: {'lr': 0.00031625230225588123, 'samples': 11214336, 'steps': 21902, 'loss/train': 1.5887861251831055} +02/25/2022 05:56:54 - INFO - codeparrot_training - Step 21903: {'lr': 0.0003162365246776564, 'samples': 11214848, 'steps': 21903, 'loss/train': 2.179230213165283} +02/25/2022 05:56:57 - INFO - codeparrot_training - Step 21904: {'lr': 0.0003162207468156952, 'samples': 11215360, 'steps': 21904, 'loss/train': 2.1936845779418945} +02/25/2022 05:57:03 - INFO - codeparrot_training - Step 21905: {'lr': 0.00031620496867006543, 'samples': 11215872, 'steps': 21905, 'loss/train': 1.948491096496582} +02/25/2022 05:57:06 - INFO - codeparrot_training - Step 21906: {'lr': 0.0003161891902408345, 'samples': 11216384, 'steps': 21906, 'loss/train': 1.604262351989746} +02/25/2022 05:57:13 - INFO - codeparrot_training - Step 21907: {'lr': 0.00031617341152807024, 'samples': 11216896, 'steps': 21907, 'loss/train': 1.1284022331237793} +02/25/2022 05:57:16 - INFO - codeparrot_training - Step 21908: {'lr': 0.00031615763253183996, 'samples': 11217408, 'steps': 21908, 'loss/train': 1.889078140258789} +02/25/2022 05:57:22 - INFO - codeparrot_training - Step 21909: {'lr': 0.00031614185325221143, 'samples': 11217920, 'steps': 21909, 'loss/train': 2.71724271774292} +02/25/2022 05:57:25 - INFO - codeparrot_training - Step 21910: {'lr': 0.0003161260736892523, 'samples': 11218432, 'steps': 21910, 'loss/train': 2.512601613998413} +02/25/2022 05:57:31 - INFO - codeparrot_training - Step 21911: {'lr': 0.00031611029384302997, 'samples': 11218944, 'steps': 21911, 'loss/train': 1.430791974067688} +02/25/2022 05:57:34 - INFO - codeparrot_training - Step 21912: {'lr': 0.0003160945137136121, 'samples': 11219456, 'steps': 21912, 'loss/train': 1.1917078495025635} +02/25/2022 05:57:40 - INFO - codeparrot_training - Step 21913: {'lr': 0.0003160787333010664, 'samples': 11219968, 'steps': 21913, 'loss/train': 1.4198331832885742} +02/25/2022 05:57:43 - INFO - codeparrot_training - Step 21914: {'lr': 0.00031606295260546037, 'samples': 11220480, 'steps': 21914, 'loss/train': 1.0272213220596313} +02/25/2022 05:57:49 - INFO - codeparrot_training - Step 21915: {'lr': 0.00031604717162686156, 'samples': 11220992, 'steps': 21915, 'loss/train': 1.8996394872665405} +02/25/2022 05:57:52 - INFO - codeparrot_training - Step 21916: {'lr': 0.00031603139036533775, 'samples': 11221504, 'steps': 21916, 'loss/train': 1.981220006942749} +02/25/2022 05:57:58 - INFO - codeparrot_training - Step 21917: {'lr': 0.0003160156088209564, 'samples': 11222016, 'steps': 21917, 'loss/train': 1.6076395511627197} +02/25/2022 05:58:02 - INFO - codeparrot_training - Step 21918: {'lr': 0.0003159998269937851, 'samples': 11222528, 'steps': 21918, 'loss/train': 1.584609031677246} +02/25/2022 05:58:07 - INFO - codeparrot_training - Step 21919: {'lr': 0.0003159840448838915, 'samples': 11223040, 'steps': 21919, 'loss/train': 2.040466547012329} +02/25/2022 05:58:11 - INFO - codeparrot_training - Step 21920: {'lr': 0.00031596826249134324, 'samples': 11223552, 'steps': 21920, 'loss/train': 2.3362655639648438} +02/25/2022 05:58:17 - INFO - codeparrot_training - Step 21921: {'lr': 0.0003159524798162079, 'samples': 11224064, 'steps': 21921, 'loss/train': 1.411509394645691} +02/25/2022 05:58:20 - INFO - codeparrot_training - Step 21922: {'lr': 0.000315936696858553, 'samples': 11224576, 'steps': 21922, 'loss/train': 1.3273935317993164} +02/25/2022 05:58:26 - INFO - codeparrot_training - Step 21923: {'lr': 0.00031592091361844633, 'samples': 11225088, 'steps': 21923, 'loss/train': 1.3492838144302368} +02/25/2022 05:58:29 - INFO - codeparrot_training - Step 21924: {'lr': 0.0003159051300959553, 'samples': 11225600, 'steps': 21924, 'loss/train': 1.7265515327453613} +02/25/2022 05:58:35 - INFO - codeparrot_training - Step 21925: {'lr': 0.0003158893462911477, 'samples': 11226112, 'steps': 21925, 'loss/train': 1.8900066614151} +02/25/2022 05:58:38 - INFO - codeparrot_training - Step 21926: {'lr': 0.00031587356220409105, 'samples': 11226624, 'steps': 21926, 'loss/train': 1.0663362741470337} +02/25/2022 05:58:44 - INFO - codeparrot_training - Step 21927: {'lr': 0.0003158577778348529, 'samples': 11227136, 'steps': 21927, 'loss/train': 1.4324028491973877} +02/25/2022 05:58:48 - INFO - codeparrot_training - Step 21928: {'lr': 0.000315841993183501, 'samples': 11227648, 'steps': 21928, 'loss/train': 2.416149616241455} +02/25/2022 05:58:53 - INFO - codeparrot_training - Step 21929: {'lr': 0.000315826208250103, 'samples': 11228160, 'steps': 21929, 'loss/train': 1.052208662033081} +02/25/2022 05:58:57 - INFO - codeparrot_training - Step 21930: {'lr': 0.0003158104230347263, 'samples': 11228672, 'steps': 21930, 'loss/train': 2.507903575897217} +02/25/2022 05:59:02 - INFO - codeparrot_training - Step 21931: {'lr': 0.0003157946375374387, 'samples': 11229184, 'steps': 21931, 'loss/train': 1.5963064432144165} +02/25/2022 05:59:06 - INFO - codeparrot_training - Step 21932: {'lr': 0.0003157788517583077, 'samples': 11229696, 'steps': 21932, 'loss/train': 1.5115591287612915} +02/25/2022 05:59:11 - INFO - codeparrot_training - Step 21933: {'lr': 0.00031576306569740107, 'samples': 11230208, 'steps': 21933, 'loss/train': 1.7534328699111938} +02/25/2022 05:59:15 - INFO - codeparrot_training - Step 21934: {'lr': 0.00031574727935478633, 'samples': 11230720, 'steps': 21934, 'loss/train': 2.5700552463531494} +02/25/2022 05:59:20 - INFO - codeparrot_training - Step 21935: {'lr': 0.0003157314927305311, 'samples': 11231232, 'steps': 21935, 'loss/train': 2.3193955421447754} +02/25/2022 05:59:24 - INFO - codeparrot_training - Step 21936: {'lr': 0.00031571570582470304, 'samples': 11231744, 'steps': 21936, 'loss/train': 2.091813564300537} +02/25/2022 05:59:30 - INFO - codeparrot_training - Step 21937: {'lr': 0.00031569991863736975, 'samples': 11232256, 'steps': 21937, 'loss/train': 0.7968567609786987} +02/25/2022 05:59:34 - INFO - codeparrot_training - Step 21938: {'lr': 0.00031568413116859887, 'samples': 11232768, 'steps': 21938, 'loss/train': 2.351186990737915} +02/25/2022 05:59:39 - INFO - codeparrot_training - Step 21939: {'lr': 0.000315668343418458, 'samples': 11233280, 'steps': 21939, 'loss/train': 1.532464861869812} +02/25/2022 05:59:43 - INFO - codeparrot_training - Step 21940: {'lr': 0.0003156525553870149, 'samples': 11233792, 'steps': 21940, 'loss/train': 1.3910305500030518} +02/25/2022 05:59:46 - INFO - codeparrot_training - Step 21941: {'lr': 0.000315636767074337, 'samples': 11234304, 'steps': 21941, 'loss/train': 2.2487399578094482} +02/25/2022 05:59:52 - INFO - codeparrot_training - Step 21942: {'lr': 0.00031562097848049207, 'samples': 11234816, 'steps': 21942, 'loss/train': 1.386334776878357} +02/25/2022 05:59:55 - INFO - codeparrot_training - Step 21943: {'lr': 0.0003156051896055477, 'samples': 11235328, 'steps': 21943, 'loss/train': 1.9307063817977905} +02/25/2022 06:00:01 - INFO - codeparrot_training - Step 21944: {'lr': 0.0003155894004495716, 'samples': 11235840, 'steps': 21944, 'loss/train': 2.247981071472168} +02/25/2022 06:00:04 - INFO - codeparrot_training - Step 21945: {'lr': 0.0003155736110126312, 'samples': 11236352, 'steps': 21945, 'loss/train': 1.0795257091522217} +02/25/2022 06:00:10 - INFO - codeparrot_training - Step 21946: {'lr': 0.0003155578212947944, 'samples': 11236864, 'steps': 21946, 'loss/train': 0.4491962492465973} +02/25/2022 06:00:13 - INFO - codeparrot_training - Step 21947: {'lr': 0.0003155420312961286, 'samples': 11237376, 'steps': 21947, 'loss/train': 2.270949602127075} +02/25/2022 06:00:19 - INFO - codeparrot_training - Step 21948: {'lr': 0.00031552624101670155, 'samples': 11237888, 'steps': 21948, 'loss/train': 1.7403013706207275} +02/25/2022 06:00:23 - INFO - codeparrot_training - Step 21949: {'lr': 0.000315510450456581, 'samples': 11238400, 'steps': 21949, 'loss/train': 1.7849270105361938} +02/25/2022 06:00:28 - INFO - codeparrot_training - Step 21950: {'lr': 0.0003154946596158343, 'samples': 11238912, 'steps': 21950, 'loss/train': 2.225351572036743} +02/25/2022 06:00:32 - INFO - codeparrot_training - Step 21951: {'lr': 0.0003154788684945295, 'samples': 11239424, 'steps': 21951, 'loss/train': 1.7404541969299316} +02/25/2022 06:00:38 - INFO - codeparrot_training - Step 21952: {'lr': 0.00031546307709273393, 'samples': 11239936, 'steps': 21952, 'loss/train': 1.300930142402649} +02/25/2022 06:00:41 - INFO - codeparrot_training - Step 21953: {'lr': 0.0003154472854105153, 'samples': 11240448, 'steps': 21953, 'loss/train': 0.9668514132499695} +02/25/2022 06:00:47 - INFO - codeparrot_training - Step 21954: {'lr': 0.0003154314934479413, 'samples': 11240960, 'steps': 21954, 'loss/train': 3.051269292831421} +02/25/2022 06:00:50 - INFO - codeparrot_training - Step 21955: {'lr': 0.00031541570120507956, 'samples': 11241472, 'steps': 21955, 'loss/train': 1.9547733068466187} +02/25/2022 06:00:56 - INFO - codeparrot_training - Step 21956: {'lr': 0.0003153999086819977, 'samples': 11241984, 'steps': 21956, 'loss/train': 1.877734899520874} +02/25/2022 06:00:59 - INFO - codeparrot_training - Step 21957: {'lr': 0.00031538411587876344, 'samples': 11242496, 'steps': 21957, 'loss/train': 1.4035985469818115} +02/25/2022 06:01:05 - INFO - codeparrot_training - Step 21958: {'lr': 0.0003153683227954443, 'samples': 11243008, 'steps': 21958, 'loss/train': 1.8795199394226074} +02/25/2022 06:01:08 - INFO - codeparrot_training - Step 21959: {'lr': 0.00031535252943210815, 'samples': 11243520, 'steps': 21959, 'loss/train': 1.7047089338302612} +02/25/2022 06:01:14 - INFO - codeparrot_training - Step 21960: {'lr': 0.0003153367357888224, 'samples': 11244032, 'steps': 21960, 'loss/train': 2.137213945388794} +02/25/2022 06:01:17 - INFO - codeparrot_training - Step 21961: {'lr': 0.00031532094186565487, 'samples': 11244544, 'steps': 21961, 'loss/train': 1.8743616342544556} +02/25/2022 06:01:23 - INFO - codeparrot_training - Step 21962: {'lr': 0.0003153051476626732, 'samples': 11245056, 'steps': 21962, 'loss/train': 1.7010562419891357} +02/25/2022 06:01:26 - INFO - codeparrot_training - Step 21963: {'lr': 0.00031528935317994493, 'samples': 11245568, 'steps': 21963, 'loss/train': 1.934483289718628} +02/25/2022 06:01:33 - INFO - codeparrot_training - Step 21964: {'lr': 0.00031527355841753793, 'samples': 11246080, 'steps': 21964, 'loss/train': 2.77974271774292} +02/25/2022 06:01:36 - INFO - codeparrot_training - Step 21965: {'lr': 0.0003152577633755196, 'samples': 11246592, 'steps': 21965, 'loss/train': 2.4961276054382324} +02/25/2022 06:01:42 - INFO - codeparrot_training - Step 21966: {'lr': 0.0003152419680539578, 'samples': 11247104, 'steps': 21966, 'loss/train': 2.3780453205108643} +02/25/2022 06:01:45 - INFO - codeparrot_training - Step 21967: {'lr': 0.0003152261724529201, 'samples': 11247616, 'steps': 21967, 'loss/train': 2.140078544616699} +02/25/2022 06:01:51 - INFO - codeparrot_training - Step 21968: {'lr': 0.0003152103765724743, 'samples': 11248128, 'steps': 21968, 'loss/train': 1.5463610887527466} +02/25/2022 06:01:54 - INFO - codeparrot_training - Step 21969: {'lr': 0.0003151945804126878, 'samples': 11248640, 'steps': 21969, 'loss/train': 1.4795680046081543} +02/25/2022 06:02:00 - INFO - codeparrot_training - Step 21970: {'lr': 0.00031517878397362847, 'samples': 11249152, 'steps': 21970, 'loss/train': 1.890347957611084} +02/25/2022 06:02:03 - INFO - codeparrot_training - Step 21971: {'lr': 0.00031516298725536396, 'samples': 11249664, 'steps': 21971, 'loss/train': 1.8169739246368408} +02/25/2022 06:02:09 - INFO - codeparrot_training - Step 21972: {'lr': 0.00031514719025796183, 'samples': 11250176, 'steps': 21972, 'loss/train': 2.1855034828186035} +02/25/2022 06:02:15 - INFO - codeparrot_training - Step 21973: {'lr': 0.0003151313929814899, 'samples': 11250688, 'steps': 21973, 'loss/train': 1.1833609342575073} +02/25/2022 06:02:18 - INFO - codeparrot_training - Step 21974: {'lr': 0.0003151155954260157, 'samples': 11251200, 'steps': 21974, 'loss/train': 2.4833056926727295} +02/25/2022 06:02:24 - INFO - codeparrot_training - Step 21975: {'lr': 0.00031509979759160707, 'samples': 11251712, 'steps': 21975, 'loss/train': 0.6434096097946167} +02/25/2022 06:02:27 - INFO - codeparrot_training - Step 21976: {'lr': 0.00031508399947833155, 'samples': 11252224, 'steps': 21976, 'loss/train': 2.0098721981048584} +02/25/2022 06:02:33 - INFO - codeparrot_training - Step 21977: {'lr': 0.0003150682010862568, 'samples': 11252736, 'steps': 21977, 'loss/train': 1.5231654644012451} +02/25/2022 06:02:37 - INFO - codeparrot_training - Step 21978: {'lr': 0.00031505240241545054, 'samples': 11253248, 'steps': 21978, 'loss/train': 2.677313804626465} +02/25/2022 06:02:40 - INFO - codeparrot_training - Step 21979: {'lr': 0.0003150366034659804, 'samples': 11253760, 'steps': 21979, 'loss/train': 1.5920900106430054} +02/25/2022 06:02:46 - INFO - codeparrot_training - Step 21980: {'lr': 0.00031502080423791417, 'samples': 11254272, 'steps': 21980, 'loss/train': 2.176215887069702} +02/25/2022 06:02:49 - INFO - codeparrot_training - Step 21981: {'lr': 0.00031500500473131943, 'samples': 11254784, 'steps': 21981, 'loss/train': 2.0931975841522217} +02/25/2022 06:02:55 - INFO - codeparrot_training - Step 21982: {'lr': 0.00031498920494626396, 'samples': 11255296, 'steps': 21982, 'loss/train': 1.997752070426941} +02/25/2022 06:02:58 - INFO - codeparrot_training - Step 21983: {'lr': 0.00031497340488281534, 'samples': 11255808, 'steps': 21983, 'loss/train': 1.963989496231079} +02/25/2022 06:03:04 - INFO - codeparrot_training - Step 21984: {'lr': 0.0003149576045410412, 'samples': 11256320, 'steps': 21984, 'loss/train': 0.0632946640253067} +02/25/2022 06:03:08 - INFO - codeparrot_training - Step 21985: {'lr': 0.00031494180392100936, 'samples': 11256832, 'steps': 21985, 'loss/train': 1.512750267982483} +02/25/2022 06:03:13 - INFO - codeparrot_training - Step 21986: {'lr': 0.00031492600302278746, 'samples': 11257344, 'steps': 21986, 'loss/train': 1.5110458135604858} +02/25/2022 06:03:17 - INFO - codeparrot_training - Step 21987: {'lr': 0.00031491020184644316, 'samples': 11257856, 'steps': 21987, 'loss/train': 2.3577587604522705} +02/25/2022 06:03:22 - INFO - codeparrot_training - Step 21988: {'lr': 0.0003148944003920442, 'samples': 11258368, 'steps': 21988, 'loss/train': 1.7468795776367188} +02/25/2022 06:03:26 - INFO - codeparrot_training - Step 21989: {'lr': 0.00031487859865965824, 'samples': 11258880, 'steps': 21989, 'loss/train': 0.12696422636508942} +02/25/2022 06:03:32 - INFO - codeparrot_training - Step 21990: {'lr': 0.000314862796649353, 'samples': 11259392, 'steps': 21990, 'loss/train': 1.486364483833313} +02/25/2022 06:03:35 - INFO - codeparrot_training - Step 21991: {'lr': 0.000314846994361196, 'samples': 11259904, 'steps': 21991, 'loss/train': 2.426990032196045} +02/25/2022 06:03:41 - INFO - codeparrot_training - Step 21992: {'lr': 0.0003148311917952552, 'samples': 11260416, 'steps': 21992, 'loss/train': 1.7946034669876099} +02/25/2022 06:03:44 - INFO - codeparrot_training - Step 21993: {'lr': 0.00031481538895159806, 'samples': 11260928, 'steps': 21993, 'loss/train': 0.2279038429260254} +02/25/2022 06:03:50 - INFO - codeparrot_training - Step 21994: {'lr': 0.00031479958583029247, 'samples': 11261440, 'steps': 21994, 'loss/train': 2.484208345413208} +02/25/2022 06:03:53 - INFO - codeparrot_training - Step 21995: {'lr': 0.0003147837824314059, 'samples': 11261952, 'steps': 21995, 'loss/train': 2.4712982177734375} +02/25/2022 06:03:59 - INFO - codeparrot_training - Step 21996: {'lr': 0.00031476797875500627, 'samples': 11262464, 'steps': 21996, 'loss/train': 6.382114887237549} +02/25/2022 06:04:03 - INFO - codeparrot_training - Step 21997: {'lr': 0.00031475217480116124, 'samples': 11262976, 'steps': 21997, 'loss/train': 3.1666152477264404} +02/25/2022 06:04:08 - INFO - codeparrot_training - Step 21998: {'lr': 0.00031473637056993837, 'samples': 11263488, 'steps': 21998, 'loss/train': 2.339315176010132} +02/25/2022 06:04:12 - INFO - codeparrot_training - Step 21999: {'lr': 0.0003147205660614055, 'samples': 11264000, 'steps': 21999, 'loss/train': 2.7713630199432373} +02/25/2022 06:04:12 - INFO - codeparrot_training - Evaluating and saving model checkpoint