diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -19415,3 +19415,1009 @@ Use FP16 precision: False 02/25/2022 02:10:22 - INFO - codeparrot_training - Step 18998: {'lr': 0.0003606015216991877, 'samples': 9727488, 'steps': 18998, 'loss/train': 2.8631699085235596} 02/25/2022 02:10:25 - INFO - codeparrot_training - Step 18999: {'lr': 0.0003605868473638285, 'samples': 9728000, 'steps': 18999, 'loss/train': 1.1513526439666748} 02/25/2022 02:10:25 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 02:10:43 - WARNING - huggingface_hub.repository - Several commits (19) will be pushed upstream. +02/25/2022 02:10:43 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 02:11:16 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 4d2b52a..e9773d2 floral-grass-11 -> floral-grass-11 + +02/25/2022 02:11:22 - INFO - codeparrot_training - Step 19000: {'lr': 0.00036057217255475036, 'samples': 9728512, 'steps': 19000, 'loss/train': 1.9880913496017456} +02/25/2022 02:11:26 - INFO - codeparrot_training - Step 19001: {'lr': 0.0003605574972720161, 'samples': 9729024, 'steps': 19001, 'loss/train': 2.0283875465393066} +02/25/2022 02:11:31 - INFO - codeparrot_training - Step 19002: {'lr': 0.00036054282151568864, 'samples': 9729536, 'steps': 19002, 'loss/train': 1.7518035173416138} +02/25/2022 02:11:35 - INFO - codeparrot_training - Step 19003: {'lr': 0.0003605281452858308, 'samples': 9730048, 'steps': 19003, 'loss/train': 2.9040071964263916} +02/25/2022 02:11:42 - INFO - codeparrot_training - Step 19004: {'lr': 0.00036051346858250556, 'samples': 9730560, 'steps': 19004, 'loss/train': 1.6045945882797241} +02/25/2022 02:11:45 - INFO - codeparrot_training - Step 19005: {'lr': 0.00036049879140577566, 'samples': 9731072, 'steps': 19005, 'loss/train': 1.8744827508926392} +02/25/2022 02:11:51 - INFO - codeparrot_training - Step 19006: {'lr': 0.000360484113755704, 'samples': 9731584, 'steps': 19006, 'loss/train': 0.5272473692893982} +02/25/2022 02:11:54 - INFO - codeparrot_training - Step 19007: {'lr': 0.0003604694356323536, 'samples': 9732096, 'steps': 19007, 'loss/train': 2.0560686588287354} +02/25/2022 02:12:00 - INFO - codeparrot_training - Step 19008: {'lr': 0.00036045475703578705, 'samples': 9732608, 'steps': 19008, 'loss/train': 1.4259134531021118} +02/25/2022 02:12:03 - INFO - codeparrot_training - Step 19009: {'lr': 0.0003604400779660674, 'samples': 9733120, 'steps': 19009, 'loss/train': 1.5217783451080322} +02/25/2022 02:12:09 - INFO - codeparrot_training - Step 19010: {'lr': 0.00036042539842325765, 'samples': 9733632, 'steps': 19010, 'loss/train': 1.9425674676895142} +02/25/2022 02:12:12 - INFO - codeparrot_training - Step 19011: {'lr': 0.0003604107184074205, 'samples': 9734144, 'steps': 19011, 'loss/train': 2.312227249145508} +02/25/2022 02:12:18 - INFO - codeparrot_training - Step 19012: {'lr': 0.0003603960379186189, 'samples': 9734656, 'steps': 19012, 'loss/train': 2.4640073776245117} +02/25/2022 02:12:21 - INFO - codeparrot_training - Step 19013: {'lr': 0.0003603813569569157, 'samples': 9735168, 'steps': 19013, 'loss/train': 1.3639686107635498} +02/25/2022 02:12:27 - INFO - codeparrot_training - Step 19014: {'lr': 0.0003603666755223739, 'samples': 9735680, 'steps': 19014, 'loss/train': 3.058096408843994} +02/25/2022 02:12:30 - INFO - codeparrot_training - Step 19015: {'lr': 0.0003603519936150562, 'samples': 9736192, 'steps': 19015, 'loss/train': 1.3021838665008545} +02/25/2022 02:12:36 - INFO - codeparrot_training - Step 19016: {'lr': 0.00036033731123502567, 'samples': 9736704, 'steps': 19016, 'loss/train': 2.157147169113159} +02/25/2022 02:12:39 - INFO - codeparrot_training - Step 19017: {'lr': 0.00036032262838234507, 'samples': 9737216, 'steps': 19017, 'loss/train': 2.8765664100646973} +02/25/2022 02:12:46 - INFO - codeparrot_training - Step 19018: {'lr': 0.0003603079450570775, 'samples': 9737728, 'steps': 19018, 'loss/train': 2.0201380252838135} +02/25/2022 02:12:50 - INFO - codeparrot_training - Step 19019: {'lr': 0.00036029326125928556, 'samples': 9738240, 'steps': 19019, 'loss/train': 2.51269268989563} +02/25/2022 02:12:55 - INFO - codeparrot_training - Step 19020: {'lr': 0.00036027857698903235, 'samples': 9738752, 'steps': 19020, 'loss/train': 1.681691288948059} +02/25/2022 02:12:59 - INFO - codeparrot_training - Step 19021: {'lr': 0.00036026389224638077, 'samples': 9739264, 'steps': 19021, 'loss/train': 3.8848133087158203} +02/25/2022 02:13:04 - INFO - codeparrot_training - Step 19022: {'lr': 0.00036024920703139375, 'samples': 9739776, 'steps': 19022, 'loss/train': 3.7390666007995605} +02/25/2022 02:13:08 - INFO - codeparrot_training - Step 19023: {'lr': 0.00036023452134413406, 'samples': 9740288, 'steps': 19023, 'loss/train': 3.0671582221984863} +02/25/2022 02:13:13 - INFO - codeparrot_training - Step 19024: {'lr': 0.00036021983518466465, 'samples': 9740800, 'steps': 19024, 'loss/train': 1.5447514057159424} +02/25/2022 02:13:17 - INFO - codeparrot_training - Step 19025: {'lr': 0.00036020514855304855, 'samples': 9741312, 'steps': 19025, 'loss/train': 1.8689544200897217} +02/25/2022 02:13:22 - INFO - codeparrot_training - Step 19026: {'lr': 0.00036019046144934855, 'samples': 9741824, 'steps': 19026, 'loss/train': 1.758233666419983} +02/25/2022 02:13:25 - INFO - codeparrot_training - Step 19027: {'lr': 0.0003601757738736275, 'samples': 9742336, 'steps': 19027, 'loss/train': 2.0444414615631104} +02/25/2022 02:13:32 - INFO - codeparrot_training - Step 19028: {'lr': 0.0003601610858259485, 'samples': 9742848, 'steps': 19028, 'loss/train': 0.7698665261268616} +02/25/2022 02:13:35 - INFO - codeparrot_training - Step 19029: {'lr': 0.0003601463973063745, 'samples': 9743360, 'steps': 19029, 'loss/train': 1.7188829183578491} +02/25/2022 02:13:41 - INFO - codeparrot_training - Step 19030: {'lr': 0.0003601317083149682, 'samples': 9743872, 'steps': 19030, 'loss/train': 1.8872079849243164} +02/25/2022 02:13:44 - INFO - codeparrot_training - Step 19031: {'lr': 0.0003601170188517926, 'samples': 9744384, 'steps': 19031, 'loss/train': 2.456594944000244} +02/25/2022 02:13:50 - INFO - codeparrot_training - Step 19032: {'lr': 0.0003601023289169107, 'samples': 9744896, 'steps': 19032, 'loss/train': 2.471778631210327} +02/25/2022 02:13:53 - INFO - codeparrot_training - Step 19033: {'lr': 0.00036008763851038534, 'samples': 9745408, 'steps': 19033, 'loss/train': 1.528847575187683} +02/25/2022 02:13:59 - INFO - codeparrot_training - Step 19034: {'lr': 0.0003600729476322796, 'samples': 9745920, 'steps': 19034, 'loss/train': 0.5691848397254944} +02/25/2022 02:14:02 - INFO - codeparrot_training - Step 19035: {'lr': 0.0003600582562826562, 'samples': 9746432, 'steps': 19035, 'loss/train': 1.7924342155456543} +02/25/2022 02:14:08 - INFO - codeparrot_training - Step 19036: {'lr': 0.0003600435644615783, 'samples': 9746944, 'steps': 19036, 'loss/train': 1.8466625213623047} +02/25/2022 02:14:11 - INFO - codeparrot_training - Step 19037: {'lr': 0.0003600288721691085, 'samples': 9747456, 'steps': 19037, 'loss/train': 2.1249301433563232} +02/25/2022 02:14:17 - INFO - codeparrot_training - Step 19038: {'lr': 0.0003600141794053102, 'samples': 9747968, 'steps': 19038, 'loss/train': 0.725248396396637} +02/25/2022 02:14:20 - INFO - codeparrot_training - Step 19039: {'lr': 0.00035999948617024594, 'samples': 9748480, 'steps': 19039, 'loss/train': 0.7762969136238098} +02/25/2022 02:14:27 - INFO - codeparrot_training - Step 19040: {'lr': 0.0003599847924639788, 'samples': 9748992, 'steps': 19040, 'loss/train': 1.429334044456482} +02/25/2022 02:14:30 - INFO - codeparrot_training - Step 19041: {'lr': 0.0003599700982865718, 'samples': 9749504, 'steps': 19041, 'loss/train': 1.6418672800064087} +02/25/2022 02:14:36 - INFO - codeparrot_training - Step 19042: {'lr': 0.00035995540363808776, 'samples': 9750016, 'steps': 19042, 'loss/train': 2.384359359741211} +02/25/2022 02:14:39 - INFO - codeparrot_training - Step 19043: {'lr': 0.0003599407085185897, 'samples': 9750528, 'steps': 19043, 'loss/train': 1.5225509405136108} +02/25/2022 02:14:45 - INFO - codeparrot_training - Step 19044: {'lr': 0.00035992601292814065, 'samples': 9751040, 'steps': 19044, 'loss/train': 1.8368422985076904} +02/25/2022 02:14:48 - INFO - codeparrot_training - Step 19045: {'lr': 0.00035991131686680333, 'samples': 9751552, 'steps': 19045, 'loss/train': 1.9882451295852661} +02/25/2022 02:14:54 - INFO - codeparrot_training - Step 19046: {'lr': 0.00035989662033464087, 'samples': 9752064, 'steps': 19046, 'loss/train': 1.8748501539230347} +02/25/2022 02:14:57 - INFO - codeparrot_training - Step 19047: {'lr': 0.0003598819233317162, 'samples': 9752576, 'steps': 19047, 'loss/train': 0.7991703748703003} +02/25/2022 02:15:03 - INFO - codeparrot_training - Step 19048: {'lr': 0.0003598672258580923, 'samples': 9753088, 'steps': 19048, 'loss/train': 2.144665479660034} +02/25/2022 02:15:06 - INFO - codeparrot_training - Step 19049: {'lr': 0.00035985252791383203, 'samples': 9753600, 'steps': 19049, 'loss/train': 1.8054879903793335} +02/25/2022 02:15:12 - INFO - codeparrot_training - Step 19050: {'lr': 0.0003598378294989984, 'samples': 9754112, 'steps': 19050, 'loss/train': 1.294025182723999} +02/25/2022 02:15:16 - INFO - codeparrot_training - Step 19051: {'lr': 0.00035982313061365444, 'samples': 9754624, 'steps': 19051, 'loss/train': 2.0888357162475586} +02/25/2022 02:15:21 - INFO - codeparrot_training - Step 19052: {'lr': 0.00035980843125786306, 'samples': 9755136, 'steps': 19052, 'loss/train': 1.6288892030715942} +02/25/2022 02:15:25 - INFO - codeparrot_training - Step 19053: {'lr': 0.00035979373143168726, 'samples': 9755648, 'steps': 19053, 'loss/train': 2.3704967498779297} +02/25/2022 02:15:30 - INFO - codeparrot_training - Step 19054: {'lr': 0.0003597790311351898, 'samples': 9756160, 'steps': 19054, 'loss/train': 1.7540143728256226} +02/25/2022 02:15:34 - INFO - codeparrot_training - Step 19055: {'lr': 0.00035976433036843405, 'samples': 9756672, 'steps': 19055, 'loss/train': 1.3987394571304321} +02/25/2022 02:15:39 - INFO - codeparrot_training - Step 19056: {'lr': 0.00035974962913148267, 'samples': 9757184, 'steps': 19056, 'loss/train': 1.8322112560272217} +02/25/2022 02:15:45 - INFO - codeparrot_training - Step 19057: {'lr': 0.0003597349274243987, 'samples': 9757696, 'steps': 19057, 'loss/train': 1.9115732908248901} +02/25/2022 02:15:48 - INFO - codeparrot_training - Step 19058: {'lr': 0.0003597202252472452, 'samples': 9758208, 'steps': 19058, 'loss/train': 2.3142635822296143} +02/25/2022 02:15:54 - INFO - codeparrot_training - Step 19059: {'lr': 0.00035970552260008505, 'samples': 9758720, 'steps': 19059, 'loss/train': 1.919982671737671} +02/25/2022 02:15:57 - INFO - codeparrot_training - Step 19060: {'lr': 0.00035969081948298133, 'samples': 9759232, 'steps': 19060, 'loss/train': 2.7326436042785645} +02/25/2022 02:16:03 - INFO - codeparrot_training - Step 19061: {'lr': 0.0003596761158959969, 'samples': 9759744, 'steps': 19061, 'loss/train': 1.749724268913269} +02/25/2022 02:16:06 - INFO - codeparrot_training - Step 19062: {'lr': 0.00035966141183919485, 'samples': 9760256, 'steps': 19062, 'loss/train': 2.1644325256347656} +02/25/2022 02:16:12 - INFO - codeparrot_training - Step 19063: {'lr': 0.0003596467073126382, 'samples': 9760768, 'steps': 19063, 'loss/train': 2.2269372940063477} +02/25/2022 02:16:15 - INFO - codeparrot_training - Step 19064: {'lr': 0.0003596320023163898, 'samples': 9761280, 'steps': 19064, 'loss/train': 2.4966979026794434} +02/25/2022 02:16:22 - INFO - codeparrot_training - Step 19065: {'lr': 0.0003596172968505127, 'samples': 9761792, 'steps': 19065, 'loss/train': 1.328292965888977} +02/25/2022 02:16:25 - INFO - codeparrot_training - Step 19066: {'lr': 0.0003596025909150699, 'samples': 9762304, 'steps': 19066, 'loss/train': 1.5774143934249878} +02/25/2022 02:16:31 - INFO - codeparrot_training - Step 19067: {'lr': 0.00035958788451012446, 'samples': 9762816, 'steps': 19067, 'loss/train': 1.2293187379837036} +02/25/2022 02:16:34 - INFO - codeparrot_training - Step 19068: {'lr': 0.0003595731776357393, 'samples': 9763328, 'steps': 19068, 'loss/train': 0.6156061291694641} +02/25/2022 02:16:40 - INFO - codeparrot_training - Step 19069: {'lr': 0.00035955847029197746, 'samples': 9763840, 'steps': 19069, 'loss/train': 0.13419534265995026} +02/25/2022 02:16:43 - INFO - codeparrot_training - Step 19070: {'lr': 0.0003595437624789019, 'samples': 9764352, 'steps': 19070, 'loss/train': 1.035567283630371} +02/25/2022 02:16:49 - INFO - codeparrot_training - Step 19071: {'lr': 0.00035952905419657565, 'samples': 9764864, 'steps': 19071, 'loss/train': 1.6488547325134277} +02/25/2022 02:16:52 - INFO - codeparrot_training - Step 19072: {'lr': 0.0003595143454450617, 'samples': 9765376, 'steps': 19072, 'loss/train': 2.506180763244629} +02/25/2022 02:16:58 - INFO - codeparrot_training - Step 19073: {'lr': 0.0003594996362244231, 'samples': 9765888, 'steps': 19073, 'loss/train': 2.0034542083740234} +02/25/2022 02:17:01 - INFO - codeparrot_training - Step 19074: {'lr': 0.0003594849265347228, 'samples': 9766400, 'steps': 19074, 'loss/train': 1.6224825382232666} +02/25/2022 02:17:08 - INFO - codeparrot_training - Step 19075: {'lr': 0.00035947021637602384, 'samples': 9766912, 'steps': 19075, 'loss/train': 1.6428803205490112} +02/25/2022 02:17:11 - INFO - codeparrot_training - Step 19076: {'lr': 0.0003594555057483892, 'samples': 9767424, 'steps': 19076, 'loss/train': 1.0455342531204224} +02/25/2022 02:17:17 - INFO - codeparrot_training - Step 19077: {'lr': 0.00035944079465188194, 'samples': 9767936, 'steps': 19077, 'loss/train': 2.4600830078125} +02/25/2022 02:17:20 - INFO - codeparrot_training - Step 19078: {'lr': 0.0003594260830865651, 'samples': 9768448, 'steps': 19078, 'loss/train': 2.7476115226745605} +02/25/2022 02:17:26 - INFO - codeparrot_training - Step 19079: {'lr': 0.00035941137105250173, 'samples': 9768960, 'steps': 19079, 'loss/train': 2.278362989425659} +02/25/2022 02:17:29 - INFO - codeparrot_training - Step 19080: {'lr': 0.00035939665854975466, 'samples': 9769472, 'steps': 19080, 'loss/train': 1.9511642456054688} +02/25/2022 02:17:35 - INFO - codeparrot_training - Step 19081: {'lr': 0.0003593819455783871, 'samples': 9769984, 'steps': 19081, 'loss/train': 1.589357614517212} +02/25/2022 02:17:38 - INFO - codeparrot_training - Step 19082: {'lr': 0.000359367232138462, 'samples': 9770496, 'steps': 19082, 'loss/train': 1.426775336265564} +02/25/2022 02:17:44 - INFO - codeparrot_training - Step 19083: {'lr': 0.00035935251823004244, 'samples': 9771008, 'steps': 19083, 'loss/train': 1.2051615715026855} +02/25/2022 02:17:47 - INFO - codeparrot_training - Step 19084: {'lr': 0.00035933780385319136, 'samples': 9771520, 'steps': 19084, 'loss/train': 0.9950919151306152} +02/25/2022 02:17:54 - INFO - codeparrot_training - Step 19085: {'lr': 0.0003593230890079719, 'samples': 9772032, 'steps': 19085, 'loss/train': 1.2789618968963623} +02/25/2022 02:17:58 - INFO - codeparrot_training - Step 19086: {'lr': 0.0003593083736944471, 'samples': 9772544, 'steps': 19086, 'loss/train': 1.669321060180664} +02/25/2022 02:18:03 - INFO - codeparrot_training - Step 19087: {'lr': 0.00035929365791267974, 'samples': 9773056, 'steps': 19087, 'loss/train': 1.6883490085601807} +02/25/2022 02:18:07 - INFO - codeparrot_training - Step 19088: {'lr': 0.00035927894166273323, 'samples': 9773568, 'steps': 19088, 'loss/train': 0.8025223016738892} +02/25/2022 02:18:12 - INFO - codeparrot_training - Step 19089: {'lr': 0.00035926422494467035, 'samples': 9774080, 'steps': 19089, 'loss/train': 8.725171089172363} +02/25/2022 02:18:16 - INFO - codeparrot_training - Step 19090: {'lr': 0.0003592495077585543, 'samples': 9774592, 'steps': 19090, 'loss/train': 2.58119535446167} +02/25/2022 02:18:21 - INFO - codeparrot_training - Step 19091: {'lr': 0.000359234790104448, 'samples': 9775104, 'steps': 19091, 'loss/train': 2.629284381866455} +02/25/2022 02:18:25 - INFO - codeparrot_training - Step 19092: {'lr': 0.0003592200719824146, 'samples': 9775616, 'steps': 19092, 'loss/train': 2.485776424407959} +02/25/2022 02:18:30 - INFO - codeparrot_training - Step 19093: {'lr': 0.000359205353392517, 'samples': 9776128, 'steps': 19093, 'loss/train': 2.118177652359009} +02/25/2022 02:18:34 - INFO - codeparrot_training - Step 19094: {'lr': 0.00035919063433481835, 'samples': 9776640, 'steps': 19094, 'loss/train': 1.720985770225525} +02/25/2022 02:18:41 - INFO - codeparrot_training - Step 19095: {'lr': 0.0003591759148093818, 'samples': 9777152, 'steps': 19095, 'loss/train': 3.0862491130828857} +02/25/2022 02:18:44 - INFO - codeparrot_training - Step 19096: {'lr': 0.0003591611948162702, 'samples': 9777664, 'steps': 19096, 'loss/train': 1.9200283288955688} +02/25/2022 02:18:50 - INFO - codeparrot_training - Step 19097: {'lr': 0.0003591464743555467, 'samples': 9778176, 'steps': 19097, 'loss/train': 2.2101798057556152} +02/25/2022 02:18:53 - INFO - codeparrot_training - Step 19098: {'lr': 0.0003591317534272744, 'samples': 9778688, 'steps': 19098, 'loss/train': 1.1743124723434448} +02/25/2022 02:18:59 - INFO - codeparrot_training - Step 19099: {'lr': 0.0003591170320315164, 'samples': 9779200, 'steps': 19099, 'loss/train': 1.870015025138855} +02/25/2022 02:19:02 - INFO - codeparrot_training - Step 19100: {'lr': 0.0003591023101683355, 'samples': 9779712, 'steps': 19100, 'loss/train': 1.0984140634536743} +02/25/2022 02:19:08 - INFO - codeparrot_training - Step 19101: {'lr': 0.00035908758783779506, 'samples': 9780224, 'steps': 19101, 'loss/train': 5.222268104553223} +02/25/2022 02:19:11 - INFO - codeparrot_training - Step 19102: {'lr': 0.000359072865039958, 'samples': 9780736, 'steps': 19102, 'loss/train': 1.0986119508743286} +02/25/2022 02:19:17 - INFO - codeparrot_training - Step 19103: {'lr': 0.0003590581417748875, 'samples': 9781248, 'steps': 19103, 'loss/train': 1.209088921546936} +02/25/2022 02:19:20 - INFO - codeparrot_training - Step 19104: {'lr': 0.0003590434180426465, 'samples': 9781760, 'steps': 19104, 'loss/train': 0.7341287732124329} +02/25/2022 02:19:26 - INFO - codeparrot_training - Step 19105: {'lr': 0.00035902869384329803, 'samples': 9782272, 'steps': 19105, 'loss/train': 1.9990285634994507} +02/25/2022 02:19:29 - INFO - codeparrot_training - Step 19106: {'lr': 0.0003590139691769054, 'samples': 9782784, 'steps': 19106, 'loss/train': 2.1960625648498535} +02/25/2022 02:19:35 - INFO - codeparrot_training - Step 19107: {'lr': 0.00035899924404353146, 'samples': 9783296, 'steps': 19107, 'loss/train': 1.4845483303070068} +02/25/2022 02:19:38 - INFO - codeparrot_training - Step 19108: {'lr': 0.00035898451844323937, 'samples': 9783808, 'steps': 19108, 'loss/train': 1.3414267301559448} +02/25/2022 02:19:44 - INFO - codeparrot_training - Step 19109: {'lr': 0.0003589697923760923, 'samples': 9784320, 'steps': 19109, 'loss/train': 1.7662800550460815} +02/25/2022 02:19:47 - INFO - codeparrot_training - Step 19110: {'lr': 0.00035895506584215307, 'samples': 9784832, 'steps': 19110, 'loss/train': 1.7849555015563965} +02/25/2022 02:19:53 - INFO - codeparrot_training - Step 19111: {'lr': 0.0003589403388414851, 'samples': 9785344, 'steps': 19111, 'loss/train': 0.9868466258049011} +02/25/2022 02:19:57 - INFO - codeparrot_training - Step 19112: {'lr': 0.0003589256113741513, 'samples': 9785856, 'steps': 19112, 'loss/train': 1.3435007333755493} +02/25/2022 02:20:02 - INFO - codeparrot_training - Step 19113: {'lr': 0.00035891088344021464, 'samples': 9786368, 'steps': 19113, 'loss/train': 0.20858079195022583} +02/25/2022 02:20:06 - INFO - codeparrot_training - Step 19114: {'lr': 0.00035889615503973847, 'samples': 9786880, 'steps': 19114, 'loss/train': 4.169969081878662} +02/25/2022 02:20:11 - INFO - codeparrot_training - Step 19115: {'lr': 0.00035888142617278567, 'samples': 9787392, 'steps': 19115, 'loss/train': 1.5138440132141113} +02/25/2022 02:20:15 - INFO - codeparrot_training - Step 19116: {'lr': 0.00035886669683941945, 'samples': 9787904, 'steps': 19116, 'loss/train': 1.705866813659668} +02/25/2022 02:20:20 - INFO - codeparrot_training - Step 19117: {'lr': 0.00035885196703970286, 'samples': 9788416, 'steps': 19117, 'loss/train': 2.3572585582733154} +02/25/2022 02:20:24 - INFO - codeparrot_training - Step 19118: {'lr': 0.00035883723677369893, 'samples': 9788928, 'steps': 19118, 'loss/train': 1.2012253999710083} +02/25/2022 02:20:29 - INFO - codeparrot_training - Step 19119: {'lr': 0.000358822506041471, 'samples': 9789440, 'steps': 19119, 'loss/train': 0.9769202470779419} +02/25/2022 02:20:33 - INFO - codeparrot_training - Step 19120: {'lr': 0.0003588077748430819, 'samples': 9789952, 'steps': 19120, 'loss/train': 1.7477604150772095} +02/25/2022 02:20:39 - INFO - codeparrot_training - Step 19121: {'lr': 0.00035879304317859486, 'samples': 9790464, 'steps': 19121, 'loss/train': 2.4582486152648926} +02/25/2022 02:20:42 - INFO - codeparrot_training - Step 19122: {'lr': 0.00035877831104807296, 'samples': 9790976, 'steps': 19122, 'loss/train': 2.092266321182251} +02/25/2022 02:20:50 - INFO - codeparrot_training - Step 19123: {'lr': 0.00035876357845157934, 'samples': 9791488, 'steps': 19123, 'loss/train': 2.037261486053467} +02/25/2022 02:20:53 - INFO - codeparrot_training - Step 19124: {'lr': 0.00035874884538917705, 'samples': 9792000, 'steps': 19124, 'loss/train': 1.3121665716171265} +02/25/2022 02:20:59 - INFO - codeparrot_training - Step 19125: {'lr': 0.0003587341118609293, 'samples': 9792512, 'steps': 19125, 'loss/train': 1.3642024993896484} +02/25/2022 02:21:02 - INFO - codeparrot_training - Step 19126: {'lr': 0.00035871937786689914, 'samples': 9793024, 'steps': 19126, 'loss/train': 2.366123676300049} +02/25/2022 02:21:06 - INFO - codeparrot_training - Step 19127: {'lr': 0.00035870464340714966, 'samples': 9793536, 'steps': 19127, 'loss/train': 1.6785504817962646} +02/25/2022 02:21:11 - INFO - codeparrot_training - Step 19128: {'lr': 0.00035868990848174397, 'samples': 9794048, 'steps': 19128, 'loss/train': 2.1995596885681152} +02/25/2022 02:21:17 - INFO - codeparrot_training - Step 19129: {'lr': 0.00035867517309074527, 'samples': 9794560, 'steps': 19129, 'loss/train': 1.1732362508773804} +02/25/2022 02:21:21 - INFO - codeparrot_training - Step 19130: {'lr': 0.0003586604372342166, 'samples': 9795072, 'steps': 19130, 'loss/train': 2.041181802749634} +02/25/2022 02:21:24 - INFO - codeparrot_training - Step 19131: {'lr': 0.0003586457009122211, 'samples': 9795584, 'steps': 19131, 'loss/train': 4.172245979309082} +02/25/2022 02:21:30 - INFO - codeparrot_training - Step 19132: {'lr': 0.000358630964124822, 'samples': 9796096, 'steps': 19132, 'loss/train': 2.8790414333343506} +02/25/2022 02:21:37 - INFO - codeparrot_training - Step 19133: {'lr': 0.0003586162268720823, 'samples': 9796608, 'steps': 19133, 'loss/train': 2.4208638668060303} +02/25/2022 02:21:41 - INFO - codeparrot_training - Step 19134: {'lr': 0.00035860148915406513, 'samples': 9797120, 'steps': 19134, 'loss/train': 1.800406575202942} +02/25/2022 02:21:46 - INFO - codeparrot_training - Step 19135: {'lr': 0.00035858675097083373, 'samples': 9797632, 'steps': 19135, 'loss/train': 1.9086666107177734} +02/25/2022 02:21:50 - INFO - codeparrot_training - Step 19136: {'lr': 0.0003585720123224512, 'samples': 9798144, 'steps': 19136, 'loss/train': 0.8670181632041931} +02/25/2022 02:21:53 - INFO - codeparrot_training - Step 19137: {'lr': 0.0003585572732089806, 'samples': 9798656, 'steps': 19137, 'loss/train': 2.753687858581543} +02/25/2022 02:21:59 - INFO - codeparrot_training - Step 19138: {'lr': 0.00035854253363048507, 'samples': 9799168, 'steps': 19138, 'loss/train': 3.754897356033325} +02/25/2022 02:22:02 - INFO - codeparrot_training - Step 19139: {'lr': 0.00035852779358702783, 'samples': 9799680, 'steps': 19139, 'loss/train': 1.8329936265945435} +02/25/2022 02:22:08 - INFO - codeparrot_training - Step 19140: {'lr': 0.00035851305307867197, 'samples': 9800192, 'steps': 19140, 'loss/train': 1.8142180442810059} +02/25/2022 02:22:11 - INFO - codeparrot_training - Step 19141: {'lr': 0.0003584983121054807, 'samples': 9800704, 'steps': 19141, 'loss/train': 0.7222133874893188} +02/25/2022 02:22:17 - INFO - codeparrot_training - Step 19142: {'lr': 0.00035848357066751703, 'samples': 9801216, 'steps': 19142, 'loss/train': 1.2115339040756226} +02/25/2022 02:22:20 - INFO - codeparrot_training - Step 19143: {'lr': 0.0003584688287648442, 'samples': 9801728, 'steps': 19143, 'loss/train': 1.5355087518692017} +02/25/2022 02:22:28 - INFO - codeparrot_training - Step 19144: {'lr': 0.00035845408639752544, 'samples': 9802240, 'steps': 19144, 'loss/train': 1.1106281280517578} +02/25/2022 02:22:31 - INFO - codeparrot_training - Step 19145: {'lr': 0.00035843934356562376, 'samples': 9802752, 'steps': 19145, 'loss/train': 1.4328804016113281} +02/25/2022 02:22:37 - INFO - codeparrot_training - Step 19146: {'lr': 0.00035842460026920233, 'samples': 9803264, 'steps': 19146, 'loss/train': 1.9341048002243042} +02/25/2022 02:22:40 - INFO - codeparrot_training - Step 19147: {'lr': 0.00035840985650832435, 'samples': 9803776, 'steps': 19147, 'loss/train': 0.9028723239898682} +02/25/2022 02:22:46 - INFO - codeparrot_training - Step 19148: {'lr': 0.000358395112283053, 'samples': 9804288, 'steps': 19148, 'loss/train': 2.2050766944885254} +02/25/2022 02:22:49 - INFO - codeparrot_training - Step 19149: {'lr': 0.00035838036759345144, 'samples': 9804800, 'steps': 19149, 'loss/train': 1.6372251510620117} +02/25/2022 02:22:55 - INFO - codeparrot_training - Step 19150: {'lr': 0.0003583656224395827, 'samples': 9805312, 'steps': 19150, 'loss/train': 2.295809268951416} +02/25/2022 02:22:58 - INFO - codeparrot_training - Step 19151: {'lr': 0.00035835087682151016, 'samples': 9805824, 'steps': 19151, 'loss/train': 1.2212183475494385} +02/25/2022 02:23:04 - INFO - codeparrot_training - Step 19152: {'lr': 0.00035833613073929684, 'samples': 9806336, 'steps': 19152, 'loss/train': 1.8894333839416504} +02/25/2022 02:23:07 - INFO - codeparrot_training - Step 19153: {'lr': 0.00035832138419300585, 'samples': 9806848, 'steps': 19153, 'loss/train': 2.1929712295532227} +02/25/2022 02:23:15 - INFO - codeparrot_training - Step 19154: {'lr': 0.00035830663718270056, 'samples': 9807360, 'steps': 19154, 'loss/train': 1.6250994205474854} +02/25/2022 02:23:18 - INFO - codeparrot_training - Step 19155: {'lr': 0.00035829188970844397, 'samples': 9807872, 'steps': 19155, 'loss/train': 1.4156405925750732} +02/25/2022 02:23:24 - INFO - codeparrot_training - Step 19156: {'lr': 0.0003582771417702993, 'samples': 9808384, 'steps': 19156, 'loss/train': 1.7554818391799927} +02/25/2022 02:23:27 - INFO - codeparrot_training - Step 19157: {'lr': 0.0003582623933683297, 'samples': 9808896, 'steps': 19157, 'loss/train': 1.4958889484405518} +02/25/2022 02:23:33 - INFO - codeparrot_training - Step 19158: {'lr': 0.0003582476445025985, 'samples': 9809408, 'steps': 19158, 'loss/train': 2.397771120071411} +02/25/2022 02:23:36 - INFO - codeparrot_training - Step 19159: {'lr': 0.00035823289517316866, 'samples': 9809920, 'steps': 19159, 'loss/train': 2.149447441101074} +02/25/2022 02:23:42 - INFO - codeparrot_training - Step 19160: {'lr': 0.00035821814538010356, 'samples': 9810432, 'steps': 19160, 'loss/train': 0.8857444524765015} +02/25/2022 02:23:47 - INFO - codeparrot_training - Step 19161: {'lr': 0.00035820339512346614, 'samples': 9810944, 'steps': 19161, 'loss/train': 1.5802701711654663} +02/25/2022 02:23:51 - INFO - codeparrot_training - Step 19162: {'lr': 0.0003581886444033199, 'samples': 9811456, 'steps': 19162, 'loss/train': 1.6313536167144775} +02/25/2022 02:23:54 - INFO - codeparrot_training - Step 19163: {'lr': 0.00035817389321972777, 'samples': 9811968, 'steps': 19163, 'loss/train': 1.4544962644577026} +02/25/2022 02:24:00 - INFO - codeparrot_training - Step 19164: {'lr': 0.000358159141572753, 'samples': 9812480, 'steps': 19164, 'loss/train': 2.8580377101898193} +02/25/2022 02:24:05 - INFO - codeparrot_training - Step 19165: {'lr': 0.0003581443894624589, 'samples': 9812992, 'steps': 19165, 'loss/train': 1.9356400966644287} +02/25/2022 02:24:09 - INFO - codeparrot_training - Step 19166: {'lr': 0.0003581296368889085, 'samples': 9813504, 'steps': 19166, 'loss/train': 0.6926233768463135} +02/25/2022 02:24:14 - INFO - codeparrot_training - Step 19167: {'lr': 0.0003581148838521651, 'samples': 9814016, 'steps': 19167, 'loss/train': 2.0719878673553467} +02/25/2022 02:24:18 - INFO - codeparrot_training - Step 19168: {'lr': 0.00035810013035229187, 'samples': 9814528, 'steps': 19168, 'loss/train': 1.8100639581680298} +02/25/2022 02:24:25 - INFO - codeparrot_training - Step 19169: {'lr': 0.000358085376389352, 'samples': 9815040, 'steps': 19169, 'loss/train': 2.408417224884033} +02/25/2022 02:24:28 - INFO - codeparrot_training - Step 19170: {'lr': 0.0003580706219634087, 'samples': 9815552, 'steps': 19170, 'loss/train': 1.9094805717468262} +02/25/2022 02:24:34 - INFO - codeparrot_training - Step 19171: {'lr': 0.0003580558670745252, 'samples': 9816064, 'steps': 19171, 'loss/train': 2.222322702407837} +02/25/2022 02:24:38 - INFO - codeparrot_training - Step 19172: {'lr': 0.00035804111172276464, 'samples': 9816576, 'steps': 19172, 'loss/train': 1.8012343645095825} +02/25/2022 02:24:43 - INFO - codeparrot_training - Step 19173: {'lr': 0.00035802635590819035, 'samples': 9817088, 'steps': 19173, 'loss/train': 2.3293824195861816} +02/25/2022 02:24:47 - INFO - codeparrot_training - Step 19174: {'lr': 0.00035801159963086535, 'samples': 9817600, 'steps': 19174, 'loss/train': 1.8405214548110962} +02/25/2022 02:24:52 - INFO - codeparrot_training - Step 19175: {'lr': 0.000357996842890853, 'samples': 9818112, 'steps': 19175, 'loss/train': 0.4635801911354065} +02/25/2022 02:24:56 - INFO - codeparrot_training - Step 19176: {'lr': 0.00035798208568821647, 'samples': 9818624, 'steps': 19176, 'loss/train': 0.1690002977848053} +02/25/2022 02:25:01 - INFO - codeparrot_training - Step 19177: {'lr': 0.00035796732802301895, 'samples': 9819136, 'steps': 19177, 'loss/train': 2.4281907081604004} +02/25/2022 02:25:05 - INFO - codeparrot_training - Step 19178: {'lr': 0.00035795256989532367, 'samples': 9819648, 'steps': 19178, 'loss/train': 0.8867130279541016} +02/25/2022 02:25:12 - INFO - codeparrot_training - Step 19179: {'lr': 0.0003579378113051939, 'samples': 9820160, 'steps': 19179, 'loss/train': 1.3388879299163818} +02/25/2022 02:25:16 - INFO - codeparrot_training - Step 19180: {'lr': 0.0003579230522526928, 'samples': 9820672, 'steps': 19180, 'loss/train': 1.3108952045440674} +02/25/2022 02:25:21 - INFO - codeparrot_training - Step 19181: {'lr': 0.00035790829273788356, 'samples': 9821184, 'steps': 19181, 'loss/train': 1.4172788858413696} +02/25/2022 02:25:25 - INFO - codeparrot_training - Step 19182: {'lr': 0.0003578935327608295, 'samples': 9821696, 'steps': 19182, 'loss/train': 1.2417763471603394} +02/25/2022 02:25:31 - INFO - codeparrot_training - Step 19183: {'lr': 0.00035787877232159384, 'samples': 9822208, 'steps': 19183, 'loss/train': 2.0434372425079346} +02/25/2022 02:25:34 - INFO - codeparrot_training - Step 19184: {'lr': 0.00035786401142023975, 'samples': 9822720, 'steps': 19184, 'loss/train': 1.7178515195846558} +02/25/2022 02:25:38 - INFO - codeparrot_training - Step 19185: {'lr': 0.0003578492500568304, 'samples': 9823232, 'steps': 19185, 'loss/train': 1.2154598236083984} +02/25/2022 02:25:44 - INFO - codeparrot_training - Step 19186: {'lr': 0.00035783448823142926, 'samples': 9823744, 'steps': 19186, 'loss/train': 1.6298420429229736} +02/25/2022 02:25:47 - INFO - codeparrot_training - Step 19187: {'lr': 0.00035781972594409937, 'samples': 9824256, 'steps': 19187, 'loss/train': 2.3240180015563965} +02/25/2022 02:25:53 - INFO - codeparrot_training - Step 19188: {'lr': 0.000357804963194904, 'samples': 9824768, 'steps': 19188, 'loss/train': 0.30438581109046936} +02/25/2022 02:25:56 - INFO - codeparrot_training - Step 19189: {'lr': 0.00035779019998390636, 'samples': 9825280, 'steps': 19189, 'loss/train': 2.120345115661621} +02/25/2022 02:26:03 - INFO - codeparrot_training - Step 19190: {'lr': 0.00035777543631116977, 'samples': 9825792, 'steps': 19190, 'loss/train': 0.20522382855415344} +02/25/2022 02:26:07 - INFO - codeparrot_training - Step 19191: {'lr': 0.00035776067217675744, 'samples': 9826304, 'steps': 19191, 'loss/train': 1.9031397104263306} +02/25/2022 02:26:13 - INFO - codeparrot_training - Step 19192: {'lr': 0.0003577459075807326, 'samples': 9826816, 'steps': 19192, 'loss/train': 2.523228406906128} +02/25/2022 02:26:16 - INFO - codeparrot_training - Step 19193: {'lr': 0.00035773114252315844, 'samples': 9827328, 'steps': 19193, 'loss/train': 1.9494825601577759} +02/25/2022 02:26:22 - INFO - codeparrot_training - Step 19194: {'lr': 0.0003577163770040984, 'samples': 9827840, 'steps': 19194, 'loss/train': 1.5968466997146606} +02/25/2022 02:26:25 - INFO - codeparrot_training - Step 19195: {'lr': 0.00035770161102361553, 'samples': 9828352, 'steps': 19195, 'loss/train': 8.619075775146484} +02/25/2022 02:26:31 - INFO - codeparrot_training - Step 19196: {'lr': 0.0003576868445817732, 'samples': 9828864, 'steps': 19196, 'loss/train': 1.5660347938537598} +02/25/2022 02:26:34 - INFO - codeparrot_training - Step 19197: {'lr': 0.00035767207767863453, 'samples': 9829376, 'steps': 19197, 'loss/train': 0.23878076672554016} +02/25/2022 02:26:40 - INFO - codeparrot_training - Step 19198: {'lr': 0.00035765731031426294, 'samples': 9829888, 'steps': 19198, 'loss/train': 2.0039756298065186} +02/25/2022 02:26:43 - INFO - codeparrot_training - Step 19199: {'lr': 0.0003576425424887216, 'samples': 9830400, 'steps': 19199, 'loss/train': 1.539983868598938} +02/25/2022 02:26:50 - INFO - codeparrot_training - Step 19200: {'lr': 0.0003576277742020738, 'samples': 9830912, 'steps': 19200, 'loss/train': 2.3642733097076416} +02/25/2022 02:26:54 - INFO - codeparrot_training - Step 19201: {'lr': 0.0003576130054543828, 'samples': 9831424, 'steps': 19201, 'loss/train': 1.3938478231430054} +02/25/2022 02:26:59 - INFO - codeparrot_training - Step 19202: {'lr': 0.00035759823624571184, 'samples': 9831936, 'steps': 19202, 'loss/train': 3.6907947063446045} +02/25/2022 02:27:03 - INFO - codeparrot_training - Step 19203: {'lr': 0.00035758346657612417, 'samples': 9832448, 'steps': 19203, 'loss/train': 1.7668558359146118} +02/25/2022 02:27:09 - INFO - codeparrot_training - Step 19204: {'lr': 0.00035756869644568314, 'samples': 9832960, 'steps': 19204, 'loss/train': 2.613354206085205} +02/25/2022 02:27:12 - INFO - codeparrot_training - Step 19205: {'lr': 0.00035755392585445195, 'samples': 9833472, 'steps': 19205, 'loss/train': 2.5741567611694336} +02/25/2022 02:27:18 - INFO - codeparrot_training - Step 19206: {'lr': 0.0003575391548024939, 'samples': 9833984, 'steps': 19206, 'loss/train': 1.9574772119522095} +02/25/2022 02:27:21 - INFO - codeparrot_training - Step 19207: {'lr': 0.00035752438328987224, 'samples': 9834496, 'steps': 19207, 'loss/train': 2.3425042629241943} +02/25/2022 02:27:27 - INFO - codeparrot_training - Step 19208: {'lr': 0.00035750961131665034, 'samples': 9835008, 'steps': 19208, 'loss/train': 3.004188060760498} +02/25/2022 02:27:30 - INFO - codeparrot_training - Step 19209: {'lr': 0.0003574948388828913, 'samples': 9835520, 'steps': 19209, 'loss/train': 2.12380051612854} +02/25/2022 02:27:36 - INFO - codeparrot_training - Step 19210: {'lr': 0.0003574800659886586, 'samples': 9836032, 'steps': 19210, 'loss/train': 1.2154111862182617} +02/25/2022 02:27:39 - INFO - codeparrot_training - Step 19211: {'lr': 0.0003574652926340153, 'samples': 9836544, 'steps': 19211, 'loss/train': 1.6454066038131714} +02/25/2022 02:27:45 - INFO - codeparrot_training - Step 19212: {'lr': 0.0003574505188190249, 'samples': 9837056, 'steps': 19212, 'loss/train': 1.9090946912765503} +02/25/2022 02:27:48 - INFO - codeparrot_training - Step 19213: {'lr': 0.0003574357445437506, 'samples': 9837568, 'steps': 19213, 'loss/train': 0.6663073301315308} +02/25/2022 02:27:54 - INFO - codeparrot_training - Step 19214: {'lr': 0.0003574209698082556, 'samples': 9838080, 'steps': 19214, 'loss/train': 0.4567076861858368} +02/25/2022 02:27:57 - INFO - codeparrot_training - Step 19215: {'lr': 0.0003574061946126034, 'samples': 9838592, 'steps': 19215, 'loss/train': 1.997956395149231} +02/25/2022 02:28:04 - INFO - codeparrot_training - Step 19216: {'lr': 0.0003573914189568571, 'samples': 9839104, 'steps': 19216, 'loss/train': 2.9051742553710938} +02/25/2022 02:28:08 - INFO - codeparrot_training - Step 19217: {'lr': 0.0003573766428410801, 'samples': 9839616, 'steps': 19217, 'loss/train': 2.022897958755493} +02/25/2022 02:28:13 - INFO - codeparrot_training - Step 19218: {'lr': 0.00035736186626533556, 'samples': 9840128, 'steps': 19218, 'loss/train': 1.0769011974334717} +02/25/2022 02:28:17 - INFO - codeparrot_training - Step 19219: {'lr': 0.00035734708922968695, 'samples': 9840640, 'steps': 19219, 'loss/train': 2.1259169578552246} +02/25/2022 02:28:22 - INFO - codeparrot_training - Step 19220: {'lr': 0.0003573323117341975, 'samples': 9841152, 'steps': 19220, 'loss/train': 5.245190620422363} +02/25/2022 02:28:26 - INFO - codeparrot_training - Step 19221: {'lr': 0.0003573175337789305, 'samples': 9841664, 'steps': 19221, 'loss/train': 1.7913321256637573} +02/25/2022 02:28:31 - INFO - codeparrot_training - Step 19222: {'lr': 0.0003573027553639492, 'samples': 9842176, 'steps': 19222, 'loss/train': 2.099760055541992} +02/25/2022 02:28:35 - INFO - codeparrot_training - Step 19223: {'lr': 0.0003572879764893171, 'samples': 9842688, 'steps': 19223, 'loss/train': 1.3859553337097168} +02/25/2022 02:28:40 - INFO - codeparrot_training - Step 19224: {'lr': 0.0003572731971550973, 'samples': 9843200, 'steps': 19224, 'loss/train': 2.9749035835266113} +02/25/2022 02:28:44 - INFO - codeparrot_training - Step 19225: {'lr': 0.0003572584173613532, 'samples': 9843712, 'steps': 19225, 'loss/train': 1.63325035572052} +02/25/2022 02:28:51 - INFO - codeparrot_training - Step 19226: {'lr': 0.00035724363710814807, 'samples': 9844224, 'steps': 19226, 'loss/train': 2.308938980102539} +02/25/2022 02:28:54 - INFO - codeparrot_training - Step 19227: {'lr': 0.00035722885639554526, 'samples': 9844736, 'steps': 19227, 'loss/train': 1.8411939144134521} +02/25/2022 02:29:00 - INFO - codeparrot_training - Step 19228: {'lr': 0.0003572140752236081, 'samples': 9845248, 'steps': 19228, 'loss/train': 1.3928197622299194} +02/25/2022 02:29:04 - INFO - codeparrot_training - Step 19229: {'lr': 0.0003571992935923999, 'samples': 9845760, 'steps': 19229, 'loss/train': 2.289527416229248} +02/25/2022 02:29:09 - INFO - codeparrot_training - Step 19230: {'lr': 0.0003571845115019839, 'samples': 9846272, 'steps': 19230, 'loss/train': 1.6074715852737427} +02/25/2022 02:29:13 - INFO - codeparrot_training - Step 19231: {'lr': 0.0003571697289524235, 'samples': 9846784, 'steps': 19231, 'loss/train': 0.21067163348197937} +02/25/2022 02:29:18 - INFO - codeparrot_training - Step 19232: {'lr': 0.0003571549459437821, 'samples': 9847296, 'steps': 19232, 'loss/train': 1.397789716720581} +02/25/2022 02:29:22 - INFO - codeparrot_training - Step 19233: {'lr': 0.0003571401624761229, 'samples': 9847808, 'steps': 19233, 'loss/train': 1.697490930557251} +02/25/2022 02:29:28 - INFO - codeparrot_training - Step 19234: {'lr': 0.0003571253785495092, 'samples': 9848320, 'steps': 19234, 'loss/train': 1.3955912590026855} +02/25/2022 02:29:31 - INFO - codeparrot_training - Step 19235: {'lr': 0.00035711059416400446, 'samples': 9848832, 'steps': 19235, 'loss/train': 1.4052941799163818} +02/25/2022 02:29:35 - INFO - codeparrot_training - Step 19236: {'lr': 0.0003570958093196719, 'samples': 9849344, 'steps': 19236, 'loss/train': 0.811290979385376} +02/25/2022 02:29:42 - INFO - codeparrot_training - Step 19237: {'lr': 0.00035708102401657495, 'samples': 9849856, 'steps': 19237, 'loss/train': 2.0424890518188477} +02/25/2022 02:29:45 - INFO - codeparrot_training - Step 19238: {'lr': 0.00035706623825477687, 'samples': 9850368, 'steps': 19238, 'loss/train': 0.5274319648742676} +02/25/2022 02:29:51 - INFO - codeparrot_training - Step 19239: {'lr': 0.000357051452034341, 'samples': 9850880, 'steps': 19239, 'loss/train': 1.2534005641937256} +02/25/2022 02:29:54 - INFO - codeparrot_training - Step 19240: {'lr': 0.00035703666535533076, 'samples': 9851392, 'steps': 19240, 'loss/train': 0.9870202541351318} +02/25/2022 02:30:00 - INFO - codeparrot_training - Step 19241: {'lr': 0.0003570218782178094, 'samples': 9851904, 'steps': 19241, 'loss/train': 1.8944878578186035} +02/25/2022 02:30:03 - INFO - codeparrot_training - Step 19242: {'lr': 0.0003570070906218403, 'samples': 9852416, 'steps': 19242, 'loss/train': 2.7563107013702393} +02/25/2022 02:30:09 - INFO - codeparrot_training - Step 19243: {'lr': 0.00035699230256748684, 'samples': 9852928, 'steps': 19243, 'loss/train': 1.8566226959228516} +02/25/2022 02:30:12 - INFO - codeparrot_training - Step 19244: {'lr': 0.0003569775140548122, 'samples': 9853440, 'steps': 19244, 'loss/train': 1.938361406326294} +02/25/2022 02:30:18 - INFO - codeparrot_training - Step 19245: {'lr': 0.00035696272508388, 'samples': 9853952, 'steps': 19245, 'loss/train': 2.3620991706848145} +02/25/2022 02:30:21 - INFO - codeparrot_training - Step 19246: {'lr': 0.00035694793565475337, 'samples': 9854464, 'steps': 19246, 'loss/train': 2.646620988845825} +02/25/2022 02:30:29 - INFO - codeparrot_training - Step 19247: {'lr': 0.0003569331457674958, 'samples': 9854976, 'steps': 19247, 'loss/train': 2.3118441104888916} +02/25/2022 02:30:32 - INFO - codeparrot_training - Step 19248: {'lr': 0.00035691835542217055, 'samples': 9855488, 'steps': 19248, 'loss/train': 1.075470209121704} +02/25/2022 02:30:38 - INFO - codeparrot_training - Step 19249: {'lr': 0.00035690356461884104, 'samples': 9856000, 'steps': 19249, 'loss/train': 1.482853651046753} +02/25/2022 02:30:43 - INFO - codeparrot_training - Step 19250: {'lr': 0.0003568887733575705, 'samples': 9856512, 'steps': 19250, 'loss/train': 1.8895548582077026} +02/25/2022 02:30:46 - INFO - codeparrot_training - Step 19251: {'lr': 0.0003568739816384225, 'samples': 9857024, 'steps': 19251, 'loss/train': 1.1049559116363525} +02/25/2022 02:30:52 - INFO - codeparrot_training - Step 19252: {'lr': 0.00035685918946146036, 'samples': 9857536, 'steps': 19252, 'loss/train': 2.8000295162200928} +02/25/2022 02:30:56 - INFO - codeparrot_training - Step 19253: {'lr': 0.00035684439682674723, 'samples': 9858048, 'steps': 19253, 'loss/train': 1.9263113737106323} +02/25/2022 02:31:01 - INFO - codeparrot_training - Step 19254: {'lr': 0.00035682960373434677, 'samples': 9858560, 'steps': 19254, 'loss/train': 0.33952149748802185} +02/25/2022 02:31:05 - INFO - codeparrot_training - Step 19255: {'lr': 0.0003568148101843221, 'samples': 9859072, 'steps': 19255, 'loss/train': 2.2760066986083984} +02/25/2022 02:31:08 - INFO - codeparrot_training - Step 19256: {'lr': 0.0003568000161767368, 'samples': 9859584, 'steps': 19256, 'loss/train': 2.2122201919555664} +02/25/2022 02:31:14 - INFO - codeparrot_training - Step 19257: {'lr': 0.00035678522171165406, 'samples': 9860096, 'steps': 19257, 'loss/train': 2.1850576400756836} +02/25/2022 02:31:19 - INFO - codeparrot_training - Step 19258: {'lr': 0.0003567704267891374, 'samples': 9860608, 'steps': 19258, 'loss/train': 1.573604941368103} +02/25/2022 02:31:23 - INFO - codeparrot_training - Step 19259: {'lr': 0.00035675563140925, 'samples': 9861120, 'steps': 19259, 'loss/train': 2.432962417602539} +02/25/2022 02:31:28 - INFO - codeparrot_training - Step 19260: {'lr': 0.00035674083557205553, 'samples': 9861632, 'steps': 19260, 'loss/train': 1.551035761833191} +02/25/2022 02:31:32 - INFO - codeparrot_training - Step 19261: {'lr': 0.00035672603927761716, 'samples': 9862144, 'steps': 19261, 'loss/train': 1.7010586261749268} +02/25/2022 02:31:39 - INFO - codeparrot_training - Step 19262: {'lr': 0.0003567112425259984, 'samples': 9862656, 'steps': 19262, 'loss/train': 1.618138074874878} +02/25/2022 02:31:42 - INFO - codeparrot_training - Step 19263: {'lr': 0.00035669644531726244, 'samples': 9863168, 'steps': 19263, 'loss/train': 2.0759124755859375} +02/25/2022 02:31:48 - INFO - codeparrot_training - Step 19264: {'lr': 0.00035668164765147284, 'samples': 9863680, 'steps': 19264, 'loss/train': 1.9786914587020874} +02/25/2022 02:31:51 - INFO - codeparrot_training - Step 19265: {'lr': 0.00035666684952869295, 'samples': 9864192, 'steps': 19265, 'loss/train': 2.208364963531494} +02/25/2022 02:31:57 - INFO - codeparrot_training - Step 19266: {'lr': 0.00035665205094898613, 'samples': 9864704, 'steps': 19266, 'loss/train': 1.7359892129898071} +02/25/2022 02:32:00 - INFO - codeparrot_training - Step 19267: {'lr': 0.0003566372519124158, 'samples': 9865216, 'steps': 19267, 'loss/train': 2.4588685035705566} +02/25/2022 02:32:06 - INFO - codeparrot_training - Step 19268: {'lr': 0.00035662245241904533, 'samples': 9865728, 'steps': 19268, 'loss/train': 1.9101704359054565} +02/25/2022 02:32:09 - INFO - codeparrot_training - Step 19269: {'lr': 0.00035660765246893814, 'samples': 9866240, 'steps': 19269, 'loss/train': 1.653025507926941} +02/25/2022 02:32:15 - INFO - codeparrot_training - Step 19270: {'lr': 0.00035659285206215757, 'samples': 9866752, 'steps': 19270, 'loss/train': 2.1290361881256104} +02/25/2022 02:32:18 - INFO - codeparrot_training - Step 19271: {'lr': 0.0003565780511987672, 'samples': 9867264, 'steps': 19271, 'loss/train': 2.0186846256256104} +02/25/2022 02:32:25 - INFO - codeparrot_training - Step 19272: {'lr': 0.00035656324987883015, 'samples': 9867776, 'steps': 19272, 'loss/train': 1.522246241569519} +02/25/2022 02:32:29 - INFO - codeparrot_training - Step 19273: {'lr': 0.00035654844810241004, 'samples': 9868288, 'steps': 19273, 'loss/train': 2.132499933242798} +02/25/2022 02:32:34 - INFO - codeparrot_training - Step 19274: {'lr': 0.0003565336458695702, 'samples': 9868800, 'steps': 19274, 'loss/train': 2.6792471408843994} +02/25/2022 02:32:38 - INFO - codeparrot_training - Step 19275: {'lr': 0.0003565188431803741, 'samples': 9869312, 'steps': 19275, 'loss/train': 2.4057610034942627} +02/25/2022 02:32:44 - INFO - codeparrot_training - Step 19276: {'lr': 0.0003565040400348851, 'samples': 9869824, 'steps': 19276, 'loss/train': 1.4288160800933838} +02/25/2022 02:32:47 - INFO - codeparrot_training - Step 19277: {'lr': 0.0003564892364331665, 'samples': 9870336, 'steps': 19277, 'loss/train': 1.3318730592727661} +02/25/2022 02:32:53 - INFO - codeparrot_training - Step 19278: {'lr': 0.0003564744323752819, 'samples': 9870848, 'steps': 19278, 'loss/train': 0.21651515364646912} +02/25/2022 02:32:56 - INFO - codeparrot_training - Step 19279: {'lr': 0.00035645962786129464, 'samples': 9871360, 'steps': 19279, 'loss/train': 1.3973026275634766} +02/25/2022 02:33:02 - INFO - codeparrot_training - Step 19280: {'lr': 0.0003564448228912682, 'samples': 9871872, 'steps': 19280, 'loss/train': 2.7846038341522217} +02/25/2022 02:33:05 - INFO - codeparrot_training - Step 19281: {'lr': 0.00035643001746526586, 'samples': 9872384, 'steps': 19281, 'loss/train': 1.4548993110656738} +02/25/2022 02:33:13 - INFO - codeparrot_training - Step 19282: {'lr': 0.0003564152115833511, 'samples': 9872896, 'steps': 19282, 'loss/train': 1.5476588010787964} +02/25/2022 02:33:16 - INFO - codeparrot_training - Step 19283: {'lr': 0.00035640040524558746, 'samples': 9873408, 'steps': 19283, 'loss/train': 2.1892807483673096} +02/25/2022 02:33:22 - INFO - codeparrot_training - Step 19284: {'lr': 0.00035638559845203817, 'samples': 9873920, 'steps': 19284, 'loss/train': 1.2143034934997559} +02/25/2022 02:33:25 - INFO - codeparrot_training - Step 19285: {'lr': 0.00035637079120276683, 'samples': 9874432, 'steps': 19285, 'loss/train': 1.911623239517212} +02/25/2022 02:33:31 - INFO - codeparrot_training - Step 19286: {'lr': 0.00035635598349783676, 'samples': 9874944, 'steps': 19286, 'loss/train': 2.026838541030884} +02/25/2022 02:33:34 - INFO - codeparrot_training - Step 19287: {'lr': 0.0003563411753373115, 'samples': 9875456, 'steps': 19287, 'loss/train': 0.9892092347145081} +02/25/2022 02:33:40 - INFO - codeparrot_training - Step 19288: {'lr': 0.0003563263667212543, 'samples': 9875968, 'steps': 19288, 'loss/train': 1.7014323472976685} +02/25/2022 02:33:43 - INFO - codeparrot_training - Step 19289: {'lr': 0.0003563115576497288, 'samples': 9876480, 'steps': 19289, 'loss/train': 2.5118744373321533} +02/25/2022 02:33:49 - INFO - codeparrot_training - Step 19290: {'lr': 0.0003562967481227982, 'samples': 9876992, 'steps': 19290, 'loss/train': 2.366943359375} +02/25/2022 02:33:52 - INFO - codeparrot_training - Step 19291: {'lr': 0.0003562819381405262, 'samples': 9877504, 'steps': 19291, 'loss/train': 1.967867374420166} +02/25/2022 02:33:58 - INFO - codeparrot_training - Step 19292: {'lr': 0.0003562671277029761, 'samples': 9878016, 'steps': 19292, 'loss/train': 2.4265329837799072} +02/25/2022 02:34:01 - INFO - codeparrot_training - Step 19293: {'lr': 0.0003562523168102114, 'samples': 9878528, 'steps': 19293, 'loss/train': 0.9066846966743469} +02/25/2022 02:34:07 - INFO - codeparrot_training - Step 19294: {'lr': 0.00035623750546229547, 'samples': 9879040, 'steps': 19294, 'loss/train': 2.2698421478271484} +02/25/2022 02:34:11 - INFO - codeparrot_training - Step 19295: {'lr': 0.0003562226936592919, 'samples': 9879552, 'steps': 19295, 'loss/train': 2.1982572078704834} +02/25/2022 02:34:16 - INFO - codeparrot_training - Step 19296: {'lr': 0.0003562078814012639, 'samples': 9880064, 'steps': 19296, 'loss/train': 1.8500181436538696} +02/25/2022 02:34:20 - INFO - codeparrot_training - Step 19297: {'lr': 0.00035619306868827516, 'samples': 9880576, 'steps': 19297, 'loss/train': 1.2079533338546753} +02/25/2022 02:34:25 - INFO - codeparrot_training - Step 19298: {'lr': 0.00035617825552038894, 'samples': 9881088, 'steps': 19298, 'loss/train': 1.5644276142120361} +02/25/2022 02:34:29 - INFO - codeparrot_training - Step 19299: {'lr': 0.00035616344189766885, 'samples': 9881600, 'steps': 19299, 'loss/train': 1.6737005710601807} +02/25/2022 02:34:34 - INFO - codeparrot_training - Step 19300: {'lr': 0.0003561486278201783, 'samples': 9882112, 'steps': 19300, 'loss/train': 1.916688084602356} +02/25/2022 02:34:38 - INFO - codeparrot_training - Step 19301: {'lr': 0.00035613381328798065, 'samples': 9882624, 'steps': 19301, 'loss/train': 3.0540406703948975} +02/25/2022 02:34:43 - INFO - codeparrot_training - Step 19302: {'lr': 0.0003561189983011396, 'samples': 9883136, 'steps': 19302, 'loss/train': 1.4626095294952393} +02/25/2022 02:34:47 - INFO - codeparrot_training - Step 19303: {'lr': 0.00035610418285971835, 'samples': 9883648, 'steps': 19303, 'loss/train': 2.0717930793762207} +02/25/2022 02:34:52 - INFO - codeparrot_training - Step 19304: {'lr': 0.00035608936696378046, 'samples': 9884160, 'steps': 19304, 'loss/train': 0.9057942032814026} +02/25/2022 02:34:56 - INFO - codeparrot_training - Step 19305: {'lr': 0.00035607455061338947, 'samples': 9884672, 'steps': 19305, 'loss/train': 3.1500675678253174} +02/25/2022 02:35:01 - INFO - codeparrot_training - Step 19306: {'lr': 0.0003560597338086088, 'samples': 9885184, 'steps': 19306, 'loss/train': 1.528065800666809} +02/25/2022 02:35:05 - INFO - codeparrot_training - Step 19307: {'lr': 0.0003560449165495018, 'samples': 9885696, 'steps': 19307, 'loss/train': 1.8838484287261963} +02/25/2022 02:35:12 - INFO - codeparrot_training - Step 19308: {'lr': 0.0003560300988361321, 'samples': 9886208, 'steps': 19308, 'loss/train': 1.3809232711791992} +02/25/2022 02:35:15 - INFO - codeparrot_training - Step 19309: {'lr': 0.00035601528066856315, 'samples': 9886720, 'steps': 19309, 'loss/train': 2.148930788040161} +02/25/2022 02:35:21 - INFO - codeparrot_training - Step 19310: {'lr': 0.00035600046204685844, 'samples': 9887232, 'steps': 19310, 'loss/train': 1.2641681432724} +02/25/2022 02:35:24 - INFO - codeparrot_training - Step 19311: {'lr': 0.00035598564297108134, 'samples': 9887744, 'steps': 19311, 'loss/train': 2.1563804149627686} +02/25/2022 02:35:29 - INFO - codeparrot_training - Step 19312: {'lr': 0.0003559708234412954, 'samples': 9888256, 'steps': 19312, 'loss/train': 2.0812201499938965} +02/25/2022 02:35:33 - INFO - codeparrot_training - Step 19313: {'lr': 0.00035595600345756414, 'samples': 9888768, 'steps': 19313, 'loss/train': 1.456243872642517} +02/25/2022 02:35:39 - INFO - codeparrot_training - Step 19314: {'lr': 0.00035594118301995095, 'samples': 9889280, 'steps': 19314, 'loss/train': 0.8808965086936951} +02/25/2022 02:35:42 - INFO - codeparrot_training - Step 19315: {'lr': 0.00035592636212851945, 'samples': 9889792, 'steps': 19315, 'loss/train': 1.7614250183105469} +02/25/2022 02:35:48 - INFO - codeparrot_training - Step 19316: {'lr': 0.000355911540783333, 'samples': 9890304, 'steps': 19316, 'loss/train': 0.846782922744751} +02/25/2022 02:35:51 - INFO - codeparrot_training - Step 19317: {'lr': 0.00035589671898445517, 'samples': 9890816, 'steps': 19317, 'loss/train': 2.443593740463257} +02/25/2022 02:35:57 - INFO - codeparrot_training - Step 19318: {'lr': 0.0003558818967319494, 'samples': 9891328, 'steps': 19318, 'loss/train': 1.5117106437683105} +02/25/2022 02:36:00 - INFO - codeparrot_training - Step 19319: {'lr': 0.0003558670740258792, 'samples': 9891840, 'steps': 19319, 'loss/train': 1.2961843013763428} +02/25/2022 02:36:07 - INFO - codeparrot_training - Step 19320: {'lr': 0.00035585225086630807, 'samples': 9892352, 'steps': 19320, 'loss/train': 1.956886887550354} +02/25/2022 02:36:11 - INFO - codeparrot_training - Step 19321: {'lr': 0.00035583742725329954, 'samples': 9892864, 'steps': 19321, 'loss/train': 1.6915360689163208} +02/25/2022 02:36:16 - INFO - codeparrot_training - Step 19322: {'lr': 0.0003558226031869171, 'samples': 9893376, 'steps': 19322, 'loss/train': 0.9970734119415283} +02/25/2022 02:36:20 - INFO - codeparrot_training - Step 19323: {'lr': 0.00035580777866722415, 'samples': 9893888, 'steps': 19323, 'loss/train': 0.5012323260307312} +02/25/2022 02:36:25 - INFO - codeparrot_training - Step 19324: {'lr': 0.00035579295369428425, 'samples': 9894400, 'steps': 19324, 'loss/train': 1.9734588861465454} +02/25/2022 02:36:29 - INFO - codeparrot_training - Step 19325: {'lr': 0.000355778128268161, 'samples': 9894912, 'steps': 19325, 'loss/train': 1.9326817989349365} +02/25/2022 02:36:34 - INFO - codeparrot_training - Step 19326: {'lr': 0.0003557633023889179, 'samples': 9895424, 'steps': 19326, 'loss/train': 1.4279378652572632} +02/25/2022 02:36:37 - INFO - codeparrot_training - Step 19327: {'lr': 0.0003557484760566183, 'samples': 9895936, 'steps': 19327, 'loss/train': 1.6264982223510742} +02/25/2022 02:36:43 - INFO - codeparrot_training - Step 19328: {'lr': 0.0003557336492713258, 'samples': 9896448, 'steps': 19328, 'loss/train': 1.450286626815796} +02/25/2022 02:36:46 - INFO - codeparrot_training - Step 19329: {'lr': 0.000355718822033104, 'samples': 9896960, 'steps': 19329, 'loss/train': 2.410184383392334} +02/25/2022 02:36:53 - INFO - codeparrot_training - Step 19330: {'lr': 0.0003557039943420163, 'samples': 9897472, 'steps': 19330, 'loss/train': 1.1350754499435425} +02/25/2022 02:36:56 - INFO - codeparrot_training - Step 19331: {'lr': 0.00035568916619812624, 'samples': 9897984, 'steps': 19331, 'loss/train': 2.0210282802581787} +02/25/2022 02:37:02 - INFO - codeparrot_training - Step 19332: {'lr': 0.00035567433760149737, 'samples': 9898496, 'steps': 19332, 'loss/train': 0.6852190494537354} +02/25/2022 02:37:05 - INFO - codeparrot_training - Step 19333: {'lr': 0.0003556595085521931, 'samples': 9899008, 'steps': 19333, 'loss/train': 0.6045637130737305} +02/25/2022 02:37:11 - INFO - codeparrot_training - Step 19334: {'lr': 0.0003556446790502772, 'samples': 9899520, 'steps': 19334, 'loss/train': 2.5709099769592285} +02/25/2022 02:37:14 - INFO - codeparrot_training - Step 19335: {'lr': 0.00035562984909581297, 'samples': 9900032, 'steps': 19335, 'loss/train': 2.6259469985961914} +02/25/2022 02:37:20 - INFO - codeparrot_training - Step 19336: {'lr': 0.0003556150186888639, 'samples': 9900544, 'steps': 19336, 'loss/train': 1.0845199823379517} +02/25/2022 02:37:23 - INFO - codeparrot_training - Step 19337: {'lr': 0.00035560018782949384, 'samples': 9901056, 'steps': 19337, 'loss/train': 1.792197823524475} +02/25/2022 02:37:29 - INFO - codeparrot_training - Step 19338: {'lr': 0.0003555853565177659, 'samples': 9901568, 'steps': 19338, 'loss/train': 1.0452955961227417} +02/25/2022 02:37:32 - INFO - codeparrot_training - Step 19339: {'lr': 0.00035557052475374397, 'samples': 9902080, 'steps': 19339, 'loss/train': 2.3191845417022705} +02/25/2022 02:37:38 - INFO - codeparrot_training - Step 19340: {'lr': 0.00035555569253749135, 'samples': 9902592, 'steps': 19340, 'loss/train': 1.4723395109176636} +02/25/2022 02:37:41 - INFO - codeparrot_training - Step 19341: {'lr': 0.0003555408598690718, 'samples': 9903104, 'steps': 19341, 'loss/train': 2.416355609893799} +02/25/2022 02:37:47 - INFO - codeparrot_training - Step 19342: {'lr': 0.0003555260267485485, 'samples': 9903616, 'steps': 19342, 'loss/train': 5.271160125732422} +02/25/2022 02:37:50 - INFO - codeparrot_training - Step 19343: {'lr': 0.00035551119317598533, 'samples': 9904128, 'steps': 19343, 'loss/train': 1.906678318977356} +02/25/2022 02:37:56 - INFO - codeparrot_training - Step 19344: {'lr': 0.00035549635915144574, 'samples': 9904640, 'steps': 19344, 'loss/train': 1.712480902671814} +02/25/2022 02:38:00 - INFO - codeparrot_training - Step 19345: {'lr': 0.0003554815246749932, 'samples': 9905152, 'steps': 19345, 'loss/train': 0.8412127494812012} +02/25/2022 02:38:05 - INFO - codeparrot_training - Step 19346: {'lr': 0.00035546668974669127, 'samples': 9905664, 'steps': 19346, 'loss/train': 1.625795841217041} +02/25/2022 02:38:09 - INFO - codeparrot_training - Step 19347: {'lr': 0.00035545185436660357, 'samples': 9906176, 'steps': 19347, 'loss/train': 2.363006353378296} +02/25/2022 02:38:14 - INFO - codeparrot_training - Step 19348: {'lr': 0.00035543701853479366, 'samples': 9906688, 'steps': 19348, 'loss/train': 1.4702732563018799} +02/25/2022 02:38:18 - INFO - codeparrot_training - Step 19349: {'lr': 0.00035542218225132497, 'samples': 9907200, 'steps': 19349, 'loss/train': 1.820178747177124} +02/25/2022 02:38:23 - INFO - codeparrot_training - Step 19350: {'lr': 0.00035540734551626113, 'samples': 9907712, 'steps': 19350, 'loss/train': 1.2415663003921509} +02/25/2022 02:38:27 - INFO - codeparrot_training - Step 19351: {'lr': 0.00035539250832966574, 'samples': 9908224, 'steps': 19351, 'loss/train': 2.0476346015930176} +02/25/2022 02:38:32 - INFO - codeparrot_training - Step 19352: {'lr': 0.00035537767069160234, 'samples': 9908736, 'steps': 19352, 'loss/train': 2.33284592628479} +02/25/2022 02:38:36 - INFO - codeparrot_training - Step 19353: {'lr': 0.00035536283260213434, 'samples': 9909248, 'steps': 19353, 'loss/train': 2.6625912189483643} +02/25/2022 02:38:41 - INFO - codeparrot_training - Step 19354: {'lr': 0.0003553479940613255, 'samples': 9909760, 'steps': 19354, 'loss/train': 2.5236716270446777} +02/25/2022 02:38:45 - INFO - codeparrot_training - Step 19355: {'lr': 0.00035533315506923924, 'samples': 9910272, 'steps': 19355, 'loss/train': 2.708268165588379} +02/25/2022 02:38:51 - INFO - codeparrot_training - Step 19356: {'lr': 0.0003553183156259393, 'samples': 9910784, 'steps': 19356, 'loss/train': 1.909191370010376} +02/25/2022 02:38:54 - INFO - codeparrot_training - Step 19357: {'lr': 0.00035530347573148904, 'samples': 9911296, 'steps': 19357, 'loss/train': 2.39231276512146} +02/25/2022 02:39:00 - INFO - codeparrot_training - Step 19358: {'lr': 0.0003552886353859522, 'samples': 9911808, 'steps': 19358, 'loss/train': 2.1169612407684326} +02/25/2022 02:39:03 - INFO - codeparrot_training - Step 19359: {'lr': 0.00035527379458939225, 'samples': 9912320, 'steps': 19359, 'loss/train': 2.254922389984131} +02/25/2022 02:39:09 - INFO - codeparrot_training - Step 19360: {'lr': 0.00035525895334187274, 'samples': 9912832, 'steps': 19360, 'loss/train': 1.441593050956726} +02/25/2022 02:39:12 - INFO - codeparrot_training - Step 19361: {'lr': 0.0003552441116434574, 'samples': 9913344, 'steps': 19361, 'loss/train': 2.53437876701355} +02/25/2022 02:39:18 - INFO - codeparrot_training - Step 19362: {'lr': 0.0003552292694942096, 'samples': 9913856, 'steps': 19362, 'loss/train': 3.4571027755737305} +02/25/2022 02:39:21 - INFO - codeparrot_training - Step 19363: {'lr': 0.0003552144268941931, 'samples': 9914368, 'steps': 19363, 'loss/train': 2.1294069290161133} +02/25/2022 02:39:27 - INFO - codeparrot_training - Step 19364: {'lr': 0.00035519958384347134, 'samples': 9914880, 'steps': 19364, 'loss/train': 3.0064258575439453} +02/25/2022 02:39:31 - INFO - codeparrot_training - Step 19365: {'lr': 0.000355184740342108, 'samples': 9915392, 'steps': 19365, 'loss/train': 2.2653021812438965} +02/25/2022 02:39:37 - INFO - codeparrot_training - Step 19366: {'lr': 0.00035516989639016664, 'samples': 9915904, 'steps': 19366, 'loss/train': 0.742005467414856} +02/25/2022 02:39:40 - INFO - codeparrot_training - Step 19367: {'lr': 0.00035515505198771086, 'samples': 9916416, 'steps': 19367, 'loss/train': 2.0157525539398193} +02/25/2022 02:39:46 - INFO - codeparrot_training - Step 19368: {'lr': 0.0003551402071348042, 'samples': 9916928, 'steps': 19368, 'loss/train': 1.8428932428359985} +02/25/2022 02:39:49 - INFO - codeparrot_training - Step 19369: {'lr': 0.0003551253618315103, 'samples': 9917440, 'steps': 19369, 'loss/train': 0.5470841526985168} +02/25/2022 02:39:55 - INFO - codeparrot_training - Step 19370: {'lr': 0.0003551105160778927, 'samples': 9917952, 'steps': 19370, 'loss/train': 2.891383171081543} +02/25/2022 02:39:58 - INFO - codeparrot_training - Step 19371: {'lr': 0.000355095669874015, 'samples': 9918464, 'steps': 19371, 'loss/train': 2.265749931335449} +02/25/2022 02:40:04 - INFO - codeparrot_training - Step 19372: {'lr': 0.00035508082321994097, 'samples': 9918976, 'steps': 19372, 'loss/train': 0.6018871068954468} +02/25/2022 02:40:08 - INFO - codeparrot_training - Step 19373: {'lr': 0.00035506597611573387, 'samples': 9919488, 'steps': 19373, 'loss/train': 1.6407136917114258} +02/25/2022 02:40:13 - INFO - codeparrot_training - Step 19374: {'lr': 0.0003550511285614576, 'samples': 9920000, 'steps': 19374, 'loss/train': 1.9741944074630737} +02/25/2022 02:40:17 - INFO - codeparrot_training - Step 19375: {'lr': 0.0003550362805571756, 'samples': 9920512, 'steps': 19375, 'loss/train': 0.6745988130569458} +02/25/2022 02:40:23 - INFO - codeparrot_training - Step 19376: {'lr': 0.00035502143210295163, 'samples': 9921024, 'steps': 19376, 'loss/train': 1.1684695482254028} +02/25/2022 02:40:26 - INFO - codeparrot_training - Step 19377: {'lr': 0.000355006583198849, 'samples': 9921536, 'steps': 19377, 'loss/train': 1.9040051698684692} +02/25/2022 02:40:32 - INFO - codeparrot_training - Step 19378: {'lr': 0.00035499173384493174, 'samples': 9922048, 'steps': 19378, 'loss/train': 2.65724515914917} +02/25/2022 02:40:35 - INFO - codeparrot_training - Step 19379: {'lr': 0.00035497688404126306, 'samples': 9922560, 'steps': 19379, 'loss/train': 2.56378436088562} +02/25/2022 02:40:41 - INFO - codeparrot_training - Step 19380: {'lr': 0.00035496203378790683, 'samples': 9923072, 'steps': 19380, 'loss/train': 1.8325612545013428} +02/25/2022 02:40:44 - INFO - codeparrot_training - Step 19381: {'lr': 0.0003549471830849265, 'samples': 9923584, 'steps': 19381, 'loss/train': 2.72446870803833} +02/25/2022 02:40:50 - INFO - codeparrot_training - Step 19382: {'lr': 0.00035493233193238584, 'samples': 9924096, 'steps': 19382, 'loss/train': 2.2228333950042725} +02/25/2022 02:40:53 - INFO - codeparrot_training - Step 19383: {'lr': 0.00035491748033034836, 'samples': 9924608, 'steps': 19383, 'loss/train': 2.292468547821045} +02/25/2022 02:40:59 - INFO - codeparrot_training - Step 19384: {'lr': 0.00035490262827887764, 'samples': 9925120, 'steps': 19384, 'loss/train': 1.4534056186676025} +02/25/2022 02:41:02 - INFO - codeparrot_training - Step 19385: {'lr': 0.0003548877757780375, 'samples': 9925632, 'steps': 19385, 'loss/train': 2.4866769313812256} +02/25/2022 02:41:09 - INFO - codeparrot_training - Step 19386: {'lr': 0.00035487292282789136, 'samples': 9926144, 'steps': 19386, 'loss/train': 2.3067026138305664} +02/25/2022 02:41:12 - INFO - codeparrot_training - Step 19387: {'lr': 0.000354858069428503, 'samples': 9926656, 'steps': 19387, 'loss/train': 1.1646324396133423} +02/25/2022 02:41:18 - INFO - codeparrot_training - Step 19388: {'lr': 0.0003548432155799358, 'samples': 9927168, 'steps': 19388, 'loss/train': 2.183593511581421} +02/25/2022 02:41:21 - INFO - codeparrot_training - Step 19389: {'lr': 0.0003548283612822537, 'samples': 9927680, 'steps': 19389, 'loss/train': 0.9730269312858582} +02/25/2022 02:41:27 - INFO - codeparrot_training - Step 19390: {'lr': 0.0003548135065355201, 'samples': 9928192, 'steps': 19390, 'loss/train': 2.060657262802124} +02/25/2022 02:41:30 - INFO - codeparrot_training - Step 19391: {'lr': 0.0003547986513397988, 'samples': 9928704, 'steps': 19391, 'loss/train': 2.292754888534546} +02/25/2022 02:41:36 - INFO - codeparrot_training - Step 19392: {'lr': 0.0003547837956951533, 'samples': 9929216, 'steps': 19392, 'loss/train': 2.0760703086853027} +02/25/2022 02:41:41 - INFO - codeparrot_training - Step 19393: {'lr': 0.00035476893960164734, 'samples': 9929728, 'steps': 19393, 'loss/train': 2.142906665802002} +02/25/2022 02:41:45 - INFO - codeparrot_training - Step 19394: {'lr': 0.00035475408305934444, 'samples': 9930240, 'steps': 19394, 'loss/train': 2.280719518661499} +02/25/2022 02:41:50 - INFO - codeparrot_training - Step 19395: {'lr': 0.0003547392260683084, 'samples': 9930752, 'steps': 19395, 'loss/train': 0.554097056388855} +02/25/2022 02:41:54 - INFO - codeparrot_training - Step 19396: {'lr': 0.0003547243686286027, 'samples': 9931264, 'steps': 19396, 'loss/train': 2.9527838230133057} +02/25/2022 02:41:57 - INFO - codeparrot_training - Step 19397: {'lr': 0.000354709510740291, 'samples': 9931776, 'steps': 19397, 'loss/train': 0.9633437991142273} +02/25/2022 02:42:03 - INFO - codeparrot_training - Step 19398: {'lr': 0.0003546946524034371, 'samples': 9932288, 'steps': 19398, 'loss/train': 3.556969404220581} +02/25/2022 02:42:06 - INFO - codeparrot_training - Step 19399: {'lr': 0.00035467979361810455, 'samples': 9932800, 'steps': 19399, 'loss/train': 1.7744941711425781} +02/25/2022 02:42:12 - INFO - codeparrot_training - Step 19400: {'lr': 0.00035466493438435703, 'samples': 9933312, 'steps': 19400, 'loss/train': 1.5338068008422852} +02/25/2022 02:42:18 - INFO - codeparrot_training - Step 19401: {'lr': 0.00035465007470225813, 'samples': 9933824, 'steps': 19401, 'loss/train': 1.9150464534759521} +02/25/2022 02:42:21 - INFO - codeparrot_training - Step 19402: {'lr': 0.0003546352145718715, 'samples': 9934336, 'steps': 19402, 'loss/train': 1.7839462757110596} +02/25/2022 02:42:27 - INFO - codeparrot_training - Step 19403: {'lr': 0.0003546203539932609, 'samples': 9934848, 'steps': 19403, 'loss/train': 2.4478936195373535} +02/25/2022 02:42:30 - INFO - codeparrot_training - Step 19404: {'lr': 0.0003546054929664899, 'samples': 9935360, 'steps': 19404, 'loss/train': 2.5634331703186035} +02/25/2022 02:42:34 - INFO - codeparrot_training - Step 19405: {'lr': 0.0003545906314916222, 'samples': 9935872, 'steps': 19405, 'loss/train': 1.7341463565826416} +02/25/2022 02:42:39 - INFO - codeparrot_training - Step 19406: {'lr': 0.00035457576956872145, 'samples': 9936384, 'steps': 19406, 'loss/train': 2.2536680698394775} +02/25/2022 02:42:43 - INFO - codeparrot_training - Step 19407: {'lr': 0.00035456090719785126, 'samples': 9936896, 'steps': 19407, 'loss/train': 1.537829041481018} +02/25/2022 02:42:48 - INFO - codeparrot_training - Step 19408: {'lr': 0.00035454604437907536, 'samples': 9937408, 'steps': 19408, 'loss/train': 1.2349534034729004} +02/25/2022 02:42:54 - INFO - codeparrot_training - Step 19409: {'lr': 0.0003545311811124574, 'samples': 9937920, 'steps': 19409, 'loss/train': 2.5081257820129395} +02/25/2022 02:42:57 - INFO - codeparrot_training - Step 19410: {'lr': 0.0003545163173980611, 'samples': 9938432, 'steps': 19410, 'loss/train': 2.3050005435943604} +02/25/2022 02:43:03 - INFO - codeparrot_training - Step 19411: {'lr': 0.0003545014532359501, 'samples': 9938944, 'steps': 19411, 'loss/train': 0.22542831301689148} +02/25/2022 02:43:07 - INFO - codeparrot_training - Step 19412: {'lr': 0.000354486588626188, 'samples': 9939456, 'steps': 19412, 'loss/train': 1.9168795347213745} +02/25/2022 02:43:13 - INFO - codeparrot_training - Step 19413: {'lr': 0.0003544717235688385, 'samples': 9939968, 'steps': 19413, 'loss/train': 2.002089738845825} +02/25/2022 02:43:16 - INFO - codeparrot_training - Step 19414: {'lr': 0.00035445685806396543, 'samples': 9940480, 'steps': 19414, 'loss/train': 2.94095778465271} +02/25/2022 02:43:21 - INFO - codeparrot_training - Step 19415: {'lr': 0.00035444199211163226, 'samples': 9940992, 'steps': 19415, 'loss/train': 2.3083767890930176} +02/25/2022 02:43:25 - INFO - codeparrot_training - Step 19416: {'lr': 0.0003544271257119028, 'samples': 9941504, 'steps': 19416, 'loss/train': 2.683513879776001} +02/25/2022 02:43:30 - INFO - codeparrot_training - Step 19417: {'lr': 0.00035441225886484066, 'samples': 9942016, 'steps': 19417, 'loss/train': 1.9332672357559204} +02/25/2022 02:43:34 - INFO - codeparrot_training - Step 19418: {'lr': 0.0003543973915705095, 'samples': 9942528, 'steps': 19418, 'loss/train': 1.6198923587799072} +02/25/2022 02:43:39 - INFO - codeparrot_training - Step 19419: {'lr': 0.0003543825238289732, 'samples': 9943040, 'steps': 19419, 'loss/train': 0.8241666555404663} +02/25/2022 02:43:43 - INFO - codeparrot_training - Step 19420: {'lr': 0.0003543676556402952, 'samples': 9943552, 'steps': 19420, 'loss/train': 1.546484112739563} +02/25/2022 02:43:48 - INFO - codeparrot_training - Step 19421: {'lr': 0.00035435278700453934, 'samples': 9944064, 'steps': 19421, 'loss/train': 2.1739501953125} +02/25/2022 02:43:52 - INFO - codeparrot_training - Step 19422: {'lr': 0.0003543379179217693, 'samples': 9944576, 'steps': 19422, 'loss/train': 3.12174129486084} +02/25/2022 02:43:58 - INFO - codeparrot_training - Step 19423: {'lr': 0.0003543230483920487, 'samples': 9945088, 'steps': 19423, 'loss/train': 0.8596010208129883} +02/25/2022 02:44:01 - INFO - codeparrot_training - Step 19424: {'lr': 0.0003543081784154414, 'samples': 9945600, 'steps': 19424, 'loss/train': 2.4726855754852295} +02/25/2022 02:44:07 - INFO - codeparrot_training - Step 19425: {'lr': 0.00035429330799201085, 'samples': 9946112, 'steps': 19425, 'loss/train': 1.9892871379852295} +02/25/2022 02:44:10 - INFO - codeparrot_training - Step 19426: {'lr': 0.00035427843712182097, 'samples': 9946624, 'steps': 19426, 'loss/train': 2.166886806488037} +02/25/2022 02:44:16 - INFO - codeparrot_training - Step 19427: {'lr': 0.0003542635658049353, 'samples': 9947136, 'steps': 19427, 'loss/train': 2.1598165035247803} +02/25/2022 02:44:19 - INFO - codeparrot_training - Step 19428: {'lr': 0.0003542486940414177, 'samples': 9947648, 'steps': 19428, 'loss/train': 0.9542031288146973} +02/25/2022 02:44:25 - INFO - codeparrot_training - Step 19429: {'lr': 0.00035423382183133174, 'samples': 9948160, 'steps': 19429, 'loss/train': 2.6849005222320557} +02/25/2022 02:44:28 - INFO - codeparrot_training - Step 19430: {'lr': 0.00035421894917474125, 'samples': 9948672, 'steps': 19430, 'loss/train': 2.466521739959717} +02/25/2022 02:44:34 - INFO - codeparrot_training - Step 19431: {'lr': 0.0003542040760717098, 'samples': 9949184, 'steps': 19431, 'loss/train': 1.6099371910095215} +02/25/2022 02:44:38 - INFO - codeparrot_training - Step 19432: {'lr': 0.0003541892025223012, 'samples': 9949696, 'steps': 19432, 'loss/train': 0.8487576246261597} +02/25/2022 02:44:43 - INFO - codeparrot_training - Step 19433: {'lr': 0.00035417432852657916, 'samples': 9950208, 'steps': 19433, 'loss/train': 0.7949591279029846} +02/25/2022 02:44:46 - INFO - codeparrot_training - Step 19434: {'lr': 0.00035415945408460737, 'samples': 9950720, 'steps': 19434, 'loss/train': 1.8293412923812866} +02/25/2022 02:44:52 - INFO - codeparrot_training - Step 19435: {'lr': 0.0003541445791964496, 'samples': 9951232, 'steps': 19435, 'loss/train': 1.6136757135391235} +02/25/2022 02:44:55 - INFO - codeparrot_training - Step 19436: {'lr': 0.0003541297038621694, 'samples': 9951744, 'steps': 19436, 'loss/train': 2.080000638961792} +02/25/2022 02:45:02 - INFO - codeparrot_training - Step 19437: {'lr': 0.0003541148280818307, 'samples': 9952256, 'steps': 19437, 'loss/train': 1.9385898113250732} +02/25/2022 02:45:05 - INFO - codeparrot_training - Step 19438: {'lr': 0.00035409995185549717, 'samples': 9952768, 'steps': 19438, 'loss/train': 2.651024103164673} +02/25/2022 02:45:11 - INFO - codeparrot_training - Step 19439: {'lr': 0.00035408507518323244, 'samples': 9953280, 'steps': 19439, 'loss/train': 1.0612624883651733} +02/25/2022 02:45:14 - INFO - codeparrot_training - Step 19440: {'lr': 0.00035407019806510035, 'samples': 9953792, 'steps': 19440, 'loss/train': 2.1233155727386475} +02/25/2022 02:45:20 - INFO - codeparrot_training - Step 19441: {'lr': 0.0003540553205011645, 'samples': 9954304, 'steps': 19441, 'loss/train': 2.258026123046875} +02/25/2022 02:45:23 - INFO - codeparrot_training - Step 19442: {'lr': 0.00035404044249148873, 'samples': 9954816, 'steps': 19442, 'loss/train': 2.034329652786255} +02/25/2022 02:45:29 - INFO - codeparrot_training - Step 19443: {'lr': 0.0003540255640361368, 'samples': 9955328, 'steps': 19443, 'loss/train': 0.7765913009643555} +02/25/2022 02:45:32 - INFO - codeparrot_training - Step 19444: {'lr': 0.0003540106851351723, 'samples': 9955840, 'steps': 19444, 'loss/train': 1.3520792722702026} +02/25/2022 02:45:38 - INFO - codeparrot_training - Step 19445: {'lr': 0.00035399580578865907, 'samples': 9956352, 'steps': 19445, 'loss/train': 0.47456982731819153} +02/25/2022 02:45:41 - INFO - codeparrot_training - Step 19446: {'lr': 0.00035398092599666086, 'samples': 9956864, 'steps': 19446, 'loss/train': 2.323485851287842} +02/25/2022 02:45:48 - INFO - codeparrot_training - Step 19447: {'lr': 0.00035396604575924133, 'samples': 9957376, 'steps': 19447, 'loss/train': 2.045677661895752} +02/25/2022 02:45:51 - INFO - codeparrot_training - Step 19448: {'lr': 0.00035395116507646435, 'samples': 9957888, 'steps': 19448, 'loss/train': 1.7655506134033203} +02/25/2022 02:45:57 - INFO - codeparrot_training - Step 19449: {'lr': 0.00035393628394839356, 'samples': 9958400, 'steps': 19449, 'loss/train': 3.113555431365967} +02/25/2022 02:46:00 - INFO - codeparrot_training - Step 19450: {'lr': 0.00035392140237509276, 'samples': 9958912, 'steps': 19450, 'loss/train': 1.6115671396255493} +02/25/2022 02:46:06 - INFO - codeparrot_training - Step 19451: {'lr': 0.0003539065203566256, 'samples': 9959424, 'steps': 19451, 'loss/train': 2.853212356567383} +02/25/2022 02:46:09 - INFO - codeparrot_training - Step 19452: {'lr': 0.0003538916378930559, 'samples': 9959936, 'steps': 19452, 'loss/train': 2.6921048164367676} +02/25/2022 02:46:15 - INFO - codeparrot_training - Step 19453: {'lr': 0.0003538767549844475, 'samples': 9960448, 'steps': 19453, 'loss/train': 2.059866428375244} +02/25/2022 02:46:18 - INFO - codeparrot_training - Step 19454: {'lr': 0.000353861871630864, 'samples': 9960960, 'steps': 19454, 'loss/train': 3.673832893371582} +02/25/2022 02:46:24 - INFO - codeparrot_training - Step 19455: {'lr': 0.00035384698783236923, 'samples': 9961472, 'steps': 19455, 'loss/train': 1.805040955543518} +02/25/2022 02:46:27 - INFO - codeparrot_training - Step 19456: {'lr': 0.00035383210358902695, 'samples': 9961984, 'steps': 19456, 'loss/train': 1.6846179962158203} +02/25/2022 02:46:34 - INFO - codeparrot_training - Step 19457: {'lr': 0.0003538172189009009, 'samples': 9962496, 'steps': 19457, 'loss/train': 2.2305312156677246} +02/25/2022 02:46:37 - INFO - codeparrot_training - Step 19458: {'lr': 0.00035380233376805487, 'samples': 9963008, 'steps': 19458, 'loss/train': 1.8792197704315186} +02/25/2022 02:46:43 - INFO - codeparrot_training - Step 19459: {'lr': 0.00035378744819055264, 'samples': 9963520, 'steps': 19459, 'loss/train': 1.164211630821228} +02/25/2022 02:46:46 - INFO - codeparrot_training - Step 19460: {'lr': 0.0003537725621684578, 'samples': 9964032, 'steps': 19460, 'loss/train': 2.3472208976745605} +02/25/2022 02:46:52 - INFO - codeparrot_training - Step 19461: {'lr': 0.0003537576757018344, 'samples': 9964544, 'steps': 19461, 'loss/train': 2.610691785812378} +02/25/2022 02:46:55 - INFO - codeparrot_training - Step 19462: {'lr': 0.0003537427887907459, 'samples': 9965056, 'steps': 19462, 'loss/train': 2.193692445755005} +02/25/2022 02:47:01 - INFO - codeparrot_training - Step 19463: {'lr': 0.0003537279014352565, 'samples': 9965568, 'steps': 19463, 'loss/train': 1.923625111579895} +02/25/2022 02:47:04 - INFO - codeparrot_training - Step 19464: {'lr': 0.00035371301363542945, 'samples': 9966080, 'steps': 19464, 'loss/train': 2.0617213249206543} +02/25/2022 02:47:10 - INFO - codeparrot_training - Step 19465: {'lr': 0.00035369812539132894, 'samples': 9966592, 'steps': 19465, 'loss/train': 1.2559000253677368} +02/25/2022 02:47:13 - INFO - codeparrot_training - Step 19466: {'lr': 0.0003536832367030185, 'samples': 9967104, 'steps': 19466, 'loss/train': 1.83268141746521} +02/25/2022 02:47:20 - INFO - codeparrot_training - Step 19467: {'lr': 0.00035366834757056203, 'samples': 9967616, 'steps': 19467, 'loss/train': 1.2091857194900513} +02/25/2022 02:47:24 - INFO - codeparrot_training - Step 19468: {'lr': 0.0003536534579940233, 'samples': 9968128, 'steps': 19468, 'loss/train': 2.146350383758545} +02/25/2022 02:47:29 - INFO - codeparrot_training - Step 19469: {'lr': 0.0003536385679734659, 'samples': 9968640, 'steps': 19469, 'loss/train': 2.4267187118530273} +02/25/2022 02:47:33 - INFO - codeparrot_training - Step 19470: {'lr': 0.000353623677508954, 'samples': 9969152, 'steps': 19470, 'loss/train': 2.47650146484375} +02/25/2022 02:47:38 - INFO - codeparrot_training - Step 19471: {'lr': 0.00035360878660055107, 'samples': 9969664, 'steps': 19471, 'loss/train': 2.0601797103881836} +02/25/2022 02:47:42 - INFO - codeparrot_training - Step 19472: {'lr': 0.0003535938952483211, 'samples': 9970176, 'steps': 19472, 'loss/train': 2.7199342250823975} +02/25/2022 02:47:47 - INFO - codeparrot_training - Step 19473: {'lr': 0.00035357900345232764, 'samples': 9970688, 'steps': 19473, 'loss/train': 1.1251051425933838} +02/25/2022 02:47:51 - INFO - codeparrot_training - Step 19474: {'lr': 0.0003535641112126347, 'samples': 9971200, 'steps': 19474, 'loss/train': 1.7359498739242554} +02/25/2022 02:47:56 - INFO - codeparrot_training - Step 19475: {'lr': 0.00035354921852930596, 'samples': 9971712, 'steps': 19475, 'loss/train': 1.335972785949707} +02/25/2022 02:48:00 - INFO - codeparrot_training - Step 19476: {'lr': 0.0003535343254024053, 'samples': 9972224, 'steps': 19476, 'loss/train': 0.9973539113998413} +02/25/2022 02:48:06 - INFO - codeparrot_training - Step 19477: {'lr': 0.00035351943183199643, 'samples': 9972736, 'steps': 19477, 'loss/train': 2.1096160411834717} +02/25/2022 02:48:09 - INFO - codeparrot_training - Step 19478: {'lr': 0.0003535045378181432, 'samples': 9973248, 'steps': 19478, 'loss/train': 2.2010080814361572} +02/25/2022 02:48:15 - INFO - codeparrot_training - Step 19479: {'lr': 0.0003534896433609093, 'samples': 9973760, 'steps': 19479, 'loss/train': 2.0903377532958984} +02/25/2022 02:48:18 - INFO - codeparrot_training - Step 19480: {'lr': 0.0003534747484603587, 'samples': 9974272, 'steps': 19480, 'loss/train': 0.5457518696784973} +02/25/2022 02:48:24 - INFO - codeparrot_training - Step 19481: {'lr': 0.00035345985311655513, 'samples': 9974784, 'steps': 19481, 'loss/train': 2.3291385173797607} +02/25/2022 02:48:27 - INFO - codeparrot_training - Step 19482: {'lr': 0.0003534449573295624, 'samples': 9975296, 'steps': 19482, 'loss/train': 1.7085214853286743} +02/25/2022 02:48:34 - INFO - codeparrot_training - Step 19483: {'lr': 0.0003534300610994444, 'samples': 9975808, 'steps': 19483, 'loss/train': 0.7055985331535339} +02/25/2022 02:48:38 - INFO - codeparrot_training - Step 19484: {'lr': 0.00035341516442626475, 'samples': 9976320, 'steps': 19484, 'loss/train': 2.070014238357544} +02/25/2022 02:48:44 - INFO - codeparrot_training - Step 19485: {'lr': 0.00035340026731008745, 'samples': 9976832, 'steps': 19485, 'loss/train': 1.5858851671218872} +02/25/2022 02:48:47 - INFO - codeparrot_training - Step 19486: {'lr': 0.0003533853697509762, 'samples': 9977344, 'steps': 19486, 'loss/train': 1.0990734100341797} +02/25/2022 02:48:52 - INFO - codeparrot_training - Step 19487: {'lr': 0.0003533704717489949, 'samples': 9977856, 'steps': 19487, 'loss/train': 1.49764084815979} +02/25/2022 02:48:56 - INFO - codeparrot_training - Step 19488: {'lr': 0.00035335557330420724, 'samples': 9978368, 'steps': 19488, 'loss/train': 1.5058784484863281} +02/25/2022 02:49:01 - INFO - codeparrot_training - Step 19489: {'lr': 0.0003533406744166772, 'samples': 9978880, 'steps': 19489, 'loss/train': 1.4632439613342285} +02/25/2022 02:49:05 - INFO - codeparrot_training - Step 19490: {'lr': 0.00035332577508646846, 'samples': 9979392, 'steps': 19490, 'loss/train': 2.2159199714660645} +02/25/2022 02:49:10 - INFO - codeparrot_training - Step 19491: {'lr': 0.000353310875313645, 'samples': 9979904, 'steps': 19491, 'loss/train': 1.598966121673584} +02/25/2022 02:49:14 - INFO - codeparrot_training - Step 19492: {'lr': 0.00035329597509827046, 'samples': 9980416, 'steps': 19492, 'loss/train': 1.0308691263198853} +02/25/2022 02:49:21 - INFO - codeparrot_training - Step 19493: {'lr': 0.00035328107444040875, 'samples': 9980928, 'steps': 19493, 'loss/train': 2.353309392929077} +02/25/2022 02:49:27 - INFO - codeparrot_training - Step 19494: {'lr': 0.0003532661733401238, 'samples': 9981440, 'steps': 19494, 'loss/train': 0.5372552275657654} +02/25/2022 02:49:30 - INFO - codeparrot_training - Step 19495: {'lr': 0.0003532512717974793, 'samples': 9981952, 'steps': 19495, 'loss/train': 2.1641781330108643} +02/25/2022 02:49:36 - INFO - codeparrot_training - Step 19496: {'lr': 0.00035323636981253914, 'samples': 9982464, 'steps': 19496, 'loss/train': 0.7208479046821594} +02/25/2022 02:49:39 - INFO - codeparrot_training - Step 19497: {'lr': 0.00035322146738536714, 'samples': 9982976, 'steps': 19497, 'loss/train': 1.4075491428375244} +02/25/2022 02:49:45 - INFO - codeparrot_training - Step 19498: {'lr': 0.00035320656451602725, 'samples': 9983488, 'steps': 19498, 'loss/train': 2.5539238452911377} +02/25/2022 02:49:48 - INFO - codeparrot_training - Step 19499: {'lr': 0.00035319166120458315, 'samples': 9984000, 'steps': 19499, 'loss/train': 1.254536509513855} +02/25/2022 02:49:54 - INFO - codeparrot_training - Step 19500: {'lr': 0.00035317675745109866, 'samples': 9984512, 'steps': 19500, 'loss/train': 3.8736679553985596} +02/25/2022 02:49:57 - INFO - codeparrot_training - Step 19501: {'lr': 0.0003531618532556378, 'samples': 9985024, 'steps': 19501, 'loss/train': 2.3909385204315186} +02/25/2022 02:50:03 - INFO - codeparrot_training - Step 19502: {'lr': 0.00035314694861826427, 'samples': 9985536, 'steps': 19502, 'loss/train': 2.1281533241271973} +02/25/2022 02:50:07 - INFO - codeparrot_training - Step 19503: {'lr': 0.00035313204353904203, 'samples': 9986048, 'steps': 19503, 'loss/train': 1.9034149646759033} +02/25/2022 02:50:10 - INFO - codeparrot_training - Step 19504: {'lr': 0.0003531171380180348, 'samples': 9986560, 'steps': 19504, 'loss/train': 3.7592263221740723} +02/25/2022 02:50:18 - INFO - codeparrot_training - Step 19505: {'lr': 0.00035310223205530655, 'samples': 9987072, 'steps': 19505, 'loss/train': 2.2944185733795166} +02/25/2022 02:50:21 - INFO - codeparrot_training - Step 19506: {'lr': 0.00035308732565092097, 'samples': 9987584, 'steps': 19506, 'loss/train': 1.685953974723816} +02/25/2022 02:50:27 - INFO - codeparrot_training - Step 19507: {'lr': 0.0003530724188049421, 'samples': 9988096, 'steps': 19507, 'loss/train': 2.1296699047088623} +02/25/2022 02:50:30 - INFO - codeparrot_training - Step 19508: {'lr': 0.0003530575115174337, 'samples': 9988608, 'steps': 19508, 'loss/train': 1.947312593460083} +02/25/2022 02:50:36 - INFO - codeparrot_training - Step 19509: {'lr': 0.0003530426037884597, 'samples': 9989120, 'steps': 19509, 'loss/train': 2.0738906860351562} +02/25/2022 02:50:39 - INFO - codeparrot_training - Step 19510: {'lr': 0.00035302769561808387, 'samples': 9989632, 'steps': 19510, 'loss/train': 1.4905050992965698} +02/25/2022 02:50:45 - INFO - codeparrot_training - Step 19511: {'lr': 0.00035301278700637005, 'samples': 9990144, 'steps': 19511, 'loss/train': 1.7383122444152832} +02/25/2022 02:50:48 - INFO - codeparrot_training - Step 19512: {'lr': 0.0003529978779533822, 'samples': 9990656, 'steps': 19512, 'loss/train': 2.7670764923095703} +02/25/2022 02:50:54 - INFO - codeparrot_training - Step 19513: {'lr': 0.0003529829684591842, 'samples': 9991168, 'steps': 19513, 'loss/train': 1.805279016494751} +02/25/2022 02:50:57 - INFO - codeparrot_training - Step 19514: {'lr': 0.0003529680585238398, 'samples': 9991680, 'steps': 19514, 'loss/train': 2.214223861694336} +02/25/2022 02:51:04 - INFO - codeparrot_training - Step 19515: {'lr': 0.000352953148147413, 'samples': 9992192, 'steps': 19515, 'loss/train': 2.2694575786590576} +02/25/2022 02:51:08 - INFO - codeparrot_training - Step 19516: {'lr': 0.0003529382373299675, 'samples': 9992704, 'steps': 19516, 'loss/train': 2.026510238647461} +02/25/2022 02:51:13 - INFO - codeparrot_training - Step 19517: {'lr': 0.0003529233260715673, 'samples': 9993216, 'steps': 19517, 'loss/train': 2.6090493202209473} +02/25/2022 02:51:17 - INFO - codeparrot_training - Step 19518: {'lr': 0.00035290841437227627, 'samples': 9993728, 'steps': 19518, 'loss/train': 1.5069223642349243} +02/25/2022 02:51:23 - INFO - codeparrot_training - Step 19519: {'lr': 0.00035289350223215827, 'samples': 9994240, 'steps': 19519, 'loss/train': 3.166726589202881} +02/25/2022 02:51:26 - INFO - codeparrot_training - Step 19520: {'lr': 0.00035287858965127723, 'samples': 9994752, 'steps': 19520, 'loss/train': 0.9958294034004211} +02/25/2022 02:51:32 - INFO - codeparrot_training - Step 19521: {'lr': 0.0003528636766296969, 'samples': 9995264, 'steps': 19521, 'loss/train': 1.962761402130127} +02/25/2022 02:51:35 - INFO - codeparrot_training - Step 19522: {'lr': 0.0003528487631674813, 'samples': 9995776, 'steps': 19522, 'loss/train': 1.7990299463272095} +02/25/2022 02:51:41 - INFO - codeparrot_training - Step 19523: {'lr': 0.00035283384926469426, 'samples': 9996288, 'steps': 19523, 'loss/train': 1.3673853874206543} +02/25/2022 02:51:44 - INFO - codeparrot_training - Step 19524: {'lr': 0.00035281893492139965, 'samples': 9996800, 'steps': 19524, 'loss/train': 2.6090004444122314} +02/25/2022 02:51:52 - INFO - codeparrot_training - Step 19525: {'lr': 0.0003528040201376613, 'samples': 9997312, 'steps': 19525, 'loss/train': 2.215991973876953} +02/25/2022 02:51:55 - INFO - codeparrot_training - Step 19526: {'lr': 0.00035278910491354335, 'samples': 9997824, 'steps': 19526, 'loss/train': 2.9071054458618164} +02/25/2022 02:52:01 - INFO - codeparrot_training - Step 19527: {'lr': 0.0003527741892491093, 'samples': 9998336, 'steps': 19527, 'loss/train': 0.8466321229934692} +02/25/2022 02:52:04 - INFO - codeparrot_training - Step 19528: {'lr': 0.00035275927314442344, 'samples': 9998848, 'steps': 19528, 'loss/train': 1.6073174476623535} +02/25/2022 02:52:10 - INFO - codeparrot_training - Step 19529: {'lr': 0.00035274435659954936, 'samples': 9999360, 'steps': 19529, 'loss/train': 2.08135986328125} +02/25/2022 02:52:13 - INFO - codeparrot_training - Step 19530: {'lr': 0.00035272943961455106, 'samples': 9999872, 'steps': 19530, 'loss/train': 1.9042640924453735} +02/25/2022 02:52:19 - INFO - codeparrot_training - Step 19531: {'lr': 0.00035271452218949256, 'samples': 10000384, 'steps': 19531, 'loss/train': 0.2562394440174103} +02/25/2022 02:52:22 - INFO - codeparrot_training - Step 19532: {'lr': 0.0003526996043244376, 'samples': 10000896, 'steps': 19532, 'loss/train': 2.480272054672241} +02/25/2022 02:52:28 - INFO - codeparrot_training - Step 19533: {'lr': 0.0003526846860194502, 'samples': 10001408, 'steps': 19533, 'loss/train': 3.416160821914673} +02/25/2022 02:52:31 - INFO - codeparrot_training - Step 19534: {'lr': 0.0003526697672745942, 'samples': 10001920, 'steps': 19534, 'loss/train': 1.7121726274490356} +02/25/2022 02:52:37 - INFO - codeparrot_training - Step 19535: {'lr': 0.0003526548480899335, 'samples': 10002432, 'steps': 19535, 'loss/train': 0.3300989866256714} +02/25/2022 02:52:40 - INFO - codeparrot_training - Step 19536: {'lr': 0.000352639928465532, 'samples': 10002944, 'steps': 19536, 'loss/train': 2.5870585441589355} +02/25/2022 02:52:46 - INFO - codeparrot_training - Step 19537: {'lr': 0.00035262500840145365, 'samples': 10003456, 'steps': 19537, 'loss/train': 1.162061095237732} +02/25/2022 02:52:49 - INFO - codeparrot_training - Step 19538: {'lr': 0.00035261008789776237, 'samples': 10003968, 'steps': 19538, 'loss/train': 2.3007102012634277} +02/25/2022 02:52:55 - INFO - codeparrot_training - Step 19539: {'lr': 0.0003525951669545221, 'samples': 10004480, 'steps': 19539, 'loss/train': 1.0688713788986206} +02/25/2022 02:52:58 - INFO - codeparrot_training - Step 19540: {'lr': 0.00035258024557179664, 'samples': 10004992, 'steps': 19540, 'loss/train': 5.207286834716797} +02/25/2022 02:53:06 - INFO - codeparrot_training - Step 19541: {'lr': 0.00035256532374965, 'samples': 10005504, 'steps': 19541, 'loss/train': 2.109147310256958} +02/25/2022 02:53:09 - INFO - codeparrot_training - Step 19542: {'lr': 0.0003525504014881461, 'samples': 10006016, 'steps': 19542, 'loss/train': 1.3966560363769531} +02/25/2022 02:53:15 - INFO - codeparrot_training - Step 19543: {'lr': 0.0003525354787873488, 'samples': 10006528, 'steps': 19543, 'loss/train': 2.1022613048553467} +02/25/2022 02:53:18 - INFO - codeparrot_training - Step 19544: {'lr': 0.0003525205556473221, 'samples': 10007040, 'steps': 19544, 'loss/train': 3.664903402328491} +02/25/2022 02:53:24 - INFO - codeparrot_training - Step 19545: {'lr': 0.0003525056320681299, 'samples': 10007552, 'steps': 19545, 'loss/train': 2.0755460262298584} +02/25/2022 02:53:27 - INFO - codeparrot_training - Step 19546: {'lr': 0.00035249070804983616, 'samples': 10008064, 'steps': 19546, 'loss/train': 0.9586907625198364} +02/25/2022 02:53:33 - INFO - codeparrot_training - Step 19547: {'lr': 0.00035247578359250473, 'samples': 10008576, 'steps': 19547, 'loss/train': 1.0740753412246704} +02/25/2022 02:53:36 - INFO - codeparrot_training - Step 19548: {'lr': 0.0003524608586961996, 'samples': 10009088, 'steps': 19548, 'loss/train': 1.3718539476394653} +02/25/2022 02:53:42 - INFO - codeparrot_training - Step 19549: {'lr': 0.00035244593336098464, 'samples': 10009600, 'steps': 19549, 'loss/train': 1.3043261766433716} +02/25/2022 02:53:45 - INFO - codeparrot_training - Step 19550: {'lr': 0.0003524310075869239, 'samples': 10010112, 'steps': 19550, 'loss/train': 1.4652314186096191} +02/25/2022 02:53:53 - INFO - codeparrot_training - Step 19551: {'lr': 0.0003524160813740812, 'samples': 10010624, 'steps': 19551, 'loss/train': 2.622642993927002} +02/25/2022 02:53:56 - INFO - codeparrot_training - Step 19552: {'lr': 0.00035240115472252056, 'samples': 10011136, 'steps': 19552, 'loss/train': 1.0312288999557495} +02/25/2022 02:54:02 - INFO - codeparrot_training - Step 19553: {'lr': 0.00035238622763230574, 'samples': 10011648, 'steps': 19553, 'loss/train': 0.9093035459518433} +02/25/2022 02:54:05 - INFO - codeparrot_training - Step 19554: {'lr': 0.000352371300103501, 'samples': 10012160, 'steps': 19554, 'loss/train': 2.064634084701538} +02/25/2022 02:54:11 - INFO - codeparrot_training - Step 19555: {'lr': 0.0003523563721361701, 'samples': 10012672, 'steps': 19555, 'loss/train': 1.5709894895553589} +02/25/2022 02:54:14 - INFO - codeparrot_training - Step 19556: {'lr': 0.0003523414437303769, 'samples': 10013184, 'steps': 19556, 'loss/train': 2.32289719581604} +02/25/2022 02:54:20 - INFO - codeparrot_training - Step 19557: {'lr': 0.00035232651488618564, 'samples': 10013696, 'steps': 19557, 'loss/train': 0.8737673163414001} +02/25/2022 02:54:23 - INFO - codeparrot_training - Step 19558: {'lr': 0.00035231158560365987, 'samples': 10014208, 'steps': 19558, 'loss/train': 2.232379913330078} +02/25/2022 02:54:29 - INFO - codeparrot_training - Step 19559: {'lr': 0.0003522966558828639, 'samples': 10014720, 'steps': 19559, 'loss/train': 2.5879552364349365} +02/25/2022 02:54:32 - INFO - codeparrot_training - Step 19560: {'lr': 0.00035228172572386146, 'samples': 10015232, 'steps': 19560, 'loss/train': 0.694169282913208} +02/25/2022 02:54:40 - INFO - codeparrot_training - Step 19561: {'lr': 0.00035226679512671664, 'samples': 10015744, 'steps': 19561, 'loss/train': 1.174360990524292} +02/25/2022 02:54:43 - INFO - codeparrot_training - Step 19562: {'lr': 0.00035225186409149333, 'samples': 10016256, 'steps': 19562, 'loss/train': 3.9466090202331543} +02/25/2022 02:54:49 - INFO - codeparrot_training - Step 19563: {'lr': 0.00035223693261825554, 'samples': 10016768, 'steps': 19563, 'loss/train': 1.5544453859329224} +02/25/2022 02:54:52 - INFO - codeparrot_training - Step 19564: {'lr': 0.0003522220007070671, 'samples': 10017280, 'steps': 19564, 'loss/train': 2.1561708450317383} +02/25/2022 02:54:58 - INFO - codeparrot_training - Step 19565: {'lr': 0.0003522070683579921, 'samples': 10017792, 'steps': 19565, 'loss/train': 1.282261848449707} +02/25/2022 02:55:01 - INFO - codeparrot_training - Step 19566: {'lr': 0.00035219213557109453, 'samples': 10018304, 'steps': 19566, 'loss/train': 2.3898255825042725} +02/25/2022 02:55:07 - INFO - codeparrot_training - Step 19567: {'lr': 0.00035217720234643823, 'samples': 10018816, 'steps': 19567, 'loss/train': 2.159210205078125} +02/25/2022 02:55:10 - INFO - codeparrot_training - Step 19568: {'lr': 0.0003521622686840873, 'samples': 10019328, 'steps': 19568, 'loss/train': 1.8321435451507568} +02/25/2022 02:55:16 - INFO - codeparrot_training - Step 19569: {'lr': 0.00035214733458410557, 'samples': 10019840, 'steps': 19569, 'loss/train': 0.6593620181083679} +02/25/2022 02:55:19 - INFO - codeparrot_training - Step 19570: {'lr': 0.00035213240004655714, 'samples': 10020352, 'steps': 19570, 'loss/train': 1.808738350868225} +02/25/2022 02:55:25 - INFO - codeparrot_training - Step 19571: {'lr': 0.000352117465071506, 'samples': 10020864, 'steps': 19571, 'loss/train': 0.38315537571907043} +02/25/2022 02:55:28 - INFO - codeparrot_training - Step 19572: {'lr': 0.000352102529659016, 'samples': 10021376, 'steps': 19572, 'loss/train': 1.3931598663330078} +02/25/2022 02:55:34 - INFO - codeparrot_training - Step 19573: {'lr': 0.00035208759380915116, 'samples': 10021888, 'steps': 19573, 'loss/train': 2.9910523891448975} +02/25/2022 02:55:37 - INFO - codeparrot_training - Step 19574: {'lr': 0.0003520726575219756, 'samples': 10022400, 'steps': 19574, 'loss/train': 1.307809591293335} +02/25/2022 02:55:43 - INFO - codeparrot_training - Step 19575: {'lr': 0.00035205772079755307, 'samples': 10022912, 'steps': 19575, 'loss/train': 2.1897032260894775} +02/25/2022 02:55:50 - INFO - codeparrot_training - Step 19576: {'lr': 0.0003520427836359477, 'samples': 10023424, 'steps': 19576, 'loss/train': 1.7167686223983765} +02/25/2022 02:55:54 - INFO - codeparrot_training - Step 19577: {'lr': 0.00035202784603722344, 'samples': 10023936, 'steps': 19577, 'loss/train': 2.5758914947509766} +02/25/2022 02:55:59 - INFO - codeparrot_training - Step 19578: {'lr': 0.00035201290800144423, 'samples': 10024448, 'steps': 19578, 'loss/train': 2.058234691619873} +02/25/2022 02:56:03 - INFO - codeparrot_training - Step 19579: {'lr': 0.00035199796952867425, 'samples': 10024960, 'steps': 19579, 'loss/train': 2.095686674118042} +02/25/2022 02:56:08 - INFO - codeparrot_training - Step 19580: {'lr': 0.0003519830306189773, 'samples': 10025472, 'steps': 19580, 'loss/train': 2.260281562805176} +02/25/2022 02:56:12 - INFO - codeparrot_training - Step 19581: {'lr': 0.0003519680912724174, 'samples': 10025984, 'steps': 19581, 'loss/train': 1.3114290237426758} +02/25/2022 02:56:17 - INFO - codeparrot_training - Step 19582: {'lr': 0.0003519531514890586, 'samples': 10026496, 'steps': 19582, 'loss/train': 1.6117550134658813} +02/25/2022 02:56:21 - INFO - codeparrot_training - Step 19583: {'lr': 0.00035193821126896493, 'samples': 10027008, 'steps': 19583, 'loss/train': 2.0356404781341553} +02/25/2022 02:56:26 - INFO - codeparrot_training - Step 19584: {'lr': 0.0003519232706122002, 'samples': 10027520, 'steps': 19584, 'loss/train': 1.8880693912506104} +02/25/2022 02:56:30 - INFO - codeparrot_training - Step 19585: {'lr': 0.0003519083295188287, 'samples': 10028032, 'steps': 19585, 'loss/train': 0.7321484088897705} +02/25/2022 02:56:38 - INFO - codeparrot_training - Step 19586: {'lr': 0.0003518933879889142, 'samples': 10028544, 'steps': 19586, 'loss/train': 1.8996468782424927} +02/25/2022 02:56:41 - INFO - codeparrot_training - Step 19587: {'lr': 0.00035187844602252075, 'samples': 10029056, 'steps': 19587, 'loss/train': 1.8454076051712036} +02/25/2022 02:56:47 - INFO - codeparrot_training - Step 19588: {'lr': 0.0003518635036197124, 'samples': 10029568, 'steps': 19588, 'loss/train': 1.3699305057525635} +02/25/2022 02:56:50 - INFO - codeparrot_training - Step 19589: {'lr': 0.00035184856078055316, 'samples': 10030080, 'steps': 19589, 'loss/train': 3.159069299697876} +02/25/2022 02:56:56 - INFO - codeparrot_training - Step 19590: {'lr': 0.000351833617505107, 'samples': 10030592, 'steps': 19590, 'loss/train': 1.8408854007720947} +02/25/2022 02:56:59 - INFO - codeparrot_training - Step 19591: {'lr': 0.000351818673793438, 'samples': 10031104, 'steps': 19591, 'loss/train': 1.092026948928833} +02/25/2022 02:57:05 - INFO - codeparrot_training - Step 19592: {'lr': 0.00035180372964561013, 'samples': 10031616, 'steps': 19592, 'loss/train': 2.6419565677642822} +02/25/2022 02:57:08 - INFO - codeparrot_training - Step 19593: {'lr': 0.00035178878506168733, 'samples': 10032128, 'steps': 19593, 'loss/train': 1.9302358627319336} +02/25/2022 02:57:13 - INFO - codeparrot_training - Step 19594: {'lr': 0.0003517738400417338, 'samples': 10032640, 'steps': 19594, 'loss/train': 4.34604024887085} +02/25/2022 02:57:17 - INFO - codeparrot_training - Step 19595: {'lr': 0.0003517588945858134, 'samples': 10033152, 'steps': 19595, 'loss/train': 2.429274559020996} +02/25/2022 02:57:25 - INFO - codeparrot_training - Step 19596: {'lr': 0.00035174394869399024, 'samples': 10033664, 'steps': 19596, 'loss/train': 2.749152421951294} +02/25/2022 02:57:28 - INFO - codeparrot_training - Step 19597: {'lr': 0.0003517290023663283, 'samples': 10034176, 'steps': 19597, 'loss/train': 2.5418922901153564} +02/25/2022 02:57:34 - INFO - codeparrot_training - Step 19598: {'lr': 0.0003517140556028916, 'samples': 10034688, 'steps': 19598, 'loss/train': 2.3519039154052734} +02/25/2022 02:57:37 - INFO - codeparrot_training - Step 19599: {'lr': 0.0003516991084037442, 'samples': 10035200, 'steps': 19599, 'loss/train': 1.9998564720153809} +02/25/2022 02:57:43 - INFO - codeparrot_training - Step 19600: {'lr': 0.0003516841607689501, 'samples': 10035712, 'steps': 19600, 'loss/train': 1.041944980621338} +02/25/2022 02:57:46 - INFO - codeparrot_training - Step 19601: {'lr': 0.0003516692126985733, 'samples': 10036224, 'steps': 19601, 'loss/train': 2.174870729446411} +02/25/2022 02:57:52 - INFO - codeparrot_training - Step 19602: {'lr': 0.00035165426419267795, 'samples': 10036736, 'steps': 19602, 'loss/train': 2.2092790603637695} +02/25/2022 02:57:55 - INFO - codeparrot_training - Step 19603: {'lr': 0.00035163931525132797, 'samples': 10037248, 'steps': 19603, 'loss/train': 1.6803193092346191} +02/25/2022 02:58:01 - INFO - codeparrot_training - Step 19604: {'lr': 0.00035162436587458744, 'samples': 10037760, 'steps': 19604, 'loss/train': 1.847691297531128} +02/25/2022 02:58:04 - INFO - codeparrot_training - Step 19605: {'lr': 0.00035160941606252044, 'samples': 10038272, 'steps': 19605, 'loss/train': 2.574420690536499} +02/25/2022 02:58:12 - INFO - codeparrot_training - Step 19606: {'lr': 0.00035159446581519094, 'samples': 10038784, 'steps': 19606, 'loss/train': 1.9931769371032715} +02/25/2022 02:58:15 - INFO - codeparrot_training - Step 19607: {'lr': 0.000351579515132663, 'samples': 10039296, 'steps': 19607, 'loss/train': 2.1745822429656982} +02/25/2022 02:58:21 - INFO - codeparrot_training - Step 19608: {'lr': 0.0003515645640150006, 'samples': 10039808, 'steps': 19608, 'loss/train': 2.42423939704895} +02/25/2022 02:58:24 - INFO - codeparrot_training - Step 19609: {'lr': 0.000351549612462268, 'samples': 10040320, 'steps': 19609, 'loss/train': 1.997499704360962} +02/25/2022 02:58:30 - INFO - codeparrot_training - Step 19610: {'lr': 0.00035153466047452904, 'samples': 10040832, 'steps': 19610, 'loss/train': 2.232370376586914} +02/25/2022 02:58:33 - INFO - codeparrot_training - Step 19611: {'lr': 0.00035151970805184785, 'samples': 10041344, 'steps': 19611, 'loss/train': 1.0818361043930054} +02/25/2022 02:58:39 - INFO - codeparrot_training - Step 19612: {'lr': 0.00035150475519428844, 'samples': 10041856, 'steps': 19612, 'loss/train': 2.621471405029297} +02/25/2022 02:58:43 - INFO - codeparrot_training - Step 19613: {'lr': 0.00035148980190191485, 'samples': 10042368, 'steps': 19613, 'loss/train': 3.741616725921631} +02/25/2022 02:58:46 - INFO - codeparrot_training - Step 19614: {'lr': 0.00035147484817479126, 'samples': 10042880, 'steps': 19614, 'loss/train': 2.7486398220062256} +02/25/2022 02:58:52 - INFO - codeparrot_training - Step 19615: {'lr': 0.00035145989401298163, 'samples': 10043392, 'steps': 19615, 'loss/train': 2.101335287094116} +02/25/2022 02:58:55 - INFO - codeparrot_training - Step 19616: {'lr': 0.00035144493941655, 'samples': 10043904, 'steps': 19616, 'loss/train': 2.3324625492095947} +02/25/2022 02:59:01 - INFO - codeparrot_training - Step 19617: {'lr': 0.0003514299843855605, 'samples': 10044416, 'steps': 19617, 'loss/train': 0.20641575753688812} +02/25/2022 02:59:04 - INFO - codeparrot_training - Step 19618: {'lr': 0.0003514150289200771, 'samples': 10044928, 'steps': 19618, 'loss/train': 2.2910702228546143} +02/25/2022 02:59:11 - INFO - codeparrot_training - Step 19619: {'lr': 0.00035140007302016395, 'samples': 10045440, 'steps': 19619, 'loss/train': 1.4463056325912476} +02/25/2022 02:59:15 - INFO - codeparrot_training - Step 19620: {'lr': 0.0003513851166858851, 'samples': 10045952, 'steps': 19620, 'loss/train': 2.1278014183044434} +02/25/2022 02:59:21 - INFO - codeparrot_training - Step 19621: {'lr': 0.0003513701599173046, 'samples': 10046464, 'steps': 19621, 'loss/train': 1.4461511373519897} +02/25/2022 02:59:24 - INFO - codeparrot_training - Step 19622: {'lr': 0.0003513552027144865, 'samples': 10046976, 'steps': 19622, 'loss/train': 1.8281314373016357} +02/25/2022 02:59:30 - INFO - codeparrot_training - Step 19623: {'lr': 0.00035134024507749487, 'samples': 10047488, 'steps': 19623, 'loss/train': 1.8796703815460205} +02/25/2022 02:59:34 - INFO - codeparrot_training - Step 19624: {'lr': 0.0003513252870063939, 'samples': 10048000, 'steps': 19624, 'loss/train': 2.665205955505371} +02/25/2022 02:59:39 - INFO - codeparrot_training - Step 19625: {'lr': 0.00035131032850124744, 'samples': 10048512, 'steps': 19625, 'loss/train': 2.2983086109161377} +02/25/2022 02:59:43 - INFO - codeparrot_training - Step 19626: {'lr': 0.0003512953695621198, 'samples': 10049024, 'steps': 19626, 'loss/train': 2.309155225753784} +02/25/2022 02:59:48 - INFO - codeparrot_training - Step 19627: {'lr': 0.00035128041018907496, 'samples': 10049536, 'steps': 19627, 'loss/train': 2.0799875259399414} +02/25/2022 02:59:52 - INFO - codeparrot_training - Step 19628: {'lr': 0.000351265450382177, 'samples': 10050048, 'steps': 19628, 'loss/train': 3.1819663047790527} +02/25/2022 02:59:57 - INFO - codeparrot_training - Step 19629: {'lr': 0.00035125049014148995, 'samples': 10050560, 'steps': 19629, 'loss/train': 2.152209758758545} +02/25/2022 03:00:01 - INFO - codeparrot_training - Step 19630: {'lr': 0.000351235529467078, 'samples': 10051072, 'steps': 19630, 'loss/train': 0.7848385572433472} +02/25/2022 03:00:06 - INFO - codeparrot_training - Step 19631: {'lr': 0.00035122056835900517, 'samples': 10051584, 'steps': 19631, 'loss/train': 2.8216657638549805} +02/25/2022 03:00:10 - INFO - codeparrot_training - Step 19632: {'lr': 0.0003512056068173356, 'samples': 10052096, 'steps': 19632, 'loss/train': 1.5875760316848755} +02/25/2022 03:00:17 - INFO - codeparrot_training - Step 19633: {'lr': 0.00035119064484213333, 'samples': 10052608, 'steps': 19633, 'loss/train': 2.5184178352355957} +02/25/2022 03:00:20 - INFO - codeparrot_training - Step 19634: {'lr': 0.0003511756824334625, 'samples': 10053120, 'steps': 19634, 'loss/train': 1.962084412574768} +02/25/2022 03:00:26 - INFO - codeparrot_training - Step 19635: {'lr': 0.0003511607195913872, 'samples': 10053632, 'steps': 19635, 'loss/train': 2.3503575325012207} +02/25/2022 03:00:29 - INFO - codeparrot_training - Step 19636: {'lr': 0.00035114575631597136, 'samples': 10054144, 'steps': 19636, 'loss/train': 1.3409106731414795} +02/25/2022 03:00:35 - INFO - codeparrot_training - Step 19637: {'lr': 0.0003511307926072793, 'samples': 10054656, 'steps': 19637, 'loss/train': 1.7063028812408447} +02/25/2022 03:00:38 - INFO - codeparrot_training - Step 19638: {'lr': 0.00035111582846537507, 'samples': 10055168, 'steps': 19638, 'loss/train': 2.3895628452301025} +02/25/2022 03:00:45 - INFO - codeparrot_training - Step 19639: {'lr': 0.00035110086389032264, 'samples': 10055680, 'steps': 19639, 'loss/train': 2.257286548614502} +02/25/2022 03:00:49 - INFO - codeparrot_training - Step 19640: {'lr': 0.0003510858988821863, 'samples': 10056192, 'steps': 19640, 'loss/train': 1.6644878387451172} +02/25/2022 03:00:54 - INFO - codeparrot_training - Step 19641: {'lr': 0.00035107093344103, 'samples': 10056704, 'steps': 19641, 'loss/train': 1.656775951385498} +02/25/2022 03:00:58 - INFO - codeparrot_training - Step 19642: {'lr': 0.000351055967566918, 'samples': 10057216, 'steps': 19642, 'loss/train': 0.5723735094070435} +02/25/2022 03:01:03 - INFO - codeparrot_training - Step 19643: {'lr': 0.0003510410012599142, 'samples': 10057728, 'steps': 19643, 'loss/train': 0.9587002396583557} +02/25/2022 03:01:06 - INFO - codeparrot_training - Step 19644: {'lr': 0.00035102603452008297, 'samples': 10058240, 'steps': 19644, 'loss/train': 1.5231226682662964} +02/25/2022 03:01:12 - INFO - codeparrot_training - Step 19645: {'lr': 0.00035101106734748824, 'samples': 10058752, 'steps': 19645, 'loss/train': 1.4624035358428955} +02/25/2022 03:01:16 - INFO - codeparrot_training - Step 19646: {'lr': 0.00035099609974219417, 'samples': 10059264, 'steps': 19646, 'loss/train': 1.7937335968017578} +02/25/2022 03:01:21 - INFO - codeparrot_training - Step 19647: {'lr': 0.00035098113170426484, 'samples': 10059776, 'steps': 19647, 'loss/train': 2.059809446334839} +02/25/2022 03:01:25 - INFO - codeparrot_training - Step 19648: {'lr': 0.0003509661632337645, 'samples': 10060288, 'steps': 19648, 'loss/train': 2.2621843814849854} +02/25/2022 03:01:31 - INFO - codeparrot_training - Step 19649: {'lr': 0.00035095119433075706, 'samples': 10060800, 'steps': 19649, 'loss/train': 4.312873363494873} +02/25/2022 03:01:35 - INFO - codeparrot_training - Step 19650: {'lr': 0.00035093622499530677, 'samples': 10061312, 'steps': 19650, 'loss/train': 4.907009601593018} +02/25/2022 03:01:40 - INFO - codeparrot_training - Step 19651: {'lr': 0.0003509212552274778, 'samples': 10061824, 'steps': 19651, 'loss/train': 2.3816497325897217} +02/25/2022 03:01:44 - INFO - codeparrot_training - Step 19652: {'lr': 0.0003509062850273342, 'samples': 10062336, 'steps': 19652, 'loss/train': 1.1412864923477173} +02/25/2022 03:01:49 - INFO - codeparrot_training - Step 19653: {'lr': 0.00035089131439494013, 'samples': 10062848, 'steps': 19653, 'loss/train': 1.5915791988372803} +02/25/2022 03:01:53 - INFO - codeparrot_training - Step 19654: {'lr': 0.00035087634333035966, 'samples': 10063360, 'steps': 19654, 'loss/train': 1.409009575843811} +02/25/2022 03:01:58 - INFO - codeparrot_training - Step 19655: {'lr': 0.00035086137183365707, 'samples': 10063872, 'steps': 19655, 'loss/train': 2.1974523067474365} +02/25/2022 03:02:02 - INFO - codeparrot_training - Step 19656: {'lr': 0.0003508463999048963, 'samples': 10064384, 'steps': 19656, 'loss/train': 1.8320226669311523} +02/25/2022 03:02:07 - INFO - codeparrot_training - Step 19657: {'lr': 0.0003508314275441416, 'samples': 10064896, 'steps': 19657, 'loss/train': 1.9070558547973633} +02/25/2022 03:02:11 - INFO - codeparrot_training - Step 19658: {'lr': 0.0003508164547514571, 'samples': 10065408, 'steps': 19658, 'loss/train': 1.9889055490493774} +02/25/2022 03:02:17 - INFO - codeparrot_training - Step 19659: {'lr': 0.00035080148152690687, 'samples': 10065920, 'steps': 19659, 'loss/train': 2.0737147331237793} +02/25/2022 03:02:21 - INFO - codeparrot_training - Step 19660: {'lr': 0.0003507865078705551, 'samples': 10066432, 'steps': 19660, 'loss/train': 2.672455310821533} +02/25/2022 03:02:26 - INFO - codeparrot_training - Step 19661: {'lr': 0.00035077153378246603, 'samples': 10066944, 'steps': 19661, 'loss/train': 0.9656722545623779} +02/25/2022 03:02:30 - INFO - codeparrot_training - Step 19662: {'lr': 0.0003507565592627036, 'samples': 10067456, 'steps': 19662, 'loss/train': 2.1578664779663086} +02/25/2022 03:02:35 - INFO - codeparrot_training - Step 19663: {'lr': 0.0003507415843113321, 'samples': 10067968, 'steps': 19663, 'loss/train': 3.3387155532836914} +02/25/2022 03:02:39 - INFO - codeparrot_training - Step 19664: {'lr': 0.00035072660892841566, 'samples': 10068480, 'steps': 19664, 'loss/train': 2.3395957946777344} +02/25/2022 03:02:44 - INFO - codeparrot_training - Step 19665: {'lr': 0.0003507116331140184, 'samples': 10068992, 'steps': 19665, 'loss/train': 0.7336011528968811} +02/25/2022 03:02:48 - INFO - codeparrot_training - Step 19666: {'lr': 0.00035069665686820453, 'samples': 10069504, 'steps': 19666, 'loss/train': 2.6456384658813477} +02/25/2022 03:02:53 - INFO - codeparrot_training - Step 19667: {'lr': 0.0003506816801910381, 'samples': 10070016, 'steps': 19667, 'loss/train': 2.104161024093628} +02/25/2022 03:02:57 - INFO - codeparrot_training - Step 19668: {'lr': 0.00035066670308258333, 'samples': 10070528, 'steps': 19668, 'loss/train': 1.2089543342590332} +02/25/2022 03:03:02 - INFO - codeparrot_training - Step 19669: {'lr': 0.00035065172554290435, 'samples': 10071040, 'steps': 19669, 'loss/train': 1.1023633480072021} +02/25/2022 03:03:06 - INFO - codeparrot_training - Step 19670: {'lr': 0.0003506367475720654, 'samples': 10071552, 'steps': 19670, 'loss/train': 2.4401018619537354} +02/25/2022 03:03:12 - INFO - codeparrot_training - Step 19671: {'lr': 0.0003506217691701305, 'samples': 10072064, 'steps': 19671, 'loss/train': 1.69490385055542} +02/25/2022 03:03:16 - INFO - codeparrot_training - Step 19672: {'lr': 0.000350606790337164, 'samples': 10072576, 'steps': 19672, 'loss/train': 1.5114063024520874} +02/25/2022 03:03:21 - INFO - codeparrot_training - Step 19673: {'lr': 0.00035059181107322977, 'samples': 10073088, 'steps': 19673, 'loss/train': 2.0572352409362793} +02/25/2022 03:03:25 - INFO - codeparrot_training - Step 19674: {'lr': 0.00035057683137839236, 'samples': 10073600, 'steps': 19674, 'loss/train': 1.143776535987854} +02/25/2022 03:03:30 - INFO - codeparrot_training - Step 19675: {'lr': 0.00035056185125271566, 'samples': 10074112, 'steps': 19675, 'loss/train': 2.620931386947632} +02/25/2022 03:03:34 - INFO - codeparrot_training - Step 19676: {'lr': 0.0003505468706962639, 'samples': 10074624, 'steps': 19676, 'loss/train': 3.6897389888763428} +02/25/2022 03:03:39 - INFO - codeparrot_training - Step 19677: {'lr': 0.0003505318897091013, 'samples': 10075136, 'steps': 19677, 'loss/train': 2.638714551925659} +02/25/2022 03:03:45 - INFO - codeparrot_training - Step 19678: {'lr': 0.000350516908291292, 'samples': 10075648, 'steps': 19678, 'loss/train': 1.2624706029891968} +02/25/2022 03:03:48 - INFO - codeparrot_training - Step 19679: {'lr': 0.00035050192644290023, 'samples': 10076160, 'steps': 19679, 'loss/train': 1.9817955493927002} +02/25/2022 03:03:54 - INFO - codeparrot_training - Step 19680: {'lr': 0.00035048694416399005, 'samples': 10076672, 'steps': 19680, 'loss/train': 2.1430068016052246} +02/25/2022 03:03:58 - INFO - codeparrot_training - Step 19681: {'lr': 0.0003504719614546258, 'samples': 10077184, 'steps': 19681, 'loss/train': 2.407341241836548} +02/25/2022 03:04:03 - INFO - codeparrot_training - Step 19682: {'lr': 0.00035045697831487146, 'samples': 10077696, 'steps': 19682, 'loss/train': 1.3414525985717773} +02/25/2022 03:04:07 - INFO - codeparrot_training - Step 19683: {'lr': 0.00035044199474479137, 'samples': 10078208, 'steps': 19683, 'loss/train': 1.7528821229934692} +02/25/2022 03:04:13 - INFO - codeparrot_training - Step 19684: {'lr': 0.00035042701074444965, 'samples': 10078720, 'steps': 19684, 'loss/train': 1.60835599899292} +02/25/2022 03:04:16 - INFO - codeparrot_training - Step 19685: {'lr': 0.00035041202631391056, 'samples': 10079232, 'steps': 19685, 'loss/train': 1.390791893005371} +02/25/2022 03:04:22 - INFO - codeparrot_training - Step 19686: {'lr': 0.0003503970414532382, 'samples': 10079744, 'steps': 19686, 'loss/train': 2.100835084915161} +02/25/2022 03:04:25 - INFO - codeparrot_training - Step 19687: {'lr': 0.00035038205616249674, 'samples': 10080256, 'steps': 19687, 'loss/train': 2.3965134620666504} +02/25/2022 03:04:31 - INFO - codeparrot_training - Step 19688: {'lr': 0.00035036707044175055, 'samples': 10080768, 'steps': 19688, 'loss/train': 1.8352265357971191} +02/25/2022 03:04:34 - INFO - codeparrot_training - Step 19689: {'lr': 0.00035035208429106356, 'samples': 10081280, 'steps': 19689, 'loss/train': 0.6369045376777649} +02/25/2022 03:04:40 - INFO - codeparrot_training - Step 19690: {'lr': 0.0003503370977105002, 'samples': 10081792, 'steps': 19690, 'loss/train': 1.8793842792510986} +02/25/2022 03:04:43 - INFO - codeparrot_training - Step 19691: {'lr': 0.00035032211070012455, 'samples': 10082304, 'steps': 19691, 'loss/train': 0.7447012662887573} +02/25/2022 03:04:49 - INFO - codeparrot_training - Step 19692: {'lr': 0.00035030712326000084, 'samples': 10082816, 'steps': 19692, 'loss/train': 2.0413427352905273} +02/25/2022 03:04:52 - INFO - codeparrot_training - Step 19693: {'lr': 0.00035029213539019324, 'samples': 10083328, 'steps': 19693, 'loss/train': 0.8735166192054749} +02/25/2022 03:04:58 - INFO - codeparrot_training - Step 19694: {'lr': 0.000350277147090766, 'samples': 10083840, 'steps': 19694, 'loss/train': 2.240682363510132} +02/25/2022 03:05:02 - INFO - codeparrot_training - Step 19695: {'lr': 0.0003502621583617833, 'samples': 10084352, 'steps': 19695, 'loss/train': 1.1770226955413818} +02/25/2022 03:05:07 - INFO - codeparrot_training - Step 19696: {'lr': 0.00035024716920330933, 'samples': 10084864, 'steps': 19696, 'loss/train': 0.5625348091125488} +02/25/2022 03:05:11 - INFO - codeparrot_training - Step 19697: {'lr': 0.0003502321796154084, 'samples': 10085376, 'steps': 19697, 'loss/train': 1.4729421138763428} +02/25/2022 03:05:16 - INFO - codeparrot_training - Step 19698: {'lr': 0.00035021718959814453, 'samples': 10085888, 'steps': 19698, 'loss/train': 0.38844922184944153} +02/25/2022 03:05:20 - INFO - codeparrot_training - Step 19699: {'lr': 0.0003502021991515821, 'samples': 10086400, 'steps': 19699, 'loss/train': 2.6359524726867676} +02/25/2022 03:05:25 - INFO - codeparrot_training - Step 19700: {'lr': 0.0003501872082757852, 'samples': 10086912, 'steps': 19700, 'loss/train': 2.1211774349212646} +02/25/2022 03:05:29 - INFO - codeparrot_training - Step 19701: {'lr': 0.00035017221697081826, 'samples': 10087424, 'steps': 19701, 'loss/train': 1.5974019765853882} +02/25/2022 03:05:34 - INFO - codeparrot_training - Step 19702: {'lr': 0.0003501572252367452, 'samples': 10087936, 'steps': 19702, 'loss/train': 2.3705458641052246} +02/25/2022 03:05:38 - INFO - codeparrot_training - Step 19703: {'lr': 0.00035014223307363045, 'samples': 10088448, 'steps': 19703, 'loss/train': 0.5771686434745789} +02/25/2022 03:05:43 - INFO - codeparrot_training - Step 19704: {'lr': 0.0003501272404815382, 'samples': 10088960, 'steps': 19704, 'loss/train': 0.4329005777835846} +02/25/2022 03:05:47 - INFO - codeparrot_training - Step 19705: {'lr': 0.0003501122474605326, 'samples': 10089472, 'steps': 19705, 'loss/train': 0.6630956530570984} +02/25/2022 03:05:53 - INFO - codeparrot_training - Step 19706: {'lr': 0.00035009725401067795, 'samples': 10089984, 'steps': 19706, 'loss/train': 2.5255801677703857} +02/25/2022 03:05:57 - INFO - codeparrot_training - Step 19707: {'lr': 0.00035008226013203845, 'samples': 10090496, 'steps': 19707, 'loss/train': 1.8529889583587646} +02/25/2022 03:06:02 - INFO - codeparrot_training - Step 19708: {'lr': 0.0003500672658246783, 'samples': 10091008, 'steps': 19708, 'loss/train': 2.2475297451019287} +02/25/2022 03:06:06 - INFO - codeparrot_training - Step 19709: {'lr': 0.0003500522710886618, 'samples': 10091520, 'steps': 19709, 'loss/train': 2.3407280445098877} +02/25/2022 03:06:12 - INFO - codeparrot_training - Step 19710: {'lr': 0.0003500372759240531, 'samples': 10092032, 'steps': 19710, 'loss/train': 1.3891665935516357} +02/25/2022 03:06:15 - INFO - codeparrot_training - Step 19711: {'lr': 0.00035002228033091643, 'samples': 10092544, 'steps': 19711, 'loss/train': 1.2385997772216797} +02/25/2022 03:06:21 - INFO - codeparrot_training - Step 19712: {'lr': 0.00035000728430931616, 'samples': 10093056, 'steps': 19712, 'loss/train': 1.5949411392211914} +02/25/2022 03:06:24 - INFO - codeparrot_training - Step 19713: {'lr': 0.00034999228785931644, 'samples': 10093568, 'steps': 19713, 'loss/train': 1.7539089918136597} +02/25/2022 03:06:30 - INFO - codeparrot_training - Step 19714: {'lr': 0.0003499772909809815, 'samples': 10094080, 'steps': 19714, 'loss/train': 0.2511043846607208} +02/25/2022 03:06:33 - INFO - codeparrot_training - Step 19715: {'lr': 0.0003499622936743756, 'samples': 10094592, 'steps': 19715, 'loss/train': 1.0134738683700562} +02/25/2022 03:06:39 - INFO - codeparrot_training - Step 19716: {'lr': 0.0003499472959395629, 'samples': 10095104, 'steps': 19716, 'loss/train': 1.9131131172180176} +02/25/2022 03:06:43 - INFO - codeparrot_training - Step 19717: {'lr': 0.00034993229777660785, 'samples': 10095616, 'steps': 19717, 'loss/train': 2.655975341796875} +02/25/2022 03:06:48 - INFO - codeparrot_training - Step 19718: {'lr': 0.0003499172991855744, 'samples': 10096128, 'steps': 19718, 'loss/train': 2.1876728534698486} +02/25/2022 03:06:52 - INFO - codeparrot_training - Step 19719: {'lr': 0.00034990230016652713, 'samples': 10096640, 'steps': 19719, 'loss/train': 1.8051618337631226} +02/25/2022 03:06:57 - INFO - codeparrot_training - Step 19720: {'lr': 0.00034988730071953, 'samples': 10097152, 'steps': 19720, 'loss/train': 1.1078022718429565} +02/25/2022 03:07:01 - INFO - codeparrot_training - Step 19721: {'lr': 0.0003498723008446475, 'samples': 10097664, 'steps': 19721, 'loss/train': 1.6421740055084229} +02/25/2022 03:07:06 - INFO - codeparrot_training - Step 19722: {'lr': 0.0003498573005419437, 'samples': 10098176, 'steps': 19722, 'loss/train': 1.2230674028396606} +02/25/2022 03:07:09 - INFO - codeparrot_training - Step 19723: {'lr': 0.000349842299811483, 'samples': 10098688, 'steps': 19723, 'loss/train': 1.839769721031189} +02/25/2022 03:07:15 - INFO - codeparrot_training - Step 19724: {'lr': 0.00034982729865332953, 'samples': 10099200, 'steps': 19724, 'loss/train': 2.547917366027832} +02/25/2022 03:07:19 - INFO - codeparrot_training - Step 19725: {'lr': 0.00034981229706754755, 'samples': 10099712, 'steps': 19725, 'loss/train': 3.229694366455078} +02/25/2022 03:07:25 - INFO - codeparrot_training - Step 19726: {'lr': 0.0003497972950542015, 'samples': 10100224, 'steps': 19726, 'loss/train': 0.8456030488014221} +02/25/2022 03:07:28 - INFO - codeparrot_training - Step 19727: {'lr': 0.0003497822926133555, 'samples': 10100736, 'steps': 19727, 'loss/train': 1.3858362436294556} +02/25/2022 03:07:34 - INFO - codeparrot_training - Step 19728: {'lr': 0.0003497672897450738, 'samples': 10101248, 'steps': 19728, 'loss/train': 1.8247514963150024} +02/25/2022 03:07:37 - INFO - codeparrot_training - Step 19729: {'lr': 0.0003497522864494208, 'samples': 10101760, 'steps': 19729, 'loss/train': 1.6876375675201416} +02/25/2022 03:07:43 - INFO - codeparrot_training - Step 19730: {'lr': 0.0003497372827264606, 'samples': 10102272, 'steps': 19730, 'loss/train': 1.3257802724838257} +02/25/2022 03:07:46 - INFO - codeparrot_training - Step 19731: {'lr': 0.0003497222785762576, 'samples': 10102784, 'steps': 19731, 'loss/train': 2.0304720401763916} +02/25/2022 03:07:52 - INFO - codeparrot_training - Step 19732: {'lr': 0.000349707273998876, 'samples': 10103296, 'steps': 19732, 'loss/train': 2.7126681804656982} +02/25/2022 03:07:55 - INFO - codeparrot_training - Step 19733: {'lr': 0.00034969226899438013, 'samples': 10103808, 'steps': 19733, 'loss/train': 1.9379675388336182} +02/25/2022 03:08:01 - INFO - codeparrot_training - Step 19734: {'lr': 0.00034967726356283416, 'samples': 10104320, 'steps': 19734, 'loss/train': 2.1391079425811768} +02/25/2022 03:08:04 - INFO - codeparrot_training - Step 19735: {'lr': 0.00034966225770430244, 'samples': 10104832, 'steps': 19735, 'loss/train': 1.4799776077270508} +02/25/2022 03:08:10 - INFO - codeparrot_training - Step 19736: {'lr': 0.00034964725141884936, 'samples': 10105344, 'steps': 19736, 'loss/train': 2.34786319732666} +02/25/2022 03:08:13 - INFO - codeparrot_training - Step 19737: {'lr': 0.000349632244706539, 'samples': 10105856, 'steps': 19737, 'loss/train': 1.2513939142227173} +02/25/2022 03:08:19 - INFO - codeparrot_training - Step 19738: {'lr': 0.0003496172375674358, 'samples': 10106368, 'steps': 19738, 'loss/train': 2.008321762084961} +02/25/2022 03:08:22 - INFO - codeparrot_training - Step 19739: {'lr': 0.0003496022300016039, 'samples': 10106880, 'steps': 19739, 'loss/train': 2.2683513164520264} +02/25/2022 03:08:28 - INFO - codeparrot_training - Step 19740: {'lr': 0.00034958722200910777, 'samples': 10107392, 'steps': 19740, 'loss/train': 1.4257746934890747} +02/25/2022 03:08:32 - INFO - codeparrot_training - Step 19741: {'lr': 0.00034957221359001154, 'samples': 10107904, 'steps': 19741, 'loss/train': 2.0964877605438232} +02/25/2022 03:08:38 - INFO - codeparrot_training - Step 19742: {'lr': 0.0003495572047443796, 'samples': 10108416, 'steps': 19742, 'loss/train': 2.4307374954223633} +02/25/2022 03:08:41 - INFO - codeparrot_training - Step 19743: {'lr': 0.0003495421954722762, 'samples': 10108928, 'steps': 19743, 'loss/train': 1.7479761838912964} +02/25/2022 03:08:45 - INFO - codeparrot_training - Step 19744: {'lr': 0.0003495271857737657, 'samples': 10109440, 'steps': 19744, 'loss/train': 0.9561011791229248} +02/25/2022 03:08:50 - INFO - codeparrot_training - Step 19745: {'lr': 0.00034951217564891226, 'samples': 10109952, 'steps': 19745, 'loss/train': 2.7571966648101807} +02/25/2022 03:08:54 - INFO - codeparrot_training - Step 19746: {'lr': 0.0003494971650977802, 'samples': 10110464, 'steps': 19746, 'loss/train': 1.5141698122024536} +02/25/2022 03:08:59 - INFO - codeparrot_training - Step 19747: {'lr': 0.00034948215412043405, 'samples': 10110976, 'steps': 19747, 'loss/train': 1.3123365640640259} +02/25/2022 03:09:03 - INFO - codeparrot_training - Step 19748: {'lr': 0.00034946714271693783, 'samples': 10111488, 'steps': 19748, 'loss/train': 1.062041997909546} +02/25/2022 03:09:08 - INFO - codeparrot_training - Step 19749: {'lr': 0.00034945213088735595, 'samples': 10112000, 'steps': 19749, 'loss/train': 1.904049277305603} +02/25/2022 03:09:12 - INFO - codeparrot_training - Step 19750: {'lr': 0.00034943711863175277, 'samples': 10112512, 'steps': 19750, 'loss/train': 1.5080755949020386} +02/25/2022 03:09:17 - INFO - codeparrot_training - Step 19751: {'lr': 0.0003494221059501925, 'samples': 10113024, 'steps': 19751, 'loss/train': 1.5641701221466064} +02/25/2022 03:09:21 - INFO - codeparrot_training - Step 19752: {'lr': 0.0003494070928427395, 'samples': 10113536, 'steps': 19752, 'loss/train': 2.83099365234375} +02/25/2022 03:09:27 - INFO - codeparrot_training - Step 19753: {'lr': 0.00034939207930945816, 'samples': 10114048, 'steps': 19753, 'loss/train': 2.6274471282958984} +02/25/2022 03:09:30 - INFO - codeparrot_training - Step 19754: {'lr': 0.00034937706535041263, 'samples': 10114560, 'steps': 19754, 'loss/train': 1.526594877243042} +02/25/2022 03:09:36 - INFO - codeparrot_training - Step 19755: {'lr': 0.0003493620509656674, 'samples': 10115072, 'steps': 19755, 'loss/train': 1.7514238357543945} +02/25/2022 03:09:41 - INFO - codeparrot_training - Step 19756: {'lr': 0.00034934703615528657, 'samples': 10115584, 'steps': 19756, 'loss/train': 2.7907276153564453} +02/25/2022 03:09:45 - INFO - codeparrot_training - Step 19757: {'lr': 0.0003493320209193347, 'samples': 10116096, 'steps': 19757, 'loss/train': 2.051156759262085} +02/25/2022 03:09:51 - INFO - codeparrot_training - Step 19758: {'lr': 0.0003493170052578759, 'samples': 10116608, 'steps': 19758, 'loss/train': 1.8145142793655396} +02/25/2022 03:09:54 - INFO - codeparrot_training - Step 19759: {'lr': 0.00034930198917097467, 'samples': 10117120, 'steps': 19759, 'loss/train': 2.934638023376465} +02/25/2022 03:09:57 - INFO - codeparrot_training - Step 19760: {'lr': 0.00034928697265869515, 'samples': 10117632, 'steps': 19760, 'loss/train': 1.642092227935791} +02/25/2022 03:10:03 - INFO - codeparrot_training - Step 19761: {'lr': 0.0003492719557211018, 'samples': 10118144, 'steps': 19761, 'loss/train': 2.2578444480895996} +02/25/2022 03:10:06 - INFO - codeparrot_training - Step 19762: {'lr': 0.0003492569383582589, 'samples': 10118656, 'steps': 19762, 'loss/train': 2.276261329650879} +02/25/2022 03:10:13 - INFO - codeparrot_training - Step 19763: {'lr': 0.0003492419205702309, 'samples': 10119168, 'steps': 19763, 'loss/train': 1.4965124130249023} +02/25/2022 03:10:16 - INFO - codeparrot_training - Step 19764: {'lr': 0.00034922690235708197, 'samples': 10119680, 'steps': 19764, 'loss/train': 2.066959857940674} +02/25/2022 03:10:22 - INFO - codeparrot_training - Step 19765: {'lr': 0.0003492118837188765, 'samples': 10120192, 'steps': 19765, 'loss/train': 2.168503522872925} +02/25/2022 03:10:25 - INFO - codeparrot_training - Step 19766: {'lr': 0.0003491968646556788, 'samples': 10120704, 'steps': 19766, 'loss/train': 2.2297017574310303} +02/25/2022 03:10:31 - INFO - codeparrot_training - Step 19767: {'lr': 0.00034918184516755324, 'samples': 10121216, 'steps': 19767, 'loss/train': 1.77622389793396} +02/25/2022 03:10:34 - INFO - codeparrot_training - Step 19768: {'lr': 0.00034916682525456416, 'samples': 10121728, 'steps': 19768, 'loss/train': 2.5508880615234375} +02/25/2022 03:10:40 - INFO - codeparrot_training - Step 19769: {'lr': 0.00034915180491677583, 'samples': 10122240, 'steps': 19769, 'loss/train': 0.801866352558136} +02/25/2022 03:10:43 - INFO - codeparrot_training - Step 19770: {'lr': 0.00034913678415425276, 'samples': 10122752, 'steps': 19770, 'loss/train': 2.4061455726623535} +02/25/2022 03:10:49 - INFO - codeparrot_training - Step 19771: {'lr': 0.00034912176296705903, 'samples': 10123264, 'steps': 19771, 'loss/train': 2.761408805847168} +02/25/2022 03:10:52 - INFO - codeparrot_training - Step 19772: {'lr': 0.00034910674135525926, 'samples': 10123776, 'steps': 19772, 'loss/train': 1.70564603805542} +02/25/2022 03:10:58 - INFO - codeparrot_training - Step 19773: {'lr': 0.0003490917193189177, 'samples': 10124288, 'steps': 19773, 'loss/train': 1.8040701150894165} +02/25/2022 03:11:04 - INFO - codeparrot_training - Step 19774: {'lr': 0.00034907669685809855, 'samples': 10124800, 'steps': 19774, 'loss/train': 1.7402268648147583} +02/25/2022 03:11:07 - INFO - codeparrot_training - Step 19775: {'lr': 0.0003490616739728664, 'samples': 10125312, 'steps': 19775, 'loss/train': 1.9939600229263306} +02/25/2022 03:11:11 - INFO - codeparrot_training - Step 19776: {'lr': 0.00034904665066328545, 'samples': 10125824, 'steps': 19776, 'loss/train': 2.2742834091186523} +02/25/2022 03:11:17 - INFO - codeparrot_training - Step 19777: {'lr': 0.00034903162692942013, 'samples': 10126336, 'steps': 19777, 'loss/train': 0.256330281496048} +02/25/2022 03:11:22 - INFO - codeparrot_training - Step 19778: {'lr': 0.0003490166027713348, 'samples': 10126848, 'steps': 19778, 'loss/train': 2.058600425720215} +02/25/2022 03:11:25 - INFO - codeparrot_training - Step 19779: {'lr': 0.0003490015781890937, 'samples': 10127360, 'steps': 19779, 'loss/train': 2.046503782272339} +02/25/2022 03:11:31 - INFO - codeparrot_training - Step 19780: {'lr': 0.00034898655318276134, 'samples': 10127872, 'steps': 19780, 'loss/train': 1.8526111841201782} +02/25/2022 03:11:34 - INFO - codeparrot_training - Step 19781: {'lr': 0.00034897152775240206, 'samples': 10128384, 'steps': 19781, 'loss/train': 4.541769027709961} +02/25/2022 03:11:40 - INFO - codeparrot_training - Step 19782: {'lr': 0.00034895650189808005, 'samples': 10128896, 'steps': 19782, 'loss/train': 2.008277177810669} +02/25/2022 03:11:43 - INFO - codeparrot_training - Step 19783: {'lr': 0.0003489414756198598, 'samples': 10129408, 'steps': 19783, 'loss/train': 2.068462371826172} +02/25/2022 03:11:49 - INFO - codeparrot_training - Step 19784: {'lr': 0.0003489264489178058, 'samples': 10129920, 'steps': 19784, 'loss/train': 1.4204087257385254} +02/25/2022 03:11:52 - INFO - codeparrot_training - Step 19785: {'lr': 0.0003489114217919823, 'samples': 10130432, 'steps': 19785, 'loss/train': 1.5703151226043701} +02/25/2022 03:11:58 - INFO - codeparrot_training - Step 19786: {'lr': 0.00034889639424245364, 'samples': 10130944, 'steps': 19786, 'loss/train': 1.9224170446395874} +02/25/2022 03:12:01 - INFO - codeparrot_training - Step 19787: {'lr': 0.00034888136626928427, 'samples': 10131456, 'steps': 19787, 'loss/train': 2.3439974784851074} +02/25/2022 03:12:08 - INFO - codeparrot_training - Step 19788: {'lr': 0.00034886633787253846, 'samples': 10131968, 'steps': 19788, 'loss/train': 1.9139883518218994} +02/25/2022 03:12:11 - INFO - codeparrot_training - Step 19789: {'lr': 0.00034885130905228063, 'samples': 10132480, 'steps': 19789, 'loss/train': 1.4127751588821411} +02/25/2022 03:12:15 - INFO - codeparrot_training - Step 19790: {'lr': 0.00034883627980857523, 'samples': 10132992, 'steps': 19790, 'loss/train': 1.917236328125} +02/25/2022 03:12:20 - INFO - codeparrot_training - Step 19791: {'lr': 0.0003488212501414866, 'samples': 10133504, 'steps': 19791, 'loss/train': 1.3780182600021362} +02/25/2022 03:12:24 - INFO - codeparrot_training - Step 19792: {'lr': 0.00034880622005107915, 'samples': 10134016, 'steps': 19792, 'loss/train': 0.6669219136238098} +02/25/2022 03:12:29 - INFO - codeparrot_training - Step 19793: {'lr': 0.00034879118953741716, 'samples': 10134528, 'steps': 19793, 'loss/train': 1.807779312133789} +02/25/2022 03:12:33 - INFO - codeparrot_training - Step 19794: {'lr': 0.0003487761586005651, 'samples': 10135040, 'steps': 19794, 'loss/train': 2.2414004802703857} +02/25/2022 03:12:38 - INFO - codeparrot_training - Step 19795: {'lr': 0.0003487611272405873, 'samples': 10135552, 'steps': 19795, 'loss/train': 3.0148677825927734} +02/25/2022 03:12:44 - INFO - codeparrot_training - Step 19796: {'lr': 0.00034874609545754826, 'samples': 10136064, 'steps': 19796, 'loss/train': 2.7842581272125244} +02/25/2022 03:12:47 - INFO - codeparrot_training - Step 19797: {'lr': 0.00034873106325151234, 'samples': 10136576, 'steps': 19797, 'loss/train': 0.2121119499206543} +02/25/2022 03:12:54 - INFO - codeparrot_training - Step 19798: {'lr': 0.0003487160306225438, 'samples': 10137088, 'steps': 19798, 'loss/train': 1.2804843187332153} +02/25/2022 03:12:58 - INFO - codeparrot_training - Step 19799: {'lr': 0.00034870099757070716, 'samples': 10137600, 'steps': 19799, 'loss/train': 2.1828575134277344} +02/25/2022 03:13:01 - INFO - codeparrot_training - Step 19800: {'lr': 0.0003486859640960668, 'samples': 10138112, 'steps': 19800, 'loss/train': 0.934968888759613} +02/25/2022 03:13:07 - INFO - codeparrot_training - Step 19801: {'lr': 0.0003486709301986871, 'samples': 10138624, 'steps': 19801, 'loss/train': 3.027498483657837} +02/25/2022 03:13:10 - INFO - codeparrot_training - Step 19802: {'lr': 0.00034865589587863247, 'samples': 10139136, 'steps': 19802, 'loss/train': 2.7640810012817383} +02/25/2022 03:13:16 - INFO - codeparrot_training - Step 19803: {'lr': 0.0003486408611359673, 'samples': 10139648, 'steps': 19803, 'loss/train': 1.5279866456985474} +02/25/2022 03:13:19 - INFO - codeparrot_training - Step 19804: {'lr': 0.00034862582597075607, 'samples': 10140160, 'steps': 19804, 'loss/train': 2.3333680629730225} +02/25/2022 03:13:25 - INFO - codeparrot_training - Step 19805: {'lr': 0.00034861079038306305, 'samples': 10140672, 'steps': 19805, 'loss/train': 1.9807062149047852} +02/25/2022 03:13:28 - INFO - codeparrot_training - Step 19806: {'lr': 0.00034859575437295277, 'samples': 10141184, 'steps': 19806, 'loss/train': 2.265599250793457} +02/25/2022 03:13:34 - INFO - codeparrot_training - Step 19807: {'lr': 0.00034858071794048953, 'samples': 10141696, 'steps': 19807, 'loss/train': 2.709702253341675} +02/25/2022 03:13:37 - INFO - codeparrot_training - Step 19808: {'lr': 0.0003485656810857378, 'samples': 10142208, 'steps': 19808, 'loss/train': 1.8666470050811768} +02/25/2022 03:13:43 - INFO - codeparrot_training - Step 19809: {'lr': 0.00034855064380876193, 'samples': 10142720, 'steps': 19809, 'loss/train': 1.3959065675735474} +02/25/2022 03:13:47 - INFO - codeparrot_training - Step 19810: {'lr': 0.00034853560610962654, 'samples': 10143232, 'steps': 19810, 'loss/train': 1.6216082572937012} +02/25/2022 03:13:52 - INFO - codeparrot_training - Step 19811: {'lr': 0.0003485205679883958, 'samples': 10143744, 'steps': 19811, 'loss/train': 2.1622729301452637} +02/25/2022 03:13:56 - INFO - codeparrot_training - Step 19812: {'lr': 0.00034850552944513426, 'samples': 10144256, 'steps': 19812, 'loss/train': 2.237422227859497} +02/25/2022 03:14:01 - INFO - codeparrot_training - Step 19813: {'lr': 0.00034849049047990633, 'samples': 10144768, 'steps': 19813, 'loss/train': 2.4103262424468994} +02/25/2022 03:14:05 - INFO - codeparrot_training - Step 19814: {'lr': 0.0003484754510927764, 'samples': 10145280, 'steps': 19814, 'loss/train': 2.142704486846924} +02/25/2022 03:14:11 - INFO - codeparrot_training - Step 19815: {'lr': 0.00034846041128380886, 'samples': 10145792, 'steps': 19815, 'loss/train': 1.2305799722671509} +02/25/2022 03:14:14 - INFO - codeparrot_training - Step 19816: {'lr': 0.0003484453710530682, 'samples': 10146304, 'steps': 19816, 'loss/train': 0.07420670986175537} +02/25/2022 03:14:20 - INFO - codeparrot_training - Step 19817: {'lr': 0.0003484303304006189, 'samples': 10146816, 'steps': 19817, 'loss/train': 1.7484591007232666} +02/25/2022 03:14:23 - INFO - codeparrot_training - Step 19818: {'lr': 0.0003484152893265253, 'samples': 10147328, 'steps': 19818, 'loss/train': 1.968850016593933} +02/25/2022 03:14:29 - INFO - codeparrot_training - Step 19819: {'lr': 0.00034840024783085177, 'samples': 10147840, 'steps': 19819, 'loss/train': 2.4700424671173096} +02/25/2022 03:14:33 - INFO - codeparrot_training - Step 19820: {'lr': 0.00034838520591366285, 'samples': 10148352, 'steps': 19820, 'loss/train': 3.0214126110076904} +02/25/2022 03:14:38 - INFO - codeparrot_training - Step 19821: {'lr': 0.00034837016357502297, 'samples': 10148864, 'steps': 19821, 'loss/train': 1.083014965057373} +02/25/2022 03:14:42 - INFO - codeparrot_training - Step 19822: {'lr': 0.0003483551208149965, 'samples': 10149376, 'steps': 19822, 'loss/train': 0.3195439577102661} +02/25/2022 03:14:48 - INFO - codeparrot_training - Step 19823: {'lr': 0.00034834007763364803, 'samples': 10149888, 'steps': 19823, 'loss/train': 1.2361674308776855} +02/25/2022 03:14:51 - INFO - codeparrot_training - Step 19824: {'lr': 0.00034832503403104184, 'samples': 10150400, 'steps': 19824, 'loss/train': 1.9308634996414185} +02/25/2022 03:14:57 - INFO - codeparrot_training - Step 19825: {'lr': 0.00034830999000724246, 'samples': 10150912, 'steps': 19825, 'loss/train': 1.9489511251449585} +02/25/2022 03:15:00 - INFO - codeparrot_training - Step 19826: {'lr': 0.00034829494556231423, 'samples': 10151424, 'steps': 19826, 'loss/train': 2.585437297821045} +02/25/2022 03:15:06 - INFO - codeparrot_training - Step 19827: {'lr': 0.00034827990069632173, 'samples': 10151936, 'steps': 19827, 'loss/train': 3.128664493560791} +02/25/2022 03:15:09 - INFO - codeparrot_training - Step 19828: {'lr': 0.0003482648554093293, 'samples': 10152448, 'steps': 19828, 'loss/train': 2.040684223175049} +02/25/2022 03:15:15 - INFO - codeparrot_training - Step 19829: {'lr': 0.0003482498097014015, 'samples': 10152960, 'steps': 19829, 'loss/train': 2.1903162002563477} +02/25/2022 03:15:18 - INFO - codeparrot_training - Step 19830: {'lr': 0.0003482347635726026, 'samples': 10153472, 'steps': 19830, 'loss/train': 1.3641180992126465} +02/25/2022 03:15:24 - INFO - codeparrot_training - Step 19831: {'lr': 0.00034821971702299716, 'samples': 10153984, 'steps': 19831, 'loss/train': 2.8737971782684326} +02/25/2022 03:15:27 - INFO - codeparrot_training - Step 19832: {'lr': 0.0003482046700526498, 'samples': 10154496, 'steps': 19832, 'loss/train': 2.024074077606201} +02/25/2022 03:15:33 - INFO - codeparrot_training - Step 19833: {'lr': 0.0003481896226616246, 'samples': 10155008, 'steps': 19833, 'loss/train': 1.6594007015228271} +02/25/2022 03:15:36 - INFO - codeparrot_training - Step 19834: {'lr': 0.00034817457484998644, 'samples': 10155520, 'steps': 19834, 'loss/train': 2.130175828933716} +02/25/2022 03:15:42 - INFO - codeparrot_training - Step 19835: {'lr': 0.00034815952661779946, 'samples': 10156032, 'steps': 19835, 'loss/train': 2.5905861854553223} +02/25/2022 03:15:46 - INFO - codeparrot_training - Step 19836: {'lr': 0.00034814447796512824, 'samples': 10156544, 'steps': 19836, 'loss/train': 1.9122241735458374} +02/25/2022 03:15:51 - INFO - codeparrot_training - Step 19837: {'lr': 0.0003481294288920373, 'samples': 10157056, 'steps': 19837, 'loss/train': 2.485546112060547} +02/25/2022 03:15:55 - INFO - codeparrot_training - Step 19838: {'lr': 0.00034811437939859106, 'samples': 10157568, 'steps': 19838, 'loss/train': 1.7731329202651978} +02/25/2022 03:16:00 - INFO - codeparrot_training - Step 19839: {'lr': 0.0003480993294848539, 'samples': 10158080, 'steps': 19839, 'loss/train': 1.0564831495285034} +02/25/2022 03:16:04 - INFO - codeparrot_training - Step 19840: {'lr': 0.00034808427915089036, 'samples': 10158592, 'steps': 19840, 'loss/train': 1.7406283617019653} +02/25/2022 03:16:09 - INFO - codeparrot_training - Step 19841: {'lr': 0.000348069228396765, 'samples': 10159104, 'steps': 19841, 'loss/train': 2.0025475025177} +02/25/2022 03:16:13 - INFO - codeparrot_training - Step 19842: {'lr': 0.00034805417722254213, 'samples': 10159616, 'steps': 19842, 'loss/train': 2.2896740436553955} +02/25/2022 03:16:18 - INFO - codeparrot_training - Step 19843: {'lr': 0.00034803912562828633, 'samples': 10160128, 'steps': 19843, 'loss/train': 2.4485151767730713} +02/25/2022 03:16:25 - INFO - codeparrot_training - Step 19844: {'lr': 0.000348024073614062, 'samples': 10160640, 'steps': 19844, 'loss/train': 2.3837153911590576} +02/25/2022 03:16:28 - INFO - codeparrot_training - Step 19845: {'lr': 0.0003480090211799337, 'samples': 10161152, 'steps': 19845, 'loss/train': 1.630618929862976} +02/25/2022 03:16:34 - INFO - codeparrot_training - Step 19846: {'lr': 0.0003479939683259659, 'samples': 10161664, 'steps': 19846, 'loss/train': 1.957349181175232} +02/25/2022 03:16:37 - INFO - codeparrot_training - Step 19847: {'lr': 0.000347978915052223, 'samples': 10162176, 'steps': 19847, 'loss/train': 1.1609623432159424} +02/25/2022 03:16:41 - INFO - codeparrot_training - Step 19848: {'lr': 0.0003479638613587696, 'samples': 10162688, 'steps': 19848, 'loss/train': 2.6124157905578613} +02/25/2022 03:16:46 - INFO - codeparrot_training - Step 19849: {'lr': 0.0003479488072456701, 'samples': 10163200, 'steps': 19849, 'loss/train': 2.1950464248657227} +02/25/2022 03:16:52 - INFO - codeparrot_training - Step 19850: {'lr': 0.000347933752712989, 'samples': 10163712, 'steps': 19850, 'loss/train': 2.1847610473632812} +02/25/2022 03:16:55 - INFO - codeparrot_training - Step 19851: {'lr': 0.00034791869776079084, 'samples': 10164224, 'steps': 19851, 'loss/train': 2.757704257965088} +02/25/2022 03:17:01 - INFO - codeparrot_training - Step 19852: {'lr': 0.00034790364238914003, 'samples': 10164736, 'steps': 19852, 'loss/train': 2.469663143157959} +02/25/2022 03:17:04 - INFO - codeparrot_training - Step 19853: {'lr': 0.0003478885865981011, 'samples': 10165248, 'steps': 19853, 'loss/train': 1.9874293804168701} +02/25/2022 03:17:10 - INFO - codeparrot_training - Step 19854: {'lr': 0.0003478735303877386, 'samples': 10165760, 'steps': 19854, 'loss/train': 1.5458056926727295} +02/25/2022 03:17:14 - INFO - codeparrot_training - Step 19855: {'lr': 0.0003478584737581169, 'samples': 10166272, 'steps': 19855, 'loss/train': 1.4001595973968506} +02/25/2022 03:17:19 - INFO - codeparrot_training - Step 19856: {'lr': 0.00034784341670930066, 'samples': 10166784, 'steps': 19856, 'loss/train': 1.5884674787521362} +02/25/2022 03:17:23 - INFO - codeparrot_training - Step 19857: {'lr': 0.00034782835924135417, 'samples': 10167296, 'steps': 19857, 'loss/train': 3.9674744606018066} +02/25/2022 03:17:28 - INFO - codeparrot_training - Step 19858: {'lr': 0.0003478133013543422, 'samples': 10167808, 'steps': 19858, 'loss/train': 1.4108281135559082} +02/25/2022 03:17:32 - INFO - codeparrot_training - Step 19859: {'lr': 0.000347798243048329, 'samples': 10168320, 'steps': 19859, 'loss/train': 1.3814793825149536} +02/25/2022 03:17:37 - INFO - codeparrot_training - Step 19860: {'lr': 0.00034778318432337926, 'samples': 10168832, 'steps': 19860, 'loss/train': 3.2302088737487793} +02/25/2022 03:17:41 - INFO - codeparrot_training - Step 19861: {'lr': 0.0003477681251795573, 'samples': 10169344, 'steps': 19861, 'loss/train': 1.1204324960708618} +02/25/2022 03:17:46 - INFO - codeparrot_training - Step 19862: {'lr': 0.0003477530656169278, 'samples': 10169856, 'steps': 19862, 'loss/train': 1.9495435953140259} +02/25/2022 03:17:50 - INFO - codeparrot_training - Step 19863: {'lr': 0.00034773800563555517, 'samples': 10170368, 'steps': 19863, 'loss/train': 1.4796690940856934} +02/25/2022 03:17:55 - INFO - codeparrot_training - Step 19864: {'lr': 0.0003477229452355041, 'samples': 10170880, 'steps': 19864, 'loss/train': 1.983762502670288} +02/25/2022 03:17:59 - INFO - codeparrot_training - Step 19865: {'lr': 0.00034770788441683875, 'samples': 10171392, 'steps': 19865, 'loss/train': 3.638152837753296} +02/25/2022 03:18:05 - INFO - codeparrot_training - Step 19866: {'lr': 0.00034769282317962405, 'samples': 10171904, 'steps': 19866, 'loss/train': 1.7491425275802612} +02/25/2022 03:18:09 - INFO - codeparrot_training - Step 19867: {'lr': 0.00034767776152392417, 'samples': 10172416, 'steps': 19867, 'loss/train': 2.5320382118225098} +02/25/2022 03:18:14 - INFO - codeparrot_training - Step 19868: {'lr': 0.0003476626994498038, 'samples': 10172928, 'steps': 19868, 'loss/train': 1.660361409187317} +02/25/2022 03:18:18 - INFO - codeparrot_training - Step 19869: {'lr': 0.00034764763695732746, 'samples': 10173440, 'steps': 19869, 'loss/train': 1.7201393842697144} +02/25/2022 03:18:23 - INFO - codeparrot_training - Step 19870: {'lr': 0.0003476325740465597, 'samples': 10173952, 'steps': 19870, 'loss/train': 1.8269997835159302} +02/25/2022 03:18:27 - INFO - codeparrot_training - Step 19871: {'lr': 0.0003476175107175649, 'samples': 10174464, 'steps': 19871, 'loss/train': 2.901841402053833} +02/25/2022 03:18:33 - INFO - codeparrot_training - Step 19872: {'lr': 0.00034760244697040776, 'samples': 10174976, 'steps': 19872, 'loss/train': 2.4298744201660156} +02/25/2022 03:18:36 - INFO - codeparrot_training - Step 19873: {'lr': 0.00034758738280515265, 'samples': 10175488, 'steps': 19873, 'loss/train': 2.4408974647521973} +02/25/2022 03:18:42 - INFO - codeparrot_training - Step 19874: {'lr': 0.00034757231822186426, 'samples': 10176000, 'steps': 19874, 'loss/train': 0.612687349319458} +02/25/2022 03:18:45 - INFO - codeparrot_training - Step 19875: {'lr': 0.00034755725322060705, 'samples': 10176512, 'steps': 19875, 'loss/train': 1.9196265935897827} +02/25/2022 03:18:51 - INFO - codeparrot_training - Step 19876: {'lr': 0.00034754218780144546, 'samples': 10177024, 'steps': 19876, 'loss/train': 2.2596445083618164} +02/25/2022 03:18:54 - INFO - codeparrot_training - Step 19877: {'lr': 0.00034752712196444417, 'samples': 10177536, 'steps': 19877, 'loss/train': 5.748746395111084} +02/25/2022 03:19:00 - INFO - codeparrot_training - Step 19878: {'lr': 0.00034751205570966764, 'samples': 10178048, 'steps': 19878, 'loss/train': 1.9081982374191284} +02/25/2022 03:19:03 - INFO - codeparrot_training - Step 19879: {'lr': 0.0003474969890371805, 'samples': 10178560, 'steps': 19879, 'loss/train': 3.318934440612793} +02/25/2022 03:19:11 - INFO - codeparrot_training - Step 19880: {'lr': 0.0003474819219470471, 'samples': 10179072, 'steps': 19880, 'loss/train': 1.8040556907653809} +02/25/2022 03:19:14 - INFO - codeparrot_training - Step 19881: {'lr': 0.0003474668544393321, 'samples': 10179584, 'steps': 19881, 'loss/train': 1.8916703462600708} +02/25/2022 03:19:20 - INFO - codeparrot_training - Step 19882: {'lr': 0.00034745178651410014, 'samples': 10180096, 'steps': 19882, 'loss/train': 2.4303483963012695} +02/25/2022 03:19:23 - INFO - codeparrot_training - Step 19883: {'lr': 0.0003474367181714156, 'samples': 10180608, 'steps': 19883, 'loss/train': 1.925785779953003} +02/25/2022 03:19:29 - INFO - codeparrot_training - Step 19884: {'lr': 0.0003474216494113431, 'samples': 10181120, 'steps': 19884, 'loss/train': 2.178867816925049} +02/25/2022 03:19:32 - INFO - codeparrot_training - Step 19885: {'lr': 0.00034740658023394723, 'samples': 10181632, 'steps': 19885, 'loss/train': 2.8438267707824707} +02/25/2022 03:19:38 - INFO - codeparrot_training - Step 19886: {'lr': 0.0003473915106392925, 'samples': 10182144, 'steps': 19886, 'loss/train': 1.2732062339782715} +02/25/2022 03:19:41 - INFO - codeparrot_training - Step 19887: {'lr': 0.00034737644062744343, 'samples': 10182656, 'steps': 19887, 'loss/train': 1.9972949028015137} +02/25/2022 03:19:47 - INFO - codeparrot_training - Step 19888: {'lr': 0.0003473613701984646, 'samples': 10183168, 'steps': 19888, 'loss/train': 1.4803389310836792} +02/25/2022 03:19:50 - INFO - codeparrot_training - Step 19889: {'lr': 0.0003473462993524206, 'samples': 10183680, 'steps': 19889, 'loss/train': 1.1882394552230835} +02/25/2022 03:19:56 - INFO - codeparrot_training - Step 19890: {'lr': 0.000347331228089376, 'samples': 10184192, 'steps': 19890, 'loss/train': 1.7638345956802368} +02/25/2022 03:20:00 - INFO - codeparrot_training - Step 19891: {'lr': 0.0003473161564093953, 'samples': 10184704, 'steps': 19891, 'loss/train': 2.086263656616211} +02/25/2022 03:20:05 - INFO - codeparrot_training - Step 19892: {'lr': 0.0003473010843125431, 'samples': 10185216, 'steps': 19892, 'loss/train': 2.5646424293518066} +02/25/2022 03:20:09 - INFO - codeparrot_training - Step 19893: {'lr': 0.00034728601179888395, 'samples': 10185728, 'steps': 19893, 'loss/train': 2.2138781547546387} +02/25/2022 03:20:14 - INFO - codeparrot_training - Step 19894: {'lr': 0.00034727093886848236, 'samples': 10186240, 'steps': 19894, 'loss/train': 2.007449150085449} +02/25/2022 03:20:18 - INFO - codeparrot_training - Step 19895: {'lr': 0.000347255865521403, 'samples': 10186752, 'steps': 19895, 'loss/train': 1.0181776285171509} +02/25/2022 03:20:23 - INFO - codeparrot_training - Step 19896: {'lr': 0.0003472407917577104, 'samples': 10187264, 'steps': 19896, 'loss/train': 1.5233153104782104} +02/25/2022 03:20:27 - INFO - codeparrot_training - Step 19897: {'lr': 0.0003472257175774691, 'samples': 10187776, 'steps': 19897, 'loss/train': 0.6901962161064148} +02/25/2022 03:20:33 - INFO - codeparrot_training - Step 19898: {'lr': 0.00034721064298074366, 'samples': 10188288, 'steps': 19898, 'loss/train': 2.7319552898406982} +02/25/2022 03:20:36 - INFO - codeparrot_training - Step 19899: {'lr': 0.0003471955679675988, 'samples': 10188800, 'steps': 19899, 'loss/train': 0.7970505952835083} +02/25/2022 03:20:42 - INFO - codeparrot_training - Step 19900: {'lr': 0.0003471804925380989, 'samples': 10189312, 'steps': 19900, 'loss/train': 1.6189533472061157} +02/25/2022 03:20:45 - INFO - codeparrot_training - Step 19901: {'lr': 0.0003471654166923087, 'samples': 10189824, 'steps': 19901, 'loss/train': 2.3713557720184326} +02/25/2022 03:20:51 - INFO - codeparrot_training - Step 19902: {'lr': 0.00034715034043029263, 'samples': 10190336, 'steps': 19902, 'loss/train': 2.5521323680877686} +02/25/2022 03:20:55 - INFO - codeparrot_training - Step 19903: {'lr': 0.00034713526375211546, 'samples': 10190848, 'steps': 19903, 'loss/train': 1.1911510229110718} +02/25/2022 03:21:00 - INFO - codeparrot_training - Step 19904: {'lr': 0.00034712018665784155, 'samples': 10191360, 'steps': 19904, 'loss/train': 1.551103949546814} +02/25/2022 03:21:04 - INFO - codeparrot_training - Step 19905: {'lr': 0.00034710510914753563, 'samples': 10191872, 'steps': 19905, 'loss/train': 1.9401558637619019} +02/25/2022 03:21:09 - INFO - codeparrot_training - Step 19906: {'lr': 0.00034709003122126227, 'samples': 10192384, 'steps': 19906, 'loss/train': 2.7029035091400146} +02/25/2022 03:21:13 - INFO - codeparrot_training - Step 19907: {'lr': 0.000347074952879086, 'samples': 10192896, 'steps': 19907, 'loss/train': 1.5081888437271118} +02/25/2022 03:21:18 - INFO - codeparrot_training - Step 19908: {'lr': 0.0003470598741210715, 'samples': 10193408, 'steps': 19908, 'loss/train': 1.5659664869308472} +02/25/2022 03:21:22 - INFO - codeparrot_training - Step 19909: {'lr': 0.00034704479494728337, 'samples': 10193920, 'steps': 19909, 'loss/train': 0.2435697615146637} +02/25/2022 03:21:27 - INFO - codeparrot_training - Step 19910: {'lr': 0.00034702971535778614, 'samples': 10194432, 'steps': 19910, 'loss/train': 1.5257058143615723} +02/25/2022 03:21:31 - INFO - codeparrot_training - Step 19911: {'lr': 0.00034701463535264434, 'samples': 10194944, 'steps': 19911, 'loss/train': 1.5345323085784912} +02/25/2022 03:21:37 - INFO - codeparrot_training - Step 19912: {'lr': 0.0003469995549319227, 'samples': 10195456, 'steps': 19912, 'loss/train': 2.294989824295044} +02/25/2022 03:21:40 - INFO - codeparrot_training - Step 19913: {'lr': 0.0003469844740956858, 'samples': 10195968, 'steps': 19913, 'loss/train': 2.1156210899353027} +02/25/2022 03:21:46 - INFO - codeparrot_training - Step 19914: {'lr': 0.0003469693928439982, 'samples': 10196480, 'steps': 19914, 'loss/train': 1.6220123767852783} +02/25/2022 03:21:49 - INFO - codeparrot_training - Step 19915: {'lr': 0.00034695431117692446, 'samples': 10196992, 'steps': 19915, 'loss/train': 0.9607713222503662} +02/25/2022 03:21:55 - INFO - codeparrot_training - Step 19916: {'lr': 0.0003469392290945292, 'samples': 10197504, 'steps': 19916, 'loss/train': 1.727417230606079} +02/25/2022 03:21:58 - INFO - codeparrot_training - Step 19917: {'lr': 0.00034692414659687714, 'samples': 10198016, 'steps': 19917, 'loss/train': 1.8917516469955444} +02/25/2022 03:22:04 - INFO - codeparrot_training - Step 19918: {'lr': 0.0003469090636840328, 'samples': 10198528, 'steps': 19918, 'loss/train': 1.4184610843658447} +02/25/2022 03:22:07 - INFO - codeparrot_training - Step 19919: {'lr': 0.0003468939803560608, 'samples': 10199040, 'steps': 19919, 'loss/train': 1.2965744733810425} +02/25/2022 03:22:13 - INFO - codeparrot_training - Step 19920: {'lr': 0.00034687889661302575, 'samples': 10199552, 'steps': 19920, 'loss/train': 0.7373710870742798} +02/25/2022 03:22:16 - INFO - codeparrot_training - Step 19921: {'lr': 0.0003468638124549923, 'samples': 10200064, 'steps': 19921, 'loss/train': 3.1162328720092773} +02/25/2022 03:22:22 - INFO - codeparrot_training - Step 19922: {'lr': 0.00034684872788202497, 'samples': 10200576, 'steps': 19922, 'loss/train': 2.807286024093628} +02/25/2022 03:22:25 - INFO - codeparrot_training - Step 19923: {'lr': 0.0003468336428941885, 'samples': 10201088, 'steps': 19923, 'loss/train': 0.7555232048034668} +02/25/2022 03:22:31 - INFO - codeparrot_training - Step 19924: {'lr': 0.00034681855749154743, 'samples': 10201600, 'steps': 19924, 'loss/train': 2.127145767211914} +02/25/2022 03:22:35 - INFO - codeparrot_training - Step 19925: {'lr': 0.00034680347167416643, 'samples': 10202112, 'steps': 19925, 'loss/train': 2.4664297103881836} +02/25/2022 03:22:41 - INFO - codeparrot_training - Step 19926: {'lr': 0.00034678838544211003, 'samples': 10202624, 'steps': 19926, 'loss/train': 1.6883305311203003} +02/25/2022 03:22:44 - INFO - codeparrot_training - Step 19927: {'lr': 0.000346773298795443, 'samples': 10203136, 'steps': 19927, 'loss/train': 2.5180041790008545} +02/25/2022 03:22:50 - INFO - codeparrot_training - Step 19928: {'lr': 0.00034675821173422983, 'samples': 10203648, 'steps': 19928, 'loss/train': 1.777349829673767} +02/25/2022 03:22:55 - INFO - codeparrot_training - Step 19929: {'lr': 0.0003467431242585352, 'samples': 10204160, 'steps': 19929, 'loss/train': 2.1454718112945557} +02/25/2022 03:22:59 - INFO - codeparrot_training - Step 19930: {'lr': 0.0003467280363684238, 'samples': 10204672, 'steps': 19930, 'loss/train': 1.5410213470458984} +02/25/2022 03:23:02 - INFO - codeparrot_training - Step 19931: {'lr': 0.0003467129480639601, 'samples': 10205184, 'steps': 19931, 'loss/train': 2.920882225036621} +02/25/2022 03:23:08 - INFO - codeparrot_training - Step 19932: {'lr': 0.000346697859345209, 'samples': 10205696, 'steps': 19932, 'loss/train': 3.7892770767211914} +02/25/2022 03:23:11 - INFO - codeparrot_training - Step 19933: {'lr': 0.00034668277021223493, 'samples': 10206208, 'steps': 19933, 'loss/train': 1.9957493543624878} +02/25/2022 03:23:17 - INFO - codeparrot_training - Step 19934: {'lr': 0.0003466676806651025, 'samples': 10206720, 'steps': 19934, 'loss/train': 0.9894843101501465} +02/25/2022 03:23:20 - INFO - codeparrot_training - Step 19935: {'lr': 0.0003466525907038765, 'samples': 10207232, 'steps': 19935, 'loss/train': 2.076892614364624} +02/25/2022 03:23:26 - INFO - codeparrot_training - Step 19936: {'lr': 0.0003466375003286214, 'samples': 10207744, 'steps': 19936, 'loss/train': 0.9773076176643372} +02/25/2022 03:23:29 - INFO - codeparrot_training - Step 19937: {'lr': 0.00034662240953940205, 'samples': 10208256, 'steps': 19937, 'loss/train': 2.639242172241211} +02/25/2022 03:23:35 - INFO - codeparrot_training - Step 19938: {'lr': 0.0003466073183362829, 'samples': 10208768, 'steps': 19938, 'loss/train': 1.8597412109375} +02/25/2022 03:23:39 - INFO - codeparrot_training - Step 19939: {'lr': 0.00034659222671932865, 'samples': 10209280, 'steps': 19939, 'loss/train': 2.3973875045776367} +02/25/2022 03:23:44 - INFO - codeparrot_training - Step 19940: {'lr': 0.000346577134688604, 'samples': 10209792, 'steps': 19940, 'loss/train': 1.4727375507354736} +02/25/2022 03:23:48 - INFO - codeparrot_training - Step 19941: {'lr': 0.0003465620422441737, 'samples': 10210304, 'steps': 19941, 'loss/train': 1.929829478263855} +02/25/2022 03:23:54 - INFO - codeparrot_training - Step 19942: {'lr': 0.00034654694938610205, 'samples': 10210816, 'steps': 19942, 'loss/train': 2.0960168838500977} +02/25/2022 03:23:57 - INFO - codeparrot_training - Step 19943: {'lr': 0.00034653185611445403, 'samples': 10211328, 'steps': 19943, 'loss/train': 1.7377725839614868} +02/25/2022 03:24:03 - INFO - codeparrot_training - Step 19944: {'lr': 0.0003465167624292942, 'samples': 10211840, 'steps': 19944, 'loss/train': 3.166250705718994} +02/25/2022 03:24:08 - INFO - codeparrot_training - Step 19945: {'lr': 0.0003465016683306872, 'samples': 10212352, 'steps': 19945, 'loss/train': 2.699152708053589} +02/25/2022 03:24:12 - INFO - codeparrot_training - Step 19946: {'lr': 0.0003464865738186977, 'samples': 10212864, 'steps': 19946, 'loss/train': 0.7595689296722412} +02/25/2022 03:24:15 - INFO - codeparrot_training - Step 19947: {'lr': 0.0003464714788933904, 'samples': 10213376, 'steps': 19947, 'loss/train': 2.344856023788452} +02/25/2022 03:24:21 - INFO - codeparrot_training - Step 19948: {'lr': 0.0003464563835548298, 'samples': 10213888, 'steps': 19948, 'loss/train': 1.5992101430892944} +02/25/2022 03:24:25 - INFO - codeparrot_training - Step 19949: {'lr': 0.0003464412878030808, 'samples': 10214400, 'steps': 19949, 'loss/train': 2.587639808654785} +02/25/2022 03:24:30 - INFO - codeparrot_training - Step 19950: {'lr': 0.0003464261916382079, 'samples': 10214912, 'steps': 19950, 'loss/train': 2.1051950454711914} +02/25/2022 03:24:34 - INFO - codeparrot_training - Step 19951: {'lr': 0.0003464110950602758, 'samples': 10215424, 'steps': 19951, 'loss/train': 2.285545825958252} +02/25/2022 03:24:39 - INFO - codeparrot_training - Step 19952: {'lr': 0.00034639599806934917, 'samples': 10215936, 'steps': 19952, 'loss/train': 2.064997911453247} +02/25/2022 03:24:43 - INFO - codeparrot_training - Step 19953: {'lr': 0.0003463809006654927, 'samples': 10216448, 'steps': 19953, 'loss/train': 2.6954777240753174} +02/25/2022 03:24:48 - INFO - codeparrot_training - Step 19954: {'lr': 0.0003463658028487711, 'samples': 10216960, 'steps': 19954, 'loss/train': 2.060100555419922} +02/25/2022 03:24:52 - INFO - codeparrot_training - Step 19955: {'lr': 0.0003463507046192489, 'samples': 10217472, 'steps': 19955, 'loss/train': 3.039870500564575} +02/25/2022 03:24:57 - INFO - codeparrot_training - Step 19956: {'lr': 0.000346335605976991, 'samples': 10217984, 'steps': 19956, 'loss/train': 1.7583907842636108} +02/25/2022 03:25:01 - INFO - codeparrot_training - Step 19957: {'lr': 0.00034632050692206184, 'samples': 10218496, 'steps': 19957, 'loss/train': 1.969556450843811} +02/25/2022 03:25:07 - INFO - codeparrot_training - Step 19958: {'lr': 0.0003463054074545263, 'samples': 10219008, 'steps': 19958, 'loss/train': 1.9443734884262085} +02/25/2022 03:25:10 - INFO - codeparrot_training - Step 19959: {'lr': 0.00034629030757444895, 'samples': 10219520, 'steps': 19959, 'loss/train': 2.1260299682617188} +02/25/2022 03:25:16 - INFO - codeparrot_training - Step 19960: {'lr': 0.00034627520728189456, 'samples': 10220032, 'steps': 19960, 'loss/train': 1.9024779796600342} +02/25/2022 03:25:22 - INFO - codeparrot_training - Step 19961: {'lr': 0.00034626010657692766, 'samples': 10220544, 'steps': 19961, 'loss/train': 1.2565720081329346} +02/25/2022 03:25:25 - INFO - codeparrot_training - Step 19962: {'lr': 0.0003462450054596131, 'samples': 10221056, 'steps': 19962, 'loss/train': 1.2504113912582397} +02/25/2022 03:25:31 - INFO - codeparrot_training - Step 19963: {'lr': 0.0003462299039300154, 'samples': 10221568, 'steps': 19963, 'loss/train': 1.6858859062194824} +02/25/2022 03:25:34 - INFO - codeparrot_training - Step 19964: {'lr': 0.0003462148019881994, 'samples': 10222080, 'steps': 19964, 'loss/train': 1.0568069219589233} +02/25/2022 03:25:40 - INFO - codeparrot_training - Step 19965: {'lr': 0.0003461996996342298, 'samples': 10222592, 'steps': 19965, 'loss/train': 0.959199070930481} +02/25/2022 03:25:43 - INFO - codeparrot_training - Step 19966: {'lr': 0.00034618459686817113, 'samples': 10223104, 'steps': 19966, 'loss/train': 2.508249044418335} +02/25/2022 03:25:49 - INFO - codeparrot_training - Step 19967: {'lr': 0.0003461694936900883, 'samples': 10223616, 'steps': 19967, 'loss/train': 2.3394620418548584} +02/25/2022 03:25:52 - INFO - codeparrot_training - Step 19968: {'lr': 0.0003461543901000458, 'samples': 10224128, 'steps': 19968, 'loss/train': 2.048583507537842} +02/25/2022 03:25:58 - INFO - codeparrot_training - Step 19969: {'lr': 0.00034613928609810845, 'samples': 10224640, 'steps': 19969, 'loss/train': 0.318865031003952} +02/25/2022 03:26:01 - INFO - codeparrot_training - Step 19970: {'lr': 0.0003461241816843409, 'samples': 10225152, 'steps': 19970, 'loss/train': 2.4739160537719727} +02/25/2022 03:26:07 - INFO - codeparrot_training - Step 19971: {'lr': 0.00034610907685880794, 'samples': 10225664, 'steps': 19971, 'loss/train': 1.484741449356079} +02/25/2022 03:26:10 - INFO - codeparrot_training - Step 19972: {'lr': 0.00034609397162157417, 'samples': 10226176, 'steps': 19972, 'loss/train': 1.639478325843811} +02/25/2022 03:26:16 - INFO - codeparrot_training - Step 19973: {'lr': 0.0003460788659727044, 'samples': 10226688, 'steps': 19973, 'loss/train': 2.4710071086883545} +02/25/2022 03:26:20 - INFO - codeparrot_training - Step 19974: {'lr': 0.0003460637599122632, 'samples': 10227200, 'steps': 19974, 'loss/train': 0.15729981660842896} +02/25/2022 03:26:25 - INFO - codeparrot_training - Step 19975: {'lr': 0.0003460486534403154, 'samples': 10227712, 'steps': 19975, 'loss/train': 1.7175712585449219} +02/25/2022 03:26:29 - INFO - codeparrot_training - Step 19976: {'lr': 0.0003460335465569256, 'samples': 10228224, 'steps': 19976, 'loss/train': 2.6938610076904297} +02/25/2022 03:26:34 - INFO - codeparrot_training - Step 19977: {'lr': 0.0003460184392621587, 'samples': 10228736, 'steps': 19977, 'loss/train': 2.1446146965026855} +02/25/2022 03:26:38 - INFO - codeparrot_training - Step 19978: {'lr': 0.0003460033315560792, 'samples': 10229248, 'steps': 19978, 'loss/train': 1.577078938484192} +02/25/2022 03:26:43 - INFO - codeparrot_training - Step 19979: {'lr': 0.00034598822343875197, 'samples': 10229760, 'steps': 19979, 'loss/train': 2.1239805221557617} +02/25/2022 03:26:47 - INFO - codeparrot_training - Step 19980: {'lr': 0.0003459731149102417, 'samples': 10230272, 'steps': 19980, 'loss/train': 3.171340227127075} +02/25/2022 03:26:52 - INFO - codeparrot_training - Step 19981: {'lr': 0.000345958005970613, 'samples': 10230784, 'steps': 19981, 'loss/train': 1.18368661403656} +02/25/2022 03:26:56 - INFO - codeparrot_training - Step 19982: {'lr': 0.0003459428966199307, 'samples': 10231296, 'steps': 19982, 'loss/train': 2.0071804523468018} +02/25/2022 03:27:02 - INFO - codeparrot_training - Step 19983: {'lr': 0.0003459277868582595, 'samples': 10231808, 'steps': 19983, 'loss/train': 2.0091047286987305} +02/25/2022 03:27:05 - INFO - codeparrot_training - Step 19984: {'lr': 0.00034591267668566413, 'samples': 10232320, 'steps': 19984, 'loss/train': 2.1538147926330566} +02/25/2022 03:27:11 - INFO - codeparrot_training - Step 19985: {'lr': 0.00034589756610220923, 'samples': 10232832, 'steps': 19985, 'loss/train': 1.5419145822525024} +02/25/2022 03:27:14 - INFO - codeparrot_training - Step 19986: {'lr': 0.0003458824551079597, 'samples': 10233344, 'steps': 19986, 'loss/train': 2.545056104660034} +02/25/2022 03:27:20 - INFO - codeparrot_training - Step 19987: {'lr': 0.00034586734370298017, 'samples': 10233856, 'steps': 19987, 'loss/train': 1.6556711196899414} +02/25/2022 03:27:23 - INFO - codeparrot_training - Step 19988: {'lr': 0.00034585223188733535, 'samples': 10234368, 'steps': 19988, 'loss/train': 2.288792848587036} +02/25/2022 03:27:29 - INFO - codeparrot_training - Step 19989: {'lr': 0.00034583711966109005, 'samples': 10234880, 'steps': 19989, 'loss/train': 1.9010210037231445} +02/25/2022 03:27:32 - INFO - codeparrot_training - Step 19990: {'lr': 0.0003458220070243089, 'samples': 10235392, 'steps': 19990, 'loss/train': 0.43585482239723206} +02/25/2022 03:27:38 - INFO - codeparrot_training - Step 19991: {'lr': 0.0003458068939770567, 'samples': 10235904, 'steps': 19991, 'loss/train': 2.1353538036346436} +02/25/2022 03:27:41 - INFO - codeparrot_training - Step 19992: {'lr': 0.00034579178051939827, 'samples': 10236416, 'steps': 19992, 'loss/train': 2.5798909664154053} +02/25/2022 03:27:48 - INFO - codeparrot_training - Step 19993: {'lr': 0.00034577666665139815, 'samples': 10236928, 'steps': 19993, 'loss/train': 1.2594544887542725} +02/25/2022 03:27:51 - INFO - codeparrot_training - Step 19994: {'lr': 0.0003457615523731213, 'samples': 10237440, 'steps': 19994, 'loss/train': 2.1348679065704346} +02/25/2022 03:27:57 - INFO - codeparrot_training - Step 19995: {'lr': 0.00034574643768463237, 'samples': 10237952, 'steps': 19995, 'loss/train': 2.14424467086792} +02/25/2022 03:28:00 - INFO - codeparrot_training - Step 19996: {'lr': 0.00034573132258599606, 'samples': 10238464, 'steps': 19996, 'loss/train': 1.6995242834091187} +02/25/2022 03:28:06 - INFO - codeparrot_training - Step 19997: {'lr': 0.00034571620707727713, 'samples': 10238976, 'steps': 19997, 'loss/train': 1.9517722129821777} +02/25/2022 03:28:09 - INFO - codeparrot_training - Step 19998: {'lr': 0.0003457010911585404, 'samples': 10239488, 'steps': 19998, 'loss/train': 1.497208595275879} +02/25/2022 03:28:15 - INFO - codeparrot_training - Step 19999: {'lr': 0.00034568597482985067, 'samples': 10240000, 'steps': 19999, 'loss/train': 1.5917458534240723} +02/25/2022 03:28:15 - INFO - codeparrot_training - Evaluating and saving model checkpoint