diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -45082,3 +45082,2506 @@ Use FP16 precision: False 12/28/2021 13:24:05 - INFO - codeparrot_training - Step 42499: {'lr': 2.744370718921152e-05, 'samples': 21760000, 'steps': 42499, 'batch_loss/train': 0.7796046268194914} 12/28/2021 13:24:05 - INFO - codeparrot_training - Evaluating and saving model checkpoint 12/28/2021 13:27:27 - INFO - codeparrot_training - Step 42500: {'loss/eval': 0.7435383200645447, 'perplexity': 2.1033647060394287} +12/28/2021 13:27:46 - WARNING - huggingface_hub.repository - Several commits (15) will be pushed upstream. +12/28/2021 13:28:03 - INFO - codeparrot_training - Step 42500: {'lr': 2.743652719704498e-05, 'samples': 21760512, 'steps': 42500, 'batch_loss/train': 0.7004945147782564} +12/28/2021 13:28:13 - INFO - codeparrot_training - Step 42501: {'lr': 2.7429348089706735e-05, 'samples': 21761024, 'steps': 42501, 'batch_loss/train': 0.6976831629872322} +12/28/2021 13:28:24 - INFO - codeparrot_training - Step 42502: {'lr': 2.7422169867225344e-05, 'samples': 21761536, 'steps': 42502, 'batch_loss/train': 0.8051681257784367} +12/28/2021 13:28:36 - INFO - codeparrot_training - Step 42503: {'lr': 2.741499252962934e-05, 'samples': 21762048, 'steps': 42503, 'batch_loss/train': 0.754157162969932} +12/28/2021 13:28:47 - INFO - codeparrot_training - Step 42504: {'lr': 2.740781607694731e-05, 'samples': 21762560, 'steps': 42504, 'batch_loss/train': 0.6706830877810717} +12/28/2021 13:28:58 - INFO - codeparrot_training - Step 42505: {'lr': 2.7400640509207764e-05, 'samples': 21763072, 'steps': 42505, 'batch_loss/train': 0.7312038233503699} +12/28/2021 13:29:10 - INFO - codeparrot_training - Step 42506: {'lr': 2.7393465826439258e-05, 'samples': 21763584, 'steps': 42506, 'batch_loss/train': 0.7785771051421762} +12/28/2021 13:29:20 - INFO - codeparrot_training - Step 42507: {'lr': 2.7386292028670158e-05, 'samples': 21764096, 'steps': 42507, 'batch_loss/train': 0.6454480632673949} +12/28/2021 13:29:31 - INFO - codeparrot_training - Step 42508: {'lr': 2.737911911592919e-05, 'samples': 21764608, 'steps': 42508, 'batch_loss/train': 0.8011961434967816} +12/28/2021 13:29:43 - INFO - codeparrot_training - Step 42509: {'lr': 2.7371947088244807e-05, 'samples': 21765120, 'steps': 42509, 'batch_loss/train': 0.718452975153923} +12/28/2021 13:29:54 - INFO - codeparrot_training - Step 42510: {'lr': 2.7364775945645432e-05, 'samples': 21765632, 'steps': 42510, 'batch_loss/train': 0.744437062414363} +12/28/2021 13:30:04 - INFO - codeparrot_training - Step 42511: {'lr': 2.7357605688159674e-05, 'samples': 21766144, 'steps': 42511, 'batch_loss/train': 0.7457643551751971} +12/28/2021 13:30:15 - INFO - codeparrot_training - Step 42512: {'lr': 2.7350436315816075e-05, 'samples': 21766656, 'steps': 42512, 'batch_loss/train': 0.781331030651927} +12/28/2021 13:30:29 - INFO - codeparrot_training - Step 42513: {'lr': 2.7343267828642994e-05, 'samples': 21767168, 'steps': 42513, 'batch_loss/train': 0.6672748511191458} +12/28/2021 13:30:40 - INFO - codeparrot_training - Step 42514: {'lr': 2.733610022666902e-05, 'samples': 21767680, 'steps': 42514, 'batch_loss/train': 0.7080229916609824} +12/28/2021 13:30:50 - INFO - codeparrot_training - Step 42515: {'lr': 2.732893350992266e-05, 'samples': 21768192, 'steps': 42515, 'batch_loss/train': 0.782982237637043} +12/28/2021 13:31:02 - INFO - codeparrot_training - Step 42516: {'lr': 2.7321767678432363e-05, 'samples': 21768704, 'steps': 42516, 'batch_loss/train': 0.8302211249247193} +12/28/2021 13:31:13 - INFO - codeparrot_training - Step 42517: {'lr': 2.7314602732226633e-05, 'samples': 21769216, 'steps': 42517, 'batch_loss/train': 0.5815172280999832} +12/28/2021 13:31:24 - INFO - codeparrot_training - Step 42518: {'lr': 2.730743867133395e-05, 'samples': 21769728, 'steps': 42518, 'batch_loss/train': 0.7217157271225005} +12/28/2021 13:31:38 - INFO - codeparrot_training - Step 42519: {'lr': 2.7300275495782816e-05, 'samples': 21770240, 'steps': 42519, 'batch_loss/train': 0.9673175634816289} +12/28/2021 13:31:49 - INFO - codeparrot_training - Step 42520: {'lr': 2.729311320560171e-05, 'samples': 21770752, 'steps': 42520, 'batch_loss/train': 0.7669903440400958} +12/28/2021 13:31:59 - INFO - codeparrot_training - Step 42521: {'lr': 2.728595180081911e-05, 'samples': 21771264, 'steps': 42521, 'batch_loss/train': 0.6837764789815992} +12/28/2021 13:32:10 - INFO - codeparrot_training - Step 42522: {'lr': 2.7278791281463433e-05, 'samples': 21771776, 'steps': 42522, 'batch_loss/train': 0.951064151711762} +12/28/2021 13:32:22 - INFO - codeparrot_training - Step 42523: {'lr': 2.7271631647563244e-05, 'samples': 21772288, 'steps': 42523, 'batch_loss/train': 0.9202890751184896} +12/28/2021 13:32:33 - INFO - codeparrot_training - Step 42524: {'lr': 2.726447289914691e-05, 'samples': 21772800, 'steps': 42524, 'batch_loss/train': 0.8279555565677583} +12/28/2021 13:32:43 - INFO - codeparrot_training - Step 42525: {'lr': 2.7257315036242875e-05, 'samples': 21773312, 'steps': 42525, 'batch_loss/train': 0.8006839888403192} +12/28/2021 13:32:55 - INFO - codeparrot_training - Step 42526: {'lr': 2.7250158058879758e-05, 'samples': 21773824, 'steps': 42526, 'batch_loss/train': 0.8003761358559132} +12/28/2021 13:33:06 - INFO - codeparrot_training - Step 42527: {'lr': 2.7243001967085847e-05, 'samples': 21774336, 'steps': 42527, 'batch_loss/train': 0.7650872804224491} +12/28/2021 13:33:17 - INFO - codeparrot_training - Step 42528: {'lr': 2.7235846760889666e-05, 'samples': 21774848, 'steps': 42528, 'batch_loss/train': 0.7833704184740782} +12/28/2021 13:33:31 - INFO - codeparrot_training - Step 42529: {'lr': 2.7228692440319643e-05, 'samples': 21775360, 'steps': 42529, 'batch_loss/train': 0.7461020218906924} +12/28/2021 13:33:41 - INFO - codeparrot_training - Step 42530: {'lr': 2.7221539005404196e-05, 'samples': 21775872, 'steps': 42530, 'batch_loss/train': 0.7323348419740796} +12/28/2021 13:33:52 - INFO - codeparrot_training - Step 42531: {'lr': 2.721438645617183e-05, 'samples': 21776384, 'steps': 42531, 'batch_loss/train': 0.7417780694086105} +12/28/2021 13:34:04 - INFO - codeparrot_training - Step 42532: {'lr': 2.7207234792650914e-05, 'samples': 21776896, 'steps': 42532, 'batch_loss/train': 0.7368893548846245} +12/28/2021 13:34:15 - INFO - codeparrot_training - Step 42533: {'lr': 2.7200084014869925e-05, 'samples': 21777408, 'steps': 42533, 'batch_loss/train': 0.6156201013363898} +12/28/2021 13:34:26 - INFO - codeparrot_training - Step 42534: {'lr': 2.7192934122857255e-05, 'samples': 21777920, 'steps': 42534, 'batch_loss/train': 0.7076956313103437} +12/28/2021 13:34:37 - INFO - codeparrot_training - Step 42535: {'lr': 2.7185785116641436e-05, 'samples': 21778432, 'steps': 42535, 'batch_loss/train': 0.6264114785008132} +12/28/2021 13:34:49 - INFO - codeparrot_training - Step 42536: {'lr': 2.7178636996250672e-05, 'samples': 21778944, 'steps': 42536, 'batch_loss/train': 0.4964801352471113} +12/28/2021 13:34:59 - INFO - codeparrot_training - Step 42537: {'lr': 2.7171489761713574e-05, 'samples': 21779456, 'steps': 42537, 'batch_loss/train': 0.7473274613730609} +12/28/2021 13:35:10 - INFO - codeparrot_training - Step 42538: {'lr': 2.716434341305854e-05, 'samples': 21779968, 'steps': 42538, 'batch_loss/train': 0.7581436401233077} +12/28/2021 13:35:22 - INFO - codeparrot_training - Step 42539: {'lr': 2.7157197950313904e-05, 'samples': 21780480, 'steps': 42539, 'batch_loss/train': 0.7688611038029194} +12/28/2021 13:35:33 - INFO - codeparrot_training - Step 42540: {'lr': 2.7150053373508093e-05, 'samples': 21780992, 'steps': 42540, 'batch_loss/train': 0.7601089146919549} +12/28/2021 13:35:43 - INFO - codeparrot_training - Step 42541: {'lr': 2.7142909682669526e-05, 'samples': 21781504, 'steps': 42541, 'batch_loss/train': 0.7075431370176375} +12/28/2021 13:35:54 - INFO - codeparrot_training - Step 42542: {'lr': 2.7135766877826622e-05, 'samples': 21782016, 'steps': 42542, 'batch_loss/train': 0.6439506970345974} +12/28/2021 13:36:08 - INFO - codeparrot_training - Step 42543: {'lr': 2.7128624959007723e-05, 'samples': 21782528, 'steps': 42543, 'batch_loss/train': 0.7970273387618363} +12/28/2021 13:36:19 - INFO - codeparrot_training - Step 42544: {'lr': 2.712148392624128e-05, 'samples': 21783040, 'steps': 42544, 'batch_loss/train': 0.7758398083969951} +12/28/2021 13:36:29 - INFO - codeparrot_training - Step 42545: {'lr': 2.7114343779555656e-05, 'samples': 21783552, 'steps': 42545, 'batch_loss/train': 0.6897365497425199} +12/28/2021 13:36:42 - INFO - codeparrot_training - Step 42546: {'lr': 2.7107204518979246e-05, 'samples': 21784064, 'steps': 42546, 'batch_loss/train': 0.8020186331123114} +12/28/2021 13:36:52 - INFO - codeparrot_training - Step 42547: {'lr': 2.7100066144540442e-05, 'samples': 21784576, 'steps': 42547, 'batch_loss/train': 0.7580157183110714} +12/28/2021 13:37:03 - INFO - codeparrot_training - Step 42548: {'lr': 2.7092928656267585e-05, 'samples': 21785088, 'steps': 42548, 'batch_loss/train': 0.7899861484766006} +12/28/2021 13:37:17 - INFO - codeparrot_training - Step 42549: {'lr': 2.7085792054189152e-05, 'samples': 21785600, 'steps': 42549, 'batch_loss/train': 0.8068087231367826} +12/28/2021 13:37:27 - INFO - codeparrot_training - Step 42550: {'lr': 2.7078656338333367e-05, 'samples': 21786112, 'steps': 42550, 'batch_loss/train': 0.7451301126857288} +12/28/2021 13:37:38 - INFO - codeparrot_training - Step 42551: {'lr': 2.707152150872863e-05, 'samples': 21786624, 'steps': 42551, 'batch_loss/train': 0.8407262861728668} +12/28/2021 13:37:50 - INFO - codeparrot_training - Step 42552: {'lr': 2.706438756540347e-05, 'samples': 21787136, 'steps': 42552, 'batch_loss/train': 0.7610770891187713} +12/28/2021 13:38:01 - INFO - codeparrot_training - Step 42553: {'lr': 2.7057254508386058e-05, 'samples': 21787648, 'steps': 42553, 'batch_loss/train': 0.5865780641324818} +12/28/2021 13:38:11 - INFO - codeparrot_training - Step 42554: {'lr': 2.705012233770482e-05, 'samples': 21788160, 'steps': 42554, 'batch_loss/train': 0.5243561934621539} +12/28/2021 13:38:22 - INFO - codeparrot_training - Step 42555: {'lr': 2.7042991053388118e-05, 'samples': 21788672, 'steps': 42555, 'batch_loss/train': 0.7506950273527764} +12/28/2021 13:38:34 - INFO - codeparrot_training - Step 42556: {'lr': 2.703586065546429e-05, 'samples': 21789184, 'steps': 42556, 'batch_loss/train': 0.7168807063717395} +12/28/2021 13:38:45 - INFO - codeparrot_training - Step 42557: {'lr': 2.7028731143961705e-05, 'samples': 21789696, 'steps': 42557, 'batch_loss/train': 0.6997908125631511} +12/28/2021 13:38:55 - INFO - codeparrot_training - Step 42558: {'lr': 2.70216025189087e-05, 'samples': 21790208, 'steps': 42558, 'batch_loss/train': 0.8655804060399532} +12/28/2021 13:39:09 - INFO - codeparrot_training - Step 42559: {'lr': 2.7014474780333588e-05, 'samples': 21790720, 'steps': 42559, 'batch_loss/train': 0.6738541885279119} +12/28/2021 13:39:20 - INFO - codeparrot_training - Step 42560: {'lr': 2.700734792826476e-05, 'samples': 21791232, 'steps': 42560, 'batch_loss/train': 0.7529141092672944} +12/28/2021 13:39:31 - INFO - codeparrot_training - Step 42561: {'lr': 2.7000221962730526e-05, 'samples': 21791744, 'steps': 42561, 'batch_loss/train': 0.7760366229340434} +12/28/2021 13:39:43 - INFO - codeparrot_training - Step 42562: {'lr': 2.6993096883759144e-05, 'samples': 21792256, 'steps': 42562, 'batch_loss/train': 0.7424377778079361} +12/28/2021 13:39:54 - INFO - codeparrot_training - Step 42563: {'lr': 2.6985972691379034e-05, 'samples': 21792768, 'steps': 42563, 'batch_loss/train': 0.7543232021853328} +12/28/2021 13:40:04 - INFO - codeparrot_training - Step 42564: {'lr': 2.6978849385618536e-05, 'samples': 21793280, 'steps': 42564, 'batch_loss/train': 0.6621879264712334} +12/28/2021 13:40:15 - INFO - codeparrot_training - Step 42565: {'lr': 2.6971726966505907e-05, 'samples': 21793792, 'steps': 42565, 'batch_loss/train': 0.8013642802834511} +12/28/2021 13:40:29 - INFO - codeparrot_training - Step 42566: {'lr': 2.6964605434069396e-05, 'samples': 21794304, 'steps': 42566, 'batch_loss/train': 0.6796853286214173} +12/28/2021 13:40:39 - INFO - codeparrot_training - Step 42567: {'lr': 2.695748478833751e-05, 'samples': 21794816, 'steps': 42567, 'batch_loss/train': 0.7260332014411688} +12/28/2021 13:40:50 - INFO - codeparrot_training - Step 42568: {'lr': 2.69503650293384e-05, 'samples': 21795328, 'steps': 42568, 'batch_loss/train': 0.8546094726771116} +12/28/2021 13:41:02 - INFO - codeparrot_training - Step 42569: {'lr': 2.6943246157100392e-05, 'samples': 21795840, 'steps': 42569, 'batch_loss/train': 0.7167334347032011} +12/28/2021 13:41:13 - INFO - codeparrot_training - Step 42570: {'lr': 2.6936128171651865e-05, 'samples': 21796352, 'steps': 42570, 'batch_loss/train': 0.6830033729784191} +12/28/2021 13:41:23 - INFO - codeparrot_training - Step 42571: {'lr': 2.6929011073021036e-05, 'samples': 21796864, 'steps': 42571, 'batch_loss/train': 0.747876406647265} +12/28/2021 13:41:35 - INFO - codeparrot_training - Step 42572: {'lr': 2.6921894861236224e-05, 'samples': 21797376, 'steps': 42572, 'batch_loss/train': 0.6972904298454523} +12/28/2021 13:41:46 - INFO - codeparrot_training - Step 42573: {'lr': 2.6914779536325736e-05, 'samples': 21797888, 'steps': 42573, 'batch_loss/train': 0.8056711326353252} +12/28/2021 13:41:57 - INFO - codeparrot_training - Step 42574: {'lr': 2.6907665098317853e-05, 'samples': 21798400, 'steps': 42574, 'batch_loss/train': 0.7692394540645182} +12/28/2021 13:42:07 - INFO - codeparrot_training - Step 42575: {'lr': 2.6900551547240886e-05, 'samples': 21798912, 'steps': 42575, 'batch_loss/train': 0.7310243956744671} +12/28/2021 13:42:21 - INFO - codeparrot_training - Step 42576: {'lr': 2.6893438883123066e-05, 'samples': 21799424, 'steps': 42576, 'batch_loss/train': 0.7319705891422927} +12/28/2021 13:42:32 - INFO - codeparrot_training - Step 42577: {'lr': 2.688632710599265e-05, 'samples': 21799936, 'steps': 42577, 'batch_loss/train': 1.1027760760043748} +12/28/2021 13:42:43 - INFO - codeparrot_training - Step 42578: {'lr': 2.687921621587802e-05, 'samples': 21800448, 'steps': 42578, 'batch_loss/train': 1.7960756188258529} +12/28/2021 13:42:55 - INFO - codeparrot_training - Step 42579: {'lr': 2.6872106212807334e-05, 'samples': 21800960, 'steps': 42579, 'batch_loss/train': 0.7497339071705937} +12/28/2021 13:43:06 - INFO - codeparrot_training - Step 42580: {'lr': 2.6864997096808923e-05, 'samples': 21801472, 'steps': 42580, 'batch_loss/train': 0.8136100899428129} +12/28/2021 13:43:16 - INFO - codeparrot_training - Step 42581: {'lr': 2.6857888867911018e-05, 'samples': 21801984, 'steps': 42581, 'batch_loss/train': 0.7353853737004101} +12/28/2021 13:43:27 - INFO - codeparrot_training - Step 42582: {'lr': 2.68507815261419e-05, 'samples': 21802496, 'steps': 42582, 'batch_loss/train': 0.7235416723415256} +12/28/2021 13:43:40 - INFO - codeparrot_training - Step 42583: {'lr': 2.6843675071529795e-05, 'samples': 21803008, 'steps': 42583, 'batch_loss/train': 0.8082741731777787} +12/28/2021 13:43:50 - INFO - codeparrot_training - Step 42584: {'lr': 2.6836569504103014e-05, 'samples': 21803520, 'steps': 42584, 'batch_loss/train': 0.8072171160019934} +12/28/2021 13:44:01 - INFO - codeparrot_training - Step 42585: {'lr': 2.6829464823889733e-05, 'samples': 21804032, 'steps': 42585, 'batch_loss/train': 0.6649309366475791} +12/28/2021 13:44:13 - INFO - codeparrot_training - Step 42586: {'lr': 2.6822361030918257e-05, 'samples': 21804544, 'steps': 42586, 'batch_loss/train': 0.7539305472746491} +12/28/2021 13:44:24 - INFO - codeparrot_training - Step 42587: {'lr': 2.6815258125216846e-05, 'samples': 21805056, 'steps': 42587, 'batch_loss/train': 0.815368739888072} +12/28/2021 13:44:34 - INFO - codeparrot_training - Step 42588: {'lr': 2.6808156106813614e-05, 'samples': 21805568, 'steps': 42588, 'batch_loss/train': 0.7671287544071674} +12/28/2021 13:44:45 - INFO - codeparrot_training - Step 42589: {'lr': 2.68010549757369e-05, 'samples': 21806080, 'steps': 42589, 'batch_loss/train': 0.8705878118053079} +12/28/2021 13:44:59 - INFO - codeparrot_training - Step 42590: {'lr': 2.6793954732014986e-05, 'samples': 21806592, 'steps': 42590, 'batch_loss/train': 0.6935642041498795} +12/28/2021 13:45:09 - INFO - codeparrot_training - Step 42591: {'lr': 2.678685537567596e-05, 'samples': 21807104, 'steps': 42591, 'batch_loss/train': 0.7978236945345998} +12/28/2021 13:45:20 - INFO - codeparrot_training - Step 42592: {'lr': 2.6779756906748083e-05, 'samples': 21807616, 'steps': 42592, 'batch_loss/train': 0.7062590192072093} +12/28/2021 13:45:32 - INFO - codeparrot_training - Step 42593: {'lr': 2.6772659325259713e-05, 'samples': 21808128, 'steps': 42593, 'batch_loss/train': 0.7921862113289535} +12/28/2021 13:45:43 - INFO - codeparrot_training - Step 42594: {'lr': 2.676556263123889e-05, 'samples': 21808640, 'steps': 42594, 'batch_loss/train': 0.731928922701627} +12/28/2021 13:45:53 - INFO - codeparrot_training - Step 42595: {'lr': 2.675846682471392e-05, 'samples': 21809152, 'steps': 42595, 'batch_loss/train': 0.6972706005908549} +12/28/2021 13:46:08 - INFO - codeparrot_training - Step 42596: {'lr': 2.6751371905712974e-05, 'samples': 21809664, 'steps': 42596, 'batch_loss/train': 0.7904366091825068} +12/28/2021 13:46:19 - INFO - codeparrot_training - Step 42597: {'lr': 2.6744277874264282e-05, 'samples': 21810176, 'steps': 42597, 'batch_loss/train': 0.7564612647984177} +12/28/2021 13:46:29 - INFO - codeparrot_training - Step 42598: {'lr': 2.6737184730396075e-05, 'samples': 21810688, 'steps': 42598, 'batch_loss/train': 0.8814279062207788} +12/28/2021 13:46:41 - INFO - codeparrot_training - Step 42599: {'lr': 2.6730092474136492e-05, 'samples': 21811200, 'steps': 42599, 'batch_loss/train': 0.45733456080779433} +12/28/2021 13:46:52 - INFO - codeparrot_training - Step 42600: {'lr': 2.672300110551376e-05, 'samples': 21811712, 'steps': 42600, 'batch_loss/train': 0.6902219248004258} +12/28/2021 13:47:03 - INFO - codeparrot_training - Step 42601: {'lr': 2.6715910624556054e-05, 'samples': 21812224, 'steps': 42601, 'batch_loss/train': 0.702801987528801} +12/28/2021 13:47:13 - INFO - codeparrot_training - Step 42602: {'lr': 2.670882103129166e-05, 'samples': 21812736, 'steps': 42602, 'batch_loss/train': 0.8059949530288577} +12/28/2021 13:47:28 - INFO - codeparrot_training - Step 42603: {'lr': 2.6701732325748573e-05, 'samples': 21813248, 'steps': 42603, 'batch_loss/train': 0.8339343033730984} +12/28/2021 13:47:39 - INFO - codeparrot_training - Step 42604: {'lr': 2.669464450795517e-05, 'samples': 21813760, 'steps': 42604, 'batch_loss/train': 0.6041291406145319} +12/28/2021 13:47:49 - INFO - codeparrot_training - Step 42605: {'lr': 2.66875575779395e-05, 'samples': 21814272, 'steps': 42605, 'batch_loss/train': 0.5308420069050044} +12/28/2021 13:48:02 - INFO - codeparrot_training - Step 42606: {'lr': 2.6680471535729722e-05, 'samples': 21814784, 'steps': 42606, 'batch_loss/train': 0.867158685810864} +12/28/2021 13:48:12 - INFO - codeparrot_training - Step 42607: {'lr': 2.6673386381354192e-05, 'samples': 21815296, 'steps': 42607, 'batch_loss/train': 0.706735952058807} +12/28/2021 13:48:23 - INFO - codeparrot_training - Step 42608: {'lr': 2.666630211484089e-05, 'samples': 21815808, 'steps': 42608, 'batch_loss/train': 0.6501704258553218} +12/28/2021 13:48:35 - INFO - codeparrot_training - Step 42609: {'lr': 2.6659218736218042e-05, 'samples': 21816320, 'steps': 42609, 'batch_loss/train': 0.6536305669287685} +12/28/2021 13:48:46 - INFO - codeparrot_training - Step 42610: {'lr': 2.6652136245513798e-05, 'samples': 21816832, 'steps': 42610, 'batch_loss/train': 0.6879867550451308} +12/28/2021 13:48:56 - INFO - codeparrot_training - Step 42611: {'lr': 2.664505464275635e-05, 'samples': 21817344, 'steps': 42611, 'batch_loss/train': 0.6499570969026536} +12/28/2021 13:49:07 - INFO - codeparrot_training - Step 42612: {'lr': 2.6637973927973817e-05, 'samples': 21817856, 'steps': 42612, 'batch_loss/train': 0.6431973652215675} +12/28/2021 13:49:21 - INFO - codeparrot_training - Step 42613: {'lr': 2.663089410119443e-05, 'samples': 21818368, 'steps': 42613, 'batch_loss/train': 0.5568706778576598} +12/28/2021 13:49:32 - INFO - codeparrot_training - Step 42614: {'lr': 2.6623815162446135e-05, 'samples': 21818880, 'steps': 42614, 'batch_loss/train': 0.6892455318011343} +12/28/2021 13:49:42 - INFO - codeparrot_training - Step 42615: {'lr': 2.6616737111757273e-05, 'samples': 21819392, 'steps': 42615, 'batch_loss/train': 0.6375196129083633} +12/28/2021 13:49:55 - INFO - codeparrot_training - Step 42616: {'lr': 2.6609659949155984e-05, 'samples': 21819904, 'steps': 42616, 'batch_loss/train': 0.7482366198673844} +12/28/2021 13:50:05 - INFO - codeparrot_training - Step 42617: {'lr': 2.660258367467025e-05, 'samples': 21820416, 'steps': 42617, 'batch_loss/train': 0.702512756921351} +12/28/2021 13:50:16 - INFO - codeparrot_training - Step 42618: {'lr': 2.659550828832827e-05, 'samples': 21820928, 'steps': 42618, 'batch_loss/train': 0.5536389632616192} +12/28/2021 13:50:30 - INFO - codeparrot_training - Step 42619: {'lr': 2.6588433790158295e-05, 'samples': 21821440, 'steps': 42619, 'batch_loss/train': 0.7442729263566434} +12/28/2021 13:50:40 - INFO - codeparrot_training - Step 42620: {'lr': 2.658136018018831e-05, 'samples': 21821952, 'steps': 42620, 'batch_loss/train': 0.6679021596210077} +12/28/2021 13:50:51 - INFO - codeparrot_training - Step 42621: {'lr': 2.657428745844645e-05, 'samples': 21822464, 'steps': 42621, 'batch_loss/train': 0.62143256329} +12/28/2021 13:51:03 - INFO - codeparrot_training - Step 42622: {'lr': 2.6567215624960868e-05, 'samples': 21822976, 'steps': 42622, 'batch_loss/train': 0.6605080173467286} +12/28/2021 13:51:14 - INFO - codeparrot_training - Step 42623: {'lr': 2.6560144679759675e-05, 'samples': 21823488, 'steps': 42623, 'batch_loss/train': 0.6883854467305355} +12/28/2021 13:51:24 - INFO - codeparrot_training - Step 42624: {'lr': 2.6553074622870987e-05, 'samples': 21824000, 'steps': 42624, 'batch_loss/train': 0.7646001456305385} +12/28/2021 13:51:35 - INFO - codeparrot_training - Step 42625: {'lr': 2.6546005454322896e-05, 'samples': 21824512, 'steps': 42625, 'batch_loss/train': 0.6742428004508838} +12/28/2021 13:51:47 - INFO - codeparrot_training - Step 42626: {'lr': 2.653893717414352e-05, 'samples': 21825024, 'steps': 42626, 'batch_loss/train': 0.5266303234966472} +12/28/2021 13:51:58 - INFO - codeparrot_training - Step 42627: {'lr': 2.653186978236094e-05, 'samples': 21825536, 'steps': 42627, 'batch_loss/train': 0.6726481071673334} +12/28/2021 13:52:08 - INFO - codeparrot_training - Step 42628: {'lr': 2.6524803279003334e-05, 'samples': 21826048, 'steps': 42628, 'batch_loss/train': 0.7025937992148101} +12/28/2021 13:52:20 - INFO - codeparrot_training - Step 42629: {'lr': 2.6517737664098625e-05, 'samples': 21826560, 'steps': 42629, 'batch_loss/train': 0.7352643045596778} +12/28/2021 13:52:31 - INFO - codeparrot_training - Step 42630: {'lr': 2.651067293767509e-05, 'samples': 21827072, 'steps': 42630, 'batch_loss/train': 0.609323800279526} +12/28/2021 13:52:42 - INFO - codeparrot_training - Step 42631: {'lr': 2.650360909976071e-05, 'samples': 21827584, 'steps': 42631, 'batch_loss/train': 0.7804056378081441} +12/28/2021 13:52:56 - INFO - codeparrot_training - Step 42632: {'lr': 2.6496546150383522e-05, 'samples': 21828096, 'steps': 42632, 'batch_loss/train': 0.5807799606118351} +12/28/2021 13:53:06 - INFO - codeparrot_training - Step 42633: {'lr': 2.6489484089571776e-05, 'samples': 21828608, 'steps': 42633, 'batch_loss/train': 0.5306811866466887} +12/28/2021 13:53:17 - INFO - codeparrot_training - Step 42634: {'lr': 2.6482422917353422e-05, 'samples': 21829120, 'steps': 42634, 'batch_loss/train': 0.7377413091016933} +12/28/2021 13:53:28 - INFO - codeparrot_training - Step 42635: {'lr': 2.6475362633756523e-05, 'samples': 21829632, 'steps': 42635, 'batch_loss/train': 0.7946821767836809} +12/28/2021 13:53:40 - INFO - codeparrot_training - Step 42636: {'lr': 2.6468303238809193e-05, 'samples': 21830144, 'steps': 42636, 'batch_loss/train': 0.7524408791214228} +12/28/2021 13:53:51 - INFO - codeparrot_training - Step 42637: {'lr': 2.6461244732539497e-05, 'samples': 21830656, 'steps': 42637, 'batch_loss/train': 0.7606656635180116} +12/28/2021 13:54:01 - INFO - codeparrot_training - Step 42638: {'lr': 2.6454187114975493e-05, 'samples': 21831168, 'steps': 42638, 'batch_loss/train': 0.6665282784961164} +12/28/2021 13:54:14 - INFO - codeparrot_training - Step 42639: {'lr': 2.644713038614527e-05, 'samples': 21831680, 'steps': 42639, 'batch_loss/train': 0.764296853041742} +12/28/2021 13:54:24 - INFO - codeparrot_training - Step 42640: {'lr': 2.6440074546076747e-05, 'samples': 21832192, 'steps': 42640, 'batch_loss/train': 0.4732684787886683} +12/28/2021 13:54:35 - INFO - codeparrot_training - Step 42641: {'lr': 2.643301959479813e-05, 'samples': 21832704, 'steps': 42641, 'batch_loss/train': 0.6583535165991634} +12/28/2021 13:54:49 - INFO - codeparrot_training - Step 42642: {'lr': 2.6425965532337448e-05, 'samples': 21833216, 'steps': 42642, 'batch_loss/train': 0.6509473692858592} +12/28/2021 13:55:00 - INFO - codeparrot_training - Step 42643: {'lr': 2.6418912358722626e-05, 'samples': 21833728, 'steps': 42643, 'batch_loss/train': 0.7536401017569005} +12/28/2021 13:55:10 - INFO - codeparrot_training - Step 42644: {'lr': 2.641186007398183e-05, 'samples': 21834240, 'steps': 42644, 'batch_loss/train': 0.6629094027448446} +12/28/2021 13:55:22 - INFO - codeparrot_training - Step 42645: {'lr': 2.64048086781431e-05, 'samples': 21834752, 'steps': 42645, 'batch_loss/train': 0.5506430640816689} +12/28/2021 13:55:33 - INFO - codeparrot_training - Step 42646: {'lr': 2.6397758171234383e-05, 'samples': 21835264, 'steps': 42646, 'batch_loss/train': 0.6761122043244541} +12/28/2021 13:55:44 - INFO - codeparrot_training - Step 42647: {'lr': 2.639070855328371e-05, 'samples': 21835776, 'steps': 42647, 'batch_loss/train': 0.7262574099004269} +12/28/2021 13:55:54 - INFO - codeparrot_training - Step 42648: {'lr': 2.6383659824319228e-05, 'samples': 21836288, 'steps': 42648, 'batch_loss/train': 0.6504286681301892} +12/28/2021 13:56:08 - INFO - codeparrot_training - Step 42649: {'lr': 2.637661198436883e-05, 'samples': 21836800, 'steps': 42649, 'batch_loss/train': 0.6435676994733512} +12/28/2021 13:56:19 - INFO - codeparrot_training - Step 42650: {'lr': 2.6369565033460607e-05, 'samples': 21837312, 'steps': 42650, 'batch_loss/train': 0.7217610152438283} +12/28/2021 13:56:29 - INFO - codeparrot_training - Step 42651: {'lr': 2.6362518971622535e-05, 'samples': 21837824, 'steps': 42651, 'batch_loss/train': 0.5809395376127213} +12/28/2021 13:56:42 - INFO - codeparrot_training - Step 42652: {'lr': 2.6355473798882646e-05, 'samples': 21838336, 'steps': 42652, 'batch_loss/train': 0.6126077813096344} +12/28/2021 13:56:52 - INFO - codeparrot_training - Step 42653: {'lr': 2.6348429515268945e-05, 'samples': 21838848, 'steps': 42653, 'batch_loss/train': 0.5868668639159296} +12/28/2021 13:57:03 - INFO - codeparrot_training - Step 42654: {'lr': 2.6341386120809497e-05, 'samples': 21839360, 'steps': 42654, 'batch_loss/train': 0.7217547670006752} +12/28/2021 13:57:15 - INFO - codeparrot_training - Step 42655: {'lr': 2.6334343615532162e-05, 'samples': 21839872, 'steps': 42655, 'batch_loss/train': 0.6668973327614367} +12/28/2021 13:57:26 - INFO - codeparrot_training - Step 42656: {'lr': 2.6327301999465065e-05, 'samples': 21840384, 'steps': 42656, 'batch_loss/train': 0.7632475201971829} +12/28/2021 13:57:36 - INFO - codeparrot_training - Step 42657: {'lr': 2.6320261272636202e-05, 'samples': 21840896, 'steps': 42657, 'batch_loss/train': 0.6405709958635271} +12/28/2021 13:57:47 - INFO - codeparrot_training - Step 42658: {'lr': 2.631322143507342e-05, 'samples': 21841408, 'steps': 42658, 'batch_loss/train': 0.646905355155468} +12/28/2021 13:57:59 - INFO - codeparrot_training - Step 42659: {'lr': 2.6306182486804915e-05, 'samples': 21841920, 'steps': 42659, 'batch_loss/train': 0.7268227366730571} +12/28/2021 13:58:10 - INFO - codeparrot_training - Step 42660: {'lr': 2.6299144427858524e-05, 'samples': 21842432, 'steps': 42660, 'batch_loss/train': 0.632962703704834} +12/28/2021 13:58:20 - INFO - codeparrot_training - Step 42661: {'lr': 2.6292107258262254e-05, 'samples': 21842944, 'steps': 42661, 'batch_loss/train': 0.6920384906115942} +12/28/2021 13:58:33 - INFO - codeparrot_training - Step 42662: {'lr': 2.6285070978044083e-05, 'samples': 21843456, 'steps': 42662, 'batch_loss/train': 0.7482318715192378} +12/28/2021 13:58:43 - INFO - codeparrot_training - Step 42663: {'lr': 2.627803558723202e-05, 'samples': 21843968, 'steps': 42663, 'batch_loss/train': 0.6776120068971068} +12/28/2021 13:58:54 - INFO - codeparrot_training - Step 42664: {'lr': 2.6271001085854036e-05, 'samples': 21844480, 'steps': 42664, 'batch_loss/train': 0.6937527982518077} +12/28/2021 13:59:08 - INFO - codeparrot_training - Step 42665: {'lr': 2.6263967473938083e-05, 'samples': 21844992, 'steps': 42665, 'batch_loss/train': 0.5958826458081603} +12/28/2021 13:59:18 - INFO - codeparrot_training - Step 42666: {'lr': 2.6256934751512028e-05, 'samples': 21845504, 'steps': 42666, 'batch_loss/train': 0.7347174407914281} +12/28/2021 13:59:29 - INFO - codeparrot_training - Step 42667: {'lr': 2.6249902918603992e-05, 'samples': 21846016, 'steps': 42667, 'batch_loss/train': 0.5774855406489223} +12/28/2021 13:59:41 - INFO - codeparrot_training - Step 42668: {'lr': 2.624287197524186e-05, 'samples': 21846528, 'steps': 42668, 'batch_loss/train': 0.734719910658896} +12/28/2021 13:59:52 - INFO - codeparrot_training - Step 42669: {'lr': 2.6235841921453536e-05, 'samples': 21847040, 'steps': 42669, 'batch_loss/train': 0.6459910857956856} +12/28/2021 14:00:03 - INFO - codeparrot_training - Step 42670: {'lr': 2.6228812757267016e-05, 'samples': 21847552, 'steps': 42670, 'batch_loss/train': 0.6158253633766435} +12/28/2021 14:00:13 - INFO - codeparrot_training - Step 42671: {'lr': 2.6221784482710314e-05, 'samples': 21848064, 'steps': 42671, 'batch_loss/train': 0.6897213058546185} +12/28/2021 14:00:27 - INFO - codeparrot_training - Step 42672: {'lr': 2.6214757097811266e-05, 'samples': 21848576, 'steps': 42672, 'batch_loss/train': 0.7105961139313877} +12/28/2021 14:00:38 - INFO - codeparrot_training - Step 42673: {'lr': 2.620773060259779e-05, 'samples': 21849088, 'steps': 42673, 'batch_loss/train': 0.8798572331434116} +12/28/2021 14:00:49 - INFO - codeparrot_training - Step 42674: {'lr': 2.620070499709795e-05, 'samples': 21849600, 'steps': 42674, 'batch_loss/train': 0.5753807719156612} +12/28/2021 14:01:01 - INFO - codeparrot_training - Step 42675: {'lr': 2.619368028133959e-05, 'samples': 21850112, 'steps': 42675, 'batch_loss/train': 0.7281739199534059} +12/28/2021 14:01:11 - INFO - codeparrot_training - Step 42676: {'lr': 2.6186656455350622e-05, 'samples': 21850624, 'steps': 42676, 'batch_loss/train': 0.7511681746691465} +12/28/2021 14:01:22 - INFO - codeparrot_training - Step 42677: {'lr': 2.617963351915903e-05, 'samples': 21851136, 'steps': 42677, 'batch_loss/train': 0.7124555300688371} +12/28/2021 14:01:34 - INFO - codeparrot_training - Step 42678: {'lr': 2.617261147279268e-05, 'samples': 21851648, 'steps': 42678, 'batch_loss/train': 0.7763481643050909} +12/28/2021 14:01:45 - INFO - codeparrot_training - Step 42679: {'lr': 2.616559031627952e-05, 'samples': 21852160, 'steps': 42679, 'batch_loss/train': 0.6218530816840939} +12/28/2021 14:01:55 - INFO - codeparrot_training - Step 42680: {'lr': 2.6158570049647446e-05, 'samples': 21852672, 'steps': 42680, 'batch_loss/train': 0.6712822439149022} +12/28/2021 14:02:09 - INFO - codeparrot_training - Step 42681: {'lr': 2.615155067292438e-05, 'samples': 21853184, 'steps': 42681, 'batch_loss/train': 0.655717562418431} +12/28/2021 14:02:20 - INFO - codeparrot_training - Step 42682: {'lr': 2.6144532186138242e-05, 'samples': 21853696, 'steps': 42682, 'batch_loss/train': 0.5959334959916305} +12/28/2021 14:02:31 - INFO - codeparrot_training - Step 42683: {'lr': 2.6137514589316956e-05, 'samples': 21854208, 'steps': 42683, 'batch_loss/train': 0.8341362401843071} +12/28/2021 14:02:41 - INFO - codeparrot_training - Step 42684: {'lr': 2.613049788248828e-05, 'samples': 21854720, 'steps': 42684, 'batch_loss/train': 0.7855358086526394} +12/28/2021 14:02:53 - INFO - codeparrot_training - Step 42685: {'lr': 2.6123482065680326e-05, 'samples': 21855232, 'steps': 42685, 'batch_loss/train': 0.6690132618532516} +12/28/2021 14:03:04 - INFO - codeparrot_training - Step 42686: {'lr': 2.6116467138920823e-05, 'samples': 21855744, 'steps': 42686, 'batch_loss/train': 0.7577284411527216} +12/28/2021 14:03:15 - INFO - codeparrot_training - Step 42687: {'lr': 2.6109453102237723e-05, 'samples': 21856256, 'steps': 42687, 'batch_loss/train': 0.6558839688077569} +12/28/2021 14:03:29 - INFO - codeparrot_training - Step 42688: {'lr': 2.610243995565889e-05, 'samples': 21856768, 'steps': 42688, 'batch_loss/train': 0.836928189266473} +12/28/2021 14:03:39 - INFO - codeparrot_training - Step 42689: {'lr': 2.609542769921222e-05, 'samples': 21857280, 'steps': 42689, 'batch_loss/train': 0.6446058531291783} +12/28/2021 14:03:50 - INFO - codeparrot_training - Step 42690: {'lr': 2.608841633292558e-05, 'samples': 21857792, 'steps': 42690, 'batch_loss/train': 0.6489140770863742} +12/28/2021 14:04:02 - INFO - codeparrot_training - Step 42691: {'lr': 2.6081405856826835e-05, 'samples': 21858304, 'steps': 42691, 'batch_loss/train': 0.6735691466019489} +12/28/2021 14:04:13 - INFO - codeparrot_training - Step 42692: {'lr': 2.607439627094388e-05, 'samples': 21858816, 'steps': 42692, 'batch_loss/train': 0.7180614592507482} +12/28/2021 14:04:24 - INFO - codeparrot_training - Step 42693: {'lr': 2.606738757530458e-05, 'samples': 21859328, 'steps': 42693, 'batch_loss/train': 0.6305480939336121} +12/28/2021 14:04:35 - INFO - codeparrot_training - Step 42694: {'lr': 2.606037976993686e-05, 'samples': 21859840, 'steps': 42694, 'batch_loss/train': 0.6423400822095573} +12/28/2021 14:04:46 - INFO - codeparrot_training - Step 42695: {'lr': 2.605337285486839e-05, 'samples': 21860352, 'steps': 42695, 'batch_loss/train': 0.7661818042397499} +12/28/2021 14:04:57 - INFO - codeparrot_training - Step 42696: {'lr': 2.6046366830127205e-05, 'samples': 21860864, 'steps': 42696, 'batch_loss/train': 0.6420432568993419} +12/28/2021 14:05:07 - INFO - codeparrot_training - Step 42697: {'lr': 2.603936169574117e-05, 'samples': 21861376, 'steps': 42697, 'batch_loss/train': 0.7010422325693071} +12/28/2021 14:05:20 - INFO - codeparrot_training - Step 42698: {'lr': 2.6032357451738013e-05, 'samples': 21861888, 'steps': 42698, 'batch_loss/train': 0.7938134018331766} +12/28/2021 14:05:30 - INFO - codeparrot_training - Step 42699: {'lr': 2.6025354098145576e-05, 'samples': 21862400, 'steps': 42699, 'batch_loss/train': 0.44332133073476143} +12/28/2021 14:05:41 - INFO - codeparrot_training - Step 42700: {'lr': 2.601835163499186e-05, 'samples': 21862912, 'steps': 42700, 'batch_loss/train': 0.6936666793189943} +12/28/2021 14:05:55 - INFO - codeparrot_training - Step 42701: {'lr': 2.601135006230457e-05, 'samples': 21863424, 'steps': 42701, 'batch_loss/train': 0.7132807960733771} +12/28/2021 14:06:06 - INFO - codeparrot_training - Step 42702: {'lr': 2.6004349380111563e-05, 'samples': 21863936, 'steps': 42702, 'batch_loss/train': 0.7249266132712364} +12/28/2021 14:06:16 - INFO - codeparrot_training - Step 42703: {'lr': 2.599734958844069e-05, 'samples': 21864448, 'steps': 42703, 'batch_loss/train': 0.6998108634725213} +12/28/2021 14:06:29 - INFO - codeparrot_training - Step 42704: {'lr': 2.599035068731978e-05, 'samples': 21864960, 'steps': 42704, 'batch_loss/train': 0.6226721687417012} +12/28/2021 14:06:39 - INFO - codeparrot_training - Step 42705: {'lr': 2.598335267677665e-05, 'samples': 21865472, 'steps': 42705, 'batch_loss/train': 0.6622943640686572} +12/28/2021 14:06:50 - INFO - codeparrot_training - Step 42706: {'lr': 2.5976355556839108e-05, 'samples': 21865984, 'steps': 42706, 'batch_loss/train': 0.7962149479426444} +12/28/2021 14:07:01 - INFO - codeparrot_training - Step 42707: {'lr': 2.5969359327535024e-05, 'samples': 21866496, 'steps': 42707, 'batch_loss/train': 0.6450060431379825} +12/28/2021 14:07:13 - INFO - codeparrot_training - Step 42708: {'lr': 2.596236398889215e-05, 'samples': 21867008, 'steps': 42708, 'batch_loss/train': 0.6783351209014654} +12/28/2021 14:07:24 - INFO - codeparrot_training - Step 42709: {'lr': 2.5955369540938385e-05, 'samples': 21867520, 'steps': 42709, 'batch_loss/train': 0.5752347942034248} +12/28/2021 14:07:34 - INFO - codeparrot_training - Step 42710: {'lr': 2.5948375983701366e-05, 'samples': 21868032, 'steps': 42710, 'batch_loss/train': 0.7420150395482779} +12/28/2021 14:07:48 - INFO - codeparrot_training - Step 42711: {'lr': 2.5941383317209104e-05, 'samples': 21868544, 'steps': 42711, 'batch_loss/train': 0.6711917980574071} +12/28/2021 14:07:59 - INFO - codeparrot_training - Step 42712: {'lr': 2.5934391541489243e-05, 'samples': 21869056, 'steps': 42712, 'batch_loss/train': 0.7332440417958423} +12/28/2021 14:08:09 - INFO - codeparrot_training - Step 42713: {'lr': 2.592740065656965e-05, 'samples': 21869568, 'steps': 42713, 'batch_loss/train': 0.7006997730350122} +12/28/2021 14:08:22 - INFO - codeparrot_training - Step 42714: {'lr': 2.5920410662478106e-05, 'samples': 21870080, 'steps': 42714, 'batch_loss/train': 0.6947369808331132} +12/28/2021 14:08:32 - INFO - codeparrot_training - Step 42715: {'lr': 2.5913421559242423e-05, 'samples': 21870592, 'steps': 42715, 'batch_loss/train': 0.6747318613342941} +12/28/2021 14:08:43 - INFO - codeparrot_training - Step 42716: {'lr': 2.590643334689033e-05, 'samples': 21871104, 'steps': 42716, 'batch_loss/train': 0.6451702192425728} +12/28/2021 14:08:53 - INFO - codeparrot_training - Step 42717: {'lr': 2.5899446025449665e-05, 'samples': 21871616, 'steps': 42717, 'batch_loss/train': 0.6613732678815722} +12/28/2021 14:09:08 - INFO - codeparrot_training - Step 42718: {'lr': 2.5892459594948154e-05, 'samples': 21872128, 'steps': 42718, 'batch_loss/train': 0.6830317909189034} +12/28/2021 14:09:18 - INFO - codeparrot_training - Step 42719: {'lr': 2.5885474055413638e-05, 'samples': 21872640, 'steps': 42719, 'batch_loss/train': 0.5512840462324675} +12/28/2021 14:09:29 - INFO - codeparrot_training - Step 42720: {'lr': 2.587848940687387e-05, 'samples': 21873152, 'steps': 42720, 'batch_loss/train': 0.6923006419092417} +12/28/2021 14:09:41 - INFO - codeparrot_training - Step 42721: {'lr': 2.587150564935653e-05, 'samples': 21873664, 'steps': 42721, 'batch_loss/train': 0.6877042753621936} +12/28/2021 14:09:51 - INFO - codeparrot_training - Step 42722: {'lr': 2.5864522782889503e-05, 'samples': 21874176, 'steps': 42722, 'batch_loss/train': 0.6821292047388852} +12/28/2021 14:10:02 - INFO - codeparrot_training - Step 42723: {'lr': 2.585754080750055e-05, 'samples': 21874688, 'steps': 42723, 'batch_loss/train': 0.7414089194498956} +12/28/2021 14:10:14 - INFO - codeparrot_training - Step 42724: {'lr': 2.5850559723217344e-05, 'samples': 21875200, 'steps': 42724, 'batch_loss/train': 0.7096676058135927} +12/28/2021 14:10:25 - INFO - codeparrot_training - Step 42725: {'lr': 2.5843579530067608e-05, 'samples': 21875712, 'steps': 42725, 'batch_loss/train': 0.7096436787396669} +12/28/2021 14:10:36 - INFO - codeparrot_training - Step 42726: {'lr': 2.5836600228079267e-05, 'samples': 21876224, 'steps': 42726, 'batch_loss/train': 0.7268537264317274} +12/28/2021 14:10:49 - INFO - codeparrot_training - Step 42727: {'lr': 2.5829621817279907e-05, 'samples': 21876736, 'steps': 42727, 'batch_loss/train': 0.6648406460299157} +12/28/2021 14:11:00 - INFO - codeparrot_training - Step 42728: {'lr': 2.5822644297697345e-05, 'samples': 21877248, 'steps': 42728, 'batch_loss/train': 0.8945381678640842} +12/28/2021 14:11:11 - INFO - codeparrot_training - Step 42729: {'lr': 2.5815667669359307e-05, 'samples': 21877760, 'steps': 42729, 'batch_loss/train': 0.671026018448174} +12/28/2021 14:11:21 - INFO - codeparrot_training - Step 42730: {'lr': 2.580869193229349e-05, 'samples': 21878272, 'steps': 42730, 'batch_loss/train': 0.684489656239748} +12/28/2021 14:11:34 - INFO - codeparrot_training - Step 42731: {'lr': 2.5801717086527703e-05, 'samples': 21878784, 'steps': 42731, 'batch_loss/train': 0.7406664853915572} +12/28/2021 14:11:44 - INFO - codeparrot_training - Step 42732: {'lr': 2.5794743132089597e-05, 'samples': 21879296, 'steps': 42732, 'batch_loss/train': 0.8505321433767676} +12/28/2021 14:11:55 - INFO - codeparrot_training - Step 42733: {'lr': 2.5787770069006956e-05, 'samples': 21879808, 'steps': 42733, 'batch_loss/train': 0.6728843036107719} +12/28/2021 14:12:07 - INFO - codeparrot_training - Step 42734: {'lr': 2.5780797897307474e-05, 'samples': 21880320, 'steps': 42734, 'batch_loss/train': 0.6487867531832308} +12/28/2021 14:12:18 - INFO - codeparrot_training - Step 42735: {'lr': 2.5773826617018936e-05, 'samples': 21880832, 'steps': 42735, 'batch_loss/train': 0.7023163144476712} +12/28/2021 14:12:29 - INFO - codeparrot_training - Step 42736: {'lr': 2.5766856228168906e-05, 'samples': 21881344, 'steps': 42736, 'batch_loss/train': 0.7395326755940914} +12/28/2021 14:12:41 - INFO - codeparrot_training - Step 42737: {'lr': 2.575988673078522e-05, 'samples': 21881856, 'steps': 42737, 'batch_loss/train': 0.8164555989205837} +12/28/2021 14:12:51 - INFO - codeparrot_training - Step 42738: {'lr': 2.5752918124895636e-05, 'samples': 21882368, 'steps': 42738, 'batch_loss/train': 0.7279195077717304} +12/28/2021 14:13:02 - INFO - codeparrot_training - Step 42739: {'lr': 2.5745950410527685e-05, 'samples': 21882880, 'steps': 42739, 'batch_loss/train': 0.8268435932695866} +12/28/2021 14:13:12 - INFO - codeparrot_training - Step 42740: {'lr': 2.573898358770918e-05, 'samples': 21883392, 'steps': 42740, 'batch_loss/train': 0.6816207598894835} +12/28/2021 14:13:26 - INFO - codeparrot_training - Step 42741: {'lr': 2.573201765646782e-05, 'samples': 21883904, 'steps': 42741, 'batch_loss/train': 0.6372739658690989} +12/28/2021 14:13:37 - INFO - codeparrot_training - Step 42742: {'lr': 2.5725052616831253e-05, 'samples': 21884416, 'steps': 42742, 'batch_loss/train': 0.7084128549322486} +12/28/2021 14:13:48 - INFO - codeparrot_training - Step 42743: {'lr': 2.57180884688272e-05, 'samples': 21884928, 'steps': 42743, 'batch_loss/train': 0.7360221417620778} +12/28/2021 14:14:00 - INFO - codeparrot_training - Step 42744: {'lr': 2.5711125212483338e-05, 'samples': 21885440, 'steps': 42744, 'batch_loss/train': 0.594686200201977} +12/28/2021 14:14:11 - INFO - codeparrot_training - Step 42745: {'lr': 2.570416284782734e-05, 'samples': 21885952, 'steps': 42745, 'batch_loss/train': 0.6182454983354546} +12/28/2021 14:14:21 - INFO - codeparrot_training - Step 42746: {'lr': 2.569720137488693e-05, 'samples': 21886464, 'steps': 42746, 'batch_loss/train': 0.7049994447734207} +12/28/2021 14:14:35 - INFO - codeparrot_training - Step 42747: {'lr': 2.569024079368973e-05, 'samples': 21886976, 'steps': 42747, 'batch_loss/train': 0.7046919353306293} +12/28/2021 14:14:46 - INFO - codeparrot_training - Step 42748: {'lr': 2.5683281104263433e-05, 'samples': 21887488, 'steps': 42748, 'batch_loss/train': 0.7549136239103973} +12/28/2021 14:14:56 - INFO - codeparrot_training - Step 42749: {'lr': 2.5676322306635773e-05, 'samples': 21888000, 'steps': 42749, 'batch_loss/train': 0.6928617982193828} +12/28/2021 14:15:08 - INFO - codeparrot_training - Step 42750: {'lr': 2.566936440083431e-05, 'samples': 21888512, 'steps': 42750, 'batch_loss/train': 0.7821282586082816} +12/28/2021 14:15:19 - INFO - codeparrot_training - Step 42751: {'lr': 2.566240738688669e-05, 'samples': 21889024, 'steps': 42751, 'batch_loss/train': 0.7011549919843674} +12/28/2021 14:15:30 - INFO - codeparrot_training - Step 42752: {'lr': 2.5655451264820746e-05, 'samples': 21889536, 'steps': 42752, 'batch_loss/train': 0.6487720826116856} +12/28/2021 14:15:40 - INFO - codeparrot_training - Step 42753: {'lr': 2.564849603466396e-05, 'samples': 21890048, 'steps': 42753, 'batch_loss/train': 0.7481093555688858} +12/28/2021 14:15:53 - INFO - codeparrot_training - Step 42754: {'lr': 2.5641541696444005e-05, 'samples': 21890560, 'steps': 42754, 'batch_loss/train': 0.7410822524689138} +12/28/2021 14:16:03 - INFO - codeparrot_training - Step 42755: {'lr': 2.5634588250188634e-05, 'samples': 21891072, 'steps': 42755, 'batch_loss/train': 0.7211113171651959} +12/28/2021 14:16:14 - INFO - codeparrot_training - Step 42756: {'lr': 2.5627635695925384e-05, 'samples': 21891584, 'steps': 42756, 'batch_loss/train': 0.6444102439563721} +12/28/2021 14:16:28 - INFO - codeparrot_training - Step 42757: {'lr': 2.562068403368195e-05, 'samples': 21892096, 'steps': 42757, 'batch_loss/train': 0.659648559987545} +12/28/2021 14:16:38 - INFO - codeparrot_training - Step 42758: {'lr': 2.5613733263485955e-05, 'samples': 21892608, 'steps': 42758, 'batch_loss/train': 0.6973035028204322} +12/28/2021 14:16:49 - INFO - codeparrot_training - Step 42759: {'lr': 2.5606783385365012e-05, 'samples': 21893120, 'steps': 42759, 'batch_loss/train': 0.6015217194799334} +12/28/2021 14:17:01 - INFO - codeparrot_training - Step 42760: {'lr': 2.5599834399346793e-05, 'samples': 21893632, 'steps': 42760, 'batch_loss/train': 0.9216554001905024} +12/28/2021 14:17:12 - INFO - codeparrot_training - Step 42761: {'lr': 2.5592886305458917e-05, 'samples': 21894144, 'steps': 42761, 'batch_loss/train': 1.5440112696960568} +12/28/2021 14:17:22 - INFO - codeparrot_training - Step 42762: {'lr': 2.5585939103728917e-05, 'samples': 21894656, 'steps': 42762, 'batch_loss/train': 0.76991150341928} +12/28/2021 14:17:33 - INFO - codeparrot_training - Step 42763: {'lr': 2.5578992794184547e-05, 'samples': 21895168, 'steps': 42763, 'batch_loss/train': 0.7136429056990892} +12/28/2021 14:17:47 - INFO - codeparrot_training - Step 42764: {'lr': 2.55720473768534e-05, 'samples': 21895680, 'steps': 42764, 'batch_loss/train': 0.6414009278523736} +12/28/2021 14:17:58 - INFO - codeparrot_training - Step 42765: {'lr': 2.5565102851763002e-05, 'samples': 21896192, 'steps': 42765, 'batch_loss/train': 0.640815129969269} +12/28/2021 14:18:08 - INFO - codeparrot_training - Step 42766: {'lr': 2.555815921894103e-05, 'samples': 21896704, 'steps': 42766, 'batch_loss/train': 0.7031587292440236} +12/28/2021 14:18:20 - INFO - codeparrot_training - Step 42767: {'lr': 2.5551216478415045e-05, 'samples': 21897216, 'steps': 42767, 'batch_loss/train': 0.6807758179493248} +12/28/2021 14:18:31 - INFO - codeparrot_training - Step 42768: {'lr': 2.5544274630212693e-05, 'samples': 21897728, 'steps': 42768, 'batch_loss/train': 0.7680954309180379} +12/28/2021 14:18:42 - INFO - codeparrot_training - Step 42769: {'lr': 2.5537333674361534e-05, 'samples': 21898240, 'steps': 42769, 'batch_loss/train': 0.6853818618692458} +12/28/2021 14:18:54 - INFO - codeparrot_training - Step 42770: {'lr': 2.5530393610889214e-05, 'samples': 21898752, 'steps': 42770, 'batch_loss/train': 0.692243620287627} +12/28/2021 14:19:04 - INFO - codeparrot_training - Step 42771: {'lr': 2.5523454439823262e-05, 'samples': 21899264, 'steps': 42771, 'batch_loss/train': 0.79577611759305} +12/28/2021 14:19:15 - INFO - codeparrot_training - Step 42772: {'lr': 2.55165161611913e-05, 'samples': 21899776, 'steps': 42772, 'batch_loss/train': 0.6576126329600811} +12/28/2021 14:19:26 - INFO - codeparrot_training - Step 42773: {'lr': 2.5509578775020914e-05, 'samples': 21900288, 'steps': 42773, 'batch_loss/train': 0.7357192793861032} +12/28/2021 14:19:38 - INFO - codeparrot_training - Step 42774: {'lr': 2.5502642281339693e-05, 'samples': 21900800, 'steps': 42774, 'batch_loss/train': 0.5522103044204414} +12/28/2021 14:19:49 - INFO - codeparrot_training - Step 42775: {'lr': 2.549570668017523e-05, 'samples': 21901312, 'steps': 42775, 'batch_loss/train': 0.7295858213328756} +12/28/2021 14:19:59 - INFO - codeparrot_training - Step 42776: {'lr': 2.5488771971555023e-05, 'samples': 21901824, 'steps': 42776, 'batch_loss/train': 0.7363025257363915} +12/28/2021 14:20:13 - INFO - codeparrot_training - Step 42777: {'lr': 2.548183815550664e-05, 'samples': 21902336, 'steps': 42777, 'batch_loss/train': 0.694617934525013} +12/28/2021 14:20:24 - INFO - codeparrot_training - Step 42778: {'lr': 2.5474905232057784e-05, 'samples': 21902848, 'steps': 42778, 'batch_loss/train': 0.6330925300717354} +12/28/2021 14:20:34 - INFO - codeparrot_training - Step 42779: {'lr': 2.5467973201235896e-05, 'samples': 21903360, 'steps': 42779, 'batch_loss/train': 0.7470101742073894} +12/28/2021 14:20:47 - INFO - codeparrot_training - Step 42780: {'lr': 2.5461042063068485e-05, 'samples': 21903872, 'steps': 42780, 'batch_loss/train': 0.7204806893132627} +12/28/2021 14:20:57 - INFO - codeparrot_training - Step 42781: {'lr': 2.545411181758331e-05, 'samples': 21904384, 'steps': 42781, 'batch_loss/train': 0.7517221737653017} +12/28/2021 14:21:08 - INFO - codeparrot_training - Step 42782: {'lr': 2.544718246480776e-05, 'samples': 21904896, 'steps': 42782, 'batch_loss/train': 0.7077350728213787} +12/28/2021 14:21:18 - INFO - codeparrot_training - Step 42783: {'lr': 2.5440254004769404e-05, 'samples': 21905408, 'steps': 42783, 'batch_loss/train': 0.7426349364686757} +12/28/2021 14:21:31 - INFO - codeparrot_training - Step 42784: {'lr': 2.5433326437495824e-05, 'samples': 21905920, 'steps': 42784, 'batch_loss/train': 0.6405955068767071} +12/28/2021 14:21:41 - INFO - codeparrot_training - Step 42785: {'lr': 2.5426399763014528e-05, 'samples': 21906432, 'steps': 42785, 'batch_loss/train': 0.5949881731648929} +12/28/2021 14:21:52 - INFO - codeparrot_training - Step 42786: {'lr': 2.541947398135308e-05, 'samples': 21906944, 'steps': 42786, 'batch_loss/train': 0.5637305612035561} +12/28/2021 14:22:06 - INFO - codeparrot_training - Step 42787: {'lr': 2.5412549092539066e-05, 'samples': 21907456, 'steps': 42787, 'batch_loss/train': 0.8043093690648675} +12/28/2021 14:22:17 - INFO - codeparrot_training - Step 42788: {'lr': 2.5405625096599853e-05, 'samples': 21907968, 'steps': 42788, 'batch_loss/train': 0.6525299986824393} +12/28/2021 14:22:27 - INFO - codeparrot_training - Step 42789: {'lr': 2.5398701993563115e-05, 'samples': 21908480, 'steps': 42789, 'batch_loss/train': 0.7140269577503204} +12/28/2021 14:22:39 - INFO - codeparrot_training - Step 42790: {'lr': 2.5391779783456388e-05, 'samples': 21908992, 'steps': 42790, 'batch_loss/train': 0.7434944584965706} +12/28/2021 14:22:50 - INFO - codeparrot_training - Step 42791: {'lr': 2.5384858466307003e-05, 'samples': 21909504, 'steps': 42791, 'batch_loss/train': 0.7174299934995361} +12/28/2021 14:23:01 - INFO - codeparrot_training - Step 42792: {'lr': 2.5377938042142695e-05, 'samples': 21910016, 'steps': 42792, 'batch_loss/train': 0.7593349497765303} +12/28/2021 14:23:13 - INFO - codeparrot_training - Step 42793: {'lr': 2.537101851099094e-05, 'samples': 21910528, 'steps': 42793, 'batch_loss/train': 0.6999371766578406} +12/28/2021 14:23:23 - INFO - codeparrot_training - Step 42794: {'lr': 2.536409987287913e-05, 'samples': 21911040, 'steps': 42794, 'batch_loss/train': 0.6934655061922967} +12/28/2021 14:23:34 - INFO - codeparrot_training - Step 42795: {'lr': 2.535718212783486e-05, 'samples': 21911552, 'steps': 42795, 'batch_loss/train': 0.7147474754601717} +12/28/2021 14:23:45 - INFO - codeparrot_training - Step 42796: {'lr': 2.5350265275885605e-05, 'samples': 21912064, 'steps': 42796, 'batch_loss/train': 0.5786329702241346} +12/28/2021 14:23:59 - INFO - codeparrot_training - Step 42797: {'lr': 2.5343349317058868e-05, 'samples': 21912576, 'steps': 42797, 'batch_loss/train': 0.7859419812448323} +12/28/2021 14:24:09 - INFO - codeparrot_training - Step 42798: {'lr': 2.533643425138213e-05, 'samples': 21913088, 'steps': 42798, 'batch_loss/train': 0.6968350079841912} +12/28/2021 14:24:20 - INFO - codeparrot_training - Step 42799: {'lr': 2.532952007888292e-05, 'samples': 21913600, 'steps': 42799, 'batch_loss/train': 0.5781924861948937} +12/28/2021 14:24:32 - INFO - codeparrot_training - Step 42800: {'lr': 2.5322606799588697e-05, 'samples': 21914112, 'steps': 42800, 'batch_loss/train': 0.6763314958661795} +12/28/2021 14:24:43 - INFO - codeparrot_training - Step 42801: {'lr': 2.5315694413527017e-05, 'samples': 21914624, 'steps': 42801, 'batch_loss/train': 0.6646315176039934} +12/28/2021 14:24:53 - INFO - codeparrot_training - Step 42802: {'lr': 2.530878292072525e-05, 'samples': 21915136, 'steps': 42802, 'batch_loss/train': 0.7928918190300465} +12/28/2021 14:25:07 - INFO - codeparrot_training - Step 42803: {'lr': 2.5301872321210868e-05, 'samples': 21915648, 'steps': 42803, 'batch_loss/train': 0.7807258386164904} +12/28/2021 14:25:18 - INFO - codeparrot_training - Step 42804: {'lr': 2.529496261501149e-05, 'samples': 21916160, 'steps': 42804, 'batch_loss/train': 0.7239598288433626} +12/28/2021 14:25:29 - INFO - codeparrot_training - Step 42805: {'lr': 2.5288053802154432e-05, 'samples': 21916672, 'steps': 42805, 'batch_loss/train': 0.4254409911809489} +12/28/2021 14:25:41 - INFO - codeparrot_training - Step 42806: {'lr': 2.5281145882667194e-05, 'samples': 21917184, 'steps': 42806, 'batch_loss/train': 0.6412277482450008} +12/28/2021 14:25:52 - INFO - codeparrot_training - Step 42807: {'lr': 2.527423885657737e-05, 'samples': 21917696, 'steps': 42807, 'batch_loss/train': 0.6783397980616428} +12/28/2021 14:26:03 - INFO - codeparrot_training - Step 42808: {'lr': 2.526733272391227e-05, 'samples': 21918208, 'steps': 42808, 'batch_loss/train': 0.6620555591362063} +12/28/2021 14:26:13 - INFO - codeparrot_training - Step 42809: {'lr': 2.5260427484699395e-05, 'samples': 21918720, 'steps': 42809, 'batch_loss/train': 0.6546889995224774} +12/28/2021 14:26:26 - INFO - codeparrot_training - Step 42810: {'lr': 2.52535231389662e-05, 'samples': 21919232, 'steps': 42810, 'batch_loss/train': 0.6171129299327731} +12/28/2021 14:26:36 - INFO - codeparrot_training - Step 42811: {'lr': 2.5246619686740136e-05, 'samples': 21919744, 'steps': 42811, 'batch_loss/train': 0.6654891101643443} +12/28/2021 14:26:47 - INFO - codeparrot_training - Step 42812: {'lr': 2.523971712804865e-05, 'samples': 21920256, 'steps': 42812, 'batch_loss/train': 0.5938084699737374} +12/28/2021 14:26:59 - INFO - codeparrot_training - Step 42813: {'lr': 2.523281546291925e-05, 'samples': 21920768, 'steps': 42813, 'batch_loss/train': 0.7413388143759221} +12/28/2021 14:27:10 - INFO - codeparrot_training - Step 42814: {'lr': 2.522591469137919e-05, 'samples': 21921280, 'steps': 42814, 'batch_loss/train': 1.0359428878873587} +12/28/2021 14:27:20 - INFO - codeparrot_training - Step 42815: {'lr': 2.5219014813456065e-05, 'samples': 21921792, 'steps': 42815, 'batch_loss/train': 0.6838598009198904} +12/28/2021 14:27:34 - INFO - codeparrot_training - Step 42816: {'lr': 2.5212115829177316e-05, 'samples': 21922304, 'steps': 42816, 'batch_loss/train': 0.7425763597711921} +12/28/2021 14:27:45 - INFO - codeparrot_training - Step 42817: {'lr': 2.5205217738570236e-05, 'samples': 21922816, 'steps': 42817, 'batch_loss/train': 0.6282291430979967} +12/28/2021 14:27:56 - INFO - codeparrot_training - Step 42818: {'lr': 2.519832054166235e-05, 'samples': 21923328, 'steps': 42818, 'batch_loss/train': 0.759013238362968} +12/28/2021 14:28:06 - INFO - codeparrot_training - Step 42819: {'lr': 2.5191424238481143e-05, 'samples': 21923840, 'steps': 42819, 'batch_loss/train': 0.7557497806847095} +12/28/2021 14:28:19 - INFO - codeparrot_training - Step 42820: {'lr': 2.5184528829053866e-05, 'samples': 21924352, 'steps': 42820, 'batch_loss/train': 0.6783348633907735} +12/28/2021 14:28:29 - INFO - codeparrot_training - Step 42821: {'lr': 2.5177634313408056e-05, 'samples': 21924864, 'steps': 42821, 'batch_loss/train': 0.5342044270655606} +12/28/2021 14:28:40 - INFO - codeparrot_training - Step 42822: {'lr': 2.5170740691571046e-05, 'samples': 21925376, 'steps': 42822, 'batch_loss/train': 0.803897196543403} +12/28/2021 14:28:52 - INFO - codeparrot_training - Step 42823: {'lr': 2.5163847963570298e-05, 'samples': 21925888, 'steps': 42823, 'batch_loss/train': 0.6911729069543071} +12/28/2021 14:29:03 - INFO - codeparrot_training - Step 42824: {'lr': 2.5156956129433197e-05, 'samples': 21926400, 'steps': 42824, 'batch_loss/train': 0.6198061590257566} +12/28/2021 14:29:13 - INFO - codeparrot_training - Step 42825: {'lr': 2.515006518918711e-05, 'samples': 21926912, 'steps': 42825, 'batch_loss/train': 0.6921493839472532} +12/28/2021 14:29:28 - INFO - codeparrot_training - Step 42826: {'lr': 2.514317514285949e-05, 'samples': 21927424, 'steps': 42826, 'batch_loss/train': 0.718626297544688} +12/28/2021 14:29:38 - INFO - codeparrot_training - Step 42827: {'lr': 2.513628599047771e-05, 'samples': 21927936, 'steps': 42827, 'batch_loss/train': 0.47944675784674473} +12/28/2021 14:29:49 - INFO - codeparrot_training - Step 42828: {'lr': 2.5129397732069152e-05, 'samples': 21928448, 'steps': 42828, 'batch_loss/train': 0.7290273830294609} +12/28/2021 14:29:59 - INFO - codeparrot_training - Step 42829: {'lr': 2.5122510367661166e-05, 'samples': 21928960, 'steps': 42829, 'batch_loss/train': 0.6491705002263188} +12/28/2021 14:30:12 - INFO - codeparrot_training - Step 42830: {'lr': 2.5115623897281253e-05, 'samples': 21929472, 'steps': 42830, 'batch_loss/train': 0.7589916801080108} +12/28/2021 14:30:22 - INFO - codeparrot_training - Step 42831: {'lr': 2.5108738320956643e-05, 'samples': 21929984, 'steps': 42831, 'batch_loss/train': 0.6460111024789512} +12/28/2021 14:30:33 - INFO - codeparrot_training - Step 42832: {'lr': 2.5101853638714727e-05, 'samples': 21930496, 'steps': 42832, 'batch_loss/train': 0.7435944303870201} +12/28/2021 14:30:47 - INFO - codeparrot_training - Step 42833: {'lr': 2.5094969850583016e-05, 'samples': 21931008, 'steps': 42833, 'batch_loss/train': 0.6946047199890018} +12/28/2021 14:30:57 - INFO - codeparrot_training - Step 42834: {'lr': 2.5088086956588764e-05, 'samples': 21931520, 'steps': 42834, 'batch_loss/train': 0.5463161512743682} +12/28/2021 14:31:08 - INFO - codeparrot_training - Step 42835: {'lr': 2.5081204956759307e-05, 'samples': 21932032, 'steps': 42835, 'batch_loss/train': 0.5818191077560186} +12/28/2021 14:31:20 - INFO - codeparrot_training - Step 42836: {'lr': 2.5074323851122098e-05, 'samples': 21932544, 'steps': 42836, 'batch_loss/train': 0.7145186886191368} +12/28/2021 14:31:31 - INFO - codeparrot_training - Step 42837: {'lr': 2.5067443639704423e-05, 'samples': 21933056, 'steps': 42837, 'batch_loss/train': 0.5675098836654797} +12/28/2021 14:31:41 - INFO - codeparrot_training - Step 42838: {'lr': 2.5060564322533673e-05, 'samples': 21933568, 'steps': 42838, 'batch_loss/train': 0.5507583320140839} +12/28/2021 14:31:53 - INFO - codeparrot_training - Step 42839: {'lr': 2.505368589963722e-05, 'samples': 21934080, 'steps': 42839, 'batch_loss/train': 0.6230059087392874} +12/28/2021 14:32:04 - INFO - codeparrot_training - Step 42840: {'lr': 2.5046808371042285e-05, 'samples': 21934592, 'steps': 42840, 'batch_loss/train': 0.6539830882102251} +12/28/2021 14:32:15 - INFO - codeparrot_training - Step 42841: {'lr': 2.503993173677635e-05, 'samples': 21935104, 'steps': 42841, 'batch_loss/train': 0.7238431451842189} +12/28/2021 14:32:25 - INFO - codeparrot_training - Step 42842: {'lr': 2.5033055996866756e-05, 'samples': 21935616, 'steps': 42842, 'batch_loss/train': 0.6003724434704054} +12/28/2021 14:32:38 - INFO - codeparrot_training - Step 42843: {'lr': 2.502618115134067e-05, 'samples': 21936128, 'steps': 42843, 'batch_loss/train': 0.6600670958869159} +12/28/2021 14:32:48 - INFO - codeparrot_training - Step 42844: {'lr': 2.5019307200225604e-05, 'samples': 21936640, 'steps': 42844, 'batch_loss/train': 0.7215003808960319} +12/28/2021 14:32:59 - INFO - codeparrot_training - Step 42845: {'lr': 2.5012434143548897e-05, 'samples': 21937152, 'steps': 42845, 'batch_loss/train': 0.5353306455072016} +12/28/2021 14:33:13 - INFO - codeparrot_training - Step 42846: {'lr': 2.5005561981337715e-05, 'samples': 21937664, 'steps': 42846, 'batch_loss/train': 0.7256592493504286} +12/28/2021 14:33:23 - INFO - codeparrot_training - Step 42847: {'lr': 2.499869071361949e-05, 'samples': 21938176, 'steps': 42847, 'batch_loss/train': 0.6334453218150884} +12/28/2021 14:33:34 - INFO - codeparrot_training - Step 42848: {'lr': 2.4991820340421496e-05, 'samples': 21938688, 'steps': 42848, 'batch_loss/train': 0.7250768393278122} +12/28/2021 14:33:46 - INFO - codeparrot_training - Step 42849: {'lr': 2.498495086177108e-05, 'samples': 21939200, 'steps': 42849, 'batch_loss/train': 0.6102676363661885} +12/28/2021 14:33:57 - INFO - codeparrot_training - Step 42850: {'lr': 2.4978082277695518e-05, 'samples': 21939712, 'steps': 42850, 'batch_loss/train': 0.6937053129076958} +12/28/2021 14:34:07 - INFO - codeparrot_training - Step 42851: {'lr': 2.497121458822213e-05, 'samples': 21940224, 'steps': 42851, 'batch_loss/train': 0.6893827985040843} +12/28/2021 14:34:18 - INFO - codeparrot_training - Step 42852: {'lr': 2.496434779337825e-05, 'samples': 21940736, 'steps': 42852, 'batch_loss/train': 0.8282265090383589} +12/28/2021 14:34:30 - INFO - codeparrot_training - Step 42853: {'lr': 2.4957481893191138e-05, 'samples': 21941248, 'steps': 42853, 'batch_loss/train': 0.6762414088007063} +12/28/2021 14:34:41 - INFO - codeparrot_training - Step 42854: {'lr': 2.4950616887688128e-05, 'samples': 21941760, 'steps': 42854, 'batch_loss/train': 0.6827224921435118} +12/28/2021 14:34:51 - INFO - codeparrot_training - Step 42855: {'lr': 2.494375277689645e-05, 'samples': 21942272, 'steps': 42855, 'batch_loss/train': 0.6704500191845} +12/28/2021 14:35:05 - INFO - codeparrot_training - Step 42856: {'lr': 2.49368895608435e-05, 'samples': 21942784, 'steps': 42856, 'batch_loss/train': 0.6936539728194475} +12/28/2021 14:35:16 - INFO - codeparrot_training - Step 42857: {'lr': 2.4930027239556453e-05, 'samples': 21943296, 'steps': 42857, 'batch_loss/train': 0.6632503694854677} +12/28/2021 14:35:27 - INFO - codeparrot_training - Step 42858: {'lr': 2.4923165813062586e-05, 'samples': 21943808, 'steps': 42858, 'batch_loss/train': 0.7816369347274303} +12/28/2021 14:35:39 - INFO - codeparrot_training - Step 42859: {'lr': 2.491630528138933e-05, 'samples': 21944320, 'steps': 42859, 'batch_loss/train': 0.6740704560652375} +12/28/2021 14:35:50 - INFO - codeparrot_training - Step 42860: {'lr': 2.4909445644563794e-05, 'samples': 21944832, 'steps': 42860, 'batch_loss/train': 0.7299590781331062} +12/28/2021 14:36:00 - INFO - codeparrot_training - Step 42861: {'lr': 2.490258690261332e-05, 'samples': 21945344, 'steps': 42861, 'batch_loss/train': 0.6960535527323373} +12/28/2021 14:36:14 - INFO - codeparrot_training - Step 42862: {'lr': 2.489572905556517e-05, 'samples': 21945856, 'steps': 42862, 'batch_loss/train': 0.6239629140764009} +12/28/2021 14:36:25 - INFO - codeparrot_training - Step 42863: {'lr': 2.488887210344659e-05, 'samples': 21946368, 'steps': 42863, 'batch_loss/train': 0.5276808957569301} +12/28/2021 14:36:35 - INFO - codeparrot_training - Step 42864: {'lr': 2.4882016046284872e-05, 'samples': 21946880, 'steps': 42864, 'batch_loss/train': 0.6689766675699502} +12/28/2021 14:36:46 - INFO - codeparrot_training - Step 42865: {'lr': 2.487516088410724e-05, 'samples': 21947392, 'steps': 42865, 'batch_loss/train': 0.696465949062258} +12/28/2021 14:36:58 - INFO - codeparrot_training - Step 42866: {'lr': 2.4868306616940977e-05, 'samples': 21947904, 'steps': 42866, 'batch_loss/train': 0.7425555000081658} +12/28/2021 14:37:09 - INFO - codeparrot_training - Step 42867: {'lr': 2.4861453244813315e-05, 'samples': 21948416, 'steps': 42867, 'batch_loss/train': 0.6640648851171136} +12/28/2021 14:37:19 - INFO - codeparrot_training - Step 42868: {'lr': 2.4854600767751562e-05, 'samples': 21948928, 'steps': 42868, 'batch_loss/train': 0.666034581779968} +12/28/2021 14:37:32 - INFO - codeparrot_training - Step 42869: {'lr': 2.484774918578281e-05, 'samples': 21949440, 'steps': 42869, 'batch_loss/train': 0.9163078740239143} +12/28/2021 14:37:42 - INFO - codeparrot_training - Step 42870: {'lr': 2.4840898498934423e-05, 'samples': 21949952, 'steps': 42870, 'batch_loss/train': 0.7225278849364258} +12/28/2021 14:37:53 - INFO - codeparrot_training - Step 42871: {'lr': 2.4834048707233657e-05, 'samples': 21950464, 'steps': 42871, 'batch_loss/train': 0.7016482655890286} +12/28/2021 14:38:07 - INFO - codeparrot_training - Step 42872: {'lr': 2.4827199810707664e-05, 'samples': 21950976, 'steps': 42872, 'batch_loss/train': 0.5205401700804941} +12/28/2021 14:38:18 - INFO - codeparrot_training - Step 42873: {'lr': 2.4820351809383633e-05, 'samples': 21951488, 'steps': 42873, 'batch_loss/train': 0.7500793114304543} +12/28/2021 14:38:28 - INFO - codeparrot_training - Step 42874: {'lr': 2.481350470328897e-05, 'samples': 21952000, 'steps': 42874, 'batch_loss/train': 0.7738585125189275} +12/28/2021 14:38:39 - INFO - codeparrot_training - Step 42875: {'lr': 2.4806658492450755e-05, 'samples': 21952512, 'steps': 42875, 'batch_loss/train': 0.6189043829217553} +12/28/2021 14:38:51 - INFO - codeparrot_training - Step 42876: {'lr': 2.4799813176896195e-05, 'samples': 21953024, 'steps': 42876, 'batch_loss/train': 0.7489926163107157} +12/28/2021 14:39:02 - INFO - codeparrot_training - Step 42877: {'lr': 2.4792968756652574e-05, 'samples': 21953536, 'steps': 42877, 'batch_loss/train': 0.6778360074386001} +12/28/2021 14:39:12 - INFO - codeparrot_training - Step 42878: {'lr': 2.4786125231747057e-05, 'samples': 21954048, 'steps': 42878, 'batch_loss/train': 0.6778342113830149} +12/28/2021 14:39:25 - INFO - codeparrot_training - Step 42879: {'lr': 2.4779282602206882e-05, 'samples': 21954560, 'steps': 42879, 'batch_loss/train': 0.6453069183044136} +12/28/2021 14:39:35 - INFO - codeparrot_training - Step 42880: {'lr': 2.4772440868059242e-05, 'samples': 21955072, 'steps': 42880, 'batch_loss/train': 0.7372348327189684} +12/28/2021 14:39:46 - INFO - codeparrot_training - Step 42881: {'lr': 2.4765600029331313e-05, 'samples': 21955584, 'steps': 42881, 'batch_loss/train': 0.7964602924475912} +12/28/2021 14:39:58 - INFO - codeparrot_training - Step 42882: {'lr': 2.4758760086050353e-05, 'samples': 21956096, 'steps': 42882, 'batch_loss/train': 0.6845827656798065} +12/28/2021 14:40:09 - INFO - codeparrot_training - Step 42883: {'lr': 2.475192103824353e-05, 'samples': 21956608, 'steps': 42883, 'batch_loss/train': 0.6654734909534454} +12/28/2021 14:40:19 - INFO - codeparrot_training - Step 42884: {'lr': 2.4745082885937935e-05, 'samples': 21957120, 'steps': 42884, 'batch_loss/train': 0.7208289038389921} +12/28/2021 14:40:33 - INFO - codeparrot_training - Step 42885: {'lr': 2.473824562916091e-05, 'samples': 21957632, 'steps': 42885, 'batch_loss/train': 0.6737242843955755} +12/28/2021 14:40:44 - INFO - codeparrot_training - Step 42886: {'lr': 2.4731409267939543e-05, 'samples': 21958144, 'steps': 42886, 'batch_loss/train': 0.8079843977466226} +12/28/2021 14:40:54 - INFO - codeparrot_training - Step 42887: {'lr': 2.4724573802301032e-05, 'samples': 21958656, 'steps': 42887, 'batch_loss/train': 0.6118491585366428} +12/28/2021 14:41:05 - INFO - codeparrot_training - Step 42888: {'lr': 2.471773923227255e-05, 'samples': 21959168, 'steps': 42888, 'batch_loss/train': 0.7072059424826875} +12/28/2021 14:41:17 - INFO - codeparrot_training - Step 42889: {'lr': 2.4710905557881274e-05, 'samples': 21959680, 'steps': 42889, 'batch_loss/train': 0.7179166376590729} +12/28/2021 14:41:28 - INFO - codeparrot_training - Step 42890: {'lr': 2.4704072779154373e-05, 'samples': 21960192, 'steps': 42890, 'batch_loss/train': 0.8112422528211027} +12/28/2021 14:41:38 - INFO - codeparrot_training - Step 42891: {'lr': 2.4697240896118995e-05, 'samples': 21960704, 'steps': 42891, 'batch_loss/train': 0.6932994364760816} +12/28/2021 14:41:51 - INFO - codeparrot_training - Step 42892: {'lr': 2.4690409908802335e-05, 'samples': 21961216, 'steps': 42892, 'batch_loss/train': 0.6944080349057913} +12/28/2021 14:42:01 - INFO - codeparrot_training - Step 42893: {'lr': 2.4683579817231544e-05, 'samples': 21961728, 'steps': 42893, 'batch_loss/train': 0.6393844303674996} +12/28/2021 14:42:12 - INFO - codeparrot_training - Step 42894: {'lr': 2.4676750621433793e-05, 'samples': 21962240, 'steps': 42894, 'batch_loss/train': 0.7361234896816313} +12/28/2021 14:42:26 - INFO - codeparrot_training - Step 42895: {'lr': 2.4669922321436084e-05, 'samples': 21962752, 'steps': 42895, 'batch_loss/train': 0.6437515001744032} +12/28/2021 14:42:37 - INFO - codeparrot_training - Step 42896: {'lr': 2.466309491726576e-05, 'samples': 21963264, 'steps': 42896, 'batch_loss/train': 0.7167964857071638} +12/28/2021 14:42:47 - INFO - codeparrot_training - Step 42897: {'lr': 2.465626840894994e-05, 'samples': 21963776, 'steps': 42897, 'batch_loss/train': 0.6593714961200021} +12/28/2021 14:42:58 - INFO - codeparrot_training - Step 42898: {'lr': 2.4649442796515655e-05, 'samples': 21964288, 'steps': 42898, 'batch_loss/train': 0.6590352016501129} +12/28/2021 14:43:10 - INFO - codeparrot_training - Step 42899: {'lr': 2.4642618079990047e-05, 'samples': 21964800, 'steps': 42899, 'batch_loss/train': 0.6818867707625031} +12/28/2021 14:43:21 - INFO - codeparrot_training - Step 42900: {'lr': 2.463579425940038e-05, 'samples': 21965312, 'steps': 42900, 'batch_loss/train': 0.6405623502796516} +12/28/2021 14:43:31 - INFO - codeparrot_training - Step 42901: {'lr': 2.4628971334773653e-05, 'samples': 21965824, 'steps': 42901, 'batch_loss/train': 0.6640281956642866} +12/28/2021 14:43:45 - INFO - codeparrot_training - Step 42902: {'lr': 2.462214930613707e-05, 'samples': 21966336, 'steps': 42902, 'batch_loss/train': 0.7069559432566166} +12/28/2021 14:43:56 - INFO - codeparrot_training - Step 42903: {'lr': 2.4615328173517692e-05, 'samples': 21966848, 'steps': 42903, 'batch_loss/train': 0.6705914009362459} +12/28/2021 14:44:07 - INFO - codeparrot_training - Step 42904: {'lr': 2.4608507936942665e-05, 'samples': 21967360, 'steps': 42904, 'batch_loss/train': 0.7076231817482039} +12/28/2021 14:44:19 - INFO - codeparrot_training - Step 42905: {'lr': 2.460168859643913e-05, 'samples': 21967872, 'steps': 42905, 'batch_loss/train': 0.7553669568151236} +12/28/2021 14:44:30 - INFO - codeparrot_training - Step 42906: {'lr': 2.4594870152034156e-05, 'samples': 21968384, 'steps': 42906, 'batch_loss/train': 0.5939631969667971} +12/28/2021 14:44:40 - INFO - codeparrot_training - Step 42907: {'lr': 2.4588052603754878e-05, 'samples': 21968896, 'steps': 42907, 'batch_loss/train': 0.6999860443174839} +12/28/2021 14:44:52 - INFO - codeparrot_training - Step 42908: {'lr': 2.4581235951628394e-05, 'samples': 21969408, 'steps': 42908, 'batch_loss/train': 0.7559133642353117} +12/28/2021 14:45:04 - INFO - codeparrot_training - Step 42909: {'lr': 2.4574420195681847e-05, 'samples': 21969920, 'steps': 42909, 'batch_loss/train': 0.698761741630733} +12/28/2021 14:45:14 - INFO - codeparrot_training - Step 42910: {'lr': 2.4567605335942183e-05, 'samples': 21970432, 'steps': 42910, 'batch_loss/train': 0.6120567484758794} +12/28/2021 14:45:25 - INFO - codeparrot_training - Step 42911: {'lr': 2.4560791372436693e-05, 'samples': 21970944, 'steps': 42911, 'batch_loss/train': 0.6621054589631967} +12/28/2021 14:45:39 - INFO - codeparrot_training - Step 42912: {'lr': 2.455397830519235e-05, 'samples': 21971456, 'steps': 42912, 'batch_loss/train': 0.8168098246096633} +12/28/2021 14:45:50 - INFO - codeparrot_training - Step 42913: {'lr': 2.454716613423624e-05, 'samples': 21971968, 'steps': 42913, 'batch_loss/train': 0.716584493406117} +12/28/2021 14:46:00 - INFO - codeparrot_training - Step 42914: {'lr': 2.4540354859595466e-05, 'samples': 21972480, 'steps': 42914, 'batch_loss/train': 0.6817581849172711} +12/28/2021 14:46:12 - INFO - codeparrot_training - Step 42915: {'lr': 2.4533544481297133e-05, 'samples': 21972992, 'steps': 42915, 'batch_loss/train': 0.8888501385226846} +12/28/2021 14:46:23 - INFO - codeparrot_training - Step 42916: {'lr': 2.452673499936828e-05, 'samples': 21973504, 'steps': 42916, 'batch_loss/train': 0.6284152949228883} +12/28/2021 14:46:34 - INFO - codeparrot_training - Step 42917: {'lr': 2.451992641383599e-05, 'samples': 21974016, 'steps': 42917, 'batch_loss/train': 0.7843422621954232} +12/28/2021 14:46:44 - INFO - codeparrot_training - Step 42918: {'lr': 2.4513118724727363e-05, 'samples': 21974528, 'steps': 42918, 'batch_loss/train': 0.6309530581347644} +12/28/2021 14:46:57 - INFO - codeparrot_training - Step 42919: {'lr': 2.4506311932069397e-05, 'samples': 21975040, 'steps': 42919, 'batch_loss/train': 0.7111790790222585} +12/28/2021 14:47:07 - INFO - codeparrot_training - Step 42920: {'lr': 2.449950603588927e-05, 'samples': 21975552, 'steps': 42920, 'batch_loss/train': 0.67299650516361} +12/28/2021 14:47:18 - INFO - codeparrot_training - Step 42921: {'lr': 2.4492701036213843e-05, 'samples': 21976064, 'steps': 42921, 'batch_loss/train': 0.5042471377528273} +12/28/2021 14:47:30 - INFO - codeparrot_training - Step 42922: {'lr': 2.448589693307035e-05, 'samples': 21976576, 'steps': 42922, 'batch_loss/train': 0.6798484059982002} +12/28/2021 14:47:40 - INFO - codeparrot_training - Step 42923: {'lr': 2.4479093726485825e-05, 'samples': 21977088, 'steps': 42923, 'batch_loss/train': 0.6396222955081612} +12/28/2021 14:47:51 - INFO - codeparrot_training - Step 42924: {'lr': 2.4472291416487213e-05, 'samples': 21977600, 'steps': 42924, 'batch_loss/train': 0.7002434376627207} +12/28/2021 14:48:05 - INFO - codeparrot_training - Step 42925: {'lr': 2.446549000310158e-05, 'samples': 21978112, 'steps': 42925, 'batch_loss/train': 0.7542052504140884} +12/28/2021 14:48:16 - INFO - codeparrot_training - Step 42926: {'lr': 2.44586894863561e-05, 'samples': 21978624, 'steps': 42926, 'batch_loss/train': 0.7706100754439831} +12/28/2021 14:48:26 - INFO - codeparrot_training - Step 42927: {'lr': 2.445188986627764e-05, 'samples': 21979136, 'steps': 42927, 'batch_loss/train': 0.6813106233894359} +12/28/2021 14:48:38 - INFO - codeparrot_training - Step 42928: {'lr': 2.4445091142893313e-05, 'samples': 21979648, 'steps': 42928, 'batch_loss/train': 0.7603783532977104} +12/28/2021 14:48:49 - INFO - codeparrot_training - Step 42929: {'lr': 2.4438293316230125e-05, 'samples': 21980160, 'steps': 42929, 'batch_loss/train': 0.7112086699344218} +12/28/2021 14:49:00 - INFO - codeparrot_training - Step 42930: {'lr': 2.4431496386315143e-05, 'samples': 21980672, 'steps': 42930, 'batch_loss/train': 0.6996079073287547} +12/28/2021 14:49:10 - INFO - codeparrot_training - Step 42931: {'lr': 2.4424700353175344e-05, 'samples': 21981184, 'steps': 42931, 'batch_loss/train': 0.5855051049147733} +12/28/2021 14:49:24 - INFO - codeparrot_training - Step 42932: {'lr': 2.441790521683776e-05, 'samples': 21981696, 'steps': 42932, 'batch_loss/train': 0.5795045546838082} +12/28/2021 14:49:35 - INFO - codeparrot_training - Step 42933: {'lr': 2.4411110977329398e-05, 'samples': 21982208, 'steps': 42933, 'batch_loss/train': 0.7990483939647675} +12/28/2021 14:49:46 - INFO - codeparrot_training - Step 42934: {'lr': 2.440431763467729e-05, 'samples': 21982720, 'steps': 42934, 'batch_loss/train': 0.7265011421404779} +12/28/2021 14:49:58 - INFO - codeparrot_training - Step 42935: {'lr': 2.4397525188908503e-05, 'samples': 21983232, 'steps': 42935, 'batch_loss/train': 0.7257592556998134} +12/28/2021 14:50:08 - INFO - codeparrot_training - Step 42936: {'lr': 2.4390733640049846e-05, 'samples': 21983744, 'steps': 42936, 'batch_loss/train': 0.6007625595084392} +12/28/2021 14:50:19 - INFO - codeparrot_training - Step 42937: {'lr': 2.4383942988128572e-05, 'samples': 21984256, 'steps': 42937, 'batch_loss/train': 0.7851564618758857} +12/28/2021 14:50:31 - INFO - codeparrot_training - Step 42938: {'lr': 2.4377153233171466e-05, 'samples': 21984768, 'steps': 42938, 'batch_loss/train': 0.7219605622813106} +12/28/2021 14:50:42 - INFO - codeparrot_training - Step 42939: {'lr': 2.4370364375205568e-05, 'samples': 21985280, 'steps': 42939, 'batch_loss/train': 0.7515822381246835} +12/28/2021 14:50:53 - INFO - codeparrot_training - Step 42940: {'lr': 2.4363576414258017e-05, 'samples': 21985792, 'steps': 42940, 'batch_loss/train': 2.1174383223988116} +12/28/2021 14:51:03 - INFO - codeparrot_training - Step 42941: {'lr': 2.4356789350355653e-05, 'samples': 21986304, 'steps': 42941, 'batch_loss/train': 0.7385994717478752} +12/28/2021 14:51:17 - INFO - codeparrot_training - Step 42942: {'lr': 2.4350003183525454e-05, 'samples': 21986816, 'steps': 42942, 'batch_loss/train': 0.6648600553162396} +12/28/2021 14:51:28 - INFO - codeparrot_training - Step 42943: {'lr': 2.4343217913794486e-05, 'samples': 21987328, 'steps': 42943, 'batch_loss/train': 0.6760694617405534} +12/28/2021 14:51:39 - INFO - codeparrot_training - Step 42944: {'lr': 2.433643354118964e-05, 'samples': 21987840, 'steps': 42944, 'batch_loss/train': 0.6627284134738147} +12/28/2021 14:51:51 - INFO - codeparrot_training - Step 42945: {'lr': 2.432965006573795e-05, 'samples': 21988352, 'steps': 42945, 'batch_loss/train': 0.7267396077513695} +12/28/2021 14:52:01 - INFO - codeparrot_training - Step 42946: {'lr': 2.4322867487466398e-05, 'samples': 21988864, 'steps': 42946, 'batch_loss/train': 0.6660306504927576} +12/28/2021 14:52:12 - INFO - codeparrot_training - Step 42947: {'lr': 2.4316085806401822e-05, 'samples': 21989376, 'steps': 42947, 'batch_loss/train': 0.6039609722211026} +12/28/2021 14:52:26 - INFO - codeparrot_training - Step 42948: {'lr': 2.430930502257134e-05, 'samples': 21989888, 'steps': 42948, 'batch_loss/train': 0.6560165181290358} +12/28/2021 14:52:37 - INFO - codeparrot_training - Step 42949: {'lr': 2.4302525136001868e-05, 'samples': 21990400, 'steps': 42949, 'batch_loss/train': 0.7097772872075438} +12/28/2021 14:52:47 - INFO - codeparrot_training - Step 42950: {'lr': 2.4295746146720283e-05, 'samples': 21990912, 'steps': 42950, 'batch_loss/train': 1.5288089541718364} +12/28/2021 14:52:58 - INFO - codeparrot_training - Step 42951: {'lr': 2.4288968054753558e-05, 'samples': 21991424, 'steps': 42951, 'batch_loss/train': 0.6773628629744053} +12/28/2021 14:53:10 - INFO - codeparrot_training - Step 42952: {'lr': 2.4282190860128756e-05, 'samples': 21991936, 'steps': 42952, 'batch_loss/train': 0.9283945113420486} +12/28/2021 14:53:21 - INFO - codeparrot_training - Step 42953: {'lr': 2.427541456287269e-05, 'samples': 21992448, 'steps': 42953, 'batch_loss/train': 0.6807875689119101} +12/28/2021 14:53:32 - INFO - codeparrot_training - Step 42954: {'lr': 2.4268639163012307e-05, 'samples': 21992960, 'steps': 42954, 'batch_loss/train': 0.5759236401645467} +12/28/2021 14:53:44 - INFO - codeparrot_training - Step 42955: {'lr': 2.4261864660574647e-05, 'samples': 21993472, 'steps': 42955, 'batch_loss/train': 0.753335440531373} +12/28/2021 14:53:54 - INFO - codeparrot_training - Step 42956: {'lr': 2.425509105558657e-05, 'samples': 21993984, 'steps': 42956, 'batch_loss/train': 0.6944567620521411} +12/28/2021 14:54:05 - INFO - codeparrot_training - Step 42957: {'lr': 2.4248318348074978e-05, 'samples': 21994496, 'steps': 42957, 'batch_loss/train': 0.6890019420534372} +12/28/2021 14:54:19 - INFO - codeparrot_training - Step 42958: {'lr': 2.4241546538066845e-05, 'samples': 21995008, 'steps': 42958, 'batch_loss/train': 0.74009492341429} +12/28/2021 14:54:30 - INFO - codeparrot_training - Step 42959: {'lr': 2.4234775625589094e-05, 'samples': 21995520, 'steps': 42959, 'batch_loss/train': 0.8160582715063356} +12/28/2021 14:54:40 - INFO - codeparrot_training - Step 42960: {'lr': 2.4228005610668623e-05, 'samples': 21996032, 'steps': 42960, 'batch_loss/train': 0.7377775115892291} +12/28/2021 14:54:51 - INFO - codeparrot_training - Step 42961: {'lr': 2.422123649333238e-05, 'samples': 21996544, 'steps': 42961, 'batch_loss/train': 0.535505961481249} +12/28/2021 14:55:03 - INFO - codeparrot_training - Step 42962: {'lr': 2.4214468273607182e-05, 'samples': 21997056, 'steps': 42962, 'batch_loss/train': 0.6226595503976569} +12/28/2021 14:55:14 - INFO - codeparrot_training - Step 42963: {'lr': 2.4207700951520057e-05, 'samples': 21997568, 'steps': 42963, 'batch_loss/train': 0.6811142894439399} +12/28/2021 14:55:24 - INFO - codeparrot_training - Step 42964: {'lr': 2.4200934527097873e-05, 'samples': 21998080, 'steps': 42964, 'batch_loss/train': 0.7454754309728742} +12/28/2021 14:55:36 - INFO - codeparrot_training - Step 42965: {'lr': 2.4194169000367444e-05, 'samples': 21998592, 'steps': 42965, 'batch_loss/train': 0.6850551227107644} +12/28/2021 14:55:47 - INFO - codeparrot_training - Step 42966: {'lr': 2.418740437135583e-05, 'samples': 21999104, 'steps': 42966, 'batch_loss/train': 0.760031028650701} +12/28/2021 14:55:58 - INFO - codeparrot_training - Step 42967: {'lr': 2.4180640640089764e-05, 'samples': 21999616, 'steps': 42967, 'batch_loss/train': 0.7684129485860467} +12/28/2021 14:56:10 - INFO - codeparrot_training - Step 42968: {'lr': 2.4173877806596244e-05, 'samples': 22000128, 'steps': 42968, 'batch_loss/train': 0.7243165830150247} +12/28/2021 14:56:20 - INFO - codeparrot_training - Step 42969: {'lr': 2.416711587090209e-05, 'samples': 22000640, 'steps': 42969, 'batch_loss/train': 0.7321455222554505} +12/28/2021 14:56:31 - INFO - codeparrot_training - Step 42970: {'lr': 2.416035483303422e-05, 'samples': 22001152, 'steps': 42970, 'batch_loss/train': 0.6379069746471941} +12/28/2021 14:56:45 - INFO - codeparrot_training - Step 42971: {'lr': 2.4153594693019503e-05, 'samples': 22001664, 'steps': 42971, 'batch_loss/train': 0.7666100949281827} +12/28/2021 14:56:56 - INFO - codeparrot_training - Step 42972: {'lr': 2.4146835450884863e-05, 'samples': 22002176, 'steps': 42972, 'batch_loss/train': 0.7153888386674225} +12/28/2021 14:57:06 - INFO - codeparrot_training - Step 42973: {'lr': 2.4140077106657027e-05, 'samples': 22002688, 'steps': 42973, 'batch_loss/train': 0.7216653837822378} +12/28/2021 14:57:17 - INFO - codeparrot_training - Step 42974: {'lr': 2.4133319660363e-05, 'samples': 22003200, 'steps': 42974, 'batch_loss/train': 0.6298899282701313} +12/28/2021 14:57:29 - INFO - codeparrot_training - Step 42975: {'lr': 2.412656311202968e-05, 'samples': 22003712, 'steps': 42975, 'batch_loss/train': 0.7708557429723442} +12/28/2021 14:57:40 - INFO - codeparrot_training - Step 42976: {'lr': 2.411980746168377e-05, 'samples': 22004224, 'steps': 42976, 'batch_loss/train': 0.6659022428211756} +12/28/2021 14:57:51 - INFO - codeparrot_training - Step 42977: {'lr': 2.411305270935224e-05, 'samples': 22004736, 'steps': 42977, 'batch_loss/train': 0.6861565592698753} +12/28/2021 14:58:04 - INFO - codeparrot_training - Step 42978: {'lr': 2.410629885506199e-05, 'samples': 22005248, 'steps': 42978, 'batch_loss/train': 0.7543231868185103} +12/28/2021 14:58:15 - INFO - codeparrot_training - Step 42979: {'lr': 2.4099545898839754e-05, 'samples': 22005760, 'steps': 42979, 'batch_loss/train': 0.66950568347238} +12/28/2021 14:58:26 - INFO - codeparrot_training - Step 42980: {'lr': 2.4092793840712362e-05, 'samples': 22006272, 'steps': 42980, 'batch_loss/train': 0.6458720522059593} +12/28/2021 14:58:38 - INFO - codeparrot_training - Step 42981: {'lr': 2.4086042680706828e-05, 'samples': 22006784, 'steps': 42981, 'batch_loss/train': 0.7241249540820718} +12/28/2021 14:58:48 - INFO - codeparrot_training - Step 42982: {'lr': 2.407929241884982e-05, 'samples': 22007296, 'steps': 42982, 'batch_loss/train': 0.6549115055240691} +12/28/2021 14:58:59 - INFO - codeparrot_training - Step 42983: {'lr': 2.407254305516826e-05, 'samples': 22007808, 'steps': 42983, 'batch_loss/train': 0.4795214480836876} +12/28/2021 14:59:10 - INFO - codeparrot_training - Step 42984: {'lr': 2.4065794589688965e-05, 'samples': 22008320, 'steps': 42984, 'batch_loss/train': 0.7902352577075362} +12/28/2021 14:59:22 - INFO - codeparrot_training - Step 42985: {'lr': 2.4059047022438746e-05, 'samples': 22008832, 'steps': 42985, 'batch_loss/train': 0.6688861957518384} +12/28/2021 14:59:33 - INFO - codeparrot_training - Step 42986: {'lr': 2.4052300353444467e-05, 'samples': 22009344, 'steps': 42986, 'batch_loss/train': 0.6711880983784795} +12/28/2021 14:59:43 - INFO - codeparrot_training - Step 42987: {'lr': 2.404555458273294e-05, 'samples': 22009856, 'steps': 42987, 'batch_loss/train': 0.677357692271471} +12/28/2021 14:59:58 - INFO - codeparrot_training - Step 42988: {'lr': 2.40388097103309e-05, 'samples': 22010368, 'steps': 42988, 'batch_loss/train': 0.6918208310380578} +12/28/2021 15:00:08 - INFO - codeparrot_training - Step 42989: {'lr': 2.403206573626529e-05, 'samples': 22010880, 'steps': 42989, 'batch_loss/train': 0.6074634953401983} +12/28/2021 15:00:19 - INFO - codeparrot_training - Step 42990: {'lr': 2.4025322660562898e-05, 'samples': 22011392, 'steps': 42990, 'batch_loss/train': 0.6663519633002579} +12/28/2021 15:00:31 - INFO - codeparrot_training - Step 42991: {'lr': 2.4018580483250395e-05, 'samples': 22011904, 'steps': 42991, 'batch_loss/train': 0.5479464341478888} +12/28/2021 15:00:42 - INFO - codeparrot_training - Step 42992: {'lr': 2.4011839204354792e-05, 'samples': 22012416, 'steps': 42992, 'batch_loss/train': 0.7663297429680824} +12/28/2021 15:00:52 - INFO - codeparrot_training - Step 42993: {'lr': 2.4005098823902754e-05, 'samples': 22012928, 'steps': 42993, 'batch_loss/train': 0.6546267946250737} +12/28/2021 15:01:03 - INFO - codeparrot_training - Step 42994: {'lr': 2.39983593419211e-05, 'samples': 22013440, 'steps': 42994, 'batch_loss/train': 0.7508767800172791} +12/28/2021 15:01:15 - INFO - codeparrot_training - Step 42995: {'lr': 2.3991620758436612e-05, 'samples': 22013952, 'steps': 42995, 'batch_loss/train': 0.817843590863049} +12/28/2021 15:01:26 - INFO - codeparrot_training - Step 42996: {'lr': 2.3984883073476125e-05, 'samples': 22014464, 'steps': 42996, 'batch_loss/train': 0.7042088070884347} +12/28/2021 15:01:36 - INFO - codeparrot_training - Step 42997: {'lr': 2.3978146287066404e-05, 'samples': 22014976, 'steps': 42997, 'batch_loss/train': 0.7129263540264219} +12/28/2021 15:01:48 - INFO - codeparrot_training - Step 42998: {'lr': 2.3971410399234227e-05, 'samples': 22015488, 'steps': 42998, 'batch_loss/train': 0.6563752321526408} +12/28/2021 15:01:59 - INFO - codeparrot_training - Step 42999: {'lr': 2.3964675410006382e-05, 'samples': 22016000, 'steps': 42999, 'batch_loss/train': 0.894891794770956} +12/28/2021 15:02:10 - INFO - codeparrot_training - Step 43000: {'lr': 2.395794131940962e-05, 'samples': 22016512, 'steps': 43000, 'batch_loss/train': 0.6422113627195358} +12/28/2021 15:02:24 - INFO - codeparrot_training - Step 43001: {'lr': 2.3951208127470785e-05, 'samples': 22017024, 'steps': 43001, 'batch_loss/train': 0.6196160560066346} +12/28/2021 15:02:34 - INFO - codeparrot_training - Step 43002: {'lr': 2.394447583421652e-05, 'samples': 22017536, 'steps': 43002, 'batch_loss/train': 0.599912095349282} +12/28/2021 15:02:45 - INFO - codeparrot_training - Step 43003: {'lr': 2.3937744439673698e-05, 'samples': 22018048, 'steps': 43003, 'batch_loss/train': 0.6794987358152866} +12/28/2021 15:02:56 - INFO - codeparrot_training - Step 43004: {'lr': 2.3931013943869095e-05, 'samples': 22018560, 'steps': 43004, 'batch_loss/train': 0.6495391696225852} +12/28/2021 15:03:08 - INFO - codeparrot_training - Step 43005: {'lr': 2.392428434682939e-05, 'samples': 22019072, 'steps': 43005, 'batch_loss/train': 0.6771103050559759} +12/28/2021 15:03:19 - INFO - codeparrot_training - Step 43006: {'lr': 2.391755564858128e-05, 'samples': 22019584, 'steps': 43006, 'batch_loss/train': 0.6347163263708353} +12/28/2021 15:03:29 - INFO - codeparrot_training - Step 43007: {'lr': 2.3910827849151745e-05, 'samples': 22020096, 'steps': 43007, 'batch_loss/train': 0.6433123294264078} +12/28/2021 15:03:43 - INFO - codeparrot_training - Step 43008: {'lr': 2.390410094856732e-05, 'samples': 22020608, 'steps': 43008, 'batch_loss/train': 0.6594062935328111} +12/28/2021 15:03:54 - INFO - codeparrot_training - Step 43009: {'lr': 2.3897374946854816e-05, 'samples': 22021120, 'steps': 43009, 'batch_loss/train': 0.6990340799093246} +12/28/2021 15:04:04 - INFO - codeparrot_training - Step 43010: {'lr': 2.389064984404096e-05, 'samples': 22021632, 'steps': 43010, 'batch_loss/train': 0.789112652419135} +12/28/2021 15:04:16 - INFO - codeparrot_training - Step 43011: {'lr': 2.388392564015254e-05, 'samples': 22022144, 'steps': 43011, 'batch_loss/train': 0.6803093729540706} +12/28/2021 15:04:27 - INFO - codeparrot_training - Step 43012: {'lr': 2.387720233521623e-05, 'samples': 22022656, 'steps': 43012, 'batch_loss/train': 0.696424062596634} +12/28/2021 15:04:38 - INFO - codeparrot_training - Step 43013: {'lr': 2.3870479929258783e-05, 'samples': 22023168, 'steps': 43013, 'batch_loss/train': 0.7283290214836597} +12/28/2021 15:04:48 - INFO - codeparrot_training - Step 43014: {'lr': 2.3863758422306953e-05, 'samples': 22023680, 'steps': 43014, 'batch_loss/train': 0.7005381910130382} +12/28/2021 15:05:01 - INFO - codeparrot_training - Step 43015: {'lr': 2.385703781438739e-05, 'samples': 22024192, 'steps': 43015, 'batch_loss/train': 0.7882659332826734} +12/28/2021 15:05:11 - INFO - codeparrot_training - Step 43016: {'lr': 2.385031810552693e-05, 'samples': 22024704, 'steps': 43016, 'batch_loss/train': 0.7187886275351048} +12/28/2021 15:05:22 - INFO - codeparrot_training - Step 43017: {'lr': 2.384359929575211e-05, 'samples': 22025216, 'steps': 43017, 'batch_loss/train': 0.74976238142699} +12/28/2021 15:05:36 - INFO - codeparrot_training - Step 43018: {'lr': 2.3836881385089855e-05, 'samples': 22025728, 'steps': 43018, 'batch_loss/train': 0.7666850257664919} +12/28/2021 15:05:46 - INFO - codeparrot_training - Step 43019: {'lr': 2.383016437356672e-05, 'samples': 22026240, 'steps': 43019, 'batch_loss/train': 0.6646334289689548} +12/28/2021 15:05:57 - INFO - codeparrot_training - Step 43020: {'lr': 2.382344826120944e-05, 'samples': 22026752, 'steps': 43020, 'batch_loss/train': 0.6901934309862554} +12/28/2021 15:06:09 - INFO - codeparrot_training - Step 43021: {'lr': 2.3816733048044714e-05, 'samples': 22027264, 'steps': 43021, 'batch_loss/train': 0.7548415772616863} +12/28/2021 15:06:20 - INFO - codeparrot_training - Step 43022: {'lr': 2.381001873409927e-05, 'samples': 22027776, 'steps': 43022, 'batch_loss/train': 0.657152867410332} +12/28/2021 15:06:30 - INFO - codeparrot_training - Step 43023: {'lr': 2.3803305319399783e-05, 'samples': 22028288, 'steps': 43023, 'batch_loss/train': 0.7246419964358211} +12/28/2021 15:06:44 - INFO - codeparrot_training - Step 43024: {'lr': 2.379659280397295e-05, 'samples': 22028800, 'steps': 43024, 'batch_loss/train': 0.6783698829822242} +12/28/2021 15:06:55 - INFO - codeparrot_training - Step 43025: {'lr': 2.378988118784545e-05, 'samples': 22029312, 'steps': 43025, 'batch_loss/train': 0.652981020975858} +12/28/2021 15:07:06 - INFO - codeparrot_training - Step 43026: {'lr': 2.3783170471043974e-05, 'samples': 22029824, 'steps': 43026, 'batch_loss/train': 0.7327755531296134} +12/28/2021 15:07:16 - INFO - codeparrot_training - Step 43027: {'lr': 2.3776460653595233e-05, 'samples': 22030336, 'steps': 43027, 'batch_loss/train': 0.6881623519584537} +12/28/2021 15:07:28 - INFO - codeparrot_training - Step 43028: {'lr': 2.3769751735525786e-05, 'samples': 22030848, 'steps': 43028, 'batch_loss/train': 0.6840994087979198} +12/28/2021 15:07:39 - INFO - codeparrot_training - Step 43029: {'lr': 2.3763043716862415e-05, 'samples': 22031360, 'steps': 43029, 'batch_loss/train': 0.6407831767573953} +12/28/2021 15:07:49 - INFO - codeparrot_training - Step 43030: {'lr': 2.375633659763182e-05, 'samples': 22031872, 'steps': 43030, 'batch_loss/train': 0.6730134662939236} +12/28/2021 15:08:02 - INFO - codeparrot_training - Step 43031: {'lr': 2.3749630377860543e-05, 'samples': 22032384, 'steps': 43031, 'batch_loss/train': 0.7972085243090987} +12/28/2021 15:08:12 - INFO - codeparrot_training - Step 43032: {'lr': 2.3742925057575277e-05, 'samples': 22032896, 'steps': 43032, 'batch_loss/train': 0.7583199202781543} +12/28/2021 15:08:23 - INFO - codeparrot_training - Step 43033: {'lr': 2.3736220636802808e-05, 'samples': 22033408, 'steps': 43033, 'batch_loss/train': 0.732898055575788} +12/28/2021 15:08:37 - INFO - codeparrot_training - Step 43034: {'lr': 2.3729517115569616e-05, 'samples': 22033920, 'steps': 43034, 'batch_loss/train': 0.682277322455775} +12/28/2021 15:08:47 - INFO - codeparrot_training - Step 43035: {'lr': 2.3722814493902456e-05, 'samples': 22034432, 'steps': 43035, 'batch_loss/train': 0.7773096463643014} +12/28/2021 15:08:58 - INFO - codeparrot_training - Step 43036: {'lr': 2.371611277182792e-05, 'samples': 22034944, 'steps': 43036, 'batch_loss/train': 0.678251585457474} +12/28/2021 15:09:10 - INFO - codeparrot_training - Step 43037: {'lr': 2.3709411949372705e-05, 'samples': 22035456, 'steps': 43037, 'batch_loss/train': 0.7052710661664605} +12/28/2021 15:09:21 - INFO - codeparrot_training - Step 43038: {'lr': 2.3702712026563406e-05, 'samples': 22035968, 'steps': 43038, 'batch_loss/train': 0.6119980430230498} +12/28/2021 15:09:31 - INFO - codeparrot_training - Step 43039: {'lr': 2.3696013003426665e-05, 'samples': 22036480, 'steps': 43039, 'batch_loss/train': 0.9815329303964972} +12/28/2021 15:09:42 - INFO - codeparrot_training - Step 43040: {'lr': 2.3689314879989155e-05, 'samples': 22036992, 'steps': 43040, 'batch_loss/train': 0.6948422640562057} +12/28/2021 15:09:54 - INFO - codeparrot_training - Step 43041: {'lr': 2.3682617656277467e-05, 'samples': 22037504, 'steps': 43041, 'batch_loss/train': 0.7802142389118671} +12/28/2021 15:10:05 - INFO - codeparrot_training - Step 43042: {'lr': 2.3675921332318275e-05, 'samples': 22038016, 'steps': 43042, 'batch_loss/train': 0.6042674724012613} +12/28/2021 15:10:15 - INFO - codeparrot_training - Step 43043: {'lr': 2.3669225908138082e-05, 'samples': 22038528, 'steps': 43043, 'batch_loss/train': 0.7024561929283664} +12/28/2021 15:10:28 - INFO - codeparrot_training - Step 43044: {'lr': 2.3662531383763646e-05, 'samples': 22039040, 'steps': 43044, 'batch_loss/train': 0.7754815146327019} +12/28/2021 15:10:38 - INFO - codeparrot_training - Step 43045: {'lr': 2.3655837759221532e-05, 'samples': 22039552, 'steps': 43045, 'batch_loss/train': 0.6948979897424579} +12/28/2021 15:10:49 - INFO - codeparrot_training - Step 43046: {'lr': 2.3649145034538328e-05, 'samples': 22040064, 'steps': 43046, 'batch_loss/train': 0.6709362021065317} +12/28/2021 15:11:03 - INFO - codeparrot_training - Step 43047: {'lr': 2.364245320974065e-05, 'samples': 22040576, 'steps': 43047, 'batch_loss/train': 0.6963208923116326} +12/28/2021 15:11:14 - INFO - codeparrot_training - Step 43048: {'lr': 2.3635762284855118e-05, 'samples': 22041088, 'steps': 43048, 'batch_loss/train': 0.6978125190362334} +12/28/2021 15:11:25 - INFO - codeparrot_training - Step 43049: {'lr': 2.362907225990829e-05, 'samples': 22041600, 'steps': 43049, 'batch_loss/train': 0.697171373758465} +12/28/2021 15:11:35 - INFO - codeparrot_training - Step 43050: {'lr': 2.362238313492682e-05, 'samples': 22042112, 'steps': 43050, 'batch_loss/train': 0.5926320839789696} +12/28/2021 15:11:47 - INFO - codeparrot_training - Step 43051: {'lr': 2.3615694909937285e-05, 'samples': 22042624, 'steps': 43051, 'batch_loss/train': 0.6423852929729037} +12/28/2021 15:11:58 - INFO - codeparrot_training - Step 43052: {'lr': 2.360900758496626e-05, 'samples': 22043136, 'steps': 43052, 'batch_loss/train': 0.6518335163127631} +12/28/2021 15:12:09 - INFO - codeparrot_training - Step 43053: {'lr': 2.3602321160040323e-05, 'samples': 22043648, 'steps': 43053, 'batch_loss/train': 0.8107540337368846} +12/28/2021 15:12:23 - INFO - codeparrot_training - Step 43054: {'lr': 2.35956356351861e-05, 'samples': 22044160, 'steps': 43054, 'batch_loss/train': 0.6704482232162263} +12/28/2021 15:12:34 - INFO - codeparrot_training - Step 43055: {'lr': 2.3588951010430126e-05, 'samples': 22044672, 'steps': 43055, 'batch_loss/train': 0.6249303827062249} +12/28/2021 15:12:45 - INFO - codeparrot_training - Step 43056: {'lr': 2.3582267285799043e-05, 'samples': 22045184, 'steps': 43056, 'batch_loss/train': 0.728389760479331} +12/28/2021 15:12:57 - INFO - codeparrot_training - Step 43057: {'lr': 2.357558446131933e-05, 'samples': 22045696, 'steps': 43057, 'batch_loss/train': 0.7365194436279126} +12/28/2021 15:13:07 - INFO - codeparrot_training - Step 43058: {'lr': 2.3568902537017546e-05, 'samples': 22046208, 'steps': 43058, 'batch_loss/train': 0.6967570437118411} +12/28/2021 15:13:18 - INFO - codeparrot_training - Step 43059: {'lr': 2.3562221512920395e-05, 'samples': 22046720, 'steps': 43059, 'batch_loss/train': 0.8126242216676474} +12/28/2021 15:13:30 - INFO - codeparrot_training - Step 43060: {'lr': 2.355554138905433e-05, 'samples': 22047232, 'steps': 43060, 'batch_loss/train': 0.7607578607276082} +12/28/2021 15:13:41 - INFO - codeparrot_training - Step 43061: {'lr': 2.354886216544591e-05, 'samples': 22047744, 'steps': 43061, 'batch_loss/train': 0.6311725690029562} +12/28/2021 15:13:52 - INFO - codeparrot_training - Step 43062: {'lr': 2.3542183842121728e-05, 'samples': 22048256, 'steps': 43062, 'batch_loss/train': 0.6791292398702353} +12/28/2021 15:14:02 - INFO - codeparrot_training - Step 43063: {'lr': 2.353550641910829e-05, 'samples': 22048768, 'steps': 43063, 'batch_loss/train': 0.5972941300133243} +12/28/2021 15:14:16 - INFO - codeparrot_training - Step 43064: {'lr': 2.352882989643218e-05, 'samples': 22049280, 'steps': 43064, 'batch_loss/train': 0.5573675425257534} +12/28/2021 15:14:27 - INFO - codeparrot_training - Step 43065: {'lr': 2.352215427411994e-05, 'samples': 22049792, 'steps': 43065, 'batch_loss/train': 0.6764320685470011} +12/28/2021 15:14:37 - INFO - codeparrot_training - Step 43066: {'lr': 2.35154795521981e-05, 'samples': 22050304, 'steps': 43066, 'batch_loss/train': 0.7294329938013107} +12/28/2021 15:14:49 - INFO - codeparrot_training - Step 43067: {'lr': 2.3508805730693166e-05, 'samples': 22050816, 'steps': 43067, 'batch_loss/train': 0.6956647569313645} +12/28/2021 15:15:00 - INFO - codeparrot_training - Step 43068: {'lr': 2.3502132809631788e-05, 'samples': 22051328, 'steps': 43068, 'batch_loss/train': 0.6914100074209273} +12/28/2021 15:15:11 - INFO - codeparrot_training - Step 43069: {'lr': 2.3495460789040274e-05, 'samples': 22051840, 'steps': 43069, 'batch_loss/train': 0.7424819958396256} +12/28/2021 15:15:23 - INFO - codeparrot_training - Step 43070: {'lr': 2.3488789668945355e-05, 'samples': 22052352, 'steps': 43070, 'batch_loss/train': 0.6771417623385787} +12/28/2021 15:15:34 - INFO - codeparrot_training - Step 43071: {'lr': 2.3482119449373535e-05, 'samples': 22052864, 'steps': 43071, 'batch_loss/train': 0.5859214519150555} +12/28/2021 15:15:44 - INFO - codeparrot_training - Step 43072: {'lr': 2.347545013035121e-05, 'samples': 22053376, 'steps': 43072, 'batch_loss/train': 0.6370870299870148} +12/28/2021 15:15:55 - INFO - codeparrot_training - Step 43073: {'lr': 2.3468781711904974e-05, 'samples': 22053888, 'steps': 43073, 'batch_loss/train': 0.7574109099805355} +12/28/2021 15:16:09 - INFO - codeparrot_training - Step 43074: {'lr': 2.346211419406133e-05, 'samples': 22054400, 'steps': 43074, 'batch_loss/train': 0.7108112481655553} +12/28/2021 15:16:19 - INFO - codeparrot_training - Step 43075: {'lr': 2.3455447576846782e-05, 'samples': 22054912, 'steps': 43075, 'batch_loss/train': 0.6863126400858164} +12/28/2021 15:16:30 - INFO - codeparrot_training - Step 43076: {'lr': 2.3448781860287816e-05, 'samples': 22055424, 'steps': 43076, 'batch_loss/train': 0.7908679554238915} +12/28/2021 15:16:42 - INFO - codeparrot_training - Step 43077: {'lr': 2.3442117044410988e-05, 'samples': 22055936, 'steps': 43077, 'batch_loss/train': 0.8164316127076745} +12/28/2021 15:16:53 - INFO - codeparrot_training - Step 43078: {'lr': 2.3435453129242724e-05, 'samples': 22056448, 'steps': 43078, 'batch_loss/train': 0.6740169818513095} +12/28/2021 15:17:03 - INFO - codeparrot_training - Step 43079: {'lr': 2.3428790114809557e-05, 'samples': 22056960, 'steps': 43079, 'batch_loss/train': 0.7383521939627826} +12/28/2021 15:17:16 - INFO - codeparrot_training - Step 43080: {'lr': 2.3422128001137993e-05, 'samples': 22057472, 'steps': 43080, 'batch_loss/train': 0.6943894512951374} +12/28/2021 15:17:27 - INFO - codeparrot_training - Step 43081: {'lr': 2.3415466788254458e-05, 'samples': 22057984, 'steps': 43081, 'batch_loss/train': 0.6195501768961549} +12/28/2021 15:17:38 - INFO - codeparrot_training - Step 43082: {'lr': 2.340880647618554e-05, 'samples': 22058496, 'steps': 43082, 'batch_loss/train': 0.7381236683577299} +12/28/2021 15:17:50 - INFO - codeparrot_training - Step 43083: {'lr': 2.3402147064957608e-05, 'samples': 22059008, 'steps': 43083, 'batch_loss/train': 0.7234829762019217} +12/28/2021 15:18:01 - INFO - codeparrot_training - Step 43084: {'lr': 2.339548855459714e-05, 'samples': 22059520, 'steps': 43084, 'batch_loss/train': 0.8365113250911236} +12/28/2021 15:18:11 - INFO - codeparrot_training - Step 43085: {'lr': 2.3388830945130724e-05, 'samples': 22060032, 'steps': 43085, 'batch_loss/train': 0.8004371467977762} +12/28/2021 15:18:22 - INFO - codeparrot_training - Step 43086: {'lr': 2.33821742365847e-05, 'samples': 22060544, 'steps': 43086, 'batch_loss/train': 0.7684024563059211} +12/28/2021 15:18:34 - INFO - codeparrot_training - Step 43087: {'lr': 2.3375518428985555e-05, 'samples': 22061056, 'steps': 43087, 'batch_loss/train': 0.7835501129738986} +12/28/2021 15:18:45 - INFO - codeparrot_training - Step 43088: {'lr': 2.336886352235984e-05, 'samples': 22061568, 'steps': 43088, 'batch_loss/train': 0.9234914910048246} +12/28/2021 15:18:55 - INFO - codeparrot_training - Step 43089: {'lr': 2.336220951673393e-05, 'samples': 22062080, 'steps': 43089, 'batch_loss/train': 0.7747994055971503} +12/28/2021 15:19:08 - INFO - codeparrot_training - Step 43090: {'lr': 2.33555564121343e-05, 'samples': 22062592, 'steps': 43090, 'batch_loss/train': 0.6916887951083481} +12/28/2021 15:19:18 - INFO - codeparrot_training - Step 43091: {'lr': 2.3348904208587403e-05, 'samples': 22063104, 'steps': 43091, 'batch_loss/train': 0.6475855011958629} +12/28/2021 15:19:29 - INFO - codeparrot_training - Step 43092: {'lr': 2.3342252906119686e-05, 'samples': 22063616, 'steps': 43092, 'batch_loss/train': 0.6655355719849467} +12/28/2021 15:19:43 - INFO - codeparrot_training - Step 43093: {'lr': 2.333560250475758e-05, 'samples': 22064128, 'steps': 43093, 'batch_loss/train': 0.7106264936737716} +12/28/2021 15:19:54 - INFO - codeparrot_training - Step 43094: {'lr': 2.332895300452756e-05, 'samples': 22064640, 'steps': 43094, 'batch_loss/train': 0.647575793787837} +12/28/2021 15:20:05 - INFO - codeparrot_training - Step 43095: {'lr': 2.3322304405455962e-05, 'samples': 22065152, 'steps': 43095, 'batch_loss/train': 0.6521237911656499} +12/28/2021 15:20:15 - INFO - codeparrot_training - Step 43096: {'lr': 2.3315656707569356e-05, 'samples': 22065664, 'steps': 43096, 'batch_loss/train': 0.6754862212110311} +12/28/2021 15:20:27 - INFO - codeparrot_training - Step 43097: {'lr': 2.330900991089413e-05, 'samples': 22066176, 'steps': 43097, 'batch_loss/train': 0.6893166624940932} +12/28/2021 15:20:38 - INFO - codeparrot_training - Step 43098: {'lr': 2.3302364015456627e-05, 'samples': 22066688, 'steps': 43098, 'batch_loss/train': 0.7079307828098536} +12/28/2021 15:20:49 - INFO - codeparrot_training - Step 43099: {'lr': 2.3295719021283302e-05, 'samples': 22067200, 'steps': 43099, 'batch_loss/train': 0.6915857857093215} +12/28/2021 15:21:01 - INFO - codeparrot_training - Step 43100: {'lr': 2.3289074928400683e-05, 'samples': 22067712, 'steps': 43100, 'batch_loss/train': 0.6359905041754246} +12/28/2021 15:21:11 - INFO - codeparrot_training - Step 43101: {'lr': 2.328243173683506e-05, 'samples': 22068224, 'steps': 43101, 'batch_loss/train': 0.7423398615792394} +12/28/2021 15:21:22 - INFO - codeparrot_training - Step 43102: {'lr': 2.327578944661288e-05, 'samples': 22068736, 'steps': 43102, 'batch_loss/train': 0.7067517009563744} +12/28/2021 15:21:36 - INFO - codeparrot_training - Step 43103: {'lr': 2.3269148057760537e-05, 'samples': 22069248, 'steps': 43103, 'batch_loss/train': 0.8157958053052425} +12/28/2021 15:21:47 - INFO - codeparrot_training - Step 43104: {'lr': 2.3262507570304487e-05, 'samples': 22069760, 'steps': 43104, 'batch_loss/train': 0.7248985578771681} +12/28/2021 15:21:57 - INFO - codeparrot_training - Step 43105: {'lr': 2.3255867984271067e-05, 'samples': 22070272, 'steps': 43105, 'batch_loss/train': 0.7054540645913221} +12/28/2021 15:22:09 - INFO - codeparrot_training - Step 43106: {'lr': 2.32492292996867e-05, 'samples': 22070784, 'steps': 43106, 'batch_loss/train': 0.7282456024549901} +12/28/2021 15:22:20 - INFO - codeparrot_training - Step 43107: {'lr': 2.3242591516577787e-05, 'samples': 22071296, 'steps': 43107, 'batch_loss/train': 0.7359017436392605} +12/28/2021 15:22:31 - INFO - codeparrot_training - Step 43108: {'lr': 2.3235954634970748e-05, 'samples': 22071808, 'steps': 43108, 'batch_loss/train': 0.8080026591196656} +12/28/2021 15:22:41 - INFO - codeparrot_training - Step 43109: {'lr': 2.322931865489189e-05, 'samples': 22072320, 'steps': 43109, 'batch_loss/train': 0.7214582883752882} +12/28/2021 15:22:55 - INFO - codeparrot_training - Step 43110: {'lr': 2.3222683576367588e-05, 'samples': 22072832, 'steps': 43110, 'batch_loss/train': 0.6372262947261333} +12/28/2021 15:23:06 - INFO - codeparrot_training - Step 43111: {'lr': 2.3216049399424373e-05, 'samples': 22073344, 'steps': 43111, 'batch_loss/train': 0.6157099662814289} +12/28/2021 15:23:16 - INFO - codeparrot_training - Step 43112: {'lr': 2.3209416124088472e-05, 'samples': 22073856, 'steps': 43112, 'batch_loss/train': 0.6735547501593828} +12/28/2021 15:23:29 - INFO - codeparrot_training - Step 43113: {'lr': 2.320278375038623e-05, 'samples': 22074368, 'steps': 43113, 'batch_loss/train': 0.6376667278818786} +12/28/2021 15:23:39 - INFO - codeparrot_training - Step 43114: {'lr': 2.319615227834418e-05, 'samples': 22074880, 'steps': 43114, 'batch_loss/train': 0.6451920890249312} +12/28/2021 15:23:50 - INFO - codeparrot_training - Step 43115: {'lr': 2.3189521707988548e-05, 'samples': 22075392, 'steps': 43115, 'batch_loss/train': 0.7125572999939322} +12/28/2021 15:24:02 - INFO - codeparrot_training - Step 43116: {'lr': 2.318289203934576e-05, 'samples': 22075904, 'steps': 43116, 'batch_loss/train': 0.7878445014357567} +12/28/2021 15:24:13 - INFO - codeparrot_training - Step 43117: {'lr': 2.3176263272442126e-05, 'samples': 22076416, 'steps': 43117, 'batch_loss/train': 0.7296596057713032} +12/28/2021 15:24:23 - INFO - codeparrot_training - Step 43118: {'lr': 2.3169635407304017e-05, 'samples': 22076928, 'steps': 43118, 'batch_loss/train': 0.6704115383327007} +12/28/2021 15:24:37 - INFO - codeparrot_training - Step 43119: {'lr': 2.316300844395783e-05, 'samples': 22077440, 'steps': 43119, 'batch_loss/train': 0.7471813107840717} +12/28/2021 15:24:48 - INFO - codeparrot_training - Step 43120: {'lr': 2.3156382382429868e-05, 'samples': 22077952, 'steps': 43120, 'batch_loss/train': 0.6262448341585696} +12/28/2021 15:24:58 - INFO - codeparrot_training - Step 43121: {'lr': 2.3149757222746425e-05, 'samples': 22078464, 'steps': 43121, 'batch_loss/train': 0.7701826076954603} +12/28/2021 15:25:09 - INFO - codeparrot_training - Step 43122: {'lr': 2.3143132964933923e-05, 'samples': 22078976, 'steps': 43122, 'batch_loss/train': 0.6645978097803891} +12/28/2021 15:25:21 - INFO - codeparrot_training - Step 43123: {'lr': 2.3136509609018697e-05, 'samples': 22079488, 'steps': 43123, 'batch_loss/train': 0.7395355748012662} +12/28/2021 15:25:32 - INFO - codeparrot_training - Step 43124: {'lr': 2.312988715502698e-05, 'samples': 22080000, 'steps': 43124, 'batch_loss/train': 0.6840144917368889} +12/28/2021 15:25:42 - INFO - codeparrot_training - Step 43125: {'lr': 2.3123265602985195e-05, 'samples': 22080512, 'steps': 43125, 'batch_loss/train': 0.755999656394124} +12/28/2021 15:25:55 - INFO - codeparrot_training - Step 43126: {'lr': 2.311664495291968e-05, 'samples': 22081024, 'steps': 43126, 'batch_loss/train': 0.7181067764759064} +12/28/2021 15:26:06 - INFO - codeparrot_training - Step 43127: {'lr': 2.3110025204856693e-05, 'samples': 22081536, 'steps': 43127, 'batch_loss/train': 0.7220814863685519} +12/28/2021 15:26:16 - INFO - codeparrot_training - Step 43128: {'lr': 2.3103406358822577e-05, 'samples': 22082048, 'steps': 43128, 'batch_loss/train': 0.40659166482510045} +12/28/2021 15:26:28 - INFO - codeparrot_training - Step 43129: {'lr': 2.3096788414843638e-05, 'samples': 22082560, 'steps': 43129, 'batch_loss/train': 0.6750170066952705} +12/28/2021 15:26:39 - INFO - codeparrot_training - Step 43130: {'lr': 2.3090171372946196e-05, 'samples': 22083072, 'steps': 43130, 'batch_loss/train': 0.7284466624259949} +12/28/2021 15:26:50 - INFO - codeparrot_training - Step 43131: {'lr': 2.308355523315653e-05, 'samples': 22083584, 'steps': 43131, 'batch_loss/train': 0.7102112622815184} +12/28/2021 15:27:00 - INFO - codeparrot_training - Step 43132: {'lr': 2.3076939995501007e-05, 'samples': 22084096, 'steps': 43132, 'batch_loss/train': 0.7094631213694811} +12/28/2021 15:27:14 - INFO - codeparrot_training - Step 43133: {'lr': 2.307032566000586e-05, 'samples': 22084608, 'steps': 43133, 'batch_loss/train': 0.7027429363224655} +12/28/2021 15:27:25 - INFO - codeparrot_training - Step 43134: {'lr': 2.3063712226697425e-05, 'samples': 22085120, 'steps': 43134, 'batch_loss/train': 0.807369664311409} +12/28/2021 15:27:35 - INFO - codeparrot_training - Step 43135: {'lr': 2.3057099695602017e-05, 'samples': 22085632, 'steps': 43135, 'batch_loss/train': 0.7572835590690374} +12/28/2021 15:27:48 - INFO - codeparrot_training - Step 43136: {'lr': 2.3050488066745813e-05, 'samples': 22086144, 'steps': 43136, 'batch_loss/train': 0.662425896152854} +12/28/2021 15:27:58 - INFO - codeparrot_training - Step 43137: {'lr': 2.3043877340155257e-05, 'samples': 22086656, 'steps': 43137, 'batch_loss/train': 0.5419895498198457} +12/28/2021 15:28:09 - INFO - codeparrot_training - Step 43138: {'lr': 2.3037267515856496e-05, 'samples': 22087168, 'steps': 43138, 'batch_loss/train': 0.7064165755291469} +12/28/2021 15:28:23 - INFO - codeparrot_training - Step 43139: {'lr': 2.3030658593875848e-05, 'samples': 22087680, 'steps': 43139, 'batch_loss/train': 0.7772517679259181} +12/28/2021 15:28:33 - INFO - codeparrot_training - Step 43140: {'lr': 2.3024050574239673e-05, 'samples': 22088192, 'steps': 43140, 'batch_loss/train': 0.6794301588088274} +12/28/2021 15:28:44 - INFO - codeparrot_training - Step 43141: {'lr': 2.3017443456974124e-05, 'samples': 22088704, 'steps': 43141, 'batch_loss/train': 0.6663486361503601} +12/28/2021 15:28:56 - INFO - codeparrot_training - Step 43142: {'lr': 2.3010837242105507e-05, 'samples': 22089216, 'steps': 43142, 'batch_loss/train': 0.6819377385545522} +12/28/2021 15:29:07 - INFO - codeparrot_training - Step 43143: {'lr': 2.3004231929660087e-05, 'samples': 22089728, 'steps': 43143, 'batch_loss/train': 0.7176671382039785} +12/28/2021 15:29:18 - INFO - codeparrot_training - Step 43144: {'lr': 2.2997627519664165e-05, 'samples': 22090240, 'steps': 43144, 'batch_loss/train': 0.7028111983090639} +12/28/2021 15:29:28 - INFO - codeparrot_training - Step 43145: {'lr': 2.2991024012143924e-05, 'samples': 22090752, 'steps': 43145, 'batch_loss/train': 0.7319592470303178} +12/28/2021 15:29:40 - INFO - codeparrot_training - Step 43146: {'lr': 2.2984421407125726e-05, 'samples': 22091264, 'steps': 43146, 'batch_loss/train': 0.6759917112067342} +12/28/2021 15:29:51 - INFO - codeparrot_training - Step 43147: {'lr': 2.2977819704635637e-05, 'samples': 22091776, 'steps': 43147, 'batch_loss/train': 0.7318739145994186} +12/28/2021 15:30:02 - INFO - codeparrot_training - Step 43148: {'lr': 2.2971218904700082e-05, 'samples': 22092288, 'steps': 43148, 'batch_loss/train': 0.84724150178954} +12/28/2021 15:30:16 - INFO - codeparrot_training - Step 43149: {'lr': 2.2964619007345282e-05, 'samples': 22092800, 'steps': 43149, 'batch_loss/train': 0.6486416340630967} +12/28/2021 15:30:26 - INFO - codeparrot_training - Step 43150: {'lr': 2.2958020012597336e-05, 'samples': 22093312, 'steps': 43150, 'batch_loss/train': 0.7218127655796707} +12/28/2021 15:30:37 - INFO - codeparrot_training - Step 43151: {'lr': 2.2951421920482636e-05, 'samples': 22093824, 'steps': 43151, 'batch_loss/train': 0.6296669850125909} +12/28/2021 15:30:48 - INFO - codeparrot_training - Step 43152: {'lr': 2.294482473102738e-05, 'samples': 22094336, 'steps': 43152, 'batch_loss/train': 1.4224592512473464} +12/28/2021 15:31:00 - INFO - codeparrot_training - Step 43153: {'lr': 2.293822844425772e-05, 'samples': 22094848, 'steps': 43153, 'batch_loss/train': 0.8083369699306786} +12/28/2021 15:31:11 - INFO - codeparrot_training - Step 43154: {'lr': 2.2931633060199935e-05, 'samples': 22095360, 'steps': 43154, 'batch_loss/train': 0.7627990879118443} +12/28/2021 15:31:21 - INFO - codeparrot_training - Step 43155: {'lr': 2.292503857888026e-05, 'samples': 22095872, 'steps': 43155, 'batch_loss/train': 0.6867809062823653} +12/28/2021 15:31:36 - INFO - codeparrot_training - Step 43156: {'lr': 2.291844500032486e-05, 'samples': 22096384, 'steps': 43156, 'batch_loss/train': 0.7198542454279959} +12/28/2021 15:31:46 - INFO - codeparrot_training - Step 43157: {'lr': 2.291185232456e-05, 'samples': 22096896, 'steps': 43157, 'batch_loss/train': 0.7461458649486303} +12/28/2021 15:31:57 - INFO - codeparrot_training - Step 43158: {'lr': 2.290526055161185e-05, 'samples': 22097408, 'steps': 43158, 'batch_loss/train': 0.9314841451123357} +12/28/2021 15:32:07 - INFO - codeparrot_training - Step 43159: {'lr': 2.289866968150664e-05, 'samples': 22097920, 'steps': 43159, 'batch_loss/train': 0.6858532793121412} +12/28/2021 15:32:19 - INFO - codeparrot_training - Step 43160: {'lr': 2.28920797142706e-05, 'samples': 22098432, 'steps': 43160, 'batch_loss/train': 0.8726948411203921} +12/28/2021 15:32:30 - INFO - codeparrot_training - Step 43161: {'lr': 2.288549064992987e-05, 'samples': 22098944, 'steps': 43161, 'batch_loss/train': 0.9170409636572003} +12/28/2021 15:32:41 - INFO - codeparrot_training - Step 43162: {'lr': 2.2878902488510685e-05, 'samples': 22099456, 'steps': 43162, 'batch_loss/train': 0.7611195277422667} +12/28/2021 15:32:53 - INFO - codeparrot_training - Step 43163: {'lr': 2.2872315230039243e-05, 'samples': 22099968, 'steps': 43163, 'batch_loss/train': 0.712560894433409} +12/28/2021 15:33:04 - INFO - codeparrot_training - Step 43164: {'lr': 2.286572887454169e-05, 'samples': 22100480, 'steps': 43164, 'batch_loss/train': 0.7107262202189304} +12/28/2021 15:33:14 - INFO - codeparrot_training - Step 43165: {'lr': 2.2859143422044205e-05, 'samples': 22100992, 'steps': 43165, 'batch_loss/train': 0.8078114492818713} +12/28/2021 15:33:26 - INFO - codeparrot_training - Step 43166: {'lr': 2.2852558872573092e-05, 'samples': 22101504, 'steps': 43166, 'batch_loss/train': 0.6376179392682388} +12/28/2021 15:33:37 - INFO - codeparrot_training - Step 43167: {'lr': 2.2845975226154364e-05, 'samples': 22102016, 'steps': 43167, 'batch_loss/train': 0.6246074677910656} +12/28/2021 15:33:48 - INFO - codeparrot_training - Step 43168: {'lr': 2.2839392482814304e-05, 'samples': 22102528, 'steps': 43168, 'batch_loss/train': 0.7547674803063273} +12/28/2021 15:34:01 - INFO - codeparrot_training - Step 43169: {'lr': 2.2832810642579028e-05, 'samples': 22103040, 'steps': 43169, 'batch_loss/train': 0.7092382605187595} +12/28/2021 15:34:12 - INFO - codeparrot_training - Step 43170: {'lr': 2.2826229705474712e-05, 'samples': 22103552, 'steps': 43170, 'batch_loss/train': 0.6799740819260478} +12/28/2021 15:34:22 - INFO - codeparrot_training - Step 43171: {'lr': 2.2819649671527526e-05, 'samples': 22104064, 'steps': 43171, 'batch_loss/train': 0.7666466697119176} +12/28/2021 15:34:33 - INFO - codeparrot_training - Step 43172: {'lr': 2.2813070540763704e-05, 'samples': 22104576, 'steps': 43172, 'batch_loss/train': 0.7980128787457943} +12/28/2021 15:34:45 - INFO - codeparrot_training - Step 43173: {'lr': 2.280649231320922e-05, 'samples': 22105088, 'steps': 43173, 'batch_loss/train': 0.6942748259752989} +12/28/2021 15:34:56 - INFO - codeparrot_training - Step 43174: {'lr': 2.279991498889039e-05, 'samples': 22105600, 'steps': 43174, 'batch_loss/train': 0.6961007695645094} +12/28/2021 15:35:06 - INFO - codeparrot_training - Step 43175: {'lr': 2.279333856783336e-05, 'samples': 22106112, 'steps': 43175, 'batch_loss/train': 0.7497346303425729} +12/28/2021 15:35:18 - INFO - codeparrot_training - Step 43176: {'lr': 2.2786763050064135e-05, 'samples': 22106624, 'steps': 43176, 'batch_loss/train': 0.7517734542489052} +12/28/2021 15:35:29 - INFO - codeparrot_training - Step 43177: {'lr': 2.2780188435609e-05, 'samples': 22107136, 'steps': 43177, 'batch_loss/train': 0.7852203836664557} +12/28/2021 15:35:40 - INFO - codeparrot_training - Step 43178: {'lr': 2.2773614724494073e-05, 'samples': 22107648, 'steps': 43178, 'batch_loss/train': 0.7454232652089559} +12/28/2021 15:35:54 - INFO - codeparrot_training - Step 43179: {'lr': 2.276704191674542e-05, 'samples': 22108160, 'steps': 43179, 'batch_loss/train': 0.7341366014443338} +12/28/2021 15:36:04 - INFO - codeparrot_training - Step 43180: {'lr': 2.276047001238915e-05, 'samples': 22108672, 'steps': 43180, 'batch_loss/train': 0.6698859492316842} +12/28/2021 15:36:15 - INFO - codeparrot_training - Step 43181: {'lr': 2.275389901145153e-05, 'samples': 22109184, 'steps': 43181, 'batch_loss/train': 0.6986425807699561} +12/28/2021 15:36:26 - INFO - codeparrot_training - Step 43182: {'lr': 2.2747328913958587e-05, 'samples': 22109696, 'steps': 43182, 'batch_loss/train': 0.6698526625987142} +12/28/2021 15:36:38 - INFO - codeparrot_training - Step 43183: {'lr': 2.2740759719936445e-05, 'samples': 22110208, 'steps': 43183, 'batch_loss/train': 1.0559787834063172} +12/28/2021 15:36:49 - INFO - codeparrot_training - Step 43184: {'lr': 2.2734191429411246e-05, 'samples': 22110720, 'steps': 43184, 'batch_loss/train': 0.9131473479792476} +12/28/2021 15:36:59 - INFO - codeparrot_training - Step 43185: {'lr': 2.272762404240905e-05, 'samples': 22111232, 'steps': 43185, 'batch_loss/train': 0.7253661500290036} +12/28/2021 15:37:13 - INFO - codeparrot_training - Step 43186: {'lr': 2.2721057558956036e-05, 'samples': 22111744, 'steps': 43186, 'batch_loss/train': 0.6796532515436411} +12/28/2021 15:37:24 - INFO - codeparrot_training - Step 43187: {'lr': 2.2714491979078295e-05, 'samples': 22112256, 'steps': 43187, 'batch_loss/train': 0.6887432671501301} +12/28/2021 15:37:35 - INFO - codeparrot_training - Step 43188: {'lr': 2.2707927302801885e-05, 'samples': 22112768, 'steps': 43188, 'batch_loss/train': 0.6478459275094792} +12/28/2021 15:37:47 - INFO - codeparrot_training - Step 43189: {'lr': 2.2701363530152953e-05, 'samples': 22113280, 'steps': 43189, 'batch_loss/train': 0.7117877127602696} +12/28/2021 15:37:57 - INFO - codeparrot_training - Step 43190: {'lr': 2.269480066115759e-05, 'samples': 22113792, 'steps': 43190, 'batch_loss/train': 0.6683075542096049} +12/28/2021 15:38:08 - INFO - codeparrot_training - Step 43191: {'lr': 2.26882386958418e-05, 'samples': 22114304, 'steps': 43191, 'batch_loss/train': 0.6771488017402589} +12/28/2021 15:38:19 - INFO - codeparrot_training - Step 43192: {'lr': 2.2681677634231844e-05, 'samples': 22114816, 'steps': 43192, 'batch_loss/train': 0.6998860640451312} +12/28/2021 15:38:31 - INFO - codeparrot_training - Step 43193: {'lr': 2.2675117476353646e-05, 'samples': 22115328, 'steps': 43193, 'batch_loss/train': 0.6989975366741419} +12/28/2021 15:38:41 - INFO - codeparrot_training - Step 43194: {'lr': 2.2668558222233375e-05, 'samples': 22115840, 'steps': 43194, 'batch_loss/train': 0.7175268763676286} +12/28/2021 15:38:52 - INFO - codeparrot_training - Step 43195: {'lr': 2.2661999871897042e-05, 'samples': 22116352, 'steps': 43195, 'batch_loss/train': 0.5902242232114077} +12/28/2021 15:39:06 - INFO - codeparrot_training - Step 43196: {'lr': 2.2655442425370793e-05, 'samples': 22116864, 'steps': 43196, 'batch_loss/train': 1.285088169388473} +12/28/2021 15:39:17 - INFO - codeparrot_training - Step 43197: {'lr': 2.2648885882680632e-05, 'samples': 22117376, 'steps': 43197, 'batch_loss/train': 0.6445663319900632} +12/28/2021 15:39:27 - INFO - codeparrot_training - Step 43198: {'lr': 2.2642330243852678e-05, 'samples': 22117888, 'steps': 43198, 'batch_loss/train': 0.6749034058302641} +12/28/2021 15:39:39 - INFO - codeparrot_training - Step 43199: {'lr': 2.2635775508912966e-05, 'samples': 22118400, 'steps': 43199, 'batch_loss/train': 0.6811514087021351} +12/28/2021 15:39:50 - INFO - codeparrot_training - Step 43200: {'lr': 2.262922167788753e-05, 'samples': 22118912, 'steps': 43200, 'batch_loss/train': 0.5614826739765704} +12/28/2021 15:40:01 - INFO - codeparrot_training - Step 43201: {'lr': 2.2622668750802546e-05, 'samples': 22119424, 'steps': 43201, 'batch_loss/train': 0.7196296658366919} +12/28/2021 15:40:13 - INFO - codeparrot_training - Step 43202: {'lr': 2.2616116727683876e-05, 'samples': 22119936, 'steps': 43202, 'batch_loss/train': 0.6342722952831537} +12/28/2021 15:40:23 - INFO - codeparrot_training - Step 43203: {'lr': 2.2609565608557704e-05, 'samples': 22120448, 'steps': 43203, 'batch_loss/train': 0.7318437066860497} +12/28/2021 15:40:34 - INFO - codeparrot_training - Step 43204: {'lr': 2.260301539345008e-05, 'samples': 22120960, 'steps': 43204, 'batch_loss/train': 0.5769135761074722} +12/28/2021 15:40:44 - INFO - codeparrot_training - Step 43205: {'lr': 2.2596466082386968e-05, 'samples': 22121472, 'steps': 43205, 'batch_loss/train': 0.6534170005470514} +12/28/2021 15:40:58 - INFO - codeparrot_training - Step 43206: {'lr': 2.2589917675394394e-05, 'samples': 22121984, 'steps': 43206, 'batch_loss/train': 0.6903828596696258} +12/28/2021 15:41:09 - INFO - codeparrot_training - Step 43207: {'lr': 2.2583370172498537e-05, 'samples': 22122496, 'steps': 43207, 'batch_loss/train': 0.6697103129699826} +12/28/2021 15:41:20 - INFO - codeparrot_training - Step 43208: {'lr': 2.2576823573725285e-05, 'samples': 22123008, 'steps': 43208, 'batch_loss/train': 0.7387136324541643} +12/28/2021 15:41:32 - INFO - codeparrot_training - Step 43209: {'lr': 2.257027787910071e-05, 'samples': 22123520, 'steps': 43209, 'batch_loss/train': 0.7042991383932531} +12/28/2021 15:41:42 - INFO - codeparrot_training - Step 43210: {'lr': 2.256373308865084e-05, 'samples': 22124032, 'steps': 43210, 'batch_loss/train': 0.6265113013796508} +12/28/2021 15:41:53 - INFO - codeparrot_training - Step 43211: {'lr': 2.2557189202401685e-05, 'samples': 22124544, 'steps': 43211, 'batch_loss/train': 0.7859168858267367} +12/28/2021 15:42:05 - INFO - codeparrot_training - Step 43212: {'lr': 2.2550646220379277e-05, 'samples': 22125056, 'steps': 43212, 'batch_loss/train': 0.5394850022275932} +12/28/2021 15:42:16 - INFO - codeparrot_training - Step 43213: {'lr': 2.2544104142609602e-05, 'samples': 22125568, 'steps': 43213, 'batch_loss/train': 0.6655761594884098} +12/28/2021 15:42:27 - INFO - codeparrot_training - Step 43214: {'lr': 2.2537562969118684e-05, 'samples': 22126080, 'steps': 43214, 'batch_loss/train': 0.7326232839841396} +12/28/2021 15:42:41 - INFO - codeparrot_training - Step 43215: {'lr': 2.2531022699932542e-05, 'samples': 22126592, 'steps': 43215, 'batch_loss/train': 0.7360204723663628} +12/28/2021 15:42:52 - INFO - codeparrot_training - Step 43216: {'lr': 2.25244833350772e-05, 'samples': 22127104, 'steps': 43216, 'batch_loss/train': 0.9027981921099126} +12/28/2021 15:43:02 - INFO - codeparrot_training - Step 43217: {'lr': 2.251794487457856e-05, 'samples': 22127616, 'steps': 43217, 'batch_loss/train': 0.7145407389616594} +12/28/2021 15:43:13 - INFO - codeparrot_training - Step 43218: {'lr': 2.251140731846274e-05, 'samples': 22128128, 'steps': 43218, 'batch_loss/train': 0.7406329791992903} +12/28/2021 15:43:25 - INFO - codeparrot_training - Step 43219: {'lr': 2.2504870666755605e-05, 'samples': 22128640, 'steps': 43219, 'batch_loss/train': 0.7365324823185802} +12/28/2021 15:43:36 - INFO - codeparrot_training - Step 43220: {'lr': 2.2498334919483247e-05, 'samples': 22129152, 'steps': 43220, 'batch_loss/train': 0.7001616570632905} +12/28/2021 15:43:46 - INFO - codeparrot_training - Step 43221: {'lr': 2.2491800076671593e-05, 'samples': 22129664, 'steps': 43221, 'batch_loss/train': 0.6447814349085093} +12/28/2021 15:43:58 - INFO - codeparrot_training - Step 43222: {'lr': 2.2485266138346616e-05, 'samples': 22130176, 'steps': 43222, 'batch_loss/train': 0.5987016583094373} +12/28/2021 15:44:09 - INFO - codeparrot_training - Step 43223: {'lr': 2.2478733104534327e-05, 'samples': 22130688, 'steps': 43223, 'batch_loss/train': 0.6675793016329408} +12/28/2021 15:44:20 - INFO - codeparrot_training - Step 43224: {'lr': 2.2472200975260703e-05, 'samples': 22131200, 'steps': 43224, 'batch_loss/train': 0.7708332027541474} +12/28/2021 15:44:34 - INFO - codeparrot_training - Step 43225: {'lr': 2.246566975055167e-05, 'samples': 22131712, 'steps': 43225, 'batch_loss/train': 0.6740316440118477} +12/28/2021 15:44:45 - INFO - codeparrot_training - Step 43226: {'lr': 2.2459139430433233e-05, 'samples': 22132224, 'steps': 43226, 'batch_loss/train': 0.8713417379185557} +12/28/2021 15:44:55 - INFO - codeparrot_training - Step 43227: {'lr': 2.2452610014931402e-05, 'samples': 22132736, 'steps': 43227, 'batch_loss/train': 0.6982195246964693} +12/28/2021 15:45:06 - INFO - codeparrot_training - Step 43228: {'lr': 2.2446081504071987e-05, 'samples': 22133248, 'steps': 43228, 'batch_loss/train': 0.7139587252167985} +12/28/2021 15:45:18 - INFO - codeparrot_training - Step 43229: {'lr': 2.243955389788105e-05, 'samples': 22133760, 'steps': 43229, 'batch_loss/train': 0.6724719735793769} +12/28/2021 15:45:29 - INFO - codeparrot_training - Step 43230: {'lr': 2.243302719638457e-05, 'samples': 22134272, 'steps': 43230, 'batch_loss/train': 0.7042778767645359} +12/28/2021 15:45:39 - INFO - codeparrot_training - Step 43231: {'lr': 2.242650139960842e-05, 'samples': 22134784, 'steps': 43231, 'batch_loss/train': 0.6572889778763056} +12/28/2021 15:45:51 - INFO - codeparrot_training - Step 43232: {'lr': 2.2419976507578517e-05, 'samples': 22135296, 'steps': 43232, 'batch_loss/train': 0.7352759130299091} +12/28/2021 15:46:02 - INFO - codeparrot_training - Step 43233: {'lr': 2.2413452520320952e-05, 'samples': 22135808, 'steps': 43233, 'batch_loss/train': 0.6568824173882604} +12/28/2021 15:46:12 - INFO - codeparrot_training - Step 43234: {'lr': 2.240692943786152e-05, 'samples': 22136320, 'steps': 43234, 'batch_loss/train': 0.7585138129070401} +12/28/2021 15:46:26 - INFO - codeparrot_training - Step 43235: {'lr': 2.240040726022616e-05, 'samples': 22136832, 'steps': 43235, 'batch_loss/train': 0.7424291931092739} +12/28/2021 15:46:37 - INFO - codeparrot_training - Step 43236: {'lr': 2.2393885987440908e-05, 'samples': 22137344, 'steps': 43236, 'batch_loss/train': 0.7863342259079218} +12/28/2021 15:46:48 - INFO - codeparrot_training - Step 43237: {'lr': 2.2387365619531613e-05, 'samples': 22137856, 'steps': 43237, 'batch_loss/train': 0.7418826669454575} +12/28/2021 15:47:00 - INFO - codeparrot_training - Step 43238: {'lr': 2.2380846156524194e-05, 'samples': 22138368, 'steps': 43238, 'batch_loss/train': 0.7188719834666699} +12/28/2021 15:47:10 - INFO - codeparrot_training - Step 43239: {'lr': 2.237432759844457e-05, 'samples': 22138880, 'steps': 43239, 'batch_loss/train': 0.6417617936385795} +12/28/2021 15:47:21 - INFO - codeparrot_training - Step 43240: {'lr': 2.23678099453187e-05, 'samples': 22139392, 'steps': 43240, 'batch_loss/train': 0.6735881064087152} +12/28/2021 15:47:32 - INFO - codeparrot_training - Step 43241: {'lr': 2.236129319717245e-05, 'samples': 22139904, 'steps': 43241, 'batch_loss/train': 0.8048897637054324} +12/28/2021 15:47:46 - INFO - codeparrot_training - Step 43242: {'lr': 2.2354777354031798e-05, 'samples': 22140416, 'steps': 43242, 'batch_loss/train': 0.6386401169002056} +12/28/2021 15:47:56 - INFO - codeparrot_training - Step 43243: {'lr': 2.2348262415922498e-05, 'samples': 22140928, 'steps': 43243, 'batch_loss/train': 0.6465968115953729} +12/28/2021 15:48:07 - INFO - codeparrot_training - Step 43244: {'lr': 2.2341748382870647e-05, 'samples': 22141440, 'steps': 43244, 'batch_loss/train': 0.6769424863159657} +12/28/2021 15:48:19 - INFO - codeparrot_training - Step 43245: {'lr': 2.2335235254901997e-05, 'samples': 22141952, 'steps': 43245, 'batch_loss/train': 0.6246438961825334} +12/28/2021 15:48:30 - INFO - codeparrot_training - Step 43246: {'lr': 2.2328723032042475e-05, 'samples': 22142464, 'steps': 43246, 'batch_loss/train': 0.7362667531706393} +12/28/2021 15:48:41 - INFO - codeparrot_training - Step 43247: {'lr': 2.2322211714318004e-05, 'samples': 22142976, 'steps': 43247, 'batch_loss/train': 0.7267638598568738} +12/28/2021 15:48:51 - INFO - codeparrot_training - Step 43248: {'lr': 2.231570130175442e-05, 'samples': 22143488, 'steps': 43248, 'batch_loss/train': 0.5008768349944148} +12/28/2021 15:49:04 - INFO - codeparrot_training - Step 43249: {'lr': 2.230919179437768e-05, 'samples': 22144000, 'steps': 43249, 'batch_loss/train': 0.6468604592955671} +12/28/2021 15:49:14 - INFO - codeparrot_training - Step 43250: {'lr': 2.2302683192213598e-05, 'samples': 22144512, 'steps': 43250, 'batch_loss/train': 0.8469512906158343} +12/28/2021 15:49:25 - INFO - codeparrot_training - Step 43251: {'lr': 2.2296175495288062e-05, 'samples': 22145024, 'steps': 43251, 'batch_loss/train': 0.9310693680308759} +12/28/2021 15:49:37 - INFO - codeparrot_training - Step 43252: {'lr': 2.2289668703626976e-05, 'samples': 22145536, 'steps': 43252, 'batch_loss/train': 0.7027691397815943} +12/28/2021 15:49:48 - INFO - codeparrot_training - Step 43253: {'lr': 2.2283162817256232e-05, 'samples': 22146048, 'steps': 43253, 'batch_loss/train': 0.7342911288142204} +12/28/2021 15:49:58 - INFO - codeparrot_training - Step 43254: {'lr': 2.227665783620156e-05, 'samples': 22146560, 'steps': 43254, 'batch_loss/train': 0.6739062296692282} +12/28/2021 15:50:12 - INFO - codeparrot_training - Step 43255: {'lr': 2.227015376048894e-05, 'samples': 22147072, 'steps': 43255, 'batch_loss/train': 0.6422620805678889} +12/28/2021 15:50:23 - INFO - codeparrot_training - Step 43256: {'lr': 2.2263650590144265e-05, 'samples': 22147584, 'steps': 43256, 'batch_loss/train': 0.6986910989508033} +12/28/2021 15:50:33 - INFO - codeparrot_training - Step 43257: {'lr': 2.2257148325193267e-05, 'samples': 22148096, 'steps': 43257, 'batch_loss/train': 0.7043307768180966} +12/28/2021 15:50:45 - INFO - codeparrot_training - Step 43258: {'lr': 2.2250646965661815e-05, 'samples': 22148608, 'steps': 43258, 'batch_loss/train': 0.6326108139473945} +12/28/2021 15:50:56 - INFO - codeparrot_training - Step 43259: {'lr': 2.2244146511575857e-05, 'samples': 22149120, 'steps': 43259, 'batch_loss/train': 0.5912734722078312} +12/28/2021 15:51:07 - INFO - codeparrot_training - Step 43260: {'lr': 2.2237646962961184e-05, 'samples': 22149632, 'steps': 43260, 'batch_loss/train': 0.6832010226789862} +12/28/2021 15:51:17 - INFO - codeparrot_training - Step 43261: {'lr': 2.2231148319843547e-05, 'samples': 22150144, 'steps': 43261, 'batch_loss/train': 0.807847079820931} +12/28/2021 15:51:30 - INFO - codeparrot_training - Step 43262: {'lr': 2.2224650582248958e-05, 'samples': 22150656, 'steps': 43262, 'batch_loss/train': 0.5854616619180888} +12/28/2021 15:51:40 - INFO - codeparrot_training - Step 43263: {'lr': 2.2218153750203114e-05, 'samples': 22151168, 'steps': 43263, 'batch_loss/train': 0.7271020419429988} +12/28/2021 15:51:51 - INFO - codeparrot_training - Step 43264: {'lr': 2.2211657823731886e-05, 'samples': 22151680, 'steps': 43264, 'batch_loss/train': 0.6674121785908937} +12/28/2021 15:52:05 - INFO - codeparrot_training - Step 43265: {'lr': 2.2205162802861085e-05, 'samples': 22152192, 'steps': 43265, 'batch_loss/train': 0.691800672095269} +12/28/2021 15:52:16 - INFO - codeparrot_training - Step 43266: {'lr': 2.2198668687616553e-05, 'samples': 22152704, 'steps': 43266, 'batch_loss/train': 0.5494788854848593} +12/28/2021 15:52:26 - INFO - codeparrot_training - Step 43267: {'lr': 2.21921754780241e-05, 'samples': 22153216, 'steps': 43267, 'batch_loss/train': 0.7636395515874028} +12/28/2021 15:52:38 - INFO - codeparrot_training - Step 43268: {'lr': 2.218568317410957e-05, 'samples': 22153728, 'steps': 43268, 'batch_loss/train': 0.666564267128706} +12/28/2021 15:52:49 - INFO - codeparrot_training - Step 43269: {'lr': 2.2179191775898668e-05, 'samples': 22154240, 'steps': 43269, 'batch_loss/train': 0.6917397221550345} +12/28/2021 15:53:00 - INFO - codeparrot_training - Step 43270: {'lr': 2.2172701283417334e-05, 'samples': 22154752, 'steps': 43270, 'batch_loss/train': 0.6332859632093459} +12/28/2021 15:53:10 - INFO - codeparrot_training - Step 43271: {'lr': 2.2166211696691336e-05, 'samples': 22155264, 'steps': 43271, 'batch_loss/train': 0.6375606229994446} +12/28/2021 15:53:25 - INFO - codeparrot_training - Step 43272: {'lr': 2.2159723015746397e-05, 'samples': 22155776, 'steps': 43272, 'batch_loss/train': 0.7208762292284518} +12/28/2021 15:53:35 - INFO - codeparrot_training - Step 43273: {'lr': 2.2153235240608415e-05, 'samples': 22156288, 'steps': 43273, 'batch_loss/train': 0.9431757256388664} +12/28/2021 15:53:46 - INFO - codeparrot_training - Step 43274: {'lr': 2.214674837130312e-05, 'samples': 22156800, 'steps': 43274, 'batch_loss/train': 0.6308350601466373} +12/28/2021 15:53:58 - INFO - codeparrot_training - Step 43275: {'lr': 2.2140262407856327e-05, 'samples': 22157312, 'steps': 43275, 'batch_loss/train': 0.7637553154490888} +12/28/2021 15:54:09 - INFO - codeparrot_training - Step 43276: {'lr': 2.2133777350293815e-05, 'samples': 22157824, 'steps': 43276, 'batch_loss/train': 0.7143760037142783} +12/28/2021 15:54:19 - INFO - codeparrot_training - Step 43277: {'lr': 2.2127293198641347e-05, 'samples': 22158336, 'steps': 43277, 'batch_loss/train': 0.863854699768126} +12/28/2021 15:54:31 - INFO - codeparrot_training - Step 43278: {'lr': 2.2120809952924732e-05, 'samples': 22158848, 'steps': 43278, 'batch_loss/train': 0.6489950162358582} +12/28/2021 15:54:42 - INFO - codeparrot_training - Step 43279: {'lr': 2.2114327613169725e-05, 'samples': 22159360, 'steps': 43279, 'batch_loss/train': 0.631156057206681} +12/28/2021 15:54:53 - INFO - codeparrot_training - Step 43280: {'lr': 2.2107846179402118e-05, 'samples': 22159872, 'steps': 43280, 'batch_loss/train': 0.6928548724390566} +12/28/2021 15:55:03 - INFO - codeparrot_training - Step 43281: {'lr': 2.2101365651647636e-05, 'samples': 22160384, 'steps': 43281, 'batch_loss/train': 0.7976600434631109} +12/28/2021 15:55:16 - INFO - codeparrot_training - Step 43282: {'lr': 2.2094886029932148e-05, 'samples': 22160896, 'steps': 43282, 'batch_loss/train': 0.7305807955563068} +12/28/2021 15:55:26 - INFO - codeparrot_training - Step 43283: {'lr': 2.208840731428127e-05, 'samples': 22161408, 'steps': 43283, 'batch_loss/train': 0.7166097613517195} +12/28/2021 15:55:37 - INFO - codeparrot_training - Step 43284: {'lr': 2.208192950472082e-05, 'samples': 22161920, 'steps': 43284, 'batch_loss/train': 0.7632069159299135} +12/28/2021 15:55:51 - INFO - codeparrot_training - Step 43285: {'lr': 2.2075452601276608e-05, 'samples': 22162432, 'steps': 43285, 'batch_loss/train': 0.810283275321126} +12/28/2021 15:56:01 - INFO - codeparrot_training - Step 43286: {'lr': 2.2068976603974332e-05, 'samples': 22162944, 'steps': 43286, 'batch_loss/train': 0.6409243638627231} +12/28/2021 15:56:12 - INFO - codeparrot_training - Step 43287: {'lr': 2.2062501512839673e-05, 'samples': 22163456, 'steps': 43287, 'batch_loss/train': 0.6491158357821405} +12/28/2021 15:56:24 - INFO - codeparrot_training - Step 43288: {'lr': 2.205602732789852e-05, 'samples': 22163968, 'steps': 43288, 'batch_loss/train': 0.7643880667164922} +12/28/2021 15:56:35 - INFO - codeparrot_training - Step 43289: {'lr': 2.2049554049176524e-05, 'samples': 22164480, 'steps': 43289, 'batch_loss/train': 2.2011435497552156} +12/28/2021 15:56:46 - INFO - codeparrot_training - Step 43290: {'lr': 2.2043081676699382e-05, 'samples': 22164992, 'steps': 43290, 'batch_loss/train': 0.7043631562264636} +12/28/2021 15:56:58 - INFO - codeparrot_training - Step 43291: {'lr': 2.2036610210492914e-05, 'samples': 22165504, 'steps': 43291, 'batch_loss/train': 0.732106045819819} +12/28/2021 15:57:08 - INFO - codeparrot_training - Step 43292: {'lr': 2.2030139650582786e-05, 'samples': 22166016, 'steps': 43292, 'batch_loss/train': 0.7070007973816246} +12/28/2021 15:57:19 - INFO - codeparrot_training - Step 43293: {'lr': 2.202366999699476e-05, 'samples': 22166528, 'steps': 43293, 'batch_loss/train': 0.5293081232812256} +12/28/2021 15:57:30 - INFO - codeparrot_training - Step 43294: {'lr': 2.2017201249754592e-05, 'samples': 22167040, 'steps': 43294, 'batch_loss/train': 0.7463192762807012} +12/28/2021 15:57:44 - INFO - codeparrot_training - Step 43295: {'lr': 2.2010733408887845e-05, 'samples': 22167552, 'steps': 43295, 'batch_loss/train': 0.7719437456689775} +12/28/2021 15:57:54 - INFO - codeparrot_training - Step 43296: {'lr': 2.2004266474420388e-05, 'samples': 22168064, 'steps': 43296, 'batch_loss/train': 0.7097575557418168} +12/28/2021 15:58:05 - INFO - codeparrot_training - Step 43297: {'lr': 2.1997800446377918e-05, 'samples': 22168576, 'steps': 43297, 'batch_loss/train': 0.7072693705558777} +12/28/2021 15:58:17 - INFO - codeparrot_training - Step 43298: {'lr': 2.1991335324786005e-05, 'samples': 22169088, 'steps': 43298, 'batch_loss/train': 0.7802169601200148} +12/28/2021 15:58:28 - INFO - codeparrot_training - Step 43299: {'lr': 2.1984871109670568e-05, 'samples': 22169600, 'steps': 43299, 'batch_loss/train': 0.7832276336848736} +12/28/2021 15:58:38 - INFO - codeparrot_training - Step 43300: {'lr': 2.1978407801057116e-05, 'samples': 22170112, 'steps': 43300, 'batch_loss/train': 0.7124221157282591} +12/28/2021 15:58:52 - INFO - codeparrot_training - Step 43301: {'lr': 2.1971945398971433e-05, 'samples': 22170624, 'steps': 43301, 'batch_loss/train': 0.7662296965718269} +12/28/2021 15:59:03 - INFO - codeparrot_training - Step 43302: {'lr': 2.1965483903439194e-05, 'samples': 22171136, 'steps': 43302, 'batch_loss/train': 0.8087318586185575} +12/28/2021 15:59:14 - INFO - codeparrot_training - Step 43303: {'lr': 2.195902331448607e-05, 'samples': 22171648, 'steps': 43303, 'batch_loss/train': 0.7734012929722667} +12/28/2021 15:59:26 - INFO - codeparrot_training - Step 43304: {'lr': 2.1952563632137794e-05, 'samples': 22172160, 'steps': 43304, 'batch_loss/train': 0.6612389840884134} +12/28/2021 15:59:36 - INFO - codeparrot_training - Step 43305: {'lr': 2.1946104856419984e-05, 'samples': 22172672, 'steps': 43305, 'batch_loss/train': 0.7069524971302599} +12/28/2021 15:59:47 - INFO - codeparrot_training - Step 43306: {'lr': 2.193964698735837e-05, 'samples': 22173184, 'steps': 43306, 'batch_loss/train': 0.5636952693457715} +12/28/2021 15:59:58 - INFO - codeparrot_training - Step 43307: {'lr': 2.1933190024978623e-05, 'samples': 22173696, 'steps': 43307, 'batch_loss/train': 0.6378474442171864} +12/28/2021 16:00:10 - INFO - codeparrot_training - Step 43308: {'lr': 2.1926733969306417e-05, 'samples': 22174208, 'steps': 43308, 'batch_loss/train': 0.6290567773976363} +12/28/2021 16:00:21 - INFO - codeparrot_training - Step 43309: {'lr': 2.1920278820367317e-05, 'samples': 22174720, 'steps': 43309, 'batch_loss/train': 0.6828675165306777} +12/28/2021 16:00:31 - INFO - codeparrot_training - Step 43310: {'lr': 2.1913824578187135e-05, 'samples': 22175232, 'steps': 43310, 'batch_loss/train': 0.8220963790081441} +12/28/2021 16:00:45 - INFO - codeparrot_training - Step 43311: {'lr': 2.190737124279149e-05, 'samples': 22175744, 'steps': 43311, 'batch_loss/train': 0.7883556028828025} +12/28/2021 16:00:56 - INFO - codeparrot_training - Step 43312: {'lr': 2.1900918814205943e-05, 'samples': 22176256, 'steps': 43312, 'batch_loss/train': 0.7443929431028664} +12/28/2021 16:01:06 - INFO - codeparrot_training - Step 43313: {'lr': 2.18944672924562e-05, 'samples': 22176768, 'steps': 43313, 'batch_loss/train': 0.7248285822570324} +12/28/2021 16:01:19 - INFO - codeparrot_training - Step 43314: {'lr': 2.1888016677568012e-05, 'samples': 22177280, 'steps': 43314, 'batch_loss/train': 0.5844577640818898} +12/28/2021 16:01:29 - INFO - codeparrot_training - Step 43315: {'lr': 2.188156696956689e-05, 'samples': 22177792, 'steps': 43315, 'batch_loss/train': 0.7086435329401866} +12/28/2021 16:01:40 - INFO - codeparrot_training - Step 43316: {'lr': 2.1875118168478535e-05, 'samples': 22178304, 'steps': 43316, 'batch_loss/train': 0.6067683612927794} +12/28/2021 16:01:51 - INFO - codeparrot_training - Step 43317: {'lr': 2.1868670274328568e-05, 'samples': 22178816, 'steps': 43317, 'batch_loss/train': 0.583428226178512} +12/28/2021 16:02:03 - INFO - codeparrot_training - Step 43318: {'lr': 2.186222328714263e-05, 'samples': 22179328, 'steps': 43318, 'batch_loss/train': 0.6396189811639488} +12/28/2021 16:02:14 - INFO - codeparrot_training - Step 43319: {'lr': 2.1855777206946343e-05, 'samples': 22179840, 'steps': 43319, 'batch_loss/train': 0.594738052750472} +12/28/2021 16:02:24 - INFO - codeparrot_training - Step 43320: {'lr': 2.1849332033765378e-05, 'samples': 22180352, 'steps': 43320, 'batch_loss/train': 0.672515444457531} +12/28/2021 16:02:38 - INFO - codeparrot_training - Step 43321: {'lr': 2.1842887767625247e-05, 'samples': 22180864, 'steps': 43321, 'batch_loss/train': 0.8332160345744342} +12/28/2021 16:02:49 - INFO - codeparrot_training - Step 43322: {'lr': 2.1836444408551702e-05, 'samples': 22181376, 'steps': 43322, 'batch_loss/train': 0.8264926010742784} +12/28/2021 16:02:59 - INFO - codeparrot_training - Step 43323: {'lr': 2.183000195657034e-05, 'samples': 22181888, 'steps': 43323, 'batch_loss/train': 0.5611179923871532} +12/28/2021 16:03:12 - INFO - codeparrot_training - Step 43324: {'lr': 2.1823560411706635e-05, 'samples': 22182400, 'steps': 43324, 'batch_loss/train': 0.6796143655665219} +12/28/2021 16:03:22 - INFO - codeparrot_training - Step 43325: {'lr': 2.1817119773986398e-05, 'samples': 22182912, 'steps': 43325, 'batch_loss/train': 0.6947389226406813} +12/28/2021 16:03:33 - INFO - codeparrot_training - Step 43326: {'lr': 2.1810680043435087e-05, 'samples': 22183424, 'steps': 43326, 'batch_loss/train': 0.7310707119759172} +12/28/2021 16:03:46 - INFO - codeparrot_training - Step 43327: {'lr': 2.1804241220078346e-05, 'samples': 22183936, 'steps': 43327, 'batch_loss/train': 0.5977224005910102} +12/28/2021 16:03:56 - INFO - codeparrot_training - Step 43328: {'lr': 2.1797803303941766e-05, 'samples': 22184448, 'steps': 43328, 'batch_loss/train': 1.1736546885222197} +12/28/2021 16:04:07 - INFO - codeparrot_training - Step 43329: {'lr': 2.1791366295050964e-05, 'samples': 22184960, 'steps': 43329, 'batch_loss/train': 0.5920111741870642} +12/28/2021 16:04:17 - INFO - codeparrot_training - Step 43330: {'lr': 2.1784930193431528e-05, 'samples': 22185472, 'steps': 43330, 'batch_loss/train': 0.703611153177917} +12/28/2021 16:04:30 - INFO - codeparrot_training - Step 43331: {'lr': 2.1778494999109055e-05, 'samples': 22185984, 'steps': 43331, 'batch_loss/train': 0.7042631795629859} +12/28/2021 16:04:40 - INFO - codeparrot_training - Step 43332: {'lr': 2.1772060712109078e-05, 'samples': 22186496, 'steps': 43332, 'batch_loss/train': 0.7136447392404079} +12/28/2021 16:04:51 - INFO - codeparrot_training - Step 43333: {'lr': 2.1765627332457238e-05, 'samples': 22187008, 'steps': 43333, 'batch_loss/train': 0.6971557354554534} +12/28/2021 16:05:05 - INFO - codeparrot_training - Step 43334: {'lr': 2.1759194860179133e-05, 'samples': 22187520, 'steps': 43334, 'batch_loss/train': 0.513481305912137} +12/28/2021 16:05:15 - INFO - codeparrot_training - Step 43335: {'lr': 2.1752763295300183e-05, 'samples': 22188032, 'steps': 43335, 'batch_loss/train': 0.7349740946665406} +12/28/2021 16:05:26 - INFO - codeparrot_training - Step 43336: {'lr': 2.1746332637846116e-05, 'samples': 22188544, 'steps': 43336, 'batch_loss/train': 0.6128545513493009} +12/28/2021 16:05:38 - INFO - codeparrot_training - Step 43337: {'lr': 2.1739902887842473e-05, 'samples': 22189056, 'steps': 43337, 'batch_loss/train': 0.7628027908504009} +12/28/2021 16:05:49 - INFO - codeparrot_training - Step 43338: {'lr': 2.1733474045314782e-05, 'samples': 22189568, 'steps': 43338, 'batch_loss/train': 0.6292988726636395} +12/28/2021 16:06:00 - INFO - codeparrot_training - Step 43339: {'lr': 2.1727046110288528e-05, 'samples': 22190080, 'steps': 43339, 'batch_loss/train': 0.6111253134440631} +12/28/2021 16:06:10 - INFO - codeparrot_training - Step 43340: {'lr': 2.172061908278944e-05, 'samples': 22190592, 'steps': 43340, 'batch_loss/train': 0.6780309299938381} +12/28/2021 16:06:24 - INFO - codeparrot_training - Step 43341: {'lr': 2.171419296284294e-05, 'samples': 22191104, 'steps': 43341, 'batch_loss/train': 0.5961466294247657} +12/28/2021 16:06:35 - INFO - codeparrot_training - Step 43342: {'lr': 2.1707767750474615e-05, 'samples': 22191616, 'steps': 43342, 'batch_loss/train': 0.7909635016694665} +12/28/2021 16:06:45 - INFO - codeparrot_training - Step 43343: {'lr': 2.170134344571001e-05, 'samples': 22192128, 'steps': 43343, 'batch_loss/train': 0.7378318924456835} +12/28/2021 16:06:58 - INFO - codeparrot_training - Step 43344: {'lr': 2.1694920048574652e-05, 'samples': 22192640, 'steps': 43344, 'batch_loss/train': 0.7941763941198587} +12/28/2021 16:07:08 - INFO - codeparrot_training - Step 43345: {'lr': 2.1688497559094078e-05, 'samples': 22193152, 'steps': 43345, 'batch_loss/train': 0.7319325960706919} +12/28/2021 16:07:19 - INFO - codeparrot_training - Step 43346: {'lr': 2.1682075977293826e-05, 'samples': 22193664, 'steps': 43346, 'batch_loss/train': 0.6081641822238453} +12/28/2021 16:07:31 - INFO - codeparrot_training - Step 43347: {'lr': 2.1675655303199455e-05, 'samples': 22194176, 'steps': 43347, 'batch_loss/train': 0.654228859115392} +12/28/2021 16:07:42 - INFO - codeparrot_training - Step 43348: {'lr': 2.1669235536836446e-05, 'samples': 22194688, 'steps': 43348, 'batch_loss/train': 0.7644551806151867} +12/28/2021 16:07:52 - INFO - codeparrot_training - Step 43349: {'lr': 2.1662816678230417e-05, 'samples': 22195200, 'steps': 43349, 'batch_loss/train': 0.716303200693801} +12/28/2021 16:08:03 - INFO - codeparrot_training - Step 43350: {'lr': 2.1656398727406683e-05, 'samples': 22195712, 'steps': 43350, 'batch_loss/train': 0.5955838458612561} +12/28/2021 16:08:17 - INFO - codeparrot_training - Step 43351: {'lr': 2.1649981684390972e-05, 'samples': 22196224, 'steps': 43351, 'batch_loss/train': 0.7300532730296254} +12/28/2021 16:08:28 - INFO - codeparrot_training - Step 43352: {'lr': 2.1643565549208738e-05, 'samples': 22196736, 'steps': 43352, 'batch_loss/train': 0.686910844524391} +12/28/2021 16:08:39 - INFO - codeparrot_training - Step 43353: {'lr': 2.16371503218854e-05, 'samples': 22197248, 'steps': 43353, 'batch_loss/train': 0.6483570062555373} +12/28/2021 16:08:51 - INFO - codeparrot_training - Step 43354: {'lr': 2.1630736002446554e-05, 'samples': 22197760, 'steps': 43354, 'batch_loss/train': 0.5543011367553845} +12/28/2021 16:09:01 - INFO - codeparrot_training - Step 43355: {'lr': 2.162432259091765e-05, 'samples': 22198272, 'steps': 43355, 'batch_loss/train': 0.6953355425503105} +12/28/2021 16:09:12 - INFO - codeparrot_training - Step 43356: {'lr': 2.161791008732422e-05, 'samples': 22198784, 'steps': 43356, 'batch_loss/train': 0.6808953499421477} +12/28/2021 16:09:26 - INFO - codeparrot_training - Step 43357: {'lr': 2.161149849169175e-05, 'samples': 22199296, 'steps': 43357, 'batch_loss/train': 0.7150770118460059} +12/28/2021 16:09:37 - INFO - codeparrot_training - Step 43358: {'lr': 2.1605087804045715e-05, 'samples': 22199808, 'steps': 43358, 'batch_loss/train': 1.2556912135332823} +12/28/2021 16:09:47 - INFO - codeparrot_training - Step 43359: {'lr': 2.1598678024411622e-05, 'samples': 22200320, 'steps': 43359, 'batch_loss/train': 0.7253453461453319} +12/28/2021 16:09:59 - INFO - codeparrot_training - Step 43360: {'lr': 2.1592269152814954e-05, 'samples': 22200832, 'steps': 43360, 'batch_loss/train': 0.7976950593292713} +12/28/2021 16:10:10 - INFO - codeparrot_training - Step 43361: {'lr': 2.158586118928116e-05, 'samples': 22201344, 'steps': 43361, 'batch_loss/train': 0.7528185211122036} +12/28/2021 16:10:21 - INFO - codeparrot_training - Step 43362: {'lr': 2.1579454133835746e-05, 'samples': 22201856, 'steps': 43362, 'batch_loss/train': 0.6747239786200225} +12/28/2021 16:10:31 - INFO - codeparrot_training - Step 43363: {'lr': 2.1573047986504224e-05, 'samples': 22202368, 'steps': 43363, 'batch_loss/train': 0.7684647273272276} +12/28/2021 16:10:43 - INFO - codeparrot_training - Step 43364: {'lr': 2.156664274731196e-05, 'samples': 22202880, 'steps': 43364, 'batch_loss/train': 0.7817162503488362} +12/28/2021 16:10:54 - INFO - codeparrot_training - Step 43365: {'lr': 2.1560238416284433e-05, 'samples': 22203392, 'steps': 43365, 'batch_loss/train': 0.6831558621488512} +12/28/2021 16:11:05 - INFO - codeparrot_training - Step 43366: {'lr': 2.1553834993447207e-05, 'samples': 22203904, 'steps': 43366, 'batch_loss/train': 0.6603614282794297} +12/28/2021 16:11:17 - INFO - codeparrot_training - Step 43367: {'lr': 2.1547432478825652e-05, 'samples': 22204416, 'steps': 43367, 'batch_loss/train': 0.7529225572943687} +12/28/2021 16:11:28 - INFO - codeparrot_training - Step 43368: {'lr': 2.1541030872445245e-05, 'samples': 22204928, 'steps': 43368, 'batch_loss/train': 0.7688530217856169} +12/28/2021 16:11:38 - INFO - codeparrot_training - Step 43369: {'lr': 2.1534630174331436e-05, 'samples': 22205440, 'steps': 43369, 'batch_loss/train': 0.6634829529793933} +12/28/2021 16:11:52 - INFO - codeparrot_training - Step 43370: {'lr': 2.1528230384509685e-05, 'samples': 22205952, 'steps': 43370, 'batch_loss/train': 0.7654236999806017} +12/28/2021 16:12:03 - INFO - codeparrot_training - Step 43371: {'lr': 2.1521831503005406e-05, 'samples': 22206464, 'steps': 43371, 'batch_loss/train': 0.709577449131757} +12/28/2021 16:12:14 - INFO - codeparrot_training - Step 43372: {'lr': 2.151543352984406e-05, 'samples': 22206976, 'steps': 43372, 'batch_loss/train': 0.4964699135744013} +12/28/2021 16:12:25 - INFO - codeparrot_training - Step 43373: {'lr': 2.150903646505106e-05, 'samples': 22207488, 'steps': 43373, 'batch_loss/train': 0.7944808537140489} +12/28/2021 16:12:36 - INFO - codeparrot_training - Step 43374: {'lr': 2.1502640308651873e-05, 'samples': 22208000, 'steps': 43374, 'batch_loss/train': 0.7038555629551411} +12/28/2021 16:12:47 - INFO - codeparrot_training - Step 43375: {'lr': 2.1496245060671966e-05, 'samples': 22208512, 'steps': 43375, 'batch_loss/train': 0.5701923804590479} +12/28/2021 16:12:57 - INFO - codeparrot_training - Step 43376: {'lr': 2.14898507211366e-05, 'samples': 22209024, 'steps': 43376, 'batch_loss/train': 0.6784877830650657} +12/28/2021 16:13:10 - INFO - codeparrot_training - Step 43377: {'lr': 2.148345729007134e-05, 'samples': 22209536, 'steps': 43377, 'batch_loss/train': 0.6710027474910021} +12/28/2021 16:13:20 - INFO - codeparrot_training - Step 43378: {'lr': 2.1477064767501635e-05, 'samples': 22210048, 'steps': 43378, 'batch_loss/train': 0.7444843128323555} +12/28/2021 16:13:31 - INFO - codeparrot_training - Step 43379: {'lr': 2.1470673153452802e-05, 'samples': 22210560, 'steps': 43379, 'batch_loss/train': 1.0762330265715718} +12/28/2021 16:13:45 - INFO - codeparrot_training - Step 43380: {'lr': 2.1464282447950262e-05, 'samples': 22211072, 'steps': 43380, 'batch_loss/train': 0.6053799046203494} +12/28/2021 16:13:55 - INFO - codeparrot_training - Step 43381: {'lr': 2.1457892651019438e-05, 'samples': 22211584, 'steps': 43381, 'batch_loss/train': 0.7022740305401385} +12/28/2021 16:14:06 - INFO - codeparrot_training - Step 43382: {'lr': 2.145150376268576e-05, 'samples': 22212096, 'steps': 43382, 'batch_loss/train': 0.7525805216282606} +12/28/2021 16:14:18 - INFO - codeparrot_training - Step 43383: {'lr': 2.144511578297459e-05, 'samples': 22212608, 'steps': 43383, 'batch_loss/train': 0.6723636509850621} +12/28/2021 16:14:29 - INFO - codeparrot_training - Step 43384: {'lr': 2.1438728711911353e-05, 'samples': 22213120, 'steps': 43384, 'batch_loss/train': 0.6753179635852575} +12/28/2021 16:14:39 - INFO - codeparrot_training - Step 43385: {'lr': 2.1432342549521423e-05, 'samples': 22213632, 'steps': 43385, 'batch_loss/train': 0.7286082343198359} +12/28/2021 16:14:50 - INFO - codeparrot_training - Step 43386: {'lr': 2.142595729583019e-05, 'samples': 22214144, 'steps': 43386, 'batch_loss/train': 0.6325778475729749} +12/28/2021 16:15:05 - INFO - codeparrot_training - Step 43387: {'lr': 2.1419572950863053e-05, 'samples': 22214656, 'steps': 43387, 'batch_loss/train': 0.6217829412780702} +12/28/2021 16:15:15 - INFO - codeparrot_training - Step 43388: {'lr': 2.141318951464538e-05, 'samples': 22215168, 'steps': 43388, 'batch_loss/train': 0.6978725459775887} +12/28/2021 16:15:26 - INFO - codeparrot_training - Step 43389: {'lr': 2.1406806987202598e-05, 'samples': 22215680, 'steps': 43389, 'batch_loss/train': 0.7457254314795136} +12/28/2021 16:15:38 - INFO - codeparrot_training - Step 43390: {'lr': 2.1400425368559988e-05, 'samples': 22216192, 'steps': 43390, 'batch_loss/train': 0.6560113601153716} +12/28/2021 16:15:49 - INFO - codeparrot_training - Step 43391: {'lr': 2.1394044658742923e-05, 'samples': 22216704, 'steps': 43391, 'batch_loss/train': 0.6952397571876645} +12/28/2021 16:15:59 - INFO - codeparrot_training - Step 43392: {'lr': 2.1387664857776933e-05, 'samples': 22217216, 'steps': 43392, 'batch_loss/train': 0.6785569492494687} +12/28/2021 16:16:11 - INFO - codeparrot_training - Step 43393: {'lr': 2.1381285965687197e-05, 'samples': 22217728, 'steps': 43393, 'batch_loss/train': 0.7214775255415589} +12/28/2021 16:16:22 - INFO - codeparrot_training - Step 43394: {'lr': 2.1374907982499164e-05, 'samples': 22218240, 'steps': 43394, 'batch_loss/train': 0.6911683293874376} +12/28/2021 16:16:32 - INFO - codeparrot_training - Step 43395: {'lr': 2.1368530908238148e-05, 'samples': 22218752, 'steps': 43395, 'batch_loss/train': 0.6800731134135276} +12/28/2021 16:16:43 - INFO - codeparrot_training - Step 43396: {'lr': 2.1362154742929546e-05, 'samples': 22219264, 'steps': 43396, 'batch_loss/train': 0.7610093159601092} +12/28/2021 16:16:57 - INFO - codeparrot_training - Step 43397: {'lr': 2.135577948659867e-05, 'samples': 22219776, 'steps': 43397, 'batch_loss/train': 0.7300082375877537} +12/28/2021 16:17:08 - INFO - codeparrot_training - Step 43398: {'lr': 2.1349405139270915e-05, 'samples': 22220288, 'steps': 43398, 'batch_loss/train': 0.588575245346874} +12/28/2021 16:17:18 - INFO - codeparrot_training - Step 43399: {'lr': 2.1343031700971545e-05, 'samples': 22220800, 'steps': 43399, 'batch_loss/train': 0.7188713122159243} +12/28/2021 16:17:30 - INFO - codeparrot_training - Step 43400: {'lr': 2.1336659171725975e-05, 'samples': 22221312, 'steps': 43400, 'batch_loss/train': 0.7263796823099256} +12/28/2021 16:17:41 - INFO - codeparrot_training - Step 43401: {'lr': 2.1330287551559553e-05, 'samples': 22221824, 'steps': 43401, 'batch_loss/train': 0.8328963350504637} +12/28/2021 16:17:52 - INFO - codeparrot_training - Step 43402: {'lr': 2.132391684049745e-05, 'samples': 22222336, 'steps': 43402, 'batch_loss/train': 0.6649667671881616} +12/28/2021 16:18:04 - INFO - codeparrot_training - Step 43403: {'lr': 2.1317547038565172e-05, 'samples': 22222848, 'steps': 43403, 'batch_loss/train': 1.4663712866604328} +12/28/2021 16:18:14 - INFO - codeparrot_training - Step 43404: {'lr': 2.1311178145788035e-05, 'samples': 22223360, 'steps': 43404, 'batch_loss/train': 0.6948032539803535} +12/28/2021 16:18:25 - INFO - codeparrot_training - Step 43405: {'lr': 2.130481016219124e-05, 'samples': 22223872, 'steps': 43405, 'batch_loss/train': 0.7099218117073178} +12/28/2021 16:18:36 - INFO - codeparrot_training - Step 43406: {'lr': 2.129844308780013e-05, 'samples': 22224384, 'steps': 43406, 'batch_loss/train': 0.5963840489275753} +12/28/2021 16:18:48 - INFO - codeparrot_training - Step 43407: {'lr': 2.129207692264015e-05, 'samples': 22224896, 'steps': 43407, 'batch_loss/train': 0.7885635071434081} +12/28/2021 16:18:59 - INFO - codeparrot_training - Step 43408: {'lr': 2.1285711666736457e-05, 'samples': 22225408, 'steps': 43408, 'batch_loss/train': 0.508132541552186} +12/28/2021 16:19:09 - INFO - codeparrot_training - Step 43409: {'lr': 2.127934732011444e-05, 'samples': 22225920, 'steps': 43409, 'batch_loss/train': 0.7218504447955638} +12/28/2021 16:19:23 - INFO - codeparrot_training - Step 43410: {'lr': 2.127298388279936e-05, 'samples': 22226432, 'steps': 43410, 'batch_loss/train': 0.8126259529963136} +12/28/2021 16:19:33 - INFO - codeparrot_training - Step 43411: {'lr': 2.1266621354816523e-05, 'samples': 22226944, 'steps': 43411, 'batch_loss/train': 0.7171829538419843} +12/28/2021 16:19:44 - INFO - codeparrot_training - Step 43412: {'lr': 2.126025973619125e-05, 'samples': 22227456, 'steps': 43412, 'batch_loss/train': 0.6560357543639839} +12/28/2021 16:19:56 - INFO - codeparrot_training - Step 43413: {'lr': 2.1253899026948798e-05, 'samples': 22227968, 'steps': 43413, 'batch_loss/train': 0.9372843839228153} +12/28/2021 16:20:07 - INFO - codeparrot_training - Step 43414: {'lr': 2.1247539227114476e-05, 'samples': 22228480, 'steps': 43414, 'batch_loss/train': 2.1874730209819973} +12/28/2021 16:20:18 - INFO - codeparrot_training - Step 43415: {'lr': 2.124118033671363e-05, 'samples': 22228992, 'steps': 43415, 'batch_loss/train': 0.7250225730240345} +12/28/2021 16:20:28 - INFO - codeparrot_training - Step 43416: {'lr': 2.1234822355771428e-05, 'samples': 22229504, 'steps': 43416, 'batch_loss/train': 0.6952542496728711} +12/28/2021 16:20:40 - INFO - codeparrot_training - Step 43417: {'lr': 2.122846528431313e-05, 'samples': 22230016, 'steps': 43417, 'batch_loss/train': 0.588229913613759} +12/28/2021 16:20:51 - INFO - codeparrot_training - Step 43418: {'lr': 2.1222109122364164e-05, 'samples': 22230528, 'steps': 43418, 'batch_loss/train': 0.721971657127142} +12/28/2021 16:21:01 - INFO - codeparrot_training - Step 43419: {'lr': 2.1215753869949674e-05, 'samples': 22231040, 'steps': 43419, 'batch_loss/train': 0.6867283806204796} +12/28/2021 16:21:15 - INFO - codeparrot_training - Step 43420: {'lr': 2.1209399527094913e-05, 'samples': 22231552, 'steps': 43420, 'batch_loss/train': 0.6319937778171152} +12/28/2021 16:21:26 - INFO - codeparrot_training - Step 43421: {'lr': 2.1203046093825283e-05, 'samples': 22232064, 'steps': 43421, 'batch_loss/train': 1.133230926003307} +12/28/2021 16:21:37 - INFO - codeparrot_training - Step 43422: {'lr': 2.1196693570165904e-05, 'samples': 22232576, 'steps': 43422, 'batch_loss/train': 0.6592248587403446} +12/28/2021 16:21:49 - INFO - codeparrot_training - Step 43423: {'lr': 2.1190341956142085e-05, 'samples': 22233088, 'steps': 43423, 'batch_loss/train': 0.737396152690053} +12/28/2021 16:22:00 - INFO - codeparrot_training - Step 43424: {'lr': 2.1183991251779054e-05, 'samples': 22233600, 'steps': 43424, 'batch_loss/train': 1.5167793751461431} +12/28/2021 16:22:10 - INFO - codeparrot_training - Step 43425: {'lr': 2.1177641457102103e-05, 'samples': 22234112, 'steps': 43425, 'batch_loss/train': 0.6802961529465392} +12/28/2021 16:22:21 - INFO - codeparrot_training - Step 43426: {'lr': 2.117129257213643e-05, 'samples': 22234624, 'steps': 43426, 'batch_loss/train': 0.6054891520179808} +12/28/2021 16:22:34 - INFO - codeparrot_training - Step 43427: {'lr': 2.116494459690735e-05, 'samples': 22235136, 'steps': 43427, 'batch_loss/train': 0.6790391062386334} +12/28/2021 16:22:45 - INFO - codeparrot_training - Step 43428: {'lr': 2.115859753143995e-05, 'samples': 22235648, 'steps': 43428, 'batch_loss/train': 0.5557859149994329} +12/28/2021 16:22:56 - INFO - codeparrot_training - Step 43429: {'lr': 2.11522513757596e-05, 'samples': 22236160, 'steps': 43429, 'batch_loss/train': 0.6425271374173462} +12/28/2021 16:23:08 - INFO - codeparrot_training - Step 43430: {'lr': 2.1145906129891533e-05, 'samples': 22236672, 'steps': 43430, 'batch_loss/train': 0.7882128416094929} +12/28/2021 16:23:18 - INFO - codeparrot_training - Step 43431: {'lr': 2.1139561793860894e-05, 'samples': 22237184, 'steps': 43431, 'batch_loss/train': 0.7511121844872832} +12/28/2021 16:23:29 - INFO - codeparrot_training - Step 43432: {'lr': 2.1133218367692885e-05, 'samples': 22237696, 'steps': 43432, 'batch_loss/train': 0.7036580805433914} +12/28/2021 16:23:41 - INFO - codeparrot_training - Step 43433: {'lr': 2.1126875851412874e-05, 'samples': 22238208, 'steps': 43433, 'batch_loss/train': 0.7257085784804076} +12/28/2021 16:23:52 - INFO - codeparrot_training - Step 43434: {'lr': 2.1120534245045952e-05, 'samples': 22238720, 'steps': 43434, 'batch_loss/train': 0.7821944109164178} +12/28/2021 16:24:02 - INFO - codeparrot_training - Step 43435: {'lr': 2.1114193548617378e-05, 'samples': 22239232, 'steps': 43435, 'batch_loss/train': 0.6721549862995744} +12/28/2021 16:24:13 - INFO - codeparrot_training - Step 43436: {'lr': 2.1107853762152324e-05, 'samples': 22239744, 'steps': 43436, 'batch_loss/train': 0.7597637018188834} +12/28/2021 16:24:27 - INFO - codeparrot_training - Step 43437: {'lr': 2.110151488567602e-05, 'samples': 22240256, 'steps': 43437, 'batch_loss/train': 0.6532205655239522} +12/28/2021 16:24:38 - INFO - codeparrot_training - Step 43438: {'lr': 2.109517691921367e-05, 'samples': 22240768, 'steps': 43438, 'batch_loss/train': 0.726195146329701} +12/28/2021 16:24:48 - INFO - codeparrot_training - Step 43439: {'lr': 2.1088839862790478e-05, 'samples': 22241280, 'steps': 43439, 'batch_loss/train': 0.7164421593770385} +12/28/2021 16:25:01 - INFO - codeparrot_training - Step 43440: {'lr': 2.1082503716431612e-05, 'samples': 22241792, 'steps': 43440, 'batch_loss/train': 0.6635148623026907} +12/28/2021 16:25:11 - INFO - codeparrot_training - Step 43441: {'lr': 2.107616848016225e-05, 'samples': 22242304, 'steps': 43441, 'batch_loss/train': 0.7293125661090016} +12/28/2021 16:25:22 - INFO - codeparrot_training - Step 43442: {'lr': 2.106983415400768e-05, 'samples': 22242816, 'steps': 43442, 'batch_loss/train': 0.514497296884656} +12/28/2021 16:25:34 - INFO - codeparrot_training - Step 43443: {'lr': 2.10635007379929e-05, 'samples': 22243328, 'steps': 43443, 'batch_loss/train': 0.5486244403291494} +12/28/2021 16:25:45 - INFO - codeparrot_training - Step 43444: {'lr': 2.105716823214332e-05, 'samples': 22243840, 'steps': 43444, 'batch_loss/train': 0.8111923169344664} +12/28/2021 16:25:56 - INFO - codeparrot_training - Step 43445: {'lr': 2.105083663648391e-05, 'samples': 22244352, 'steps': 43445, 'batch_loss/train': 0.5786006919224747} +12/28/2021 16:26:06 - INFO - codeparrot_training - Step 43446: {'lr': 2.1044505951039905e-05, 'samples': 22244864, 'steps': 43446, 'batch_loss/train': 0.6420165575109422} +12/28/2021 16:26:18 - INFO - codeparrot_training - Step 43447: {'lr': 2.1038176175836588e-05, 'samples': 22245376, 'steps': 43447, 'batch_loss/train': 0.6816074838861823} +12/28/2021 16:26:29 - INFO - codeparrot_training - Step 43448: {'lr': 2.1031847310898967e-05, 'samples': 22245888, 'steps': 43448, 'batch_loss/train': 0.6573711177334189} +12/28/2021 16:26:40 - INFO - codeparrot_training - Step 43449: {'lr': 2.1025519356252272e-05, 'samples': 22246400, 'steps': 43449, 'batch_loss/train': 0.6489821881987154} +12/28/2021 16:26:54 - INFO - codeparrot_training - Step 43450: {'lr': 2.1019192311921646e-05, 'samples': 22246912, 'steps': 43450, 'batch_loss/train': 0.6678747157566249} +12/28/2021 16:27:04 - INFO - codeparrot_training - Step 43451: {'lr': 2.101286617793227e-05, 'samples': 22247424, 'steps': 43451, 'batch_loss/train': 0.5657179770059884} +12/28/2021 16:27:15 - INFO - codeparrot_training - Step 43452: {'lr': 2.1006540954309262e-05, 'samples': 22247936, 'steps': 43452, 'batch_loss/train': 0.7658347934484482} +12/28/2021 16:27:27 - INFO - codeparrot_training - Step 43453: {'lr': 2.1000216641077845e-05, 'samples': 22248448, 'steps': 43453, 'batch_loss/train': 0.6401138819055632} +12/28/2021 16:27:38 - INFO - codeparrot_training - Step 43454: {'lr': 2.0993893238263006e-05, 'samples': 22248960, 'steps': 43454, 'batch_loss/train': 0.7531860615126789} +12/28/2021 16:27:48 - INFO - codeparrot_training - Step 43455: {'lr': 2.098757074589e-05, 'samples': 22249472, 'steps': 43455, 'batch_loss/train': 0.7460372122004628} +12/28/2021 16:27:59 - INFO - codeparrot_training - Step 43456: {'lr': 2.0981249163984002e-05, 'samples': 22249984, 'steps': 43456, 'batch_loss/train': 0.7125974837690592} +12/28/2021 16:28:13 - INFO - codeparrot_training - Step 43457: {'lr': 2.097492849256999e-05, 'samples': 22250496, 'steps': 43457, 'batch_loss/train': 0.8054621149785817} +12/28/2021 16:28:23 - INFO - codeparrot_training - Step 43458: {'lr': 2.096860873167325e-05, 'samples': 22251008, 'steps': 43458, 'batch_loss/train': 0.6671698214486241} +12/28/2021 16:28:34 - INFO - codeparrot_training - Step 43459: {'lr': 2.0962289881318873e-05, 'samples': 22251520, 'steps': 43459, 'batch_loss/train': 0.7074168142862618} +12/28/2021 16:28:46 - INFO - codeparrot_training - Step 43460: {'lr': 2.0955971941531898e-05, 'samples': 22252032, 'steps': 43460, 'batch_loss/train': 0.7010107680689543} +12/28/2021 16:28:57 - INFO - codeparrot_training - Step 43461: {'lr': 2.0949654912337495e-05, 'samples': 22252544, 'steps': 43461, 'batch_loss/train': 0.7615444427356124} +12/28/2021 16:29:07 - INFO - codeparrot_training - Step 43462: {'lr': 2.0943338793760786e-05, 'samples': 22253056, 'steps': 43462, 'batch_loss/train': 0.539988616714254} +12/28/2021 16:29:20 - INFO - codeparrot_training - Step 43463: {'lr': 2.0937023585826857e-05, 'samples': 22253568, 'steps': 43463, 'batch_loss/train': 0.763832657597959} +12/28/2021 16:29:30 - INFO - codeparrot_training - Step 43464: {'lr': 2.0930709288560863e-05, 'samples': 22254080, 'steps': 43464, 'batch_loss/train': 0.7909394484013319} +12/28/2021 16:29:41 - INFO - codeparrot_training - Step 43465: {'lr': 2.0924395901987857e-05, 'samples': 22254592, 'steps': 43465, 'batch_loss/train': 0.7186170481145382} +12/28/2021 16:29:51 - INFO - codeparrot_training - Step 43466: {'lr': 2.0918083426132968e-05, 'samples': 22255104, 'steps': 43466, 'batch_loss/train': 0.7552293865010142} +12/28/2021 16:30:05 - INFO - codeparrot_training - Step 43467: {'lr': 2.0911771861021252e-05, 'samples': 22255616, 'steps': 43467, 'batch_loss/train': 0.5061210144194774} +12/28/2021 16:30:16 - INFO - codeparrot_training - Step 43468: {'lr': 2.090546120667791e-05, 'samples': 22256128, 'steps': 43468, 'batch_loss/train': 0.6864535566419363} +12/28/2021 16:30:27 - INFO - codeparrot_training - Step 43469: {'lr': 2.0899151463127848e-05, 'samples': 22256640, 'steps': 43469, 'batch_loss/train': 0.5870960247702897} +12/28/2021 16:30:39 - INFO - codeparrot_training - Step 43470: {'lr': 2.0892842630396342e-05, 'samples': 22257152, 'steps': 43470, 'batch_loss/train': 0.5378758502483834} +12/28/2021 16:30:49 - INFO - codeparrot_training - Step 43471: {'lr': 2.088653470850832e-05, 'samples': 22257664, 'steps': 43471, 'batch_loss/train': 0.6701990666333586} +12/28/2021 16:31:00 - INFO - codeparrot_training - Step 43472: {'lr': 2.0880227697488902e-05, 'samples': 22258176, 'steps': 43472, 'batch_loss/train': 0.6252141566365026} +12/28/2021 16:31:12 - INFO - codeparrot_training - Step 43473: {'lr': 2.087392159736329e-05, 'samples': 22258688, 'steps': 43473, 'batch_loss/train': 0.8176855519413948} +12/28/2021 16:31:23 - INFO - codeparrot_training - Step 43474: {'lr': 2.0867616408156402e-05, 'samples': 22259200, 'steps': 43474, 'batch_loss/train': 0.690254651941359} +12/28/2021 16:31:33 - INFO - codeparrot_training - Step 43475: {'lr': 2.086131212989334e-05, 'samples': 22259712, 'steps': 43475, 'batch_loss/train': 0.7532916510244831} +12/28/2021 16:31:47 - INFO - codeparrot_training - Step 43476: {'lr': 2.0855008762599186e-05, 'samples': 22260224, 'steps': 43476, 'batch_loss/train': 0.6940743746235967} +12/28/2021 16:31:58 - INFO - codeparrot_training - Step 43477: {'lr': 2.084870630629901e-05, 'samples': 22260736, 'steps': 43477, 'batch_loss/train': 0.7103328735101968} +12/28/2021 16:32:09 - INFO - codeparrot_training - Step 43478: {'lr': 2.0842404761017842e-05, 'samples': 22261248, 'steps': 43478, 'batch_loss/train': 0.7681093802675605} +12/28/2021 16:32:19 - INFO - codeparrot_training - Step 43479: {'lr': 2.0836104126780774e-05, 'samples': 22261760, 'steps': 43479, 'batch_loss/train': 0.7672809828072786} +12/28/2021 16:32:31 - INFO - codeparrot_training - Step 43480: {'lr': 2.082980440361276e-05, 'samples': 22262272, 'steps': 43480, 'batch_loss/train': 0.6652844045311213} +12/28/2021 16:32:42 - INFO - codeparrot_training - Step 43481: {'lr': 2.082350559153895e-05, 'samples': 22262784, 'steps': 43481, 'batch_loss/train': 0.7315475312061608} +12/28/2021 16:32:53 - INFO - codeparrot_training - Step 43482: {'lr': 2.08172076905844e-05, 'samples': 22263296, 'steps': 43482, 'batch_loss/train': 0.6115643093362451} +12/28/2021 16:33:05 - INFO - codeparrot_training - Step 43483: {'lr': 2.0810910700773984e-05, 'samples': 22263808, 'steps': 43483, 'batch_loss/train': 0.7365649726707488} +12/28/2021 16:33:15 - INFO - codeparrot_training - Step 43484: {'lr': 2.0804614622132902e-05, 'samples': 22264320, 'steps': 43484, 'batch_loss/train': 0.732046214689035} +12/28/2021 16:33:26 - INFO - codeparrot_training - Step 43485: {'lr': 2.0798319454686187e-05, 'samples': 22264832, 'steps': 43485, 'batch_loss/train': 0.6600302630104125} +12/28/2021 16:33:40 - INFO - codeparrot_training - Step 43486: {'lr': 2.0792025198458768e-05, 'samples': 22265344, 'steps': 43486, 'batch_loss/train': 0.7018168987706304} +12/28/2021 16:33:50 - INFO - codeparrot_training - Step 43487: {'lr': 2.0785731853475654e-05, 'samples': 22265856, 'steps': 43487, 'batch_loss/train': 0.6687226218637079} +12/28/2021 16:34:01 - INFO - codeparrot_training - Step 43488: {'lr': 2.0779439419762013e-05, 'samples': 22266368, 'steps': 43488, 'batch_loss/train': 0.7574154356261715} +12/28/2021 16:34:12 - INFO - codeparrot_training - Step 43489: {'lr': 2.0773147897342748e-05, 'samples': 22266880, 'steps': 43489, 'batch_loss/train': 0.6736445464193821} +12/28/2021 16:34:24 - INFO - codeparrot_training - Step 43490: {'lr': 2.0766857286242863e-05, 'samples': 22267392, 'steps': 43490, 'batch_loss/train': 0.7484052325598896} +12/28/2021 16:34:34 - INFO - codeparrot_training - Step 43491: {'lr': 2.076056758648742e-05, 'samples': 22267904, 'steps': 43491, 'batch_loss/train': 0.6988121203612536} +12/28/2021 16:34:45 - INFO - codeparrot_training - Step 43492: {'lr': 2.0754278798101404e-05, 'samples': 22268416, 'steps': 43492, 'batch_loss/train': 0.6214893334545195} +12/28/2021 16:34:57 - INFO - codeparrot_training - Step 43493: {'lr': 2.0747990921109795e-05, 'samples': 22268928, 'steps': 43493, 'batch_loss/train': 0.6829169760458171} +12/28/2021 16:35:08 - INFO - codeparrot_training - Step 43494: {'lr': 2.074170395553762e-05, 'samples': 22269440, 'steps': 43494, 'batch_loss/train': 0.6481304063927382} +12/28/2021 16:35:18 - INFO - codeparrot_training - Step 43495: {'lr': 2.073541790140987e-05, 'samples': 22269952, 'steps': 43495, 'batch_loss/train': 0.7129271635785699} +12/28/2021 16:35:33 - INFO - codeparrot_training - Step 43496: {'lr': 2.0729132758751547e-05, 'samples': 22270464, 'steps': 43496, 'batch_loss/train': 0.6788507187739015} +12/28/2021 16:35:44 - INFO - codeparrot_training - Step 43497: {'lr': 2.0722848527587657e-05, 'samples': 22270976, 'steps': 43497, 'batch_loss/train': 0.7145811431109905} +12/28/2021 16:35:54 - INFO - codeparrot_training - Step 43498: {'lr': 2.0716565207943043e-05, 'samples': 22271488, 'steps': 43498, 'batch_loss/train': 0.7570495740510523} +12/28/2021 16:36:06 - INFO - codeparrot_training - Step 43499: {'lr': 2.0710282799842883e-05, 'samples': 22272000, 'steps': 43499, 'batch_loss/train': 0.7523221110459417} +12/28/2021 16:36:17 - INFO - codeparrot_training - Step 43500: {'lr': 2.0704001303312042e-05, 'samples': 22272512, 'steps': 43500, 'batch_loss/train': 0.740969218313694} +12/28/2021 16:36:28 - INFO - codeparrot_training - Step 43501: {'lr': 2.06977207183755e-05, 'samples': 22273024, 'steps': 43501, 'batch_loss/train': 0.6990628009662032} +12/28/2021 16:36:38 - INFO - codeparrot_training - Step 43502: {'lr': 2.069144104505824e-05, 'samples': 22273536, 'steps': 43502, 'batch_loss/train': 0.7932597873732448} +12/28/2021 16:36:52 - INFO - codeparrot_training - Step 43503: {'lr': 2.0685162283385212e-05, 'samples': 22274048, 'steps': 43503, 'batch_loss/train': 0.7358053200878203} +12/28/2021 16:37:03 - INFO - codeparrot_training - Step 43504: {'lr': 2.067888443338142e-05, 'samples': 22274560, 'steps': 43504, 'batch_loss/train': 0.751731077209115} +12/28/2021 16:37:13 - INFO - codeparrot_training - Step 43505: {'lr': 2.0672607495071827e-05, 'samples': 22275072, 'steps': 43505, 'batch_loss/train': 0.7234721016138792} +12/28/2021 16:37:26 - INFO - codeparrot_training - Step 43506: {'lr': 2.0666331468481266e-05, 'samples': 22275584, 'steps': 43506, 'batch_loss/train': 0.756729599321261} +12/28/2021 16:37:36 - INFO - codeparrot_training - Step 43507: {'lr': 2.0660056353634797e-05, 'samples': 22276096, 'steps': 43507, 'batch_loss/train': 0.732085342053324} +12/28/2021 16:37:47 - INFO - codeparrot_training - Step 43508: {'lr': 2.0653782150557436e-05, 'samples': 22276608, 'steps': 43508, 'batch_loss/train': 0.533580829272978} +12/28/2021 16:37:59 - INFO - codeparrot_training - Step 43509: {'lr': 2.064750885927394e-05, 'samples': 22277120, 'steps': 43509, 'batch_loss/train': 0.6073612325126305} +12/28/2021 16:38:10 - INFO - codeparrot_training - Step 43510: {'lr': 2.0641236479809365e-05, 'samples': 22277632, 'steps': 43510, 'batch_loss/train': 0.7703288120683283} +12/28/2021 16:38:20 - INFO - codeparrot_training - Step 43511: {'lr': 2.06349650121887e-05, 'samples': 22278144, 'steps': 43511, 'batch_loss/train': 0.7124374387785792} +12/28/2021 16:38:31 - INFO - codeparrot_training - Step 43512: {'lr': 2.0628694456436753e-05, 'samples': 22278656, 'steps': 43512, 'batch_loss/train': 0.6481544259004295} +12/28/2021 16:38:45 - INFO - codeparrot_training - Step 43513: {'lr': 2.0622424812578476e-05, 'samples': 22279168, 'steps': 43513, 'batch_loss/train': 0.6729236876126379} +12/28/2021 16:38:56 - INFO - codeparrot_training - Step 43514: {'lr': 2.061615608063891e-05, 'samples': 22279680, 'steps': 43514, 'batch_loss/train': 0.783031296916306} +12/28/2021 16:39:06 - INFO - codeparrot_training - Step 43515: {'lr': 2.060988826064286e-05, 'samples': 22280192, 'steps': 43515, 'batch_loss/train': 0.6973662149393931} +12/28/2021 16:39:18 - INFO - codeparrot_training - Step 43516: {'lr': 2.0603621352615287e-05, 'samples': 22280704, 'steps': 43516, 'batch_loss/train': 0.695143059361726} +12/28/2021 16:39:29 - INFO - codeparrot_training - Step 43517: {'lr': 2.059735535658108e-05, 'samples': 22281216, 'steps': 43517, 'batch_loss/train': 0.6450837687007152} +12/28/2021 16:39:40 - INFO - codeparrot_training - Step 43518: {'lr': 2.0591090272565173e-05, 'samples': 22281728, 'steps': 43518, 'batch_loss/train': 0.488869501510635} +12/28/2021 16:39:54 - INFO - codeparrot_training - Step 43519: {'lr': 2.058482610059248e-05, 'samples': 22282240, 'steps': 43519, 'batch_loss/train': 0.7458866871893406} +12/28/2021 16:40:04 - INFO - codeparrot_training - Step 43520: {'lr': 2.0578562840687877e-05, 'samples': 22282752, 'steps': 43520, 'batch_loss/train': 0.5247964083682746} +12/28/2021 16:40:15 - INFO - codeparrot_training - Step 43521: {'lr': 2.0572300492876284e-05, 'samples': 22283264, 'steps': 43521, 'batch_loss/train': 0.6638183556497097} +12/28/2021 16:40:25 - INFO - codeparrot_training - Step 43522: {'lr': 2.0566039057182628e-05, 'samples': 22283776, 'steps': 43522, 'batch_loss/train': 0.7227807859890163} +12/28/2021 16:40:38 - INFO - codeparrot_training - Step 43523: {'lr': 2.055977853363178e-05, 'samples': 22284288, 'steps': 43523, 'batch_loss/train': 0.68414660776034} +12/28/2021 16:40:48 - INFO - codeparrot_training - Step 43524: {'lr': 2.055351892224855e-05, 'samples': 22284800, 'steps': 43524, 'batch_loss/train': 0.7904562894254923} +12/28/2021 16:40:59 - INFO - codeparrot_training - Step 43525: {'lr': 2.0547260223057974e-05, 'samples': 22285312, 'steps': 43525, 'batch_loss/train': 0.7486152248457074} +12/28/2021 16:41:11 - INFO - codeparrot_training - Step 43526: {'lr': 2.0541002436084783e-05, 'samples': 22285824, 'steps': 43526, 'batch_loss/train': 0.7563935816287994} +12/28/2021 16:41:21 - INFO - codeparrot_training - Step 43527: {'lr': 2.0534745561353957e-05, 'samples': 22286336, 'steps': 43527, 'batch_loss/train': 0.7613241728395224} +12/28/2021 16:41:32 - INFO - codeparrot_training - Step 43528: {'lr': 2.0528489598890337e-05, 'samples': 22286848, 'steps': 43528, 'batch_loss/train': 0.7935368400067091} +12/28/2021 16:41:44 - INFO - codeparrot_training - Step 43529: {'lr': 2.0522234548718793e-05, 'samples': 22287360, 'steps': 43529, 'batch_loss/train': 0.7135955947451293} +12/28/2021 16:41:55 - INFO - codeparrot_training - Step 43530: {'lr': 2.0515980410864193e-05, 'samples': 22287872, 'steps': 43530, 'batch_loss/train': 0.7602428835816681} +12/28/2021 16:42:06 - INFO - codeparrot_training - Step 43531: {'lr': 2.0509727185351407e-05, 'samples': 22288384, 'steps': 43531, 'batch_loss/train': 0.6703630267875269} +12/28/2021 16:42:16 - INFO - codeparrot_training - Step 43532: {'lr': 2.0503474872205303e-05, 'samples': 22288896, 'steps': 43532, 'batch_loss/train': 0.645227910252288} +12/28/2021 16:42:30 - INFO - codeparrot_training - Step 43533: {'lr': 2.0497223471450722e-05, 'samples': 22289408, 'steps': 43533, 'batch_loss/train': 0.65820402931422} +12/28/2021 16:42:41 - INFO - codeparrot_training - Step 43534: {'lr': 2.049097298311256e-05, 'samples': 22289920, 'steps': 43534, 'batch_loss/train': 0.7273731585592031} +12/28/2021 16:42:51 - INFO - codeparrot_training - Step 43535: {'lr': 2.048472340721555e-05, 'samples': 22290432, 'steps': 43535, 'batch_loss/train': 0.7682917267084122} +12/28/2021 16:43:03 - INFO - codeparrot_training - Step 43536: {'lr': 2.047847474378467e-05, 'samples': 22290944, 'steps': 43536, 'batch_loss/train': 0.7456387523561716} +12/28/2021 16:43:14 - INFO - codeparrot_training - Step 43537: {'lr': 2.0472226992844734e-05, 'samples': 22291456, 'steps': 43537, 'batch_loss/train': 0.7190204812213778} +12/28/2021 16:43:25 - INFO - codeparrot_training - Step 43538: {'lr': 2.0465980154420526e-05, 'samples': 22291968, 'steps': 43538, 'batch_loss/train': 0.7503766758018173} +12/28/2021 16:43:37 - INFO - codeparrot_training - Step 43539: {'lr': 2.0459734228536865e-05, 'samples': 22292480, 'steps': 43539, 'batch_loss/train': 0.7649217755533755} +12/28/2021 16:43:48 - INFO - codeparrot_training - Step 43540: {'lr': 2.0453489215218722e-05, 'samples': 22292992, 'steps': 43540, 'batch_loss/train': 0.8870082765351981} +12/28/2021 16:43:58 - INFO - codeparrot_training - Step 43541: {'lr': 2.044724511449078e-05, 'samples': 22293504, 'steps': 43541, 'batch_loss/train': 0.7559799337759614} +12/28/2021 16:44:09 - INFO - codeparrot_training - Step 43542: {'lr': 2.0441001926377934e-05, 'samples': 22294016, 'steps': 43542, 'batch_loss/train': 1.2415314023382962} +12/28/2021 16:44:27 - INFO - codeparrot_training - Step 43543: {'lr': 2.0434759650904966e-05, 'samples': 22294528, 'steps': 43543, 'batch_loss/train': 1.1227372521534562} +12/28/2021 16:44:37 - INFO - codeparrot_training - Step 43544: {'lr': 2.042851828809672e-05, 'samples': 22295040, 'steps': 43544, 'batch_loss/train': 0.9046505205333233} +12/28/2021 16:44:48 - INFO - codeparrot_training - Step 43545: {'lr': 2.042227783797801e-05, 'samples': 22295552, 'steps': 43545, 'batch_loss/train': 0.6345432108792011} +12/28/2021 16:45:00 - INFO - codeparrot_training - Step 43546: {'lr': 2.0416038300573646e-05, 'samples': 22296064, 'steps': 43546, 'batch_loss/train': 0.738487146794796} +12/28/2021 16:45:11 - INFO - codeparrot_training - Step 43547: {'lr': 2.040979967590839e-05, 'samples': 22296576, 'steps': 43547, 'batch_loss/train': 0.6529700090177357} +12/28/2021 16:45:21 - INFO - codeparrot_training - Step 43548: {'lr': 2.0403561964007112e-05, 'samples': 22297088, 'steps': 43548, 'batch_loss/train': 0.6271840613335371} +12/28/2021 16:45:32 - INFO - codeparrot_training - Step 43549: {'lr': 2.039732516489462e-05, 'samples': 22297600, 'steps': 43549, 'batch_loss/train': 0.7584124002605677} +12/28/2021 16:45:44 - INFO - codeparrot_training - Step 43550: {'lr': 2.0391089278595566e-05, 'samples': 22298112, 'steps': 43550, 'batch_loss/train': 0.788964958046563} +12/28/2021 16:45:55 - INFO - codeparrot_training - Step 43551: {'lr': 2.0384854305134952e-05, 'samples': 22298624, 'steps': 43551, 'batch_loss/train': 0.760646250564605} +12/28/2021 16:46:05 - INFO - codeparrot_training - Step 43552: {'lr': 2.0378620244537405e-05, 'samples': 22299136, 'steps': 43552, 'batch_loss/train': 0.6893884773598984} +12/28/2021 16:46:19 - INFO - codeparrot_training - Step 43553: {'lr': 2.037238709682776e-05, 'samples': 22299648, 'steps': 43553, 'batch_loss/train': 0.7543999501504004} +12/28/2021 16:46:30 - INFO - codeparrot_training - Step 43554: {'lr': 2.0366154862030806e-05, 'samples': 22300160, 'steps': 43554, 'batch_loss/train': 0.7390691684558988} +12/28/2021 16:46:41 - INFO - codeparrot_training - Step 43555: {'lr': 2.03599235401713e-05, 'samples': 22300672, 'steps': 43555, 'batch_loss/train': 0.7770155845209956} +12/28/2021 16:46:53 - INFO - codeparrot_training - Step 43556: {'lr': 2.0353693131274026e-05, 'samples': 22301184, 'steps': 43556, 'batch_loss/train': 0.7135297181084752} +12/28/2021 16:47:03 - INFO - codeparrot_training - Step 43557: {'lr': 2.034746363536377e-05, 'samples': 22301696, 'steps': 43557, 'batch_loss/train': 0.6658573849126697} +12/28/2021 16:47:14 - INFO - codeparrot_training - Step 43558: {'lr': 2.0341235052465295e-05, 'samples': 22302208, 'steps': 43558, 'batch_loss/train': 0.7749383689370006} +12/28/2021 16:47:25 - INFO - codeparrot_training - Step 43559: {'lr': 2.0335007382603325e-05, 'samples': 22302720, 'steps': 43559, 'batch_loss/train': 0.7370326754171401} +12/28/2021 16:47:39 - INFO - codeparrot_training - Step 43560: {'lr': 2.0328780625802706e-05, 'samples': 22303232, 'steps': 43560, 'batch_loss/train': 0.7538184382719919} +12/28/2021 16:47:49 - INFO - codeparrot_training - Step 43561: {'lr': 2.0322554782088053e-05, 'samples': 22303744, 'steps': 43561, 'batch_loss/train': 0.8022567690350115} +12/28/2021 16:48:00 - INFO - codeparrot_training - Step 43562: {'lr': 2.0316329851484237e-05, 'samples': 22304256, 'steps': 43562, 'batch_loss/train': 0.6893492102390155} +12/28/2021 16:48:12 - INFO - codeparrot_training - Step 43563: {'lr': 2.0310105834016017e-05, 'samples': 22304768, 'steps': 43563, 'batch_loss/train': 0.7835565214045346} +12/28/2021 16:48:23 - INFO - codeparrot_training - Step 43564: {'lr': 2.030388272970804e-05, 'samples': 22305280, 'steps': 43564, 'batch_loss/train': 0.6576421755598858} +12/28/2021 16:48:33 - INFO - codeparrot_training - Step 43565: {'lr': 2.029766053858506e-05, 'samples': 22305792, 'steps': 43565, 'batch_loss/train': 0.7102504717186093} +12/28/2021 16:48:46 - INFO - codeparrot_training - Step 43566: {'lr': 2.0291439260671923e-05, 'samples': 22306304, 'steps': 43566, 'batch_loss/train': 0.656452901661396} +12/28/2021 16:48:56 - INFO - codeparrot_training - Step 43567: {'lr': 2.0285218895993274e-05, 'samples': 22306816, 'steps': 43567, 'batch_loss/train': 0.7980327729601413} +12/28/2021 16:49:07 - INFO - codeparrot_training - Step 43568: {'lr': 2.0278999444573788e-05, 'samples': 22307328, 'steps': 43568, 'batch_loss/train': 0.789233885705471} +12/28/2021 16:49:21 - INFO - codeparrot_training - Step 43569: {'lr': 2.0272780906438358e-05, 'samples': 22307840, 'steps': 43569, 'batch_loss/train': 0.7055135904811323} +12/28/2021 16:49:31 - INFO - codeparrot_training - Step 43570: {'lr': 2.0266563281611555e-05, 'samples': 22308352, 'steps': 43570, 'batch_loss/train': 0.7093531284481287} +12/28/2021 16:49:42 - INFO - codeparrot_training - Step 43571: {'lr': 2.026034657011816e-05, 'samples': 22308864, 'steps': 43571, 'batch_loss/train': 0.765248910523951} +12/28/2021 16:49:53 - INFO - codeparrot_training - Step 43572: {'lr': 2.0254130771982876e-05, 'samples': 22309376, 'steps': 43572, 'batch_loss/train': 0.8140660929493606} +12/28/2021 16:50:05 - INFO - codeparrot_training - Step 43573: {'lr': 2.0247915887230435e-05, 'samples': 22309888, 'steps': 43573, 'batch_loss/train': 0.7707800720818341} +12/28/2021 16:50:15 - INFO - codeparrot_training - Step 43574: {'lr': 2.0241701915885507e-05, 'samples': 22310400, 'steps': 43574, 'batch_loss/train': 0.6548121469095349} +12/28/2021 16:50:26 - INFO - codeparrot_training - Step 43575: {'lr': 2.0235488857972883e-05, 'samples': 22310912, 'steps': 43575, 'batch_loss/train': 0.6807925631292164} +12/28/2021 16:50:38 - INFO - codeparrot_training - Step 43576: {'lr': 2.0229276713517096e-05, 'samples': 22311424, 'steps': 43576, 'batch_loss/train': 0.798008575453423} +12/28/2021 16:50:49 - INFO - codeparrot_training - Step 43577: {'lr': 2.022306548254299e-05, 'samples': 22311936, 'steps': 43577, 'batch_loss/train': 0.706247553229332} +12/28/2021 16:51:00 - INFO - codeparrot_training - Step 43578: {'lr': 2.0216855165075266e-05, 'samples': 22312448, 'steps': 43578, 'batch_loss/train': 0.6920734015293419} +12/28/2021 16:51:10 - INFO - codeparrot_training - Step 43579: {'lr': 2.0210645761138513e-05, 'samples': 22312960, 'steps': 43579, 'batch_loss/train': 0.684062133077532} +12/28/2021 16:51:22 - INFO - codeparrot_training - Step 43580: {'lr': 2.0204437270757463e-05, 'samples': 22313472, 'steps': 43580, 'batch_loss/train': 0.7400285424664617} +12/28/2021 16:51:33 - INFO - codeparrot_training - Step 43581: {'lr': 2.019822969395682e-05, 'samples': 22313984, 'steps': 43581, 'batch_loss/train': 0.7179299406707287} +12/28/2021 16:51:43 - INFO - codeparrot_training - Step 43582: {'lr': 2.0192023030761257e-05, 'samples': 22314496, 'steps': 43582, 'batch_loss/train': 0.7263008593581617} +12/28/2021 16:51:58 - INFO - codeparrot_training - Step 43583: {'lr': 2.0185817281195422e-05, 'samples': 22315008, 'steps': 43583, 'batch_loss/train': 0.7495627552270889} +12/28/2021 16:52:08 - INFO - codeparrot_training - Step 43584: {'lr': 2.017961244528399e-05, 'samples': 22315520, 'steps': 43584, 'batch_loss/train': 0.7562326649203897} +12/28/2021 16:52:19 - INFO - codeparrot_training - Step 43585: {'lr': 2.0173408523051662e-05, 'samples': 22316032, 'steps': 43585, 'batch_loss/train': 0.7416676804423332} +12/28/2021 16:52:31 - INFO - codeparrot_training - Step 43586: {'lr': 2.0167205514523058e-05, 'samples': 22316544, 'steps': 43586, 'batch_loss/train': 0.784850955940783} +12/28/2021 16:52:42 - INFO - codeparrot_training - Step 43587: {'lr': 2.0161003419722878e-05, 'samples': 22317056, 'steps': 43587, 'batch_loss/train': 0.8247334046754986} +12/28/2021 16:52:52 - INFO - codeparrot_training - Step 43588: {'lr': 2.0154802238675773e-05, 'samples': 22317568, 'steps': 43588, 'batch_loss/train': 0.8747802204452455} +12/28/2021 16:53:07 - INFO - codeparrot_training - Step 43589: {'lr': 2.0148601971406415e-05, 'samples': 22318080, 'steps': 43589, 'batch_loss/train': 0.8079252743627876} +12/28/2021 16:53:17 - INFO - codeparrot_training - Step 43590: {'lr': 2.01424026179394e-05, 'samples': 22318592, 'steps': 43590, 'batch_loss/train': 0.9455791525542736} +12/28/2021 16:53:28 - INFO - codeparrot_training - Step 43591: {'lr': 2.0136204178299368e-05, 'samples': 22319104, 'steps': 43591, 'batch_loss/train': 0.7553369477391243} +12/28/2021 16:53:39 - INFO - codeparrot_training - Step 43592: {'lr': 2.013000665251105e-05, 'samples': 22319616, 'steps': 43592, 'batch_loss/train': 0.8360991696827114} +12/28/2021 16:53:51 - INFO - codeparrot_training - Step 43593: {'lr': 2.0123810040599017e-05, 'samples': 22320128, 'steps': 43593, 'batch_loss/train': 0.5895905555225909} +12/28/2021 16:54:02 - INFO - codeparrot_training - Step 43594: {'lr': 2.0117614342587853e-05, 'samples': 22320640, 'steps': 43594, 'batch_loss/train': 0.7808756409212947} +12/28/2021 16:54:12 - INFO - codeparrot_training - Step 43595: {'lr': 2.011141955850235e-05, 'samples': 22321152, 'steps': 43595, 'batch_loss/train': 0.7723095640540123} +12/28/2021 16:54:24 - INFO - codeparrot_training - Step 43596: {'lr': 2.010522568836698e-05, 'samples': 22321664, 'steps': 43596, 'batch_loss/train': 0.5061975589487702} +12/28/2021 16:54:35 - INFO - codeparrot_training - Step 43597: {'lr': 2.009903273220645e-05, 'samples': 22322176, 'steps': 43597, 'batch_loss/train': 0.6590358694083989} +12/28/2021 16:54:46 - INFO - codeparrot_training - Step 43598: {'lr': 2.0092840690045356e-05, 'samples': 22322688, 'steps': 43598, 'batch_loss/train': 0.8110168852144852} +12/28/2021 16:54:56 - INFO - codeparrot_training - Step 43599: {'lr': 2.008664956190831e-05, 'samples': 22323200, 'steps': 43599, 'batch_loss/train': 0.8496292214840651} +12/28/2021 16:55:10 - INFO - codeparrot_training - Step 43600: {'lr': 2.0080459347819934e-05, 'samples': 22323712, 'steps': 43600, 'batch_loss/train': 0.7417234062450007} +12/28/2021 16:55:21 - INFO - codeparrot_training - Step 43601: {'lr': 2.0074270047804876e-05, 'samples': 22324224, 'steps': 43601, 'batch_loss/train': 0.7175130671821535} +12/28/2021 16:55:31 - INFO - codeparrot_training - Step 43602: {'lr': 2.0068081661887615e-05, 'samples': 22324736, 'steps': 43602, 'batch_loss/train': 0.710300964768976} +12/28/2021 16:55:44 - INFO - codeparrot_training - Step 43603: {'lr': 2.0061894190092884e-05, 'samples': 22325248, 'steps': 43603, 'batch_loss/train': 0.7160043842159212} +12/28/2021 16:55:54 - INFO - codeparrot_training - Step 43604: {'lr': 2.0055707632445296e-05, 'samples': 22325760, 'steps': 43604, 'batch_loss/train': 0.5637571250554174} +12/28/2021 16:56:05 - INFO - codeparrot_training - Step 43605: {'lr': 2.004952198896931e-05, 'samples': 22326272, 'steps': 43605, 'batch_loss/train': 0.7935980167239904} +12/28/2021 16:56:16 - INFO - codeparrot_training - Step 43606: {'lr': 2.0043337259689683e-05, 'samples': 22326784, 'steps': 43606, 'batch_loss/train': 0.8888823490124196} +12/28/2021 16:56:30 - INFO - codeparrot_training - Step 43607: {'lr': 2.0037153444630864e-05, 'samples': 22327296, 'steps': 43607, 'batch_loss/train': 0.7317290501669049} +12/28/2021 16:56:40 - INFO - codeparrot_training - Step 43608: {'lr': 2.00309705438175e-05, 'samples': 22327808, 'steps': 43608, 'batch_loss/train': 1.3178314422257245} +12/28/2021 16:56:51 - INFO - codeparrot_training - Step 43609: {'lr': 2.002478855727416e-05, 'samples': 22328320, 'steps': 43609, 'batch_loss/train': 0.6720056044869125} +12/28/2021 16:57:03 - INFO - codeparrot_training - Step 43610: {'lr': 2.0018607485025432e-05, 'samples': 22328832, 'steps': 43610, 'batch_loss/train': 0.7341536111198366} +12/28/2021 16:57:14 - INFO - codeparrot_training - Step 43611: {'lr': 2.0012427327095876e-05, 'samples': 22329344, 'steps': 43611, 'batch_loss/train': 0.7478662063367665} +12/28/2021 16:57:24 - INFO - codeparrot_training - Step 43612: {'lr': 2.0006248083510065e-05, 'samples': 22329856, 'steps': 43612, 'batch_loss/train': 0.770715743303299} +12/28/2021 16:57:36 - INFO - codeparrot_training - Step 43613: {'lr': 2.000006975429258e-05, 'samples': 22330368, 'steps': 43613, 'batch_loss/train': 0.6940345084294677} +12/28/2021 16:57:47 - INFO - codeparrot_training - Step 43614: {'lr': 1.9993892339467966e-05, 'samples': 22330880, 'steps': 43614, 'batch_loss/train': 0.6538366654422134} +12/28/2021 16:57:58 - INFO - codeparrot_training - Step 43615: {'lr': 1.998771583906084e-05, 'samples': 22331392, 'steps': 43615, 'batch_loss/train': 0.7729939445853233} +12/28/2021 16:58:10 - INFO - codeparrot_training - Step 43616: {'lr': 1.9981540253095648e-05, 'samples': 22331904, 'steps': 43616, 'batch_loss/train': 0.7527305171824992} +12/28/2021 16:58:21 - INFO - codeparrot_training - Step 43617: {'lr': 1.997536558159696e-05, 'samples': 22332416, 'steps': 43617, 'batch_loss/train': 0.6878002276644111} +12/28/2021 16:58:31 - INFO - codeparrot_training - Step 43618: {'lr': 1.996919182458948e-05, 'samples': 22332928, 'steps': 43618, 'batch_loss/train': 0.7369631342589855} +12/28/2021 16:58:42 - INFO - codeparrot_training - Step 43619: {'lr': 1.996301898209757e-05, 'samples': 22333440, 'steps': 43619, 'batch_loss/train': 0.7336265242192894} +12/28/2021 16:58:56 - INFO - codeparrot_training - Step 43620: {'lr': 1.99568470541458e-05, 'samples': 22333952, 'steps': 43620, 'batch_loss/train': 0.7259883414953947} +12/28/2021 16:59:06 - INFO - codeparrot_training - Step 43621: {'lr': 1.9950676040758815e-05, 'samples': 22334464, 'steps': 43621, 'batch_loss/train': 0.6001000786200166} +12/28/2021 16:59:17 - INFO - codeparrot_training - Step 43622: {'lr': 1.994450594196104e-05, 'samples': 22334976, 'steps': 43622, 'batch_loss/train': 0.5470073008327745} +12/28/2021 16:59:29 - INFO - codeparrot_training - Step 43623: {'lr': 1.9938336757777064e-05, 'samples': 22335488, 'steps': 43623, 'batch_loss/train': 0.710282820102293} +12/28/2021 16:59:40 - INFO - codeparrot_training - Step 43624: {'lr': 1.993216848823137e-05, 'samples': 22336000, 'steps': 43624, 'batch_loss/train': 0.7176260149572045} +12/28/2021 16:59:50 - INFO - codeparrot_training - Step 43625: {'lr': 1.9926001133348527e-05, 'samples': 22336512, 'steps': 43625, 'batch_loss/train': 0.8337345793843269} +12/28/2021 17:00:03 - INFO - codeparrot_training - Step 43626: {'lr': 1.9919834693153032e-05, 'samples': 22337024, 'steps': 43626, 'batch_loss/train': 0.702709439327009} +12/28/2021 17:00:13 - INFO - codeparrot_training - Step 43627: {'lr': 1.991366916766943e-05, 'samples': 22337536, 'steps': 43627, 'batch_loss/train': 0.7072122159879655} +12/28/2021 17:00:24 - INFO - codeparrot_training - Step 43628: {'lr': 1.9907504556922113e-05, 'samples': 22338048, 'steps': 43628, 'batch_loss/train': 0.8213274488225579} +12/28/2021 17:00:34 - INFO - codeparrot_training - Step 43629: {'lr': 1.990134086093573e-05, 'samples': 22338560, 'steps': 43629, 'batch_loss/train': 0.7882369682192802} +12/28/2021 17:00:48 - INFO - codeparrot_training - Step 43630: {'lr': 1.989517807973476e-05, 'samples': 22339072, 'steps': 43630, 'batch_loss/train': 0.8632665267214179} +12/28/2021 17:00:59 - INFO - codeparrot_training - Step 43631: {'lr': 1.9889016213343604e-05, 'samples': 22339584, 'steps': 43631, 'batch_loss/train': 0.7211274281144142} +12/28/2021 17:01:10 - INFO - codeparrot_training - Step 43632: {'lr': 1.9882855261786875e-05, 'samples': 22340096, 'steps': 43632, 'batch_loss/train': 0.7146926070563495} +12/28/2021 17:01:22 - INFO - codeparrot_training - Step 43633: {'lr': 1.987669522508906e-05, 'samples': 22340608, 'steps': 43633, 'batch_loss/train': 0.8247180161997676} +12/28/2021 17:01:32 - INFO - codeparrot_training - Step 43634: {'lr': 1.9870536103274578e-05, 'samples': 22341120, 'steps': 43634, 'batch_loss/train': 0.7601466355845332} +12/28/2021 17:01:43 - INFO - codeparrot_training - Step 43635: {'lr': 1.9864377896367967e-05, 'samples': 22341632, 'steps': 43635, 'batch_loss/train': 0.8675495623610914} +12/28/2021 17:01:57 - INFO - codeparrot_training - Step 43636: {'lr': 1.9858220604393655e-05, 'samples': 22342144, 'steps': 43636, 'batch_loss/train': 0.7114720779936761} +12/28/2021 17:02:08 - INFO - codeparrot_training - Step 43637: {'lr': 1.9852064227376204e-05, 'samples': 22342656, 'steps': 43637, 'batch_loss/train': 0.6485785152763128} +12/28/2021 17:02:18 - INFO - codeparrot_training - Step 43638: {'lr': 1.984590876534001e-05, 'samples': 22343168, 'steps': 43638, 'batch_loss/train': 0.6752792655024678} +12/28/2021 17:02:31 - INFO - codeparrot_training - Step 43639: {'lr': 1.9839754218309607e-05, 'samples': 22343680, 'steps': 43639, 'batch_loss/train': 0.696419829968363} +12/28/2021 17:02:41 - INFO - codeparrot_training - Step 43640: {'lr': 1.9833600586309426e-05, 'samples': 22344192, 'steps': 43640, 'batch_loss/train': 0.78944908734411} +12/28/2021 17:02:52 - INFO - codeparrot_training - Step 43641: {'lr': 1.982744786936397e-05, 'samples': 22344704, 'steps': 43641, 'batch_loss/train': 0.7731192149221897} +12/28/2021 17:03:03 - INFO - codeparrot_training - Step 43642: {'lr': 1.9821296067497608e-05, 'samples': 22345216, 'steps': 43642, 'batch_loss/train': 0.6783303112024441} +12/28/2021 17:03:15 - INFO - codeparrot_training - Step 43643: {'lr': 1.981514518073488e-05, 'samples': 22345728, 'steps': 43643, 'batch_loss/train': 0.8206101241521537} +12/28/2021 17:03:25 - INFO - codeparrot_training - Step 43644: {'lr': 1.9808995209100287e-05, 'samples': 22346240, 'steps': 43644, 'batch_loss/train': 0.7534524686634541} +12/28/2021 17:03:36 - INFO - codeparrot_training - Step 43645: {'lr': 1.9802846152618155e-05, 'samples': 22346752, 'steps': 43645, 'batch_loss/train': 0.8140233960002661} +12/28/2021 17:03:50 - INFO - codeparrot_training - Step 43646: {'lr': 1.9796698011312953e-05, 'samples': 22347264, 'steps': 43646, 'batch_loss/train': 0.8093131561763585} +12/28/2021 17:04:01 - INFO - codeparrot_training - Step 43647: {'lr': 1.9790550785209222e-05, 'samples': 22347776, 'steps': 43647, 'batch_loss/train': 0.7317215540679172} +12/28/2021 17:04:11 - INFO - codeparrot_training - Step 43648: {'lr': 1.978440447433133e-05, 'samples': 22348288, 'steps': 43648, 'batch_loss/train': 0.6961149820126593} +12/28/2021 17:04:24 - INFO - codeparrot_training - Step 43649: {'lr': 1.9778259078703704e-05, 'samples': 22348800, 'steps': 43649, 'batch_loss/train': 0.7262553367763758} +12/28/2021 17:04:34 - INFO - codeparrot_training - Step 43650: {'lr': 1.9772114598350767e-05, 'samples': 22349312, 'steps': 43650, 'batch_loss/train': 0.5819332371465862} +12/28/2021 17:04:45 - INFO - codeparrot_training - Step 43651: {'lr': 1.9765971033296998e-05, 'samples': 22349824, 'steps': 43651, 'batch_loss/train': 0.7435176339931786} +12/28/2021 17:04:56 - INFO - codeparrot_training - Step 43652: {'lr': 1.9759828383566798e-05, 'samples': 22350336, 'steps': 43652, 'batch_loss/train': 0.8449091208167374} +12/28/2021 17:05:08 - INFO - codeparrot_training - Step 43653: {'lr': 1.9753686649184616e-05, 'samples': 22350848, 'steps': 43653, 'batch_loss/train': 0.8139241300523281} +12/28/2021 17:05:18 - INFO - codeparrot_training - Step 43654: {'lr': 1.974754583017477e-05, 'samples': 22351360, 'steps': 43654, 'batch_loss/train': 0.7382655355613679} +12/28/2021 17:05:29 - INFO - codeparrot_training - Step 43655: {'lr': 1.9741405926561763e-05, 'samples': 22351872, 'steps': 43655, 'batch_loss/train': 0.7147817779332399} +12/28/2021 17:05:41 - INFO - codeparrot_training - Step 43656: {'lr': 1.9735266938370022e-05, 'samples': 22352384, 'steps': 43656, 'batch_loss/train': 0.7185149099677801} +12/28/2021 17:05:52 - INFO - codeparrot_training - Step 43657: {'lr': 1.9729128865623864e-05, 'samples': 22352896, 'steps': 43657, 'batch_loss/train': 0.7863656487315893} +12/28/2021 17:06:02 - INFO - codeparrot_training - Step 43658: {'lr': 1.9722991708347764e-05, 'samples': 22353408, 'steps': 43658, 'batch_loss/train': 0.6922382889315486} +12/28/2021 17:06:16 - INFO - codeparrot_training - Step 43659: {'lr': 1.9716855466566125e-05, 'samples': 22353920, 'steps': 43659, 'batch_loss/train': 0.7609089598990977} +12/28/2021 17:06:27 - INFO - codeparrot_training - Step 43660: {'lr': 1.9710720140303312e-05, 'samples': 22354432, 'steps': 43660, 'batch_loss/train': 0.8110669618472457} +12/28/2021 17:06:38 - INFO - codeparrot_training - Step 43661: {'lr': 1.9704585729583698e-05, 'samples': 22354944, 'steps': 43661, 'batch_loss/train': 0.7658844627439976} +12/28/2021 17:06:50 - INFO - codeparrot_training - Step 43662: {'lr': 1.9698452234431703e-05, 'samples': 22355456, 'steps': 43662, 'batch_loss/train': 0.7364076776430011} +12/28/2021 17:07:00 - INFO - codeparrot_training - Step 43663: {'lr': 1.96923196548717e-05, 'samples': 22355968, 'steps': 43663, 'batch_loss/train': 0.7825276986695826} +12/28/2021 17:07:11 - INFO - codeparrot_training - Step 43664: {'lr': 1.9686187990928055e-05, 'samples': 22356480, 'steps': 43664, 'batch_loss/train': 0.6666361839743331} +12/28/2021 17:07:22 - INFO - codeparrot_training - Step 43665: {'lr': 1.96800572426252e-05, 'samples': 22356992, 'steps': 43665, 'batch_loss/train': 0.6381022813729942} +12/28/2021 17:07:36 - INFO - codeparrot_training - Step 43666: {'lr': 1.9673927409987442e-05, 'samples': 22357504, 'steps': 43666, 'batch_loss/train': 0.6729059062781744} +12/28/2021 17:07:46 - INFO - codeparrot_training - Step 43667: {'lr': 1.9667798493039206e-05, 'samples': 22358016, 'steps': 43667, 'batch_loss/train': 0.7483166952151805} +12/28/2021 17:07:57 - INFO - codeparrot_training - Step 43668: {'lr': 1.966167049180481e-05, 'samples': 22358528, 'steps': 43668, 'batch_loss/train': 0.4954616241448093} +12/28/2021 17:08:09 - INFO - codeparrot_training - Step 43669: {'lr': 1.965554340630865e-05, 'samples': 22359040, 'steps': 43669, 'batch_loss/train': 0.9834025725722313} +12/28/2021 17:08:20 - INFO - codeparrot_training - Step 43670: {'lr': 1.9649417236575125e-05, 'samples': 22359552, 'steps': 43670, 'batch_loss/train': 0.6462812600657344} +12/28/2021 17:08:30 - INFO - codeparrot_training - Step 43671: {'lr': 1.9643291982628486e-05, 'samples': 22360064, 'steps': 43671, 'batch_loss/train': 0.6405384065583348} +12/28/2021 17:08:42 - INFO - codeparrot_training - Step 43672: {'lr': 1.963716764449308e-05, 'samples': 22360576, 'steps': 43672, 'batch_loss/train': 0.7018599197035655} +12/28/2021 17:08:53 - INFO - codeparrot_training - Step 43673: {'lr': 1.9631044222193418e-05, 'samples': 22361088, 'steps': 43673, 'batch_loss/train': 0.7699119858443737} +12/28/2021 17:09:04 - INFO - codeparrot_training - Step 43674: {'lr': 1.9624921715753696e-05, 'samples': 22361600, 'steps': 43674, 'batch_loss/train': 0.8118460569530725} +12/28/2021 17:09:14 - INFO - codeparrot_training - Step 43675: {'lr': 1.9618800125198316e-05, 'samples': 22362112, 'steps': 43675, 'batch_loss/train': 0.8304718500003219} +12/28/2021 17:09:29 - INFO - codeparrot_training - Step 43676: {'lr': 1.961267945055159e-05, 'samples': 22362624, 'steps': 43676, 'batch_loss/train': 0.7843554893042892} +12/28/2021 17:09:39 - INFO - codeparrot_training - Step 43677: {'lr': 1.9606559691837833e-05, 'samples': 22363136, 'steps': 43677, 'batch_loss/train': 0.8353963531553745} +12/28/2021 17:09:50 - INFO - codeparrot_training - Step 43678: {'lr': 1.960044084908144e-05, 'samples': 22363648, 'steps': 43678, 'batch_loss/train': 0.7425217051059008} +12/28/2021 17:10:02 - INFO - codeparrot_training - Step 43679: {'lr': 1.9594322922306675e-05, 'samples': 22364160, 'steps': 43679, 'batch_loss/train': 0.8863969594240189} +12/28/2021 17:10:13 - INFO - codeparrot_training - Step 43680: {'lr': 1.958820591153787e-05, 'samples': 22364672, 'steps': 43680, 'batch_loss/train': 0.7511012847535312} +12/28/2021 17:10:23 - INFO - codeparrot_training - Step 43681: {'lr': 1.9582089816799404e-05, 'samples': 22365184, 'steps': 43681, 'batch_loss/train': 0.7434234152315184} +12/28/2021 17:10:35 - INFO - codeparrot_training - Step 43682: {'lr': 1.9575974638115556e-05, 'samples': 22365696, 'steps': 43682, 'batch_loss/train': 0.7351283053867519} +12/28/2021 17:10:46 - INFO - codeparrot_training - Step 43683: {'lr': 1.9569860375510533e-05, 'samples': 22366208, 'steps': 43683, 'batch_loss/train': 0.7588706811657175} +12/28/2021 17:10:57 - INFO - codeparrot_training - Step 43684: {'lr': 1.9563747029008817e-05, 'samples': 22366720, 'steps': 43684, 'batch_loss/train': 0.7111501349136233} +12/28/2021 17:11:07 - INFO - codeparrot_training - Step 43685: {'lr': 1.9557634598634634e-05, 'samples': 22367232, 'steps': 43685, 'batch_loss/train': 0.7548960465937853} +12/28/2021 17:11:19 - INFO - codeparrot_training - Step 43686: {'lr': 1.955152308441227e-05, 'samples': 22367744, 'steps': 43686, 'batch_loss/train': 0.8046970358118415} +12/28/2021 17:11:30 - INFO - codeparrot_training - Step 43687: {'lr': 1.9545412486366044e-05, 'samples': 22368256, 'steps': 43687, 'batch_loss/train': 0.7291516761761159} +12/28/2021 17:11:41 - INFO - codeparrot_training - Step 43688: {'lr': 1.9539302804520208e-05, 'samples': 22368768, 'steps': 43688, 'batch_loss/train': 0.6177168251015246} +12/28/2021 17:11:55 - INFO - codeparrot_training - Step 43689: {'lr': 1.953319403889911e-05, 'samples': 22369280, 'steps': 43689, 'batch_loss/train': 0.7763404008001089} +12/28/2021 17:12:05 - INFO - codeparrot_training - Step 43690: {'lr': 1.9527086189527003e-05, 'samples': 22369792, 'steps': 43690, 'batch_loss/train': 0.6915562856593169} +12/28/2021 17:12:16 - INFO - codeparrot_training - Step 43691: {'lr': 1.9520979256428177e-05, 'samples': 22370304, 'steps': 43691, 'batch_loss/train': 0.6395124425762333} +12/28/2021 17:12:28 - INFO - codeparrot_training - Step 43692: {'lr': 1.9514873239626916e-05, 'samples': 22370816, 'steps': 43692, 'batch_loss/train': 0.7262612117920071} +12/28/2021 17:12:39 - INFO - codeparrot_training - Step 43693: {'lr': 1.950876813914751e-05, 'samples': 22371328, 'steps': 43693, 'batch_loss/train': 0.7972382558509707} +12/28/2021 17:12:50 - INFO - codeparrot_training - Step 43694: {'lr': 1.9502663955014182e-05, 'samples': 22371840, 'steps': 43694, 'batch_loss/train': 0.7913088672794402} +12/28/2021 17:13:02 - INFO - codeparrot_training - Step 43695: {'lr': 1.9496560687251254e-05, 'samples': 22372352, 'steps': 43695, 'batch_loss/train': 0.6593247214332223} +12/28/2021 17:13:13 - INFO - codeparrot_training - Step 43696: {'lr': 1.9490458335882978e-05, 'samples': 22372864, 'steps': 43696, 'batch_loss/train': 0.7090826532803476} +12/28/2021 17:13:23 - INFO - codeparrot_training - Step 43697: {'lr': 1.9484356900933586e-05, 'samples': 22373376, 'steps': 43697, 'batch_loss/train': 0.7393272412009537} +12/28/2021 17:13:34 - INFO - codeparrot_training - Step 43698: {'lr': 1.947825638242731e-05, 'samples': 22373888, 'steps': 43698, 'batch_loss/train': 1.0942740151658654} +12/28/2021 17:13:48 - INFO - codeparrot_training - Step 43699: {'lr': 1.9472156780388523e-05, 'samples': 22374400, 'steps': 43699, 'batch_loss/train': 0.7903225107584149} +12/28/2021 17:13:59 - INFO - codeparrot_training - Step 43700: {'lr': 1.946605809484134e-05, 'samples': 22374912, 'steps': 43700, 'batch_loss/train': 0.7639431040734053} +12/28/2021 17:14:10 - INFO - codeparrot_training - Step 43701: {'lr': 1.9459960325810072e-05, 'samples': 22375424, 'steps': 43701, 'batch_loss/train': 0.8261668113991618} +12/28/2021 17:14:22 - INFO - codeparrot_training - Step 43702: {'lr': 1.9453863473318957e-05, 'samples': 22375936, 'steps': 43702, 'batch_loss/train': 0.7270322232507169} +12/28/2021 17:14:32 - INFO - codeparrot_training - Step 43703: {'lr': 1.9447767537392247e-05, 'samples': 22376448, 'steps': 43703, 'batch_loss/train': 0.7204211237840354} +12/28/2021 17:14:43 - INFO - codeparrot_training - Step 43704: {'lr': 1.944167251805412e-05, 'samples': 22376960, 'steps': 43704, 'batch_loss/train': 0.7066128174774349} +12/28/2021 17:14:57 - INFO - codeparrot_training - Step 43705: {'lr': 1.943557841532889e-05, 'samples': 22377472, 'steps': 43705, 'batch_loss/train': 0.858263332862407} +12/28/2021 17:15:08 - INFO - codeparrot_training - Step 43706: {'lr': 1.9429485229240705e-05, 'samples': 22377984, 'steps': 43706, 'batch_loss/train': 0.6830367259681225} +12/28/2021 17:15:18 - INFO - codeparrot_training - Step 43707: {'lr': 1.9423392959813852e-05, 'samples': 22378496, 'steps': 43707, 'batch_loss/train': 0.6748069394379854} +12/28/2021 17:15:29 - INFO - codeparrot_training - Step 43708: {'lr': 1.9417301607072556e-05, 'samples': 22379008, 'steps': 43708, 'batch_loss/train': 0.6331794610014185} +12/28/2021 17:15:41 - INFO - codeparrot_training - Step 43709: {'lr': 1.9411211171040944e-05, 'samples': 22379520, 'steps': 43709, 'batch_loss/train': 0.7373796869069338} +12/28/2021 17:15:51 - INFO - codeparrot_training - Step 43710: {'lr': 1.9405121651743297e-05, 'samples': 22380032, 'steps': 43710, 'batch_loss/train': 0.7496225470677018} +12/28/2021 17:16:02 - INFO - codeparrot_training - Step 43711: {'lr': 1.9399033049203875e-05, 'samples': 22380544, 'steps': 43711, 'batch_loss/train': 0.6944072081241757} +12/28/2021 17:16:14 - INFO - codeparrot_training - Step 43712: {'lr': 1.9392945363446796e-05, 'samples': 22381056, 'steps': 43712, 'batch_loss/train': 0.732629070058465} +12/28/2021 17:16:25 - INFO - codeparrot_training - Step 43713: {'lr': 1.9386858594496238e-05, 'samples': 22381568, 'steps': 43713, 'batch_loss/train': 0.7338931974954903} +12/28/2021 17:16:36 - INFO - codeparrot_training - Step 43714: {'lr': 1.938077274237654e-05, 'samples': 22382080, 'steps': 43714, 'batch_loss/train': 0.7767059868201613} +12/28/2021 17:16:49 - INFO - codeparrot_training - Step 43715: {'lr': 1.937468780711177e-05, 'samples': 22382592, 'steps': 43715, 'batch_loss/train': 0.6676922934129834} +12/28/2021 17:17:00 - INFO - codeparrot_training - Step 43716: {'lr': 1.936860378872615e-05, 'samples': 22383104, 'steps': 43716, 'batch_loss/train': 0.8467287635430694} +12/28/2021 17:17:11 - INFO - codeparrot_training - Step 43717: {'lr': 1.9362520687243896e-05, 'samples': 22383616, 'steps': 43717, 'batch_loss/train': 0.7135044410824776} +12/28/2021 17:17:21 - INFO - codeparrot_training - Step 43718: {'lr': 1.9356438502689145e-05, 'samples': 22384128, 'steps': 43718, 'batch_loss/train': 0.7804344650357962} +12/28/2021 17:17:34 - INFO - codeparrot_training - Step 43719: {'lr': 1.935035723508613e-05, 'samples': 22384640, 'steps': 43719, 'batch_loss/train': 0.7072119298391044} +12/28/2021 17:17:44 - INFO - codeparrot_training - Step 43720: {'lr': 1.9344276884459e-05, 'samples': 22385152, 'steps': 43720, 'batch_loss/train': 0.8850626070052385} +12/28/2021 17:17:55 - INFO - codeparrot_training - Step 43721: {'lr': 1.933819745083193e-05, 'samples': 22385664, 'steps': 43721, 'batch_loss/train': 0.81058051250875} +12/28/2021 17:18:08 - INFO - codeparrot_training - Step 43722: {'lr': 1.933211893422912e-05, 'samples': 22386176, 'steps': 43722, 'batch_loss/train': 0.6635229110252112} +12/28/2021 17:18:19 - INFO - codeparrot_training - Step 43723: {'lr': 1.932604133467472e-05, 'samples': 22386688, 'steps': 43723, 'batch_loss/train': 0.7080485709011555} +12/28/2021 17:18:30 - INFO - codeparrot_training - Step 43724: {'lr': 1.9319964652192794e-05, 'samples': 22387200, 'steps': 43724, 'batch_loss/train': 0.7367723835632205} +12/28/2021 17:18:42 - INFO - codeparrot_training - Step 43725: {'lr': 1.9313888886807685e-05, 'samples': 22387712, 'steps': 43725, 'batch_loss/train': 0.7866583582945168} +12/28/2021 17:18:52 - INFO - codeparrot_training - Step 43726: {'lr': 1.9307814038543397e-05, 'samples': 22388224, 'steps': 43726, 'batch_loss/train': 0.6145215854048729} +12/28/2021 17:19:03 - INFO - codeparrot_training - Step 43727: {'lr': 1.930174010742414e-05, 'samples': 22388736, 'steps': 43727, 'batch_loss/train': 0.7112744320183992} +12/28/2021 17:19:15 - INFO - codeparrot_training - Step 43728: {'lr': 1.929566709347408e-05, 'samples': 22389248, 'steps': 43728, 'batch_loss/train': 0.7263216702267528} +12/28/2021 17:19:26 - INFO - codeparrot_training - Step 43729: {'lr': 1.9289594996717285e-05, 'samples': 22389760, 'steps': 43729, 'batch_loss/train': 0.8754741149023175} +12/28/2021 17:19:37 - INFO - codeparrot_training - Step 43730: {'lr': 1.928352381717799e-05, 'samples': 22390272, 'steps': 43730, 'batch_loss/train': 0.8217881256714463} +12/28/2021 17:19:47 - INFO - codeparrot_training - Step 43731: {'lr': 1.927745355488028e-05, 'samples': 22390784, 'steps': 43731, 'batch_loss/train': 0.5421728155342862} +12/28/2021 17:19:59 - INFO - codeparrot_training - Step 43732: {'lr': 1.9271384209848282e-05, 'samples': 22391296, 'steps': 43732, 'batch_loss/train': 0.7712922594510019} +12/28/2021 17:20:10 - INFO - codeparrot_training - Step 43733: {'lr': 1.9265315782106162e-05, 'samples': 22391808, 'steps': 43733, 'batch_loss/train': 0.8220433220267296} +12/28/2021 17:20:21 - INFO - codeparrot_training - Step 43734: {'lr': 1.9259248271678047e-05, 'samples': 22392320, 'steps': 43734, 'batch_loss/train': 0.7114706572610885} +12/28/2021 17:20:35 - INFO - codeparrot_training - Step 43735: {'lr': 1.9253181678587973e-05, 'samples': 22392832, 'steps': 43735, 'batch_loss/train': 0.7192156473174691} +12/28/2021 17:20:45 - INFO - codeparrot_training - Step 43736: {'lr': 1.9247116002860137e-05, 'samples': 22393344, 'steps': 43736, 'batch_loss/train': 0.704274034127593} +12/28/2021 17:20:56 - INFO - codeparrot_training - Step 43737: {'lr': 1.924105124451872e-05, 'samples': 22393856, 'steps': 43737, 'batch_loss/train': 0.7268098705681041} +12/28/2021 17:21:08 - INFO - codeparrot_training - Step 43738: {'lr': 1.923498740358767e-05, 'samples': 22394368, 'steps': 43738, 'batch_loss/train': 0.735871035605669} +12/28/2021 17:21:19 - INFO - codeparrot_training - Step 43739: {'lr': 1.9228924480091143e-05, 'samples': 22394880, 'steps': 43739, 'batch_loss/train': 0.7467967392876744} +12/28/2021 17:21:29 - INFO - codeparrot_training - Step 43740: {'lr': 1.9222862474053388e-05, 'samples': 22395392, 'steps': 43740, 'batch_loss/train': 0.774944256991148} +12/28/2021 17:21:40 - INFO - codeparrot_training - Step 43741: {'lr': 1.9216801385498333e-05, 'samples': 22395904, 'steps': 43741, 'batch_loss/train': 0.7540203025564551} +12/28/2021 17:21:52 - INFO - codeparrot_training - Step 43742: {'lr': 1.921074121445013e-05, 'samples': 22396416, 'steps': 43742, 'batch_loss/train': 0.6998963016085327} +12/28/2021 17:22:03 - INFO - codeparrot_training - Step 43743: {'lr': 1.920468196093289e-05, 'samples': 22396928, 'steps': 43743, 'batch_loss/train': 0.6222020536661148} +12/28/2021 17:22:13 - INFO - codeparrot_training - Step 43744: {'lr': 1.9198623624970686e-05, 'samples': 22397440, 'steps': 43744, 'batch_loss/train': 0.8177598880138248} +12/28/2021 17:22:27 - INFO - codeparrot_training - Step 43745: {'lr': 1.9192566206587575e-05, 'samples': 22397952, 'steps': 43745, 'batch_loss/train': 0.8061314187943935} +12/28/2021 17:22:38 - INFO - codeparrot_training - Step 43746: {'lr': 1.9186509705807708e-05, 'samples': 22398464, 'steps': 43746, 'batch_loss/train': 0.6343656685203314} +12/28/2021 17:22:49 - INFO - codeparrot_training - Step 43747: {'lr': 1.9180454122655122e-05, 'samples': 22398976, 'steps': 43747, 'batch_loss/train': 0.6382032819092274} +12/28/2021 17:23:01 - INFO - codeparrot_training - Step 43748: {'lr': 1.9174399457153878e-05, 'samples': 22399488, 'steps': 43748, 'batch_loss/train': 0.6601815624162555} +12/28/2021 17:23:12 - INFO - codeparrot_training - Step 43749: {'lr': 1.916834570932813e-05, 'samples': 22400000, 'steps': 43749, 'batch_loss/train': 0.742668210528791} +12/28/2021 17:23:22 - INFO - codeparrot_training - Step 43750: {'lr': 1.9162292879201794e-05, 'samples': 22400512, 'steps': 43750, 'batch_loss/train': 0.7478388822637498} +12/28/2021 17:23:33 - INFO - codeparrot_training - Step 43751: {'lr': 1.9156240966799104e-05, 'samples': 22401024, 'steps': 43751, 'batch_loss/train': 0.6435650233179331} +12/28/2021 17:23:47 - INFO - codeparrot_training - Step 43752: {'lr': 1.9150189972143988e-05, 'samples': 22401536, 'steps': 43752, 'batch_loss/train': 0.7755263643339276} +12/28/2021 17:23:57 - INFO - codeparrot_training - Step 43753: {'lr': 1.9144139895260508e-05, 'samples': 22402048, 'steps': 43753, 'batch_loss/train': 0.7119042177218944} +12/28/2021 17:24:08 - INFO - codeparrot_training - Step 43754: {'lr': 1.9138090736172843e-05, 'samples': 22402560, 'steps': 43754, 'batch_loss/train': 0.8741376837715507} +12/28/2021 17:24:20 - INFO - codeparrot_training - Step 43755: {'lr': 1.9132042494904913e-05, 'samples': 22403072, 'steps': 43755, 'batch_loss/train': 0.6968424320220947} +12/28/2021 17:24:31 - INFO - codeparrot_training - Step 43756: {'lr': 1.9125995171480837e-05, 'samples': 22403584, 'steps': 43756, 'batch_loss/train': 0.8173075262457132} +12/28/2021 17:24:41 - INFO - codeparrot_training - Step 43757: {'lr': 1.91199487659246e-05, 'samples': 22404096, 'steps': 43757, 'batch_loss/train': 0.7844464527443051} +12/28/2021 17:24:54 - INFO - codeparrot_training - Step 43758: {'lr': 1.9113903278260293e-05, 'samples': 22404608, 'steps': 43758, 'batch_loss/train': 0.7705523017793894} +12/28/2021 17:25:05 - INFO - codeparrot_training - Step 43759: {'lr': 1.9107858708511895e-05, 'samples': 22405120, 'steps': 43759, 'batch_loss/train': 1.2024803087115288} +12/28/2021 17:25:16 - INFO - codeparrot_training - Step 43760: {'lr': 1.9101815056703552e-05, 'samples': 22405632, 'steps': 43760, 'batch_loss/train': 0.628347294870764} +12/28/2021 17:25:26 - INFO - codeparrot_training - Step 43761: {'lr': 1.9095772322859084e-05, 'samples': 22406144, 'steps': 43761, 'batch_loss/train': 0.7874798304401338} +12/28/2021 17:25:40 - INFO - codeparrot_training - Step 43762: {'lr': 1.9089730507002717e-05, 'samples': 22406656, 'steps': 43762, 'batch_loss/train': 0.6673025946365669} +12/28/2021 17:25:51 - INFO - codeparrot_training - Step 43763: {'lr': 1.9083689609158406e-05, 'samples': 22407168, 'steps': 43763, 'batch_loss/train': 0.7707730988040566} +12/28/2021 17:26:01 - INFO - codeparrot_training - Step 43764: {'lr': 1.907764962935013e-05, 'samples': 22407680, 'steps': 43764, 'batch_loss/train': 0.7007548045367002} +12/28/2021 17:26:14 - INFO - codeparrot_training - Step 43765: {'lr': 1.907161056760187e-05, 'samples': 22408192, 'steps': 43765, 'batch_loss/train': 0.5716311521828175} +12/28/2021 17:26:24 - INFO - codeparrot_training - Step 43766: {'lr': 1.9065572423937777e-05, 'samples': 22408704, 'steps': 43766, 'batch_loss/train': 0.8242629058659077} +12/28/2021 17:26:35 - INFO - codeparrot_training - Step 43767: {'lr': 1.9059535198381745e-05, 'samples': 22409216, 'steps': 43767, 'batch_loss/train': 0.6960998745635152} +12/28/2021 17:26:46 - INFO - codeparrot_training - Step 43768: {'lr': 1.905349889095781e-05, 'samples': 22409728, 'steps': 43768, 'batch_loss/train': 0.5934498896822333} +12/28/2021 17:26:58 - INFO - codeparrot_training - Step 43769: {'lr': 1.9047463501689926e-05, 'samples': 22410240, 'steps': 43769, 'batch_loss/train': 0.7123095728456974} +12/28/2021 17:27:08 - INFO - codeparrot_training - Step 43770: {'lr': 1.9041429030602158e-05, 'samples': 22410752, 'steps': 43770, 'batch_loss/train': 0.7618934931233525} +12/28/2021 17:27:19 - INFO - codeparrot_training - Step 43771: {'lr': 1.903539547771843e-05, 'samples': 22411264, 'steps': 43771, 'batch_loss/train': 0.7183187864720821} +12/28/2021 17:27:31 - INFO - codeparrot_training - Step 43772: {'lr': 1.9029362843062782e-05, 'samples': 22411776, 'steps': 43772, 'batch_loss/train': 0.766173031181097} +12/28/2021 17:27:42 - INFO - codeparrot_training - Step 43773: {'lr': 1.902333112665916e-05, 'samples': 22412288, 'steps': 43773, 'batch_loss/train': 0.8095325054600835} +12/28/2021 17:27:52 - INFO - codeparrot_training - Step 43774: {'lr': 1.901730032853158e-05, 'samples': 22412800, 'steps': 43774, 'batch_loss/train': 0.6731307714653667} +12/28/2021 17:28:07 - INFO - codeparrot_training - Step 43775: {'lr': 1.9011270448704015e-05, 'samples': 22413312, 'steps': 43775, 'batch_loss/train': 0.6972935581579804} +12/28/2021 17:28:17 - INFO - codeparrot_training - Step 43776: {'lr': 1.900524148720037e-05, 'samples': 22413824, 'steps': 43776, 'batch_loss/train': 0.7214920681435615} +12/28/2021 17:28:28 - INFO - codeparrot_training - Step 43777: {'lr': 1.8999213444044705e-05, 'samples': 22414336, 'steps': 43777, 'batch_loss/train': 0.746470239944756} +12/28/2021 17:28:40 - INFO - codeparrot_training - Step 43778: {'lr': 1.8993186319260946e-05, 'samples': 22414848, 'steps': 43778, 'batch_loss/train': 0.7929914202541113} +12/28/2021 17:28:51 - INFO - codeparrot_training - Step 43779: {'lr': 1.8987160112872992e-05, 'samples': 22415360, 'steps': 43779, 'batch_loss/train': 0.7315123528242111} +12/28/2021 17:29:01 - INFO - codeparrot_training - Step 43780: {'lr': 1.8981134824904933e-05, 'samples': 22415872, 'steps': 43780, 'batch_loss/train': 0.7386629628017545} +12/28/2021 17:29:12 - INFO - codeparrot_training - Step 43781: {'lr': 1.8975110455380638e-05, 'samples': 22416384, 'steps': 43781, 'batch_loss/train': 0.7810899242758751} +12/28/2021 17:29:26 - INFO - codeparrot_training - Step 43782: {'lr': 1.8969087004324065e-05, 'samples': 22416896, 'steps': 43782, 'batch_loss/train': 0.8669226607307792} +12/28/2021 17:29:37 - INFO - codeparrot_training - Step 43783: {'lr': 1.896306447175916e-05, 'samples': 22417408, 'steps': 43783, 'batch_loss/train': 0.7408436150290072} +12/28/2021 17:29:47 - INFO - codeparrot_training - Step 43784: {'lr': 1.8957042857709855e-05, 'samples': 22417920, 'steps': 43784, 'batch_loss/train': 0.6888241074047983} +12/28/2021 17:29:59 - INFO - codeparrot_training - Step 43785: {'lr': 1.8951022162200154e-05, 'samples': 22418432, 'steps': 43785, 'batch_loss/train': 0.7154637947678566} +12/28/2021 17:30:10 - INFO - codeparrot_training - Step 43786: {'lr': 1.8945002385253957e-05, 'samples': 22418944, 'steps': 43786, 'batch_loss/train': 0.5718978389631957} +12/28/2021 17:30:21 - INFO - codeparrot_training - Step 43787: {'lr': 1.8938983526895103e-05, 'samples': 22419456, 'steps': 43787, 'batch_loss/train': 0.7757075749104843} +12/28/2021 17:30:33 - INFO - codeparrot_training - Step 43788: {'lr': 1.893296558714766e-05, 'samples': 22419968, 'steps': 43788, 'batch_loss/train': 0.5659619495272636} +12/28/2021 17:30:43 - INFO - codeparrot_training - Step 43789: {'lr': 1.8926948566035552e-05, 'samples': 22420480, 'steps': 43789, 'batch_loss/train': 0.6714893179014325} +12/28/2021 17:30:54 - INFO - codeparrot_training - Step 43790: {'lr': 1.8920932463582536e-05, 'samples': 22420992, 'steps': 43790, 'batch_loss/train': 0.706766554620117} +12/28/2021 17:31:05 - INFO - codeparrot_training - Step 43791: {'lr': 1.8914917279812703e-05, 'samples': 22421504, 'steps': 43791, 'batch_loss/train': 0.8232260039076209} +12/28/2021 17:31:19 - INFO - codeparrot_training - Step 43792: {'lr': 1.890890301474993e-05, 'samples': 22422016, 'steps': 43792, 'batch_loss/train': 0.7311509647406638} +12/28/2021 17:31:29 - INFO - codeparrot_training - Step 43793: {'lr': 1.8902889668418078e-05, 'samples': 22422528, 'steps': 43793, 'batch_loss/train': 0.7849465254694223} +12/28/2021 17:31:40 - INFO - codeparrot_training - Step 43794: {'lr': 1.889687724084102e-05, 'samples': 22423040, 'steps': 43794, 'batch_loss/train': 0.6781511658336967} +12/28/2021 17:31:52 - INFO - codeparrot_training - Step 43795: {'lr': 1.8890865732042824e-05, 'samples': 22423552, 'steps': 43795, 'batch_loss/train': 0.6685969200916588} +12/28/2021 17:32:03 - INFO - codeparrot_training - Step 43796: {'lr': 1.8884855142047215e-05, 'samples': 22424064, 'steps': 43796, 'batch_loss/train': 0.7120685032568872} +12/28/2021 17:32:13 - INFO - codeparrot_training - Step 43797: {'lr': 1.8878845470878175e-05, 'samples': 22424576, 'steps': 43797, 'batch_loss/train': 0.8332055490463972} +12/28/2021 17:32:25 - INFO - codeparrot_training - Step 43798: {'lr': 1.887283671855955e-05, 'samples': 22425088, 'steps': 43798, 'batch_loss/train': 0.6764198071323335} +12/28/2021 17:32:36 - INFO - codeparrot_training - Step 43799: {'lr': 1.8866828885115288e-05, 'samples': 22425600, 'steps': 43799, 'batch_loss/train': 0.7136989813297987} +12/28/2021 17:32:47 - INFO - codeparrot_training - Step 43800: {'lr': 1.8860821970569208e-05, 'samples': 22426112, 'steps': 43800, 'batch_loss/train': 0.6868313438026235} +12/28/2021 17:33:01 - INFO - codeparrot_training - Step 43801: {'lr': 1.885481597494529e-05, 'samples': 22426624, 'steps': 43801, 'batch_loss/train': 0.7362777683883905} +12/28/2021 17:33:11 - INFO - codeparrot_training - Step 43802: {'lr': 1.884881089826726e-05, 'samples': 22427136, 'steps': 43802, 'batch_loss/train': 0.721529901959002} +12/28/2021 17:33:22 - INFO - codeparrot_training - Step 43803: {'lr': 1.8842806740559132e-05, 'samples': 22427648, 'steps': 43803, 'batch_loss/train': 0.7540623038075864} +12/28/2021 17:33:33 - INFO - codeparrot_training - Step 43804: {'lr': 1.8836803501844745e-05, 'samples': 22428160, 'steps': 43804, 'batch_loss/train': 0.8614094201475382} +12/28/2021 17:33:45 - INFO - codeparrot_training - Step 43805: {'lr': 1.883080118214786e-05, 'samples': 22428672, 'steps': 43805, 'batch_loss/train': 0.8555084522813559} +12/28/2021 17:33:55 - INFO - codeparrot_training - Step 43806: {'lr': 1.8824799781492513e-05, 'samples': 22429184, 'steps': 43806, 'batch_loss/train': 0.5995352268218994} +12/28/2021 17:34:06 - INFO - codeparrot_training - Step 43807: {'lr': 1.8818799299902434e-05, 'samples': 22429696, 'steps': 43807, 'batch_loss/train': 0.8265802939422429} +12/28/2021 17:34:18 - INFO - codeparrot_training - Step 43808: {'lr': 1.881279973740152e-05, 'samples': 22430208, 'steps': 43808, 'batch_loss/train': 0.742986022029072} +12/28/2021 17:34:29 - INFO - codeparrot_training - Step 43809: {'lr': 1.8806801094013644e-05, 'samples': 22430720, 'steps': 43809, 'batch_loss/train': 0.48068901570513844} +12/28/2021 17:34:39 - INFO - codeparrot_training - Step 43810: {'lr': 1.8800803369762587e-05, 'samples': 22431232, 'steps': 43810, 'batch_loss/train': 0.676874364289688} +12/28/2021 17:34:52 - INFO - codeparrot_training - Step 43811: {'lr': 1.8794806564672252e-05, 'samples': 22431744, 'steps': 43811, 'batch_loss/train': 0.7241416499018669} +12/28/2021 17:35:02 - INFO - codeparrot_training - Step 43812: {'lr': 1.878881067876653e-05, 'samples': 22432256, 'steps': 43812, 'batch_loss/train': 0.7440331694670022} +12/28/2021 17:35:13 - INFO - codeparrot_training - Step 43813: {'lr': 1.878281571206908e-05, 'samples': 22432768, 'steps': 43813, 'batch_loss/train': 0.7609851198503748} +12/28/2021 17:35:27 - INFO - codeparrot_training - Step 43814: {'lr': 1.8776821664603928e-05, 'samples': 22433280, 'steps': 43814, 'batch_loss/train': 0.682246645912528} +12/28/2021 17:35:38 - INFO - codeparrot_training - Step 43815: {'lr': 1.8770828536394835e-05, 'samples': 22433792, 'steps': 43815, 'batch_loss/train': 0.5948410737328231} +12/28/2021 17:35:48 - INFO - codeparrot_training - Step 43816: {'lr': 1.8764836327465562e-05, 'samples': 22434304, 'steps': 43816, 'batch_loss/train': 0.7419239450246096} +12/28/2021 17:35:59 - INFO - codeparrot_training - Step 43817: {'lr': 1.8758845037840005e-05, 'samples': 22434816, 'steps': 43817, 'batch_loss/train': 0.7369029903784394} +12/28/2021 17:36:11 - INFO - codeparrot_training - Step 43818: {'lr': 1.8752854667542036e-05, 'samples': 22435328, 'steps': 43818, 'batch_loss/train': 0.8417036049067974} +12/28/2021 17:36:22 - INFO - codeparrot_training - Step 43819: {'lr': 1.8746865216595326e-05, 'samples': 22435840, 'steps': 43819, 'batch_loss/train': 0.9964025979861617} +12/28/2021 17:36:32 - INFO - codeparrot_training - Step 43820: {'lr': 1.874087668502375e-05, 'samples': 22436352, 'steps': 43820, 'batch_loss/train': 0.7567547773942351} +12/28/2021 17:36:46 - INFO - codeparrot_training - Step 43821: {'lr': 1.8734889072851202e-05, 'samples': 22436864, 'steps': 43821, 'batch_loss/train': 0.7012965672183782} +12/28/2021 17:36:57 - INFO - codeparrot_training - Step 43822: {'lr': 1.8728902380101358e-05, 'samples': 22437376, 'steps': 43822, 'batch_loss/train': 0.7053936589509249} +12/28/2021 17:37:08 - INFO - codeparrot_training - Step 43823: {'lr': 1.872291660679809e-05, 'samples': 22437888, 'steps': 43823, 'batch_loss/train': 0.9040170991793275} +12/28/2021 17:37:20 - INFO - codeparrot_training - Step 43824: {'lr': 1.8716931752965183e-05, 'samples': 22438400, 'steps': 43824, 'batch_loss/train': 0.746601689606905} +12/28/2021 17:37:31 - INFO - codeparrot_training - Step 43825: {'lr': 1.8710947818626396e-05, 'samples': 22438912, 'steps': 43825, 'batch_loss/train': 0.8146760929375887} +12/28/2021 17:37:41 - INFO - codeparrot_training - Step 43826: {'lr': 1.870496480380557e-05, 'samples': 22439424, 'steps': 43826, 'batch_loss/train': 0.7012322078226134} +12/28/2021 17:37:52 - INFO - codeparrot_training - Step 43827: {'lr': 1.869898270852649e-05, 'samples': 22439936, 'steps': 43827, 'batch_loss/train': 0.614471772685647} +12/28/2021 17:38:04 - INFO - codeparrot_training - Step 43828: {'lr': 1.8693001532812894e-05, 'samples': 22440448, 'steps': 43828, 'batch_loss/train': 0.6500683519989252} +12/28/2021 17:38:15 - INFO - codeparrot_training - Step 43829: {'lr': 1.8687021276688587e-05, 'samples': 22440960, 'steps': 43829, 'batch_loss/train': 0.755784903652966} +12/28/2021 17:38:25 - INFO - codeparrot_training - Step 43830: {'lr': 1.868104194017739e-05, 'samples': 22441472, 'steps': 43830, 'batch_loss/train': 0.8110726804006845} +12/28/2021 17:38:39 - INFO - codeparrot_training - Step 43831: {'lr': 1.8675063523302947e-05, 'samples': 22441984, 'steps': 43831, 'batch_loss/train': 0.7302573919296265} +12/28/2021 17:38:50 - INFO - codeparrot_training - Step 43832: {'lr': 1.866908602608919e-05, 'samples': 22442496, 'steps': 43832, 'batch_loss/train': 0.8060567118227482} +12/28/2021 17:39:00 - INFO - codeparrot_training - Step 43833: {'lr': 1.8663109448559757e-05, 'samples': 22443008, 'steps': 43833, 'batch_loss/train': 0.7778100473806262} +12/28/2021 17:39:12 - INFO - codeparrot_training - Step 43834: {'lr': 1.865713379073844e-05, 'samples': 22443520, 'steps': 43834, 'batch_loss/train': 0.7322870097123086} +12/28/2021 17:39:23 - INFO - codeparrot_training - Step 43835: {'lr': 1.8651159052648996e-05, 'samples': 22444032, 'steps': 43835, 'batch_loss/train': 0.7876257095485926} +12/28/2021 17:39:34 - INFO - codeparrot_training - Step 43836: {'lr': 1.8645185234315213e-05, 'samples': 22444544, 'steps': 43836, 'batch_loss/train': 0.6685505895875394} +12/28/2021 17:39:44 - INFO - codeparrot_training - Step 43837: {'lr': 1.8639212335760823e-05, 'samples': 22445056, 'steps': 43837, 'batch_loss/train': 0.6978654148988426} +12/28/2021 17:39:58 - INFO - codeparrot_training - Step 43838: {'lr': 1.8633240357009557e-05, 'samples': 22445568, 'steps': 43838, 'batch_loss/train': 0.6038864581205416} +12/28/2021 17:40:09 - INFO - codeparrot_training - Step 43839: {'lr': 1.862726929808517e-05, 'samples': 22446080, 'steps': 43839, 'batch_loss/train': 0.7656259571667761} +12/28/2021 17:40:19 - INFO - codeparrot_training - Step 43840: {'lr': 1.8621299159011396e-05, 'samples': 22446592, 'steps': 43840, 'batch_loss/train': 0.6894454666180536} +12/28/2021 17:40:32 - INFO - codeparrot_training - Step 43841: {'lr': 1.861532993981202e-05, 'samples': 22447104, 'steps': 43841, 'batch_loss/train': 0.6142770652659237} +12/28/2021 17:40:42 - INFO - codeparrot_training - Step 43842: {'lr': 1.860936164051061e-05, 'samples': 22447616, 'steps': 43842, 'batch_loss/train': 0.7297738939523697} +12/28/2021 17:40:53 - INFO - codeparrot_training - Step 43843: {'lr': 1.8603394261131085e-05, 'samples': 22448128, 'steps': 43843, 'batch_loss/train': 0.6767819102387875} +12/28/2021 17:41:05 - INFO - codeparrot_training - Step 43844: {'lr': 1.8597427801697125e-05, 'samples': 22448640, 'steps': 43844, 'batch_loss/train': 0.6502404562197626} +12/28/2021 17:41:16 - INFO - codeparrot_training - Step 43845: {'lr': 1.8591462262232377e-05, 'samples': 22449152, 'steps': 43845, 'batch_loss/train': 0.8553340826183558} +12/28/2021 17:41:27 - INFO - codeparrot_training - Step 43846: {'lr': 1.8585497642760546e-05, 'samples': 22449664, 'steps': 43846, 'batch_loss/train': 1.0293504928704351} +12/28/2021 17:41:39 - INFO - codeparrot_training - Step 43847: {'lr': 1.857953394330547e-05, 'samples': 22450176, 'steps': 43847, 'batch_loss/train': 0.8040087958797812} +12/28/2021 17:41:50 - INFO - codeparrot_training - Step 43848: {'lr': 1.8573571163890774e-05, 'samples': 22450688, 'steps': 43848, 'batch_loss/train': 0.7392540879081935} +12/28/2021 17:42:00 - INFO - codeparrot_training - Step 43849: {'lr': 1.8567609304540155e-05, 'samples': 22451200, 'steps': 43849, 'batch_loss/train': 0.725763919763267} +12/28/2021 17:42:11 - INFO - codeparrot_training - Step 43850: {'lr': 1.856164836527735e-05, 'samples': 22451712, 'steps': 43850, 'batch_loss/train': 0.6883806046098471} +12/28/2021 17:42:25 - INFO - codeparrot_training - Step 43851: {'lr': 1.855568834612603e-05, 'samples': 22452224, 'steps': 43851, 'batch_loss/train': 0.8196455026045442} +12/28/2021 17:42:36 - INFO - codeparrot_training - Step 43852: {'lr': 1.854972924710993e-05, 'samples': 22452736, 'steps': 43852, 'batch_loss/train': 0.6889845561236143} +12/28/2021 17:42:46 - INFO - codeparrot_training - Step 43853: {'lr': 1.8543771068252696e-05, 'samples': 22453248, 'steps': 43853, 'batch_loss/train': 0.7093792604282498} +12/28/2021 17:42:58 - INFO - codeparrot_training - Step 43854: {'lr': 1.853781380957803e-05, 'samples': 22453760, 'steps': 43854, 'batch_loss/train': 0.7676394548034295} +12/28/2021 17:43:09 - INFO - codeparrot_training - Step 43855: {'lr': 1.853185747110961e-05, 'samples': 22454272, 'steps': 43855, 'batch_loss/train': 0.7667479705996811} +12/28/2021 17:43:19 - INFO - codeparrot_training - Step 43856: {'lr': 1.852590205287119e-05, 'samples': 22454784, 'steps': 43856, 'batch_loss/train': 0.7474100929684937} +12/28/2021 17:43:32 - INFO - codeparrot_training - Step 43857: {'lr': 1.8519947554886283e-05, 'samples': 22455296, 'steps': 43857, 'batch_loss/train': 0.7024413379840553} +12/28/2021 17:43:42 - INFO - codeparrot_training - Step 43858: {'lr': 1.851399397717876e-05, 'samples': 22455808, 'steps': 43858, 'batch_loss/train': 0.5229155708802864} +12/28/2021 17:43:53 - INFO - codeparrot_training - Step 43859: {'lr': 1.8508041319772127e-05, 'samples': 22456320, 'steps': 43859, 'batch_loss/train': 0.7213924992829561} +12/28/2021 17:44:03 - INFO - codeparrot_training - Step 43860: {'lr': 1.8502089582690146e-05, 'samples': 22456832, 'steps': 43860, 'batch_loss/train': 0.8022694699466228} +12/28/2021 17:44:17 - INFO - codeparrot_training - Step 43861: {'lr': 1.8496138765956405e-05, 'samples': 22457344, 'steps': 43861, 'batch_loss/train': 0.6946917860768735} +12/28/2021 17:44:28 - INFO - codeparrot_training - Step 43862: {'lr': 1.8490188869594637e-05, 'samples': 22457856, 'steps': 43862, 'batch_loss/train': 0.7772121336311102} +12/28/2021 17:44:39 - INFO - codeparrot_training - Step 43863: {'lr': 1.8484239893628462e-05, 'samples': 22458368, 'steps': 43863, 'batch_loss/train': 0.8024026500061154} +12/28/2021 17:44:51 - INFO - codeparrot_training - Step 43864: {'lr': 1.84782918380815e-05, 'samples': 22458880, 'steps': 43864, 'batch_loss/train': 0.7936564185656607} +12/28/2021 17:45:01 - INFO - codeparrot_training - Step 43865: {'lr': 1.8472344702977457e-05, 'samples': 22459392, 'steps': 43865, 'batch_loss/train': 0.7411293145269156} +12/28/2021 17:45:12 - INFO - codeparrot_training - Step 43866: {'lr': 1.8466398488339947e-05, 'samples': 22459904, 'steps': 43866, 'batch_loss/train': 0.7054022289812565} +12/28/2021 17:45:26 - INFO - codeparrot_training - Step 43867: {'lr': 1.846045319419265e-05, 'samples': 22460416, 'steps': 43867, 'batch_loss/train': 0.7662192992866039} +12/28/2021 17:45:37 - INFO - codeparrot_training - Step 43868: {'lr': 1.84545088205591e-05, 'samples': 22460928, 'steps': 43868, 'batch_loss/train': 0.7118016095482744} +12/28/2021 17:45:47 - INFO - codeparrot_training - Step 43869: {'lr': 1.8448565367463034e-05, 'samples': 22461440, 'steps': 43869, 'batch_loss/train': 0.7118379813618958} +12/28/2021 17:45:59 - INFO - codeparrot_training - Step 43870: {'lr': 1.8442622834928063e-05, 'samples': 22461952, 'steps': 43870, 'batch_loss/train': 0.806603180244565} +12/28/2021 17:46:10 - INFO - codeparrot_training - Step 43871: {'lr': 1.8436681222977758e-05, 'samples': 22462464, 'steps': 43871, 'batch_loss/train': 0.7959309481084347} +12/28/2021 17:46:21 - INFO - codeparrot_training - Step 43872: {'lr': 1.8430740531635736e-05, 'samples': 22462976, 'steps': 43872, 'batch_loss/train': 0.7141621774062514} +12/28/2021 17:46:31 - INFO - codeparrot_training - Step 43873: {'lr': 1.842480076092573e-05, 'samples': 22463488, 'steps': 43873, 'batch_loss/train': 0.6299356808885932} +12/28/2021 17:46:43 - INFO - codeparrot_training - Step 43874: {'lr': 1.841886191087122e-05, 'samples': 22464000, 'steps': 43874, 'batch_loss/train': 0.7257975321263075} +12/28/2021 17:46:54 - INFO - codeparrot_training - Step 43875: {'lr': 1.841292398149591e-05, 'samples': 22464512, 'steps': 43875, 'batch_loss/train': 0.7256235741078854} +12/28/2021 17:47:04 - INFO - codeparrot_training - Step 43876: {'lr': 1.8406986972823337e-05, 'samples': 22465024, 'steps': 43876, 'batch_loss/train': 0.7026932234875858} +12/28/2021 17:47:18 - INFO - codeparrot_training - Step 43877: {'lr': 1.8401050884877145e-05, 'samples': 22465536, 'steps': 43877, 'batch_loss/train': 0.770290276966989} +12/28/2021 17:47:29 - INFO - codeparrot_training - Step 43878: {'lr': 1.8395115717680934e-05, 'samples': 22466048, 'steps': 43878, 'batch_loss/train': 0.8062632102519274} +12/28/2021 17:47:40 - INFO - codeparrot_training - Step 43879: {'lr': 1.8389181471258288e-05, 'samples': 22466560, 'steps': 43879, 'batch_loss/train': 0.6697713097091764} +12/28/2021 17:47:52 - INFO - codeparrot_training - Step 43880: {'lr': 1.8383248145632802e-05, 'samples': 22467072, 'steps': 43880, 'batch_loss/train': 0.6853900225833058} +12/28/2021 17:48:03 - INFO - codeparrot_training - Step 43881: {'lr': 1.8377315740828072e-05, 'samples': 22467584, 'steps': 43881, 'batch_loss/train': 0.9033179981634021} +12/28/2021 17:48:13 - INFO - codeparrot_training - Step 43882: {'lr': 1.837138425686774e-05, 'samples': 22468096, 'steps': 43882, 'batch_loss/train': 0.6073418925516307} +12/28/2021 17:48:24 - INFO - codeparrot_training - Step 43883: {'lr': 1.8365453693775207e-05, 'samples': 22468608, 'steps': 43883, 'batch_loss/train': 0.6618920126929879} +12/28/2021 17:48:36 - INFO - codeparrot_training - Step 43884: {'lr': 1.8359524051574232e-05, 'samples': 22469120, 'steps': 43884, 'batch_loss/train': 0.8369752466678619} +12/28/2021 17:48:47 - INFO - codeparrot_training - Step 43885: {'lr': 1.8353595330288354e-05, 'samples': 22469632, 'steps': 43885, 'batch_loss/train': 0.6932432467583567} +12/28/2021 17:48:57 - INFO - codeparrot_training - Step 43886: {'lr': 1.8347667529941075e-05, 'samples': 22470144, 'steps': 43886, 'batch_loss/train': 0.7861136039718986} +12/28/2021 17:49:11 - INFO - codeparrot_training - Step 43887: {'lr': 1.8341740650555995e-05, 'samples': 22470656, 'steps': 43887, 'batch_loss/train': 0.7523678508587182} +12/28/2021 17:49:22 - INFO - codeparrot_training - Step 43888: {'lr': 1.83358146921567e-05, 'samples': 22471168, 'steps': 43888, 'batch_loss/train': 0.6213970193639398} +12/28/2021 17:49:32 - INFO - codeparrot_training - Step 43889: {'lr': 1.8329889654766702e-05, 'samples': 22471680, 'steps': 43889, 'batch_loss/train': 0.8045492940582335} +12/28/2021 17:49:44 - INFO - codeparrot_training - Step 43890: {'lr': 1.8323965538409622e-05, 'samples': 22472192, 'steps': 43890, 'batch_loss/train': 0.7141005108132958} +12/28/2021 17:49:55 - INFO - codeparrot_training - Step 43891: {'lr': 1.8318042343108965e-05, 'samples': 22472704, 'steps': 43891, 'batch_loss/train': 0.7022842082660645} +12/28/2021 17:50:06 - INFO - codeparrot_training - Step 43892: {'lr': 1.83121200688883e-05, 'samples': 22473216, 'steps': 43892, 'batch_loss/train': 0.7185953832231462} +12/28/2021 17:50:16 - INFO - codeparrot_training - Step 43893: {'lr': 1.8306198715771134e-05, 'samples': 22473728, 'steps': 43893, 'batch_loss/train': 0.8697312297299504} +12/28/2021 17:50:28 - INFO - codeparrot_training - Step 43894: {'lr': 1.8300278283781057e-05, 'samples': 22474240, 'steps': 43894, 'batch_loss/train': 0.6356533863581717} +12/28/2021 17:50:39 - INFO - codeparrot_training - Step 43895: {'lr': 1.829435877294161e-05, 'samples': 22474752, 'steps': 43895, 'batch_loss/train': 0.7617720370180905} +12/28/2021 17:50:50 - INFO - codeparrot_training - Step 43896: {'lr': 1.8288440183276302e-05, 'samples': 22475264, 'steps': 43896, 'batch_loss/train': 0.7324523311108351} +12/28/2021 17:51:04 - INFO - codeparrot_training - Step 43897: {'lr': 1.8282522514808665e-05, 'samples': 22475776, 'steps': 43897, 'batch_loss/train': 0.7739135585725307} +12/28/2021 17:51:15 - INFO - codeparrot_training - Step 43898: {'lr': 1.8276605767562155e-05, 'samples': 22476288, 'steps': 43898, 'batch_loss/train': 1.033289611339569} +12/28/2021 17:51:25 - INFO - codeparrot_training - Step 43899: {'lr': 1.8270689941560446e-05, 'samples': 22476800, 'steps': 43899, 'batch_loss/train': 0.8285173873882741} +12/28/2021 17:51:38 - INFO - codeparrot_training - Step 43900: {'lr': 1.826477503682697e-05, 'samples': 22477312, 'steps': 43900, 'batch_loss/train': 0.6635183799080551} +12/28/2021 17:51:48 - INFO - codeparrot_training - Step 43901: {'lr': 1.8258861053385172e-05, 'samples': 22477824, 'steps': 43901, 'batch_loss/train': 0.7250218358822167} +12/28/2021 17:51:59 - INFO - codeparrot_training - Step 43902: {'lr': 1.8252947991258732e-05, 'samples': 22478336, 'steps': 43902, 'batch_loss/train': 0.7025320022366941} +12/28/2021 17:52:09 - INFO - codeparrot_training - Step 43903: {'lr': 1.8247035850471023e-05, 'samples': 22478848, 'steps': 43903, 'batch_loss/train': 0.6613040022784844} +12/28/2021 17:52:22 - INFO - codeparrot_training - Step 43904: {'lr': 1.8241124631045603e-05, 'samples': 22479360, 'steps': 43904, 'batch_loss/train': 0.7640813761390746} +12/28/2021 17:52:32 - INFO - codeparrot_training - Step 43905: {'lr': 1.8235214333005958e-05, 'samples': 22479872, 'steps': 43905, 'batch_loss/train': 0.7343862212146632} +12/28/2021 17:52:43 - INFO - codeparrot_training - Step 43906: {'lr': 1.8229304956375593e-05, 'samples': 22480384, 'steps': 43906, 'batch_loss/train': 0.8082115133292973} +12/28/2021 17:52:57 - INFO - codeparrot_training - Step 43907: {'lr': 1.8223396501177992e-05, 'samples': 22480896, 'steps': 43907, 'batch_loss/train': 0.7579803792759776} +12/28/2021 17:53:08 - INFO - codeparrot_training - Step 43908: {'lr': 1.8217488967436692e-05, 'samples': 22481408, 'steps': 43908, 'batch_loss/train': 0.8043681737035513} +12/28/2021 17:53:18 - INFO - codeparrot_training - Step 43909: {'lr': 1.821158235517506e-05, 'samples': 22481920, 'steps': 43909, 'batch_loss/train': 0.7190670198760927} +12/28/2021 17:53:30 - INFO - codeparrot_training - Step 43910: {'lr': 1.820567666441669e-05, 'samples': 22482432, 'steps': 43910, 'batch_loss/train': 0.6854786032345146} +12/28/2021 17:53:41 - INFO - codeparrot_training - Step 43911: {'lr': 1.8199771895185068e-05, 'samples': 22482944, 'steps': 43911, 'batch_loss/train': 0.7300583044998348} +12/28/2021 17:53:52 - INFO - codeparrot_training - Step 43912: {'lr': 1.8193868047503615e-05, 'samples': 22483456, 'steps': 43912, 'batch_loss/train': 0.742433762177825} +12/28/2021 17:54:02 - INFO - codeparrot_training - Step 43913: {'lr': 1.8187965121395783e-05, 'samples': 22483968, 'steps': 43913, 'batch_loss/train': 0.7614028798416257} +12/28/2021 17:54:16 - INFO - codeparrot_training - Step 43914: {'lr': 1.8182063116885084e-05, 'samples': 22484480, 'steps': 43914, 'batch_loss/train': 0.7084021957125515} +12/28/2021 17:54:27 - INFO - codeparrot_training - Step 43915: {'lr': 1.817616203399497e-05, 'samples': 22484992, 'steps': 43915, 'batch_loss/train': 0.772827104665339} +12/28/2021 17:54:37 - INFO - codeparrot_training - Step 43916: {'lr': 1.8170261872748923e-05, 'samples': 22485504, 'steps': 43916, 'batch_loss/train': 0.6865394607884809} +12/28/2021 17:54:50 - INFO - codeparrot_training - Step 43917: {'lr': 1.816436263317034e-05, 'samples': 22486016, 'steps': 43917, 'batch_loss/train': 0.6628643297590315} +12/28/2021 17:55:00 - INFO - codeparrot_training - Step 43918: {'lr': 1.8158464315282757e-05, 'samples': 22486528, 'steps': 43918, 'batch_loss/train': 0.6627261482644826} +12/28/2021 17:55:11 - INFO - codeparrot_training - Step 43919: {'lr': 1.815256691910955e-05, 'samples': 22487040, 'steps': 43919, 'batch_loss/train': 0.79127954877913} +12/28/2021 17:55:23 - INFO - codeparrot_training - Step 43920: {'lr': 1.814667044467419e-05, 'samples': 22487552, 'steps': 43920, 'batch_loss/train': 1.3472102604282554} +12/28/2021 17:55:34 - INFO - codeparrot_training - Step 43921: {'lr': 1.814077489200014e-05, 'samples': 22488064, 'steps': 43921, 'batch_loss/train': 0.6902215210720897} +12/28/2021 17:55:44 - INFO - codeparrot_training - Step 43922: {'lr': 1.8134880261110876e-05, 'samples': 22488576, 'steps': 43922, 'batch_loss/train': 0.5829236574936658} +12/28/2021 17:55:55 - INFO - codeparrot_training - Step 43923: {'lr': 1.8128986552029712e-05, 'samples': 22489088, 'steps': 43923, 'batch_loss/train': 0.5538990045897663} +12/28/2021 17:56:07 - INFO - codeparrot_training - Step 43924: {'lr': 1.812309376478011e-05, 'samples': 22489600, 'steps': 43924, 'batch_loss/train': 0.7264601979404688} +12/28/2021 17:56:18 - INFO - codeparrot_training - Step 43925: {'lr': 1.811720189938562e-05, 'samples': 22490112, 'steps': 43925, 'batch_loss/train': 0.7681647376157343} +12/28/2021 17:56:28 - INFO - codeparrot_training - Step 43926: {'lr': 1.8111310955869543e-05, 'samples': 22490624, 'steps': 43926, 'batch_loss/train': 0.6164371364284307} +12/28/2021 17:56:43 - INFO - codeparrot_training - Step 43927: {'lr': 1.8105420934255297e-05, 'samples': 22491136, 'steps': 43927, 'batch_loss/train': 0.8505219703074545} +12/28/2021 17:56:53 - INFO - codeparrot_training - Step 43928: {'lr': 1.8099531834566418e-05, 'samples': 22491648, 'steps': 43928, 'batch_loss/train': 0.8517875140532851} +12/28/2021 17:57:04 - INFO - codeparrot_training - Step 43929: {'lr': 1.809364365682617e-05, 'samples': 22492160, 'steps': 43929, 'batch_loss/train': 0.620694070123136} +12/28/2021 17:57:16 - INFO - codeparrot_training - Step 43930: {'lr': 1.808775640105806e-05, 'samples': 22492672, 'steps': 43930, 'batch_loss/train': 0.7820970311295241} +12/28/2021 17:57:27 - INFO - codeparrot_training - Step 43931: {'lr': 1.8081870067285456e-05, 'samples': 22493184, 'steps': 43931, 'batch_loss/train': 0.8917777645401657} +12/28/2021 17:57:37 - INFO - codeparrot_training - Step 43932: {'lr': 1.8075984655531785e-05, 'samples': 22493696, 'steps': 43932, 'batch_loss/train': 0.8388322577811778} +12/28/2021 17:57:48 - INFO - codeparrot_training - Step 43933: {'lr': 1.8070100165820393e-05, 'samples': 22494208, 'steps': 43933, 'batch_loss/train': 0.6828112020157278} +12/28/2021 17:58:01 - INFO - codeparrot_training - Step 43934: {'lr': 1.8064216598174782e-05, 'samples': 22494720, 'steps': 43934, 'batch_loss/train': 0.6607731976546347} +12/28/2021 17:58:11 - INFO - codeparrot_training - Step 43935: {'lr': 1.8058333952618162e-05, 'samples': 22495232, 'steps': 43935, 'batch_loss/train': 0.5710829086601734} +12/28/2021 17:58:22 - INFO - codeparrot_training - Step 43936: {'lr': 1.8052452229174094e-05, 'samples': 22495744, 'steps': 43936, 'batch_loss/train': 0.5896034324541688} +12/28/2021 17:58:36 - INFO - codeparrot_training - Step 43937: {'lr': 1.8046571427865922e-05, 'samples': 22496256, 'steps': 43937, 'batch_loss/train': 0.7693973025307059} +12/28/2021 17:58:47 - INFO - codeparrot_training - Step 43938: {'lr': 1.8040691548716932e-05, 'samples': 22496768, 'steps': 43938, 'batch_loss/train': 0.7406941815279424} +12/28/2021 17:58:58 - INFO - codeparrot_training - Step 43939: {'lr': 1.8034812591750578e-05, 'samples': 22497280, 'steps': 43939, 'batch_loss/train': 1.0045012449845672} +12/28/2021 17:59:08 - INFO - codeparrot_training - Step 43940: {'lr': 1.8028934556990285e-05, 'samples': 22497792, 'steps': 43940, 'batch_loss/train': 0.6363733462058008} +12/28/2021 17:59:21 - INFO - codeparrot_training - Step 43941: {'lr': 1.802305744445934e-05, 'samples': 22498304, 'steps': 43941, 'batch_loss/train': 0.6942926794290543} +12/28/2021 17:59:31 - INFO - codeparrot_training - Step 43942: {'lr': 1.8017181254181088e-05, 'samples': 22498816, 'steps': 43942, 'batch_loss/train': 0.753554267110303} +12/28/2021 17:59:42 - INFO - codeparrot_training - Step 43943: {'lr': 1.801130598617895e-05, 'samples': 22499328, 'steps': 43943, 'batch_loss/train': 0.6791631744708866} +12/28/2021 17:59:56 - INFO - codeparrot_training - Step 43944: {'lr': 1.8005431640476272e-05, 'samples': 22499840, 'steps': 43944, 'batch_loss/train': 0.7030551908537745} +12/28/2021 18:00:06 - INFO - codeparrot_training - Step 43945: {'lr': 1.799955821709637e-05, 'samples': 22500352, 'steps': 43945, 'batch_loss/train': 0.8800844671204686} +12/28/2021 18:00:17 - INFO - codeparrot_training - Step 43946: {'lr': 1.7993685716062662e-05, 'samples': 22500864, 'steps': 43946, 'batch_loss/train': 0.6873562136897817} +12/28/2021 18:00:29 - INFO - codeparrot_training - Step 43947: {'lr': 1.7987814137398416e-05, 'samples': 22501376, 'steps': 43947, 'batch_loss/train': 0.744465502910316} +12/28/2021 18:00:39 - INFO - codeparrot_training - Step 43948: {'lr': 1.7981943481127083e-05, 'samples': 22501888, 'steps': 43948, 'batch_loss/train': 0.7348651071079075} +12/28/2021 18:00:50 - INFO - codeparrot_training - Step 43949: {'lr': 1.7976073747271892e-05, 'samples': 22502400, 'steps': 43949, 'batch_loss/train': 0.7132592761190608} +12/28/2021 18:01:01 - INFO - codeparrot_training - Step 43950: {'lr': 1.7970204935856187e-05, 'samples': 22502912, 'steps': 43950, 'batch_loss/train': 0.7409438602626324} +12/28/2021 18:01:13 - INFO - codeparrot_training - Step 43951: {'lr': 1.7964337046903424e-05, 'samples': 22503424, 'steps': 43951, 'batch_loss/train': 0.7070438580121845} +12/28/2021 18:01:24 - INFO - codeparrot_training - Step 43952: {'lr': 1.7958470080436802e-05, 'samples': 22503936, 'steps': 43952, 'batch_loss/train': 0.7404608263168484} +12/28/2021 18:01:34 - INFO - codeparrot_training - Step 43953: {'lr': 1.7952604036479635e-05, 'samples': 22504448, 'steps': 43953, 'batch_loss/train': 0.6918024807237089} +12/28/2021 18:01:48 - INFO - codeparrot_training - Step 43954: {'lr': 1.7946738915055384e-05, 'samples': 22504960, 'steps': 43954, 'batch_loss/train': 0.62080501858145} +12/28/2021 18:01:59 - INFO - codeparrot_training - Step 43955: {'lr': 1.7940874716187216e-05, 'samples': 22505472, 'steps': 43955, 'batch_loss/train': 0.7964174575172365} +12/28/2021 18:02:10 - INFO - codeparrot_training - Step 43956: {'lr': 1.7935011439898535e-05, 'samples': 22505984, 'steps': 43956, 'batch_loss/train': 0.7633007746189833} +12/28/2021 18:02:22 - INFO - codeparrot_training - Step 43957: {'lr': 1.7929149086212597e-05, 'samples': 22506496, 'steps': 43957, 'batch_loss/train': 0.6913404786027968} +12/28/2021 18:02:32 - INFO - codeparrot_training - Step 43958: {'lr': 1.7923287655152774e-05, 'samples': 22507008, 'steps': 43958, 'batch_loss/train': 0.7297321367077529} +12/28/2021 18:02:43 - INFO - codeparrot_training - Step 43959: {'lr': 1.7917427146742298e-05, 'samples': 22507520, 'steps': 43959, 'batch_loss/train': 0.6581164395902306} +12/28/2021 18:02:54 - INFO - codeparrot_training - Step 43960: {'lr': 1.7911567561004567e-05, 'samples': 22508032, 'steps': 43960, 'batch_loss/train': 0.6635325835086405} +12/28/2021 18:03:06 - INFO - codeparrot_training - Step 43961: {'lr': 1.7905708897962696e-05, 'samples': 22508544, 'steps': 43961, 'batch_loss/train': 0.7476076101884246} +12/28/2021 18:03:16 - INFO - codeparrot_training - Step 43962: {'lr': 1.7899851157640174e-05, 'samples': 22509056, 'steps': 43962, 'batch_loss/train': 0.6780840209685266} +12/28/2021 18:03:27 - INFO - codeparrot_training - Step 43963: {'lr': 1.7893994340060227e-05, 'samples': 22509568, 'steps': 43963, 'batch_loss/train': 0.6889210809022188} +12/28/2021 18:03:39 - INFO - codeparrot_training - Step 43964: {'lr': 1.7888138445246033e-05, 'samples': 22510080, 'steps': 43964, 'batch_loss/train': 1.0689470228971913} +12/28/2021 18:03:50 - INFO - codeparrot_training - Step 43965: {'lr': 1.788228347322099e-05, 'samples': 22510592, 'steps': 43965, 'batch_loss/train': 0.8148815762251616} +12/28/2021 18:04:01 - INFO - codeparrot_training - Step 43966: {'lr': 1.787642942400841e-05, 'samples': 22511104, 'steps': 43966, 'batch_loss/train': 0.7992057017982006} +12/28/2021 18:04:15 - INFO - codeparrot_training - Step 43967: {'lr': 1.7870576297631446e-05, 'samples': 22511616, 'steps': 43967, 'batch_loss/train': 1.0049192178994417} +12/28/2021 18:04:26 - INFO - codeparrot_training - Step 43968: {'lr': 1.786472409411341e-05, 'samples': 22512128, 'steps': 43968, 'batch_loss/train': 0.7560720397159457} +12/28/2021 18:04:36 - INFO - codeparrot_training - Step 43969: {'lr': 1.7858872813477587e-05, 'samples': 22512640, 'steps': 43969, 'batch_loss/train': 0.7870151326060295} +12/28/2021 18:04:47 - INFO - codeparrot_training - Step 43970: {'lr': 1.7853022455747243e-05, 'samples': 22513152, 'steps': 43970, 'batch_loss/train': 0.747612402192317} +12/28/2021 18:04:59 - INFO - codeparrot_training - Step 43971: {'lr': 1.7847173020945628e-05, 'samples': 22513664, 'steps': 43971, 'batch_loss/train': 0.6945935555268079} +12/28/2021 18:05:10 - INFO - codeparrot_training - Step 43972: {'lr': 1.784132450909598e-05, 'samples': 22514176, 'steps': 43972, 'batch_loss/train': 0.7322058700956404} +12/28/2021 18:05:20 - INFO - codeparrot_training - Step 43973: {'lr': 1.783547692022158e-05, 'samples': 22514688, 'steps': 43973, 'batch_loss/train': 0.7563880458474159} +12/28/2021 18:05:34 - INFO - codeparrot_training - Step 43974: {'lr': 1.7829630254345642e-05, 'samples': 22515200, 'steps': 43974, 'batch_loss/train': 0.7938450326910242} +12/28/2021 18:05:45 - INFO - codeparrot_training - Step 43975: {'lr': 1.7823784511491443e-05, 'samples': 22515712, 'steps': 43975, 'batch_loss/train': 0.640654010232538} +12/28/2021 18:05:55 - INFO - codeparrot_training - Step 43976: {'lr': 1.7817939691682217e-05, 'samples': 22516224, 'steps': 43976, 'batch_loss/train': 0.7150104407919571} +12/28/2021 18:06:07 - INFO - codeparrot_training - Step 43977: {'lr': 1.7812095794941225e-05, 'samples': 22516736, 'steps': 43977, 'batch_loss/train': 0.7746856939047575} +12/28/2021 18:06:18 - INFO - codeparrot_training - Step 43978: {'lr': 1.780625282129164e-05, 'samples': 22517248, 'steps': 43978, 'batch_loss/train': 0.7954997536726296} +12/28/2021 18:06:29 - INFO - codeparrot_training - Step 43979: {'lr': 1.780041077075667e-05, 'samples': 22517760, 'steps': 43979, 'batch_loss/train': 0.6502470104023814} +12/28/2021 18:06:39 - INFO - codeparrot_training - Step 43980: {'lr': 1.7794569643359683e-05, 'samples': 22518272, 'steps': 43980, 'batch_loss/train': 0.7044253151398152} +12/28/2021 18:06:51 - INFO - codeparrot_training - Step 43981: {'lr': 1.778872943912374e-05, 'samples': 22518784, 'steps': 43981, 'batch_loss/train': 0.7308340505696833} +12/28/2021 18:07:02 - INFO - codeparrot_training - Step 43982: {'lr': 1.7782890158072162e-05, 'samples': 22519296, 'steps': 43982, 'batch_loss/train': 0.7168416767381132} +12/28/2021 18:07:13 - INFO - codeparrot_training - Step 43983: {'lr': 1.777705180022812e-05, 'samples': 22519808, 'steps': 43983, 'batch_loss/train': 0.7470676791854203} +12/28/2021 18:07:27 - INFO - codeparrot_training - Step 43984: {'lr': 1.7771214365614824e-05, 'samples': 22520320, 'steps': 43984, 'batch_loss/train': 0.7386877983808517} +12/28/2021 18:07:37 - INFO - codeparrot_training - Step 43985: {'lr': 1.7765377854255528e-05, 'samples': 22520832, 'steps': 43985, 'batch_loss/train': 0.8085194481536746} +12/28/2021 18:07:48 - INFO - codeparrot_training - Step 43986: {'lr': 1.7759542266173405e-05, 'samples': 22521344, 'steps': 43986, 'batch_loss/train': 0.7788183372467756} +12/28/2021 18:08:00 - INFO - codeparrot_training - Step 43987: {'lr': 1.7753707601391585e-05, 'samples': 22521856, 'steps': 43987, 'batch_loss/train': 0.695502744987607} +12/28/2021 18:08:11 - INFO - codeparrot_training - Step 43988: {'lr': 1.7747873859933374e-05, 'samples': 22522368, 'steps': 43988, 'batch_loss/train': 0.6772650857456028} +12/28/2021 18:08:21 - INFO - codeparrot_training - Step 43989: {'lr': 1.774204104182195e-05, 'samples': 22522880, 'steps': 43989, 'batch_loss/train': 0.7459799749776721} +12/28/2021 18:08:34 - INFO - codeparrot_training - Step 43990: {'lr': 1.7736209147080406e-05, 'samples': 22523392, 'steps': 43990, 'batch_loss/train': 0.7634867029264569} +12/28/2021 18:08:44 - INFO - codeparrot_training - Step 43991: {'lr': 1.773037817573203e-05, 'samples': 22523904, 'steps': 43991, 'batch_loss/train': 0.6845857407897711} +12/28/2021 18:08:55 - INFO - codeparrot_training - Step 43992: {'lr': 1.7724548127800027e-05, 'samples': 22524416, 'steps': 43992, 'batch_loss/train': 0.739385865163058} +12/28/2021 18:09:06 - INFO - codeparrot_training - Step 43993: {'lr': 1.771871900330746e-05, 'samples': 22524928, 'steps': 43993, 'batch_loss/train': 0.7809376730583608} +12/28/2021 18:09:19 - INFO - codeparrot_training - Step 43994: {'lr': 1.771289080227756e-05, 'samples': 22525440, 'steps': 43994, 'batch_loss/train': 0.6919710584916174} +12/28/2021 18:09:29 - INFO - codeparrot_training - Step 43995: {'lr': 1.7707063524733476e-05, 'samples': 22525952, 'steps': 43995, 'batch_loss/train': 0.7381732212379575} +12/28/2021 18:09:40 - INFO - codeparrot_training - Step 43996: {'lr': 1.7701237170698437e-05, 'samples': 22526464, 'steps': 43996, 'batch_loss/train': 0.7384666632860899} +12/28/2021 18:09:53 - INFO - codeparrot_training - Step 43997: {'lr': 1.769541174019554e-05, 'samples': 22526976, 'steps': 43997, 'batch_loss/train': 0.705515009118244} +12/28/2021 18:10:03 - INFO - codeparrot_training - Step 43998: {'lr': 1.7689587233247983e-05, 'samples': 22527488, 'steps': 43998, 'batch_loss/train': 0.6736034385394305} +12/28/2021 18:10:14 - INFO - codeparrot_training - Step 43999: {'lr': 1.7683763649878894e-05, 'samples': 22528000, 'steps': 43999, 'batch_loss/train': 0.8033771547488868} +12/28/2021 18:10:28 - INFO - codeparrot_training - Step 44000: {'lr': 1.7677940990111467e-05, 'samples': 22528512, 'steps': 44000, 'batch_loss/train': 1.238082847557962} +12/28/2021 18:10:39 - INFO - codeparrot_training - Step 44001: {'lr': 1.7672119253968804e-05, 'samples': 22529024, 'steps': 44001, 'batch_loss/train': 0.733747489284724} +12/28/2021 18:10:49 - INFO - codeparrot_training - Step 44002: {'lr': 1.7666298441474075e-05, 'samples': 22529536, 'steps': 44002, 'batch_loss/train': 0.6579249378410168} +12/28/2021 18:11:00 - INFO - codeparrot_training - Step 44003: {'lr': 1.766047855265046e-05, 'samples': 22530048, 'steps': 44003, 'batch_loss/train': 0.9541194830089808} +12/28/2021 18:11:12 - INFO - codeparrot_training - Step 44004: {'lr': 1.7654659587521022e-05, 'samples': 22530560, 'steps': 44004, 'batch_loss/train': 0.7108762238640338} +12/28/2021 18:11:23 - INFO - codeparrot_training - Step 44005: {'lr': 1.764884154610888e-05, 'samples': 22531072, 'steps': 44005, 'batch_loss/train': 0.6766165299341083} +12/28/2021 18:11:33 - INFO - codeparrot_training - Step 44006: {'lr': 1.7643024428437294e-05, 'samples': 22531584, 'steps': 44006, 'batch_loss/train': 0.6338441804982722} +12/28/2021 18:11:45 - INFO - codeparrot_training - Step 44007: {'lr': 1.7637208234529274e-05, 'samples': 22532096, 'steps': 44007, 'batch_loss/train': 0.7704649777151644} +12/28/2021 18:11:56 - INFO - codeparrot_training - Step 44008: {'lr': 1.7631392964407966e-05, 'samples': 22532608, 'steps': 44008, 'batch_loss/train': 0.8598638791590929} +12/28/2021 18:12:07 - INFO - codeparrot_training - Step 44009: {'lr': 1.7625578618096522e-05, 'samples': 22533120, 'steps': 44009, 'batch_loss/train': 0.7764569986611605} +12/28/2021 18:12:19 - INFO - codeparrot_training - Step 44010: {'lr': 1.7619765195618005e-05, 'samples': 22533632, 'steps': 44010, 'batch_loss/train': 0.8381935935467482} +12/28/2021 18:12:29 - INFO - codeparrot_training - Step 44011: {'lr': 1.761395269699559e-05, 'samples': 22534144, 'steps': 44011, 'batch_loss/train': 0.671253951266408} +12/28/2021 18:12:40 - INFO - codeparrot_training - Step 44012: {'lr': 1.7608141122252313e-05, 'samples': 22534656, 'steps': 44012, 'batch_loss/train': 0.6828646650537848} +12/28/2021 18:12:51 - INFO - codeparrot_training - Step 44013: {'lr': 1.7602330471411353e-05, 'samples': 22535168, 'steps': 44013, 'batch_loss/train': 0.6670516488375142} +12/28/2021 18:13:05 - INFO - codeparrot_training - Step 44014: {'lr': 1.7596520744495775e-05, 'samples': 22535680, 'steps': 44014, 'batch_loss/train': 0.7457851571962237} +12/28/2021 18:13:15 - INFO - codeparrot_training - Step 44015: {'lr': 1.7590711941528696e-05, 'samples': 22536192, 'steps': 44015, 'batch_loss/train': 0.7206008671782911} +12/28/2021 18:13:26 - INFO - codeparrot_training - Step 44016: {'lr': 1.758490406253313e-05, 'samples': 22536704, 'steps': 44016, 'batch_loss/train': 0.6794338249601424} +12/28/2021 18:13:38 - INFO - codeparrot_training - Step 44017: {'lr': 1.7579097107532277e-05, 'samples': 22537216, 'steps': 44017, 'batch_loss/train': 0.6721497550606728} +12/28/2021 18:13:49 - INFO - codeparrot_training - Step 44018: {'lr': 1.7573291076549202e-05, 'samples': 22537728, 'steps': 44018, 'batch_loss/train': 0.7364372066222131} +12/28/2021 18:13:59 - INFO - codeparrot_training - Step 44019: {'lr': 1.7567485969606915e-05, 'samples': 22538240, 'steps': 44019, 'batch_loss/train': 0.8335973555222154} +12/28/2021 18:14:11 - INFO - codeparrot_training - Step 44020: {'lr': 1.756168178672851e-05, 'samples': 22538752, 'steps': 44020, 'batch_loss/train': 0.852652202360332} +12/28/2021 18:14:22 - INFO - codeparrot_training - Step 44021: {'lr': 1.7555878527937163e-05, 'samples': 22539264, 'steps': 44021, 'batch_loss/train': 0.7865010257810354} +12/28/2021 18:14:33 - INFO - codeparrot_training - Step 44022: {'lr': 1.7550076193255853e-05, 'samples': 22539776, 'steps': 44022, 'batch_loss/train': 0.8223040793091059} +12/28/2021 18:14:43 - INFO - codeparrot_training - Step 44023: {'lr': 1.754427478270765e-05, 'samples': 22540288, 'steps': 44023, 'batch_loss/train': 0.6843159096315503} +12/28/2021 18:14:57 - INFO - codeparrot_training - Step 44024: {'lr': 1.7538474296315666e-05, 'samples': 22540800, 'steps': 44024, 'batch_loss/train': 0.729889515787363} +12/28/2021 18:15:07 - INFO - codeparrot_training - Step 44025: {'lr': 1.7532674734102914e-05, 'samples': 22541312, 'steps': 44025, 'batch_loss/train': 0.9316099029965699} +12/28/2021 18:15:18 - INFO - codeparrot_training - Step 44026: {'lr': 1.7526876096092463e-05, 'samples': 22541824, 'steps': 44026, 'batch_loss/train': 0.8028270965442061} +12/28/2021 18:15:30 - INFO - codeparrot_training - Step 44027: {'lr': 1.7521078382307376e-05, 'samples': 22542336, 'steps': 44027, 'batch_loss/train': 0.7602474894374609} +12/28/2021 18:15:41 - INFO - codeparrot_training - Step 44028: {'lr': 1.7515281592770715e-05, 'samples': 22542848, 'steps': 44028, 'batch_loss/train': 0.8163314331322908} +12/28/2021 18:15:52 - INFO - codeparrot_training - Step 44029: {'lr': 1.750948572750552e-05, 'samples': 22543360, 'steps': 44029, 'batch_loss/train': 0.6379226446151733} +12/28/2021 18:16:06 - INFO - codeparrot_training - Step 44030: {'lr': 1.7503690786534855e-05, 'samples': 22543872, 'steps': 44030, 'batch_loss/train': 0.587795193423517} +12/28/2021 18:16:17 - INFO - codeparrot_training - Step 44031: {'lr': 1.7497896769881642e-05, 'samples': 22544384, 'steps': 44031, 'batch_loss/train': 0.7115385076031089} +12/28/2021 18:16:27 - INFO - codeparrot_training - Step 44032: {'lr': 1.7492103677569064e-05, 'samples': 22544896, 'steps': 44032, 'batch_loss/train': 0.7360229906626046} +12/28/2021 18:16:38 - INFO - codeparrot_training - Step 44033: {'lr': 1.7486311509620072e-05, 'samples': 22545408, 'steps': 44033, 'batch_loss/train': 0.6995098637416959} +12/28/2021 18:16:50 - INFO - codeparrot_training - Step 44034: {'lr': 1.7480520266057732e-05, 'samples': 22545920, 'steps': 44034, 'batch_loss/train': 0.7674002684652805} +12/28/2021 18:17:01 - INFO - codeparrot_training - Step 44035: {'lr': 1.747472994690502e-05, 'samples': 22546432, 'steps': 44035, 'batch_loss/train': 0.8740431889891624} +12/28/2021 18:17:11 - INFO - codeparrot_training - Step 44036: {'lr': 1.746894055218498e-05, 'samples': 22546944, 'steps': 44036, 'batch_loss/train': 0.7608780383598059} +12/28/2021 18:17:23 - INFO - codeparrot_training - Step 44037: {'lr': 1.7463152081920647e-05, 'samples': 22547456, 'steps': 44037, 'batch_loss/train': 0.8011162085458636} +12/28/2021 18:17:34 - INFO - codeparrot_training - Step 44038: {'lr': 1.7457364536135e-05, 'samples': 22547968, 'steps': 44038, 'batch_loss/train': 0.7102606415865012} +12/28/2021 18:17:45 - INFO - codeparrot_training - Step 44039: {'lr': 1.745157791485108e-05, 'samples': 22548480, 'steps': 44039, 'batch_loss/train': 0.708322410704568} +12/28/2021 18:17:59 - INFO - codeparrot_training - Step 44040: {'lr': 1.7445792218091895e-05, 'samples': 22548992, 'steps': 44040, 'batch_loss/train': 0.7143854218302295} +12/28/2021 18:18:09 - INFO - codeparrot_training - Step 44041: {'lr': 1.7440007445880452e-05, 'samples': 22549504, 'steps': 44041, 'batch_loss/train': 0.6776510099880397} +12/28/2021 18:18:20 - INFO - codeparrot_training - Step 44042: {'lr': 1.743422359823965e-05, 'samples': 22550016, 'steps': 44042, 'batch_loss/train': 0.7879387928405777} +12/28/2021 18:18:30 - INFO - codeparrot_training - Step 44043: {'lr': 1.7428440675192615e-05, 'samples': 22550528, 'steps': 44043, 'batch_loss/train': 0.6787501082289964} +12/28/2021 18:18:43 - INFO - codeparrot_training - Step 44044: {'lr': 1.7422658676762293e-05, 'samples': 22551040, 'steps': 44044, 'batch_loss/train': 0.6806264193728566} +12/28/2021 18:18:53 - INFO - codeparrot_training - Step 44045: {'lr': 1.741687760297164e-05, 'samples': 22551552, 'steps': 44045, 'batch_loss/train': 0.593050975818187} +12/28/2021 18:19:04 - INFO - codeparrot_training - Step 44046: {'lr': 1.7411097453843643e-05, 'samples': 22552064, 'steps': 44046, 'batch_loss/train': 0.8488705372437835} +12/28/2021 18:19:16 - INFO - codeparrot_training - Step 44047: {'lr': 1.7405318229401364e-05, 'samples': 22552576, 'steps': 44047, 'batch_loss/train': 0.6892186980694532} +12/28/2021 18:19:27 - INFO - codeparrot_training - Step 44048: {'lr': 1.7399539929667667e-05, 'samples': 22553088, 'steps': 44048, 'batch_loss/train': 0.6375712077133358} +12/28/2021 18:19:37 - INFO - codeparrot_training - Step 44049: {'lr': 1.7393762554665572e-05, 'samples': 22553600, 'steps': 44049, 'batch_loss/train': 0.7683975999243557} +12/28/2021 18:19:49 - INFO - codeparrot_training - Step 44050: {'lr': 1.7387986104418053e-05, 'samples': 22554112, 'steps': 44050, 'batch_loss/train': 0.6339855622500181} +12/28/2021 18:20:00 - INFO - codeparrot_training - Step 44051: {'lr': 1.7382210578948065e-05, 'samples': 22554624, 'steps': 44051, 'batch_loss/train': 0.708315756637603} +12/28/2021 18:20:11 - INFO - codeparrot_training - Step 44052: {'lr': 1.7376435978278593e-05, 'samples': 22555136, 'steps': 44052, 'batch_loss/train': 0.7678703228011727} +12/28/2021 18:20:25 - INFO - codeparrot_training - Step 44053: {'lr': 1.737066230243256e-05, 'samples': 22555648, 'steps': 44053, 'batch_loss/train': 0.7264456106349826} +12/28/2021 18:20:36 - INFO - codeparrot_training - Step 44054: {'lr': 1.736488955143295e-05, 'samples': 22556160, 'steps': 44054, 'batch_loss/train': 0.7153540703002363} +12/28/2021 18:20:46 - INFO - codeparrot_training - Step 44055: {'lr': 1.7359117725302686e-05, 'samples': 22556672, 'steps': 44055, 'batch_loss/train': 0.9606973314657807} +12/28/2021 18:20:57 - INFO - codeparrot_training - Step 44056: {'lr': 1.735334682406478e-05, 'samples': 22557184, 'steps': 44056, 'batch_loss/train': 0.7002493524923921} +12/28/2021 18:21:09 - INFO - codeparrot_training - Step 44057: {'lr': 1.734757684774202e-05, 'samples': 22557696, 'steps': 44057, 'batch_loss/train': 0.5959945891518146} +12/28/2021 18:21:20 - INFO - codeparrot_training - Step 44058: {'lr': 1.734180779635755e-05, 'samples': 22558208, 'steps': 44058, 'batch_loss/train': 0.6520224874839187} +12/28/2021 18:21:30 - INFO - codeparrot_training - Step 44059: {'lr': 1.733603966993416e-05, 'samples': 22558720, 'steps': 44059, 'batch_loss/train': 0.7165270398836583} +12/28/2021 18:21:44 - INFO - codeparrot_training - Step 44060: {'lr': 1.733027246849478e-05, 'samples': 22559232, 'steps': 44060, 'batch_loss/train': 0.7047858536243439} +12/28/2021 18:21:55 - INFO - codeparrot_training - Step 44061: {'lr': 1.7324506192062467e-05, 'samples': 22559744, 'steps': 44061, 'batch_loss/train': 0.6692529394058511} +12/28/2021 18:22:06 - INFO - codeparrot_training - Step 44062: {'lr': 1.731874084066004e-05, 'samples': 22560256, 'steps': 44062, 'batch_loss/train': 0.703566464362666} +12/28/2021 18:22:18 - INFO - codeparrot_training - Step 44063: {'lr': 1.73129764143104e-05, 'samples': 22560768, 'steps': 44063, 'batch_loss/train': 0.7489577940432355} +12/28/2021 18:22:28 - INFO - codeparrot_training - Step 44064: {'lr': 1.7307212913036552e-05, 'samples': 22561280, 'steps': 44064, 'batch_loss/train': 0.6472369455732405} +12/28/2021 18:22:39 - INFO - codeparrot_training - Step 44065: {'lr': 1.730145033686134e-05, 'samples': 22561792, 'steps': 44065, 'batch_loss/train': 0.7190349641023204} +12/28/2021 18:22:50 - INFO - codeparrot_training - Step 44066: {'lr': 1.729568868580769e-05, 'samples': 22562304, 'steps': 44066, 'batch_loss/train': 0.6857911892002448} +12/28/2021 18:23:02 - INFO - codeparrot_training - Step 44067: {'lr': 1.728992795989859e-05, 'samples': 22562816, 'steps': 44067, 'batch_loss/train': 0.7119883252307773} +12/28/2021 18:23:12 - INFO - codeparrot_training - Step 44068: {'lr': 1.728416815915676e-05, 'samples': 22563328, 'steps': 44068, 'batch_loss/train': 0.581036277115345} +12/28/2021 18:23:23 - INFO - codeparrot_training - Step 44069: {'lr': 1.7278409283605247e-05, 'samples': 22563840, 'steps': 44069, 'batch_loss/train': 0.7640924262814224} +12/28/2021 18:23:36 - INFO - codeparrot_training - Step 44070: {'lr': 1.7272651333266943e-05, 'samples': 22564352, 'steps': 44070, 'batch_loss/train': 0.6983195437351242} +12/28/2021 18:23:47 - INFO - codeparrot_training - Step 44071: {'lr': 1.7266894308164694e-05, 'samples': 22564864, 'steps': 44071, 'batch_loss/train': 0.7186236004345119} +12/28/2021 18:23:57 - INFO - codeparrot_training - Step 44072: {'lr': 1.7261138208321313e-05, 'samples': 22565376, 'steps': 44072, 'batch_loss/train': 0.8481705635786057} +12/28/2021 18:24:10 - INFO - codeparrot_training - Step 44073: {'lr': 1.72553830337599e-05, 'samples': 22565888, 'steps': 44073, 'batch_loss/train': 0.783269127830863} +12/28/2021 18:24:20 - INFO - codeparrot_training - Step 44074: {'lr': 1.724962878450312e-05, 'samples': 22566400, 'steps': 44074, 'batch_loss/train': 0.7262070445576683} +12/28/2021 18:24:31 - INFO - codeparrot_training - Step 44075: {'lr': 1.7243875460573905e-05, 'samples': 22566912, 'steps': 44075, 'batch_loss/train': 0.6914906564634293} +12/28/2021 18:24:41 - INFO - codeparrot_training - Step 44076: {'lr': 1.7238123061995236e-05, 'samples': 22567424, 'steps': 44076, 'batch_loss/train': 0.7466375082731247} +12/28/2021 18:24:55 - INFO - codeparrot_training - Step 44077: {'lr': 1.7232371588789876e-05, 'samples': 22567936, 'steps': 44077, 'batch_loss/train': 0.7247644015587866} +12/28/2021 18:25:05 - INFO - codeparrot_training - Step 44078: {'lr': 1.722662104098069e-05, 'samples': 22568448, 'steps': 44078, 'batch_loss/train': 0.7021257793530822} +12/28/2021 18:25:16 - INFO - codeparrot_training - Step 44079: {'lr': 1.7220871418590578e-05, 'samples': 22568960, 'steps': 44079, 'batch_loss/train': 0.7624734807759523} +12/28/2021 18:25:28 - INFO - codeparrot_training - Step 44080: {'lr': 1.7215122721642412e-05, 'samples': 22569472, 'steps': 44080, 'batch_loss/train': 0.7349740886129439} +12/28/2021 18:25:39 - INFO - codeparrot_training - Step 44081: {'lr': 1.720937495015898e-05, 'samples': 22569984, 'steps': 44081, 'batch_loss/train': 0.6811268655583262} +12/28/2021 18:25:49 - INFO - codeparrot_training - Step 44082: {'lr': 1.7203628104163232e-05, 'samples': 22570496, 'steps': 44082, 'batch_loss/train': 0.7409201841801405} +12/28/2021 18:26:02 - INFO - codeparrot_training - Step 44083: {'lr': 1.7197882183677903e-05, 'samples': 22571008, 'steps': 44083, 'batch_loss/train': 0.7335655898787081} +12/28/2021 18:26:12 - INFO - codeparrot_training - Step 44084: {'lr': 1.719213718872595e-05, 'samples': 22571520, 'steps': 44084, 'batch_loss/train': 0.46459195436909795} +12/28/2021 18:26:23 - INFO - codeparrot_training - Step 44085: {'lr': 1.7186393119330096e-05, 'samples': 22572032, 'steps': 44085, 'batch_loss/train': 0.7412258991971612} +12/28/2021 18:26:35 - INFO - codeparrot_training - Step 44086: {'lr': 1.718064997551322e-05, 'samples': 22572544, 'steps': 44086, 'batch_loss/train': 0.7907983362674713} +12/28/2021 18:26:46 - INFO - codeparrot_training - Step 44087: {'lr': 1.7174907757298243e-05, 'samples': 22573056, 'steps': 44087, 'batch_loss/train': 0.6889716917648911} +12/28/2021 18:26:57 - INFO - codeparrot_training - Step 44088: {'lr': 1.7169166464707898e-05, 'samples': 22573568, 'steps': 44088, 'batch_loss/train': 0.8311479976400733} +12/28/2021 18:27:07 - INFO - codeparrot_training - Step 44089: {'lr': 1.7163426097765e-05, 'samples': 22574080, 'steps': 44089, 'batch_loss/train': 0.5821107784286141} +12/28/2021 18:27:21 - INFO - codeparrot_training - Step 44090: {'lr': 1.715768665649245e-05, 'samples': 22574592, 'steps': 44090, 'batch_loss/train': 0.7338676920626312} +12/28/2021 18:27:32 - INFO - codeparrot_training - Step 44091: {'lr': 1.7151948140912976e-05, 'samples': 22575104, 'steps': 44091, 'batch_loss/train': 0.9164522150531411} +12/28/2021 18:27:43 - INFO - codeparrot_training - Step 44092: {'lr': 1.714621055104945e-05, 'samples': 22575616, 'steps': 44092, 'batch_loss/train': 0.7654493290465325} +12/28/2021 18:27:55 - INFO - codeparrot_training - Step 44093: {'lr': 1.714047388692472e-05, 'samples': 22576128, 'steps': 44093, 'batch_loss/train': 0.7634390108287334} +12/28/2021 18:28:06 - INFO - codeparrot_training - Step 44094: {'lr': 1.7134738148561452e-05, 'samples': 22576640, 'steps': 44094, 'batch_loss/train': 0.7487895430531353} +12/28/2021 18:28:16 - INFO - codeparrot_training - Step 44095: {'lr': 1.712900333598258e-05, 'samples': 22577152, 'steps': 44095, 'batch_loss/train': 0.656121902866289} +12/28/2021 18:28:28 - INFO - codeparrot_training - Step 44096: {'lr': 1.7123269449210914e-05, 'samples': 22577664, 'steps': 44096, 'batch_loss/train': 0.663492335472256} +12/28/2021 18:28:39 - INFO - codeparrot_training - Step 44097: {'lr': 1.711753648826911e-05, 'samples': 22578176, 'steps': 44097, 'batch_loss/train': 0.7170693245716393} +12/28/2021 18:28:50 - INFO - codeparrot_training - Step 44098: {'lr': 1.7111804453180084e-05, 'samples': 22578688, 'steps': 44098, 'batch_loss/train': 0.773008905351162} +12/28/2021 18:29:00 - INFO - codeparrot_training - Step 44099: {'lr': 1.7106073343966634e-05, 'samples': 22579200, 'steps': 44099, 'batch_loss/train': 1.483337384648621} +12/28/2021 18:29:14 - INFO - codeparrot_training - Step 44100: {'lr': 1.7100343160651427e-05, 'samples': 22579712, 'steps': 44100, 'batch_loss/train': 0.6785651815589517} +12/28/2021 18:29:24 - INFO - codeparrot_training - Step 44101: {'lr': 1.709461390325731e-05, 'samples': 22580224, 'steps': 44101, 'batch_loss/train': 0.762852190528065} +12/28/2021 18:29:35 - INFO - codeparrot_training - Step 44102: {'lr': 1.7088885571807156e-05, 'samples': 22580736, 'steps': 44102, 'batch_loss/train': 0.6663623293861747} +12/28/2021 18:29:47 - INFO - codeparrot_training - Step 44103: {'lr': 1.708315816632358e-05, 'samples': 22581248, 'steps': 44103, 'batch_loss/train': 0.6476708436384797} +12/28/2021 18:29:58 - INFO - codeparrot_training - Step 44104: {'lr': 1.707743168682943e-05, 'samples': 22581760, 'steps': 44104, 'batch_loss/train': 0.7409072457812726} +12/28/2021 18:30:09 - INFO - codeparrot_training - Step 44105: {'lr': 1.7071706133347463e-05, 'samples': 22582272, 'steps': 44105, 'batch_loss/train': 0.7521425671875477} +12/28/2021 18:30:19 - INFO - codeparrot_training - Step 44106: {'lr': 1.7065981505900467e-05, 'samples': 22582784, 'steps': 44106, 'batch_loss/train': 0.920120975933969} +12/28/2021 18:30:31 - INFO - codeparrot_training - Step 44107: {'lr': 1.7060257804511147e-05, 'samples': 22583296, 'steps': 44107, 'batch_loss/train': 0.7751257959753275} +12/28/2021 18:30:42 - INFO - codeparrot_training - Step 44108: {'lr': 1.7054535029202345e-05, 'samples': 22583808, 'steps': 44108, 'batch_loss/train': 0.7282276917248964} +12/28/2021 18:30:52 - INFO - codeparrot_training - Step 44109: {'lr': 1.7048813179996653e-05, 'samples': 22584320, 'steps': 44109, 'batch_loss/train': 0.6613099700771272} +12/28/2021 18:31:06 - INFO - codeparrot_training - Step 44110: {'lr': 1.704309225691697e-05, 'samples': 22584832, 'steps': 44110, 'batch_loss/train': 0.7716531925834715} +12/28/2021 18:31:17 - INFO - codeparrot_training - Step 44111: {'lr': 1.7037372259986027e-05, 'samples': 22585344, 'steps': 44111, 'batch_loss/train': 0.7017708318307996} +12/28/2021 18:31:28 - INFO - codeparrot_training - Step 44112: {'lr': 1.7031653189226476e-05, 'samples': 22585856, 'steps': 44112, 'batch_loss/train': 0.7414953161496669} +12/28/2021 18:31:40 - INFO - codeparrot_training - Step 44113: {'lr': 1.7025935044661155e-05, 'samples': 22586368, 'steps': 44113, 'batch_loss/train': 0.7351261819712818} +12/28/2021 18:31:51 - INFO - codeparrot_training - Step 44114: {'lr': 1.702021782631272e-05, 'samples': 22586880, 'steps': 44114, 'batch_loss/train': 0.8339325794950128} +12/28/2021 18:32:01 - INFO - codeparrot_training - Step 44115: {'lr': 1.7014501534203948e-05, 'samples': 22587392, 'steps': 44115, 'batch_loss/train': 0.7726771291345358} +12/28/2021 18:32:15 - INFO - codeparrot_training - Step 44116: {'lr': 1.700878616835752e-05, 'samples': 22587904, 'steps': 44116, 'batch_loss/train': 0.6791682038456202} +12/28/2021 18:32:26 - INFO - codeparrot_training - Step 44117: {'lr': 1.7003071728796198e-05, 'samples': 22588416, 'steps': 44117, 'batch_loss/train': 1.0013982113450766} +12/28/2021 18:32:37 - INFO - codeparrot_training - Step 44118: {'lr': 1.6997358215542657e-05, 'samples': 22588928, 'steps': 44118, 'batch_loss/train': 1.492614606861025} +12/28/2021 18:32:47 - INFO - codeparrot_training - Step 44119: {'lr': 1.6991645628619683e-05, 'samples': 22589440, 'steps': 44119, 'batch_loss/train': 0.6837786892428994} +12/28/2021 18:33:00 - INFO - codeparrot_training - Step 44120: {'lr': 1.6985933968049927e-05, 'samples': 22589952, 'steps': 44120, 'batch_loss/train': 0.8065931405872107} +12/28/2021 18:33:10 - INFO - codeparrot_training - Step 44121: {'lr': 1.6980223233856117e-05, 'samples': 22590464, 'steps': 44121, 'batch_loss/train': 0.6656017825007439} +12/28/2021 18:33:21 - INFO - codeparrot_training - Step 44122: {'lr': 1.6974513426060988e-05, 'samples': 22590976, 'steps': 44122, 'batch_loss/train': 0.7298403051681817} +12/28/2021 18:33:33 - INFO - codeparrot_training - Step 44123: {'lr': 1.6968804544687132e-05, 'samples': 22591488, 'steps': 44123, 'batch_loss/train': 0.869217867963016} +12/28/2021 18:33:44 - INFO - codeparrot_training - Step 44124: {'lr': 1.6963096589757365e-05, 'samples': 22592000, 'steps': 44124, 'batch_loss/train': 0.7893591625615954} +12/28/2021 18:33:54 - INFO - codeparrot_training - Step 44125: {'lr': 1.6957389561294363e-05, 'samples': 22592512, 'steps': 44125, 'batch_loss/train': 0.7654696456156671} +12/28/2021 18:34:05 - INFO - codeparrot_training - Step 44126: {'lr': 1.6951683459320745e-05, 'samples': 22593024, 'steps': 44126, 'batch_loss/train': 0.7386048790067434} +12/28/2021 18:34:18 - INFO - codeparrot_training - Step 44127: {'lr': 1.6945978283859216e-05, 'samples': 22593536, 'steps': 44127, 'batch_loss/train': 0.8245248636230826} +12/28/2021 18:34:29 - INFO - codeparrot_training - Step 44128: {'lr': 1.6940274034932533e-05, 'samples': 22594048, 'steps': 44128, 'batch_loss/train': 0.676937083946541} +12/28/2021 18:34:39 - INFO - codeparrot_training - Step 44129: {'lr': 1.6934570712563296e-05, 'samples': 22594560, 'steps': 44129, 'batch_loss/train': 0.771975455339998} +12/28/2021 18:34:52 - INFO - codeparrot_training - Step 44130: {'lr': 1.6928868316774204e-05, 'samples': 22595072, 'steps': 44130, 'batch_loss/train': 0.7058472190983593} +12/28/2021 18:35:02 - INFO - codeparrot_training - Step 44131: {'lr': 1.6923166847587935e-05, 'samples': 22595584, 'steps': 44131, 'batch_loss/train': 0.7220767429098487} +12/28/2021 18:35:13 - INFO - codeparrot_training - Step 44132: {'lr': 1.6917466305027134e-05, 'samples': 22596096, 'steps': 44132, 'batch_loss/train': 0.8482781387865543} +12/28/2021 18:35:25 - INFO - codeparrot_training - Step 44133: {'lr': 1.6911766689114483e-05, 'samples': 22596608, 'steps': 44133, 'batch_loss/train': 0.807482285425067} +12/28/2021 18:35:36 - INFO - codeparrot_training - Step 44134: {'lr': 1.6906067999872653e-05, 'samples': 22597120, 'steps': 44134, 'batch_loss/train': 0.7371011418290436} +12/28/2021 18:35:46 - INFO - codeparrot_training - Step 44135: {'lr': 1.690037023732427e-05, 'samples': 22597632, 'steps': 44135, 'batch_loss/train': 0.7230551075190306} +12/28/2021 18:35:57 - INFO - codeparrot_training - Step 44136: {'lr': 1.6894673401492006e-05, 'samples': 22598144, 'steps': 44136, 'batch_loss/train': 0.8860829407349229} +12/28/2021 18:36:09 - INFO - codeparrot_training - Step 44137: {'lr': 1.688897749239854e-05, 'samples': 22598656, 'steps': 44137, 'batch_loss/train': 0.707526424434036} +12/28/2021 18:36:20 - INFO - codeparrot_training - Step 44138: {'lr': 1.6883282510066377e-05, 'samples': 22599168, 'steps': 44138, 'batch_loss/train': 0.7716069621965289} +12/28/2021 18:36:31 - INFO - codeparrot_training - Step 44139: {'lr': 1.6877588454518367e-05, 'samples': 22599680, 'steps': 44139, 'batch_loss/train': 0.6981854285113513} +12/28/2021 18:36:44 - INFO - codeparrot_training - Step 44140: {'lr': 1.6871895325776986e-05, 'samples': 22600192, 'steps': 44140, 'batch_loss/train': 0.6781728072091937} +12/28/2021 18:36:55 - INFO - codeparrot_training - Step 44141: {'lr': 1.6866203123864944e-05, 'samples': 22600704, 'steps': 44141, 'batch_loss/train': 0.869995525223203} +12/28/2021 18:37:05 - INFO - codeparrot_training - Step 44142: {'lr': 1.6860511848804827e-05, 'samples': 22601216, 'steps': 44142, 'batch_loss/train': 0.6934254867956042} +12/28/2021 18:37:17 - INFO - codeparrot_training - Step 44143: {'lr': 1.6854821500619315e-05, 'samples': 22601728, 'steps': 44143, 'batch_loss/train': 0.6875063590705395} +12/28/2021 18:37:28 - INFO - codeparrot_training - Step 44144: {'lr': 1.6849132079330977e-05, 'samples': 22602240, 'steps': 44144, 'batch_loss/train': 0.7396951557602733} +12/28/2021 18:37:39 - INFO - codeparrot_training - Step 44145: {'lr': 1.6843443584962454e-05, 'samples': 22602752, 'steps': 44145, 'batch_loss/train': 0.6725552438292652} +12/28/2021 18:37:49 - INFO - codeparrot_training - Step 44146: {'lr': 1.683775601753637e-05, 'samples': 22603264, 'steps': 44146, 'batch_loss/train': 0.8437172984704375} +12/28/2021 18:38:03 - INFO - codeparrot_training - Step 44147: {'lr': 1.6832069377075347e-05, 'samples': 22603776, 'steps': 44147, 'batch_loss/train': 0.751840085722506} +12/28/2021 18:38:14 - INFO - codeparrot_training - Step 44148: {'lr': 1.682638366360201e-05, 'samples': 22604288, 'steps': 44148, 'batch_loss/train': 1.1558970818296075} +12/28/2021 18:38:24 - INFO - codeparrot_training - Step 44149: {'lr': 1.682069887713883e-05, 'samples': 22604800, 'steps': 44149, 'batch_loss/train': 0.6219357782974839} +12/28/2021 18:38:37 - INFO - codeparrot_training - Step 44150: {'lr': 1.6815015017708547e-05, 'samples': 22605312, 'steps': 44150, 'batch_loss/train': 0.7325491202063859} +12/28/2021 18:38:47 - INFO - codeparrot_training - Step 44151: {'lr': 1.680933208533375e-05, 'samples': 22605824, 'steps': 44151, 'batch_loss/train': 0.5782710519852117} +12/28/2021 18:38:58 - INFO - codeparrot_training - Step 44152: {'lr': 1.680365008003698e-05, 'samples': 22606336, 'steps': 44152, 'batch_loss/train': 0.7013597940094769} +12/28/2021 18:39:10 - INFO - codeparrot_training - Step 44153: {'lr': 1.6797969001840802e-05, 'samples': 22606848, 'steps': 44153, 'batch_loss/train': 0.750567264854908} +12/28/2021 18:39:21 - INFO - codeparrot_training - Step 44154: {'lr': 1.6792288850767918e-05, 'samples': 22607360, 'steps': 44154, 'batch_loss/train': 0.7983393878675997} +12/28/2021 18:39:31 - INFO - codeparrot_training - Step 44155: {'lr': 1.678660962684081e-05, 'samples': 22607872, 'steps': 44155, 'batch_loss/train': 0.7409832836128771} +12/28/2021 18:39:42 - INFO - codeparrot_training - Step 44156: {'lr': 1.6780931330082074e-05, 'samples': 22608384, 'steps': 44156, 'batch_loss/train': 0.7040219753980637} +12/28/2021 18:39:56 - INFO - codeparrot_training - Step 44157: {'lr': 1.6775253960514298e-05, 'samples': 22608896, 'steps': 44157, 'batch_loss/train': 0.731582386419177} +12/28/2021 18:40:06 - INFO - codeparrot_training - Step 44158: {'lr': 1.6769577518160052e-05, 'samples': 22609408, 'steps': 44158, 'batch_loss/train': 0.7738432832993567} +12/28/2021 18:40:17 - INFO - codeparrot_training - Step 44159: {'lr': 1.6763902003041898e-05, 'samples': 22609920, 'steps': 44159, 'batch_loss/train': 0.7220258065499365} +12/28/2021 18:40:29 - INFO - codeparrot_training - Step 44160: {'lr': 1.6758227415182403e-05, 'samples': 22610432, 'steps': 44160, 'batch_loss/train': 0.7538609956391156} +12/28/2021 18:40:40 - INFO - codeparrot_training - Step 44161: {'lr': 1.6752553754604134e-05, 'samples': 22610944, 'steps': 44161, 'batch_loss/train': 0.7573166117072105} +12/28/2021 18:40:50 - INFO - codeparrot_training - Step 44162: {'lr': 1.6746881021329623e-05, 'samples': 22611456, 'steps': 44162, 'batch_loss/train': 0.7611296931281686} +12/28/2021 18:41:03 - INFO - codeparrot_training - Step 44163: {'lr': 1.6741209215381498e-05, 'samples': 22611968, 'steps': 44163, 'batch_loss/train': 0.8064184365794063} +12/28/2021 18:41:13 - INFO - codeparrot_training - Step 44164: {'lr': 1.6735538336782153e-05, 'samples': 22612480, 'steps': 44164, 'batch_loss/train': 0.8204891383647919} +12/28/2021 18:41:24 - INFO - codeparrot_training - Step 44165: {'lr': 1.672986838555432e-05, 'samples': 22612992, 'steps': 44165, 'batch_loss/train': 0.74798284471035} +12/28/2021 18:41:35 - INFO - codeparrot_training - Step 44166: {'lr': 1.6724199361720428e-05, 'samples': 22613504, 'steps': 44166, 'batch_loss/train': 0.7825987171381712} +12/28/2021 18:41:48 - INFO - codeparrot_training - Step 44167: {'lr': 1.6718531265303012e-05, 'samples': 22614016, 'steps': 44167, 'batch_loss/train': 0.7506873533129692} +12/28/2021 18:41:59 - INFO - codeparrot_training - Step 44168: {'lr': 1.6712864096324636e-05, 'samples': 22614528, 'steps': 44168, 'batch_loss/train': 0.6335251143900678} +12/28/2021 18:42:09 - INFO - codeparrot_training - Step 44169: {'lr': 1.6707197854807842e-05, 'samples': 22615040, 'steps': 44169, 'batch_loss/train': 0.8843675618991256} +12/28/2021 18:42:21 - INFO - codeparrot_training - Step 44170: {'lr': 1.670153254077511e-05, 'samples': 22615552, 'steps': 44170, 'batch_loss/train': 0.7292089900001884} +12/28/2021 18:42:32 - INFO - codeparrot_training - Step 44171: {'lr': 1.6695868154249033e-05, 'samples': 22616064, 'steps': 44171, 'batch_loss/train': 0.7338743191212416} +12/28/2021 18:42:42 - INFO - codeparrot_training - Step 44172: {'lr': 1.6690204695252066e-05, 'samples': 22616576, 'steps': 44172, 'batch_loss/train': 0.7473654155619442} +12/28/2021 18:42:55 - INFO - codeparrot_training - Step 44173: {'lr': 1.668454216380677e-05, 'samples': 22617088, 'steps': 44173, 'batch_loss/train': 0.6500058092642576} +12/28/2021 18:43:05 - INFO - codeparrot_training - Step 44174: {'lr': 1.6678880559935662e-05, 'samples': 22617600, 'steps': 44174, 'batch_loss/train': 0.8112466298043728} +12/28/2021 18:43:16 - INFO - codeparrot_training - Step 44175: {'lr': 1.6673219883661163e-05, 'samples': 22618112, 'steps': 44175, 'batch_loss/train': 0.7083060350269079} +12/28/2021 18:43:29 - INFO - codeparrot_training - Step 44176: {'lr': 1.6667560135005864e-05, 'samples': 22618624, 'steps': 44176, 'batch_loss/train': 0.8216718853800558} +12/28/2021 18:43:40 - INFO - codeparrot_training - Step 44177: {'lr': 1.6661901313992284e-05, 'samples': 22619136, 'steps': 44177, 'batch_loss/train': 0.6965804849751294} +12/28/2021 18:43:50 - INFO - codeparrot_training - Step 44178: {'lr': 1.6656243420642842e-05, 'samples': 22619648, 'steps': 44178, 'batch_loss/train': 0.682777744717896} +12/28/2021 18:44:01 - INFO - codeparrot_training - Step 44179: {'lr': 1.6650586454980022e-05, 'samples': 22620160, 'steps': 44179, 'batch_loss/train': 0.8188076494261622} +12/28/2021 18:44:13 - INFO - codeparrot_training - Step 44180: {'lr': 1.6644930417026445e-05, 'samples': 22620672, 'steps': 44180, 'batch_loss/train': 0.6593194250017405} +12/28/2021 18:44:24 - INFO - codeparrot_training - Step 44181: {'lr': 1.663927530680448e-05, 'samples': 22621184, 'steps': 44181, 'batch_loss/train': 0.7977374205365777} +12/28/2021 18:44:34 - INFO - codeparrot_training - Step 44182: {'lr': 1.663362112433664e-05, 'samples': 22621696, 'steps': 44182, 'batch_loss/train': 1.5476706423796713} +12/28/2021 18:44:47 - INFO - codeparrot_training - Step 44183: {'lr': 1.6627967869645404e-05, 'samples': 22622208, 'steps': 44183, 'batch_loss/train': 0.6945043983869255} +12/28/2021 18:44:57 - INFO - codeparrot_training - Step 44184: {'lr': 1.6622315542753257e-05, 'samples': 22622720, 'steps': 44184, 'batch_loss/train': 0.6965669246856123} +12/28/2021 18:45:08 - INFO - codeparrot_training - Step 44185: {'lr': 1.661666414368268e-05, 'samples': 22623232, 'steps': 44185, 'batch_loss/train': 0.482527126907371} +12/28/2021 18:45:21 - INFO - codeparrot_training - Step 44186: {'lr': 1.66110136724561e-05, 'samples': 22623744, 'steps': 44186, 'batch_loss/train': 0.8399126762524247} +12/28/2021 18:45:32 - INFO - codeparrot_training - Step 44187: {'lr': 1.6605364129095997e-05, 'samples': 22624256, 'steps': 44187, 'batch_loss/train': 0.6822843609843403} +12/28/2021 18:45:42 - INFO - codeparrot_training - Step 44188: {'lr': 1.6599715513624852e-05, 'samples': 22624768, 'steps': 44188, 'batch_loss/train': 0.665766092017293} +12/28/2021 18:45:53 - INFO - codeparrot_training - Step 44189: {'lr': 1.6594067826065152e-05, 'samples': 22625280, 'steps': 44189, 'batch_loss/train': 0.7454780070111156} +12/28/2021 18:46:05 - INFO - codeparrot_training - Step 44190: {'lr': 1.6588421066439234e-05, 'samples': 22625792, 'steps': 44190, 'batch_loss/train': 0.8016421515494585} +12/28/2021 18:46:16 - INFO - codeparrot_training - Step 44191: {'lr': 1.6582775234769643e-05, 'samples': 22626304, 'steps': 44191, 'batch_loss/train': 1.5258907712996006} +12/28/2021 18:46:26 - INFO - codeparrot_training - Step 44192: {'lr': 1.6577130331078853e-05, 'samples': 22626816, 'steps': 44192, 'batch_loss/train': 0.6018353563849814} +12/28/2021 18:46:40 - INFO - codeparrot_training - Step 44193: {'lr': 1.6571486355389214e-05, 'samples': 22627328, 'steps': 44193, 'batch_loss/train': 0.671841761097312} +12/28/2021 18:46:51 - INFO - codeparrot_training - Step 44194: {'lr': 1.6565843307723205e-05, 'samples': 22627840, 'steps': 44194, 'batch_loss/train': 0.7197821801528335} +12/28/2021 18:47:02 - INFO - codeparrot_training - Step 44195: {'lr': 1.656020118810328e-05, 'samples': 22628352, 'steps': 44195, 'batch_loss/train': 0.6767762554809451} +12/28/2021 18:47:12 - INFO - codeparrot_training - Step 44196: {'lr': 1.6554559996551837e-05, 'samples': 22628864, 'steps': 44196, 'batch_loss/train': 1.1079340167343616} +12/28/2021 18:47:25 - INFO - codeparrot_training - Step 44197: {'lr': 1.6548919733091307e-05, 'samples': 22629376, 'steps': 44197, 'batch_loss/train': 0.808526593260467} +12/28/2021 18:47:36 - INFO - codeparrot_training - Step 44198: {'lr': 1.6543280397744138e-05, 'samples': 22629888, 'steps': 44198, 'batch_loss/train': 0.7571931313723326} +12/28/2021 18:47:46 - INFO - codeparrot_training - Step 44199: {'lr': 1.653764199053273e-05, 'samples': 22630400, 'steps': 44199, 'batch_loss/train': 0.7245207456871867} +12/28/2021 18:47:59 - INFO - codeparrot_training - Step 44200: {'lr': 1.6532004511479515e-05, 'samples': 22630912, 'steps': 44200, 'batch_loss/train': 0.9807036723941565} +12/28/2021 18:48:09 - INFO - codeparrot_training - Step 44201: {'lr': 1.6526367960606887e-05, 'samples': 22631424, 'steps': 44201, 'batch_loss/train': 0.9911241284571588} +12/28/2021 18:48:20 - INFO - codeparrot_training - Step 44202: {'lr': 1.6520732337937245e-05, 'samples': 22631936, 'steps': 44202, 'batch_loss/train': 0.7094690902158618} +12/28/2021 18:48:31 - INFO - codeparrot_training - Step 44203: {'lr': 1.651509764349307e-05, 'samples': 22632448, 'steps': 44203, 'batch_loss/train': 0.7952769068069756} +12/28/2021 18:48:44 - INFO - codeparrot_training - Step 44204: {'lr': 1.6509463877296683e-05, 'samples': 22632960, 'steps': 44204, 'batch_loss/train': 0.7388125943252817} +12/28/2021 18:48:55 - INFO - codeparrot_training - Step 44205: {'lr': 1.6503831039370448e-05, 'samples': 22633472, 'steps': 44205, 'batch_loss/train': 0.8048993591219187} +12/28/2021 18:49:06 - INFO - codeparrot_training - Step 44206: {'lr': 1.6498199129736907e-05, 'samples': 22633984, 'steps': 44206, 'batch_loss/train': 0.9274623701348901} +12/28/2021 18:49:18 - INFO - codeparrot_training - Step 44207: {'lr': 1.649256814841832e-05, 'samples': 22634496, 'steps': 44207, 'batch_loss/train': 0.8319214815273881} +12/28/2021 18:49:28 - INFO - codeparrot_training - Step 44208: {'lr': 1.6486938095437052e-05, 'samples': 22635008, 'steps': 44208, 'batch_loss/train': 0.6774082635529339} +12/28/2021 18:49:39 - INFO - codeparrot_training - Step 44209: {'lr': 1.648130897081565e-05, 'samples': 22635520, 'steps': 44209, 'batch_loss/train': 0.796432739123702} +12/28/2021 18:49:51 - INFO - codeparrot_training - Step 44210: {'lr': 1.647568077457634e-05, 'samples': 22636032, 'steps': 44210, 'batch_loss/train': 0.7962425332516432} +12/28/2021 18:50:02 - INFO - codeparrot_training - Step 44211: {'lr': 1.6470053506741552e-05, 'samples': 22636544, 'steps': 44211, 'batch_loss/train': 0.8635541787371039} +12/28/2021 18:50:13 - INFO - codeparrot_training - Step 44212: {'lr': 1.646442716733365e-05, 'samples': 22637056, 'steps': 44212, 'batch_loss/train': 0.8097462388686836} +12/28/2021 18:50:23 - INFO - codeparrot_training - Step 44213: {'lr': 1.6458801756374986e-05, 'samples': 22637568, 'steps': 44213, 'batch_loss/train': 0.9351950893178582} +12/28/2021 18:50:35 - INFO - codeparrot_training - Step 44214: {'lr': 1.6453177273887982e-05, 'samples': 22638080, 'steps': 44214, 'batch_loss/train': 0.7580447718501091} +12/28/2021 18:50:46 - INFO - codeparrot_training - Step 44215: {'lr': 1.6447553719894982e-05, 'samples': 22638592, 'steps': 44215, 'batch_loss/train': 0.677996211219579} +12/28/2021 18:50:57 - INFO - codeparrot_training - Step 44216: {'lr': 1.6441931094418245e-05, 'samples': 22639104, 'steps': 44216, 'batch_loss/train': 0.720915550366044} +12/28/2021 18:51:11 - INFO - codeparrot_training - Step 44217: {'lr': 1.6436309397480224e-05, 'samples': 22639616, 'steps': 44217, 'batch_loss/train': 0.780173453153111} +12/28/2021 18:51:22 - INFO - codeparrot_training - Step 44218: {'lr': 1.6430688629103326e-05, 'samples': 22640128, 'steps': 44218, 'batch_loss/train': 0.7215541338082403} +12/28/2021 18:51:32 - INFO - codeparrot_training - Step 44219: {'lr': 1.6425068789309745e-05, 'samples': 22640640, 'steps': 44219, 'batch_loss/train': 0.6975013772025704} +12/28/2021 18:51:44 - INFO - codeparrot_training - Step 44220: {'lr': 1.641944987812191e-05, 'samples': 22641152, 'steps': 44220, 'batch_loss/train': 0.722357866470702} +12/28/2021 18:51:55 - INFO - codeparrot_training - Step 44221: {'lr': 1.641383189556214e-05, 'samples': 22641664, 'steps': 44221, 'batch_loss/train': 0.7004167406121269} +12/28/2021 18:52:06 - INFO - codeparrot_training - Step 44222: {'lr': 1.640821484165278e-05, 'samples': 22642176, 'steps': 44222, 'batch_loss/train': 0.7505181645974517} +12/28/2021 18:52:18 - INFO - codeparrot_training - Step 44223: {'lr': 1.640259871641614e-05, 'samples': 22642688, 'steps': 44223, 'batch_loss/train': 0.7276510633528233} +12/28/2021 18:52:28 - INFO - codeparrot_training - Step 44224: {'lr': 1.6396983519874592e-05, 'samples': 22643200, 'steps': 44224, 'batch_loss/train': 0.8492876756936312} +12/28/2021 18:52:39 - INFO - codeparrot_training - Step 44225: {'lr': 1.6391369252050424e-05, 'samples': 22643712, 'steps': 44225, 'batch_loss/train': 0.7167568672448397} +12/28/2021 18:52:50 - INFO - codeparrot_training - Step 44226: {'lr': 1.6385755912965956e-05, 'samples': 22644224, 'steps': 44226, 'batch_loss/train': 0.7329843770712614} +12/28/2021 18:53:03 - INFO - codeparrot_training - Step 44227: {'lr': 1.6380143502643524e-05, 'samples': 22644736, 'steps': 44227, 'batch_loss/train': 0.6779209531378001} +12/28/2021 18:53:13 - INFO - codeparrot_training - Step 44228: {'lr': 1.637453202110542e-05, 'samples': 22645248, 'steps': 44228, 'batch_loss/train': 0.7443595621734858} +12/28/2021 18:53:24 - INFO - codeparrot_training - Step 44229: {'lr': 1.6368921468373986e-05, 'samples': 22645760, 'steps': 44229, 'batch_loss/train': 0.7618763698264956} +12/28/2021 18:53:36 - INFO - codeparrot_training - Step 44230: {'lr': 1.6363311844471486e-05, 'samples': 22646272, 'steps': 44230, 'batch_loss/train': 0.6426055570482276} +12/28/2021 18:53:47 - INFO - codeparrot_training - Step 44231: {'lr': 1.63577031494202e-05, 'samples': 22646784, 'steps': 44231, 'batch_loss/train': 0.7763330538582522} +12/28/2021 18:53:57 - INFO - codeparrot_training - Step 44232: {'lr': 1.635209538324256e-05, 'samples': 22647296, 'steps': 44232, 'batch_loss/train': 0.8508986003580503} +12/28/2021 18:54:11 - INFO - codeparrot_training - Step 44233: {'lr': 1.6346488545960714e-05, 'samples': 22647808, 'steps': 44233, 'batch_loss/train': 0.6880112076178193} +12/28/2021 18:54:22 - INFO - codeparrot_training - Step 44234: {'lr': 1.6340882637596948e-05, 'samples': 22648320, 'steps': 44234, 'batch_loss/train': 0.6697855787351727} +12/28/2021 18:54:32 - INFO - codeparrot_training - Step 44235: {'lr': 1.633527765817372e-05, 'samples': 22648832, 'steps': 44235, 'batch_loss/train': 0.669682770036161} +12/28/2021 18:54:43 - INFO - codeparrot_training - Step 44236: {'lr': 1.6329673607713146e-05, 'samples': 22649344, 'steps': 44236, 'batch_loss/train': 0.7074535586871207} +12/28/2021 18:54:55 - INFO - codeparrot_training - Step 44237: {'lr': 1.6324070486237545e-05, 'samples': 22649856, 'steps': 44237, 'batch_loss/train': 0.7848156662657857} +12/28/2021 18:55:06 - INFO - codeparrot_training - Step 44238: {'lr': 1.6318468293769206e-05, 'samples': 22650368, 'steps': 44238, 'batch_loss/train': 1.4354554498568177} +12/28/2021 18:55:16 - INFO - codeparrot_training - Step 44239: {'lr': 1.6312867030330414e-05, 'samples': 22650880, 'steps': 44239, 'batch_loss/train': 0.7239901318680495} +12/28/2021 18:55:28 - INFO - codeparrot_training - Step 44240: {'lr': 1.6307266695943428e-05, 'samples': 22651392, 'steps': 44240, 'batch_loss/train': 0.7585072410292923} +12/28/2021 18:55:39 - INFO - codeparrot_training - Step 44241: {'lr': 1.6301667290630567e-05, 'samples': 22651904, 'steps': 44241, 'batch_loss/train': 0.7288704458624125} +12/28/2021 18:55:50 - INFO - codeparrot_training - Step 44242: {'lr': 1.6296068814413922e-05, 'samples': 22652416, 'steps': 44242, 'batch_loss/train': 0.8111358024179935} +12/28/2021 18:56:00 - INFO - codeparrot_training - Step 44243: {'lr': 1.629047126731592e-05, 'samples': 22652928, 'steps': 44243, 'batch_loss/train': 0.7009551292285323} +12/28/2021 18:56:14 - INFO - codeparrot_training - Step 44244: {'lr': 1.6284874649358794e-05, 'samples': 22653440, 'steps': 44244, 'batch_loss/train': 0.7547985324636102} +12/28/2021 18:56:25 - INFO - codeparrot_training - Step 44245: {'lr': 1.627927896056469e-05, 'samples': 22653952, 'steps': 44245, 'batch_loss/train': 0.6808878576848656} +12/28/2021 18:56:36 - INFO - codeparrot_training - Step 44246: {'lr': 1.6273684200955957e-05, 'samples': 22654464, 'steps': 44246, 'batch_loss/train': 0.7485798336565495} +12/28/2021 18:56:48 - INFO - codeparrot_training - Step 44247: {'lr': 1.626809037055485e-05, 'samples': 22654976, 'steps': 44247, 'batch_loss/train': 0.7147771326126531} +12/28/2021 18:56:58 - INFO - codeparrot_training - Step 44248: {'lr': 1.626249746938352e-05, 'samples': 22655488, 'steps': 44248, 'batch_loss/train': 0.7961997790262103} +12/28/2021 18:57:09 - INFO - codeparrot_training - Step 44249: {'lr': 1.6256905497464226e-05, 'samples': 22656000, 'steps': 44249, 'batch_loss/train': 0.629335985518992} +12/28/2021 18:57:21 - INFO - codeparrot_training - Step 44250: {'lr': 1.6251314454819226e-05, 'samples': 22656512, 'steps': 44250, 'batch_loss/train': 0.74415825586766} +12/28/2021 18:57:32 - INFO - codeparrot_training - Step 44251: {'lr': 1.6245724341470757e-05, 'samples': 22657024, 'steps': 44251, 'batch_loss/train': 0.5755693424725905} +12/28/2021 18:57:43 - INFO - codeparrot_training - Step 44252: {'lr': 1.6240135157441022e-05, 'samples': 22657536, 'steps': 44252, 'batch_loss/train': 0.8621018063277006} +12/28/2021 18:57:55 - INFO - codeparrot_training - Step 44253: {'lr': 1.6234546902752222e-05, 'samples': 22658048, 'steps': 44253, 'batch_loss/train': 0.634575979784131} +12/28/2021 18:58:05 - INFO - codeparrot_training - Step 44254: {'lr': 1.622895957742662e-05, 'samples': 22658560, 'steps': 44254, 'batch_loss/train': 0.8025962272658944} +12/28/2021 18:58:16 - INFO - codeparrot_training - Step 44255: {'lr': 1.6223373181486422e-05, 'samples': 22659072, 'steps': 44255, 'batch_loss/train': 0.7679232228547335} +12/28/2021 18:58:27 - INFO - codeparrot_training - Step 44256: {'lr': 1.6217787714953802e-05, 'samples': 22659584, 'steps': 44256, 'batch_loss/train': 0.7086834600195289} +12/28/2021 18:58:41 - INFO - codeparrot_training - Step 44257: {'lr': 1.6212203177850937e-05, 'samples': 22660096, 'steps': 44257, 'batch_loss/train': 0.6927815824747086} +12/28/2021 18:58:51 - INFO - codeparrot_training - Step 44258: {'lr': 1.620661957020017e-05, 'samples': 22660608, 'steps': 44258, 'batch_loss/train': 0.7495500519871712} +12/28/2021 18:59:02 - INFO - codeparrot_training - Step 44259: {'lr': 1.6201036892023568e-05, 'samples': 22661120, 'steps': 44259, 'batch_loss/train': 0.7513462249189615} +12/28/2021 18:59:14 - INFO - codeparrot_training - Step 44260: {'lr': 1.6195455143343306e-05, 'samples': 22661632, 'steps': 44260, 'batch_loss/train': 0.790870126336813} +12/28/2021 18:59:25 - INFO - codeparrot_training - Step 44261: {'lr': 1.618987432418173e-05, 'samples': 22662144, 'steps': 44261, 'batch_loss/train': 0.7518909508362412} +12/28/2021 18:59:35 - INFO - codeparrot_training - Step 44262: {'lr': 1.6184294434560876e-05, 'samples': 22662656, 'steps': 44262, 'batch_loss/train': 0.648339859675616} +12/28/2021 18:59:46 - INFO - codeparrot_training - Step 44263: {'lr': 1.6178715474502974e-05, 'samples': 22663168, 'steps': 44263, 'batch_loss/train': 0.8187334598042071} +12/28/2021 19:00:00 - INFO - codeparrot_training - Step 44264: {'lr': 1.6173137444030233e-05, 'samples': 22663680, 'steps': 44264, 'batch_loss/train': 0.696036055451259} +12/28/2021 19:00:11 - INFO - codeparrot_training - Step 44265: {'lr': 1.61675603431648e-05, 'samples': 22664192, 'steps': 44265, 'batch_loss/train': 0.9853634536266327} +12/28/2021 19:00:22 - INFO - codeparrot_training - Step 44266: {'lr': 1.6161984171928852e-05, 'samples': 22664704, 'steps': 44266, 'batch_loss/train': 0.7899388279765844} +12/28/2021 19:00:34 - INFO - codeparrot_training - Step 44267: {'lr': 1.615640893034462e-05, 'samples': 22665216, 'steps': 44267, 'batch_loss/train': 0.7181662237271667} +12/28/2021 19:00:45 - INFO - codeparrot_training - Step 44268: {'lr': 1.6150834618434114e-05, 'samples': 22665728, 'steps': 44268, 'batch_loss/train': 0.8937777215614915} +12/28/2021 19:00:55 - INFO - codeparrot_training - Step 44269: {'lr': 1.6145261236219623e-05, 'samples': 22666240, 'steps': 44269, 'batch_loss/train': 0.5925140811596066} +12/28/2021 19:01:07 - INFO - codeparrot_training - Step 44270: {'lr': 1.613968878372332e-05, 'samples': 22666752, 'steps': 44270, 'batch_loss/train': 0.7380428188480437} +12/28/2021 19:01:18 - INFO - codeparrot_training - Step 44271: {'lr': 1.613411726096725e-05, 'samples': 22667264, 'steps': 44271, 'batch_loss/train': 0.7192294460255653} +12/28/2021 19:01:29 - INFO - codeparrot_training - Step 44272: {'lr': 1.6128546667973644e-05, 'samples': 22667776, 'steps': 44272, 'batch_loss/train': 0.7687409416539595} +12/28/2021 19:01:39 - INFO - codeparrot_training - Step 44273: {'lr': 1.612297700476467e-05, 'samples': 22668288, 'steps': 44273, 'batch_loss/train': 0.602747370256111} +12/28/2021 19:01:53 - INFO - codeparrot_training - Step 44274: {'lr': 1.6117408271362405e-05, 'samples': 22668800, 'steps': 44274, 'batch_loss/train': 0.678136202855967} +12/28/2021 19:02:04 - INFO - codeparrot_training - Step 44275: {'lr': 1.611184046778902e-05, 'samples': 22669312, 'steps': 44275, 'batch_loss/train': 0.7682886403053999} +12/28/2021 19:02:15 - INFO - codeparrot_training - Step 44276: {'lr': 1.6106273594066634e-05, 'samples': 22669824, 'steps': 44276, 'batch_loss/train': 0.5292214277142193} +12/28/2021 19:02:27 - INFO - codeparrot_training - Step 44277: {'lr': 1.6100707650217374e-05, 'samples': 22670336, 'steps': 44277, 'batch_loss/train': 0.7731345811625943} +12/28/2021 19:02:37 - INFO - codeparrot_training - Step 44278: {'lr': 1.609514263626341e-05, 'samples': 22670848, 'steps': 44278, 'batch_loss/train': 0.6676218844950199} +12/28/2021 19:02:48 - INFO - codeparrot_training - Step 44279: {'lr': 1.6089578552226812e-05, 'samples': 22671360, 'steps': 44279, 'batch_loss/train': 0.7413360420614481} +12/28/2021 19:03:00 - INFO - codeparrot_training - Step 44280: {'lr': 1.6084015398129732e-05, 'samples': 22671872, 'steps': 44280, 'batch_loss/train': 0.7483589090406895} +12/28/2021 19:03:11 - INFO - codeparrot_training - Step 44281: {'lr': 1.6078453173994313e-05, 'samples': 22672384, 'steps': 44281, 'batch_loss/train': 0.8144655216019601} +12/28/2021 19:03:21 - INFO - codeparrot_training - Step 44282: {'lr': 1.607289187984262e-05, 'samples': 22672896, 'steps': 44282, 'batch_loss/train': 0.6347014987841249} +12/28/2021 19:03:34 - INFO - codeparrot_training - Step 44283: {'lr': 1.606733151569678e-05, 'samples': 22673408, 'steps': 44283, 'batch_loss/train': 0.7089304896071553} +12/28/2021 19:03:45 - INFO - codeparrot_training - Step 44284: {'lr': 1.6061772081578942e-05, 'samples': 22673920, 'steps': 44284, 'batch_loss/train': 0.713048294885084} +12/28/2021 19:03:56 - INFO - codeparrot_training - Step 44285: {'lr': 1.6056213577511108e-05, 'samples': 22674432, 'steps': 44285, 'batch_loss/train': 0.6596158635802567} +12/28/2021 19:04:06 - INFO - codeparrot_training - Step 44286: {'lr': 1.6050656003515436e-05, 'samples': 22674944, 'steps': 44286, 'batch_loss/train': 0.7849548188969493} +12/28/2021 19:04:18 - INFO - codeparrot_training - Step 44287: {'lr': 1.604509935961407e-05, 'samples': 22675456, 'steps': 44287, 'batch_loss/train': 0.7553263790905476} +12/28/2021 19:04:29 - INFO - codeparrot_training - Step 44288: {'lr': 1.6039543645829018e-05, 'samples': 22675968, 'steps': 44288, 'batch_loss/train': 0.6569926119409502} +12/28/2021 19:04:40 - INFO - codeparrot_training - Step 44289: {'lr': 1.603398886218241e-05, 'samples': 22676480, 'steps': 44289, 'batch_loss/train': 0.7214555856771767} +12/28/2021 19:04:52 - INFO - codeparrot_training - Step 44290: {'lr': 1.60284350086963e-05, 'samples': 22676992, 'steps': 44290, 'batch_loss/train': 0.700921356678009} +12/28/2021 19:05:02 - INFO - codeparrot_training - Step 44291: {'lr': 1.6022882085392793e-05, 'samples': 22677504, 'steps': 44291, 'batch_loss/train': 0.7168099214322865} +12/28/2021 19:05:13 - INFO - codeparrot_training - Step 44292: {'lr': 1.6017330092293976e-05, 'samples': 22678016, 'steps': 44292, 'batch_loss/train': 0.6175303015625104} +12/28/2021 19:05:25 - INFO - codeparrot_training - Step 44293: {'lr': 1.601177902942194e-05, 'samples': 22678528, 'steps': 44293, 'batch_loss/train': 0.7225642609992065} +12/28/2021 19:05:36 - INFO - codeparrot_training - Step 44294: {'lr': 1.600622889679862e-05, 'samples': 22679040, 'steps': 44294, 'batch_loss/train': 0.7461053607985377} +12/28/2021 19:05:47 - INFO - codeparrot_training - Step 44295: {'lr': 1.600067969444624e-05, 'samples': 22679552, 'steps': 44295, 'batch_loss/train': 0.6950665847398341} +12/28/2021 19:05:57 - INFO - codeparrot_training - Step 44296: {'lr': 1.5995131422386844e-05, 'samples': 22680064, 'steps': 44296, 'batch_loss/train': 0.6914308127015829} +12/28/2021 19:06:11 - INFO - codeparrot_training - Step 44297: {'lr': 1.5989584080642354e-05, 'samples': 22680576, 'steps': 44297, 'batch_loss/train': 0.7982771010138094} +12/28/2021 19:06:22 - INFO - codeparrot_training - Step 44298: {'lr': 1.598403766923498e-05, 'samples': 22681088, 'steps': 44298, 'batch_loss/train': 0.6663259617052972} +12/28/2021 19:06:32 - INFO - codeparrot_training - Step 44299: {'lr': 1.5978492188186754e-05, 'samples': 22681600, 'steps': 44299, 'batch_loss/train': 0.7500038142316043} +12/28/2021 19:06:45 - INFO - codeparrot_training - Step 44300: {'lr': 1.5972947637519608e-05, 'samples': 22682112, 'steps': 44300, 'batch_loss/train': 0.7182165938429534} +12/28/2021 19:06:55 - INFO - codeparrot_training - Step 44301: {'lr': 1.5967404017255687e-05, 'samples': 22682624, 'steps': 44301, 'batch_loss/train': 0.8144579101353884} +12/28/2021 19:07:06 - INFO - codeparrot_training - Step 44302: {'lr': 1.5961861327416978e-05, 'samples': 22683136, 'steps': 44302, 'batch_loss/train': 0.645298620685935} +12/28/2021 19:07:20 - INFO - codeparrot_training - Step 44303: {'lr': 1.5956319568025567e-05, 'samples': 22683648, 'steps': 44303, 'batch_loss/train': 0.6718086791224778} +12/28/2021 19:07:30 - INFO - codeparrot_training - Step 44304: {'lr': 1.5950778739103443e-05, 'samples': 22684160, 'steps': 44304, 'batch_loss/train': 0.7022174925077707} +12/28/2021 19:07:41 - INFO - codeparrot_training - Step 44305: {'lr': 1.5945238840672643e-05, 'samples': 22684672, 'steps': 44305, 'batch_loss/train': 0.79038424231112} +12/28/2021 19:07:53 - INFO - codeparrot_training - Step 44306: {'lr': 1.5939699872755203e-05, 'samples': 22685184, 'steps': 44306, 'batch_loss/train': 0.8416506857611239} +12/28/2021 19:08:04 - INFO - codeparrot_training - Step 44307: {'lr': 1.5934161835373133e-05, 'samples': 22685696, 'steps': 44307, 'batch_loss/train': 1.0319015635177493} +12/28/2021 19:08:15 - INFO - codeparrot_training - Step 44308: {'lr': 1.5928624728548475e-05, 'samples': 22686208, 'steps': 44308, 'batch_loss/train': 0.7530293832533062} +12/28/2021 19:08:25 - INFO - codeparrot_training - Step 44309: {'lr': 1.5923088552303206e-05, 'samples': 22686720, 'steps': 44309, 'batch_loss/train': 0.8000767258927226} +12/28/2021 19:08:39 - INFO - codeparrot_training - Step 44310: {'lr': 1.5917553306659394e-05, 'samples': 22687232, 'steps': 44310, 'batch_loss/train': 0.6958627491258085} +12/28/2021 19:08:49 - INFO - codeparrot_training - Step 44311: {'lr': 1.5912018991638966e-05, 'samples': 22687744, 'steps': 44311, 'batch_loss/train': 0.9133764123544097} +12/28/2021 19:09:00 - INFO - codeparrot_training - Step 44312: {'lr': 1.590648560726393e-05, 'samples': 22688256, 'steps': 44312, 'batch_loss/train': 0.763369528343901} +12/28/2021 19:09:12 - INFO - codeparrot_training - Step 44313: {'lr': 1.5900953153556384e-05, 'samples': 22688768, 'steps': 44313, 'batch_loss/train': 0.62334657413885} +12/28/2021 19:09:23 - INFO - codeparrot_training - Step 44314: {'lr': 1.5895421630538225e-05, 'samples': 22689280, 'steps': 44314, 'batch_loss/train': 0.8575609987601638} +12/28/2021 19:09:33 - INFO - codeparrot_training - Step 44315: {'lr': 1.5889891038231463e-05, 'samples': 22689792, 'steps': 44315, 'batch_loss/train': 0.8757194555364549} +12/28/2021 19:09:44 - INFO - codeparrot_training - Step 44316: {'lr': 1.5884361376658106e-05, 'samples': 22690304, 'steps': 44316, 'batch_loss/train': 0.6927813803777099} +12/28/2021 19:09:56 - INFO - codeparrot_training - Step 44317: {'lr': 1.5878832645840114e-05, 'samples': 22690816, 'steps': 44317, 'batch_loss/train': 0.754369433503598} +12/28/2021 19:10:07 - INFO - codeparrot_training - Step 44318: {'lr': 1.587330484579952e-05, 'samples': 22691328, 'steps': 44318, 'batch_loss/train': 0.6629805851262063} +12/28/2021 19:10:17 - INFO - codeparrot_training - Step 44319: {'lr': 1.586777797655822e-05, 'samples': 22691840, 'steps': 44319, 'batch_loss/train': 0.7445853261742741} +12/28/2021 19:10:31 - INFO - codeparrot_training - Step 44320: {'lr': 1.5862252038138262e-05, 'samples': 22692352, 'steps': 44320, 'batch_loss/train': 0.6658275369554758} +12/28/2021 19:10:42 - INFO - codeparrot_training - Step 44321: {'lr': 1.585672703056157e-05, 'samples': 22692864, 'steps': 44321, 'batch_loss/train': 0.718418839154765} +12/28/2021 19:10:52 - INFO - codeparrot_training - Step 44322: {'lr': 1.5851202953850175e-05, 'samples': 22693376, 'steps': 44322, 'batch_loss/train': 0.7630188791081309} +12/28/2021 19:11:05 - INFO - codeparrot_training - Step 44323: {'lr': 1.5845679808025897e-05, 'samples': 22693888, 'steps': 44323, 'batch_loss/train': 0.7406565030105412} +12/28/2021 19:11:15 - INFO - codeparrot_training - Step 44324: {'lr': 1.5840157593110803e-05, 'samples': 22694400, 'steps': 44324, 'batch_loss/train': 0.7992643248289824} +12/28/2021 19:11:26 - INFO - codeparrot_training - Step 44325: {'lr': 1.5834636309126904e-05, 'samples': 22694912, 'steps': 44325, 'batch_loss/train': 0.755512148141861} +12/28/2021 19:11:38 - INFO - codeparrot_training - Step 44326: {'lr': 1.5829115956096013e-05, 'samples': 22695424, 'steps': 44326, 'batch_loss/train': 0.8142629992216825} +12/28/2021 19:11:49 - INFO - codeparrot_training - Step 44327: {'lr': 1.5823596534040085e-05, 'samples': 22695936, 'steps': 44327, 'batch_loss/train': 1.3711061533540487} +12/28/2021 19:11:59 - INFO - codeparrot_training - Step 44328: {'lr': 1.5818078042981216e-05, 'samples': 22696448, 'steps': 44328, 'batch_loss/train': 0.5645300354517531} +12/28/2021 19:12:10 - INFO - codeparrot_training - Step 44329: {'lr': 1.581256048294119e-05, 'samples': 22696960, 'steps': 44329, 'batch_loss/train': 0.7131512900814414} +12/28/2021 19:12:24 - INFO - codeparrot_training - Step 44330: {'lr': 1.5807043853942023e-05, 'samples': 22697472, 'steps': 44330, 'batch_loss/train': 0.6809406246757135} +12/28/2021 19:12:34 - INFO - codeparrot_training - Step 44331: {'lr': 1.5801528156005608e-05, 'samples': 22697984, 'steps': 44331, 'batch_loss/train': 0.7205755361355841} +12/28/2021 19:12:45 - INFO - codeparrot_training - Step 44332: {'lr': 1.5796013389153873e-05, 'samples': 22698496, 'steps': 44332, 'batch_loss/train': 0.6485144211910665} +12/28/2021 19:12:57 - INFO - codeparrot_training - Step 44333: {'lr': 1.5790499553408777e-05, 'samples': 22699008, 'steps': 44333, 'batch_loss/train': 0.7919032771023922} +12/28/2021 19:13:08 - INFO - codeparrot_training - Step 44334: {'lr': 1.5784986648792215e-05, 'samples': 22699520, 'steps': 44334, 'batch_loss/train': 0.8902664706110954} +12/28/2021 19:13:18 - INFO - codeparrot_training - Step 44335: {'lr': 1.577947467532609e-05, 'samples': 22700032, 'steps': 44335, 'batch_loss/train': 0.833325014449656} +12/28/2021 19:13:31 - INFO - codeparrot_training - Step 44336: {'lr': 1.577396363303235e-05, 'samples': 22700544, 'steps': 44336, 'batch_loss/train': 0.6600474519655108} +12/28/2021 19:13:41 - INFO - codeparrot_training - Step 44337: {'lr': 1.5768453521932928e-05, 'samples': 22701056, 'steps': 44337, 'batch_loss/train': 0.7488461988978088} +12/28/2021 19:13:52 - INFO - codeparrot_training - Step 44338: {'lr': 1.576294434204964e-05, 'samples': 22701568, 'steps': 44338, 'batch_loss/train': 0.6089180042035878} +12/28/2021 19:14:03 - INFO - codeparrot_training - Step 44339: {'lr': 1.5757436093404493e-05, 'samples': 22702080, 'steps': 44339, 'batch_loss/train': 0.7693813340738416} +12/28/2021 19:14:15 - INFO - codeparrot_training - Step 44340: {'lr': 1.57519287760193e-05, 'samples': 22702592, 'steps': 44340, 'batch_loss/train': 0.9452579049393535} +12/28/2021 19:14:26 - INFO - codeparrot_training - Step 44341: {'lr': 1.574642238991597e-05, 'samples': 22703104, 'steps': 44341, 'batch_loss/train': 0.6972414404153824} +12/28/2021 19:14:36 - INFO - codeparrot_training - Step 44342: {'lr': 1.574091693511645e-05, 'samples': 22703616, 'steps': 44342, 'batch_loss/train': 0.6541494554840028} +12/28/2021 19:14:49 - INFO - codeparrot_training - Step 44343: {'lr': 1.5735412411642557e-05, 'samples': 22704128, 'steps': 44343, 'batch_loss/train': 0.7288720803335309} +12/28/2021 19:15:00 - INFO - codeparrot_training - Step 44344: {'lr': 1.572990881951622e-05, 'samples': 22704640, 'steps': 44344, 'batch_loss/train': 0.7036150712519884} +12/28/2021 19:15:11 - INFO - codeparrot_training - Step 44345: {'lr': 1.5724406158759336e-05, 'samples': 22705152, 'steps': 44345, 'batch_loss/train': 0.6771576311439276} +12/28/2021 19:15:23 - INFO - codeparrot_training - Step 44346: {'lr': 1.571890442939372e-05, 'samples': 22705664, 'steps': 44346, 'batch_loss/train': 0.7047913964488544} +12/28/2021 19:15:34 - INFO - codeparrot_training - Step 44347: {'lr': 1.5713403631441303e-05, 'samples': 22706176, 'steps': 44347, 'batch_loss/train': 0.6982322502881289} +12/28/2021 19:15:44 - INFO - codeparrot_training - Step 44348: {'lr': 1.5707903764923953e-05, 'samples': 22706688, 'steps': 44348, 'batch_loss/train': 0.8105487311258912} +12/28/2021 19:15:55 - INFO - codeparrot_training - Step 44349: {'lr': 1.5702404829863432e-05, 'samples': 22707200, 'steps': 44349, 'batch_loss/train': 0.7825024258345366} +12/28/2021 19:16:09 - INFO - codeparrot_training - Step 44350: {'lr': 1.569690682628172e-05, 'samples': 22707712, 'steps': 44350, 'batch_loss/train': 0.7822496574372053} +12/28/2021 19:16:19 - INFO - codeparrot_training - Step 44351: {'lr': 1.5691409754200692e-05, 'samples': 22708224, 'steps': 44351, 'batch_loss/train': 0.6888970371801406} +12/28/2021 19:16:30 - INFO - codeparrot_training - Step 44352: {'lr': 1.5685913613642106e-05, 'samples': 22708736, 'steps': 44352, 'batch_loss/train': 0.8998481980524957} +12/28/2021 19:16:42 - INFO - codeparrot_training - Step 44353: {'lr': 1.5680418404627804e-05, 'samples': 22709248, 'steps': 44353, 'batch_loss/train': 0.6524684201576747} +12/28/2021 19:16:53 - INFO - codeparrot_training - Step 44354: {'lr': 1.5674924127179775e-05, 'samples': 22709760, 'steps': 44354, 'batch_loss/train': 0.7533221244812012} +12/28/2021 19:17:04 - INFO - codeparrot_training - Step 44355: {'lr': 1.5669430781319716e-05, 'samples': 22710272, 'steps': 44355, 'batch_loss/train': 0.8596371398307383} +12/28/2021 19:17:15 - INFO - codeparrot_training - Step 44356: {'lr': 1.566393836706953e-05, 'samples': 22710784, 'steps': 44356, 'batch_loss/train': 0.6593566802330315} +12/28/2021 19:17:26 - INFO - codeparrot_training - Step 44357: {'lr': 1.5658446884451032e-05, 'samples': 22711296, 'steps': 44357, 'batch_loss/train': 0.7672759188571945} +12/28/2021 19:17:37 - INFO - codeparrot_training - Step 44358: {'lr': 1.5652956333486067e-05, 'samples': 22711808, 'steps': 44358, 'batch_loss/train': 0.7031656764447689} +12/28/2021 19:17:47 - INFO - codeparrot_training - Step 44359: {'lr': 1.564746671419648e-05, 'samples': 22712320, 'steps': 44359, 'batch_loss/train': 0.6823712881887332} +12/28/2021 19:18:00 - INFO - codeparrot_training - Step 44360: {'lr': 1.5641978026604055e-05, 'samples': 22712832, 'steps': 44360, 'batch_loss/train': 0.5696758010890335} +12/28/2021 19:18:10 - INFO - codeparrot_training - Step 44361: {'lr': 1.5636490270730637e-05, 'samples': 22713344, 'steps': 44361, 'batch_loss/train': 0.6987545997835696} +12/28/2021 19:18:21 - INFO - codeparrot_training - Step 44362: {'lr': 1.563100344659807e-05, 'samples': 22713856, 'steps': 44362, 'batch_loss/train': 0.6703267865814269} +12/28/2021 19:18:35 - INFO - codeparrot_training - Step 44363: {'lr': 1.5625517554228143e-05, 'samples': 22714368, 'steps': 44363, 'batch_loss/train': 0.7257239283062518} +12/28/2021 19:18:46 - INFO - codeparrot_training - Step 44364: {'lr': 1.5620032593642615e-05, 'samples': 22714880, 'steps': 44364, 'batch_loss/train': 0.6501891675870866} +12/28/2021 19:18:57 - INFO - codeparrot_training - Step 44365: {'lr': 1.5614548564863385e-05, 'samples': 22715392, 'steps': 44365, 'batch_loss/train': 0.4006544796284288} +12/28/2021 19:19:09 - INFO - codeparrot_training - Step 44366: {'lr': 1.5609065467912188e-05, 'samples': 22715904, 'steps': 44366, 'batch_loss/train': 0.6687113428488374} +12/28/2021 19:19:20 - INFO - codeparrot_training - Step 44367: {'lr': 1.5603583302810837e-05, 'samples': 22716416, 'steps': 44367, 'batch_loss/train': 0.7066816333681345} +12/28/2021 19:19:30 - INFO - codeparrot_training - Step 44368: {'lr': 1.559810206958115e-05, 'samples': 22716928, 'steps': 44368, 'batch_loss/train': 0.7387996207689866} +12/28/2021 19:19:41 - INFO - codeparrot_training - Step 44369: {'lr': 1.5592621768244885e-05, 'samples': 22717440, 'steps': 44369, 'batch_loss/train': 0.727974895038642} +12/28/2021 19:19:55 - INFO - codeparrot_training - Step 44370: {'lr': 1.558714239882386e-05, 'samples': 22717952, 'steps': 44370, 'batch_loss/train': 0.7069232980720699} +12/28/2021 19:20:05 - INFO - codeparrot_training - Step 44371: {'lr': 1.5581663961339836e-05, 'samples': 22718464, 'steps': 44371, 'batch_loss/train': 0.7508357726037502} +12/28/2021 19:20:16 - INFO - codeparrot_training - Step 44372: {'lr': 1.55761864558146e-05, 'samples': 22718976, 'steps': 44372, 'batch_loss/train': 0.7869826294481754} +12/28/2021 19:20:28 - INFO - codeparrot_training - Step 44373: {'lr': 1.557070988226994e-05, 'samples': 22719488, 'steps': 44373, 'batch_loss/train': 0.7176119829528034} +12/28/2021 19:20:39 - INFO - codeparrot_training - Step 44374: {'lr': 1.5565234240727642e-05, 'samples': 22720000, 'steps': 44374, 'batch_loss/train': 0.6930803614668548} +12/28/2021 19:20:50 - INFO - codeparrot_training - Step 44375: {'lr': 1.5559759531209416e-05, 'samples': 22720512, 'steps': 44375, 'batch_loss/train': 0.7050572037696838} +12/28/2021 19:21:02 - INFO - codeparrot_training - Step 44376: {'lr': 1.5554285753737073e-05, 'samples': 22721024, 'steps': 44376, 'batch_loss/train': 0.7231528768315911} +12/28/2021 19:21:12 - INFO - codeparrot_training - Step 44377: {'lr': 1.5548812908332405e-05, 'samples': 22721536, 'steps': 44377, 'batch_loss/train': 0.8665075394092128} +12/28/2021 19:21:23 - INFO - codeparrot_training - Step 44378: {'lr': 1.5543340995017084e-05, 'samples': 22722048, 'steps': 44378, 'batch_loss/train': 0.7045423053205013} +12/28/2021 19:21:34 - INFO - codeparrot_training - Step 44379: {'lr': 1.5537870013812905e-05, 'samples': 22722560, 'steps': 44379, 'batch_loss/train': 0.6860078300815076} +12/28/2021 19:21:47 - INFO - codeparrot_training - Step 44380: {'lr': 1.5532399964741678e-05, 'samples': 22723072, 'steps': 44380, 'batch_loss/train': 0.6947306650690734} +12/28/2021 19:21:58 - INFO - codeparrot_training - Step 44381: {'lr': 1.5526930847825083e-05, 'samples': 22723584, 'steps': 44381, 'batch_loss/train': 0.6993760764598846} +12/28/2021 19:22:08 - INFO - codeparrot_training - Step 44382: {'lr': 1.5521462663084825e-05, 'samples': 22724096, 'steps': 44382, 'batch_loss/train': 0.7245543396566063} +12/28/2021 19:22:21 - INFO - codeparrot_training - Step 44383: {'lr': 1.5515995410542743e-05, 'samples': 22724608, 'steps': 44383, 'batch_loss/train': 0.7849345649592578} +12/28/2021 19:22:31 - INFO - codeparrot_training - Step 44384: {'lr': 1.551052909022052e-05, 'samples': 22725120, 'steps': 44384, 'batch_loss/train': 0.6288302764296532} +12/28/2021 19:22:42 - INFO - codeparrot_training - Step 44385: {'lr': 1.5505063702139888e-05, 'samples': 22725632, 'steps': 44385, 'batch_loss/train': 0.5858407744672149} +12/28/2021 19:22:54 - INFO - codeparrot_training - Step 44386: {'lr': 1.5499599246322575e-05, 'samples': 22726144, 'steps': 44386, 'batch_loss/train': 0.4988360834540799} +12/28/2021 19:23:05 - INFO - codeparrot_training - Step 44387: {'lr': 1.549413572279032e-05, 'samples': 22726656, 'steps': 44387, 'batch_loss/train': 0.6194188894005492} +12/28/2021 19:23:15 - INFO - codeparrot_training - Step 44388: {'lr': 1.548867313156485e-05, 'samples': 22727168, 'steps': 44388, 'batch_loss/train': 0.7806696137413383} +12/28/2021 19:23:29 - INFO - codeparrot_training - Step 44389: {'lr': 1.5483211472667876e-05, 'samples': 22727680, 'steps': 44389, 'batch_loss/train': 0.7744014789350331} +12/28/2021 19:23:39 - INFO - codeparrot_training - Step 44390: {'lr': 1.547775074612104e-05, 'samples': 22728192, 'steps': 44390, 'batch_loss/train': 0.7441305950051174} +12/28/2021 19:23:50 - INFO - codeparrot_training - Step 44391: {'lr': 1.5472290951946187e-05, 'samples': 22728704, 'steps': 44391, 'batch_loss/train': 0.7652040245011449} +12/28/2021 19:24:00 - INFO - codeparrot_training - Step 44392: {'lr': 1.5466832090164916e-05, 'samples': 22729216, 'steps': 44392, 'batch_loss/train': 0.8338326434604824} +12/28/2021 19:24:13 - INFO - codeparrot_training - Step 44393: {'lr': 1.546137416079893e-05, 'samples': 22729728, 'steps': 44393, 'batch_loss/train': 0.833690982311964} +12/28/2021 19:24:23 - INFO - codeparrot_training - Step 44394: {'lr': 1.5455917163870042e-05, 'samples': 22730240, 'steps': 44394, 'batch_loss/train': 0.6248386651277542} +12/28/2021 19:24:34 - INFO - codeparrot_training - Step 44395: {'lr': 1.5450461099399822e-05, 'samples': 22730752, 'steps': 44395, 'batch_loss/train': 0.7511014682240784} +12/28/2021 19:24:47 - INFO - codeparrot_training - Step 44396: {'lr': 1.5445005967409996e-05, 'samples': 22731264, 'steps': 44396, 'batch_loss/train': 0.8502559652552009} +12/28/2021 19:24:58 - INFO - codeparrot_training - Step 44397: {'lr': 1.543955176792228e-05, 'samples': 22731776, 'steps': 44397, 'batch_loss/train': 0.7549099926836789} +12/28/2021 19:25:08 - INFO - codeparrot_training - Step 44398: {'lr': 1.543409850095831e-05, 'samples': 22732288, 'steps': 44398, 'batch_loss/train': 0.8163443924859166} +12/28/2021 19:25:21 - INFO - codeparrot_training - Step 44399: {'lr': 1.542864616653983e-05, 'samples': 22732800, 'steps': 44399, 'batch_loss/train': 0.81215357221663} +12/28/2021 19:25:31 - INFO - codeparrot_training - Step 44400: {'lr': 1.542319476468851e-05, 'samples': 22733312, 'steps': 44400, 'batch_loss/train': 0.6838477025739849} +12/28/2021 19:25:42 - INFO - codeparrot_training - Step 44401: {'lr': 1.541774429542592e-05, 'samples': 22733824, 'steps': 44401, 'batch_loss/train': 0.786426228005439} +12/28/2021 19:25:53 - INFO - codeparrot_training - Step 44402: {'lr': 1.5412294758773813e-05, 'samples': 22734336, 'steps': 44402, 'batch_loss/train': 0.6966199576854706} +12/28/2021 19:26:05 - INFO - codeparrot_training - Step 44403: {'lr': 1.5406846154753935e-05, 'samples': 22734848, 'steps': 44403, 'batch_loss/train': 0.7097383555956185} +12/28/2021 19:26:15 - INFO - codeparrot_training - Step 44404: {'lr': 1.5401398483387784e-05, 'samples': 22735360, 'steps': 44404, 'batch_loss/train': 0.7392145632766187} +12/28/2021 19:26:26 - INFO - codeparrot_training - Step 44405: {'lr': 1.5395951744697045e-05, 'samples': 22735872, 'steps': 44405, 'batch_loss/train': 0.6430657958844677} +12/28/2021 19:26:38 - INFO - codeparrot_training - Step 44406: {'lr': 1.539050593870353e-05, 'samples': 22736384, 'steps': 44406, 'batch_loss/train': 0.7216494493186474} +12/28/2021 19:26:49 - INFO - codeparrot_training - Step 44407: {'lr': 1.5385061065428724e-05, 'samples': 22736896, 'steps': 44407, 'batch_loss/train': 0.7770240870304406} +12/28/2021 19:26:59 - INFO - codeparrot_training - Step 44408: {'lr': 1.5379617124894274e-05, 'samples': 22737408, 'steps': 44408, 'batch_loss/train': 0.718339663464576} +12/28/2021 19:27:13 - INFO - codeparrot_training - Step 44409: {'lr': 1.537417411712197e-05, 'samples': 22737920, 'steps': 44409, 'batch_loss/train': 0.6367608858272433} +12/28/2021 19:27:24 - INFO - codeparrot_training - Step 44410: {'lr': 1.536873204213332e-05, 'samples': 22738432, 'steps': 44410, 'batch_loss/train': 0.692446896340698} +12/28/2021 19:27:34 - INFO - codeparrot_training - Step 44411: {'lr': 1.5363290899950004e-05, 'samples': 22738944, 'steps': 44411, 'batch_loss/train': 0.8053944366984069} +12/28/2021 19:27:45 - INFO - codeparrot_training - Step 44412: {'lr': 1.5357850690593616e-05, 'samples': 22739456, 'steps': 44412, 'batch_loss/train': 0.8304861737415195} +12/28/2021 19:27:57 - INFO - codeparrot_training - Step 44413: {'lr': 1.535241141408586e-05, 'samples': 22739968, 'steps': 44413, 'batch_loss/train': 0.7322831536293961} +12/28/2021 19:28:08 - INFO - codeparrot_training - Step 44414: {'lr': 1.53469730704483e-05, 'samples': 22740480, 'steps': 44414, 'batch_loss/train': 0.6695693801157176} +12/28/2021 19:28:18 - INFO - codeparrot_training - Step 44415: {'lr': 1.534153565970259e-05, 'samples': 22740992, 'steps': 44415, 'batch_loss/train': 0.6102096484974027} +12/28/2021 19:28:30 - INFO - codeparrot_training - Step 44416: {'lr': 1.5336099181870287e-05, 'samples': 22741504, 'steps': 44416, 'batch_loss/train': 0.6565303541719913} +12/28/2021 19:28:41 - INFO - codeparrot_training - Step 44417: {'lr': 1.5330663636973053e-05, 'samples': 22742016, 'steps': 44417, 'batch_loss/train': 0.6927918130531907} +12/28/2021 19:28:52 - INFO - codeparrot_training - Step 44418: {'lr': 1.5325229025032554e-05, 'samples': 22742528, 'steps': 44418, 'batch_loss/train': 0.7187347188591957} +12/28/2021 19:29:05 - INFO - codeparrot_training - Step 44419: {'lr': 1.5319795346070254e-05, 'samples': 22743040, 'steps': 44419, 'batch_loss/train': 0.8325517391785979} +12/28/2021 19:29:16 - INFO - codeparrot_training - Step 44420: {'lr': 1.5314362600107905e-05, 'samples': 22743552, 'steps': 44420, 'batch_loss/train': 0.7171320924535394} +12/28/2021 19:29:26 - INFO - codeparrot_training - Step 44421: {'lr': 1.5308930787167024e-05, 'samples': 22744064, 'steps': 44421, 'batch_loss/train': 0.7338522607460618} +12/28/2021 19:29:37 - INFO - codeparrot_training - Step 44422: {'lr': 1.53034999072692e-05, 'samples': 22744576, 'steps': 44422, 'batch_loss/train': 0.7425373150035739} +12/28/2021 19:29:49 - INFO - codeparrot_training - Step 44423: {'lr': 1.5298069960436033e-05, 'samples': 22745088, 'steps': 44423, 'batch_loss/train': 0.7438248284161091} +12/28/2021 19:30:00 - INFO - codeparrot_training - Step 44424: {'lr': 1.529264094668914e-05, 'samples': 22745600, 'steps': 44424, 'batch_loss/train': 0.8473088688333519} +12/28/2021 19:30:10 - INFO - codeparrot_training - Step 44425: {'lr': 1.528721286605006e-05, 'samples': 22746112, 'steps': 44425, 'batch_loss/train': 0.8484486498637125} +12/28/2021 19:30:24 - INFO - codeparrot_training - Step 44426: {'lr': 1.5281785718540413e-05, 'samples': 22746624, 'steps': 44426, 'batch_loss/train': 0.760726684005931} +12/28/2021 19:30:35 - INFO - codeparrot_training - Step 44427: {'lr': 1.527635950418177e-05, 'samples': 22747136, 'steps': 44427, 'batch_loss/train': 0.7277459658216685} +12/28/2021 19:30:45 - INFO - codeparrot_training - Step 44428: {'lr': 1.5270934222995663e-05, 'samples': 22747648, 'steps': 44428, 'batch_loss/train': 0.7800780707038939} +12/28/2021 19:30:57 - INFO - codeparrot_training - Step 44429: {'lr': 1.5265509875003747e-05, 'samples': 22748160, 'steps': 44429, 'batch_loss/train': 0.7386282747611403} +12/28/2021 19:31:08 - INFO - codeparrot_training - Step 44430: {'lr': 1.5260086460227473e-05, 'samples': 22748672, 'steps': 44430, 'batch_loss/train': 0.7502810908481479} +12/28/2021 19:31:19 - INFO - codeparrot_training - Step 44431: {'lr': 1.5254663978688466e-05, 'samples': 22749184, 'steps': 44431, 'batch_loss/train': 0.7580657862126827} +12/28/2021 19:31:31 - INFO - codeparrot_training - Step 44432: {'lr': 1.5249242430408345e-05, 'samples': 22749696, 'steps': 44432, 'batch_loss/train': 0.7259154352359474} +12/28/2021 19:31:41 - INFO - codeparrot_training - Step 44433: {'lr': 1.5243821815408537e-05, 'samples': 22750208, 'steps': 44433, 'batch_loss/train': 0.7935123834758997} +12/28/2021 19:31:52 - INFO - codeparrot_training - Step 44434: {'lr': 1.5238402133710637e-05, 'samples': 22750720, 'steps': 44434, 'batch_loss/train': 0.7697290824726224} +12/28/2021 19:32:03 - INFO - codeparrot_training - Step 44435: {'lr': 1.5232983385336269e-05, 'samples': 22751232, 'steps': 44435, 'batch_loss/train': 0.7863826008979231} +12/28/2021 19:32:17 - INFO - codeparrot_training - Step 44436: {'lr': 1.5227565570306884e-05, 'samples': 22751744, 'steps': 44436, 'batch_loss/train': 0.724615097977221} +12/28/2021 19:32:27 - INFO - codeparrot_training - Step 44437: {'lr': 1.5222148688644021e-05, 'samples': 22752256, 'steps': 44437, 'batch_loss/train': 0.7592490212991834} +12/28/2021 19:32:38 - INFO - codeparrot_training - Step 44438: {'lr': 1.5216732740369277e-05, 'samples': 22752768, 'steps': 44438, 'batch_loss/train': 0.810789032606408} +12/28/2021 19:32:50 - INFO - codeparrot_training - Step 44439: {'lr': 1.521131772550416e-05, 'samples': 22753280, 'steps': 44439, 'batch_loss/train': 0.8091069757938385} +12/28/2021 19:33:01 - INFO - codeparrot_training - Step 44440: {'lr': 1.520590364407018e-05, 'samples': 22753792, 'steps': 44440, 'batch_loss/train': 0.7631713710725307} +12/28/2021 19:33:11 - INFO - codeparrot_training - Step 44441: {'lr': 1.5200490496088904e-05, 'samples': 22754304, 'steps': 44441, 'batch_loss/train': 0.6126543362624943} +12/28/2021 19:33:23 - INFO - codeparrot_training - Step 44442: {'lr': 1.5195078281581759e-05, 'samples': 22754816, 'steps': 44442, 'batch_loss/train': 0.8185417240601964} +12/28/2021 19:33:34 - INFO - codeparrot_training - Step 44443: {'lr': 1.518966700057034e-05, 'samples': 22755328, 'steps': 44443, 'batch_loss/train': 0.6932616622652858} +12/28/2021 19:33:45 - INFO - codeparrot_training - Step 44444: {'lr': 1.5184256653076185e-05, 'samples': 22755840, 'steps': 44444, 'batch_loss/train': 0.8082292536273599} +12/28/2021 19:33:55 - INFO - codeparrot_training - Step 44445: {'lr': 1.517884723912072e-05, 'samples': 22756352, 'steps': 44445, 'batch_loss/train': 0.75874581374228} +12/28/2021 19:34:09 - INFO - codeparrot_training - Step 44446: {'lr': 1.517343875872554e-05, 'samples': 22756864, 'steps': 44446, 'batch_loss/train': 0.800376464612782} +12/28/2021 19:34:20 - INFO - codeparrot_training - Step 44447: {'lr': 1.516803121191207e-05, 'samples': 22757376, 'steps': 44447, 'batch_loss/train': 0.8329168916679919} +12/28/2021 19:34:31 - INFO - codeparrot_training - Step 44448: {'lr': 1.5162624598701824e-05, 'samples': 22757888, 'steps': 44448, 'batch_loss/train': 0.8304764851927757} +12/28/2021 19:34:43 - INFO - codeparrot_training - Step 44449: {'lr': 1.515721891911634e-05, 'samples': 22758400, 'steps': 44449, 'batch_loss/train': 0.705072307959199} +12/28/2021 19:34:53 - INFO - codeparrot_training - Step 44450: {'lr': 1.5151814173177043e-05, 'samples': 22758912, 'steps': 44450, 'batch_loss/train': 0.8370769424363971} +12/28/2021 19:35:04 - INFO - codeparrot_training - Step 44451: {'lr': 1.51464103609055e-05, 'samples': 22759424, 'steps': 44451, 'batch_loss/train': 0.7483592573553324} +12/28/2021 19:35:17 - INFO - codeparrot_training - Step 44452: {'lr': 1.514100748232311e-05, 'samples': 22759936, 'steps': 44452, 'batch_loss/train': 0.7974230693653226} +12/28/2021 19:35:27 - INFO - codeparrot_training - Step 44453: {'lr': 1.5135605537451414e-05, 'samples': 22760448, 'steps': 44453, 'batch_loss/train': 0.8082894212566316} +12/28/2021 19:35:38 - INFO - codeparrot_training - Step 44454: {'lr': 1.5130204526311891e-05, 'samples': 22760960, 'steps': 44454, 'batch_loss/train': 0.7536374279297888} +12/28/2021 19:35:49 - INFO - codeparrot_training - Step 44455: {'lr': 1.5124804448925999e-05, 'samples': 22761472, 'steps': 44455, 'batch_loss/train': 0.7717868397012353} +12/28/2021 19:36:02 - INFO - codeparrot_training - Step 44456: {'lr': 1.5119405305315137e-05, 'samples': 22761984, 'steps': 44456, 'batch_loss/train': 0.649835747666657} +12/28/2021 19:36:13 - INFO - codeparrot_training - Step 44457: {'lr': 1.511400709550087e-05, 'samples': 22762496, 'steps': 44457, 'batch_loss/train': 0.726723110768944} +12/28/2021 19:36:23 - INFO - codeparrot_training - Step 44458: {'lr': 1.5108609819504654e-05, 'samples': 22763008, 'steps': 44458, 'batch_loss/train': 0.6277072047814727} +12/28/2021 19:36:35 - INFO - codeparrot_training - Step 44459: {'lr': 1.5103213477347888e-05, 'samples': 22763520, 'steps': 44459, 'batch_loss/train': 0.6924799755215645} +12/28/2021 19:36:46 - INFO - codeparrot_training - Step 44460: {'lr': 1.5097818069052e-05, 'samples': 22764032, 'steps': 44460, 'batch_loss/train': 0.7830400308594108} +12/28/2021 19:36:57 - INFO - codeparrot_training - Step 44461: {'lr': 1.5092423594638555e-05, 'samples': 22764544, 'steps': 44461, 'batch_loss/train': 0.7607603981159627} +12/28/2021 19:37:09 - INFO - codeparrot_training - Step 44462: {'lr': 1.5087030054128926e-05, 'samples': 22765056, 'steps': 44462, 'batch_loss/train': 0.7389201335608959} +12/28/2021 19:37:19 - INFO - codeparrot_training - Step 44463: {'lr': 1.5081637447544538e-05, 'samples': 22765568, 'steps': 44463, 'batch_loss/train': 0.7345748590305448} +12/28/2021 19:37:30 - INFO - codeparrot_training - Step 44464: {'lr': 1.5076245774906877e-05, 'samples': 22766080, 'steps': 44464, 'batch_loss/train': 0.7701140618883073} +12/28/2021 19:37:40 - INFO - codeparrot_training - Step 44465: {'lr': 1.5070855036237369e-05, 'samples': 22766592, 'steps': 44465, 'batch_loss/train': 0.7133084442466497} +12/28/2021 19:37:54 - INFO - codeparrot_training - Step 44466: {'lr': 1.5065465231557413e-05, 'samples': 22767104, 'steps': 44466, 'batch_loss/train': 0.7720306219998747} +12/28/2021 19:38:04 - INFO - codeparrot_training - Step 44467: {'lr': 1.5060076360888465e-05, 'samples': 22767616, 'steps': 44467, 'batch_loss/train': 0.8617779370397329} +12/28/2021 19:38:15 - INFO - codeparrot_training - Step 44468: {'lr': 1.505468842425195e-05, 'samples': 22768128, 'steps': 44468, 'batch_loss/train': 0.7054209299385548} +12/28/2021 19:38:27 - INFO - codeparrot_training - Step 44469: {'lr': 1.5049301421669298e-05, 'samples': 22768640, 'steps': 44469, 'batch_loss/train': 0.795904211467132} +12/28/2021 19:38:38 - INFO - codeparrot_training - Step 44470: {'lr': 1.5043915353161908e-05, 'samples': 22769152, 'steps': 44470, 'batch_loss/train': 0.7972309280885383} +12/28/2021 19:38:48 - INFO - codeparrot_training - Step 44471: {'lr': 1.5038530218751152e-05, 'samples': 22769664, 'steps': 44471, 'batch_loss/train': 0.6798349805176258} +12/28/2021 19:39:02 - INFO - codeparrot_training - Step 44472: {'lr': 1.503314601845851e-05, 'samples': 22770176, 'steps': 44472, 'batch_loss/train': 0.7055594231933355} +12/28/2021 19:39:12 - INFO - codeparrot_training - Step 44473: {'lr': 1.5027762752305385e-05, 'samples': 22770688, 'steps': 44473, 'batch_loss/train': 0.7955365704838187} +12/28/2021 19:39:23 - INFO - codeparrot_training - Step 44474: {'lr': 1.502238042031312e-05, 'samples': 22771200, 'steps': 44474, 'batch_loss/train': 0.6452991520054638} +12/28/2021 19:39:34 - INFO - codeparrot_training - Step 44475: {'lr': 1.501699902250317e-05, 'samples': 22771712, 'steps': 44475, 'batch_loss/train': 0.774208853719756} +12/28/2021 19:39:46 - INFO - codeparrot_training - Step 44476: {'lr': 1.5011618558896878e-05, 'samples': 22772224, 'steps': 44476, 'batch_loss/train': 0.6959541453979909} +12/28/2021 19:39:56 - INFO - codeparrot_training - Step 44477: {'lr': 1.5006239029515673e-05, 'samples': 22772736, 'steps': 44477, 'batch_loss/train': 0.7447398100048304} +12/28/2021 19:40:07 - INFO - codeparrot_training - Step 44478: {'lr': 1.5000860434380924e-05, 'samples': 22773248, 'steps': 44478, 'batch_loss/train': 0.6963043115101755} +12/28/2021 19:40:19 - INFO - codeparrot_training - Step 44479: {'lr': 1.4995482773514035e-05, 'samples': 22773760, 'steps': 44479, 'batch_loss/train': 0.7628959352150559} +12/28/2021 19:40:30 - INFO - codeparrot_training - Step 44480: {'lr': 1.4990106046936375e-05, 'samples': 22774272, 'steps': 44480, 'batch_loss/train': 0.7242788085713983} +12/28/2021 19:40:40 - INFO - codeparrot_training - Step 44481: {'lr': 1.4984730254669344e-05, 'samples': 22774784, 'steps': 44481, 'batch_loss/train': 0.7687091552652419} +12/28/2021 19:40:53 - INFO - codeparrot_training - Step 44482: {'lr': 1.4979355396734201e-05, 'samples': 22775296, 'steps': 44482, 'batch_loss/train': 0.6564990489277989} +12/28/2021 19:41:03 - INFO - codeparrot_training - Step 44483: {'lr': 1.497398147315246e-05, 'samples': 22775808, 'steps': 44483, 'batch_loss/train': 0.6720117116346955} +12/28/2021 19:41:14 - INFO - codeparrot_training - Step 44484: {'lr': 1.4968608483945433e-05, 'samples': 22776320, 'steps': 44484, 'batch_loss/train': 0.7607802310958505} +12/28/2021 19:41:27 - INFO - codeparrot_training - Step 44485: {'lr': 1.496323642913447e-05, 'samples': 22776832, 'steps': 44485, 'batch_loss/train': 0.7071882151067257} +12/28/2021 19:41:38 - INFO - codeparrot_training - Step 44486: {'lr': 1.495786530874088e-05, 'samples': 22777344, 'steps': 44486, 'batch_loss/train': 0.6981701664626598} +12/28/2021 19:41:48 - INFO - codeparrot_training - Step 44487: {'lr': 1.4952495122786153e-05, 'samples': 22777856, 'steps': 44487, 'batch_loss/train': 0.7288296609767713} +12/28/2021 19:41:59 - INFO - codeparrot_training - Step 44488: {'lr': 1.4947125871291516e-05, 'samples': 22778368, 'steps': 44488, 'batch_loss/train': 0.7071274691261351} +12/28/2021 19:42:11 - INFO - codeparrot_training - Step 44489: {'lr': 1.4941757554278346e-05, 'samples': 22778880, 'steps': 44489, 'batch_loss/train': 0.913590332493186} +12/28/2021 19:42:22 - INFO - codeparrot_training - Step 44490: {'lr': 1.4936390171767983e-05, 'samples': 22779392, 'steps': 44490, 'batch_loss/train': 0.7269970765337348} +12/28/2021 19:42:32 - INFO - codeparrot_training - Step 44491: {'lr': 1.4931023723781801e-05, 'samples': 22779904, 'steps': 44491, 'batch_loss/train': 0.7147567961364985} +12/28/2021 19:42:45 - INFO - codeparrot_training - Step 44492: {'lr': 1.4925658210341087e-05, 'samples': 22780416, 'steps': 44492, 'batch_loss/train': 0.6287444308400154} +12/28/2021 19:42:55 - INFO - codeparrot_training - Step 44493: {'lr': 1.4920293631467214e-05, 'samples': 22780928, 'steps': 44493, 'batch_loss/train': 0.8074891953729093} +12/28/2021 19:43:06 - INFO - codeparrot_training - Step 44494: {'lr': 1.491492998718147e-05, 'samples': 22781440, 'steps': 44494, 'batch_loss/train': 0.7872083494439721} +12/28/2021 19:43:17 - INFO - codeparrot_training - Step 44495: {'lr': 1.4909567277505226e-05, 'samples': 22781952, 'steps': 44495, 'batch_loss/train': 0.7252486329525709} +12/28/2021 19:43:30 - INFO - codeparrot_training - Step 44496: {'lr': 1.49042055024598e-05, 'samples': 22782464, 'steps': 44496, 'batch_loss/train': 0.7174284995417111} +12/28/2021 19:43:41 - INFO - codeparrot_training - Step 44497: {'lr': 1.4898844662066424e-05, 'samples': 22782976, 'steps': 44497, 'batch_loss/train': 0.6688984003849328} +12/28/2021 19:43:51 - INFO - codeparrot_training - Step 44498: {'lr': 1.4893484756346471e-05, 'samples': 22783488, 'steps': 44498, 'batch_loss/train': 0.7584341485053301} +12/28/2021 19:44:03 - INFO - codeparrot_training - Step 44499: {'lr': 1.488812578532131e-05, 'samples': 22784000, 'steps': 44499, 'batch_loss/train': 0.7493745271349326} +12/28/2021 19:44:14 - INFO - codeparrot_training - Step 44500: {'lr': 1.4882767749012149e-05, 'samples': 22784512, 'steps': 44500, 'batch_loss/train': 0.7807084331288934} +12/28/2021 19:44:24 - INFO - codeparrot_training - Step 44501: {'lr': 1.4877410647440331e-05, 'samples': 22785024, 'steps': 44501, 'batch_loss/train': 0.7058289067354053} +12/28/2021 19:44:38 - INFO - codeparrot_training - Step 44502: {'lr': 1.4872054480627145e-05, 'samples': 22785536, 'steps': 44502, 'batch_loss/train': 0.6780203452799469} +12/28/2021 19:44:48 - INFO - codeparrot_training - Step 44503: {'lr': 1.4866699248593907e-05, 'samples': 22786048, 'steps': 44503, 'batch_loss/train': 0.7580363173037767} +12/28/2021 19:44:59 - INFO - codeparrot_training - Step 44504: {'lr': 1.486134495136185e-05, 'samples': 22786560, 'steps': 44504, 'batch_loss/train': 0.7588818897493184} +12/28/2021 19:45:10 - INFO - codeparrot_training - Step 44505: {'lr': 1.4855991588952344e-05, 'samples': 22787072, 'steps': 44505, 'batch_loss/train': 0.7222316483967006} +12/28/2021 19:45:22 - INFO - codeparrot_training - Step 44506: {'lr': 1.4850639161386598e-05, 'samples': 22787584, 'steps': 44506, 'batch_loss/train': 0.7358269635587931} +12/28/2021 19:45:32 - INFO - codeparrot_training - Step 44507: {'lr': 1.4845287668685924e-05, 'samples': 22788096, 'steps': 44507, 'batch_loss/train': 0.7553306819172576} +12/28/2021 19:45:43 - INFO - codeparrot_training - Step 44508: {'lr': 1.4839937110871615e-05, 'samples': 22788608, 'steps': 44508, 'batch_loss/train': 0.6801475349348038} +12/28/2021 19:45:55 - INFO - codeparrot_training - Step 44509: {'lr': 1.4834587487964901e-05, 'samples': 22789120, 'steps': 44509, 'batch_loss/train': 0.6036784470779821} +12/28/2021 19:46:06 - INFO - codeparrot_training - Step 44510: {'lr': 1.4829238799987127e-05, 'samples': 22789632, 'steps': 44510, 'batch_loss/train': 0.7069858773611486} +12/28/2021 19:46:16 - INFO - codeparrot_training - Step 44511: {'lr': 1.4823891046959443e-05, 'samples': 22790144, 'steps': 44511, 'batch_loss/train': 0.7299148524180055} +12/28/2021 19:46:30 - INFO - codeparrot_training - Step 44512: {'lr': 1.4818544228903164e-05, 'samples': 22790656, 'steps': 44512, 'batch_loss/train': 0.6460038721561432} +12/28/2021 19:46:40 - INFO - codeparrot_training - Step 44513: {'lr': 1.4813198345839607e-05, 'samples': 22791168, 'steps': 44513, 'batch_loss/train': 0.5481710135936737} +12/28/2021 19:46:51 - INFO - codeparrot_training - Step 44514: {'lr': 1.4807853397789922e-05, 'samples': 22791680, 'steps': 44514, 'batch_loss/train': 0.7513115899637341} +12/28/2021 19:47:02 - INFO - codeparrot_training - Step 44515: {'lr': 1.4802509384775425e-05, 'samples': 22792192, 'steps': 44515, 'batch_loss/train': 0.6331357881426811} +12/28/2021 19:47:14 - INFO - codeparrot_training - Step 44516: {'lr': 1.479716630681735e-05, 'samples': 22792704, 'steps': 44516, 'batch_loss/train': 0.7425076486542821} +12/28/2021 19:47:25 - INFO - codeparrot_training - Step 44517: {'lr': 1.4791824163936902e-05, 'samples': 22793216, 'steps': 44517, 'batch_loss/train': 0.8086816882714629} +12/28/2021 19:47:35 - INFO - codeparrot_training - Step 44518: {'lr': 1.4786482956155367e-05, 'samples': 22793728, 'steps': 44518, 'batch_loss/train': 0.8453871898818761} +12/28/2021 19:47:47 - INFO - codeparrot_training - Step 44519: {'lr': 1.4781142683493982e-05, 'samples': 22794240, 'steps': 44519, 'batch_loss/train': 1.494638395961374} +12/28/2021 19:47:58 - INFO - codeparrot_training - Step 44520: {'lr': 1.4775803345973948e-05, 'samples': 22794752, 'steps': 44520, 'batch_loss/train': 0.7811250928789377} +12/28/2021 19:48:09 - INFO - codeparrot_training - Step 44521: {'lr': 1.4770464943616474e-05, 'samples': 22795264, 'steps': 44521, 'batch_loss/train': 0.741895878687501} +12/28/2021 19:48:21 - INFO - codeparrot_training - Step 44522: {'lr': 1.4765127476442874e-05, 'samples': 22795776, 'steps': 44522, 'batch_loss/train': 0.6657720973016694} +12/28/2021 19:48:32 - INFO - codeparrot_training - Step 44523: {'lr': 1.4759790944474243e-05, 'samples': 22796288, 'steps': 44523, 'batch_loss/train': 0.665813154540956} +12/28/2021 19:48:43 - INFO - codeparrot_training - Step 44524: {'lr': 1.4754455347731871e-05, 'samples': 22796800, 'steps': 44524, 'batch_loss/train': 0.7404964622110128} +12/28/2021 19:48:56 - INFO - codeparrot_training - Step 44525: {'lr': 1.4749120686237016e-05, 'samples': 22797312, 'steps': 44525, 'batch_loss/train': 0.4371933205402456} +12/28/2021 19:49:07 - INFO - codeparrot_training - Step 44526: {'lr': 1.4743786960010774e-05, 'samples': 22797824, 'steps': 44526, 'batch_loss/train': 0.7007467793300748} +12/28/2021 19:49:17 - INFO - codeparrot_training - Step 44527: {'lr': 1.4738454169074433e-05, 'samples': 22798336, 'steps': 44527, 'batch_loss/train': 0.5542737258947454} +12/28/2021 19:49:28 - INFO - codeparrot_training - Step 44528: {'lr': 1.4733122313449143e-05, 'samples': 22798848, 'steps': 44528, 'batch_loss/train': 0.7886454910039902} +12/28/2021 19:49:40 - INFO - codeparrot_training - Step 44529: {'lr': 1.4727791393156136e-05, 'samples': 22799360, 'steps': 44529, 'batch_loss/train': 0.7976474976167083} +12/28/2021 19:49:51 - INFO - codeparrot_training - Step 44530: {'lr': 1.4722461408216592e-05, 'samples': 22799872, 'steps': 44530, 'batch_loss/train': 0.7676403895020485} +12/28/2021 19:50:01 - INFO - codeparrot_training - Step 44531: {'lr': 1.4717132358651686e-05, 'samples': 22800384, 'steps': 44531, 'batch_loss/train': 0.6877498154062778} +12/28/2021 19:50:15 - INFO - codeparrot_training - Step 44532: {'lr': 1.4711804244482651e-05, 'samples': 22800896, 'steps': 44532, 'batch_loss/train': 0.9467433326644823} +12/28/2021 19:50:26 - INFO - codeparrot_training - Step 44533: {'lr': 1.4706477065730639e-05, 'samples': 22801408, 'steps': 44533, 'batch_loss/train': 0.752078311983496} +12/28/2021 19:50:36 - INFO - codeparrot_training - Step 44534: {'lr': 1.4701150822416825e-05, 'samples': 22801920, 'steps': 44534, 'batch_loss/train': 0.9534788858145475} +12/28/2021 19:50:47 - INFO - codeparrot_training - Step 44535: {'lr': 1.469582551456239e-05, 'samples': 22802432, 'steps': 44535, 'batch_loss/train': 0.7366510955616832} +12/28/2021 19:50:59 - INFO - codeparrot_training - Step 44536: {'lr': 1.4690501142188533e-05, 'samples': 22802944, 'steps': 44536, 'batch_loss/train': 0.6911252613645047} +12/28/2021 19:51:10 - INFO - codeparrot_training - Step 44537: {'lr': 1.4685177705316354e-05, 'samples': 22803456, 'steps': 44537, 'batch_loss/train': 0.720787369646132} +12/28/2021 19:51:20 - INFO - codeparrot_training - Step 44538: {'lr': 1.467985520396703e-05, 'samples': 22803968, 'steps': 44538, 'batch_loss/train': 0.7609802111983299} +12/28/2021 19:51:32 - INFO - codeparrot_training - Step 44539: {'lr': 1.4674533638161819e-05, 'samples': 22804480, 'steps': 44539, 'batch_loss/train': 0.8652660856023431} +12/28/2021 19:51:43 - INFO - codeparrot_training - Step 44540: {'lr': 1.4669213007921788e-05, 'samples': 22804992, 'steps': 44540, 'batch_loss/train': 0.7479615425691009} +12/28/2021 19:51:54 - INFO - codeparrot_training - Step 44541: {'lr': 1.4663893313268034e-05, 'samples': 22805504, 'steps': 44541, 'batch_loss/train': 0.9545808425173163} +12/28/2021 19:52:07 - INFO - codeparrot_training - Step 44542: {'lr': 1.4658574554221898e-05, 'samples': 22806016, 'steps': 44542, 'batch_loss/train': 0.720602935180068} +12/28/2021 19:52:17 - INFO - codeparrot_training - Step 44543: {'lr': 1.4653256730804337e-05, 'samples': 22806528, 'steps': 44543, 'batch_loss/train': 0.702931213658303} +12/28/2021 19:52:28 - INFO - codeparrot_training - Step 44544: {'lr': 1.4647939843036584e-05, 'samples': 22807040, 'steps': 44544, 'batch_loss/train': 0.7926341239362955} +12/28/2021 19:52:40 - INFO - codeparrot_training - Step 44545: {'lr': 1.464262389093976e-05, 'samples': 22807552, 'steps': 44545, 'batch_loss/train': 0.727089126361534} +12/28/2021 19:52:51 - INFO - codeparrot_training - Step 44546: {'lr': 1.463730887453496e-05, 'samples': 22808064, 'steps': 44546, 'batch_loss/train': 0.8253916576504707} +12/28/2021 19:53:01 - INFO - codeparrot_training - Step 44547: {'lr': 1.463199479384339e-05, 'samples': 22808576, 'steps': 44547, 'batch_loss/train': 0.7262849387479946} +12/28/2021 19:53:12 - INFO - codeparrot_training - Step 44548: {'lr': 1.462668164888617e-05, 'samples': 22809088, 'steps': 44548, 'batch_loss/train': 0.7712957737967372} +12/28/2021 19:53:24 - INFO - codeparrot_training - Step 44549: {'lr': 1.4621369439684285e-05, 'samples': 22809600, 'steps': 44549, 'batch_loss/train': 0.6102518690750003} +12/28/2021 19:53:35 - INFO - codeparrot_training - Step 44550: {'lr': 1.4616058166259022e-05, 'samples': 22810112, 'steps': 44550, 'batch_loss/train': 0.7403649194166064} +12/28/2021 19:53:45 - INFO - codeparrot_training - Step 44551: {'lr': 1.4610747828631476e-05, 'samples': 22810624, 'steps': 44551, 'batch_loss/train': 0.6211210582405329} +12/28/2021 19:53:57 - INFO - codeparrot_training - Step 44552: {'lr': 1.460543842682266e-05, 'samples': 22811136, 'steps': 44552, 'batch_loss/train': 0.7118547293357551} +12/28/2021 19:54:08 - INFO - codeparrot_training - Step 44553: {'lr': 1.460012996085372e-05, 'samples': 22811648, 'steps': 44553, 'batch_loss/train': 0.7858149260282516} +12/28/2021 19:54:19 - INFO - codeparrot_training - Step 44554: {'lr': 1.4594822430745835e-05, 'samples': 22812160, 'steps': 44554, 'batch_loss/train': 0.704813888296485} +12/28/2021 19:54:32 - INFO - codeparrot_training - Step 44555: {'lr': 1.4589515836520046e-05, 'samples': 22812672, 'steps': 44555, 'batch_loss/train': 0.7314496566541493} +12/28/2021 19:54:43 - INFO - codeparrot_training - Step 44556: {'lr': 1.4584210178197415e-05, 'samples': 22813184, 'steps': 44556, 'batch_loss/train': 0.7316136742010713} +12/28/2021 19:54:53 - INFO - codeparrot_training - Step 44557: {'lr': 1.4578905455799097e-05, 'samples': 22813696, 'steps': 44557, 'batch_loss/train': 1.0141364689916372} +12/28/2021 19:55:04 - INFO - codeparrot_training - Step 44558: {'lr': 1.4573601669346153e-05, 'samples': 22814208, 'steps': 44558, 'batch_loss/train': 0.7020819089375436} +12/28/2021 19:55:16 - INFO - codeparrot_training - Step 44559: {'lr': 1.4568298818859681e-05, 'samples': 22814720, 'steps': 44559, 'batch_loss/train': 0.7151258722878993} +12/28/2021 19:55:27 - INFO - codeparrot_training - Step 44560: {'lr': 1.4562996904360776e-05, 'samples': 22815232, 'steps': 44560, 'batch_loss/train': 0.6980341502930969} +12/28/2021 19:55:37 - INFO - codeparrot_training - Step 44561: {'lr': 1.4557695925870474e-05, 'samples': 22815744, 'steps': 44561, 'batch_loss/train': 0.7718272535130382} +12/28/2021 19:55:51 - INFO - codeparrot_training - Step 44562: {'lr': 1.455239588340987e-05, 'samples': 22816256, 'steps': 44562, 'batch_loss/train': 0.6602363232523203} +12/28/2021 19:56:01 - INFO - codeparrot_training - Step 44563: {'lr': 1.4547096777000084e-05, 'samples': 22816768, 'steps': 44563, 'batch_loss/train': 0.4502592319622636} +12/28/2021 19:56:12 - INFO - codeparrot_training - Step 44564: {'lr': 1.4541798606662049e-05, 'samples': 22817280, 'steps': 44564, 'batch_loss/train': 0.6566577809280716} +12/28/2021 19:56:24 - INFO - codeparrot_training - Step 44565: {'lr': 1.4536501372417022e-05, 'samples': 22817792, 'steps': 44565, 'batch_loss/train': 0.7207918101921678} +12/28/2021 19:56:35 - INFO - codeparrot_training - Step 44566: {'lr': 1.4531205074285875e-05, 'samples': 22818304, 'steps': 44566, 'batch_loss/train': 0.7177837376948446} +12/28/2021 19:56:45 - INFO - codeparrot_training - Step 44567: {'lr': 1.4525909712289759e-05, 'samples': 22818816, 'steps': 44567, 'batch_loss/train': 0.7238929360173643} +12/28/2021 19:56:56 - INFO - codeparrot_training - Step 44568: {'lr': 1.4520615286449767e-05, 'samples': 22819328, 'steps': 44568, 'batch_loss/train': 0.7775223220814951} +12/28/2021 19:57:08 - INFO - codeparrot_training - Step 44569: {'lr': 1.4515321796786857e-05, 'samples': 22819840, 'steps': 44569, 'batch_loss/train': 0.6363610322587192} +12/28/2021 19:57:19 - INFO - codeparrot_training - Step 44570: {'lr': 1.4510029243322092e-05, 'samples': 22820352, 'steps': 44570, 'batch_loss/train': 0.7656714431941509} +12/28/2021 19:57:30 - INFO - codeparrot_training - Step 44571: {'lr': 1.450473762607657e-05, 'samples': 22820864, 'steps': 44571, 'batch_loss/train': 0.8298880839720368} +12/28/2021 19:57:43 - INFO - codeparrot_training - Step 44572: {'lr': 1.4499446945071271e-05, 'samples': 22821376, 'steps': 44572, 'batch_loss/train': 0.7587624853476882} +12/28/2021 19:57:54 - INFO - codeparrot_training - Step 44573: {'lr': 1.4494157200327263e-05, 'samples': 22821888, 'steps': 44573, 'batch_loss/train': 0.7634810591116548} +12/28/2021 19:58:04 - INFO - codeparrot_training - Step 44574: {'lr': 1.4488868391865583e-05, 'samples': 22822400, 'steps': 44574, 'batch_loss/train': 0.6599106717621908} +12/28/2021 19:58:16 - INFO - codeparrot_training - Step 44575: {'lr': 1.4483580519707163e-05, 'samples': 22822912, 'steps': 44575, 'batch_loss/train': 0.7220694692805409} +12/28/2021 19:58:27 - INFO - codeparrot_training - Step 44576: {'lr': 1.4478293583873147e-05, 'samples': 22823424, 'steps': 44576, 'batch_loss/train': 0.828002137131989} +12/28/2021 19:58:38 - INFO - codeparrot_training - Step 44577: {'lr': 1.4473007584384552e-05, 'samples': 22823936, 'steps': 44577, 'batch_loss/train': 0.716884090565145} +12/28/2021 19:58:50 - INFO - codeparrot_training - Step 44578: {'lr': 1.4467722521262245e-05, 'samples': 22824448, 'steps': 44578, 'batch_loss/train': 0.7579092527739704} +12/28/2021 19:59:00 - INFO - codeparrot_training - Step 44579: {'lr': 1.4462438394527406e-05, 'samples': 22824960, 'steps': 44579, 'batch_loss/train': 0.8109311331063509} +12/28/2021 19:59:11 - INFO - codeparrot_training - Step 44580: {'lr': 1.445715520420099e-05, 'samples': 22825472, 'steps': 44580, 'batch_loss/train': 0.6767658167518675} +12/28/2021 19:59:22 - INFO - codeparrot_training - Step 44581: {'lr': 1.4451872950303951e-05, 'samples': 22825984, 'steps': 44581, 'batch_loss/train': 0.8298316905274987} +12/28/2021 19:59:35 - INFO - codeparrot_training - Step 44582: {'lr': 1.4446591632857331e-05, 'samples': 22826496, 'steps': 44582, 'batch_loss/train': 0.7468736725859344} +12/28/2021 19:59:46 - INFO - codeparrot_training - Step 44583: {'lr': 1.4441311251882139e-05, 'samples': 22827008, 'steps': 44583, 'batch_loss/train': 0.6665414017625153} +12/28/2021 19:59:56 - INFO - codeparrot_training - Step 44584: {'lr': 1.443603180739933e-05, 'samples': 22827520, 'steps': 44584, 'batch_loss/train': 0.6606636343058199} +12/28/2021 20:00:08 - INFO - codeparrot_training - Step 44585: {'lr': 1.4430753299429917e-05, 'samples': 22828032, 'steps': 44585, 'batch_loss/train': 0.7418468296527863} +12/28/2021 20:00:19 - INFO - codeparrot_training - Step 44586: {'lr': 1.442547572799488e-05, 'samples': 22828544, 'steps': 44586, 'batch_loss/train': 0.7449423023499548} +12/28/2021 20:00:30 - INFO - codeparrot_training - Step 44587: {'lr': 1.4420199093115205e-05, 'samples': 22829056, 'steps': 44587, 'batch_loss/train': 0.7426582234911621} +12/28/2021 20:00:42 - INFO - codeparrot_training - Step 44588: {'lr': 1.4414923394811875e-05, 'samples': 22829568, 'steps': 44588, 'batch_loss/train': 0.8382472153753042} +12/28/2021 20:00:53 - INFO - codeparrot_training - Step 44589: {'lr': 1.44096486331059e-05, 'samples': 22830080, 'steps': 44589, 'batch_loss/train': 0.7978941276669502} +12/28/2021 20:01:03 - INFO - codeparrot_training - Step 44590: {'lr': 1.4404374808018094e-05, 'samples': 22830592, 'steps': 44590, 'batch_loss/train': 0.6696567870676517} +12/28/2021 20:01:14 - INFO - codeparrot_training - Step 44591: {'lr': 1.439910191956964e-05, 'samples': 22831104, 'steps': 44591, 'batch_loss/train': 0.8640657365322113} +12/28/2021 20:01:28 - INFO - codeparrot_training - Step 44592: {'lr': 1.4393829967781352e-05, 'samples': 22831616, 'steps': 44592, 'batch_loss/train': 0.725426432210952} +12/28/2021 20:01:39 - INFO - codeparrot_training - Step 44593: {'lr': 1.4388558952674214e-05, 'samples': 22832128, 'steps': 44593, 'batch_loss/train': 0.9135311922291294} +12/28/2021 20:01:50 - INFO - codeparrot_training - Step 44594: {'lr': 1.4383288874269262e-05, 'samples': 22832640, 'steps': 44594, 'batch_loss/train': 0.7957166079431772} +12/28/2021 20:02:02 - INFO - codeparrot_training - Step 44595: {'lr': 1.4378019732587345e-05, 'samples': 22833152, 'steps': 44595, 'batch_loss/train': 0.6880183280445635} +12/28/2021 20:02:12 - INFO - codeparrot_training - Step 44596: {'lr': 1.437275152764947e-05, 'samples': 22833664, 'steps': 44596, 'batch_loss/train': 0.7241202036384493} +12/28/2021 20:02:23 - INFO - codeparrot_training - Step 44597: {'lr': 1.4367484259476538e-05, 'samples': 22834176, 'steps': 44597, 'batch_loss/train': 0.9589898651465774} +12/28/2021 20:02:34 - INFO - codeparrot_training - Step 44598: {'lr': 1.4362217928089561e-05, 'samples': 22834688, 'steps': 44598, 'batch_loss/train': 0.7773782266303897} +12/28/2021 20:02:46 - INFO - codeparrot_training - Step 44599: {'lr': 1.4356952533509382e-05, 'samples': 22835200, 'steps': 44599, 'batch_loss/train': 0.7256087698042393} +12/28/2021 20:02:56 - INFO - codeparrot_training - Step 44600: {'lr': 1.4351688075757069e-05, 'samples': 22835712, 'steps': 44600, 'batch_loss/train': 0.716180331306532} +12/28/2021 20:03:07 - INFO - codeparrot_training - Step 44601: {'lr': 1.4346424554853355e-05, 'samples': 22836224, 'steps': 44601, 'batch_loss/train': 0.7042367931571789} +12/28/2021 20:03:21 - INFO - codeparrot_training - Step 44602: {'lr': 1.4341161970819332e-05, 'samples': 22836736, 'steps': 44602, 'batch_loss/train': 0.7448403094895184} +12/28/2021 20:03:31 - INFO - codeparrot_training - Step 44603: {'lr': 1.4335900323675905e-05, 'samples': 22837248, 'steps': 44603, 'batch_loss/train': 0.6802072103600949} +12/28/2021 20:03:42 - INFO - codeparrot_training - Step 44604: {'lr': 1.4330639613443886e-05, 'samples': 22837760, 'steps': 44604, 'batch_loss/train': 0.6434643531683832} +12/28/2021 20:03:54 - INFO - codeparrot_training - Step 44605: {'lr': 1.4325379840144287e-05, 'samples': 22838272, 'steps': 44605, 'batch_loss/train': 0.8648756127804518} +12/28/2021 20:04:05 - INFO - codeparrot_training - Step 44606: {'lr': 1.4320121003798009e-05, 'samples': 22838784, 'steps': 44606, 'batch_loss/train': 0.6713795067043975} +12/28/2021 20:04:16 - INFO - codeparrot_training - Step 44607: {'lr': 1.4314863104425923e-05, 'samples': 22839296, 'steps': 44607, 'batch_loss/train': 0.7638258840888739} +12/28/2021 20:04:26 - INFO - codeparrot_training - Step 44608: {'lr': 1.430960614204896e-05, 'samples': 22839808, 'steps': 44608, 'batch_loss/train': 0.7283994844183326} +12/28/2021 20:04:38 - INFO - codeparrot_training - Step 44609: {'lr': 1.4304350116687987e-05, 'samples': 22840320, 'steps': 44609, 'batch_loss/train': 0.7345205172896385} +12/28/2021 20:04:49 - INFO - codeparrot_training - Step 44610: {'lr': 1.4299095028363935e-05, 'samples': 22840832, 'steps': 44610, 'batch_loss/train': 0.6838839170522988} +12/28/2021 20:05:00 - INFO - codeparrot_training - Step 44611: {'lr': 1.4293840877097702e-05, 'samples': 22841344, 'steps': 44611, 'batch_loss/train': 0.7615022836253047} +12/28/2021 20:05:13 - INFO - codeparrot_training - Step 44612: {'lr': 1.4288587662910135e-05, 'samples': 22841856, 'steps': 44612, 'batch_loss/train': 0.7719960613176227} +12/28/2021 20:05:24 - INFO - codeparrot_training - Step 44613: {'lr': 1.428333538582216e-05, 'samples': 22842368, 'steps': 44613, 'batch_loss/train': 0.6541840075515211} +12/28/2021 20:05:34 - INFO - codeparrot_training - Step 44614: {'lr': 1.4278084045854623e-05, 'samples': 22842880, 'steps': 44614, 'batch_loss/train': 0.7496933112852275} +12/28/2021 20:05:46 - INFO - codeparrot_training - Step 44615: {'lr': 1.4272833643028421e-05, 'samples': 22843392, 'steps': 44615, 'batch_loss/train': 0.7294842427363619} +12/28/2021 20:05:57 - INFO - codeparrot_training - Step 44616: {'lr': 1.4267584177364428e-05, 'samples': 22843904, 'steps': 44616, 'batch_loss/train': 0.7230412242934108} +12/28/2021 20:06:08 - INFO - codeparrot_training - Step 44617: {'lr': 1.4262335648883545e-05, 'samples': 22844416, 'steps': 44617, 'batch_loss/train': 0.6745692519471049} +12/28/2021 20:06:18 - INFO - codeparrot_training - Step 44618: {'lr': 1.425708805760656e-05, 'samples': 22844928, 'steps': 44618, 'batch_loss/train': 0.8079744146671146} +12/28/2021 20:06:31 - INFO - codeparrot_training - Step 44619: {'lr': 1.4251841403554372e-05, 'samples': 22845440, 'steps': 44619, 'batch_loss/train': 0.7795685436576605} +12/28/2021 20:06:42 - INFO - codeparrot_training - Step 44620: {'lr': 1.4246595686747883e-05, 'samples': 22845952, 'steps': 44620, 'batch_loss/train': 0.7855383764253929} +12/28/2021 20:06:53 - INFO - codeparrot_training - Step 44621: {'lr': 1.4241350907207906e-05, 'samples': 22846464, 'steps': 44621, 'batch_loss/train': 0.737085344735533} +12/28/2021 20:07:05 - INFO - codeparrot_training - Step 44622: {'lr': 1.4236107064955289e-05, 'samples': 22846976, 'steps': 44622, 'batch_loss/train': 0.7090542023070157} +12/28/2021 20:07:15 - INFO - codeparrot_training - Step 44623: {'lr': 1.4230864160010876e-05, 'samples': 22847488, 'steps': 44623, 'batch_loss/train': 0.7908072602003813} +12/28/2021 20:07:26 - INFO - codeparrot_training - Step 44624: {'lr': 1.4225622192395538e-05, 'samples': 22848000, 'steps': 44624, 'batch_loss/train': 0.7977840937674046} +12/28/2021 20:07:38 - INFO - codeparrot_training - Step 44625: {'lr': 1.4220381162130119e-05, 'samples': 22848512, 'steps': 44625, 'batch_loss/train': 0.6947796046733856} +12/28/2021 20:07:49 - INFO - codeparrot_training - Step 44626: {'lr': 1.4215141069235437e-05, 'samples': 22849024, 'steps': 44626, 'batch_loss/train': 0.7783204400911927} +12/28/2021 20:07:59 - INFO - codeparrot_training - Step 44627: {'lr': 1.4209901913732281e-05, 'samples': 22849536, 'steps': 44627, 'batch_loss/train': 0.8233035178855062} +12/28/2021 20:08:10 - INFO - codeparrot_training - Step 44628: {'lr': 1.4204663695641551e-05, 'samples': 22850048, 'steps': 44628, 'batch_loss/train': 0.6981205749325454} +12/28/2021 20:08:22 - INFO - codeparrot_training - Step 44629: {'lr': 1.419942641498409e-05, 'samples': 22850560, 'steps': 44629, 'batch_loss/train': 0.6735198878450319} +12/28/2021 20:08:33 - INFO - codeparrot_training - Step 44630: {'lr': 1.4194190071780577e-05, 'samples': 22851072, 'steps': 44630, 'batch_loss/train': 0.7397234360687435} +12/28/2021 20:08:44 - INFO - codeparrot_training - Step 44631: {'lr': 1.4188954666051995e-05, 'samples': 22851584, 'steps': 44631, 'batch_loss/train': 0.5608922424726188} +12/28/2021 20:08:57 - INFO - codeparrot_training - Step 44632: {'lr': 1.4183720197819106e-05, 'samples': 22852096, 'steps': 44632, 'batch_loss/train': 0.7543417075648904} +12/28/2021 20:09:08 - INFO - codeparrot_training - Step 44633: {'lr': 1.417848666710267e-05, 'samples': 22852608, 'steps': 44633, 'batch_loss/train': 0.8138433094136417} +12/28/2021 20:09:18 - INFO - codeparrot_training - Step 44634: {'lr': 1.4173254073923502e-05, 'samples': 22853120, 'steps': 44634, 'batch_loss/train': 0.7560738874599338} +12/28/2021 20:09:30 - INFO - codeparrot_training - Step 44635: {'lr': 1.4168022418302478e-05, 'samples': 22853632, 'steps': 44635, 'batch_loss/train': 0.7821311922743917} +12/28/2021 20:09:41 - INFO - codeparrot_training - Step 44636: {'lr': 1.4162791700260359e-05, 'samples': 22854144, 'steps': 44636, 'batch_loss/train': 0.6991421412676573} +12/28/2021 20:09:52 - INFO - codeparrot_training - Step 44637: {'lr': 1.4157561919817902e-05, 'samples': 22854656, 'steps': 44637, 'batch_loss/train': 0.7604072648100555} +12/28/2021 20:10:02 - INFO - codeparrot_training - Step 44638: {'lr': 1.4152333076995927e-05, 'samples': 22855168, 'steps': 44638, 'batch_loss/train': 0.6793890167027712} +12/28/2021 20:10:14 - INFO - codeparrot_training - Step 44639: {'lr': 1.4147105171815222e-05, 'samples': 22855680, 'steps': 44639, 'batch_loss/train': 0.7479772865772247} +12/28/2021 20:10:25 - INFO - codeparrot_training - Step 44640: {'lr': 1.4141878204296577e-05, 'samples': 22856192, 'steps': 44640, 'batch_loss/train': 0.7156023448333144} +12/28/2021 20:10:36 - INFO - codeparrot_training - Step 44641: {'lr': 1.413665217446078e-05, 'samples': 22856704, 'steps': 44641, 'batch_loss/train': 0.727549722418189} +12/28/2021 20:10:49 - INFO - codeparrot_training - Step 44642: {'lr': 1.4131427082328562e-05, 'samples': 22857216, 'steps': 44642, 'batch_loss/train': 0.8006944889202714} +12/28/2021 20:10:59 - INFO - codeparrot_training - Step 44643: {'lr': 1.4126202927920773e-05, 'samples': 22857728, 'steps': 44643, 'batch_loss/train': 0.6811465863138437} +12/28/2021 20:11:10 - INFO - codeparrot_training - Step 44644: {'lr': 1.4120979711258142e-05, 'samples': 22858240, 'steps': 44644, 'batch_loss/train': 0.7160113987047225} +12/28/2021 20:11:23 - INFO - codeparrot_training - Step 44645: {'lr': 1.4115757432361375e-05, 'samples': 22858752, 'steps': 44645, 'batch_loss/train': 0.7337289098650217} +12/28/2021 20:11:33 - INFO - codeparrot_training - Step 44646: {'lr': 1.4110536091251347e-05, 'samples': 22859264, 'steps': 44646, 'batch_loss/train': 0.840433114208281} +12/28/2021 20:11:44 - INFO - codeparrot_training - Step 44647: {'lr': 1.4105315687948733e-05, 'samples': 22859776, 'steps': 44647, 'batch_loss/train': 0.757107425481081} +12/28/2021 20:11:55 - INFO - codeparrot_training - Step 44648: {'lr': 1.4100096222474323e-05, 'samples': 22860288, 'steps': 44648, 'batch_loss/train': 0.9849321125075221} +12/28/2021 20:12:08 - INFO - codeparrot_training - Step 44649: {'lr': 1.4094877694848852e-05, 'samples': 22860800, 'steps': 44649, 'batch_loss/train': 0.6806617630645633} +12/28/2021 20:12:19 - INFO - codeparrot_training - Step 44650: {'lr': 1.4089660105093077e-05, 'samples': 22861312, 'steps': 44650, 'batch_loss/train': 0.7606039543170482} +12/28/2021 20:12:29 - INFO - codeparrot_training - Step 44651: {'lr': 1.4084443453227736e-05, 'samples': 22861824, 'steps': 44651, 'batch_loss/train': 0.8455743631348014} +12/28/2021 20:12:41 - INFO - codeparrot_training - Step 44652: {'lr': 1.407922773927356e-05, 'samples': 22862336, 'steps': 44652, 'batch_loss/train': 0.8080253070220351} +12/28/2021 20:12:52 - INFO - codeparrot_training - Step 44653: {'lr': 1.407401296325128e-05, 'samples': 22862848, 'steps': 44653, 'batch_loss/train': 0.7250653272494674} +12/28/2021 20:13:03 - INFO - codeparrot_training - Step 44654: {'lr': 1.4068799125181663e-05, 'samples': 22863360, 'steps': 44654, 'batch_loss/train': 0.7538965339772403} +12/28/2021 20:13:15 - INFO - codeparrot_training - Step 44655: {'lr': 1.4063586225085467e-05, 'samples': 22863872, 'steps': 44655, 'batch_loss/train': 0.6913668471388519} +12/28/2021 20:13:26 - INFO - codeparrot_training - Step 44656: {'lr': 1.4058374262983259e-05, 'samples': 22864384, 'steps': 44656, 'batch_loss/train': 0.7989012124016881} +12/28/2021 20:13:36 - INFO - codeparrot_training - Step 44657: {'lr': 1.405316323889594e-05, 'samples': 22864896, 'steps': 44657, 'batch_loss/train': 1.052779086632654} +12/28/2021 20:13:47 - INFO - codeparrot_training - Step 44658: {'lr': 1.4047953152844156e-05, 'samples': 22865408, 'steps': 44658, 'batch_loss/train': 0.7212256686761975} +12/28/2021 20:14:00 - INFO - codeparrot_training - Step 44659: {'lr': 1.404274400484859e-05, 'samples': 22865920, 'steps': 44659, 'batch_loss/train': 0.7143307207152247} +12/28/2021 20:14:11 - INFO - codeparrot_training - Step 44660: {'lr': 1.4037535794929973e-05, 'samples': 22866432, 'steps': 44660, 'batch_loss/train': 0.7114345761947334} +12/28/2021 20:14:22 - INFO - codeparrot_training - Step 44661: {'lr': 1.4032328523109067e-05, 'samples': 22866944, 'steps': 44661, 'batch_loss/train': 0.7301865347544663} +12/28/2021 20:14:34 - INFO - codeparrot_training - Step 44662: {'lr': 1.4027122189406466e-05, 'samples': 22867456, 'steps': 44662, 'batch_loss/train': 0.7078115004114807} +12/28/2021 20:14:44 - INFO - codeparrot_training - Step 44663: {'lr': 1.4021916793842958e-05, 'samples': 22867968, 'steps': 44663, 'batch_loss/train': 0.7508108862675726} +12/28/2021 20:14:55 - INFO - codeparrot_training - Step 44664: {'lr': 1.4016712336439197e-05, 'samples': 22868480, 'steps': 44664, 'batch_loss/train': 1.4650592114776373} +12/28/2021 20:15:07 - INFO - codeparrot_training - Step 44665: {'lr': 1.4011508817215885e-05, 'samples': 22868992, 'steps': 44665, 'batch_loss/train': 0.7075353180989623} +12/28/2021 20:15:18 - INFO - codeparrot_training - Step 44666: {'lr': 1.4006306236193728e-05, 'samples': 22869504, 'steps': 44666, 'batch_loss/train': 0.5938265890581533} +12/28/2021 20:15:28 - INFO - codeparrot_training - Step 44667: {'lr': 1.4001104593393376e-05, 'samples': 22870016, 'steps': 44667, 'batch_loss/train': 0.6784295805264264} +12/28/2021 20:15:39 - INFO - codeparrot_training - Step 44668: {'lr': 1.399590388883551e-05, 'samples': 22870528, 'steps': 44668, 'batch_loss/train': 0.7979269614443183} +12/28/2021 20:15:53 - INFO - codeparrot_training - Step 44669: {'lr': 1.3990704122540831e-05, 'samples': 22871040, 'steps': 44669, 'batch_loss/train': 0.6353920153342187} +12/28/2021 20:16:04 - INFO - codeparrot_training - Step 44670: {'lr': 1.398550529453002e-05, 'samples': 22871552, 'steps': 44670, 'batch_loss/train': 0.6903884320054203} +12/28/2021 20:16:14 - INFO - codeparrot_training - Step 44671: {'lr': 1.3980307404823673e-05, 'samples': 22872064, 'steps': 44671, 'batch_loss/train': 0.7025751858018339} +12/28/2021 20:16:26 - INFO - codeparrot_training - Step 44672: {'lr': 1.3975110453442546e-05, 'samples': 22872576, 'steps': 44672, 'batch_loss/train': 0.7788038358557969} +12/28/2021 20:16:37 - INFO - codeparrot_training - Step 44673: {'lr': 1.3969914440407266e-05, 'samples': 22873088, 'steps': 44673, 'batch_loss/train': 0.775616752798669} +12/28/2021 20:16:48 - INFO - codeparrot_training - Step 44674: {'lr': 1.3964719365738454e-05, 'samples': 22873600, 'steps': 44674, 'batch_loss/train': 0.8007147093303502} +12/28/2021 20:17:00 - INFO - codeparrot_training - Step 44675: {'lr': 1.3959525229456815e-05, 'samples': 22874112, 'steps': 44675, 'batch_loss/train': 0.8962728190235794} +12/28/2021 20:17:10 - INFO - codeparrot_training - Step 44676: {'lr': 1.3954332031582972e-05, 'samples': 22874624, 'steps': 44676, 'batch_loss/train': 0.7324632462114096} +12/28/2021 20:17:21 - INFO - codeparrot_training - Step 44677: {'lr': 1.3949139772137549e-05, 'samples': 22875136, 'steps': 44677, 'batch_loss/train': 0.857424674089998} +12/28/2021 20:17:35 - INFO - codeparrot_training - Step 44678: {'lr': 1.3943948451141247e-05, 'samples': 22875648, 'steps': 44678, 'batch_loss/train': 0.6944731832481921} +12/28/2021 20:17:45 - INFO - codeparrot_training - Step 44679: {'lr': 1.3938758068614666e-05, 'samples': 22876160, 'steps': 44679, 'batch_loss/train': 0.726835222914815} +12/28/2021 20:17:56 - INFO - codeparrot_training - Step 44680: {'lr': 1.3933568624578452e-05, 'samples': 22876672, 'steps': 44680, 'batch_loss/train': 0.6782371043227613} +12/28/2021 20:18:06 - INFO - codeparrot_training - Step 44681: {'lr': 1.3928380119053285e-05, 'samples': 22877184, 'steps': 44681, 'batch_loss/train': 0.74814941175282} +12/28/2021 20:18:19 - INFO - codeparrot_training - Step 44682: {'lr': 1.392319255205965e-05, 'samples': 22877696, 'steps': 44682, 'batch_loss/train': 0.9102813815698028} +12/28/2021 20:18:29 - INFO - codeparrot_training - Step 44683: {'lr': 1.3918005923618304e-05, 'samples': 22878208, 'steps': 44683, 'batch_loss/train': 0.7402809616178274} +12/28/2021 20:18:40 - INFO - codeparrot_training - Step 44684: {'lr': 1.3912820233749873e-05, 'samples': 22878720, 'steps': 44684, 'batch_loss/train': 0.6976939258165658} +12/28/2021 20:18:52 - INFO - codeparrot_training - Step 44685: {'lr': 1.3907635482474867e-05, 'samples': 22879232, 'steps': 44685, 'batch_loss/train': 0.7242323467507958} +12/28/2021 20:19:03 - INFO - codeparrot_training - Step 44686: {'lr': 1.3902451669813936e-05, 'samples': 22879744, 'steps': 44686, 'batch_loss/train': 0.7984049608930945} +12/28/2021 20:19:13 - INFO - codeparrot_training - Step 44687: {'lr': 1.389726879578776e-05, 'samples': 22880256, 'steps': 44687, 'batch_loss/train': 0.6507836659438908} +12/28/2021 20:19:26 - INFO - codeparrot_training - Step 44688: {'lr': 1.3892086860416903e-05, 'samples': 22880768, 'steps': 44688, 'batch_loss/train': 0.7369827209040523} +12/28/2021 20:19:37 - INFO - codeparrot_training - Step 44689: {'lr': 1.3886905863721878e-05, 'samples': 22881280, 'steps': 44689, 'batch_loss/train': 0.6770531716756523} +12/28/2021 20:19:48 - INFO - codeparrot_training - Step 44690: {'lr': 1.3881725805723445e-05, 'samples': 22881792, 'steps': 44690, 'batch_loss/train': 0.7856999887153506} +12/28/2021 20:19:58 - INFO - codeparrot_training - Step 44691: {'lr': 1.3876546686442088e-05, 'samples': 22882304, 'steps': 44691, 'batch_loss/train': 0.7114997548051178} +12/28/2021 20:20:11 - INFO - codeparrot_training - Step 44692: {'lr': 1.387136850589843e-05, 'samples': 22882816, 'steps': 44692, 'batch_loss/train': 0.6934454112779349} +12/28/2021 20:20:21 - INFO - codeparrot_training - Step 44693: {'lr': 1.3866191264113037e-05, 'samples': 22883328, 'steps': 44693, 'batch_loss/train': 0.7254495467059314} +12/28/2021 20:20:32 - INFO - codeparrot_training - Step 44694: {'lr': 1.3861014961106533e-05, 'samples': 22883840, 'steps': 44694, 'batch_loss/train': 0.7102953158318996} +12/28/2021 20:20:45 - INFO - codeparrot_training - Step 44695: {'lr': 1.3855839596899427e-05, 'samples': 22884352, 'steps': 44695, 'batch_loss/train': 0.7861123755574226} +12/28/2021 20:20:56 - INFO - codeparrot_training - Step 44696: {'lr': 1.3850665171512427e-05, 'samples': 22884864, 'steps': 44696, 'batch_loss/train': 0.698181320913136} +12/28/2021 20:21:07 - INFO - codeparrot_training - Step 44697: {'lr': 1.3845491684965905e-05, 'samples': 22885376, 'steps': 44697, 'batch_loss/train': 0.6917777610942721} +12/28/2021 20:21:19 - INFO - codeparrot_training - Step 44698: {'lr': 1.3840319137280621e-05, 'samples': 22885888, 'steps': 44698, 'batch_loss/train': 0.6771051813848317} +12/28/2021 20:21:29 - INFO - codeparrot_training - Step 44699: {'lr': 1.3835147528477032e-05, 'samples': 22886400, 'steps': 44699, 'batch_loss/train': 0.8289607437327504} +12/28/2021 20:21:40 - INFO - codeparrot_training - Step 44700: {'lr': 1.3829976858575705e-05, 'samples': 22886912, 'steps': 44700, 'batch_loss/train': 0.6526250476017594} +12/28/2021 20:21:51 - INFO - codeparrot_training - Step 44701: {'lr': 1.3824807127597234e-05, 'samples': 22887424, 'steps': 44701, 'batch_loss/train': 0.7834526062943041} +12/28/2021 20:22:03 - INFO - codeparrot_training - Step 44702: {'lr': 1.3819638335562129e-05, 'samples': 22887936, 'steps': 44702, 'batch_loss/train': 0.714407961233519} +12/28/2021 20:22:14 - INFO - codeparrot_training - Step 44703: {'lr': 1.3814470482490988e-05, 'samples': 22888448, 'steps': 44703, 'batch_loss/train': 0.7890814635902643} +12/28/2021 20:22:24 - INFO - codeparrot_training - Step 44704: {'lr': 1.3809303568404319e-05, 'samples': 22888960, 'steps': 44704, 'batch_loss/train': 0.7130245509324595} +12/28/2021 20:22:38 - INFO - codeparrot_training - Step 44705: {'lr': 1.3804137593322692e-05, 'samples': 22889472, 'steps': 44705, 'batch_loss/train': 0.6593261775560677} +12/28/2021 20:22:48 - INFO - codeparrot_training - Step 44706: {'lr': 1.3798972557266614e-05, 'samples': 22889984, 'steps': 44706, 'batch_loss/train': 0.6507758991792798} +12/28/2021 20:22:59 - INFO - codeparrot_training - Step 44707: {'lr': 1.3793808460256685e-05, 'samples': 22890496, 'steps': 44707, 'batch_loss/train': 0.714032422285527} +12/28/2021 20:23:11 - INFO - codeparrot_training - Step 44708: {'lr': 1.3788645302313301e-05, 'samples': 22891008, 'steps': 44708, 'batch_loss/train': 0.8485724404454231} +12/28/2021 20:23:22 - INFO - codeparrot_training - Step 44709: {'lr': 1.3783483083457116e-05, 'samples': 22891520, 'steps': 44709, 'batch_loss/train': 0.659697150811553} +12/28/2021 20:23:32 - INFO - codeparrot_training - Step 44710: {'lr': 1.3778321803708637e-05, 'samples': 22892032, 'steps': 44710, 'batch_loss/train': 0.7900752332061529} +12/28/2021 20:23:43 - INFO - codeparrot_training - Step 44711: {'lr': 1.3773161463088353e-05, 'samples': 22892544, 'steps': 44711, 'batch_loss/train': 0.7140636593103409} +12/28/2021 20:23:55 - INFO - codeparrot_training - Step 44712: {'lr': 1.3768002061616714e-05, 'samples': 22893056, 'steps': 44712, 'batch_loss/train': 0.642985881306231} +12/28/2021 20:24:05 - INFO - codeparrot_training - Step 44713: {'lr': 1.3762843599314373e-05, 'samples': 22893568, 'steps': 44713, 'batch_loss/train': 0.7208838434889913} +12/28/2021 20:24:16 - INFO - codeparrot_training - Step 44714: {'lr': 1.375768607620173e-05, 'samples': 22894080, 'steps': 44714, 'batch_loss/train': 0.6389999362872913} +12/28/2021 20:24:28 - INFO - codeparrot_training - Step 44715: {'lr': 1.3752529492299298e-05, 'samples': 22894592, 'steps': 44715, 'batch_loss/train': 0.7196646835654974} +12/28/2021 20:24:39 - INFO - codeparrot_training - Step 44716: {'lr': 1.3747373847627698e-05, 'samples': 22895104, 'steps': 44716, 'batch_loss/train': 0.8056919118389487} +12/28/2021 20:24:49 - INFO - codeparrot_training - Step 44717: {'lr': 1.3742219142207275e-05, 'samples': 22895616, 'steps': 44717, 'batch_loss/train': 0.6899007408646867} +12/28/2021 20:25:03 - INFO - codeparrot_training - Step 44718: {'lr': 1.3737065376058566e-05, 'samples': 22896128, 'steps': 44718, 'batch_loss/train': 0.8230301961302757} +12/28/2021 20:25:13 - INFO - codeparrot_training - Step 44719: {'lr': 1.3731912549202113e-05, 'samples': 22896640, 'steps': 44719, 'batch_loss/train': 0.7275080503895879} +12/28/2021 20:25:24 - INFO - codeparrot_training - Step 44720: {'lr': 1.3726760661658344e-05, 'samples': 22897152, 'steps': 44720, 'batch_loss/train': 0.6979223147500306} +12/28/2021 20:25:35 - INFO - codeparrot_training - Step 44721: {'lr': 1.372160971344777e-05, 'samples': 22897664, 'steps': 44721, 'batch_loss/train': 0.7003626981750131} +12/28/2021 20:25:47 - INFO - codeparrot_training - Step 44722: {'lr': 1.3716459704590872e-05, 'samples': 22898176, 'steps': 44722, 'batch_loss/train': 0.7668045181781054} +12/28/2021 20:25:58 - INFO - codeparrot_training - Step 44723: {'lr': 1.3711310635108054e-05, 'samples': 22898688, 'steps': 44723, 'batch_loss/train': 0.7363303005695343} +12/28/2021 20:26:08 - INFO - codeparrot_training - Step 44724: {'lr': 1.3706162505019882e-05, 'samples': 22899200, 'steps': 44724, 'batch_loss/train': 0.5039713816659059} +12/28/2021 20:26:20 - INFO - codeparrot_training - Step 44725: {'lr': 1.3701015314346838e-05, 'samples': 22899712, 'steps': 44725, 'batch_loss/train': 0.7126512518152595} +12/28/2021 20:26:31 - INFO - codeparrot_training - Step 44726: {'lr': 1.3695869063109239e-05, 'samples': 22900224, 'steps': 44726, 'batch_loss/train': 0.848467942327261} +12/28/2021 20:26:41 - INFO - codeparrot_training - Step 44727: {'lr': 1.3690723751327739e-05, 'samples': 22900736, 'steps': 44727, 'batch_loss/train': 0.6615209685405716} +12/28/2021 20:26:55 - INFO - codeparrot_training - Step 44728: {'lr': 1.3685579379022623e-05, 'samples': 22901248, 'steps': 44728, 'batch_loss/train': 0.7108053625561297} +12/28/2021 20:27:05 - INFO - codeparrot_training - Step 44729: {'lr': 1.3680435946214459e-05, 'samples': 22901760, 'steps': 44729, 'batch_loss/train': 0.712003412656486} +12/28/2021 20:27:16 - INFO - codeparrot_training - Step 44730: {'lr': 1.3675293452923621e-05, 'samples': 22902272, 'steps': 44730, 'batch_loss/train': 0.6937810115050524} +12/28/2021 20:27:28 - INFO - codeparrot_training - Step 44731: {'lr': 1.3670151899170591e-05, 'samples': 22902784, 'steps': 44731, 'batch_loss/train': 0.8125794157385826} +12/28/2021 20:27:39 - INFO - codeparrot_training - Step 44732: {'lr': 1.3665011284975798e-05, 'samples': 22903296, 'steps': 44732, 'batch_loss/train': 0.7268615448847413} +12/28/2021 20:27:49 - INFO - codeparrot_training - Step 44733: {'lr': 1.365987161035967e-05, 'samples': 22903808, 'steps': 44733, 'batch_loss/train': 0.7462841691449285} +12/28/2021 20:28:00 - INFO - codeparrot_training - Step 44734: {'lr': 1.3654732875342663e-05, 'samples': 22904320, 'steps': 44734, 'batch_loss/train': 0.7649562498554587} +12/28/2021 20:28:13 - INFO - codeparrot_training - Step 44735: {'lr': 1.3649595079945204e-05, 'samples': 22904832, 'steps': 44735, 'batch_loss/train': 0.7245863699354231} +12/28/2021 20:28:24 - INFO - codeparrot_training - Step 44736: {'lr': 1.364445822418775e-05, 'samples': 22905344, 'steps': 44736, 'batch_loss/train': 0.7461247639730573} +12/28/2021 20:28:35 - INFO - codeparrot_training - Step 44737: {'lr': 1.3639322308090645e-05, 'samples': 22905856, 'steps': 44737, 'batch_loss/train': 0.7986645692144521} +12/28/2021 20:28:47 - INFO - codeparrot_training - Step 44738: {'lr': 1.363418733167432e-05, 'samples': 22906368, 'steps': 44738, 'batch_loss/train': 0.7942181953694671} +12/28/2021 20:28:58 - INFO - codeparrot_training - Step 44739: {'lr': 1.362905329495928e-05, 'samples': 22906880, 'steps': 44739, 'batch_loss/train': 0.7477365587837994} +12/28/2021 20:29:08 - INFO - codeparrot_training - Step 44740: {'lr': 1.362392019796585e-05, 'samples': 22907392, 'steps': 44740, 'batch_loss/train': 0.7107679951004684} +12/28/2021 20:29:20 - INFO - codeparrot_training - Step 44741: {'lr': 1.3618788040714425e-05, 'samples': 22907904, 'steps': 44741, 'batch_loss/train': 0.7919319105567411} +12/28/2021 20:29:31 - INFO - codeparrot_training - Step 44742: {'lr': 1.3613656823225517e-05, 'samples': 22908416, 'steps': 44742, 'batch_loss/train': 0.7951205215649679} +12/28/2021 20:29:42 - INFO - codeparrot_training - Step 44743: {'lr': 1.3608526545519389e-05, 'samples': 22908928, 'steps': 44743, 'batch_loss/train': 0.7130680353147909} +12/28/2021 20:29:53 - INFO - codeparrot_training - Step 44744: {'lr': 1.3603397207616525e-05, 'samples': 22909440, 'steps': 44744, 'batch_loss/train': 0.7109258144628257} +12/28/2021 20:30:06 - INFO - codeparrot_training - Step 44745: {'lr': 1.3598268809537295e-05, 'samples': 22909952, 'steps': 44745, 'batch_loss/train': 0.6123832773882896} +12/28/2021 20:30:17 - INFO - codeparrot_training - Step 44746: {'lr': 1.3593141351302101e-05, 'samples': 22910464, 'steps': 44746, 'batch_loss/train': 0.642116891220212} +12/28/2021 20:30:28 - INFO - codeparrot_training - Step 44747: {'lr': 1.358801483293129e-05, 'samples': 22910976, 'steps': 44747, 'batch_loss/train': 0.7111279107630253} +12/28/2021 20:30:40 - INFO - codeparrot_training - Step 44748: {'lr': 1.3582889254445313e-05, 'samples': 22911488, 'steps': 44748, 'batch_loss/train': 0.6953107081353664} +12/28/2021 20:30:50 - INFO - codeparrot_training - Step 44749: {'lr': 1.3577764615864407e-05, 'samples': 22912000, 'steps': 44749, 'batch_loss/train': 0.6417810344137251} +12/28/2021 20:31:01 - INFO - codeparrot_training - Step 44750: {'lr': 1.3572640917209084e-05, 'samples': 22912512, 'steps': 44750, 'batch_loss/train': 0.577302251389483} +12/28/2021 20:31:13 - INFO - codeparrot_training - Step 44751: {'lr': 1.3567518158499715e-05, 'samples': 22913024, 'steps': 44751, 'batch_loss/train': 0.6366529213264585} +12/28/2021 20:31:24 - INFO - codeparrot_training - Step 44752: {'lr': 1.3562396339756562e-05, 'samples': 22913536, 'steps': 44752, 'batch_loss/train': 0.7782060778699815} +12/28/2021 20:31:34 - INFO - codeparrot_training - Step 44753: {'lr': 1.3557275461000107e-05, 'samples': 22914048, 'steps': 44753, 'batch_loss/train': 1.5917660649865866} +12/28/2021 20:31:45 - INFO - codeparrot_training - Step 44754: {'lr': 1.3552155522250587e-05, 'samples': 22914560, 'steps': 44754, 'batch_loss/train': 0.6868020281544887} +12/28/2021 20:31:57 - INFO - codeparrot_training - Step 44755: {'lr': 1.3547036523528455e-05, 'samples': 22915072, 'steps': 44755, 'batch_loss/train': 0.7003633375279605} +12/28/2021 20:32:08 - INFO - codeparrot_training - Step 44756: {'lr': 1.3541918464854003e-05, 'samples': 22915584, 'steps': 44756, 'batch_loss/train': 0.7609743759967387} +12/28/2021 20:32:18 - INFO - codeparrot_training - Step 44757: {'lr': 1.35368013462476e-05, 'samples': 22916096, 'steps': 44757, 'batch_loss/train': 0.5476430773269385} +12/28/2021 20:32:32 - INFO - codeparrot_training - Step 44758: {'lr': 1.3531685167729597e-05, 'samples': 22916608, 'steps': 44758, 'batch_loss/train': 0.6999204019084573} +12/28/2021 20:32:42 - INFO - codeparrot_training - Step 44759: {'lr': 1.3526569929320305e-05, 'samples': 22917120, 'steps': 44759, 'batch_loss/train': 0.5382411060854793} +12/28/2021 20:32:53 - INFO - codeparrot_training - Step 44760: {'lr': 1.3521455631040097e-05, 'samples': 22917632, 'steps': 44760, 'batch_loss/train': 0.63977513439022} +12/28/2021 20:33:05 - INFO - codeparrot_training - Step 44761: {'lr': 1.3516342272909294e-05, 'samples': 22918144, 'steps': 44761, 'batch_loss/train': 0.7363593702903017} +12/28/2021 20:33:16 - INFO - codeparrot_training - Step 44762: {'lr': 1.3511229854948265e-05, 'samples': 22918656, 'steps': 44762, 'batch_loss/train': 0.7169270445592701} +12/28/2021 20:33:26 - INFO - codeparrot_training - Step 44763: {'lr': 1.3506118377177217e-05, 'samples': 22919168, 'steps': 44763, 'batch_loss/train': 0.7301630922593176} +12/28/2021 20:33:37 - INFO - codeparrot_training - Step 44764: {'lr': 1.350100783961658e-05, 'samples': 22919680, 'steps': 44764, 'batch_loss/train': 0.600815675832564} +12/28/2021 20:33:50 - INFO - codeparrot_training - Step 44765: {'lr': 1.349589824228667e-05, 'samples': 22920192, 'steps': 44765, 'batch_loss/train': 0.7627658005803823} +12/28/2021 20:34:01 - INFO - codeparrot_training - Step 44766: {'lr': 1.3490789585207719e-05, 'samples': 22920704, 'steps': 44766, 'batch_loss/train': 0.7685468029230833} +12/28/2021 20:34:12 - INFO - codeparrot_training - Step 44767: {'lr': 1.3485681868400073e-05, 'samples': 22921216, 'steps': 44767, 'batch_loss/train': 0.8196745608001947} +12/28/2021 20:34:24 - INFO - codeparrot_training - Step 44768: {'lr': 1.3480575091884106e-05, 'samples': 22921728, 'steps': 44768, 'batch_loss/train': 0.696442671935074} +12/28/2021 20:34:34 - INFO - codeparrot_training - Step 44769: {'lr': 1.3475469255680051e-05, 'samples': 22922240, 'steps': 44769, 'batch_loss/train': 0.7793675884604454} +12/28/2021 20:34:45 - INFO - codeparrot_training - Step 44770: {'lr': 1.3470364359808195e-05, 'samples': 22922752, 'steps': 44770, 'batch_loss/train': 0.601698309648782} +12/28/2021 20:34:57 - INFO - codeparrot_training - Step 44771: {'lr': 1.346526040428886e-05, 'samples': 22923264, 'steps': 44771, 'batch_loss/train': 0.7390191806480289} +12/28/2021 20:35:08 - INFO - codeparrot_training - Step 44772: {'lr': 1.3460157389142358e-05, 'samples': 22923776, 'steps': 44772, 'batch_loss/train': 0.7214473044732586} +12/28/2021 20:35:18 - INFO - codeparrot_training - Step 44773: {'lr': 1.3455055314388953e-05, 'samples': 22924288, 'steps': 44773, 'batch_loss/train': 0.6823305301368237} +12/28/2021 20:35:29 - INFO - codeparrot_training - Step 44774: {'lr': 1.3449954180048963e-05, 'samples': 22924800, 'steps': 44774, 'batch_loss/train': 0.7375252293422818} +12/28/2021 20:35:42 - INFO - codeparrot_training - Step 44775: {'lr': 1.3444853986142563e-05, 'samples': 22925312, 'steps': 44775, 'batch_loss/train': 0.6933029795181938} +12/28/2021 20:35:53 - INFO - codeparrot_training - Step 44776: {'lr': 1.3439754732690158e-05, 'samples': 22925824, 'steps': 44776, 'batch_loss/train': 0.6902625828515738} +12/28/2021 20:36:03 - INFO - codeparrot_training - Step 44777: {'lr': 1.3434656419711978e-05, 'samples': 22926336, 'steps': 44777, 'batch_loss/train': 0.7208649329841137} +12/28/2021 20:36:15 - INFO - codeparrot_training - Step 44778: {'lr': 1.342955904722823e-05, 'samples': 22926848, 'steps': 44778, 'batch_loss/train': 0.6756991683505476} +12/28/2021 20:36:26 - INFO - codeparrot_training - Step 44779: {'lr': 1.3424462615259259e-05, 'samples': 22927360, 'steps': 44779, 'batch_loss/train': 0.7616416974924505} +12/28/2021 20:36:37 - INFO - codeparrot_training - Step 44780: {'lr': 1.3419367123825326e-05, 'samples': 22927872, 'steps': 44780, 'batch_loss/train': 0.7605844680219889} +12/28/2021 20:36:50 - INFO - codeparrot_training - Step 44781: {'lr': 1.3414272572946612e-05, 'samples': 22928384, 'steps': 44781, 'batch_loss/train': 0.7876634863205254} +12/28/2021 20:37:00 - INFO - codeparrot_training - Step 44782: {'lr': 1.340917896264343e-05, 'samples': 22928896, 'steps': 44782, 'batch_loss/train': 0.78644352639094} +12/28/2021 20:37:11 - INFO - codeparrot_training - Step 44783: {'lr': 1.3404086292936019e-05, 'samples': 22929408, 'steps': 44783, 'batch_loss/train': 0.791319886688143} +12/28/2021 20:37:21 - INFO - codeparrot_training - Step 44784: {'lr': 1.3398994563844636e-05, 'samples': 22929920, 'steps': 44784, 'batch_loss/train': 0.6164586753584445} +12/28/2021 20:37:33 - INFO - codeparrot_training - Step 44785: {'lr': 1.3393903775389515e-05, 'samples': 22930432, 'steps': 44785, 'batch_loss/train': 0.7812034231610596} +12/28/2021 20:37:44 - INFO - codeparrot_training - Step 44786: {'lr': 1.3388813927590893e-05, 'samples': 22930944, 'steps': 44786, 'batch_loss/train': 0.7696303157135844} +12/28/2021 20:37:55 - INFO - codeparrot_training - Step 44787: {'lr': 1.3383725020468973e-05, 'samples': 22931456, 'steps': 44787, 'batch_loss/train': 0.6937362258322537} +12/28/2021 20:38:10 - INFO - codeparrot_training - Step 44788: {'lr': 1.33786370540441e-05, 'samples': 22931968, 'steps': 44788, 'batch_loss/train': 0.7322179526090622} +12/28/2021 20:38:20 - INFO - codeparrot_training - Step 44789: {'lr': 1.3373550028336318e-05, 'samples': 22932480, 'steps': 44789, 'batch_loss/train': 0.7887695236131549} +12/28/2021 20:38:31 - INFO - codeparrot_training - Step 44790: {'lr': 1.3368463943366021e-05, 'samples': 22932992, 'steps': 44790, 'batch_loss/train': 0.6518225884065032} +12/28/2021 20:38:42 - INFO - codeparrot_training - Step 44791: {'lr': 1.3363378799153364e-05, 'samples': 22933504, 'steps': 44791, 'batch_loss/train': 0.9850894389674067} +12/28/2021 20:38:54 - INFO - codeparrot_training - Step 44792: {'lr': 1.335829459571855e-05, 'samples': 22934016, 'steps': 44792, 'batch_loss/train': 0.8308779364451766} +12/28/2021 20:39:04 - INFO - codeparrot_training - Step 44793: {'lr': 1.335321133308176e-05, 'samples': 22934528, 'steps': 44793, 'batch_loss/train': 0.7558496799319983} +12/28/2021 20:39:15 - INFO - codeparrot_training - Step 44794: {'lr': 1.334812901126331e-05, 'samples': 22935040, 'steps': 44794, 'batch_loss/train': 0.6838780154939741} +12/28/2021 20:39:27 - INFO - codeparrot_training - Step 44795: {'lr': 1.3343047630283323e-05, 'samples': 22935552, 'steps': 44795, 'batch_loss/train': 0.6694279429502785} +12/28/2021 20:39:38 - INFO - codeparrot_training - Step 44796: {'lr': 1.3337967190162003e-05, 'samples': 22936064, 'steps': 44796, 'batch_loss/train': 0.7158516314812005} +12/28/2021 20:39:48 - INFO - codeparrot_training - Step 44797: {'lr': 1.3332887690919588e-05, 'samples': 22936576, 'steps': 44797, 'batch_loss/train': 0.7288436726666987} +12/28/2021 20:40:02 - INFO - codeparrot_training - Step 44798: {'lr': 1.3327809132576251e-05, 'samples': 22937088, 'steps': 44798, 'batch_loss/train': 0.6174648247542791} +12/28/2021 20:40:13 - INFO - codeparrot_training - Step 44799: {'lr': 1.3322731515152147e-05, 'samples': 22937600, 'steps': 44799, 'batch_loss/train': 0.7494484996423125} +12/28/2021 20:40:24 - INFO - codeparrot_training - Step 44800: {'lr': 1.3317654838667537e-05, 'samples': 22938112, 'steps': 44800, 'batch_loss/train': 0.6425475101568736} +12/28/2021 20:40:36 - INFO - codeparrot_training - Step 44801: {'lr': 1.331257910314254e-05, 'samples': 22938624, 'steps': 44801, 'batch_loss/train': 0.7799229053780437} +12/28/2021 20:40:46 - INFO - codeparrot_training - Step 44802: {'lr': 1.3307504308597367e-05, 'samples': 22939136, 'steps': 44802, 'batch_loss/train': 0.7117850752547383} +12/28/2021 20:40:57 - INFO - codeparrot_training - Step 44803: {'lr': 1.3302430455052222e-05, 'samples': 22939648, 'steps': 44803, 'batch_loss/train': 0.6890066582709551} +12/28/2021 20:41:08 - INFO - codeparrot_training - Step 44804: {'lr': 1.3297357542527144e-05, 'samples': 22940160, 'steps': 44804, 'batch_loss/train': 0.8232028237544} +12/28/2021 20:41:20 - INFO - codeparrot_training - Step 44805: {'lr': 1.329228557104245e-05, 'samples': 22940672, 'steps': 44805, 'batch_loss/train': 0.7664988096803427} +12/28/2021 20:41:30 - INFO - codeparrot_training - Step 44806: {'lr': 1.3287214540618293e-05, 'samples': 22941184, 'steps': 44806, 'batch_loss/train': 0.8162275915965438} +12/28/2021 20:41:41 - INFO - codeparrot_training - Step 44807: {'lr': 1.3282144451274763e-05, 'samples': 22941696, 'steps': 44807, 'batch_loss/train': 0.6644782638177276} +12/28/2021 20:41:55 - INFO - codeparrot_training - Step 44808: {'lr': 1.3277075303032044e-05, 'samples': 22942208, 'steps': 44808, 'batch_loss/train': 0.7213124204427004} +12/28/2021 20:42:06 - INFO - codeparrot_training - Step 44809: {'lr': 1.3272007095910283e-05, 'samples': 22942720, 'steps': 44809, 'batch_loss/train': 0.7689856109209359} +12/28/2021 20:42:16 - INFO - codeparrot_training - Step 44810: {'lr': 1.3266939829929631e-05, 'samples': 22943232, 'steps': 44810, 'batch_loss/train': 0.650189263134962} +12/28/2021 20:42:29 - INFO - codeparrot_training - Step 44811: {'lr': 1.326187350511024e-05, 'samples': 22943744, 'steps': 44811, 'batch_loss/train': 0.7018010143656284} +12/28/2021 20:42:39 - INFO - codeparrot_training - Step 44812: {'lr': 1.3256808121472258e-05, 'samples': 22944256, 'steps': 44812, 'batch_loss/train': 0.6664979103952646} +12/28/2021 20:42:50 - INFO - codeparrot_training - Step 44813: {'lr': 1.325174367903581e-05, 'samples': 22944768, 'steps': 44813, 'batch_loss/train': 0.7442472488619387} +12/28/2021 20:43:00 - INFO - codeparrot_training - Step 44814: {'lr': 1.3246680177821018e-05, 'samples': 22945280, 'steps': 44814, 'batch_loss/train': 0.6145844606216997} +12/28/2021 20:43:14 - INFO - codeparrot_training - Step 44815: {'lr': 1.3241617617848062e-05, 'samples': 22945792, 'steps': 44815, 'batch_loss/train': 0.7307553659193218} +12/28/2021 20:43:25 - INFO - codeparrot_training - Step 44816: {'lr': 1.3236555999137034e-05, 'samples': 22946304, 'steps': 44816, 'batch_loss/train': 0.7264314605854452} +12/28/2021 20:43:36 - INFO - codeparrot_training - Step 44817: {'lr': 1.3231495321708087e-05, 'samples': 22946816, 'steps': 44817, 'batch_loss/train': 0.769196031615138} +12/28/2021 20:43:48 - INFO - codeparrot_training - Step 44818: {'lr': 1.3226435585581286e-05, 'samples': 22947328, 'steps': 44818, 'batch_loss/train': 0.7476661494001746} +12/28/2021 20:43:58 - INFO - codeparrot_training - Step 44819: {'lr': 1.3221376790776728e-05, 'samples': 22947840, 'steps': 44819, 'batch_loss/train': 0.67419037676882} +12/28/2021 20:44:09 - INFO - codeparrot_training - Step 44820: {'lr': 1.3216318937314647e-05, 'samples': 22948352, 'steps': 44820, 'batch_loss/train': 0.7956449673511088} +12/28/2021 20:44:21 - INFO - codeparrot_training - Step 44821: {'lr': 1.3211262025215054e-05, 'samples': 22948864, 'steps': 44821, 'batch_loss/train': 0.787569179199636} +12/28/2021 20:44:32 - INFO - codeparrot_training - Step 44822: {'lr': 1.3206206054498071e-05, 'samples': 22949376, 'steps': 44822, 'batch_loss/train': 0.661207193043083} +12/28/2021 20:44:42 - INFO - codeparrot_training - Step 44823: {'lr': 1.3201151025183795e-05, 'samples': 22949888, 'steps': 44823, 'batch_loss/train': 0.7921542404219508} +12/28/2021 20:44:55 - INFO - codeparrot_training - Step 44824: {'lr': 1.3196096937292319e-05, 'samples': 22950400, 'steps': 44824, 'batch_loss/train': 0.6937518827617168} +12/28/2021 20:45:05 - INFO - codeparrot_training - Step 44825: {'lr': 1.3191043790843765e-05, 'samples': 22950912, 'steps': 44825, 'batch_loss/train': 0.765240992885083} +12/28/2021 20:45:16 - INFO - codeparrot_training - Step 44826: {'lr': 1.3185991585858204e-05, 'samples': 22951424, 'steps': 44826, 'batch_loss/train': 0.6723478017374873} +12/28/2021 20:45:26 - INFO - codeparrot_training - Step 44827: {'lr': 1.3180940322355727e-05, 'samples': 22951936, 'steps': 44827, 'batch_loss/train': 0.6946057642344385} +12/28/2021 20:45:40 - INFO - codeparrot_training - Step 44828: {'lr': 1.3175890000356406e-05, 'samples': 22952448, 'steps': 44828, 'batch_loss/train': 0.6731212306767702} +12/28/2021 20:45:51 - INFO - codeparrot_training - Step 44829: {'lr': 1.3170840619880358e-05, 'samples': 22952960, 'steps': 44829, 'batch_loss/train': 0.6971254805102944} +12/28/2021 20:46:02 - INFO - codeparrot_training - Step 44830: {'lr': 1.3165792180947544e-05, 'samples': 22953472, 'steps': 44830, 'batch_loss/train': 0.6481913728639483} +12/28/2021 20:46:15 - INFO - codeparrot_training - Step 44831: {'lr': 1.3160744683578168e-05, 'samples': 22953984, 'steps': 44831, 'batch_loss/train': 0.6720201279968023} +12/28/2021 20:46:25 - INFO - codeparrot_training - Step 44832: {'lr': 1.315569812779227e-05, 'samples': 22954496, 'steps': 44832, 'batch_loss/train': 0.6066879583522677} +12/28/2021 20:46:36 - INFO - codeparrot_training - Step 44833: {'lr': 1.3150652513609862e-05, 'samples': 22955008, 'steps': 44833, 'batch_loss/train': 0.6633197828195989} +12/28/2021 20:46:50 - INFO - codeparrot_training - Step 44834: {'lr': 1.314560784105101e-05, 'samples': 22955520, 'steps': 44834, 'batch_loss/train': 0.78272401727736} +12/28/2021 20:47:01 - INFO - codeparrot_training - Step 44835: {'lr': 1.314056411013581e-05, 'samples': 22956032, 'steps': 44835, 'batch_loss/train': 0.9687947437632829} +12/28/2021 20:47:11 - INFO - codeparrot_training - Step 44836: {'lr': 1.3135521320884275e-05, 'samples': 22956544, 'steps': 44836, 'batch_loss/train': 0.6340396924642846} +12/28/2021 20:47:22 - INFO - codeparrot_training - Step 44837: {'lr': 1.3130479473316498e-05, 'samples': 22957056, 'steps': 44837, 'batch_loss/train': 0.8115964103490114} +12/28/2021 20:47:34 - INFO - codeparrot_training - Step 44838: {'lr': 1.3125438567452464e-05, 'samples': 22957568, 'steps': 44838, 'batch_loss/train': 0.6110284986789338} +12/28/2021 20:47:45 - INFO - codeparrot_training - Step 44839: {'lr': 1.3120398603312266e-05, 'samples': 22958080, 'steps': 44839, 'batch_loss/train': 0.7537954733707011} +12/28/2021 20:47:55 - INFO - codeparrot_training - Step 44840: {'lr': 1.311535958091592e-05, 'samples': 22958592, 'steps': 44840, 'batch_loss/train': 0.7066299477592111} +12/28/2021 20:48:07 - INFO - codeparrot_training - Step 44841: {'lr': 1.3110321500283462e-05, 'samples': 22959104, 'steps': 44841, 'batch_loss/train': 0.6839916244498454} +12/28/2021 20:48:18 - INFO - codeparrot_training - Step 44842: {'lr': 1.3105284361434906e-05, 'samples': 22959616, 'steps': 44842, 'batch_loss/train': 0.6935628484934568} +12/28/2021 20:48:29 - INFO - codeparrot_training - Step 44843: {'lr': 1.3100248164390316e-05, 'samples': 22960128, 'steps': 44843, 'batch_loss/train': 0.8556574168615043} +12/28/2021 20:48:43 - INFO - codeparrot_training - Step 44844: {'lr': 1.3095212909169679e-05, 'samples': 22960640, 'steps': 44844, 'batch_loss/train': 0.6587315334472805} +12/28/2021 20:48:53 - INFO - codeparrot_training - Step 44845: {'lr': 1.3090178595792979e-05, 'samples': 22961152, 'steps': 44845, 'batch_loss/train': 0.6890188534744084} +12/28/2021 20:49:04 - INFO - codeparrot_training - Step 44846: {'lr': 1.3085145224280337e-05, 'samples': 22961664, 'steps': 44846, 'batch_loss/train': 0.5783398255007342} +12/28/2021 20:49:16 - INFO - codeparrot_training - Step 44847: {'lr': 1.3080112794651683e-05, 'samples': 22962176, 'steps': 44847, 'batch_loss/train': 0.7596115083433688} +12/28/2021 20:49:27 - INFO - codeparrot_training - Step 44848: {'lr': 1.3075081306927028e-05, 'samples': 22962688, 'steps': 44848, 'batch_loss/train': 0.6782629441004246} +12/28/2021 20:49:37 - INFO - codeparrot_training - Step 44849: {'lr': 1.3070050761126412e-05, 'samples': 22963200, 'steps': 44849, 'batch_loss/train': 0.7814462371170521} +12/28/2021 20:49:48 - INFO - codeparrot_training - Step 44850: {'lr': 1.306502115726979e-05, 'samples': 22963712, 'steps': 44850, 'batch_loss/train': 0.6912765605957247} +12/28/2021 20:50:00 - INFO - codeparrot_training - Step 44851: {'lr': 1.3059992495377204e-05, 'samples': 22964224, 'steps': 44851, 'batch_loss/train': 0.7121398622402921} +12/28/2021 20:50:11 - INFO - codeparrot_training - Step 44852: {'lr': 1.3054964775468608e-05, 'samples': 22964736, 'steps': 44852, 'batch_loss/train': 0.7341879811137915} +12/28/2021 20:50:21 - INFO - codeparrot_training - Step 44853: {'lr': 1.3049937997564015e-05, 'samples': 22965248, 'steps': 44853, 'batch_loss/train': 0.7581088859587908} +12/28/2021 20:50:36 - INFO - codeparrot_training - Step 44854: {'lr': 1.3044912161683381e-05, 'samples': 22965760, 'steps': 44854, 'batch_loss/train': 0.762495948409196} +12/28/2021 20:50:46 - INFO - codeparrot_training - Step 44855: {'lr': 1.3039887267846746e-05, 'samples': 22966272, 'steps': 44855, 'batch_loss/train': 0.5056444550864398} +12/28/2021 20:50:57 - INFO - codeparrot_training - Step 44856: {'lr': 1.303486331607398e-05, 'samples': 22966784, 'steps': 44856, 'batch_loss/train': 0.7965814247727394} +12/28/2021 20:51:09 - INFO - codeparrot_training - Step 44857: {'lr': 1.3029840306385154e-05, 'samples': 22967296, 'steps': 44857, 'batch_loss/train': 0.7083062187302858} +12/28/2021 20:51:20 - INFO - codeparrot_training - Step 44858: {'lr': 1.3024818238800252e-05, 'samples': 22967808, 'steps': 44858, 'batch_loss/train': 0.7681698175147176} +12/28/2021 20:51:30 - INFO - codeparrot_training - Step 44859: {'lr': 1.3019797113339172e-05, 'samples': 22968320, 'steps': 44859, 'batch_loss/train': 0.6360917913261801} +12/28/2021 20:51:41 - INFO - codeparrot_training - Step 44860: {'lr': 1.3014776930021843e-05, 'samples': 22968832, 'steps': 44860, 'batch_loss/train': 0.7137867505662143} +12/28/2021 20:51:53 - INFO - codeparrot_training - Step 44861: {'lr': 1.3009757688868334e-05, 'samples': 22969344, 'steps': 44861, 'batch_loss/train': 0.652775910217315} +12/28/2021 20:52:04 - INFO - codeparrot_training - Step 44862: {'lr': 1.3004739389898545e-05, 'samples': 22969856, 'steps': 44862, 'batch_loss/train': 0.6685368060134351} +12/28/2021 20:52:14 - INFO - codeparrot_training - Step 44863: {'lr': 1.2999722033132406e-05, 'samples': 22970368, 'steps': 44863, 'batch_loss/train': 0.6662077973596752} +12/28/2021 20:52:28 - INFO - codeparrot_training - Step 44864: {'lr': 1.2994705618589869e-05, 'samples': 22970880, 'steps': 44864, 'batch_loss/train': 0.7754911668598652} +12/28/2021 20:52:38 - INFO - codeparrot_training - Step 44865: {'lr': 1.2989690146290923e-05, 'samples': 22971392, 'steps': 44865, 'batch_loss/train': 0.741590662393719} +12/28/2021 20:52:49 - INFO - codeparrot_training - Step 44866: {'lr': 1.2984675616255464e-05, 'samples': 22971904, 'steps': 44866, 'batch_loss/train': 0.6951101124286652} +12/28/2021 20:53:01 - INFO - codeparrot_training - Step 44867: {'lr': 1.2979662028503424e-05, 'samples': 22972416, 'steps': 44867, 'batch_loss/train': 0.806021642871201} +12/28/2021 20:53:12 - INFO - codeparrot_training - Step 44868: {'lr': 1.2974649383054787e-05, 'samples': 22972928, 'steps': 44868, 'batch_loss/train': 0.7413163513119798} +12/28/2021 20:53:22 - INFO - codeparrot_training - Step 44869: {'lr': 1.2969637679929425e-05, 'samples': 22973440, 'steps': 44869, 'batch_loss/train': 0.8099472995963879} +12/28/2021 20:53:33 - INFO - codeparrot_training - Step 44870: {'lr': 1.296462691914732e-05, 'samples': 22973952, 'steps': 44870, 'batch_loss/train': 0.7236925374600105} +12/28/2021 20:53:45 - INFO - codeparrot_training - Step 44871: {'lr': 1.2959617100728294e-05, 'samples': 22974464, 'steps': 44871, 'batch_loss/train': 0.8043847200460732} +12/28/2021 20:53:56 - INFO - codeparrot_training - Step 44872: {'lr': 1.2954608224692382e-05, 'samples': 22974976, 'steps': 44872, 'batch_loss/train': 0.749309771694243} +12/28/2021 20:54:06 - INFO - codeparrot_training - Step 44873: {'lr': 1.2949600291059432e-05, 'samples': 22975488, 'steps': 44873, 'batch_loss/train': 0.6652922574430704} +12/28/2021 20:54:21 - INFO - codeparrot_training - Step 44874: {'lr': 1.2944593299849317e-05, 'samples': 22976000, 'steps': 44874, 'batch_loss/train': 0.8708055070601404} +12/28/2021 20:54:31 - INFO - codeparrot_training - Step 44875: {'lr': 1.2939587251082075e-05, 'samples': 22976512, 'steps': 44875, 'batch_loss/train': 0.6858572693308815} +12/28/2021 20:54:42 - INFO - codeparrot_training - Step 44876: {'lr': 1.2934582144777469e-05, 'samples': 22977024, 'steps': 44876, 'batch_loss/train': 0.7694036685861647} +12/28/2021 20:54:54 - INFO - codeparrot_training - Step 44877: {'lr': 1.2929577980955454e-05, 'samples': 22977536, 'steps': 44877, 'batch_loss/train': 0.7316343556158245} +12/28/2021 20:55:05 - INFO - codeparrot_training - Step 44878: {'lr': 1.2924574759635932e-05, 'samples': 22978048, 'steps': 44878, 'batch_loss/train': 0.8148097284138203} +12/28/2021 20:55:15 - INFO - codeparrot_training - Step 44879: {'lr': 1.2919572480838775e-05, 'samples': 22978560, 'steps': 44879, 'batch_loss/train': 0.8108349265530705} +12/28/2021 20:55:26 - INFO - codeparrot_training - Step 44880: {'lr': 1.2914571144583886e-05, 'samples': 22979072, 'steps': 44880, 'batch_loss/train': 0.7276285784319043} +12/28/2021 20:55:38 - INFO - codeparrot_training - Step 44881: {'lr': 1.2909570750891164e-05, 'samples': 22979584, 'steps': 44881, 'batch_loss/train': 0.692138833925128} +12/28/2021 20:55:48 - INFO - codeparrot_training - Step 44882: {'lr': 1.2904571299780399e-05, 'samples': 22980096, 'steps': 44882, 'batch_loss/train': 0.7641836674883962} +12/28/2021 20:55:59 - INFO - codeparrot_training - Step 44883: {'lr': 1.2899572791271602e-05, 'samples': 22980608, 'steps': 44883, 'batch_loss/train': 0.6875320073449984} +12/28/2021 20:56:14 - INFO - codeparrot_training - Step 44884: {'lr': 1.2894575225384564e-05, 'samples': 22981120, 'steps': 44884, 'batch_loss/train': 0.8085589222609997} +12/28/2021 20:56:24 - INFO - codeparrot_training - Step 44885: {'lr': 1.2889578602139184e-05, 'samples': 22981632, 'steps': 44885, 'batch_loss/train': 0.6598718203604221} +12/28/2021 20:56:35 - INFO - codeparrot_training - Step 44886: {'lr': 1.2884582921555227e-05, 'samples': 22982144, 'steps': 44886, 'batch_loss/train': 0.6342046704958193} +12/28/2021 20:56:47 - INFO - codeparrot_training - Step 44887: {'lr': 1.287958818365273e-05, 'samples': 22982656, 'steps': 44887, 'batch_loss/train': 0.6083291303366423} +12/28/2021 20:56:58 - INFO - codeparrot_training - Step 44888: {'lr': 1.2874594388451428e-05, 'samples': 22983168, 'steps': 44888, 'batch_loss/train': 0.706337527371943} +12/28/2021 20:57:08 - INFO - codeparrot_training - Step 44889: {'lr': 1.2869601535971192e-05, 'samples': 22983680, 'steps': 44889, 'batch_loss/train': 0.7758072204887867} +12/28/2021 20:57:21 - INFO - codeparrot_training - Step 44890: {'lr': 1.2864609626231899e-05, 'samples': 22984192, 'steps': 44890, 'batch_loss/train': 0.7141193780116737} +12/28/2021 20:57:31 - INFO - codeparrot_training - Step 44891: {'lr': 1.2859618659253363e-05, 'samples': 22984704, 'steps': 44891, 'batch_loss/train': 0.5883046865928918} +12/28/2021 20:57:42 - INFO - codeparrot_training - Step 44892: {'lr': 1.285462863505546e-05, 'samples': 22985216, 'steps': 44892, 'batch_loss/train': 0.696160668740049} +12/28/2021 20:57:53 - INFO - codeparrot_training - Step 44893: {'lr': 1.2849639553657976e-05, 'samples': 22985728, 'steps': 44893, 'batch_loss/train': 0.8263740825932473} +12/28/2021 20:58:05 - INFO - codeparrot_training - Step 44894: {'lr': 1.2844651415080815e-05, 'samples': 22986240, 'steps': 44894, 'batch_loss/train': 0.789151850156486} +12/28/2021 20:58:16 - INFO - codeparrot_training - Step 44895: {'lr': 1.2839664219343766e-05, 'samples': 22986752, 'steps': 44895, 'batch_loss/train': 0.5750239412300289} +12/28/2021 20:58:26 - INFO - codeparrot_training - Step 44896: {'lr': 1.2834677966466702e-05, 'samples': 22987264, 'steps': 44896, 'batch_loss/train': 0.658104341593571} +12/28/2021 20:58:41 - INFO - codeparrot_training - Step 44897: {'lr': 1.2829692656469328e-05, 'samples': 22987776, 'steps': 44897, 'batch_loss/train': 0.7436693720519543} +12/28/2021 20:58:51 - INFO - codeparrot_training - Step 44898: {'lr': 1.2824708289371601e-05, 'samples': 22988288, 'steps': 44898, 'batch_loss/train': 0.6790652071940713} +12/28/2021 20:59:02 - INFO - codeparrot_training - Step 44899: {'lr': 1.2819724865193255e-05, 'samples': 22988800, 'steps': 44899, 'batch_loss/train': 1.065919267013669} +12/28/2021 20:59:13 - INFO - codeparrot_training - Step 44900: {'lr': 1.2814742383954109e-05, 'samples': 22989312, 'steps': 44900, 'batch_loss/train': 0.6977917365729809} +12/28/2021 20:59:25 - INFO - codeparrot_training - Step 44901: {'lr': 1.2809760845674062e-05, 'samples': 22989824, 'steps': 44901, 'batch_loss/train': 0.6635445905849338} +12/28/2021 20:59:35 - INFO - codeparrot_training - Step 44902: {'lr': 1.2804780250372794e-05, 'samples': 22990336, 'steps': 44902, 'batch_loss/train': 0.7256871149875224} +12/28/2021 20:59:46 - INFO - codeparrot_training - Step 44903: {'lr': 1.279980059807015e-05, 'samples': 22990848, 'steps': 44903, 'batch_loss/train': 0.6493957510683686} +12/28/2021 21:00:00 - INFO - codeparrot_training - Step 44904: {'lr': 1.2794821888785946e-05, 'samples': 22991360, 'steps': 44904, 'batch_loss/train': 0.6988379182294011} +12/28/2021 21:00:11 - INFO - codeparrot_training - Step 44905: {'lr': 1.2789844122539973e-05, 'samples': 22991872, 'steps': 44905, 'batch_loss/train': 0.7532736035063863} +12/28/2021 21:00:21 - INFO - codeparrot_training - Step 44906: {'lr': 1.2784867299351992e-05, 'samples': 22992384, 'steps': 44906, 'batch_loss/train': 0.6141119817621075} +12/28/2021 21:00:32 - INFO - codeparrot_training - Step 44907: {'lr': 1.277989141924188e-05, 'samples': 22992896, 'steps': 44907, 'batch_loss/train': 0.6479383455007337} +12/28/2021 21:00:44 - INFO - codeparrot_training - Step 44908: {'lr': 1.2774916482229254e-05, 'samples': 22993408, 'steps': 44908, 'batch_loss/train': 0.6989085134118795} +12/28/2021 21:00:55 - INFO - codeparrot_training - Step 44909: {'lr': 1.276994248833402e-05, 'samples': 22993920, 'steps': 44909, 'batch_loss/train': 0.7965810089372098} +12/28/2021 21:01:05 - INFO - codeparrot_training - Step 44910: {'lr': 1.2764969437575963e-05, 'samples': 22994432, 'steps': 44910, 'batch_loss/train': 0.7955244993790984} +12/28/2021 21:01:17 - INFO - codeparrot_training - Step 44911: {'lr': 1.2759997329974737e-05, 'samples': 22994944, 'steps': 44911, 'batch_loss/train': 0.6538720477838069} +12/28/2021 21:01:28 - INFO - codeparrot_training - Step 44912: {'lr': 1.2755026165550215e-05, 'samples': 22995456, 'steps': 44912, 'batch_loss/train': 0.7305033435113728} +12/28/2021 21:01:39 - INFO - codeparrot_training - Step 44913: {'lr': 1.2750055944322186e-05, 'samples': 22995968, 'steps': 44913, 'batch_loss/train': 0.7695453623309731} +12/28/2021 21:01:53 - INFO - codeparrot_training - Step 44914: {'lr': 1.2745086666310302e-05, 'samples': 22996480, 'steps': 44914, 'batch_loss/train': 0.6770617021247745} +12/28/2021 21:02:03 - INFO - codeparrot_training - Step 44915: {'lr': 1.2740118331534323e-05, 'samples': 22996992, 'steps': 44915, 'batch_loss/train': 0.6949358754791319} +12/28/2021 21:02:14 - INFO - codeparrot_training - Step 44916: {'lr': 1.2735150940014123e-05, 'samples': 22997504, 'steps': 44916, 'batch_loss/train': 0.6554673963692039} +12/28/2021 21:02:26 - INFO - codeparrot_training - Step 44917: {'lr': 1.2730184491769354e-05, 'samples': 22998016, 'steps': 44917, 'batch_loss/train': 0.7231839979067445} +12/28/2021 21:02:37 - INFO - codeparrot_training - Step 44918: {'lr': 1.2725218986819776e-05, 'samples': 22998528, 'steps': 44918, 'batch_loss/train': 0.7439837856218219} +12/28/2021 21:02:47 - INFO - codeparrot_training - Step 44919: {'lr': 1.2720254425185123e-05, 'samples': 22999040, 'steps': 44919, 'batch_loss/train': 0.7809070786461234} +12/28/2021 21:02:58 - INFO - codeparrot_training - Step 44920: {'lr': 1.271529080688516e-05, 'samples': 22999552, 'steps': 44920, 'batch_loss/train': 0.7923209127038717} +12/28/2021 21:03:10 - INFO - codeparrot_training - Step 44921: {'lr': 1.271032813193959e-05, 'samples': 23000064, 'steps': 44921, 'batch_loss/train': 0.6042949017137289} +12/28/2021 21:03:21 - INFO - codeparrot_training - Step 44922: {'lr': 1.2705366400368207e-05, 'samples': 23000576, 'steps': 44922, 'batch_loss/train': 0.6774265333078802} +12/28/2021 21:03:31 - INFO - codeparrot_training - Step 44923: {'lr': 1.270040561219063e-05, 'samples': 23001088, 'steps': 44923, 'batch_loss/train': 0.6646741512231529} +12/28/2021 21:03:45 - INFO - codeparrot_training - Step 44924: {'lr': 1.2695445767426677e-05, 'samples': 23001600, 'steps': 44924, 'batch_loss/train': 0.6590665173716843} +12/28/2021 21:03:56 - INFO - codeparrot_training - Step 44925: {'lr': 1.269048686609603e-05, 'samples': 23002112, 'steps': 44925, 'batch_loss/train': 0.8457164308056235} +12/28/2021 21:04:07 - INFO - codeparrot_training - Step 44926: {'lr': 1.2685528908218336e-05, 'samples': 23002624, 'steps': 44926, 'batch_loss/train': 0.7031124143395573} +12/28/2021 21:04:19 - INFO - codeparrot_training - Step 44927: {'lr': 1.2680571893813441e-05, 'samples': 23003136, 'steps': 44927, 'batch_loss/train': 0.6417653481476009} +12/28/2021 21:04:30 - INFO - codeparrot_training - Step 44928: {'lr': 1.267561582290097e-05, 'samples': 23003648, 'steps': 44928, 'batch_loss/train': 0.6430700803175569} +12/28/2021 21:04:40 - INFO - codeparrot_training - Step 44929: {'lr': 1.267066069550063e-05, 'samples': 23004160, 'steps': 44929, 'batch_loss/train': 0.6699356907047331} +12/28/2021 21:04:51 - INFO - codeparrot_training - Step 44930: {'lr': 1.2665706511632152e-05, 'samples': 23004672, 'steps': 44930, 'batch_loss/train': 0.7915217438712716} +12/28/2021 21:05:03 - INFO - codeparrot_training - Step 44931: {'lr': 1.2660753271315189e-05, 'samples': 23005184, 'steps': 44931, 'batch_loss/train': 0.7051984826102853} +12/28/2021 21:05:14 - INFO - codeparrot_training - Step 44932: {'lr': 1.2655800974569448e-05, 'samples': 23005696, 'steps': 44932, 'batch_loss/train': 0.7695357352495193} +12/28/2021 21:05:24 - INFO - codeparrot_training - Step 44933: {'lr': 1.265084962141469e-05, 'samples': 23006208, 'steps': 44933, 'batch_loss/train': 0.7019115500152111} +12/28/2021 21:05:36 - INFO - codeparrot_training - Step 44934: {'lr': 1.2645899211870426e-05, 'samples': 23006720, 'steps': 44934, 'batch_loss/train': 0.7295017472933978} +12/28/2021 21:05:47 - INFO - codeparrot_training - Step 44935: {'lr': 1.2640949745956504e-05, 'samples': 23007232, 'steps': 44935, 'batch_loss/train': 0.7066862895153463} +12/28/2021 21:05:58 - INFO - codeparrot_training - Step 44936: {'lr': 1.2636001223692572e-05, 'samples': 23007744, 'steps': 44936, 'batch_loss/train': 0.7356623690575361} +12/28/2021 21:06:12 - INFO - codeparrot_training - Step 44937: {'lr': 1.26310536450982e-05, 'samples': 23008256, 'steps': 44937, 'batch_loss/train': 0.5667628694791347} +12/28/2021 21:06:22 - INFO - codeparrot_training - Step 44938: {'lr': 1.2626107010193178e-05, 'samples': 23008768, 'steps': 44938, 'batch_loss/train': 0.8364323754794896} +12/28/2021 21:06:33 - INFO - codeparrot_training - Step 44939: {'lr': 1.2621161318997153e-05, 'samples': 23009280, 'steps': 44939, 'batch_loss/train': 0.6446721986867487} +12/28/2021 21:06:45 - INFO - codeparrot_training - Step 44940: {'lr': 1.2616216571529726e-05, 'samples': 23009792, 'steps': 44940, 'batch_loss/train': 0.7026942418888211} +12/28/2021 21:06:56 - INFO - codeparrot_training - Step 44941: {'lr': 1.2611272767810571e-05, 'samples': 23010304, 'steps': 44941, 'batch_loss/train': 0.8058744189329445} +12/28/2021 21:07:06 - INFO - codeparrot_training - Step 44942: {'lr': 1.2606329907859399e-05, 'samples': 23010816, 'steps': 44942, 'batch_loss/train': 0.8213061224669218} +12/28/2021 21:07:17 - INFO - codeparrot_training - Step 44943: {'lr': 1.26013879916958e-05, 'samples': 23011328, 'steps': 44943, 'batch_loss/train': 0.7181948041543365} +12/28/2021 21:07:31 - INFO - codeparrot_training - Step 44944: {'lr': 1.2596447019339458e-05, 'samples': 23011840, 'steps': 44944, 'batch_loss/train': 0.688827168662101} +12/28/2021 21:07:42 - INFO - codeparrot_training - Step 44945: {'lr': 1.2591506990809993e-05, 'samples': 23012352, 'steps': 44945, 'batch_loss/train': 0.7297022498678416} +12/28/2021 21:07:52 - INFO - codeparrot_training - Step 44946: {'lr': 1.2586567906127055e-05, 'samples': 23012864, 'steps': 44946, 'batch_loss/train': 0.6401559796649963} +12/28/2021 21:08:04 - INFO - codeparrot_training - Step 44947: {'lr': 1.2581629765310271e-05, 'samples': 23013376, 'steps': 44947, 'batch_loss/train': 0.7318467060104012} +12/28/2021 21:08:15 - INFO - codeparrot_training - Step 44948: {'lr': 1.2576692568379288e-05, 'samples': 23013888, 'steps': 44948, 'batch_loss/train': 0.547312268987298} +12/28/2021 21:08:26 - INFO - codeparrot_training - Step 44949: {'lr': 1.2571756315353732e-05, 'samples': 23014400, 'steps': 44949, 'batch_loss/train': 0.7221773490309715} +12/28/2021 21:08:38 - INFO - codeparrot_training - Step 44950: {'lr': 1.2566821006253226e-05, 'samples': 23014912, 'steps': 44950, 'batch_loss/train': 0.8006832245155238} +12/28/2021 21:08:48 - INFO - codeparrot_training - Step 44951: {'lr': 1.2561886641097419e-05, 'samples': 23015424, 'steps': 44951, 'batch_loss/train': 0.7270614076405764} +12/28/2021 21:08:59 - INFO - codeparrot_training - Step 44952: {'lr': 1.2556953219905826e-05, 'samples': 23015936, 'steps': 44952, 'batch_loss/train': 0.7337712626904249} +12/28/2021 21:09:10 - INFO - codeparrot_training - Step 44953: {'lr': 1.2552020742698206e-05, 'samples': 23016448, 'steps': 44953, 'batch_loss/train': 0.7366656982339919} +12/28/2021 21:09:24 - INFO - codeparrot_training - Step 44954: {'lr': 1.2547089209494045e-05, 'samples': 23016960, 'steps': 44954, 'batch_loss/train': 0.5332537284120917} +12/28/2021 21:09:35 - INFO - codeparrot_training - Step 44955: {'lr': 1.2542158620312993e-05, 'samples': 23017472, 'steps': 44955, 'batch_loss/train': 0.6958582904189825} +12/28/2021 21:09:45 - INFO - codeparrot_training - Step 44956: {'lr': 1.2537228975174674e-05, 'samples': 23017984, 'steps': 44956, 'batch_loss/train': 0.7023964574327692} +12/28/2021 21:09:57 - INFO - codeparrot_training - Step 44957: {'lr': 1.2532300274098685e-05, 'samples': 23018496, 'steps': 44957, 'batch_loss/train': 0.7043376080691814} +12/28/2021 21:10:08 - INFO - codeparrot_training - Step 44958: {'lr': 1.252737251710459e-05, 'samples': 23019008, 'steps': 44958, 'batch_loss/train': 0.7163219470530748} +12/28/2021 21:10:19 - INFO - codeparrot_training - Step 44959: {'lr': 1.2522445704211988e-05, 'samples': 23019520, 'steps': 44959, 'batch_loss/train': 0.6991438223049045} +12/28/2021 21:10:32 - INFO - codeparrot_training - Step 44960: {'lr': 1.2517519835440472e-05, 'samples': 23020032, 'steps': 44960, 'batch_loss/train': 0.7211578856222332} +12/28/2021 21:10:43 - INFO - codeparrot_training - Step 44961: {'lr': 1.2512594910809639e-05, 'samples': 23020544, 'steps': 44961, 'batch_loss/train': 0.7522688512690365} +12/28/2021 21:10:54 - INFO - codeparrot_training - Step 44962: {'lr': 1.250767093033911e-05, 'samples': 23021056, 'steps': 44962, 'batch_loss/train': 0.8949803886935115} +12/28/2021 21:11:04 - INFO - codeparrot_training - Step 44963: {'lr': 1.2502747894048316e-05, 'samples': 23021568, 'steps': 44963, 'batch_loss/train': 0.7492129025049508} +12/28/2021 21:11:16 - INFO - codeparrot_training - Step 44964: {'lr': 1.2497825801956936e-05, 'samples': 23022080, 'steps': 44964, 'batch_loss/train': 0.7259510979056358} +12/28/2021 21:11:27 - INFO - codeparrot_training - Step 44965: {'lr': 1.2492904654084591e-05, 'samples': 23022592, 'steps': 44965, 'batch_loss/train': 0.7717103864997625} +12/28/2021 21:11:38 - INFO - codeparrot_training - Step 44966: {'lr': 1.248798445045074e-05, 'samples': 23023104, 'steps': 44966, 'batch_loss/train': 0.7360580591484904} +12/28/2021 21:11:50 - INFO - codeparrot_training - Step 44967: {'lr': 1.2483065191074922e-05, 'samples': 23023616, 'steps': 44967, 'batch_loss/train': 0.7274965075775981} +12/28/2021 21:12:01 - INFO - codeparrot_training - Step 44968: {'lr': 1.2478146875976843e-05, 'samples': 23024128, 'steps': 44968, 'batch_loss/train': 0.7262429152615368} +12/28/2021 21:12:11 - INFO - codeparrot_training - Step 44969: {'lr': 1.2473229505175932e-05, 'samples': 23024640, 'steps': 44969, 'batch_loss/train': 0.7632082761265337} +12/28/2021 21:12:23 - INFO - codeparrot_training - Step 44970: {'lr': 1.2468313078691784e-05, 'samples': 23025152, 'steps': 44970, 'batch_loss/train': 0.8083221060223877} +12/28/2021 21:12:34 - INFO - codeparrot_training - Step 44971: {'lr': 1.2463397596543912e-05, 'samples': 23025664, 'steps': 44971, 'batch_loss/train': 0.7442458234727383} +12/28/2021 21:12:45 - INFO - codeparrot_training - Step 44972: {'lr': 1.2458483058751885e-05, 'samples': 23026176, 'steps': 44972, 'batch_loss/train': 0.6714891605079174} +12/28/2021 21:12:59 - INFO - codeparrot_training - Step 44973: {'lr': 1.245356946533524e-05, 'samples': 23026688, 'steps': 44973, 'batch_loss/train': 0.7099165577674285} +12/28/2021 21:13:09 - INFO - codeparrot_training - Step 44974: {'lr': 1.244865681631352e-05, 'samples': 23027200, 'steps': 44974, 'batch_loss/train': 0.6573407677933574} +12/28/2021 21:13:20 - INFO - codeparrot_training - Step 44975: {'lr': 1.2443745111706235e-05, 'samples': 23027712, 'steps': 44975, 'batch_loss/train': 0.7965230377740227} +12/28/2021 21:13:31 - INFO - codeparrot_training - Step 44976: {'lr': 1.2438834351532923e-05, 'samples': 23028224, 'steps': 44976, 'batch_loss/train': 0.5387624334543943} +12/28/2021 21:13:43 - INFO - codeparrot_training - Step 44977: {'lr': 1.2433924535813158e-05, 'samples': 23028736, 'steps': 44977, 'batch_loss/train': 0.8397184379864484} +12/28/2021 21:13:53 - INFO - codeparrot_training - Step 44978: {'lr': 1.2429015664566306e-05, 'samples': 23029248, 'steps': 44978, 'batch_loss/train': 0.7451759693212807} +12/28/2021 21:14:04 - INFO - codeparrot_training - Step 44979: {'lr': 1.242410773781208e-05, 'samples': 23029760, 'steps': 44979, 'batch_loss/train': 0.6927516153082252} +12/28/2021 21:14:16 - INFO - codeparrot_training - Step 44980: {'lr': 1.2419200755569848e-05, 'samples': 23030272, 'steps': 44980, 'batch_loss/train': 0.694405036047101} +12/28/2021 21:14:27 - INFO - codeparrot_training - Step 44981: {'lr': 1.2414294717859153e-05, 'samples': 23030784, 'steps': 44981, 'batch_loss/train': 0.677150844479911} +12/28/2021 21:14:37 - INFO - codeparrot_training - Step 44982: {'lr': 1.2409389624699535e-05, 'samples': 23031296, 'steps': 44982, 'batch_loss/train': 0.6811900339089334} +12/28/2021 21:14:50 - INFO - codeparrot_training - Step 44983: {'lr': 1.2404485476110477e-05, 'samples': 23031808, 'steps': 44983, 'batch_loss/train': 0.5618568354984745} +12/28/2021 21:15:01 - INFO - codeparrot_training - Step 44984: {'lr': 1.2399582272111465e-05, 'samples': 23032320, 'steps': 44984, 'batch_loss/train': 0.8034100208897144} +12/28/2021 21:15:12 - INFO - codeparrot_training - Step 44985: {'lr': 1.2394680012721981e-05, 'samples': 23032832, 'steps': 44985, 'batch_loss/train': 0.8445778116583824} +12/28/2021 21:15:24 - INFO - codeparrot_training - Step 44986: {'lr': 1.2389778697961568e-05, 'samples': 23033344, 'steps': 44986, 'batch_loss/train': 0.7470579775981605} +12/28/2021 21:15:34 - INFO - codeparrot_training - Step 44987: {'lr': 1.2384878327849652e-05, 'samples': 23033856, 'steps': 44987, 'batch_loss/train': 0.6692639588145539} +12/28/2021 21:15:45 - INFO - codeparrot_training - Step 44988: {'lr': 1.2379978902405774e-05, 'samples': 23034368, 'steps': 44988, 'batch_loss/train': 0.7432776667992584} +12/28/2021 21:15:56 - INFO - codeparrot_training - Step 44989: {'lr': 1.237508042164931e-05, 'samples': 23034880, 'steps': 44989, 'batch_loss/train': 0.8061871370300651} +12/28/2021 21:16:10 - INFO - codeparrot_training - Step 44990: {'lr': 1.2370182885599851e-05, 'samples': 23035392, 'steps': 44990, 'batch_loss/train': 0.7083332790061831} +12/28/2021 21:16:21 - INFO - codeparrot_training - Step 44991: {'lr': 1.2365286294276829e-05, 'samples': 23035904, 'steps': 44991, 'batch_loss/train': 2.207082530017942} +12/28/2021 21:16:31 - INFO - codeparrot_training - Step 44992: {'lr': 1.2360390647699698e-05, 'samples': 23036416, 'steps': 44992, 'batch_loss/train': 1.2661650758236647} +12/28/2021 21:16:43 - INFO - codeparrot_training - Step 44993: {'lr': 1.2355495945887862e-05, 'samples': 23036928, 'steps': 44993, 'batch_loss/train': 0.7111700726673007} +12/28/2021 21:16:54 - INFO - codeparrot_training - Step 44994: {'lr': 1.2350602188860943e-05, 'samples': 23037440, 'steps': 44994, 'batch_loss/train': 0.6998613076284528} +12/28/2021 21:17:05 - INFO - codeparrot_training - Step 44995: {'lr': 1.234570937663823e-05, 'samples': 23037952, 'steps': 44995, 'batch_loss/train': 0.6791232684627175} +12/28/2021 21:17:15 - INFO - codeparrot_training - Step 44996: {'lr': 1.2340817509239266e-05, 'samples': 23038464, 'steps': 44996, 'batch_loss/train': 0.7215040284208953} +12/28/2021 21:17:27 - INFO - codeparrot_training - Step 44997: {'lr': 1.2335926586683448e-05, 'samples': 23038976, 'steps': 44997, 'batch_loss/train': 0.6975839795777574} +12/28/2021 21:17:38 - INFO - codeparrot_training - Step 44998: {'lr': 1.2331036608990293e-05, 'samples': 23039488, 'steps': 44998, 'batch_loss/train': 0.695563433226198} +12/28/2021 21:17:48 - INFO - codeparrot_training - Step 44999: {'lr': 1.2326147576179142e-05, 'samples': 23040000, 'steps': 44999, 'batch_loss/train': 0.7561318734660745} +12/28/2021 21:17:48 - INFO - codeparrot_training - Evaluating and saving model checkpoint +12/28/2021 21:21:11 - INFO - codeparrot_training - Step 45000: {'loss/eval': 0.7414663434028625, 'perplexity': 2.099011182785034}