diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -33500,3 +33500,1009 @@ Use FP16 precision: False 02/25/2022 20:18:14 - INFO - codeparrot_training - Step 32998: {'lr': 0.0001394571784843114, 'samples': 16895488, 'steps': 32998, 'loss/train': 2.2089178562164307} 02/25/2022 20:18:20 - INFO - codeparrot_training - Step 32999: {'lr': 0.00013944250272798393, 'samples': 16896000, 'steps': 32999, 'loss/train': 1.2068666219711304} 02/25/2022 20:18:20 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 20:18:38 - WARNING - huggingface_hub.repository - Several commits (33) will be pushed upstream. +02/25/2022 20:18:38 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 20:19:11 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 06c1d4e..6b82659 floral-grass-11 -> floral-grass-11 + +02/25/2022 20:19:16 - INFO - codeparrot_training - Step 33000: {'lr': 0.00013942782744524973, 'samples': 16896512, 'steps': 33000, 'loss/train': 1.1297675371170044} +02/25/2022 20:19:22 - INFO - codeparrot_training - Step 33001: {'lr': 0.00013941315263617157, 'samples': 16897024, 'steps': 33001, 'loss/train': 2.33842134475708} +02/25/2022 20:19:25 - INFO - codeparrot_training - Step 33002: {'lr': 0.00013939847830081226, 'samples': 16897536, 'steps': 33002, 'loss/train': 1.0919899940490723} +02/25/2022 20:19:32 - INFO - codeparrot_training - Step 33003: {'lr': 0.00013938380443923487, 'samples': 16898048, 'steps': 33003, 'loss/train': 1.9448785781860352} +02/25/2022 20:19:36 - INFO - codeparrot_training - Step 33004: {'lr': 0.00013936913105150212, 'samples': 16898560, 'steps': 33004, 'loss/train': 1.6131045818328857} +02/25/2022 20:19:41 - INFO - codeparrot_training - Step 33005: {'lr': 0.00013935445813767678, 'samples': 16899072, 'steps': 33005, 'loss/train': 2.596254587173462} +02/25/2022 20:19:45 - INFO - codeparrot_training - Step 33006: {'lr': 0.000139339785697822, 'samples': 16899584, 'steps': 33006, 'loss/train': 1.629105806350708} +02/25/2022 20:19:50 - INFO - codeparrot_training - Step 33007: {'lr': 0.00013932511373200023, 'samples': 16900096, 'steps': 33007, 'loss/train': 1.2458577156066895} +02/25/2022 20:19:53 - INFO - codeparrot_training - Step 33008: {'lr': 0.00013931044224027467, 'samples': 16900608, 'steps': 33008, 'loss/train': 1.3389471769332886} +02/25/2022 20:19:59 - INFO - codeparrot_training - Step 33009: {'lr': 0.0001392957712227079, 'samples': 16901120, 'steps': 33009, 'loss/train': 1.5443185567855835} +02/25/2022 20:20:02 - INFO - codeparrot_training - Step 33010: {'lr': 0.00013928110067936312, 'samples': 16901632, 'steps': 33010, 'loss/train': 1.9592325687408447} +02/25/2022 20:20:08 - INFO - codeparrot_training - Step 33011: {'lr': 0.0001392664306103027, 'samples': 16902144, 'steps': 33011, 'loss/train': 1.7590411901474} +02/25/2022 20:20:12 - INFO - codeparrot_training - Step 33012: {'lr': 0.00013925176101558986, 'samples': 16902656, 'steps': 33012, 'loss/train': 1.8600107431411743} +02/25/2022 20:20:17 - INFO - codeparrot_training - Step 33013: {'lr': 0.0001392370918952872, 'samples': 16903168, 'steps': 33013, 'loss/train': 2.094806432723999} +02/25/2022 20:20:21 - INFO - codeparrot_training - Step 33014: {'lr': 0.00013922242324945788, 'samples': 16903680, 'steps': 33014, 'loss/train': 2.209496021270752} +02/25/2022 20:20:26 - INFO - codeparrot_training - Step 33015: {'lr': 0.0001392077550781643, 'samples': 16904192, 'steps': 33015, 'loss/train': 1.3058743476867676} +02/25/2022 20:20:30 - INFO - codeparrot_training - Step 33016: {'lr': 0.00013919308738146962, 'samples': 16904704, 'steps': 33016, 'loss/train': 0.8081715106964111} +02/25/2022 20:20:35 - INFO - codeparrot_training - Step 33017: {'lr': 0.0001391784201594365, 'samples': 16905216, 'steps': 33017, 'loss/train': 1.964890956878662} +02/25/2022 20:20:39 - INFO - codeparrot_training - Step 33018: {'lr': 0.00013916375341212794, 'samples': 16905728, 'steps': 33018, 'loss/train': 1.5021774768829346} +02/25/2022 20:20:45 - INFO - codeparrot_training - Step 33019: {'lr': 0.00013914908713960665, 'samples': 16906240, 'steps': 33019, 'loss/train': 1.1030876636505127} +02/25/2022 20:20:48 - INFO - codeparrot_training - Step 33020: {'lr': 0.00013913442134193545, 'samples': 16906752, 'steps': 33020, 'loss/train': 2.2400050163269043} +02/25/2022 20:20:54 - INFO - codeparrot_training - Step 33021: {'lr': 0.00013911975601917708, 'samples': 16907264, 'steps': 33021, 'loss/train': 1.0865991115570068} +02/25/2022 20:20:57 - INFO - codeparrot_training - Step 33022: {'lr': 0.00013910509117139462, 'samples': 16907776, 'steps': 33022, 'loss/train': 2.0021352767944336} +02/25/2022 20:21:03 - INFO - codeparrot_training - Step 33023: {'lr': 0.0001390904267986507, 'samples': 16908288, 'steps': 33023, 'loss/train': 2.7526328563690186} +02/25/2022 20:21:07 - INFO - codeparrot_training - Step 33024: {'lr': 0.0001390757629010082, 'samples': 16908800, 'steps': 33024, 'loss/train': 8.040966033935547} +02/25/2022 20:21:12 - INFO - codeparrot_training - Step 33025: {'lr': 0.00013906109947852977, 'samples': 16909312, 'steps': 33025, 'loss/train': 1.3483787775039673} +02/25/2022 20:21:16 - INFO - codeparrot_training - Step 33026: {'lr': 0.00013904643653127847, 'samples': 16909824, 'steps': 33026, 'loss/train': 2.066166400909424} +02/25/2022 20:21:21 - INFO - codeparrot_training - Step 33027: {'lr': 0.00013903177405931701, 'samples': 16910336, 'steps': 33027, 'loss/train': 1.4834327697753906} +02/25/2022 20:21:24 - INFO - codeparrot_training - Step 33028: {'lr': 0.00013901711206270822, 'samples': 16910848, 'steps': 33028, 'loss/train': 1.009364128112793} +02/25/2022 20:21:31 - INFO - codeparrot_training - Step 33029: {'lr': 0.00013900245054151476, 'samples': 16911360, 'steps': 33029, 'loss/train': 1.5293819904327393} +02/25/2022 20:21:34 - INFO - codeparrot_training - Step 33030: {'lr': 0.00013898778949579966, 'samples': 16911872, 'steps': 33030, 'loss/train': 1.4307610988616943} +02/25/2022 20:21:40 - INFO - codeparrot_training - Step 33031: {'lr': 0.0001389731289256255, 'samples': 16912384, 'steps': 33031, 'loss/train': 1.7180373668670654} +02/25/2022 20:21:43 - INFO - codeparrot_training - Step 33032: {'lr': 0.0001389584688310554, 'samples': 16912896, 'steps': 33032, 'loss/train': 2.271662950515747} +02/25/2022 20:21:49 - INFO - codeparrot_training - Step 33033: {'lr': 0.00013894380921215178, 'samples': 16913408, 'steps': 33033, 'loss/train': 1.338437795639038} +02/25/2022 20:21:52 - INFO - codeparrot_training - Step 33034: {'lr': 0.0001389291500689777, 'samples': 16913920, 'steps': 33034, 'loss/train': 0.5751528739929199} +02/25/2022 20:21:58 - INFO - codeparrot_training - Step 33035: {'lr': 0.0001389144914015958, 'samples': 16914432, 'steps': 33035, 'loss/train': 1.5223861932754517} +02/25/2022 20:22:01 - INFO - codeparrot_training - Step 33036: {'lr': 0.00013889983321006916, 'samples': 16914944, 'steps': 33036, 'loss/train': 1.6493160724639893} +02/25/2022 20:22:07 - INFO - codeparrot_training - Step 33037: {'lr': 0.0001388851754944601, 'samples': 16915456, 'steps': 33037, 'loss/train': 1.842376470565796} +02/25/2022 20:22:10 - INFO - codeparrot_training - Step 33038: {'lr': 0.0001388705182548318, 'samples': 16915968, 'steps': 33038, 'loss/train': 1.6146433353424072} +02/25/2022 20:22:16 - INFO - codeparrot_training - Step 33039: {'lr': 0.0001388558614912469, 'samples': 16916480, 'steps': 33039, 'loss/train': 0.2921536862850189} +02/25/2022 20:22:20 - INFO - codeparrot_training - Step 33040: {'lr': 0.00013884120520376818, 'samples': 16916992, 'steps': 33040, 'loss/train': 2.442641258239746} +02/25/2022 20:22:25 - INFO - codeparrot_training - Step 33041: {'lr': 0.00013882654939245835, 'samples': 16917504, 'steps': 33041, 'loss/train': 1.280112862586975} +02/25/2022 20:22:29 - INFO - codeparrot_training - Step 33042: {'lr': 0.00013881189405738043, 'samples': 16918016, 'steps': 33042, 'loss/train': 3.812242269515991} +02/25/2022 20:22:34 - INFO - codeparrot_training - Step 33043: {'lr': 0.00013879723919859698, 'samples': 16918528, 'steps': 33043, 'loss/train': 2.054389715194702} +02/25/2022 20:22:38 - INFO - codeparrot_training - Step 33044: {'lr': 0.00013878258481617078, 'samples': 16919040, 'steps': 33044, 'loss/train': 2.501282215118408} +02/25/2022 20:22:43 - INFO - codeparrot_training - Step 33045: {'lr': 0.0001387679309101648, 'samples': 16919552, 'steps': 33045, 'loss/train': 1.7748994827270508} +02/25/2022 20:22:47 - INFO - codeparrot_training - Step 33046: {'lr': 0.00013875327748064166, 'samples': 16920064, 'steps': 33046, 'loss/train': 3.001192569732666} +02/25/2022 20:22:52 - INFO - codeparrot_training - Step 33047: {'lr': 0.0001387386245276641, 'samples': 16920576, 'steps': 33047, 'loss/train': 2.1625325679779053} +02/25/2022 20:22:56 - INFO - codeparrot_training - Step 33048: {'lr': 0.00013872397205129495, 'samples': 16921088, 'steps': 33048, 'loss/train': 1.4520047903060913} +02/25/2022 20:23:02 - INFO - codeparrot_training - Step 33049: {'lr': 0.00013870932005159698, 'samples': 16921600, 'steps': 33049, 'loss/train': 2.028303623199463} +02/25/2022 20:23:06 - INFO - codeparrot_training - Step 33050: {'lr': 0.000138694668528633, 'samples': 16922112, 'steps': 33050, 'loss/train': 1.6190156936645508} +02/25/2022 20:23:11 - INFO - codeparrot_training - Step 33051: {'lr': 0.00013868001748246566, 'samples': 16922624, 'steps': 33051, 'loss/train': 1.5195121765136719} +02/25/2022 20:23:15 - INFO - codeparrot_training - Step 33052: {'lr': 0.00013866536691315774, 'samples': 16923136, 'steps': 33052, 'loss/train': 1.4691048860549927} +02/25/2022 20:23:20 - INFO - codeparrot_training - Step 33053: {'lr': 0.00013865071682077207, 'samples': 16923648, 'steps': 33053, 'loss/train': 1.4723105430603027} +02/25/2022 20:23:24 - INFO - codeparrot_training - Step 33054: {'lr': 0.00013863606720537143, 'samples': 16924160, 'steps': 33054, 'loss/train': 0.8526555299758911} +02/25/2022 20:23:29 - INFO - codeparrot_training - Step 33055: {'lr': 0.00013862141806701846, 'samples': 16924672, 'steps': 33055, 'loss/train': 2.0327413082122803} +02/25/2022 20:23:33 - INFO - codeparrot_training - Step 33056: {'lr': 0.00013860676940577593, 'samples': 16925184, 'steps': 33056, 'loss/train': 2.1846323013305664} +02/25/2022 20:23:38 - INFO - codeparrot_training - Step 33057: {'lr': 0.00013859212122170668, 'samples': 16925696, 'steps': 33057, 'loss/train': 1.57632315158844} +02/25/2022 20:23:42 - INFO - codeparrot_training - Step 33058: {'lr': 0.00013857747351487344, 'samples': 16926208, 'steps': 33058, 'loss/train': 3.3128674030303955} +02/25/2022 20:23:47 - INFO - codeparrot_training - Step 33059: {'lr': 0.0001385628262853389, 'samples': 16926720, 'steps': 33059, 'loss/train': 2.1297178268432617} +02/25/2022 20:23:51 - INFO - codeparrot_training - Step 33060: {'lr': 0.00013854817953316573, 'samples': 16927232, 'steps': 33060, 'loss/train': 1.578182339668274} +02/25/2022 20:23:56 - INFO - codeparrot_training - Step 33061: {'lr': 0.0001385335332584169, 'samples': 16927744, 'steps': 33061, 'loss/train': 1.3774527311325073} +02/25/2022 20:24:00 - INFO - codeparrot_training - Step 33062: {'lr': 0.00013851888746115498, 'samples': 16928256, 'steps': 33062, 'loss/train': 1.1669151782989502} +02/25/2022 20:24:05 - INFO - codeparrot_training - Step 33063: {'lr': 0.00013850424214144276, 'samples': 16928768, 'steps': 33063, 'loss/train': 1.1518981456756592} +02/25/2022 20:24:09 - INFO - codeparrot_training - Step 33064: {'lr': 0.00013848959729934285, 'samples': 16929280, 'steps': 33064, 'loss/train': 1.6680086851119995} +02/25/2022 20:24:15 - INFO - codeparrot_training - Step 33065: {'lr': 0.00013847495293491818, 'samples': 16929792, 'steps': 33065, 'loss/train': 0.9807769060134888} +02/25/2022 20:24:19 - INFO - codeparrot_training - Step 33066: {'lr': 0.00013846030904823137, 'samples': 16930304, 'steps': 33066, 'loss/train': 0.9813781380653381} +02/25/2022 20:24:25 - INFO - codeparrot_training - Step 33067: {'lr': 0.0001384456656393453, 'samples': 16930816, 'steps': 33067, 'loss/train': 2.031611919403076} +02/25/2022 20:24:28 - INFO - codeparrot_training - Step 33068: {'lr': 0.00013843102270832242, 'samples': 16931328, 'steps': 33068, 'loss/train': 0.8346250653266907} +02/25/2022 20:24:34 - INFO - codeparrot_training - Step 33069: {'lr': 0.0001384163802552257, 'samples': 16931840, 'steps': 33069, 'loss/train': 1.6836495399475098} +02/25/2022 20:24:37 - INFO - codeparrot_training - Step 33070: {'lr': 0.0001384017382801177, 'samples': 16932352, 'steps': 33070, 'loss/train': 2.083275556564331} +02/25/2022 20:24:43 - INFO - codeparrot_training - Step 33071: {'lr': 0.00013838709678306137, 'samples': 16932864, 'steps': 33071, 'loss/train': 0.08037525415420532} +02/25/2022 20:24:46 - INFO - codeparrot_training - Step 33072: {'lr': 0.00013837245576411912, 'samples': 16933376, 'steps': 33072, 'loss/train': 1.5169219970703125} +02/25/2022 20:24:52 - INFO - codeparrot_training - Step 33073: {'lr': 0.0001383578152233539, 'samples': 16933888, 'steps': 33073, 'loss/train': 2.193718671798706} +02/25/2022 20:24:55 - INFO - codeparrot_training - Step 33074: {'lr': 0.00013834317516082825, 'samples': 16934400, 'steps': 33074, 'loss/train': 1.8317298889160156} +02/25/2022 20:25:02 - INFO - codeparrot_training - Step 33075: {'lr': 0.00013832853557660518, 'samples': 16934912, 'steps': 33075, 'loss/train': 2.081847667694092} +02/25/2022 20:25:05 - INFO - codeparrot_training - Step 33076: {'lr': 0.00013831389647074702, 'samples': 16935424, 'steps': 33076, 'loss/train': 1.608643651008606} +02/25/2022 20:25:11 - INFO - codeparrot_training - Step 33077: {'lr': 0.00013829925784331675, 'samples': 16935936, 'steps': 33077, 'loss/train': 2.0918939113616943} +02/25/2022 20:25:14 - INFO - codeparrot_training - Step 33078: {'lr': 0.0001382846196943769, 'samples': 16936448, 'steps': 33078, 'loss/train': 1.0892910957336426} +02/25/2022 20:25:20 - INFO - codeparrot_training - Step 33079: {'lr': 0.00013826998202399037, 'samples': 16936960, 'steps': 33079, 'loss/train': 0.45494207739830017} +02/25/2022 20:25:23 - INFO - codeparrot_training - Step 33080: {'lr': 0.00013825534483221974, 'samples': 16937472, 'steps': 33080, 'loss/train': 1.1691253185272217} +02/25/2022 20:25:29 - INFO - codeparrot_training - Step 33081: {'lr': 0.00013824070811912774, 'samples': 16937984, 'steps': 33081, 'loss/train': 1.1718149185180664} +02/25/2022 20:25:32 - INFO - codeparrot_training - Step 33082: {'lr': 0.00013822607188477697, 'samples': 16938496, 'steps': 33082, 'loss/train': 1.8045306205749512} +02/25/2022 20:25:38 - INFO - codeparrot_training - Step 33083: {'lr': 0.00013821143612923026, 'samples': 16939008, 'steps': 33083, 'loss/train': 2.6224429607391357} +02/25/2022 20:25:41 - INFO - codeparrot_training - Step 33084: {'lr': 0.0001381968008525503, 'samples': 16939520, 'steps': 33084, 'loss/train': 1.9999854564666748} +02/25/2022 20:25:48 - INFO - codeparrot_training - Step 33085: {'lr': 0.00013818216605479978, 'samples': 16940032, 'steps': 33085, 'loss/train': 2.1719233989715576} +02/25/2022 20:25:51 - INFO - codeparrot_training - Step 33086: {'lr': 0.0001381675317360413, 'samples': 16940544, 'steps': 33086, 'loss/train': 7.925590991973877} +02/25/2022 20:25:57 - INFO - codeparrot_training - Step 33087: {'lr': 0.00013815289789633752, 'samples': 16941056, 'steps': 33087, 'loss/train': 1.7833501100540161} +02/25/2022 20:26:00 - INFO - codeparrot_training - Step 33088: {'lr': 0.00013813826453575135, 'samples': 16941568, 'steps': 33088, 'loss/train': 2.3182373046875} +02/25/2022 20:26:06 - INFO - codeparrot_training - Step 33089: {'lr': 0.00013812363165434526, 'samples': 16942080, 'steps': 33089, 'loss/train': 2.175645589828491} +02/25/2022 20:26:09 - INFO - codeparrot_training - Step 33090: {'lr': 0.00013810899925218207, 'samples': 16942592, 'steps': 33090, 'loss/train': 2.6579976081848145} +02/25/2022 20:26:15 - INFO - codeparrot_training - Step 33091: {'lr': 0.00013809436732932428, 'samples': 16943104, 'steps': 33091, 'loss/train': 1.1017791032791138} +02/25/2022 20:26:18 - INFO - codeparrot_training - Step 33092: {'lr': 0.0001380797358858348, 'samples': 16943616, 'steps': 33092, 'loss/train': 1.3910006284713745} +02/25/2022 20:26:24 - INFO - codeparrot_training - Step 33093: {'lr': 0.00013806510492177614, 'samples': 16944128, 'steps': 33093, 'loss/train': 2.0862746238708496} +02/25/2022 20:26:27 - INFO - codeparrot_training - Step 33094: {'lr': 0.00013805047443721108, 'samples': 16944640, 'steps': 33094, 'loss/train': 1.7025426626205444} +02/25/2022 20:26:33 - INFO - codeparrot_training - Step 33095: {'lr': 0.0001380358444322021, 'samples': 16945152, 'steps': 33095, 'loss/train': 2.4947752952575684} +02/25/2022 20:26:37 - INFO - codeparrot_training - Step 33096: {'lr': 0.00013802121490681213, 'samples': 16945664, 'steps': 33096, 'loss/train': 2.175145149230957} +02/25/2022 20:26:42 - INFO - codeparrot_training - Step 33097: {'lr': 0.0001380065858611037, 'samples': 16946176, 'steps': 33097, 'loss/train': 2.319929361343384} +02/25/2022 20:26:46 - INFO - codeparrot_training - Step 33098: {'lr': 0.00013799195729513952, 'samples': 16946688, 'steps': 33098, 'loss/train': 0.7369640469551086} +02/25/2022 20:26:52 - INFO - codeparrot_training - Step 33099: {'lr': 0.00013797732920898216, 'samples': 16947200, 'steps': 33099, 'loss/train': 2.231600761413574} +02/25/2022 20:26:56 - INFO - codeparrot_training - Step 33100: {'lr': 0.0001379627016026944, 'samples': 16947712, 'steps': 33100, 'loss/train': 2.0435006618499756} +02/25/2022 20:26:59 - INFO - codeparrot_training - Step 33101: {'lr': 0.00013794807447633874, 'samples': 16948224, 'steps': 33101, 'loss/train': 0.7494664192199707} +02/25/2022 20:27:05 - INFO - codeparrot_training - Step 33102: {'lr': 0.00013793344782997817, 'samples': 16948736, 'steps': 33102, 'loss/train': 0.8188365697860718} +02/25/2022 20:27:08 - INFO - codeparrot_training - Step 33103: {'lr': 0.00013791882166367493, 'samples': 16949248, 'steps': 33103, 'loss/train': 0.4193016588687897} +02/25/2022 20:27:14 - INFO - codeparrot_training - Step 33104: {'lr': 0.000137904195977492, 'samples': 16949760, 'steps': 33104, 'loss/train': 2.408766269683838} +02/25/2022 20:27:17 - INFO - codeparrot_training - Step 33105: {'lr': 0.00013788957077149174, 'samples': 16950272, 'steps': 33105, 'loss/train': 0.5733808875083923} +02/25/2022 20:27:23 - INFO - codeparrot_training - Step 33106: {'lr': 0.00013787494604573718, 'samples': 16950784, 'steps': 33106, 'loss/train': 1.0324296951293945} +02/25/2022 20:27:26 - INFO - codeparrot_training - Step 33107: {'lr': 0.00013786032180029053, 'samples': 16951296, 'steps': 33107, 'loss/train': 2.0375800132751465} +02/25/2022 20:27:32 - INFO - codeparrot_training - Step 33108: {'lr': 0.0001378456980352148, 'samples': 16951808, 'steps': 33108, 'loss/train': 0.04046846181154251} +02/25/2022 20:27:37 - INFO - codeparrot_training - Step 33109: {'lr': 0.00013783107475057234, 'samples': 16952320, 'steps': 33109, 'loss/train': 1.7546592950820923} +02/25/2022 20:27:41 - INFO - codeparrot_training - Step 33110: {'lr': 0.00013781645194642616, 'samples': 16952832, 'steps': 33110, 'loss/train': 2.344900608062744} +02/25/2022 20:27:47 - INFO - codeparrot_training - Step 33111: {'lr': 0.0001378018296228385, 'samples': 16953344, 'steps': 33111, 'loss/train': 1.6780171394348145} +02/25/2022 20:27:50 - INFO - codeparrot_training - Step 33112: {'lr': 0.00013778720777987225, 'samples': 16953856, 'steps': 33112, 'loss/train': 1.6987971067428589} +02/25/2022 20:27:54 - INFO - codeparrot_training - Step 33113: {'lr': 0.00013777258641758982, 'samples': 16954368, 'steps': 33113, 'loss/train': 1.5293858051300049} +02/25/2022 20:28:00 - INFO - codeparrot_training - Step 33114: {'lr': 0.00013775796553605417, 'samples': 16954880, 'steps': 33114, 'loss/train': 1.8258339166641235} +02/25/2022 20:28:04 - INFO - codeparrot_training - Step 33115: {'lr': 0.00013774334513532772, 'samples': 16955392, 'steps': 33115, 'loss/train': 2.187664270401001} +02/25/2022 20:28:09 - INFO - codeparrot_training - Step 33116: {'lr': 0.00013772872521547314, 'samples': 16955904, 'steps': 33116, 'loss/train': 1.9151933193206787} +02/25/2022 20:28:12 - INFO - codeparrot_training - Step 33117: {'lr': 0.00013771410577655298, 'samples': 16956416, 'steps': 33117, 'loss/train': 0.0994546115398407} +02/25/2022 20:28:18 - INFO - codeparrot_training - Step 33118: {'lr': 0.00013769948681862997, 'samples': 16956928, 'steps': 33118, 'loss/train': 1.7644505500793457} +02/25/2022 20:28:22 - INFO - codeparrot_training - Step 33119: {'lr': 0.00013768486834176677, 'samples': 16957440, 'steps': 33119, 'loss/train': 0.7357638478279114} +02/25/2022 20:28:27 - INFO - codeparrot_training - Step 33120: {'lr': 0.0001376702503460259, 'samples': 16957952, 'steps': 33120, 'loss/train': 1.7830103635787964} +02/25/2022 20:28:30 - INFO - codeparrot_training - Step 33121: {'lr': 0.00013765563283146993, 'samples': 16958464, 'steps': 33121, 'loss/train': 0.5785823464393616} +02/25/2022 20:28:36 - INFO - codeparrot_training - Step 33122: {'lr': 0.0001376410157981616, 'samples': 16958976, 'steps': 33122, 'loss/train': 1.0229709148406982} +02/25/2022 20:28:39 - INFO - codeparrot_training - Step 33123: {'lr': 0.00013762639924616351, 'samples': 16959488, 'steps': 33123, 'loss/train': 0.8258456587791443} +02/25/2022 20:28:46 - INFO - codeparrot_training - Step 33124: {'lr': 0.00013761178317553825, 'samples': 16960000, 'steps': 33124, 'loss/train': 2.104133129119873} +02/25/2022 20:28:49 - INFO - codeparrot_training - Step 33125: {'lr': 0.00013759716758634833, 'samples': 16960512, 'steps': 33125, 'loss/train': 1.05113685131073} +02/25/2022 20:28:55 - INFO - codeparrot_training - Step 33126: {'lr': 0.00013758255247865652, 'samples': 16961024, 'steps': 33126, 'loss/train': 1.6678348779678345} +02/25/2022 20:28:58 - INFO - codeparrot_training - Step 33127: {'lr': 0.0001375679378525254, 'samples': 16961536, 'steps': 33127, 'loss/train': 1.6824684143066406} +02/25/2022 20:29:04 - INFO - codeparrot_training - Step 33128: {'lr': 0.0001375533237080175, 'samples': 16962048, 'steps': 33128, 'loss/train': 2.2287282943725586} +02/25/2022 20:29:07 - INFO - codeparrot_training - Step 33129: {'lr': 0.0001375387100451954, 'samples': 16962560, 'steps': 33129, 'loss/train': 2.9126675128936768} +02/25/2022 20:29:13 - INFO - codeparrot_training - Step 33130: {'lr': 0.00013752409686412182, 'samples': 16963072, 'steps': 33130, 'loss/train': 1.0279017686843872} +02/25/2022 20:29:16 - INFO - codeparrot_training - Step 33131: {'lr': 0.0001375094841648593, 'samples': 16963584, 'steps': 33131, 'loss/train': 1.4462863206863403} +02/25/2022 20:29:22 - INFO - codeparrot_training - Step 33132: {'lr': 0.0001374948719474704, 'samples': 16964096, 'steps': 33132, 'loss/train': 0.4972841441631317} +02/25/2022 20:29:25 - INFO - codeparrot_training - Step 33133: {'lr': 0.0001374802602120177, 'samples': 16964608, 'steps': 33133, 'loss/train': 1.721798062324524} +02/25/2022 20:29:32 - INFO - codeparrot_training - Step 33134: {'lr': 0.0001374656489585639, 'samples': 16965120, 'steps': 33134, 'loss/train': 0.14702363312244415} +02/25/2022 20:29:35 - INFO - codeparrot_training - Step 33135: {'lr': 0.00013745103818717152, 'samples': 16965632, 'steps': 33135, 'loss/train': 1.9318891763687134} +02/25/2022 20:29:41 - INFO - codeparrot_training - Step 33136: {'lr': 0.00013743642789790318, 'samples': 16966144, 'steps': 33136, 'loss/train': 0.4734211266040802} +02/25/2022 20:29:44 - INFO - codeparrot_training - Step 33137: {'lr': 0.00013742181809082144, 'samples': 16966656, 'steps': 33137, 'loss/train': 1.9879918098449707} +02/25/2022 20:29:48 - INFO - codeparrot_training - Step 33138: {'lr': 0.00013740720876598877, 'samples': 16967168, 'steps': 33138, 'loss/train': 2.406527280807495} +02/25/2022 20:29:53 - INFO - codeparrot_training - Step 33139: {'lr': 0.000137392599923468, 'samples': 16967680, 'steps': 33139, 'loss/train': 2.1891274452209473} +02/25/2022 20:29:59 - INFO - codeparrot_training - Step 33140: {'lr': 0.00013737799156332144, 'samples': 16968192, 'steps': 33140, 'loss/train': 1.9293526411056519} +02/25/2022 20:30:02 - INFO - codeparrot_training - Step 33141: {'lr': 0.00013736338368561197, 'samples': 16968704, 'steps': 33141, 'loss/train': 2.776402473449707} +02/25/2022 20:30:08 - INFO - codeparrot_training - Step 33142: {'lr': 0.00013734877629040183, 'samples': 16969216, 'steps': 33142, 'loss/train': 1.84799063205719} +02/25/2022 20:30:12 - INFO - codeparrot_training - Step 33143: {'lr': 0.0001373341693777539, 'samples': 16969728, 'steps': 33143, 'loss/train': 1.8786133527755737} +02/25/2022 20:30:17 - INFO - codeparrot_training - Step 33144: {'lr': 0.00013731956294773046, 'samples': 16970240, 'steps': 33144, 'loss/train': 1.5956374406814575} +02/25/2022 20:30:21 - INFO - codeparrot_training - Step 33145: {'lr': 0.00013730495700039444, 'samples': 16970752, 'steps': 33145, 'loss/train': 0.39798957109451294} +02/25/2022 20:30:26 - INFO - codeparrot_training - Step 33146: {'lr': 0.000137290351535808, 'samples': 16971264, 'steps': 33146, 'loss/train': 2.0345547199249268} +02/25/2022 20:30:30 - INFO - codeparrot_training - Step 33147: {'lr': 0.00013727574655403402, 'samples': 16971776, 'steps': 33147, 'loss/train': 2.4439237117767334} +02/25/2022 20:30:35 - INFO - codeparrot_training - Step 33148: {'lr': 0.00013726114205513484, 'samples': 16972288, 'steps': 33148, 'loss/train': 1.4308420419692993} +02/25/2022 20:30:39 - INFO - codeparrot_training - Step 33149: {'lr': 0.00013724653803917338, 'samples': 16972800, 'steps': 33149, 'loss/train': 0.8693727254867554} +02/25/2022 20:30:44 - INFO - codeparrot_training - Step 33150: {'lr': 0.00013723193450621167, 'samples': 16973312, 'steps': 33150, 'loss/train': 1.1945158243179321} +02/25/2022 20:30:48 - INFO - codeparrot_training - Step 33151: {'lr': 0.00013721733145631264, 'samples': 16973824, 'steps': 33151, 'loss/train': 1.2456892728805542} +02/25/2022 20:30:53 - INFO - codeparrot_training - Step 33152: {'lr': 0.0001372027288895387, 'samples': 16974336, 'steps': 33152, 'loss/train': 8.444211959838867} +02/25/2022 20:30:57 - INFO - codeparrot_training - Step 33153: {'lr': 0.00013718812680595254, 'samples': 16974848, 'steps': 33153, 'loss/train': 1.0180572271347046} +02/25/2022 20:31:02 - INFO - codeparrot_training - Step 33154: {'lr': 0.00013717352520561664, 'samples': 16975360, 'steps': 33154, 'loss/train': 0.6758490204811096} +02/25/2022 20:31:06 - INFO - codeparrot_training - Step 33155: {'lr': 0.00013715892408859348, 'samples': 16975872, 'steps': 33155, 'loss/train': 2.796698808670044} +02/25/2022 20:31:11 - INFO - codeparrot_training - Step 33156: {'lr': 0.0001371443234549456, 'samples': 16976384, 'steps': 33156, 'loss/train': 4.349503040313721} +02/25/2022 20:31:15 - INFO - codeparrot_training - Step 33157: {'lr': 0.0001371297233047357, 'samples': 16976896, 'steps': 33157, 'loss/train': 1.1091523170471191} +02/25/2022 20:31:21 - INFO - codeparrot_training - Step 33158: {'lr': 0.0001371151236380262, 'samples': 16977408, 'steps': 33158, 'loss/train': 2.1146535873413086} +02/25/2022 20:31:24 - INFO - codeparrot_training - Step 33159: {'lr': 0.0001371005244548797, 'samples': 16977920, 'steps': 33159, 'loss/train': 0.3008519113063812} +02/25/2022 20:31:30 - INFO - codeparrot_training - Step 33160: {'lr': 0.00013708592575535858, 'samples': 16978432, 'steps': 33160, 'loss/train': 2.463362455368042} +02/25/2022 20:31:33 - INFO - codeparrot_training - Step 33161: {'lr': 0.00013707132753952562, 'samples': 16978944, 'steps': 33161, 'loss/train': 1.5532010793685913} +02/25/2022 20:31:39 - INFO - codeparrot_training - Step 33162: {'lr': 0.0001370567298074431, 'samples': 16979456, 'steps': 33162, 'loss/train': 2.880884885787964} +02/25/2022 20:31:42 - INFO - codeparrot_training - Step 33163: {'lr': 0.0001370421325591739, 'samples': 16979968, 'steps': 33163, 'loss/train': 2.054656744003296} +02/25/2022 20:31:48 - INFO - codeparrot_training - Step 33164: {'lr': 0.00013702753579478017, 'samples': 16980480, 'steps': 33164, 'loss/train': 1.75658118724823} +02/25/2022 20:31:51 - INFO - codeparrot_training - Step 33165: {'lr': 0.00013701293951432465, 'samples': 16980992, 'steps': 33165, 'loss/train': 2.1616644859313965} +02/25/2022 20:31:57 - INFO - codeparrot_training - Step 33166: {'lr': 0.00013699834371786975, 'samples': 16981504, 'steps': 33166, 'loss/train': 3.6250452995300293} +02/25/2022 20:32:00 - INFO - codeparrot_training - Step 33167: {'lr': 0.00013698374840547827, 'samples': 16982016, 'steps': 33167, 'loss/train': 0.735996425151825} +02/25/2022 20:32:06 - INFO - codeparrot_training - Step 33168: {'lr': 0.0001369691535772123, 'samples': 16982528, 'steps': 33168, 'loss/train': 1.6416651010513306} +02/25/2022 20:32:09 - INFO - codeparrot_training - Step 33169: {'lr': 0.0001369545592331347, 'samples': 16983040, 'steps': 33169, 'loss/train': 1.380927562713623} +02/25/2022 20:32:15 - INFO - codeparrot_training - Step 33170: {'lr': 0.0001369399653733079, 'samples': 16983552, 'steps': 33170, 'loss/train': 0.5697680711746216} +02/25/2022 20:32:19 - INFO - codeparrot_training - Step 33171: {'lr': 0.00013692537199779434, 'samples': 16984064, 'steps': 33171, 'loss/train': 1.2204687595367432} +02/25/2022 20:32:24 - INFO - codeparrot_training - Step 33172: {'lr': 0.0001369107791066565, 'samples': 16984576, 'steps': 33172, 'loss/train': 1.3063995838165283} +02/25/2022 20:32:28 - INFO - codeparrot_training - Step 33173: {'lr': 0.00013689618669995707, 'samples': 16985088, 'steps': 33173, 'loss/train': 1.1784683465957642} +02/25/2022 20:32:33 - INFO - codeparrot_training - Step 33174: {'lr': 0.00013688159477775848, 'samples': 16985600, 'steps': 33174, 'loss/train': 1.8335788249969482} +02/25/2022 20:32:37 - INFO - codeparrot_training - Step 33175: {'lr': 0.0001368670033401231, 'samples': 16986112, 'steps': 33175, 'loss/train': 2.501119375228882} +02/25/2022 20:32:42 - INFO - codeparrot_training - Step 33176: {'lr': 0.00013685241238711366, 'samples': 16986624, 'steps': 33176, 'loss/train': 1.634509801864624} +02/25/2022 20:32:46 - INFO - codeparrot_training - Step 33177: {'lr': 0.00013683782191879253, 'samples': 16987136, 'steps': 33177, 'loss/train': 2.0425705909729004} +02/25/2022 20:32:51 - INFO - codeparrot_training - Step 33178: {'lr': 0.00013682323193522225, 'samples': 16987648, 'steps': 33178, 'loss/train': 1.3294026851654053} +02/25/2022 20:32:55 - INFO - codeparrot_training - Step 33179: {'lr': 0.00013680864243646523, 'samples': 16988160, 'steps': 33179, 'loss/train': 0.7168804407119751} +02/25/2022 20:33:01 - INFO - codeparrot_training - Step 33180: {'lr': 0.00013679405342258412, 'samples': 16988672, 'steps': 33180, 'loss/train': 1.2534846067428589} +02/25/2022 20:33:05 - INFO - codeparrot_training - Step 33181: {'lr': 0.00013677946489364134, 'samples': 16989184, 'steps': 33181, 'loss/train': 2.8118321895599365} +02/25/2022 20:33:10 - INFO - codeparrot_training - Step 33182: {'lr': 0.0001367648768496994, 'samples': 16989696, 'steps': 33182, 'loss/train': 0.6932120323181152} +02/25/2022 20:33:14 - INFO - codeparrot_training - Step 33183: {'lr': 0.00013675028929082067, 'samples': 16990208, 'steps': 33183, 'loss/train': 1.7817246913909912} +02/25/2022 20:33:19 - INFO - codeparrot_training - Step 33184: {'lr': 0.00013673570221706794, 'samples': 16990720, 'steps': 33184, 'loss/train': 1.319392204284668} +02/25/2022 20:33:23 - INFO - codeparrot_training - Step 33185: {'lr': 0.0001367211156285033, 'samples': 16991232, 'steps': 33185, 'loss/train': 1.7152141332626343} +02/25/2022 20:33:28 - INFO - codeparrot_training - Step 33186: {'lr': 0.00013670652952518949, 'samples': 16991744, 'steps': 33186, 'loss/train': 1.6319048404693604} +02/25/2022 20:33:32 - INFO - codeparrot_training - Step 33187: {'lr': 0.00013669194390718886, 'samples': 16992256, 'steps': 33187, 'loss/train': 1.635172963142395} +02/25/2022 20:33:37 - INFO - codeparrot_training - Step 33188: {'lr': 0.00013667735877456405, 'samples': 16992768, 'steps': 33188, 'loss/train': 1.7197566032409668} +02/25/2022 20:33:41 - INFO - codeparrot_training - Step 33189: {'lr': 0.00013666277412737748, 'samples': 16993280, 'steps': 33189, 'loss/train': 1.944015383720398} +02/25/2022 20:33:47 - INFO - codeparrot_training - Step 33190: {'lr': 0.0001366481899656916, 'samples': 16993792, 'steps': 33190, 'loss/train': 1.2589294910430908} +02/25/2022 20:33:51 - INFO - codeparrot_training - Step 33191: {'lr': 0.00013663360628956875, 'samples': 16994304, 'steps': 33191, 'loss/train': 1.6134530305862427} +02/25/2022 20:33:56 - INFO - codeparrot_training - Step 33192: {'lr': 0.00013661902309907166, 'samples': 16994816, 'steps': 33192, 'loss/train': 2.4372260570526123} +02/25/2022 20:34:00 - INFO - codeparrot_training - Step 33193: {'lr': 0.00013660444039426264, 'samples': 16995328, 'steps': 33193, 'loss/train': 2.484558582305908} +02/25/2022 20:34:05 - INFO - codeparrot_training - Step 33194: {'lr': 0.0001365898581752042, 'samples': 16995840, 'steps': 33194, 'loss/train': 0.4016493558883667} +02/25/2022 20:34:09 - INFO - codeparrot_training - Step 33195: {'lr': 0.0001365752764419587, 'samples': 16996352, 'steps': 33195, 'loss/train': 0.8811741471290588} +02/25/2022 20:34:14 - INFO - codeparrot_training - Step 33196: {'lr': 0.0001365606951945888, 'samples': 16996864, 'steps': 33196, 'loss/train': 2.482672929763794} +02/25/2022 20:34:18 - INFO - codeparrot_training - Step 33197: {'lr': 0.0001365461144331569, 'samples': 16997376, 'steps': 33197, 'loss/train': 0.10973562300205231} +02/25/2022 20:34:23 - INFO - codeparrot_training - Step 33198: {'lr': 0.00013653153415772536, 'samples': 16997888, 'steps': 33198, 'loss/train': 1.4116977453231812} +02/25/2022 20:34:27 - INFO - codeparrot_training - Step 33199: {'lr': 0.00013651695436835666, 'samples': 16998400, 'steps': 33199, 'loss/train': 0.6756554841995239} +02/25/2022 20:34:32 - INFO - codeparrot_training - Step 33200: {'lr': 0.00013650237506511331, 'samples': 16998912, 'steps': 33200, 'loss/train': 0.7271454334259033} +02/25/2022 20:34:36 - INFO - codeparrot_training - Step 33201: {'lr': 0.0001364877962480577, 'samples': 16999424, 'steps': 33201, 'loss/train': 3.139294147491455} +02/25/2022 20:34:41 - INFO - codeparrot_training - Step 33202: {'lr': 0.00013647321791725254, 'samples': 16999936, 'steps': 33202, 'loss/train': 1.9846982955932617} +02/25/2022 20:34:47 - INFO - codeparrot_training - Step 33203: {'lr': 0.00013645864007275982, 'samples': 17000448, 'steps': 33203, 'loss/train': 1.9110839366912842} +02/25/2022 20:34:50 - INFO - codeparrot_training - Step 33204: {'lr': 0.00013644406271464234, 'samples': 17000960, 'steps': 33204, 'loss/train': 0.6278583407402039} +02/25/2022 20:34:56 - INFO - codeparrot_training - Step 33205: {'lr': 0.00013642948584296238, 'samples': 17001472, 'steps': 33205, 'loss/train': 1.556687593460083} +02/25/2022 20:35:00 - INFO - codeparrot_training - Step 33206: {'lr': 0.0001364149094577826, 'samples': 17001984, 'steps': 33206, 'loss/train': 2.284637451171875} +02/25/2022 20:35:05 - INFO - codeparrot_training - Step 33207: {'lr': 0.00013640033355916508, 'samples': 17002496, 'steps': 33207, 'loss/train': 0.92034912109375} +02/25/2022 20:35:09 - INFO - codeparrot_training - Step 33208: {'lr': 0.00013638575814717258, 'samples': 17003008, 'steps': 33208, 'loss/train': 0.9885058999061584} +02/25/2022 20:35:14 - INFO - codeparrot_training - Step 33209: {'lr': 0.00013637118322186732, 'samples': 17003520, 'steps': 33209, 'loss/train': 0.9888502359390259} +02/25/2022 20:35:18 - INFO - codeparrot_training - Step 33210: {'lr': 0.0001363566087833119, 'samples': 17004032, 'steps': 33210, 'loss/train': 1.9248392581939697} +02/25/2022 20:35:23 - INFO - codeparrot_training - Step 33211: {'lr': 0.00013634203483156872, 'samples': 17004544, 'steps': 33211, 'loss/train': 2.1137912273406982} +02/25/2022 20:35:27 - INFO - codeparrot_training - Step 33212: {'lr': 0.00013632746136670016, 'samples': 17005056, 'steps': 33212, 'loss/train': 1.825897455215454} +02/25/2022 20:35:32 - INFO - codeparrot_training - Step 33213: {'lr': 0.00013631288838876853, 'samples': 17005568, 'steps': 33213, 'loss/train': 1.3221735954284668} +02/25/2022 20:35:36 - INFO - codeparrot_training - Step 33214: {'lr': 0.0001362983158978365, 'samples': 17006080, 'steps': 33214, 'loss/train': 1.058686375617981} +02/25/2022 20:35:42 - INFO - codeparrot_training - Step 33215: {'lr': 0.0001362837438939664, 'samples': 17006592, 'steps': 33215, 'loss/train': 2.1753990650177} +02/25/2022 20:35:46 - INFO - codeparrot_training - Step 33216: {'lr': 0.0001362691723772206, 'samples': 17007104, 'steps': 33216, 'loss/train': 2.5073959827423096} +02/25/2022 20:35:51 - INFO - codeparrot_training - Step 33217: {'lr': 0.0001362546013476616, 'samples': 17007616, 'steps': 33217, 'loss/train': 1.8683409690856934} +02/25/2022 20:35:55 - INFO - codeparrot_training - Step 33218: {'lr': 0.00013624003080535163, 'samples': 17008128, 'steps': 33218, 'loss/train': 1.471221685409546} +02/25/2022 20:36:00 - INFO - codeparrot_training - Step 33219: {'lr': 0.00013622546075035336, 'samples': 17008640, 'steps': 33219, 'loss/train': 1.8549385070800781} +02/25/2022 20:36:04 - INFO - codeparrot_training - Step 33220: {'lr': 0.0001362108911827291, 'samples': 17009152, 'steps': 33220, 'loss/train': 1.9270386695861816} +02/25/2022 20:36:09 - INFO - codeparrot_training - Step 33221: {'lr': 0.00013619632210254125, 'samples': 17009664, 'steps': 33221, 'loss/train': 1.8155683279037476} +02/25/2022 20:36:13 - INFO - codeparrot_training - Step 33222: {'lr': 0.00013618175350985206, 'samples': 17010176, 'steps': 33222, 'loss/train': 0.5815594792366028} +02/25/2022 20:36:18 - INFO - codeparrot_training - Step 33223: {'lr': 0.00013616718540472426, 'samples': 17010688, 'steps': 33223, 'loss/train': 2.2447140216827393} +02/25/2022 20:36:22 - INFO - codeparrot_training - Step 33224: {'lr': 0.00013615261778722007, 'samples': 17011200, 'steps': 33224, 'loss/train': 1.4483602046966553} +02/25/2022 20:36:28 - INFO - codeparrot_training - Step 33225: {'lr': 0.00013613805065740191, 'samples': 17011712, 'steps': 33225, 'loss/train': 1.7261251211166382} +02/25/2022 20:36:31 - INFO - codeparrot_training - Step 33226: {'lr': 0.00013612348401533208, 'samples': 17012224, 'steps': 33226, 'loss/train': 2.24259090423584} +02/25/2022 20:36:37 - INFO - codeparrot_training - Step 33227: {'lr': 0.00013610891786107322, 'samples': 17012736, 'steps': 33227, 'loss/train': 1.4078744649887085} +02/25/2022 20:36:40 - INFO - codeparrot_training - Step 33228: {'lr': 0.00013609435219468757, 'samples': 17013248, 'steps': 33228, 'loss/train': 1.9138964414596558} +02/25/2022 20:36:46 - INFO - codeparrot_training - Step 33229: {'lr': 0.00013607978701623755, 'samples': 17013760, 'steps': 33229, 'loss/train': 0.8258975744247437} +02/25/2022 20:36:49 - INFO - codeparrot_training - Step 33230: {'lr': 0.00013606522232578543, 'samples': 17014272, 'steps': 33230, 'loss/train': 1.6112148761749268} +02/25/2022 20:36:55 - INFO - codeparrot_training - Step 33231: {'lr': 0.00013605065812339384, 'samples': 17014784, 'steps': 33231, 'loss/train': 1.1645756959915161} +02/25/2022 20:36:58 - INFO - codeparrot_training - Step 33232: {'lr': 0.00013603609440912507, 'samples': 17015296, 'steps': 33232, 'loss/train': 1.3849626779556274} +02/25/2022 20:37:04 - INFO - codeparrot_training - Step 33233: {'lr': 0.00013602153118304146, 'samples': 17015808, 'steps': 33233, 'loss/train': 1.0744203329086304} +02/25/2022 20:37:07 - INFO - codeparrot_training - Step 33234: {'lr': 0.00013600696844520528, 'samples': 17016320, 'steps': 33234, 'loss/train': 1.4633487462997437} +02/25/2022 20:37:14 - INFO - codeparrot_training - Step 33235: {'lr': 0.0001359924061956792, 'samples': 17016832, 'steps': 33235, 'loss/train': 1.346463680267334} +02/25/2022 20:37:17 - INFO - codeparrot_training - Step 33236: {'lr': 0.00013597784443452533, 'samples': 17017344, 'steps': 33236, 'loss/train': 2.6140599250793457} +02/25/2022 20:37:23 - INFO - codeparrot_training - Step 33237: {'lr': 0.00013596328316180638, 'samples': 17017856, 'steps': 33237, 'loss/train': 2.1065449714660645} +02/25/2022 20:37:26 - INFO - codeparrot_training - Step 33238: {'lr': 0.00013594872237758428, 'samples': 17018368, 'steps': 33238, 'loss/train': 1.4545146226882935} +02/25/2022 20:37:32 - INFO - codeparrot_training - Step 33239: {'lr': 0.00013593416208192177, 'samples': 17018880, 'steps': 33239, 'loss/train': 0.8511667251586914} +02/25/2022 20:37:35 - INFO - codeparrot_training - Step 33240: {'lr': 0.00013591960227488098, 'samples': 17019392, 'steps': 33240, 'loss/train': 0.42749881744384766} +02/25/2022 20:37:41 - INFO - codeparrot_training - Step 33241: {'lr': 0.00013590504295652462, 'samples': 17019904, 'steps': 33241, 'loss/train': 1.4729161262512207} +02/25/2022 20:37:44 - INFO - codeparrot_training - Step 33242: {'lr': 0.00013589048412691458, 'samples': 17020416, 'steps': 33242, 'loss/train': 1.772336483001709} +02/25/2022 20:37:50 - INFO - codeparrot_training - Step 33243: {'lr': 0.0001358759257861136, 'samples': 17020928, 'steps': 33243, 'loss/train': 1.1782811880111694} +02/25/2022 20:37:53 - INFO - codeparrot_training - Step 33244: {'lr': 0.0001358613679341838, 'samples': 17021440, 'steps': 33244, 'loss/train': 2.6781327724456787} +02/25/2022 20:37:59 - INFO - codeparrot_training - Step 33245: {'lr': 0.00013584681057118787, 'samples': 17021952, 'steps': 33245, 'loss/train': 1.1321033239364624} +02/25/2022 20:38:02 - INFO - codeparrot_training - Step 33246: {'lr': 0.00013583225369718778, 'samples': 17022464, 'steps': 33246, 'loss/train': 2.751540422439575} +02/25/2022 20:38:08 - INFO - codeparrot_training - Step 33247: {'lr': 0.0001358176973122461, 'samples': 17022976, 'steps': 33247, 'loss/train': 0.070212721824646} +02/25/2022 20:38:11 - INFO - codeparrot_training - Step 33248: {'lr': 0.00013580314141642508, 'samples': 17023488, 'steps': 33248, 'loss/train': 1.7476670742034912} +02/25/2022 20:38:17 - INFO - codeparrot_training - Step 33249: {'lr': 0.00013578858600978727, 'samples': 17024000, 'steps': 33249, 'loss/train': 1.8255046606063843} +02/25/2022 20:38:20 - INFO - codeparrot_training - Step 33250: {'lr': 0.00013577403109239484, 'samples': 17024512, 'steps': 33250, 'loss/train': 0.021790722385048866} +02/25/2022 20:38:27 - INFO - codeparrot_training - Step 33251: {'lr': 0.0001357594766643102, 'samples': 17025024, 'steps': 33251, 'loss/train': 2.06384539604187} +02/25/2022 20:38:30 - INFO - codeparrot_training - Step 33252: {'lr': 0.00013574492272559565, 'samples': 17025536, 'steps': 33252, 'loss/train': 2.180598258972168} +02/25/2022 20:38:36 - INFO - codeparrot_training - Step 33253: {'lr': 0.0001357303692763136, 'samples': 17026048, 'steps': 33253, 'loss/train': 1.595044732093811} +02/25/2022 20:38:39 - INFO - codeparrot_training - Step 33254: {'lr': 0.00013571581631652638, 'samples': 17026560, 'steps': 33254, 'loss/train': 0.8401834964752197} +02/25/2022 20:38:45 - INFO - codeparrot_training - Step 33255: {'lr': 0.00013570126384629633, 'samples': 17027072, 'steps': 33255, 'loss/train': 0.8191642165184021} +02/25/2022 20:38:48 - INFO - codeparrot_training - Step 33256: {'lr': 0.00013568671186568566, 'samples': 17027584, 'steps': 33256, 'loss/train': 2.2237672805786133} +02/25/2022 20:38:54 - INFO - codeparrot_training - Step 33257: {'lr': 0.0001356721603747569, 'samples': 17028096, 'steps': 33257, 'loss/train': 1.6509199142456055} +02/25/2022 20:38:57 - INFO - codeparrot_training - Step 33258: {'lr': 0.0001356576093735723, 'samples': 17028608, 'steps': 33258, 'loss/train': 1.8999238014221191} +02/25/2022 20:39:02 - INFO - codeparrot_training - Step 33259: {'lr': 0.00013564305886219424, 'samples': 17029120, 'steps': 33259, 'loss/train': 1.1543700695037842} +02/25/2022 20:39:06 - INFO - codeparrot_training - Step 33260: {'lr': 0.00013562850884068486, 'samples': 17029632, 'steps': 33260, 'loss/train': 0.4423770606517792} +02/25/2022 20:39:12 - INFO - codeparrot_training - Step 33261: {'lr': 0.00013561395930910674, 'samples': 17030144, 'steps': 33261, 'loss/train': 1.4676690101623535} +02/25/2022 20:39:16 - INFO - codeparrot_training - Step 33262: {'lr': 0.00013559941026752213, 'samples': 17030656, 'steps': 33262, 'loss/train': 1.2208727598190308} +02/25/2022 20:39:21 - INFO - codeparrot_training - Step 33263: {'lr': 0.00013558486171599327, 'samples': 17031168, 'steps': 33263, 'loss/train': 1.3609212636947632} +02/25/2022 20:39:24 - INFO - codeparrot_training - Step 33264: {'lr': 0.00013557031365458256, 'samples': 17031680, 'steps': 33264, 'loss/train': 1.631249189376831} +02/25/2022 20:39:30 - INFO - codeparrot_training - Step 33265: {'lr': 0.00013555576608335218, 'samples': 17032192, 'steps': 33265, 'loss/train': 1.5071861743927002} +02/25/2022 20:39:33 - INFO - codeparrot_training - Step 33266: {'lr': 0.00013554121900236465, 'samples': 17032704, 'steps': 33266, 'loss/train': 1.347674012184143} +02/25/2022 20:39:39 - INFO - codeparrot_training - Step 33267: {'lr': 0.00013552667241168223, 'samples': 17033216, 'steps': 33267, 'loss/train': 1.5077143907546997} +02/25/2022 20:39:43 - INFO - codeparrot_training - Step 33268: {'lr': 0.00013551212631136717, 'samples': 17033728, 'steps': 33268, 'loss/train': 2.0944697856903076} +02/25/2022 20:39:48 - INFO - codeparrot_training - Step 33269: {'lr': 0.0001354975807014817, 'samples': 17034240, 'steps': 33269, 'loss/train': 0.8736912608146667} +02/25/2022 20:39:52 - INFO - codeparrot_training - Step 33270: {'lr': 0.00013548303558208835, 'samples': 17034752, 'steps': 33270, 'loss/train': 2.776947259902954} +02/25/2022 20:39:58 - INFO - codeparrot_training - Step 33271: {'lr': 0.0001354684909532492, 'samples': 17035264, 'steps': 33271, 'loss/train': 1.0085524320602417} +02/25/2022 20:40:01 - INFO - codeparrot_training - Step 33272: {'lr': 0.00013545394681502689, 'samples': 17035776, 'steps': 33272, 'loss/train': 2.1047728061676025} +02/25/2022 20:40:07 - INFO - codeparrot_training - Step 33273: {'lr': 0.0001354394031674833, 'samples': 17036288, 'steps': 33273, 'loss/train': 1.6534430980682373} +02/25/2022 20:40:11 - INFO - codeparrot_training - Step 33274: {'lr': 0.000135424860010681, 'samples': 17036800, 'steps': 33274, 'loss/train': 3.855248212814331} +02/25/2022 20:40:17 - INFO - codeparrot_training - Step 33275: {'lr': 0.00013541031734468211, 'samples': 17037312, 'steps': 33275, 'loss/train': 1.2242116928100586} +02/25/2022 20:40:20 - INFO - codeparrot_training - Step 33276: {'lr': 0.00013539577516954925, 'samples': 17037824, 'steps': 33276, 'loss/train': 1.2425414323806763} +02/25/2022 20:40:26 - INFO - codeparrot_training - Step 33277: {'lr': 0.00013538123348534435, 'samples': 17038336, 'steps': 33277, 'loss/train': 1.962565541267395} +02/25/2022 20:40:29 - INFO - codeparrot_training - Step 33278: {'lr': 0.0001353666922921299, 'samples': 17038848, 'steps': 33278, 'loss/train': 1.3285073041915894} +02/25/2022 20:40:35 - INFO - codeparrot_training - Step 33279: {'lr': 0.00013535215158996805, 'samples': 17039360, 'steps': 33279, 'loss/train': 1.932297706604004} +02/25/2022 20:40:38 - INFO - codeparrot_training - Step 33280: {'lr': 0.00013533761137892136, 'samples': 17039872, 'steps': 33280, 'loss/train': 1.4528483152389526} +02/25/2022 20:40:44 - INFO - codeparrot_training - Step 33281: {'lr': 0.00013532307165905176, 'samples': 17040384, 'steps': 33281, 'loss/train': 2.297314405441284} +02/25/2022 20:40:48 - INFO - codeparrot_training - Step 33282: {'lr': 0.0001353085324304218, 'samples': 17040896, 'steps': 33282, 'loss/train': 1.5145633220672607} +02/25/2022 20:40:53 - INFO - codeparrot_training - Step 33283: {'lr': 0.00013529399369309353, 'samples': 17041408, 'steps': 33283, 'loss/train': 2.052212715148926} +02/25/2022 20:40:59 - INFO - codeparrot_training - Step 33284: {'lr': 0.0001352794554471295, 'samples': 17041920, 'steps': 33284, 'loss/train': 2.34114408493042} +02/25/2022 20:41:02 - INFO - codeparrot_training - Step 33285: {'lr': 0.00013526491769259185, 'samples': 17042432, 'steps': 33285, 'loss/train': 2.142836332321167} +02/25/2022 20:41:08 - INFO - codeparrot_training - Step 33286: {'lr': 0.00013525038042954288, 'samples': 17042944, 'steps': 33286, 'loss/train': 2.1459133625030518} +02/25/2022 20:41:11 - INFO - codeparrot_training - Step 33287: {'lr': 0.00013523584365804473, 'samples': 17043456, 'steps': 33287, 'loss/train': 2.2856240272521973} +02/25/2022 20:41:17 - INFO - codeparrot_training - Step 33288: {'lr': 0.00013522130737815988, 'samples': 17043968, 'steps': 33288, 'loss/train': 1.3184434175491333} +02/25/2022 20:41:20 - INFO - codeparrot_training - Step 33289: {'lr': 0.00013520677158995048, 'samples': 17044480, 'steps': 33289, 'loss/train': 2.2861545085906982} +02/25/2022 20:41:26 - INFO - codeparrot_training - Step 33290: {'lr': 0.00013519223629347889, 'samples': 17044992, 'steps': 33290, 'loss/train': 0.9833929538726807} +02/25/2022 20:41:29 - INFO - codeparrot_training - Step 33291: {'lr': 0.00013517770148880714, 'samples': 17045504, 'steps': 33291, 'loss/train': 1.1772944927215576} +02/25/2022 20:41:35 - INFO - codeparrot_training - Step 33292: {'lr': 0.00013516316717599776, 'samples': 17046016, 'steps': 33292, 'loss/train': 1.5721584558486938} +02/25/2022 20:41:38 - INFO - codeparrot_training - Step 33293: {'lr': 0.00013514863335511293, 'samples': 17046528, 'steps': 33293, 'loss/train': 1.83499014377594} +02/25/2022 20:41:44 - INFO - codeparrot_training - Step 33294: {'lr': 0.00013513410002621487, 'samples': 17047040, 'steps': 33294, 'loss/train': 1.3630462884902954} +02/25/2022 20:41:47 - INFO - codeparrot_training - Step 33295: {'lr': 0.00013511956718936574, 'samples': 17047552, 'steps': 33295, 'loss/train': 2.10624098777771} +02/25/2022 20:41:53 - INFO - codeparrot_training - Step 33296: {'lr': 0.00013510503484462805, 'samples': 17048064, 'steps': 33296, 'loss/train': 0.9538619518280029} +02/25/2022 20:41:57 - INFO - codeparrot_training - Step 33297: {'lr': 0.0001350905029920639, 'samples': 17048576, 'steps': 33297, 'loss/train': 1.5796390771865845} +02/25/2022 20:42:02 - INFO - codeparrot_training - Step 33298: {'lr': 0.0001350759716317355, 'samples': 17049088, 'steps': 33298, 'loss/train': 1.9168552160263062} +02/25/2022 20:42:06 - INFO - codeparrot_training - Step 33299: {'lr': 0.0001350614407637051, 'samples': 17049600, 'steps': 33299, 'loss/train': 2.140123128890991} +02/25/2022 20:42:11 - INFO - codeparrot_training - Step 33300: {'lr': 0.00013504691038803505, 'samples': 17050112, 'steps': 33300, 'loss/train': 0.7203414440155029} +02/25/2022 20:42:15 - INFO - codeparrot_training - Step 33301: {'lr': 0.00013503238050478755, 'samples': 17050624, 'steps': 33301, 'loss/train': 2.3966708183288574} +02/25/2022 20:42:20 - INFO - codeparrot_training - Step 33302: {'lr': 0.00013501785111402482, 'samples': 17051136, 'steps': 33302, 'loss/train': 1.9984310865402222} +02/25/2022 20:42:24 - INFO - codeparrot_training - Step 33303: {'lr': 0.00013500332221580902, 'samples': 17051648, 'steps': 33303, 'loss/train': 1.9739950895309448} +02/25/2022 20:42:29 - INFO - codeparrot_training - Step 33304: {'lr': 0.00013498879381020255, 'samples': 17052160, 'steps': 33304, 'loss/train': 1.1981984376907349} +02/25/2022 20:42:33 - INFO - codeparrot_training - Step 33305: {'lr': 0.00013497426589726758, 'samples': 17052672, 'steps': 33305, 'loss/train': 1.3749852180480957} +02/25/2022 20:42:39 - INFO - codeparrot_training - Step 33306: {'lr': 0.00013495973847706634, 'samples': 17053184, 'steps': 33306, 'loss/train': 0.14559879899024963} +02/25/2022 20:42:42 - INFO - codeparrot_training - Step 33307: {'lr': 0.00013494521154966093, 'samples': 17053696, 'steps': 33307, 'loss/train': 1.5454174280166626} +02/25/2022 20:42:48 - INFO - codeparrot_training - Step 33308: {'lr': 0.0001349306851151138, 'samples': 17054208, 'steps': 33308, 'loss/train': 0.7908239364624023} +02/25/2022 20:42:51 - INFO - codeparrot_training - Step 33309: {'lr': 0.0001349161591734871, 'samples': 17054720, 'steps': 33309, 'loss/train': 2.9263968467712402} +02/25/2022 20:42:57 - INFO - codeparrot_training - Step 33310: {'lr': 0.0001349016337248429, 'samples': 17055232, 'steps': 33310, 'loss/train': 2.235459566116333} +02/25/2022 20:43:00 - INFO - codeparrot_training - Step 33311: {'lr': 0.00013488710876924376, 'samples': 17055744, 'steps': 33311, 'loss/train': 1.7963942289352417} +02/25/2022 20:43:06 - INFO - codeparrot_training - Step 33312: {'lr': 0.00013487258430675152, 'samples': 17056256, 'steps': 33312, 'loss/train': 0.7760738134384155} +02/25/2022 20:43:09 - INFO - codeparrot_training - Step 33313: {'lr': 0.00013485806033742865, 'samples': 17056768, 'steps': 33313, 'loss/train': 0.8822535276412964} +02/25/2022 20:43:15 - INFO - codeparrot_training - Step 33314: {'lr': 0.00013484353686133717, 'samples': 17057280, 'steps': 33314, 'loss/train': 1.7650905847549438} +02/25/2022 20:43:18 - INFO - codeparrot_training - Step 33315: {'lr': 0.00013482901387853967, 'samples': 17057792, 'steps': 33315, 'loss/train': 2.11855411529541} +02/25/2022 20:43:25 - INFO - codeparrot_training - Step 33316: {'lr': 0.00013481449138909784, 'samples': 17058304, 'steps': 33316, 'loss/train': 1.1121846437454224} +02/25/2022 20:43:29 - INFO - codeparrot_training - Step 33317: {'lr': 0.00013479996939307428, 'samples': 17058816, 'steps': 33317, 'loss/train': 2.221791982650757} +02/25/2022 20:43:34 - INFO - codeparrot_training - Step 33318: {'lr': 0.00013478544789053098, 'samples': 17059328, 'steps': 33318, 'loss/train': 2.2506325244903564} +02/25/2022 20:43:38 - INFO - codeparrot_training - Step 33319: {'lr': 0.00013477092688153033, 'samples': 17059840, 'steps': 33319, 'loss/train': 1.9401451349258423} +02/25/2022 20:43:43 - INFO - codeparrot_training - Step 33320: {'lr': 0.00013475640636613446, 'samples': 17060352, 'steps': 33320, 'loss/train': 2.0966241359710693} +02/25/2022 20:43:47 - INFO - codeparrot_training - Step 33321: {'lr': 0.00013474188634440552, 'samples': 17060864, 'steps': 33321, 'loss/train': 1.7538509368896484} +02/25/2022 20:43:53 - INFO - codeparrot_training - Step 33322: {'lr': 0.00013472736681640563, 'samples': 17061376, 'steps': 33322, 'loss/train': 0.045642364770174026} +02/25/2022 20:43:56 - INFO - codeparrot_training - Step 33323: {'lr': 0.00013471284778219722, 'samples': 17061888, 'steps': 33323, 'loss/train': 2.314727306365967} +02/25/2022 20:44:02 - INFO - codeparrot_training - Step 33324: {'lr': 0.00013469832924184238, 'samples': 17062400, 'steps': 33324, 'loss/train': 2.310760259628296} +02/25/2022 20:44:05 - INFO - codeparrot_training - Step 33325: {'lr': 0.00013468381119540325, 'samples': 17062912, 'steps': 33325, 'loss/train': 0.10972163081169128} +02/25/2022 20:44:12 - INFO - codeparrot_training - Step 33326: {'lr': 0.000134669293642942, 'samples': 17063424, 'steps': 33326, 'loss/train': 1.5418028831481934} +02/25/2022 20:44:16 - INFO - codeparrot_training - Step 33327: {'lr': 0.00013465477658452092, 'samples': 17063936, 'steps': 33327, 'loss/train': 1.0209107398986816} +02/25/2022 20:44:21 - INFO - codeparrot_training - Step 33328: {'lr': 0.0001346402600202022, 'samples': 17064448, 'steps': 33328, 'loss/train': 2.125606060028076} +02/25/2022 20:44:25 - INFO - codeparrot_training - Step 33329: {'lr': 0.00013462574395004792, 'samples': 17064960, 'steps': 33329, 'loss/train': 1.652251958847046} +02/25/2022 20:44:30 - INFO - codeparrot_training - Step 33330: {'lr': 0.00013461122837412026, 'samples': 17065472, 'steps': 33330, 'loss/train': 1.472922682762146} +02/25/2022 20:44:34 - INFO - codeparrot_training - Step 33331: {'lr': 0.00013459671329248154, 'samples': 17065984, 'steps': 33331, 'loss/train': 0.14456412196159363} +02/25/2022 20:44:39 - INFO - codeparrot_training - Step 33332: {'lr': 0.00013458219870519377, 'samples': 17066496, 'steps': 33332, 'loss/train': 1.5198042392730713} +02/25/2022 20:44:43 - INFO - codeparrot_training - Step 33333: {'lr': 0.00013456768461231938, 'samples': 17067008, 'steps': 33333, 'loss/train': 2.345118522644043} +02/25/2022 20:44:48 - INFO - codeparrot_training - Step 33334: {'lr': 0.00013455317101392024, 'samples': 17067520, 'steps': 33334, 'loss/train': 1.6569749116897583} +02/25/2022 20:44:52 - INFO - codeparrot_training - Step 33335: {'lr': 0.0001345386579100587, 'samples': 17068032, 'steps': 33335, 'loss/train': 2.184098243713379} +02/25/2022 20:44:58 - INFO - codeparrot_training - Step 33336: {'lr': 0.0001345241453007968, 'samples': 17068544, 'steps': 33336, 'loss/train': 1.4604190587997437} +02/25/2022 20:45:02 - INFO - codeparrot_training - Step 33337: {'lr': 0.000134509633186197, 'samples': 17069056, 'steps': 33337, 'loss/train': 2.704925298690796} +02/25/2022 20:45:07 - INFO - codeparrot_training - Step 33338: {'lr': 0.00013449512156632105, 'samples': 17069568, 'steps': 33338, 'loss/train': 1.7093887329101562} +02/25/2022 20:45:11 - INFO - codeparrot_training - Step 33339: {'lr': 0.00013448061044123144, 'samples': 17070080, 'steps': 33339, 'loss/train': 1.4775205850601196} +02/25/2022 20:45:16 - INFO - codeparrot_training - Step 33340: {'lr': 0.0001344660998109901, 'samples': 17070592, 'steps': 33340, 'loss/train': 1.6309698820114136} +02/25/2022 20:45:20 - INFO - codeparrot_training - Step 33341: {'lr': 0.0001344515896756595, 'samples': 17071104, 'steps': 33341, 'loss/train': 1.8075356483459473} +02/25/2022 20:45:25 - INFO - codeparrot_training - Step 33342: {'lr': 0.00013443708003530138, 'samples': 17071616, 'steps': 33342, 'loss/train': 1.0016676187515259} +02/25/2022 20:45:29 - INFO - codeparrot_training - Step 33343: {'lr': 0.00013442257088997823, 'samples': 17072128, 'steps': 33343, 'loss/train': 1.301164150238037} +02/25/2022 20:45:34 - INFO - codeparrot_training - Step 33344: {'lr': 0.0001344080622397521, 'samples': 17072640, 'steps': 33344, 'loss/train': 1.7645366191864014} +02/25/2022 20:45:38 - INFO - codeparrot_training - Step 33345: {'lr': 0.00013439355408468502, 'samples': 17073152, 'steps': 33345, 'loss/train': 1.2862515449523926} +02/25/2022 20:45:43 - INFO - codeparrot_training - Step 33346: {'lr': 0.00013437904642483932, 'samples': 17073664, 'steps': 33346, 'loss/train': 2.600736141204834} +02/25/2022 20:45:47 - INFO - codeparrot_training - Step 33347: {'lr': 0.00013436453926027713, 'samples': 17074176, 'steps': 33347, 'loss/train': 1.1010692119598389} +02/25/2022 20:45:53 - INFO - codeparrot_training - Step 33348: {'lr': 0.00013435003259106048, 'samples': 17074688, 'steps': 33348, 'loss/train': 1.8911545276641846} +02/25/2022 20:45:56 - INFO - codeparrot_training - Step 33349: {'lr': 0.0001343355264172515, 'samples': 17075200, 'steps': 33349, 'loss/train': 1.6415482759475708} +02/25/2022 20:46:02 - INFO - codeparrot_training - Step 33350: {'lr': 0.0001343210207389125, 'samples': 17075712, 'steps': 33350, 'loss/train': 1.190313458442688} +02/25/2022 20:46:05 - INFO - codeparrot_training - Step 33351: {'lr': 0.00013430651555610548, 'samples': 17076224, 'steps': 33351, 'loss/train': 2.041227340698242} +02/25/2022 20:46:11 - INFO - codeparrot_training - Step 33352: {'lr': 0.00013429201086889264, 'samples': 17076736, 'steps': 33352, 'loss/train': 1.9330180883407593} +02/25/2022 20:46:14 - INFO - codeparrot_training - Step 33353: {'lr': 0.00013427750667733597, 'samples': 17077248, 'steps': 33353, 'loss/train': 1.4775339365005493} +02/25/2022 20:46:20 - INFO - codeparrot_training - Step 33354: {'lr': 0.00013426300298149783, 'samples': 17077760, 'steps': 33354, 'loss/train': 2.110402822494507} +02/25/2022 20:46:23 - INFO - codeparrot_training - Step 33355: {'lr': 0.00013424849978144022, 'samples': 17078272, 'steps': 33355, 'loss/train': 1.1555075645446777} +02/25/2022 20:46:29 - INFO - codeparrot_training - Step 33356: {'lr': 0.00013423399707722527, 'samples': 17078784, 'steps': 33356, 'loss/train': 1.735811710357666} +02/25/2022 20:46:32 - INFO - codeparrot_training - Step 33357: {'lr': 0.00013421949486891504, 'samples': 17079296, 'steps': 33357, 'loss/train': 3.14457631111145} +02/25/2022 20:46:38 - INFO - codeparrot_training - Step 33358: {'lr': 0.00013420499315657184, 'samples': 17079808, 'steps': 33358, 'loss/train': 1.6877319812774658} +02/25/2022 20:46:41 - INFO - codeparrot_training - Step 33359: {'lr': 0.00013419049194025764, 'samples': 17080320, 'steps': 33359, 'loss/train': 1.126420259475708} +02/25/2022 20:46:47 - INFO - codeparrot_training - Step 33360: {'lr': 0.00013417599122003462, 'samples': 17080832, 'steps': 33360, 'loss/train': 0.7469651699066162} +02/25/2022 20:46:50 - INFO - codeparrot_training - Step 33361: {'lr': 0.0001341614909959648, 'samples': 17081344, 'steps': 33361, 'loss/train': 1.1103416681289673} +02/25/2022 20:46:57 - INFO - codeparrot_training - Step 33362: {'lr': 0.00013414699126811043, 'samples': 17081856, 'steps': 33362, 'loss/train': 2.720579147338867} +02/25/2022 20:47:00 - INFO - codeparrot_training - Step 33363: {'lr': 0.0001341324920365336, 'samples': 17082368, 'steps': 33363, 'loss/train': 1.9618735313415527} +02/25/2022 20:47:06 - INFO - codeparrot_training - Step 33364: {'lr': 0.00013411799330129633, 'samples': 17082880, 'steps': 33364, 'loss/train': 2.219003200531006} +02/25/2022 20:47:09 - INFO - codeparrot_training - Step 33365: {'lr': 0.00013410349506246073, 'samples': 17083392, 'steps': 33365, 'loss/train': 1.4462428092956543} +02/25/2022 20:47:15 - INFO - codeparrot_training - Step 33366: {'lr': 0.00013408899732008902, 'samples': 17083904, 'steps': 33366, 'loss/train': 1.0601718425750732} +02/25/2022 20:47:18 - INFO - codeparrot_training - Step 33367: {'lr': 0.00013407450007424317, 'samples': 17084416, 'steps': 33367, 'loss/train': 8.165153503417969} +02/25/2022 20:47:24 - INFO - codeparrot_training - Step 33368: {'lr': 0.00013406000332498552, 'samples': 17084928, 'steps': 33368, 'loss/train': 1.720931887626648} +02/25/2022 20:47:27 - INFO - codeparrot_training - Step 33369: {'lr': 0.00013404550707237783, 'samples': 17085440, 'steps': 33369, 'loss/train': 1.3504197597503662} +02/25/2022 20:47:33 - INFO - codeparrot_training - Step 33370: {'lr': 0.00013403101131648245, 'samples': 17085952, 'steps': 33370, 'loss/train': 1.1937004327774048} +02/25/2022 20:47:36 - INFO - codeparrot_training - Step 33371: {'lr': 0.00013401651605736133, 'samples': 17086464, 'steps': 33371, 'loss/train': 3.131883144378662} +02/25/2022 20:47:42 - INFO - codeparrot_training - Step 33372: {'lr': 0.00013400202129507677, 'samples': 17086976, 'steps': 33372, 'loss/train': 0.11477970331907272} +02/25/2022 20:47:46 - INFO - codeparrot_training - Step 33373: {'lr': 0.00013398752702969055, 'samples': 17087488, 'steps': 33373, 'loss/train': 1.726823091506958} +02/25/2022 20:47:51 - INFO - codeparrot_training - Step 33374: {'lr': 0.00013397303326126504, 'samples': 17088000, 'steps': 33374, 'loss/train': 0.8116297125816345} +02/25/2022 20:47:55 - INFO - codeparrot_training - Step 33375: {'lr': 0.00013395853998986206, 'samples': 17088512, 'steps': 33375, 'loss/train': 1.0939476490020752} +02/25/2022 20:48:00 - INFO - codeparrot_training - Step 33376: {'lr': 0.0001339440472155441, 'samples': 17089024, 'steps': 33376, 'loss/train': 1.861535906791687} +02/25/2022 20:48:04 - INFO - codeparrot_training - Step 33377: {'lr': 0.00013392955493837279, 'samples': 17089536, 'steps': 33377, 'loss/train': 1.3148138523101807} +02/25/2022 20:48:10 - INFO - codeparrot_training - Step 33378: {'lr': 0.00013391506315841046, 'samples': 17090048, 'steps': 33378, 'loss/train': 0.2312527298927307} +02/25/2022 20:48:13 - INFO - codeparrot_training - Step 33379: {'lr': 0.0001339005718757191, 'samples': 17090560, 'steps': 33379, 'loss/train': 1.6715458631515503} +02/25/2022 20:48:19 - INFO - codeparrot_training - Step 33380: {'lr': 0.00013388608109036085, 'samples': 17091072, 'steps': 33380, 'loss/train': 2.4183127880096436} +02/25/2022 20:48:22 - INFO - codeparrot_training - Step 33381: {'lr': 0.00013387159080239781, 'samples': 17091584, 'steps': 33381, 'loss/train': 0.658007025718689} +02/25/2022 20:48:29 - INFO - codeparrot_training - Step 33382: {'lr': 0.00013385710101189197, 'samples': 17092096, 'steps': 33382, 'loss/train': 1.1469815969467163} +02/25/2022 20:48:32 - INFO - codeparrot_training - Step 33383: {'lr': 0.00013384261171890539, 'samples': 17092608, 'steps': 33383, 'loss/train': 1.5197700262069702} +02/25/2022 20:48:38 - INFO - codeparrot_training - Step 33384: {'lr': 0.00013382812292350022, 'samples': 17093120, 'steps': 33384, 'loss/train': 1.2824183702468872} +02/25/2022 20:48:41 - INFO - codeparrot_training - Step 33385: {'lr': 0.00013381363462573848, 'samples': 17093632, 'steps': 33385, 'loss/train': 0.02857513353228569} +02/25/2022 20:48:47 - INFO - codeparrot_training - Step 33386: {'lr': 0.0001337991468256823, 'samples': 17094144, 'steps': 33386, 'loss/train': 1.6670411825180054} +02/25/2022 20:48:50 - INFO - codeparrot_training - Step 33387: {'lr': 0.0001337846595233935, 'samples': 17094656, 'steps': 33387, 'loss/train': 1.1680760383605957} +02/25/2022 20:48:56 - INFO - codeparrot_training - Step 33388: {'lr': 0.00013377017271893444, 'samples': 17095168, 'steps': 33388, 'loss/train': 1.1745519638061523} +02/25/2022 20:48:59 - INFO - codeparrot_training - Step 33389: {'lr': 0.00013375568641236707, 'samples': 17095680, 'steps': 33389, 'loss/train': 1.0801969766616821} +02/25/2022 20:49:05 - INFO - codeparrot_training - Step 33390: {'lr': 0.0001337412006037534, 'samples': 17096192, 'steps': 33390, 'loss/train': 2.1362245082855225} +02/25/2022 20:49:08 - INFO - codeparrot_training - Step 33391: {'lr': 0.00013372671529315544, 'samples': 17096704, 'steps': 33391, 'loss/train': 1.218953013420105} +02/25/2022 20:49:14 - INFO - codeparrot_training - Step 33392: {'lr': 0.00013371223048063541, 'samples': 17097216, 'steps': 33392, 'loss/train': 1.3273156881332397} +02/25/2022 20:49:17 - INFO - codeparrot_training - Step 33393: {'lr': 0.00013369774616625525, 'samples': 17097728, 'steps': 33393, 'loss/train': 1.6569035053253174} +02/25/2022 20:49:24 - INFO - codeparrot_training - Step 33394: {'lr': 0.000133683262350077, 'samples': 17098240, 'steps': 33394, 'loss/train': 1.5534240007400513} +02/25/2022 20:49:27 - INFO - codeparrot_training - Step 33395: {'lr': 0.00013366877903216273, 'samples': 17098752, 'steps': 33395, 'loss/train': 2.520770311355591} +02/25/2022 20:49:33 - INFO - codeparrot_training - Step 33396: {'lr': 0.0001336542962125744, 'samples': 17099264, 'steps': 33396, 'loss/train': 2.2042407989501953} +02/25/2022 20:49:36 - INFO - codeparrot_training - Step 33397: {'lr': 0.0001336398138913742, 'samples': 17099776, 'steps': 33397, 'loss/train': 1.7514128684997559} +02/25/2022 20:49:42 - INFO - codeparrot_training - Step 33398: {'lr': 0.0001336253320686241, 'samples': 17100288, 'steps': 33398, 'loss/train': 1.811836838722229} +02/25/2022 20:49:45 - INFO - codeparrot_training - Step 33399: {'lr': 0.0001336108507443861, 'samples': 17100800, 'steps': 33399, 'loss/train': 0.68487948179245} +02/25/2022 20:49:51 - INFO - codeparrot_training - Step 33400: {'lr': 0.00013359636991872216, 'samples': 17101312, 'steps': 33400, 'loss/train': 2.6286821365356445} +02/25/2022 20:49:56 - INFO - codeparrot_training - Step 33401: {'lr': 0.00013358188959169452, 'samples': 17101824, 'steps': 33401, 'loss/train': 1.5611859560012817} +02/25/2022 20:50:00 - INFO - codeparrot_training - Step 33402: {'lr': 0.00013356740976336507, 'samples': 17102336, 'steps': 33402, 'loss/train': 1.8862221240997314} +02/25/2022 20:50:05 - INFO - codeparrot_training - Step 33403: {'lr': 0.0001335529304337959, 'samples': 17102848, 'steps': 33403, 'loss/train': 2.2275445461273193} +02/25/2022 20:50:09 - INFO - codeparrot_training - Step 33404: {'lr': 0.0001335384516030489, 'samples': 17103360, 'steps': 33404, 'loss/train': 1.8185585737228394} +02/25/2022 20:50:14 - INFO - codeparrot_training - Step 33405: {'lr': 0.00013352397327118625, 'samples': 17103872, 'steps': 33405, 'loss/train': 1.7012587785720825} +02/25/2022 20:50:18 - INFO - codeparrot_training - Step 33406: {'lr': 0.00013350949543826986, 'samples': 17104384, 'steps': 33406, 'loss/train': 0.965317964553833} +02/25/2022 20:50:24 - INFO - codeparrot_training - Step 33407: {'lr': 0.000133495018104362, 'samples': 17104896, 'steps': 33407, 'loss/train': 2.268836736679077} +02/25/2022 20:50:27 - INFO - codeparrot_training - Step 33408: {'lr': 0.00013348054126952427, 'samples': 17105408, 'steps': 33408, 'loss/train': 1.6885693073272705} +02/25/2022 20:50:33 - INFO - codeparrot_training - Step 33409: {'lr': 0.000133466064933819, 'samples': 17105920, 'steps': 33409, 'loss/train': 1.1213762760162354} +02/25/2022 20:50:37 - INFO - codeparrot_training - Step 33410: {'lr': 0.00013345158909730802, 'samples': 17106432, 'steps': 33410, 'loss/train': 2.3392863273620605} +02/25/2022 20:50:42 - INFO - codeparrot_training - Step 33411: {'lr': 0.00013343711376005363, 'samples': 17106944, 'steps': 33411, 'loss/train': 1.5026854276657104} +02/25/2022 20:50:45 - INFO - codeparrot_training - Step 33412: {'lr': 0.0001334226389221174, 'samples': 17107456, 'steps': 33412, 'loss/train': 1.0822640657424927} +02/25/2022 20:50:51 - INFO - codeparrot_training - Step 33413: {'lr': 0.00013340816458356168, 'samples': 17107968, 'steps': 33413, 'loss/train': 1.0653711557388306} +02/25/2022 20:50:54 - INFO - codeparrot_training - Step 33414: {'lr': 0.00013339369074444825, 'samples': 17108480, 'steps': 33414, 'loss/train': 2.0920493602752686} +02/25/2022 20:51:00 - INFO - codeparrot_training - Step 33415: {'lr': 0.00013337921740483945, 'samples': 17108992, 'steps': 33415, 'loss/train': 3.0703022480010986} +02/25/2022 20:51:03 - INFO - codeparrot_training - Step 33416: {'lr': 0.00013336474456479685, 'samples': 17109504, 'steps': 33416, 'loss/train': 2.0900466442108154} +02/25/2022 20:51:10 - INFO - codeparrot_training - Step 33417: {'lr': 0.00013335027222438273, 'samples': 17110016, 'steps': 33417, 'loss/train': 1.3814113140106201} +02/25/2022 20:51:13 - INFO - codeparrot_training - Step 33418: {'lr': 0.00013333580038365892, 'samples': 17110528, 'steps': 33418, 'loss/train': 2.204141616821289} +02/25/2022 20:51:19 - INFO - codeparrot_training - Step 33419: {'lr': 0.00013332132904268757, 'samples': 17111040, 'steps': 33419, 'loss/train': 0.8567627668380737} +02/25/2022 20:51:22 - INFO - codeparrot_training - Step 33420: {'lr': 0.0001333068582015306, 'samples': 17111552, 'steps': 33420, 'loss/train': 2.138132333755493} +02/25/2022 20:51:28 - INFO - codeparrot_training - Step 33421: {'lr': 0.00013329238786025002, 'samples': 17112064, 'steps': 33421, 'loss/train': 2.030482292175293} +02/25/2022 20:51:31 - INFO - codeparrot_training - Step 33422: {'lr': 0.0001332779180189077, 'samples': 17112576, 'steps': 33422, 'loss/train': 2.1376850605010986} +02/25/2022 20:51:37 - INFO - codeparrot_training - Step 33423: {'lr': 0.00013326344867756578, 'samples': 17113088, 'steps': 33423, 'loss/train': 1.081028938293457} +02/25/2022 20:51:40 - INFO - codeparrot_training - Step 33424: {'lr': 0.0001332489798362862, 'samples': 17113600, 'steps': 33424, 'loss/train': 2.4660117626190186} +02/25/2022 20:51:46 - INFO - codeparrot_training - Step 33425: {'lr': 0.00013323451149513092, 'samples': 17114112, 'steps': 33425, 'loss/train': 1.915223240852356} +02/25/2022 20:51:49 - INFO - codeparrot_training - Step 33426: {'lr': 0.0001332200436541618, 'samples': 17114624, 'steps': 33426, 'loss/train': 1.6988694667816162} +02/25/2022 20:51:55 - INFO - codeparrot_training - Step 33427: {'lr': 0.00013320557631344104, 'samples': 17115136, 'steps': 33427, 'loss/train': 1.6248199939727783} +02/25/2022 20:51:58 - INFO - codeparrot_training - Step 33428: {'lr': 0.00013319110947303047, 'samples': 17115648, 'steps': 33428, 'loss/train': 1.44893217086792} +02/25/2022 20:52:04 - INFO - codeparrot_training - Step 33429: {'lr': 0.00013317664313299216, 'samples': 17116160, 'steps': 33429, 'loss/train': 1.6165456771850586} +02/25/2022 20:52:08 - INFO - codeparrot_training - Step 33430: {'lr': 0.00013316217729338785, 'samples': 17116672, 'steps': 33430, 'loss/train': 0.09326335042715073} +02/25/2022 20:52:13 - INFO - codeparrot_training - Step 33431: {'lr': 0.00013314771195427984, 'samples': 17117184, 'steps': 33431, 'loss/train': 1.220788598060608} +02/25/2022 20:52:17 - INFO - codeparrot_training - Step 33432: {'lr': 0.00013313324711572988, 'samples': 17117696, 'steps': 33432, 'loss/train': 1.998371958732605} +02/25/2022 20:52:22 - INFO - codeparrot_training - Step 33433: {'lr': 0.00013311878277779999, 'samples': 17118208, 'steps': 33433, 'loss/train': 1.4682552814483643} +02/25/2022 20:52:26 - INFO - codeparrot_training - Step 33434: {'lr': 0.00013310431894055202, 'samples': 17118720, 'steps': 33434, 'loss/train': 0.6298272013664246} +02/25/2022 20:52:31 - INFO - codeparrot_training - Step 33435: {'lr': 0.00013308985560404817, 'samples': 17119232, 'steps': 33435, 'loss/train': 2.096583127975464} +02/25/2022 20:52:35 - INFO - codeparrot_training - Step 33436: {'lr': 0.00013307539276835017, 'samples': 17119744, 'steps': 33436, 'loss/train': 0.9860432147979736} +02/25/2022 20:52:40 - INFO - codeparrot_training - Step 33437: {'lr': 0.00013306093043352014, 'samples': 17120256, 'steps': 33437, 'loss/train': 1.5159927606582642} +02/25/2022 20:52:44 - INFO - codeparrot_training - Step 33438: {'lr': 0.00013304646859961983, 'samples': 17120768, 'steps': 33438, 'loss/train': 2.343961238861084} +02/25/2022 20:52:50 - INFO - codeparrot_training - Step 33439: {'lr': 0.0001330320072667114, 'samples': 17121280, 'steps': 33439, 'loss/train': 1.0366311073303223} +02/25/2022 20:52:54 - INFO - codeparrot_training - Step 33440: {'lr': 0.0001330175464348567, 'samples': 17121792, 'steps': 33440, 'loss/train': 2.18906569480896} +02/25/2022 20:52:59 - INFO - codeparrot_training - Step 33441: {'lr': 0.0001330030861041176, 'samples': 17122304, 'steps': 33441, 'loss/train': 2.3539037704467773} +02/25/2022 20:53:02 - INFO - codeparrot_training - Step 33442: {'lr': 0.0001329886262745563, 'samples': 17122816, 'steps': 33442, 'loss/train': 1.4886040687561035} +02/25/2022 20:53:08 - INFO - codeparrot_training - Step 33443: {'lr': 0.0001329741669462344, 'samples': 17123328, 'steps': 33443, 'loss/train': 1.362316608428955} +02/25/2022 20:53:11 - INFO - codeparrot_training - Step 33444: {'lr': 0.0001329597081192141, 'samples': 17123840, 'steps': 33444, 'loss/train': 1.4776610136032104} +02/25/2022 20:53:17 - INFO - codeparrot_training - Step 33445: {'lr': 0.00013294524979355716, 'samples': 17124352, 'steps': 33445, 'loss/train': 1.8137837648391724} +02/25/2022 20:53:20 - INFO - codeparrot_training - Step 33446: {'lr': 0.00013293079196932578, 'samples': 17124864, 'steps': 33446, 'loss/train': 1.231760859489441} +02/25/2022 20:53:26 - INFO - codeparrot_training - Step 33447: {'lr': 0.00013291633464658154, 'samples': 17125376, 'steps': 33447, 'loss/train': 1.477229356765747} +02/25/2022 20:53:30 - INFO - codeparrot_training - Step 33448: {'lr': 0.00013290187782538662, 'samples': 17125888, 'steps': 33448, 'loss/train': 1.839331865310669} +02/25/2022 20:53:35 - INFO - codeparrot_training - Step 33449: {'lr': 0.0001328874215058028, 'samples': 17126400, 'steps': 33449, 'loss/train': 2.200927257537842} +02/25/2022 20:53:39 - INFO - codeparrot_training - Step 33450: {'lr': 0.00013287296568789225, 'samples': 17126912, 'steps': 33450, 'loss/train': 1.520546317100525} +02/25/2022 20:53:44 - INFO - codeparrot_training - Step 33451: {'lr': 0.00013285851037171654, 'samples': 17127424, 'steps': 33451, 'loss/train': 2.7835500240325928} +02/25/2022 20:53:48 - INFO - codeparrot_training - Step 33452: {'lr': 0.00013284405555733785, 'samples': 17127936, 'steps': 33452, 'loss/train': 2.5864064693450928} +02/25/2022 20:53:53 - INFO - codeparrot_training - Step 33453: {'lr': 0.00013282960124481793, 'samples': 17128448, 'steps': 33453, 'loss/train': 2.0206918716430664} +02/25/2022 20:53:57 - INFO - codeparrot_training - Step 33454: {'lr': 0.0001328151474342189, 'samples': 17128960, 'steps': 33454, 'loss/train': 1.7264811992645264} +02/25/2022 20:54:03 - INFO - codeparrot_training - Step 33455: {'lr': 0.0001328006941256026, 'samples': 17129472, 'steps': 33455, 'loss/train': 2.172121524810791} +02/25/2022 20:54:07 - INFO - codeparrot_training - Step 33456: {'lr': 0.00013278624131903086, 'samples': 17129984, 'steps': 33456, 'loss/train': 1.7189058065414429} +02/25/2022 20:54:12 - INFO - codeparrot_training - Step 33457: {'lr': 0.00013277178901456558, 'samples': 17130496, 'steps': 33457, 'loss/train': 1.3464924097061157} +02/25/2022 20:54:15 - INFO - codeparrot_training - Step 33458: {'lr': 0.0001327573372122688, 'samples': 17131008, 'steps': 33458, 'loss/train': 1.9749528169631958} +02/25/2022 20:54:21 - INFO - codeparrot_training - Step 33459: {'lr': 0.00013274288591220241, 'samples': 17131520, 'steps': 33459, 'loss/train': 0.8323876857757568} +02/25/2022 20:54:24 - INFO - codeparrot_training - Step 33460: {'lr': 0.00013272843511442822, 'samples': 17132032, 'steps': 33460, 'loss/train': 1.2661223411560059} +02/25/2022 20:54:30 - INFO - codeparrot_training - Step 33461: {'lr': 0.00013271398481900807, 'samples': 17132544, 'steps': 33461, 'loss/train': 2.106449604034424} +02/25/2022 20:54:33 - INFO - codeparrot_training - Step 33462: {'lr': 0.00013269953502600406, 'samples': 17133056, 'steps': 33462, 'loss/train': 1.5974721908569336} +02/25/2022 20:54:39 - INFO - codeparrot_training - Step 33463: {'lr': 0.00013268508573547805, 'samples': 17133568, 'steps': 33463, 'loss/train': 8.676044464111328} +02/25/2022 20:54:42 - INFO - codeparrot_training - Step 33464: {'lr': 0.0001326706369474918, 'samples': 17134080, 'steps': 33464, 'loss/train': 2.2265353202819824} +02/25/2022 20:54:49 - INFO - codeparrot_training - Step 33465: {'lr': 0.00013265618866210727, 'samples': 17134592, 'steps': 33465, 'loss/train': 1.908743143081665} +02/25/2022 20:54:52 - INFO - codeparrot_training - Step 33466: {'lr': 0.00013264174087938642, 'samples': 17135104, 'steps': 33466, 'loss/train': 2.4594006538391113} +02/25/2022 20:54:58 - INFO - codeparrot_training - Step 33467: {'lr': 0.00013262729359939102, 'samples': 17135616, 'steps': 33467, 'loss/train': 1.798116683959961} +02/25/2022 20:55:01 - INFO - codeparrot_training - Step 33468: {'lr': 0.0001326128468221832, 'samples': 17136128, 'steps': 33468, 'loss/train': 2.2674553394317627} +02/25/2022 20:55:07 - INFO - codeparrot_training - Step 33469: {'lr': 0.00013259840054782445, 'samples': 17136640, 'steps': 33469, 'loss/train': 1.3644338846206665} +02/25/2022 20:55:10 - INFO - codeparrot_training - Step 33470: {'lr': 0.000132583954776377, 'samples': 17137152, 'steps': 33470, 'loss/train': 1.0547000169754028} +02/25/2022 20:55:16 - INFO - codeparrot_training - Step 33471: {'lr': 0.0001325695095079025, 'samples': 17137664, 'steps': 33471, 'loss/train': 1.966468334197998} +02/25/2022 20:55:19 - INFO - codeparrot_training - Step 33472: {'lr': 0.00013255506474246312, 'samples': 17138176, 'steps': 33472, 'loss/train': 0.9320878982543945} +02/25/2022 20:55:25 - INFO - codeparrot_training - Step 33473: {'lr': 0.00013254062048012038, 'samples': 17138688, 'steps': 33473, 'loss/train': 1.82168710231781} +02/25/2022 20:55:28 - INFO - codeparrot_training - Step 33474: {'lr': 0.00013252617672093639, 'samples': 17139200, 'steps': 33474, 'loss/train': 1.3933069705963135} +02/25/2022 20:55:34 - INFO - codeparrot_training - Step 33475: {'lr': 0.00013251173346497298, 'samples': 17139712, 'steps': 33475, 'loss/train': 1.382158637046814} +02/25/2022 20:55:38 - INFO - codeparrot_training - Step 33476: {'lr': 0.0001324972907122919, 'samples': 17140224, 'steps': 33476, 'loss/train': 1.639510989189148} +02/25/2022 20:55:43 - INFO - codeparrot_training - Step 33477: {'lr': 0.0001324828484629552, 'samples': 17140736, 'steps': 33477, 'loss/train': 0.8593530058860779} +02/25/2022 20:55:47 - INFO - codeparrot_training - Step 33478: {'lr': 0.00013246840671702465, 'samples': 17141248, 'steps': 33478, 'loss/train': 2.601548910140991} +02/25/2022 20:55:52 - INFO - codeparrot_training - Step 33479: {'lr': 0.00013245396547456213, 'samples': 17141760, 'steps': 33479, 'loss/train': 1.5943324565887451} +02/25/2022 20:55:56 - INFO - codeparrot_training - Step 33480: {'lr': 0.00013243952473562942, 'samples': 17142272, 'steps': 33480, 'loss/train': 1.4353269338607788} +02/25/2022 20:56:01 - INFO - codeparrot_training - Step 33481: {'lr': 0.00013242508450028853, 'samples': 17142784, 'steps': 33481, 'loss/train': 0.19118362665176392} +02/25/2022 20:56:05 - INFO - codeparrot_training - Step 33482: {'lr': 0.00013241064476860126, 'samples': 17143296, 'steps': 33482, 'loss/train': 1.215999722480774} +02/25/2022 20:56:10 - INFO - codeparrot_training - Step 33483: {'lr': 0.00013239620554062945, 'samples': 17143808, 'steps': 33483, 'loss/train': 1.8257240056991577} +02/25/2022 20:56:14 - INFO - codeparrot_training - Step 33484: {'lr': 0.00013238176681643486, 'samples': 17144320, 'steps': 33484, 'loss/train': 2.3788487911224365} +02/25/2022 20:56:20 - INFO - codeparrot_training - Step 33485: {'lr': 0.00013236732859607953, 'samples': 17144832, 'steps': 33485, 'loss/train': 1.8935496807098389} +02/25/2022 20:56:23 - INFO - codeparrot_training - Step 33486: {'lr': 0.0001323528908796252, 'samples': 17145344, 'steps': 33486, 'loss/train': 2.148883581161499} +02/25/2022 20:56:29 - INFO - codeparrot_training - Step 33487: {'lr': 0.00013233845366713372, 'samples': 17145856, 'steps': 33487, 'loss/train': 1.2525650262832642} +02/25/2022 20:56:32 - INFO - codeparrot_training - Step 33488: {'lr': 0.00013232401695866685, 'samples': 17146368, 'steps': 33488, 'loss/train': 1.4463725090026855} +02/25/2022 20:56:38 - INFO - codeparrot_training - Step 33489: {'lr': 0.00013230958075428662, 'samples': 17146880, 'steps': 33489, 'loss/train': 1.3483624458312988} +02/25/2022 20:56:41 - INFO - codeparrot_training - Step 33490: {'lr': 0.00013229514505405478, 'samples': 17147392, 'steps': 33490, 'loss/train': 2.2947733402252197} +02/25/2022 20:56:47 - INFO - codeparrot_training - Step 33491: {'lr': 0.00013228070985803317, 'samples': 17147904, 'steps': 33491, 'loss/train': 2.344132423400879} +02/25/2022 20:56:50 - INFO - codeparrot_training - Step 33492: {'lr': 0.00013226627516628353, 'samples': 17148416, 'steps': 33492, 'loss/train': 0.8506695628166199} +02/25/2022 20:56:56 - INFO - codeparrot_training - Step 33493: {'lr': 0.00013225184097886785, 'samples': 17148928, 'steps': 33493, 'loss/train': 2.137237787246704} +02/25/2022 20:56:59 - INFO - codeparrot_training - Step 33494: {'lr': 0.00013223740729584793, 'samples': 17149440, 'steps': 33494, 'loss/train': 1.5528056621551514} +02/25/2022 20:57:05 - INFO - codeparrot_training - Step 33495: {'lr': 0.00013222297411728554, 'samples': 17149952, 'steps': 33495, 'loss/train': 1.4121397733688354} +02/25/2022 20:57:08 - INFO - codeparrot_training - Step 33496: {'lr': 0.00013220854144324243, 'samples': 17150464, 'steps': 33496, 'loss/train': 1.4583725929260254} +02/25/2022 20:57:14 - INFO - codeparrot_training - Step 33497: {'lr': 0.00013219410927378062, 'samples': 17150976, 'steps': 33497, 'loss/train': 1.376801609992981} +02/25/2022 20:57:17 - INFO - codeparrot_training - Step 33498: {'lr': 0.00013217967760896187, 'samples': 17151488, 'steps': 33498, 'loss/train': 1.8423397541046143} +02/25/2022 20:57:23 - INFO - codeparrot_training - Step 33499: {'lr': 0.0001321652464488479, 'samples': 17152000, 'steps': 33499, 'loss/train': 0.0896710753440857} +02/25/2022 20:57:26 - INFO - codeparrot_training - Step 33500: {'lr': 0.00013215081579350058, 'samples': 17152512, 'steps': 33500, 'loss/train': 1.9780653715133667} +02/25/2022 20:57:33 - INFO - codeparrot_training - Step 33501: {'lr': 0.0001321363856429818, 'samples': 17153024, 'steps': 33501, 'loss/train': 1.430535078048706} +02/25/2022 20:57:37 - INFO - codeparrot_training - Step 33502: {'lr': 0.00013212195599735324, 'samples': 17153536, 'steps': 33502, 'loss/train': 0.815642774105072} +02/25/2022 20:57:42 - INFO - codeparrot_training - Step 33503: {'lr': 0.00013210752685667698, 'samples': 17154048, 'steps': 33503, 'loss/train': 0.4819067120552063} +02/25/2022 20:57:46 - INFO - codeparrot_training - Step 33504: {'lr': 0.00013209309822101445, 'samples': 17154560, 'steps': 33504, 'loss/train': 1.550699234008789} +02/25/2022 20:57:51 - INFO - codeparrot_training - Step 33505: {'lr': 0.00013207867009042774, 'samples': 17155072, 'steps': 33505, 'loss/train': 0.050010789185762405} +02/25/2022 20:57:55 - INFO - codeparrot_training - Step 33506: {'lr': 0.00013206424246497845, 'samples': 17155584, 'steps': 33506, 'loss/train': 2.055128335952759} +02/25/2022 20:58:00 - INFO - codeparrot_training - Step 33507: {'lr': 0.0001320498153447287, 'samples': 17156096, 'steps': 33507, 'loss/train': 1.144755244255066} +02/25/2022 20:58:06 - INFO - codeparrot_training - Step 33508: {'lr': 0.00013203538872973988, 'samples': 17156608, 'steps': 33508, 'loss/train': 1.7556042671203613} +02/25/2022 20:58:09 - INFO - codeparrot_training - Step 33509: {'lr': 0.00013202096262007412, 'samples': 17157120, 'steps': 33509, 'loss/train': 0.9784806966781616} +02/25/2022 20:58:13 - INFO - codeparrot_training - Step 33510: {'lr': 0.00013200653701579297, 'samples': 17157632, 'steps': 33510, 'loss/train': 2.0428335666656494} +02/25/2022 20:58:20 - INFO - codeparrot_training - Step 33511: {'lr': 0.00013199211191695858, 'samples': 17158144, 'steps': 33511, 'loss/train': 1.5901081562042236} +02/25/2022 20:58:25 - INFO - codeparrot_training - Step 33512: {'lr': 0.0001319776873236323, 'samples': 17158656, 'steps': 33512, 'loss/train': 2.3524787425994873} +02/25/2022 20:58:28 - INFO - codeparrot_training - Step 33513: {'lr': 0.0001319632632358762, 'samples': 17159168, 'steps': 33513, 'loss/train': 1.2886229753494263} +02/25/2022 20:58:34 - INFO - codeparrot_training - Step 33514: {'lr': 0.00013194883965375194, 'samples': 17159680, 'steps': 33514, 'loss/train': 2.036590337753296} +02/25/2022 20:58:38 - INFO - codeparrot_training - Step 33515: {'lr': 0.0001319344165773214, 'samples': 17160192, 'steps': 33515, 'loss/train': 1.3453980684280396} +02/25/2022 20:58:43 - INFO - codeparrot_training - Step 33516: {'lr': 0.0001319199940066464, 'samples': 17160704, 'steps': 33516, 'loss/train': 1.2420707941055298} +02/25/2022 20:58:46 - INFO - codeparrot_training - Step 33517: {'lr': 0.00013190557194178864, 'samples': 17161216, 'steps': 33517, 'loss/train': 1.217384934425354} +02/25/2022 20:58:52 - INFO - codeparrot_training - Step 33518: {'lr': 0.0001318911503828098, 'samples': 17161728, 'steps': 33518, 'loss/train': 1.4960331916809082} +02/25/2022 20:58:55 - INFO - codeparrot_training - Step 33519: {'lr': 0.00013187672932977185, 'samples': 17162240, 'steps': 33519, 'loss/train': 1.9133321046829224} +02/25/2022 20:59:02 - INFO - codeparrot_training - Step 33520: {'lr': 0.00013186230878273653, 'samples': 17162752, 'steps': 33520, 'loss/train': 2.5817580223083496} +02/25/2022 20:59:06 - INFO - codeparrot_training - Step 33521: {'lr': 0.0001318478887417655, 'samples': 17163264, 'steps': 33521, 'loss/train': 2.210131883621216} +02/25/2022 20:59:11 - INFO - codeparrot_training - Step 33522: {'lr': 0.00013183346920692067, 'samples': 17163776, 'steps': 33522, 'loss/train': 1.3452799320220947} +02/25/2022 20:59:15 - INFO - codeparrot_training - Step 33523: {'lr': 0.00013181905017826357, 'samples': 17164288, 'steps': 33523, 'loss/train': 1.9927799701690674} +02/25/2022 20:59:20 - INFO - codeparrot_training - Step 33524: {'lr': 0.00013180463165585627, 'samples': 17164800, 'steps': 33524, 'loss/train': 0.8942004442214966} +02/25/2022 20:59:24 - INFO - codeparrot_training - Step 33525: {'lr': 0.0001317902136397604, 'samples': 17165312, 'steps': 33525, 'loss/train': 1.6530109643936157} +02/25/2022 20:59:29 - INFO - codeparrot_training - Step 33526: {'lr': 0.0001317757961300377, 'samples': 17165824, 'steps': 33526, 'loss/train': 1.8047325611114502} +02/25/2022 20:59:33 - INFO - codeparrot_training - Step 33527: {'lr': 0.00013176137912674984, 'samples': 17166336, 'steps': 33527, 'loss/train': 1.8546675443649292} +02/25/2022 20:59:38 - INFO - codeparrot_training - Step 33528: {'lr': 0.00013174696262995884, 'samples': 17166848, 'steps': 33528, 'loss/train': 2.9564239978790283} +02/25/2022 20:59:41 - INFO - codeparrot_training - Step 33529: {'lr': 0.00013173254663972623, 'samples': 17167360, 'steps': 33529, 'loss/train': 1.8618310689926147} +02/25/2022 20:59:48 - INFO - codeparrot_training - Step 33530: {'lr': 0.00013171813115611387, 'samples': 17167872, 'steps': 33530, 'loss/train': 1.823944330215454} +02/25/2022 20:59:51 - INFO - codeparrot_training - Step 33531: {'lr': 0.0001317037161791834, 'samples': 17168384, 'steps': 33531, 'loss/train': 1.596149206161499} +02/25/2022 20:59:57 - INFO - codeparrot_training - Step 33532: {'lr': 0.00013168930170899673, 'samples': 17168896, 'steps': 33532, 'loss/train': 2.4969022274017334} +02/25/2022 21:00:00 - INFO - codeparrot_training - Step 33533: {'lr': 0.00013167488774561552, 'samples': 17169408, 'steps': 33533, 'loss/train': 0.9365255236625671} +02/25/2022 21:00:06 - INFO - codeparrot_training - Step 33534: {'lr': 0.0001316604742891015, 'samples': 17169920, 'steps': 33534, 'loss/train': 0.6567209362983704} +02/25/2022 21:00:09 - INFO - codeparrot_training - Step 33535: {'lr': 0.00013164606133951633, 'samples': 17170432, 'steps': 33535, 'loss/train': 2.3283274173736572} +02/25/2022 21:00:15 - INFO - codeparrot_training - Step 33536: {'lr': 0.00013163164889692198, 'samples': 17170944, 'steps': 33536, 'loss/train': 3.23056960105896} +02/25/2022 21:00:18 - INFO - codeparrot_training - Step 33537: {'lr': 0.00013161723696137996, 'samples': 17171456, 'steps': 33537, 'loss/train': 1.2566150426864624} +02/25/2022 21:00:24 - INFO - codeparrot_training - Step 33538: {'lr': 0.0001316028255329523, 'samples': 17171968, 'steps': 33538, 'loss/train': 1.3540847301483154} +02/25/2022 21:00:27 - INFO - codeparrot_training - Step 33539: {'lr': 0.00013158841461170033, 'samples': 17172480, 'steps': 33539, 'loss/train': 0.04504428058862686} +02/25/2022 21:00:33 - INFO - codeparrot_training - Step 33540: {'lr': 0.00013157400419768611, 'samples': 17172992, 'steps': 33540, 'loss/train': 2.397364377975464} +02/25/2022 21:00:36 - INFO - codeparrot_training - Step 33541: {'lr': 0.00013155959429097115, 'samples': 17173504, 'steps': 33541, 'loss/train': 0.8878464102745056} +02/25/2022 21:00:42 - INFO - codeparrot_training - Step 33542: {'lr': 0.00013154518489161743, 'samples': 17174016, 'steps': 33542, 'loss/train': 2.290039300918579} +02/25/2022 21:00:45 - INFO - codeparrot_training - Step 33543: {'lr': 0.0001315307759996864, 'samples': 17174528, 'steps': 33543, 'loss/train': 2.1497087478637695} +02/25/2022 21:00:51 - INFO - codeparrot_training - Step 33544: {'lr': 0.00013151636761523993, 'samples': 17175040, 'steps': 33544, 'loss/train': 2.8595519065856934} +02/25/2022 21:00:55 - INFO - codeparrot_training - Step 33545: {'lr': 0.00013150195973833968, 'samples': 17175552, 'steps': 33545, 'loss/train': 2.3550238609313965} +02/25/2022 21:01:01 - INFO - codeparrot_training - Step 33546: {'lr': 0.0001314875523690476, 'samples': 17176064, 'steps': 33546, 'loss/train': 2.2303035259246826} +02/25/2022 21:01:04 - INFO - codeparrot_training - Step 33547: {'lr': 0.00013147314550742502, 'samples': 17176576, 'steps': 33547, 'loss/train': 1.4370452165603638} +02/25/2022 21:01:10 - INFO - codeparrot_training - Step 33548: {'lr': 0.0001314587391535339, 'samples': 17177088, 'steps': 33548, 'loss/train': 1.084684133529663} +02/25/2022 21:01:13 - INFO - codeparrot_training - Step 33549: {'lr': 0.00013144433330743583, 'samples': 17177600, 'steps': 33549, 'loss/train': 1.811046838760376} +02/25/2022 21:01:19 - INFO - codeparrot_training - Step 33550: {'lr': 0.00013142992796919266, 'samples': 17178112, 'steps': 33550, 'loss/train': 0.8219271302223206} +02/25/2022 21:01:22 - INFO - codeparrot_training - Step 33551: {'lr': 0.00013141552313886606, 'samples': 17178624, 'steps': 33551, 'loss/train': 0.722046971321106} +02/25/2022 21:01:28 - INFO - codeparrot_training - Step 33552: {'lr': 0.00013140111881651773, 'samples': 17179136, 'steps': 33552, 'loss/train': 1.7162096500396729} +02/25/2022 21:01:31 - INFO - codeparrot_training - Step 33553: {'lr': 0.00013138671500220923, 'samples': 17179648, 'steps': 33553, 'loss/train': 1.9073817729949951} +02/25/2022 21:01:37 - INFO - codeparrot_training - Step 33554: {'lr': 0.00013137231169600244, 'samples': 17180160, 'steps': 33554, 'loss/train': 1.2484458684921265} +02/25/2022 21:01:40 - INFO - codeparrot_training - Step 33555: {'lr': 0.00013135790889795903, 'samples': 17180672, 'steps': 33555, 'loss/train': 1.7559723854064941} +02/25/2022 21:01:46 - INFO - codeparrot_training - Step 33556: {'lr': 0.00013134350660814066, 'samples': 17181184, 'steps': 33556, 'loss/train': 2.2459285259246826} +02/25/2022 21:01:50 - INFO - codeparrot_training - Step 33557: {'lr': 0.00013132910482660892, 'samples': 17181696, 'steps': 33557, 'loss/train': 2.262085437774658} +02/25/2022 21:01:55 - INFO - codeparrot_training - Step 33558: {'lr': 0.00013131470355342572, 'samples': 17182208, 'steps': 33558, 'loss/train': 1.6074315309524536} +02/25/2022 21:01:59 - INFO - codeparrot_training - Step 33559: {'lr': 0.00013130030278865268, 'samples': 17182720, 'steps': 33559, 'loss/train': 1.5473116636276245} +02/25/2022 21:02:04 - INFO - codeparrot_training - Step 33560: {'lr': 0.0001312859025323514, 'samples': 17183232, 'steps': 33560, 'loss/train': 1.608634352684021} +02/25/2022 21:02:08 - INFO - codeparrot_training - Step 33561: {'lr': 0.00013127150278458355, 'samples': 17183744, 'steps': 33561, 'loss/train': 2.049781560897827} +02/25/2022 21:02:13 - INFO - codeparrot_training - Step 33562: {'lr': 0.00013125710354541095, 'samples': 17184256, 'steps': 33562, 'loss/train': 1.3477325439453125} +02/25/2022 21:02:19 - INFO - codeparrot_training - Step 33563: {'lr': 0.00013124270481489525, 'samples': 17184768, 'steps': 33563, 'loss/train': 2.2567009925842285} +02/25/2022 21:02:22 - INFO - codeparrot_training - Step 33564: {'lr': 0.00013122830659309808, 'samples': 17185280, 'steps': 33564, 'loss/train': 1.0545082092285156} +02/25/2022 21:02:28 - INFO - codeparrot_training - Step 33565: {'lr': 0.000131213908880081, 'samples': 17185792, 'steps': 33565, 'loss/train': 1.6416910886764526} +02/25/2022 21:02:31 - INFO - codeparrot_training - Step 33566: {'lr': 0.00013119951167590594, 'samples': 17186304, 'steps': 33566, 'loss/train': 1.9051103591918945} +02/25/2022 21:02:38 - INFO - codeparrot_training - Step 33567: {'lr': 0.00013118511498063447, 'samples': 17186816, 'steps': 33567, 'loss/train': 0.4766671359539032} +02/25/2022 21:02:41 - INFO - codeparrot_training - Step 33568: {'lr': 0.00013117071879432818, 'samples': 17187328, 'steps': 33568, 'loss/train': 1.9449372291564941} +02/25/2022 21:02:47 - INFO - codeparrot_training - Step 33569: {'lr': 0.00013115632311704883, 'samples': 17187840, 'steps': 33569, 'loss/train': 1.583652138710022} +02/25/2022 21:02:50 - INFO - codeparrot_training - Step 33570: {'lr': 0.00013114192794885798, 'samples': 17188352, 'steps': 33570, 'loss/train': 1.9384287595748901} +02/25/2022 21:02:56 - INFO - codeparrot_training - Step 33571: {'lr': 0.00013112753328981748, 'samples': 17188864, 'steps': 33571, 'loss/train': 1.3240394592285156} +02/25/2022 21:02:59 - INFO - codeparrot_training - Step 33572: {'lr': 0.0001311131391399888, 'samples': 17189376, 'steps': 33572, 'loss/train': 0.7764307856559753} +02/25/2022 21:03:05 - INFO - codeparrot_training - Step 33573: {'lr': 0.00013109874549943373, 'samples': 17189888, 'steps': 33573, 'loss/train': 2.430089235305786} +02/25/2022 21:03:08 - INFO - codeparrot_training - Step 33574: {'lr': 0.0001310843523682138, 'samples': 17190400, 'steps': 33574, 'loss/train': 1.2520678043365479} +02/25/2022 21:03:14 - INFO - codeparrot_training - Step 33575: {'lr': 0.00013106995974639083, 'samples': 17190912, 'steps': 33575, 'loss/train': 2.716520071029663} +02/25/2022 21:03:17 - INFO - codeparrot_training - Step 33576: {'lr': 0.00013105556763402627, 'samples': 17191424, 'steps': 33576, 'loss/train': 1.3092294931411743} +02/25/2022 21:03:23 - INFO - codeparrot_training - Step 33577: {'lr': 0.0001310411760311821, 'samples': 17191936, 'steps': 33577, 'loss/train': 0.8175541162490845} +02/25/2022 21:03:26 - INFO - codeparrot_training - Step 33578: {'lr': 0.00013102678493791958, 'samples': 17192448, 'steps': 33578, 'loss/train': 1.7693620920181274} +02/25/2022 21:03:32 - INFO - codeparrot_training - Step 33579: {'lr': 0.0001310123943543006, 'samples': 17192960, 'steps': 33579, 'loss/train': 1.3821778297424316} +02/25/2022 21:03:35 - INFO - codeparrot_training - Step 33580: {'lr': 0.00013099800428038665, 'samples': 17193472, 'steps': 33580, 'loss/train': 2.9223263263702393} +02/25/2022 21:03:42 - INFO - codeparrot_training - Step 33581: {'lr': 0.00013098361471623968, 'samples': 17193984, 'steps': 33581, 'loss/train': 0.9644354581832886} +02/25/2022 21:03:45 - INFO - codeparrot_training - Step 33582: {'lr': 0.00013096922566192088, 'samples': 17194496, 'steps': 33582, 'loss/train': 1.4427651166915894} +02/25/2022 21:03:51 - INFO - codeparrot_training - Step 33583: {'lr': 0.00013095483711749226, 'samples': 17195008, 'steps': 33583, 'loss/train': 2.0543999671936035} +02/25/2022 21:03:54 - INFO - codeparrot_training - Step 33584: {'lr': 0.0001309404490830152, 'samples': 17195520, 'steps': 33584, 'loss/train': 0.7783902883529663} +02/25/2022 21:04:00 - INFO - codeparrot_training - Step 33585: {'lr': 0.00013092606155855154, 'samples': 17196032, 'steps': 33585, 'loss/train': 1.1725890636444092} +02/25/2022 21:04:04 - INFO - codeparrot_training - Step 33586: {'lr': 0.00013091167454416284, 'samples': 17196544, 'steps': 33586, 'loss/train': 1.6601735353469849} +02/25/2022 21:04:09 - INFO - codeparrot_training - Step 33587: {'lr': 0.00013089728803991068, 'samples': 17197056, 'steps': 33587, 'loss/train': 0.08882290869951248} +02/25/2022 21:04:13 - INFO - codeparrot_training - Step 33588: {'lr': 0.00013088290204585666, 'samples': 17197568, 'steps': 33588, 'loss/train': 1.130894660949707} +02/25/2022 21:04:18 - INFO - codeparrot_training - Step 33589: {'lr': 0.00013086851656206254, 'samples': 17198080, 'steps': 33589, 'loss/train': 1.8784723281860352} +02/25/2022 21:04:22 - INFO - codeparrot_training - Step 33590: {'lr': 0.00013085413158858987, 'samples': 17198592, 'steps': 33590, 'loss/train': 2.4832186698913574} +02/25/2022 21:04:27 - INFO - codeparrot_training - Step 33591: {'lr': 0.00013083974712550025, 'samples': 17199104, 'steps': 33591, 'loss/train': 1.8793790340423584} +02/25/2022 21:04:31 - INFO - codeparrot_training - Step 33592: {'lr': 0.00013082536317285526, 'samples': 17199616, 'steps': 33592, 'loss/train': 2.6193909645080566} +02/25/2022 21:04:37 - INFO - codeparrot_training - Step 33593: {'lr': 0.00013081097973071662, 'samples': 17200128, 'steps': 33593, 'loss/train': 1.3323309421539307} +02/25/2022 21:04:41 - INFO - codeparrot_training - Step 33594: {'lr': 0.00013079659679914594, 'samples': 17200640, 'steps': 33594, 'loss/train': 1.5587894916534424} +02/25/2022 21:04:46 - INFO - codeparrot_training - Step 33595: {'lr': 0.00013078221437820475, 'samples': 17201152, 'steps': 33595, 'loss/train': 1.6539360284805298} +02/25/2022 21:04:50 - INFO - codeparrot_training - Step 33596: {'lr': 0.00013076783246795463, 'samples': 17201664, 'steps': 33596, 'loss/train': 1.6523311138153076} +02/25/2022 21:04:55 - INFO - codeparrot_training - Step 33597: {'lr': 0.00013075345106845733, 'samples': 17202176, 'steps': 33597, 'loss/train': 1.4353982210159302} +02/25/2022 21:04:59 - INFO - codeparrot_training - Step 33598: {'lr': 0.0001307390701797743, 'samples': 17202688, 'steps': 33598, 'loss/train': 1.3447364568710327} +02/25/2022 21:05:04 - INFO - codeparrot_training - Step 33599: {'lr': 0.00013072468980196742, 'samples': 17203200, 'steps': 33599, 'loss/train': 1.6747239828109741} +02/25/2022 21:05:08 - INFO - codeparrot_training - Step 33600: {'lr': 0.00013071030993509788, 'samples': 17203712, 'steps': 33600, 'loss/train': 1.739306926727295} +02/25/2022 21:05:13 - INFO - codeparrot_training - Step 33601: {'lr': 0.0001306959305792276, 'samples': 17204224, 'steps': 33601, 'loss/train': 0.9007031917572021} +02/25/2022 21:05:17 - INFO - codeparrot_training - Step 33602: {'lr': 0.00013068155173441809, 'samples': 17204736, 'steps': 33602, 'loss/train': 1.5792921781539917} +02/25/2022 21:05:23 - INFO - codeparrot_training - Step 33603: {'lr': 0.0001306671734007309, 'samples': 17205248, 'steps': 33603, 'loss/train': 2.1220898628234863} +02/25/2022 21:05:27 - INFO - codeparrot_training - Step 33604: {'lr': 0.00013065279557822757, 'samples': 17205760, 'steps': 33604, 'loss/train': 0.8287297487258911} +02/25/2022 21:05:32 - INFO - codeparrot_training - Step 33605: {'lr': 0.00013063841826696984, 'samples': 17206272, 'steps': 33605, 'loss/train': 1.906198263168335} +02/25/2022 21:05:36 - INFO - codeparrot_training - Step 33606: {'lr': 0.00013062404146701922, 'samples': 17206784, 'steps': 33606, 'loss/train': 1.7420077323913574} +02/25/2022 21:05:41 - INFO - codeparrot_training - Step 33607: {'lr': 0.0001306096651784373, 'samples': 17207296, 'steps': 33607, 'loss/train': 1.4945111274719238} +02/25/2022 21:05:45 - INFO - codeparrot_training - Step 33608: {'lr': 0.00013059528940128563, 'samples': 17207808, 'steps': 33608, 'loss/train': 2.7044003009796143} +02/25/2022 21:05:50 - INFO - codeparrot_training - Step 33609: {'lr': 0.00013058091413562585, 'samples': 17208320, 'steps': 33609, 'loss/train': 1.260374903678894} +02/25/2022 21:05:54 - INFO - codeparrot_training - Step 33610: {'lr': 0.00013056653938151955, 'samples': 17208832, 'steps': 33610, 'loss/train': 1.229081630706787} +02/25/2022 21:05:59 - INFO - codeparrot_training - Step 33611: {'lr': 0.0001305521651390282, 'samples': 17209344, 'steps': 33611, 'loss/train': 1.8966976404190063} +02/25/2022 21:06:03 - INFO - codeparrot_training - Step 33612: {'lr': 0.00013053779140821347, 'samples': 17209856, 'steps': 33612, 'loss/train': 1.5888992547988892} +02/25/2022 21:06:09 - INFO - codeparrot_training - Step 33613: {'lr': 0.00013052341818913692, 'samples': 17210368, 'steps': 33613, 'loss/train': 1.3090122938156128} +02/25/2022 21:06:12 - INFO - codeparrot_training - Step 33614: {'lr': 0.00013050904548186015, 'samples': 17210880, 'steps': 33614, 'loss/train': 1.8943570852279663} +02/25/2022 21:06:18 - INFO - codeparrot_training - Step 33615: {'lr': 0.00013049467328644457, 'samples': 17211392, 'steps': 33615, 'loss/train': 1.418304681777954} +02/25/2022 21:06:21 - INFO - codeparrot_training - Step 33616: {'lr': 0.00013048030160295196, 'samples': 17211904, 'steps': 33616, 'loss/train': 2.0662589073181152} +02/25/2022 21:06:27 - INFO - codeparrot_training - Step 33617: {'lr': 0.00013046593043144377, 'samples': 17212416, 'steps': 33617, 'loss/train': 2.159876823425293} +02/25/2022 21:06:30 - INFO - codeparrot_training - Step 33618: {'lr': 0.0001304515597719816, 'samples': 17212928, 'steps': 33618, 'loss/train': 0.46588242053985596} +02/25/2022 21:06:36 - INFO - codeparrot_training - Step 33619: {'lr': 0.00013043718962462688, 'samples': 17213440, 'steps': 33619, 'loss/train': 0.7992615103721619} +02/25/2022 21:06:39 - INFO - codeparrot_training - Step 33620: {'lr': 0.0001304228199894415, 'samples': 17213952, 'steps': 33620, 'loss/train': 1.5776960849761963} +02/25/2022 21:06:45 - INFO - codeparrot_training - Step 33621: {'lr': 0.00013040845086648655, 'samples': 17214464, 'steps': 33621, 'loss/train': 1.5923707485198975} +02/25/2022 21:06:48 - INFO - codeparrot_training - Step 33622: {'lr': 0.00013039408225582394, 'samples': 17214976, 'steps': 33622, 'loss/train': 2.7310385704040527} +02/25/2022 21:06:54 - INFO - codeparrot_training - Step 33623: {'lr': 0.000130379714157515, 'samples': 17215488, 'steps': 33623, 'loss/train': 0.5016627311706543} +02/25/2022 21:06:57 - INFO - codeparrot_training - Step 33624: {'lr': 0.00013036534657162146, 'samples': 17216000, 'steps': 33624, 'loss/train': 4.217424392700195} +02/25/2022 21:07:03 - INFO - codeparrot_training - Step 33625: {'lr': 0.00013035097949820485, 'samples': 17216512, 'steps': 33625, 'loss/train': 2.36342191696167} +02/25/2022 21:07:06 - INFO - codeparrot_training - Step 33626: {'lr': 0.00013033661293732657, 'samples': 17217024, 'steps': 33626, 'loss/train': 2.4237160682678223} +02/25/2022 21:07:12 - INFO - codeparrot_training - Step 33627: {'lr': 0.0001303222468890482, 'samples': 17217536, 'steps': 33627, 'loss/train': 2.6913466453552246} +02/25/2022 21:07:15 - INFO - codeparrot_training - Step 33628: {'lr': 0.00013030788135343142, 'samples': 17218048, 'steps': 33628, 'loss/train': 1.904963493347168} +02/25/2022 21:07:22 - INFO - codeparrot_training - Step 33629: {'lr': 0.00013029351633053765, 'samples': 17218560, 'steps': 33629, 'loss/train': 1.7030514478683472} +02/25/2022 21:07:25 - INFO - codeparrot_training - Step 33630: {'lr': 0.00013027915182042843, 'samples': 17219072, 'steps': 33630, 'loss/train': 2.454561710357666} +02/25/2022 21:07:31 - INFO - codeparrot_training - Step 33631: {'lr': 0.00013026478782316526, 'samples': 17219584, 'steps': 33631, 'loss/train': 2.4349617958068848} +02/25/2022 21:07:34 - INFO - codeparrot_training - Step 33632: {'lr': 0.00013025042433880977, 'samples': 17220096, 'steps': 33632, 'loss/train': 1.519382119178772} +02/25/2022 21:07:40 - INFO - codeparrot_training - Step 33633: {'lr': 0.00013023606136742345, 'samples': 17220608, 'steps': 33633, 'loss/train': 2.088820457458496} +02/25/2022 21:07:43 - INFO - codeparrot_training - Step 33634: {'lr': 0.00013022169890906783, 'samples': 17221120, 'steps': 33634, 'loss/train': 2.268474817276001} +02/25/2022 21:07:49 - INFO - codeparrot_training - Step 33635: {'lr': 0.00013020733696380427, 'samples': 17221632, 'steps': 33635, 'loss/train': 0.02734479494392872} +02/25/2022 21:07:52 - INFO - codeparrot_training - Step 33636: {'lr': 0.00013019297553169457, 'samples': 17222144, 'steps': 33636, 'loss/train': 2.41414475440979} +02/25/2022 21:07:58 - INFO - codeparrot_training - Step 33637: {'lr': 0.00013017861461280005, 'samples': 17222656, 'steps': 33637, 'loss/train': 1.1709057092666626} +02/25/2022 21:08:01 - INFO - codeparrot_training - Step 33638: {'lr': 0.00013016425420718248, 'samples': 17223168, 'steps': 33638, 'loss/train': 0.536399781703949} +02/25/2022 21:08:08 - INFO - codeparrot_training - Step 33639: {'lr': 0.00013014989431490298, 'samples': 17223680, 'steps': 33639, 'loss/train': 1.8530877828598022} +02/25/2022 21:08:11 - INFO - codeparrot_training - Step 33640: {'lr': 0.00013013553493602337, 'samples': 17224192, 'steps': 33640, 'loss/train': 1.8192707300186157} +02/25/2022 21:08:17 - INFO - codeparrot_training - Step 33641: {'lr': 0.00013012117607060498, 'samples': 17224704, 'steps': 33641, 'loss/train': 2.9733150005340576} +02/25/2022 21:08:20 - INFO - codeparrot_training - Step 33642: {'lr': 0.0001301068177187096, 'samples': 17225216, 'steps': 33642, 'loss/train': 2.9503421783447266} +02/25/2022 21:08:26 - INFO - codeparrot_training - Step 33643: {'lr': 0.00013009245988039836, 'samples': 17225728, 'steps': 33643, 'loss/train': 0.7980608940124512} +02/25/2022 21:08:29 - INFO - codeparrot_training - Step 33644: {'lr': 0.00013007810255573303, 'samples': 17226240, 'steps': 33644, 'loss/train': 1.8964685201644897} +02/25/2022 21:08:35 - INFO - codeparrot_training - Step 33645: {'lr': 0.0001300637457447749, 'samples': 17226752, 'steps': 33645, 'loss/train': 2.0529398918151855} +02/25/2022 21:08:38 - INFO - codeparrot_training - Step 33646: {'lr': 0.00013004938944758575, 'samples': 17227264, 'steps': 33646, 'loss/train': 1.1015039682388306} +02/25/2022 21:08:44 - INFO - codeparrot_training - Step 33647: {'lr': 0.00013003503366422692, 'samples': 17227776, 'steps': 33647, 'loss/train': 1.6781864166259766} +02/25/2022 21:08:47 - INFO - codeparrot_training - Step 33648: {'lr': 0.0001300206783947599, 'samples': 17228288, 'steps': 33648, 'loss/train': 0.8616465926170349} +02/25/2022 21:08:53 - INFO - codeparrot_training - Step 33649: {'lr': 0.00013000632363924618, 'samples': 17228800, 'steps': 33649, 'loss/train': 0.7489467859268188} +02/25/2022 21:08:57 - INFO - codeparrot_training - Step 33650: {'lr': 0.00012999196939774722, 'samples': 17229312, 'steps': 33650, 'loss/train': 0.5407328605651855} +02/25/2022 21:09:02 - INFO - codeparrot_training - Step 33651: {'lr': 0.0001299776156703246, 'samples': 17229824, 'steps': 33651, 'loss/train': 1.9846183061599731} +02/25/2022 21:09:06 - INFO - codeparrot_training - Step 33652: {'lr': 0.00012996326245703977, 'samples': 17230336, 'steps': 33652, 'loss/train': 1.2445614337921143} +02/25/2022 21:09:11 - INFO - codeparrot_training - Step 33653: {'lr': 0.0001299489097579542, 'samples': 17230848, 'steps': 33653, 'loss/train': 1.4827134609222412} +02/25/2022 21:09:15 - INFO - codeparrot_training - Step 33654: {'lr': 0.00012993455757312933, 'samples': 17231360, 'steps': 33654, 'loss/train': 2.2110941410064697} +02/25/2022 21:09:20 - INFO - codeparrot_training - Step 33655: {'lr': 0.00012992020590262677, 'samples': 17231872, 'steps': 33655, 'loss/train': 1.152616262435913} +02/25/2022 21:09:26 - INFO - codeparrot_training - Step 33656: {'lr': 0.0001299058547465079, 'samples': 17232384, 'steps': 33656, 'loss/train': 1.5722112655639648} +02/25/2022 21:09:29 - INFO - codeparrot_training - Step 33657: {'lr': 0.00012989150410483422, 'samples': 17232896, 'steps': 33657, 'loss/train': 2.3757901191711426} +02/25/2022 21:09:35 - INFO - codeparrot_training - Step 33658: {'lr': 0.00012987715397766707, 'samples': 17233408, 'steps': 33658, 'loss/train': 1.5098944902420044} +02/25/2022 21:09:38 - INFO - codeparrot_training - Step 33659: {'lr': 0.0001298628043650682, 'samples': 17233920, 'steps': 33659, 'loss/train': 0.9366226196289062} +02/25/2022 21:09:44 - INFO - codeparrot_training - Step 33660: {'lr': 0.00012984845526709893, 'samples': 17234432, 'steps': 33660, 'loss/train': 1.382674217224121} +02/25/2022 21:09:48 - INFO - codeparrot_training - Step 33661: {'lr': 0.00012983410668382066, 'samples': 17234944, 'steps': 33661, 'loss/train': 2.12652325630188} +02/25/2022 21:09:53 - INFO - codeparrot_training - Step 33662: {'lr': 0.0001298197586152949, 'samples': 17235456, 'steps': 33662, 'loss/train': 1.0333904027938843} +02/25/2022 21:09:57 - INFO - codeparrot_training - Step 33663: {'lr': 0.0001298054110615832, 'samples': 17235968, 'steps': 33663, 'loss/train': 2.891796350479126} +02/25/2022 21:10:02 - INFO - codeparrot_training - Step 33664: {'lr': 0.00012979106402274693, 'samples': 17236480, 'steps': 33664, 'loss/train': 2.807631015777588} +02/25/2022 21:10:06 - INFO - codeparrot_training - Step 33665: {'lr': 0.00012977671749884762, 'samples': 17236992, 'steps': 33665, 'loss/train': 1.7277345657348633} +02/25/2022 21:10:11 - INFO - codeparrot_training - Step 33666: {'lr': 0.0001297623714899465, 'samples': 17237504, 'steps': 33666, 'loss/train': 1.4725676774978638} +02/25/2022 21:10:15 - INFO - codeparrot_training - Step 33667: {'lr': 0.0001297480259961054, 'samples': 17238016, 'steps': 33667, 'loss/train': 1.6230171918869019} +02/25/2022 21:10:20 - INFO - codeparrot_training - Step 33668: {'lr': 0.0001297336810173855, 'samples': 17238528, 'steps': 33668, 'loss/train': 1.5123738050460815} +02/25/2022 21:10:24 - INFO - codeparrot_training - Step 33669: {'lr': 0.00012971933655384836, 'samples': 17239040, 'steps': 33669, 'loss/train': 1.4499177932739258} +02/25/2022 21:10:29 - INFO - codeparrot_training - Step 33670: {'lr': 0.00012970499260555525, 'samples': 17239552, 'steps': 33670, 'loss/train': 1.8074325323104858} +02/25/2022 21:10:33 - INFO - codeparrot_training - Step 33671: {'lr': 0.0001296906491725679, 'samples': 17240064, 'steps': 33671, 'loss/train': 2.260063409805298} +02/25/2022 21:10:38 - INFO - codeparrot_training - Step 33672: {'lr': 0.00012967630625494746, 'samples': 17240576, 'steps': 33672, 'loss/train': 1.3195381164550781} +02/25/2022 21:10:42 - INFO - codeparrot_training - Step 33673: {'lr': 0.00012966196385275574, 'samples': 17241088, 'steps': 33673, 'loss/train': 1.5441840887069702} +02/25/2022 21:10:48 - INFO - codeparrot_training - Step 33674: {'lr': 0.00012964762196605376, 'samples': 17241600, 'steps': 33674, 'loss/train': 1.3310438394546509} +02/25/2022 21:10:52 - INFO - codeparrot_training - Step 33675: {'lr': 0.00012963328059490325, 'samples': 17242112, 'steps': 33675, 'loss/train': 1.2927652597427368} +02/25/2022 21:10:57 - INFO - codeparrot_training - Step 33676: {'lr': 0.00012961893973936544, 'samples': 17242624, 'steps': 33676, 'loss/train': 1.964985728263855} +02/25/2022 21:11:01 - INFO - codeparrot_training - Step 33677: {'lr': 0.00012960459939950205, 'samples': 17243136, 'steps': 33677, 'loss/train': 0.8210015296936035} +02/25/2022 21:11:07 - INFO - codeparrot_training - Step 33678: {'lr': 0.00012959025957537415, 'samples': 17243648, 'steps': 33678, 'loss/train': 1.57382333278656} +02/25/2022 21:11:10 - INFO - codeparrot_training - Step 33679: {'lr': 0.00012957592026704344, 'samples': 17244160, 'steps': 33679, 'loss/train': 1.5136228799819946} +02/25/2022 21:11:15 - INFO - codeparrot_training - Step 33680: {'lr': 0.00012956158147457115, 'samples': 17244672, 'steps': 33680, 'loss/train': 1.345003604888916} +02/25/2022 21:11:19 - INFO - codeparrot_training - Step 33681: {'lr': 0.00012954724319801897, 'samples': 17245184, 'steps': 33681, 'loss/train': 2.4613077640533447} +02/25/2022 21:11:24 - INFO - codeparrot_training - Step 33682: {'lr': 0.000129532905437448, 'samples': 17245696, 'steps': 33682, 'loss/train': 1.4208694696426392} +02/25/2022 21:11:28 - INFO - codeparrot_training - Step 33683: {'lr': 0.0001295185681929199, 'samples': 17246208, 'steps': 33683, 'loss/train': 1.334010362625122} +02/25/2022 21:11:34 - INFO - codeparrot_training - Step 33684: {'lr': 0.00012950423146449586, 'samples': 17246720, 'steps': 33684, 'loss/train': 2.0325515270233154} +02/25/2022 21:11:38 - INFO - codeparrot_training - Step 33685: {'lr': 0.0001294898952522376, 'samples': 17247232, 'steps': 33685, 'loss/train': 0.7957848906517029} +02/25/2022 21:11:43 - INFO - codeparrot_training - Step 33686: {'lr': 0.00012947555955620626, 'samples': 17247744, 'steps': 33686, 'loss/train': 2.5259854793548584} +02/25/2022 21:11:47 - INFO - codeparrot_training - Step 33687: {'lr': 0.00012946122437646347, 'samples': 17248256, 'steps': 33687, 'loss/train': 2.0723929405212402} +02/25/2022 21:11:52 - INFO - codeparrot_training - Step 33688: {'lr': 0.00012944688971307033, 'samples': 17248768, 'steps': 33688, 'loss/train': 1.531112790107727} +02/25/2022 21:11:56 - INFO - codeparrot_training - Step 33689: {'lr': 0.00012943255556608857, 'samples': 17249280, 'steps': 33689, 'loss/train': 1.1501189470291138} +02/25/2022 21:12:01 - INFO - codeparrot_training - Step 33690: {'lr': 0.00012941822193557944, 'samples': 17249792, 'steps': 33690, 'loss/train': 2.2206125259399414} +02/25/2022 21:12:04 - INFO - codeparrot_training - Step 33691: {'lr': 0.00012940388882160436, 'samples': 17250304, 'steps': 33691, 'loss/train': 1.06920325756073} +02/25/2022 21:12:10 - INFO - codeparrot_training - Step 33692: {'lr': 0.00012938955622422466, 'samples': 17250816, 'steps': 33692, 'loss/train': 1.8763850927352905} +02/25/2022 21:12:13 - INFO - codeparrot_training - Step 33693: {'lr': 0.0001293752241435019, 'samples': 17251328, 'steps': 33693, 'loss/train': 2.325950860977173} +02/25/2022 21:12:19 - INFO - codeparrot_training - Step 33694: {'lr': 0.00012936089257949734, 'samples': 17251840, 'steps': 33694, 'loss/train': 2.7652454376220703} +02/25/2022 21:12:23 - INFO - codeparrot_training - Step 33695: {'lr': 0.0001293465615322724, 'samples': 17252352, 'steps': 33695, 'loss/train': 1.1024342775344849} +02/25/2022 21:12:29 - INFO - codeparrot_training - Step 33696: {'lr': 0.00012933223100188842, 'samples': 17252864, 'steps': 33696, 'loss/train': 1.908313512802124} +02/25/2022 21:12:32 - INFO - codeparrot_training - Step 33697: {'lr': 0.00012931790098840695, 'samples': 17253376, 'steps': 33697, 'loss/train': 0.09740322083234787} +02/25/2022 21:12:38 - INFO - codeparrot_training - Step 33698: {'lr': 0.00012930357149188926, 'samples': 17253888, 'steps': 33698, 'loss/train': 1.3092392683029175} +02/25/2022 21:12:41 - INFO - codeparrot_training - Step 33699: {'lr': 0.0001292892425123967, 'samples': 17254400, 'steps': 33699, 'loss/train': 0.8420547842979431} +02/25/2022 21:12:47 - INFO - codeparrot_training - Step 33700: {'lr': 0.00012927491404999075, 'samples': 17254912, 'steps': 33700, 'loss/train': 0.7378376126289368} +02/25/2022 21:12:50 - INFO - codeparrot_training - Step 33701: {'lr': 0.00012926058610473264, 'samples': 17255424, 'steps': 33701, 'loss/train': 0.9803818464279175} +02/25/2022 21:12:56 - INFO - codeparrot_training - Step 33702: {'lr': 0.00012924625867668388, 'samples': 17255936, 'steps': 33702, 'loss/train': 2.5155279636383057} +02/25/2022 21:12:59 - INFO - codeparrot_training - Step 33703: {'lr': 0.00012923193176590586, 'samples': 17256448, 'steps': 33703, 'loss/train': 1.3950456380844116} +02/25/2022 21:13:05 - INFO - codeparrot_training - Step 33704: {'lr': 0.00012921760537245986, 'samples': 17256960, 'steps': 33704, 'loss/train': 1.1246124505996704} +02/25/2022 21:13:08 - INFO - codeparrot_training - Step 33705: {'lr': 0.0001292032794964072, 'samples': 17257472, 'steps': 33705, 'loss/train': 1.4105521440505981} +02/25/2022 21:13:15 - INFO - codeparrot_training - Step 33706: {'lr': 0.00012918895413780945, 'samples': 17257984, 'steps': 33706, 'loss/train': 2.2826855182647705} +02/25/2022 21:13:18 - INFO - codeparrot_training - Step 33707: {'lr': 0.00012917462929672773, 'samples': 17258496, 'steps': 33707, 'loss/train': 2.267845869064331} +02/25/2022 21:13:24 - INFO - codeparrot_training - Step 33708: {'lr': 0.00012916030497322375, 'samples': 17259008, 'steps': 33708, 'loss/train': 2.367537260055542} +02/25/2022 21:13:27 - INFO - codeparrot_training - Step 33709: {'lr': 0.00012914598116735846, 'samples': 17259520, 'steps': 33709, 'loss/train': 2.224104881286621} +02/25/2022 21:13:33 - INFO - codeparrot_training - Step 33710: {'lr': 0.0001291316578791935, 'samples': 17260032, 'steps': 33710, 'loss/train': 1.4223227500915527} +02/25/2022 21:13:36 - INFO - codeparrot_training - Step 33711: {'lr': 0.00012911733510879005, 'samples': 17260544, 'steps': 33711, 'loss/train': 0.31277206540107727} +02/25/2022 21:13:42 - INFO - codeparrot_training - Step 33712: {'lr': 0.00012910301285620974, 'samples': 17261056, 'steps': 33712, 'loss/train': 1.3715959787368774} +02/25/2022 21:13:45 - INFO - codeparrot_training - Step 33713: {'lr': 0.0001290886911215135, 'samples': 17261568, 'steps': 33713, 'loss/train': 1.9319833517074585} +02/25/2022 21:13:51 - INFO - codeparrot_training - Step 33714: {'lr': 0.00012907436990476306, 'samples': 17262080, 'steps': 33714, 'loss/train': 2.2283267974853516} +02/25/2022 21:13:54 - INFO - codeparrot_training - Step 33715: {'lr': 0.00012906004920601948, 'samples': 17262592, 'steps': 33715, 'loss/train': 0.7467585206031799} +02/25/2022 21:14:00 - INFO - codeparrot_training - Step 33716: {'lr': 0.0001290457290253445, 'samples': 17263104, 'steps': 33716, 'loss/train': 1.4538532495498657} +02/25/2022 21:14:04 - INFO - codeparrot_training - Step 33717: {'lr': 0.00012903140936279897, 'samples': 17263616, 'steps': 33717, 'loss/train': 1.1254184246063232} +02/25/2022 21:14:07 - INFO - codeparrot_training - Step 33718: {'lr': 0.0001290170902184446, 'samples': 17264128, 'steps': 33718, 'loss/train': 2.270923614501953} +02/25/2022 21:14:13 - INFO - codeparrot_training - Step 33719: {'lr': 0.00012900277159234248, 'samples': 17264640, 'steps': 33719, 'loss/train': 3.5176162719726562} +02/25/2022 21:14:16 - INFO - codeparrot_training - Step 33720: {'lr': 0.00012898845348455418, 'samples': 17265152, 'steps': 33720, 'loss/train': 2.921058416366577} +02/25/2022 21:14:22 - INFO - codeparrot_training - Step 33721: {'lr': 0.00012897413589514089, 'samples': 17265664, 'steps': 33721, 'loss/train': 0.6855606436729431} +02/25/2022 21:14:26 - INFO - codeparrot_training - Step 33722: {'lr': 0.000128959818824164, 'samples': 17266176, 'steps': 33722, 'loss/train': 1.8103679418563843} +02/25/2022 21:14:31 - INFO - codeparrot_training - Step 33723: {'lr': 0.00012894550227168469, 'samples': 17266688, 'steps': 33723, 'loss/train': 2.340031623840332} +02/25/2022 21:14:37 - INFO - codeparrot_training - Step 33724: {'lr': 0.0001289311862377645, 'samples': 17267200, 'steps': 33724, 'loss/train': 1.982516884803772} +02/25/2022 21:14:40 - INFO - codeparrot_training - Step 33725: {'lr': 0.00012891687072246472, 'samples': 17267712, 'steps': 33725, 'loss/train': 1.9022910594940186} +02/25/2022 21:14:44 - INFO - codeparrot_training - Step 33726: {'lr': 0.00012890255572584657, 'samples': 17268224, 'steps': 33726, 'loss/train': 0.9450118541717529} +02/25/2022 21:14:49 - INFO - codeparrot_training - Step 33727: {'lr': 0.00012888824124797132, 'samples': 17268736, 'steps': 33727, 'loss/train': 1.738682508468628} +02/25/2022 21:14:55 - INFO - codeparrot_training - Step 33728: {'lr': 0.00012887392728890053, 'samples': 17269248, 'steps': 33728, 'loss/train': 1.5189931392669678} +02/25/2022 21:14:59 - INFO - codeparrot_training - Step 33729: {'lr': 0.0001288596138486953, 'samples': 17269760, 'steps': 33729, 'loss/train': 7.502089023590088} +02/25/2022 21:15:02 - INFO - codeparrot_training - Step 33730: {'lr': 0.0001288453009274171, 'samples': 17270272, 'steps': 33730, 'loss/train': 1.7766786813735962} +02/25/2022 21:15:08 - INFO - codeparrot_training - Step 33731: {'lr': 0.00012883098852512701, 'samples': 17270784, 'steps': 33731, 'loss/train': 2.1426002979278564} +02/25/2022 21:15:12 - INFO - codeparrot_training - Step 33732: {'lr': 0.00012881667664188659, 'samples': 17271296, 'steps': 33732, 'loss/train': 1.8388675451278687} +02/25/2022 21:15:17 - INFO - codeparrot_training - Step 33733: {'lr': 0.00012880236527775706, 'samples': 17271808, 'steps': 33733, 'loss/train': 2.0290133953094482} +02/25/2022 21:15:21 - INFO - codeparrot_training - Step 33734: {'lr': 0.00012878805443279973, 'samples': 17272320, 'steps': 33734, 'loss/train': 0.8609355092048645} +02/25/2022 21:15:26 - INFO - codeparrot_training - Step 33735: {'lr': 0.00012877374410707576, 'samples': 17272832, 'steps': 33735, 'loss/train': 1.5654685497283936} +02/25/2022 21:15:32 - INFO - codeparrot_training - Step 33736: {'lr': 0.00012875943430064668, 'samples': 17273344, 'steps': 33736, 'loss/train': 1.158992052078247} +02/25/2022 21:15:35 - INFO - codeparrot_training - Step 33737: {'lr': 0.00012874512501357367, 'samples': 17273856, 'steps': 33737, 'loss/train': 0.7392899990081787} +02/25/2022 21:15:41 - INFO - codeparrot_training - Step 33738: {'lr': 0.00012873081624591807, 'samples': 17274368, 'steps': 33738, 'loss/train': 1.5286954641342163} +02/25/2022 21:15:44 - INFO - codeparrot_training - Step 33739: {'lr': 0.00012871650799774103, 'samples': 17274880, 'steps': 33739, 'loss/train': 2.1024341583251953} +02/25/2022 21:15:49 - INFO - codeparrot_training - Step 33740: {'lr': 0.00012870220026910405, 'samples': 17275392, 'steps': 33740, 'loss/train': 1.8214421272277832} +02/25/2022 21:15:53 - INFO - codeparrot_training - Step 33741: {'lr': 0.00012868789306006833, 'samples': 17275904, 'steps': 33741, 'loss/train': 1.8990613222122192} +02/25/2022 21:15:59 - INFO - codeparrot_training - Step 33742: {'lr': 0.0001286735863706951, 'samples': 17276416, 'steps': 33742, 'loss/train': 0.8461995124816895} +02/25/2022 21:16:02 - INFO - codeparrot_training - Step 33743: {'lr': 0.00012865928020104576, 'samples': 17276928, 'steps': 33743, 'loss/train': 1.3933275938034058} +02/25/2022 21:16:08 - INFO - codeparrot_training - Step 33744: {'lr': 0.0001286449745511815, 'samples': 17277440, 'steps': 33744, 'loss/train': 0.628835141658783} +02/25/2022 21:16:12 - INFO - codeparrot_training - Step 33745: {'lr': 0.0001286306694211637, 'samples': 17277952, 'steps': 33745, 'loss/train': 1.5539894104003906} +02/25/2022 21:16:17 - INFO - codeparrot_training - Step 33746: {'lr': 0.00012861636481105343, 'samples': 17278464, 'steps': 33746, 'loss/train': 1.9513862133026123} +02/25/2022 21:16:21 - INFO - codeparrot_training - Step 33747: {'lr': 0.00012860206072091236, 'samples': 17278976, 'steps': 33747, 'loss/train': 2.8285558223724365} +02/25/2022 21:16:26 - INFO - codeparrot_training - Step 33748: {'lr': 0.00012858775715080125, 'samples': 17279488, 'steps': 33748, 'loss/train': 1.750298261642456} +02/25/2022 21:16:29 - INFO - codeparrot_training - Step 33749: {'lr': 0.0001285734541007818, 'samples': 17280000, 'steps': 33749, 'loss/train': 2.391174554824829} +02/25/2022 21:16:35 - INFO - codeparrot_training - Step 33750: {'lr': 0.00012855915157091496, 'samples': 17280512, 'steps': 33750, 'loss/train': 2.1921160221099854} +02/25/2022 21:16:38 - INFO - codeparrot_training - Step 33751: {'lr': 0.0001285448495612624, 'samples': 17281024, 'steps': 33751, 'loss/train': 1.680588722229004} +02/25/2022 21:16:45 - INFO - codeparrot_training - Step 33752: {'lr': 0.00012853054807188488, 'samples': 17281536, 'steps': 33752, 'loss/train': 1.7051770687103271} +02/25/2022 21:16:48 - INFO - codeparrot_training - Step 33753: {'lr': 0.00012851624710284403, 'samples': 17282048, 'steps': 33753, 'loss/train': 1.9384405612945557} +02/25/2022 21:16:54 - INFO - codeparrot_training - Step 33754: {'lr': 0.00012850194665420096, 'samples': 17282560, 'steps': 33754, 'loss/train': 1.533313512802124} +02/25/2022 21:16:57 - INFO - codeparrot_training - Step 33755: {'lr': 0.00012848764672601705, 'samples': 17283072, 'steps': 33755, 'loss/train': 2.5330677032470703} +02/25/2022 21:17:03 - INFO - codeparrot_training - Step 33756: {'lr': 0.00012847334731835345, 'samples': 17283584, 'steps': 33756, 'loss/train': 1.350999355316162} +02/25/2022 21:17:06 - INFO - codeparrot_training - Step 33757: {'lr': 0.00012845904843127143, 'samples': 17284096, 'steps': 33757, 'loss/train': 1.5097826719284058} +02/25/2022 21:17:12 - INFO - codeparrot_training - Step 33758: {'lr': 0.0001284447500648322, 'samples': 17284608, 'steps': 33758, 'loss/train': 0.9145926237106323} +02/25/2022 21:17:15 - INFO - codeparrot_training - Step 33759: {'lr': 0.00012843045221909715, 'samples': 17285120, 'steps': 33759, 'loss/train': 1.5351028442382812} +02/25/2022 21:17:21 - INFO - codeparrot_training - Step 33760: {'lr': 0.0001284161548941274, 'samples': 17285632, 'steps': 33760, 'loss/train': 2.0386199951171875} +02/25/2022 21:17:24 - INFO - codeparrot_training - Step 33761: {'lr': 0.0001284018580899843, 'samples': 17286144, 'steps': 33761, 'loss/train': 1.9935725927352905} +02/25/2022 21:17:30 - INFO - codeparrot_training - Step 33762: {'lr': 0.00012838756180672887, 'samples': 17286656, 'steps': 33762, 'loss/train': 2.113556146621704} +02/25/2022 21:17:34 - INFO - codeparrot_training - Step 33763: {'lr': 0.00012837326604442262, 'samples': 17287168, 'steps': 33763, 'loss/train': 0.44728103280067444} +02/25/2022 21:17:39 - INFO - codeparrot_training - Step 33764: {'lr': 0.00012835897080312668, 'samples': 17287680, 'steps': 33764, 'loss/train': 1.968787670135498} +02/25/2022 21:17:43 - INFO - codeparrot_training - Step 33765: {'lr': 0.0001283446760829023, 'samples': 17288192, 'steps': 33765, 'loss/train': 2.889301061630249} +02/25/2022 21:17:48 - INFO - codeparrot_training - Step 33766: {'lr': 0.0001283303818838106, 'samples': 17288704, 'steps': 33766, 'loss/train': 2.0273549556732178} +02/25/2022 21:17:52 - INFO - codeparrot_training - Step 33767: {'lr': 0.00012831608820591301, 'samples': 17289216, 'steps': 33767, 'loss/train': 1.0190807580947876} +02/25/2022 21:17:57 - INFO - codeparrot_training - Step 33768: {'lr': 0.00012830179504927052, 'samples': 17289728, 'steps': 33768, 'loss/train': 2.6717264652252197} +02/25/2022 21:18:01 - INFO - codeparrot_training - Step 33769: {'lr': 0.00012828750241394474, 'samples': 17290240, 'steps': 33769, 'loss/train': 2.1391308307647705} +02/25/2022 21:18:06 - INFO - codeparrot_training - Step 33770: {'lr': 0.00012827321029999645, 'samples': 17290752, 'steps': 33770, 'loss/train': 1.4765336513519287} +02/25/2022 21:18:10 - INFO - codeparrot_training - Step 33771: {'lr': 0.00012825891870748716, 'samples': 17291264, 'steps': 33771, 'loss/train': 1.2519994974136353} +02/25/2022 21:18:15 - INFO - codeparrot_training - Step 33772: {'lr': 0.0001282446276364779, 'samples': 17291776, 'steps': 33772, 'loss/train': 1.9881829023361206} +02/25/2022 21:18:19 - INFO - codeparrot_training - Step 33773: {'lr': 0.0001282303370870302, 'samples': 17292288, 'steps': 33773, 'loss/train': 1.3524502515792847} +02/25/2022 21:18:24 - INFO - codeparrot_training - Step 33774: {'lr': 0.00012821604705920487, 'samples': 17292800, 'steps': 33774, 'loss/train': 1.6359270811080933} +02/25/2022 21:18:28 - INFO - codeparrot_training - Step 33775: {'lr': 0.0001282017575530634, 'samples': 17293312, 'steps': 33775, 'loss/train': 1.7404199838638306} +02/25/2022 21:18:33 - INFO - codeparrot_training - Step 33776: {'lr': 0.00012818746856866687, 'samples': 17293824, 'steps': 33776, 'loss/train': 1.450870156288147} +02/25/2022 21:18:37 - INFO - codeparrot_training - Step 33777: {'lr': 0.0001281731801060767, 'samples': 17294336, 'steps': 33777, 'loss/train': 2.3660786151885986} +02/25/2022 21:18:43 - INFO - codeparrot_training - Step 33778: {'lr': 0.0001281588921653538, 'samples': 17294848, 'steps': 33778, 'loss/train': 2.1349151134490967} +02/25/2022 21:18:48 - INFO - codeparrot_training - Step 33779: {'lr': 0.0001281446047465596, 'samples': 17295360, 'steps': 33779, 'loss/train': 1.1453312635421753} +02/25/2022 21:18:52 - INFO - codeparrot_training - Step 33780: {'lr': 0.00012813031784975518, 'samples': 17295872, 'steps': 33780, 'loss/train': 2.256417751312256} +02/25/2022 21:18:57 - INFO - codeparrot_training - Step 33781: {'lr': 0.0001281160314750017, 'samples': 17296384, 'steps': 33781, 'loss/train': 2.6236774921417236} +02/25/2022 21:19:01 - INFO - codeparrot_training - Step 33782: {'lr': 0.00012810174562236055, 'samples': 17296896, 'steps': 33782, 'loss/train': 1.2795182466506958} +02/25/2022 21:19:04 - INFO - codeparrot_training - Step 33783: {'lr': 0.00012808746029189277, 'samples': 17297408, 'steps': 33783, 'loss/train': 0.646183967590332} +02/25/2022 21:19:10 - INFO - codeparrot_training - Step 33784: {'lr': 0.00012807317548365965, 'samples': 17297920, 'steps': 33784, 'loss/train': 1.2222410440444946} +02/25/2022 21:19:13 - INFO - codeparrot_training - Step 33785: {'lr': 0.00012805889119772224, 'samples': 17298432, 'steps': 33785, 'loss/train': 1.362569808959961} +02/25/2022 21:19:19 - INFO - codeparrot_training - Step 33786: {'lr': 0.00012804460743414187, 'samples': 17298944, 'steps': 33786, 'loss/train': 2.362821102142334} +02/25/2022 21:19:22 - INFO - codeparrot_training - Step 33787: {'lr': 0.00012803032419297973, 'samples': 17299456, 'steps': 33787, 'loss/train': 2.095163345336914} +02/25/2022 21:19:29 - INFO - codeparrot_training - Step 33788: {'lr': 0.0001280160414742969, 'samples': 17299968, 'steps': 33788, 'loss/train': 0.22723883390426636} +02/25/2022 21:19:32 - INFO - codeparrot_training - Step 33789: {'lr': 0.00012800175927815455, 'samples': 17300480, 'steps': 33789, 'loss/train': 2.116652727127075} +02/25/2022 21:19:38 - INFO - codeparrot_training - Step 33790: {'lr': 0.00012798747760461399, 'samples': 17300992, 'steps': 33790, 'loss/train': 1.2462120056152344} +02/25/2022 21:19:41 - INFO - codeparrot_training - Step 33791: {'lr': 0.00012797319645373634, 'samples': 17301504, 'steps': 33791, 'loss/train': 1.3809499740600586} +02/25/2022 21:19:47 - INFO - codeparrot_training - Step 33792: {'lr': 0.0001279589158255828, 'samples': 17302016, 'steps': 33792, 'loss/train': 1.826533317565918} +02/25/2022 21:19:52 - INFO - codeparrot_training - Step 33793: {'lr': 0.00012794463572021438, 'samples': 17302528, 'steps': 33793, 'loss/train': 1.3947933912277222} +02/25/2022 21:19:56 - INFO - codeparrot_training - Step 33794: {'lr': 0.0001279303561376925, 'samples': 17303040, 'steps': 33794, 'loss/train': 4.1246232986450195} +02/25/2022 21:20:01 - INFO - codeparrot_training - Step 33795: {'lr': 0.00012791607707807824, 'samples': 17303552, 'steps': 33795, 'loss/train': 1.8450731039047241} +02/25/2022 21:20:05 - INFO - codeparrot_training - Step 33796: {'lr': 0.0001279017985414327, 'samples': 17304064, 'steps': 33796, 'loss/train': 1.660609245300293} +02/25/2022 21:20:08 - INFO - codeparrot_training - Step 33797: {'lr': 0.00012788752052781698, 'samples': 17304576, 'steps': 33797, 'loss/train': 8.734773635864258} +02/25/2022 21:20:15 - INFO - codeparrot_training - Step 33798: {'lr': 0.00012787324303729247, 'samples': 17305088, 'steps': 33798, 'loss/train': 0.7384844422340393} +02/25/2022 21:20:18 - INFO - codeparrot_training - Step 33799: {'lr': 0.0001278589660699202, 'samples': 17305600, 'steps': 33799, 'loss/train': 2.1188859939575195} +02/25/2022 21:20:23 - INFO - codeparrot_training - Step 33800: {'lr': 0.00012784468962576134, 'samples': 17306112, 'steps': 33800, 'loss/train': 1.429151177406311} +02/25/2022 21:20:29 - INFO - codeparrot_training - Step 33801: {'lr': 0.00012783041370487692, 'samples': 17306624, 'steps': 33801, 'loss/train': 2.3084049224853516} +02/25/2022 21:20:33 - INFO - codeparrot_training - Step 33802: {'lr': 0.00012781613830732834, 'samples': 17307136, 'steps': 33802, 'loss/train': 3.241088390350342} +02/25/2022 21:20:38 - INFO - codeparrot_training - Step 33803: {'lr': 0.00012780186343317652, 'samples': 17307648, 'steps': 33803, 'loss/train': 1.4482510089874268} +02/25/2022 21:20:42 - INFO - codeparrot_training - Step 33804: {'lr': 0.00012778758908248288, 'samples': 17308160, 'steps': 33804, 'loss/train': 1.6408255100250244} +02/25/2022 21:20:47 - INFO - codeparrot_training - Step 33805: {'lr': 0.00012777331525530827, 'samples': 17308672, 'steps': 33805, 'loss/train': 1.994948387145996} +02/25/2022 21:20:51 - INFO - codeparrot_training - Step 33806: {'lr': 0.00012775904195171402, 'samples': 17309184, 'steps': 33806, 'loss/train': 1.8792275190353394} +02/25/2022 21:20:57 - INFO - codeparrot_training - Step 33807: {'lr': 0.00012774476917176112, 'samples': 17309696, 'steps': 33807, 'loss/train': 0.7920739054679871} +02/25/2022 21:21:00 - INFO - codeparrot_training - Step 33808: {'lr': 0.00012773049691551102, 'samples': 17310208, 'steps': 33808, 'loss/train': 1.5104888677597046} +02/25/2022 21:21:06 - INFO - codeparrot_training - Step 33809: {'lr': 0.00012771622518302446, 'samples': 17310720, 'steps': 33809, 'loss/train': 1.0942180156707764} +02/25/2022 21:21:09 - INFO - codeparrot_training - Step 33810: {'lr': 0.00012770195397436283, 'samples': 17311232, 'steps': 33810, 'loss/train': 1.358215570449829} +02/25/2022 21:21:15 - INFO - codeparrot_training - Step 33811: {'lr': 0.0001276876832895871, 'samples': 17311744, 'steps': 33811, 'loss/train': 1.5940165519714355} +02/25/2022 21:21:18 - INFO - codeparrot_training - Step 33812: {'lr': 0.00012767341312875868, 'samples': 17312256, 'steps': 33812, 'loss/train': 1.5972968339920044} +02/25/2022 21:21:24 - INFO - codeparrot_training - Step 33813: {'lr': 0.00012765914349193835, 'samples': 17312768, 'steps': 33813, 'loss/train': 0.7212535738945007} +02/25/2022 21:21:27 - INFO - codeparrot_training - Step 33814: {'lr': 0.0001276448743791875, 'samples': 17313280, 'steps': 33814, 'loss/train': 1.6966056823730469} +02/25/2022 21:21:33 - INFO - codeparrot_training - Step 33815: {'lr': 0.00012763060579056707, 'samples': 17313792, 'steps': 33815, 'loss/train': 1.6988589763641357} +02/25/2022 21:21:36 - INFO - codeparrot_training - Step 33816: {'lr': 0.0001276163377261383, 'samples': 17314304, 'steps': 33816, 'loss/train': 1.5363017320632935} +02/25/2022 21:21:42 - INFO - codeparrot_training - Step 33817: {'lr': 0.00012760207018596234, 'samples': 17314816, 'steps': 33817, 'loss/train': 0.8881706595420837} +02/25/2022 21:21:45 - INFO - codeparrot_training - Step 33818: {'lr': 0.00012758780317010022, 'samples': 17315328, 'steps': 33818, 'loss/train': 0.9441114664077759} +02/25/2022 21:21:51 - INFO - codeparrot_training - Step 33819: {'lr': 0.000127573536678613, 'samples': 17315840, 'steps': 33819, 'loss/train': 1.4173080921173096} +02/25/2022 21:21:54 - INFO - codeparrot_training - Step 33820: {'lr': 0.000127559270711562, 'samples': 17316352, 'steps': 33820, 'loss/train': 1.4545091390609741} +02/25/2022 21:21:59 - INFO - codeparrot_training - Step 33821: {'lr': 0.00012754500526900814, 'samples': 17316864, 'steps': 33821, 'loss/train': 2.3621437549591064} +02/25/2022 21:22:03 - INFO - codeparrot_training - Step 33822: {'lr': 0.0001275307403510126, 'samples': 17317376, 'steps': 33822, 'loss/train': 1.1926594972610474} +02/25/2022 21:22:09 - INFO - codeparrot_training - Step 33823: {'lr': 0.00012751647595763644, 'samples': 17317888, 'steps': 33823, 'loss/train': 1.8042646646499634} +02/25/2022 21:22:13 - INFO - codeparrot_training - Step 33824: {'lr': 0.00012750221208894085, 'samples': 17318400, 'steps': 33824, 'loss/train': 1.7800565958023071} +02/25/2022 21:22:18 - INFO - codeparrot_training - Step 33825: {'lr': 0.0001274879487449869, 'samples': 17318912, 'steps': 33825, 'loss/train': 2.3981049060821533} +02/25/2022 21:22:22 - INFO - codeparrot_training - Step 33826: {'lr': 0.00012747368592583568, 'samples': 17319424, 'steps': 33826, 'loss/train': 1.627747893333435} +02/25/2022 21:22:27 - INFO - codeparrot_training - Step 33827: {'lr': 0.00012745942363154828, 'samples': 17319936, 'steps': 33827, 'loss/train': 1.4943040609359741} +02/25/2022 21:22:31 - INFO - codeparrot_training - Step 33828: {'lr': 0.0001274451618621857, 'samples': 17320448, 'steps': 33828, 'loss/train': 2.628154754638672} +02/25/2022 21:22:36 - INFO - codeparrot_training - Step 33829: {'lr': 0.00012743090061780922, 'samples': 17320960, 'steps': 33829, 'loss/train': 1.7411680221557617} +02/25/2022 21:22:40 - INFO - codeparrot_training - Step 33830: {'lr': 0.00012741663989847984, 'samples': 17321472, 'steps': 33830, 'loss/train': 1.3970381021499634} +02/25/2022 21:22:45 - INFO - codeparrot_training - Step 33831: {'lr': 0.00012740237970425866, 'samples': 17321984, 'steps': 33831, 'loss/train': 2.2303411960601807} +02/25/2022 21:22:49 - INFO - codeparrot_training - Step 33832: {'lr': 0.00012738812003520666, 'samples': 17322496, 'steps': 33832, 'loss/train': 1.0654678344726562} +02/25/2022 21:22:55 - INFO - codeparrot_training - Step 33833: {'lr': 0.00012737386089138513, 'samples': 17323008, 'steps': 33833, 'loss/train': 2.1928462982177734} +02/25/2022 21:22:58 - INFO - codeparrot_training - Step 33834: {'lr': 0.00012735960227285503, 'samples': 17323520, 'steps': 33834, 'loss/train': 2.309786558151245} +02/25/2022 21:23:04 - INFO - codeparrot_training - Step 33835: {'lr': 0.00012734534417967747, 'samples': 17324032, 'steps': 33835, 'loss/train': 2.4196839332580566} +02/25/2022 21:23:07 - INFO - codeparrot_training - Step 33836: {'lr': 0.0001273310866119134, 'samples': 17324544, 'steps': 33836, 'loss/train': 0.07057811319828033} +02/25/2022 21:23:13 - INFO - codeparrot_training - Step 33837: {'lr': 0.00012731682956962405, 'samples': 17325056, 'steps': 33837, 'loss/train': 0.9658114910125732} +02/25/2022 21:23:16 - INFO - codeparrot_training - Step 33838: {'lr': 0.00012730257305287052, 'samples': 17325568, 'steps': 33838, 'loss/train': 2.374498128890991} +02/25/2022 21:23:22 - INFO - codeparrot_training - Step 33839: {'lr': 0.00012728831706171375, 'samples': 17326080, 'steps': 33839, 'loss/train': 1.5761159658432007} +02/25/2022 21:23:25 - INFO - codeparrot_training - Step 33840: {'lr': 0.00012727406159621478, 'samples': 17326592, 'steps': 33840, 'loss/train': 2.206552028656006} +02/25/2022 21:23:31 - INFO - codeparrot_training - Step 33841: {'lr': 0.00012725980665643488, 'samples': 17327104, 'steps': 33841, 'loss/train': 1.8146603107452393} +02/25/2022 21:23:34 - INFO - codeparrot_training - Step 33842: {'lr': 0.0001272455522424349, 'samples': 17327616, 'steps': 33842, 'loss/train': 1.3796119689941406} +02/25/2022 21:23:40 - INFO - codeparrot_training - Step 33843: {'lr': 0.00012723129835427616, 'samples': 17328128, 'steps': 33843, 'loss/train': 2.133607864379883} +02/25/2022 21:23:44 - INFO - codeparrot_training - Step 33844: {'lr': 0.00012721704499201939, 'samples': 17328640, 'steps': 33844, 'loss/train': 0.23322130739688873} +02/25/2022 21:23:49 - INFO - codeparrot_training - Step 33845: {'lr': 0.0001272027921557259, 'samples': 17329152, 'steps': 33845, 'loss/train': 1.4369958639144897} +02/25/2022 21:23:53 - INFO - codeparrot_training - Step 33846: {'lr': 0.00012718853984545653, 'samples': 17329664, 'steps': 33846, 'loss/train': 2.2980916500091553} +02/25/2022 21:23:58 - INFO - codeparrot_training - Step 33847: {'lr': 0.00012717428806127267, 'samples': 17330176, 'steps': 33847, 'loss/train': 2.0317742824554443} +02/25/2022 21:24:02 - INFO - codeparrot_training - Step 33848: {'lr': 0.000127160036803235, 'samples': 17330688, 'steps': 33848, 'loss/train': 1.8091988563537598} +02/25/2022 21:24:07 - INFO - codeparrot_training - Step 33849: {'lr': 0.00012714578607140475, 'samples': 17331200, 'steps': 33849, 'loss/train': 2.528547525405884} +02/25/2022 21:24:11 - INFO - codeparrot_training - Step 33850: {'lr': 0.0001271315358658429, 'samples': 17331712, 'steps': 33850, 'loss/train': 1.903111457824707} +02/25/2022 21:24:16 - INFO - codeparrot_training - Step 33851: {'lr': 0.00012711728618661062, 'samples': 17332224, 'steps': 33851, 'loss/train': 1.893397331237793} +02/25/2022 21:24:20 - INFO - codeparrot_training - Step 33852: {'lr': 0.0001271030370337689, 'samples': 17332736, 'steps': 33852, 'loss/train': 0.6803359985351562} +02/25/2022 21:24:27 - INFO - codeparrot_training - Step 33853: {'lr': 0.0001270887884073787, 'samples': 17333248, 'steps': 33853, 'loss/train': 2.1163859367370605} +02/25/2022 21:24:30 - INFO - codeparrot_training - Step 33854: {'lr': 0.000127074540307501, 'samples': 17333760, 'steps': 33854, 'loss/train': 2.4561607837677} +02/25/2022 21:24:36 - INFO - codeparrot_training - Step 33855: {'lr': 0.00012706029273419706, 'samples': 17334272, 'steps': 33855, 'loss/train': 1.0953820943832397} +02/25/2022 21:24:39 - INFO - codeparrot_training - Step 33856: {'lr': 0.00012704604568752775, 'samples': 17334784, 'steps': 33856, 'loss/train': 2.2849254608154297} +02/25/2022 21:24:45 - INFO - codeparrot_training - Step 33857: {'lr': 0.0001270317991675542, 'samples': 17335296, 'steps': 33857, 'loss/train': 0.9441487193107605} +02/25/2022 21:24:48 - INFO - codeparrot_training - Step 33858: {'lr': 0.00012701755317433722, 'samples': 17335808, 'steps': 33858, 'loss/train': 1.4715889692306519} +02/25/2022 21:24:54 - INFO - codeparrot_training - Step 33859: {'lr': 0.0001270033077079381, 'samples': 17336320, 'steps': 33859, 'loss/train': 2.308405876159668} +02/25/2022 21:24:57 - INFO - codeparrot_training - Step 33860: {'lr': 0.00012698906276841776, 'samples': 17336832, 'steps': 33860, 'loss/train': 2.1189324855804443} +02/25/2022 21:25:03 - INFO - codeparrot_training - Step 33861: {'lr': 0.00012697481835583725, 'samples': 17337344, 'steps': 33861, 'loss/train': 2.567265748977661} +02/25/2022 21:25:06 - INFO - codeparrot_training - Step 33862: {'lr': 0.0001269605744702574, 'samples': 17337856, 'steps': 33862, 'loss/train': 1.2684749364852905} +02/25/2022 21:25:14 - INFO - codeparrot_training - Step 33863: {'lr': 0.00012694633111173952, 'samples': 17338368, 'steps': 33863, 'loss/train': 1.4620167016983032} +02/25/2022 21:25:17 - INFO - codeparrot_training - Step 33864: {'lr': 0.00012693208828034447, 'samples': 17338880, 'steps': 33864, 'loss/train': 1.5807520151138306} +02/25/2022 21:25:23 - INFO - codeparrot_training - Step 33865: {'lr': 0.00012691784597613327, 'samples': 17339392, 'steps': 33865, 'loss/train': 2.109790802001953} +02/25/2022 21:25:26 - INFO - codeparrot_training - Step 33866: {'lr': 0.00012690360419916681, 'samples': 17339904, 'steps': 33866, 'loss/train': 2.128892421722412} +02/25/2022 21:25:32 - INFO - codeparrot_training - Step 33867: {'lr': 0.00012688936294950637, 'samples': 17340416, 'steps': 33867, 'loss/train': 2.1829886436462402} +02/25/2022 21:25:35 - INFO - codeparrot_training - Step 33868: {'lr': 0.0001268751222272128, 'samples': 17340928, 'steps': 33868, 'loss/train': 1.2478166818618774} +02/25/2022 21:25:41 - INFO - codeparrot_training - Step 33869: {'lr': 0.00012686088203234708, 'samples': 17341440, 'steps': 33869, 'loss/train': 0.27779507637023926} +02/25/2022 21:25:44 - INFO - codeparrot_training - Step 33870: {'lr': 0.00012684664236497017, 'samples': 17341952, 'steps': 33870, 'loss/train': 2.0378453731536865} +02/25/2022 21:25:50 - INFO - codeparrot_training - Step 33871: {'lr': 0.00012683240322514322, 'samples': 17342464, 'steps': 33871, 'loss/train': 1.2678548097610474} +02/25/2022 21:25:53 - INFO - codeparrot_training - Step 33872: {'lr': 0.00012681816461292713, 'samples': 17342976, 'steps': 33872, 'loss/train': 2.1260597705841064} +02/25/2022 21:26:01 - INFO - codeparrot_training - Step 33873: {'lr': 0.00012680392652838295, 'samples': 17343488, 'steps': 33873, 'loss/train': 1.573125958442688} +02/25/2022 21:26:04 - INFO - codeparrot_training - Step 33874: {'lr': 0.00012678968897157162, 'samples': 17344000, 'steps': 33874, 'loss/train': 1.628826379776001} +02/25/2022 21:26:09 - INFO - codeparrot_training - Step 33875: {'lr': 0.00012677545194255402, 'samples': 17344512, 'steps': 33875, 'loss/train': 1.747269630432129} +02/25/2022 21:26:13 - INFO - codeparrot_training - Step 33876: {'lr': 0.00012676121544139135, 'samples': 17345024, 'steps': 33876, 'loss/train': 1.6083604097366333} +02/25/2022 21:26:19 - INFO - codeparrot_training - Step 33877: {'lr': 0.0001267469794681444, 'samples': 17345536, 'steps': 33877, 'loss/train': 1.6406654119491577} +02/25/2022 21:26:22 - INFO - codeparrot_training - Step 33878: {'lr': 0.00012673274402287448, 'samples': 17346048, 'steps': 33878, 'loss/train': 1.620491623878479} +02/25/2022 21:26:28 - INFO - codeparrot_training - Step 33879: {'lr': 0.0001267185091056421, 'samples': 17346560, 'steps': 33879, 'loss/train': 1.682497262954712} +02/25/2022 21:26:31 - INFO - codeparrot_training - Step 33880: {'lr': 0.00012670427471650863, 'samples': 17347072, 'steps': 33880, 'loss/train': 2.2391676902770996} +02/25/2022 21:26:37 - INFO - codeparrot_training - Step 33881: {'lr': 0.00012669004085553477, 'samples': 17347584, 'steps': 33881, 'loss/train': 1.7439417839050293} +02/25/2022 21:26:40 - INFO - codeparrot_training - Step 33882: {'lr': 0.00012667580752278183, 'samples': 17348096, 'steps': 33882, 'loss/train': 2.0875656604766846} +02/25/2022 21:26:46 - INFO - codeparrot_training - Step 33883: {'lr': 0.00012666157471831035, 'samples': 17348608, 'steps': 33883, 'loss/train': 0.9700318574905396} +02/25/2022 21:26:53 - INFO - codeparrot_training - Step 33884: {'lr': 0.00012664734244218165, 'samples': 17349120, 'steps': 33884, 'loss/train': 1.6929476261138916} +02/25/2022 21:26:56 - INFO - codeparrot_training - Step 33885: {'lr': 0.00012663311069445644, 'samples': 17349632, 'steps': 33885, 'loss/train': 4.503514766693115} +02/25/2022 21:27:02 - INFO - codeparrot_training - Step 33886: {'lr': 0.000126618879475196, 'samples': 17350144, 'steps': 33886, 'loss/train': 2.246424674987793} +02/25/2022 21:27:05 - INFO - codeparrot_training - Step 33887: {'lr': 0.00012660464878446094, 'samples': 17350656, 'steps': 33887, 'loss/train': 1.6511085033416748} +02/25/2022 21:27:11 - INFO - codeparrot_training - Step 33888: {'lr': 0.00012659041862231245, 'samples': 17351168, 'steps': 33888, 'loss/train': 1.7126740217208862} +02/25/2022 21:27:14 - INFO - codeparrot_training - Step 33889: {'lr': 0.00012657618898881135, 'samples': 17351680, 'steps': 33889, 'loss/train': 1.870279312133789} +02/25/2022 21:27:20 - INFO - codeparrot_training - Step 33890: {'lr': 0.00012656195988401876, 'samples': 17352192, 'steps': 33890, 'loss/train': 1.5617823600769043} +02/25/2022 21:27:23 - INFO - codeparrot_training - Step 33891: {'lr': 0.00012654773130799552, 'samples': 17352704, 'steps': 33891, 'loss/train': 1.8851540088653564} +02/25/2022 21:27:29 - INFO - codeparrot_training - Step 33892: {'lr': 0.0001265335032608026, 'samples': 17353216, 'steps': 33892, 'loss/train': 1.4389629364013672} +02/25/2022 21:27:33 - INFO - codeparrot_training - Step 33893: {'lr': 0.0001265192757425009, 'samples': 17353728, 'steps': 33893, 'loss/train': 1.3848462104797363} +02/25/2022 21:27:38 - INFO - codeparrot_training - Step 33894: {'lr': 0.00012650504875315145, 'samples': 17354240, 'steps': 33894, 'loss/train': 1.4005266427993774} +02/25/2022 21:27:41 - INFO - codeparrot_training - Step 33895: {'lr': 0.00012649082229281516, 'samples': 17354752, 'steps': 33895, 'loss/train': 1.4963746070861816} +02/25/2022 21:27:47 - INFO - codeparrot_training - Step 33896: {'lr': 0.00012647659636155298, 'samples': 17355264, 'steps': 33896, 'loss/train': 2.163606643676758} +02/25/2022 21:27:51 - INFO - codeparrot_training - Step 33897: {'lr': 0.00012646237095942576, 'samples': 17355776, 'steps': 33897, 'loss/train': 1.659933090209961} +02/25/2022 21:27:56 - INFO - codeparrot_training - Step 33898: {'lr': 0.00012644814608649458, 'samples': 17356288, 'steps': 33898, 'loss/train': 2.6481409072875977} +02/25/2022 21:28:00 - INFO - codeparrot_training - Step 33899: {'lr': 0.00012643392174282032, 'samples': 17356800, 'steps': 33899, 'loss/train': 0.5791870951652527} +02/25/2022 21:28:03 - INFO - codeparrot_training - Step 33900: {'lr': 0.00012641969792846392, 'samples': 17357312, 'steps': 33900, 'loss/train': 3.5239765644073486} +02/25/2022 21:28:10 - INFO - codeparrot_training - Step 33901: {'lr': 0.00012640547464348617, 'samples': 17357824, 'steps': 33901, 'loss/train': 2.549034833908081} +02/25/2022 21:28:13 - INFO - codeparrot_training - Step 33902: {'lr': 0.00012639125188794822, 'samples': 17358336, 'steps': 33902, 'loss/train': 2.302746534347534} +02/25/2022 21:28:19 - INFO - codeparrot_training - Step 33903: {'lr': 0.00012637702966191084, 'samples': 17358848, 'steps': 33903, 'loss/train': 1.5887818336486816} +02/25/2022 21:28:24 - INFO - codeparrot_training - Step 33904: {'lr': 0.00012636280796543515, 'samples': 17359360, 'steps': 33904, 'loss/train': 0.2531879246234894} +02/25/2022 21:28:28 - INFO - codeparrot_training - Step 33905: {'lr': 0.00012634858679858176, 'samples': 17359872, 'steps': 33905, 'loss/train': 1.86305832862854} +02/25/2022 21:28:33 - INFO - codeparrot_training - Step 33906: {'lr': 0.00012633436616141187, 'samples': 17360384, 'steps': 33906, 'loss/train': 1.60151207447052} +02/25/2022 21:28:37 - INFO - codeparrot_training - Step 33907: {'lr': 0.00012632014605398628, 'samples': 17360896, 'steps': 33907, 'loss/train': 1.1821651458740234} +02/25/2022 21:28:42 - INFO - codeparrot_training - Step 33908: {'lr': 0.0001263059264763659, 'samples': 17361408, 'steps': 33908, 'loss/train': 1.4744224548339844} +02/25/2022 21:28:46 - INFO - codeparrot_training - Step 33909: {'lr': 0.00012629170742861157, 'samples': 17361920, 'steps': 33909, 'loss/train': 1.404135823249817} +02/25/2022 21:28:52 - INFO - codeparrot_training - Step 33910: {'lr': 0.00012627748891078439, 'samples': 17362432, 'steps': 33910, 'loss/train': 1.8911937475204468} +02/25/2022 21:28:56 - INFO - codeparrot_training - Step 33911: {'lr': 0.00012626327092294515, 'samples': 17362944, 'steps': 33911, 'loss/train': 1.41278076171875} +02/25/2022 21:29:01 - INFO - codeparrot_training - Step 33912: {'lr': 0.00012624905346515463, 'samples': 17363456, 'steps': 33912, 'loss/train': 2.0132040977478027} +02/25/2022 21:29:05 - INFO - codeparrot_training - Step 33913: {'lr': 0.00012623483653747403, 'samples': 17363968, 'steps': 33913, 'loss/train': 2.729323387145996} +02/25/2022 21:29:10 - INFO - codeparrot_training - Step 33914: {'lr': 0.00012622062013996406, 'samples': 17364480, 'steps': 33914, 'loss/train': 0.7326216101646423} +02/25/2022 21:29:14 - INFO - codeparrot_training - Step 33915: {'lr': 0.00012620640427268566, 'samples': 17364992, 'steps': 33915, 'loss/train': 1.2955124378204346} +02/25/2022 21:29:19 - INFO - codeparrot_training - Step 33916: {'lr': 0.00012619218893569962, 'samples': 17365504, 'steps': 33916, 'loss/train': 1.2184885740280151} +02/25/2022 21:29:23 - INFO - codeparrot_training - Step 33917: {'lr': 0.000126177974129067, 'samples': 17366016, 'steps': 33917, 'loss/train': 1.6754100322723389} +02/25/2022 21:29:28 - INFO - codeparrot_training - Step 33918: {'lr': 0.00012616375985284863, 'samples': 17366528, 'steps': 33918, 'loss/train': 1.5852715969085693} +02/25/2022 21:29:32 - INFO - codeparrot_training - Step 33919: {'lr': 0.0001261495461071054, 'samples': 17367040, 'steps': 33919, 'loss/train': 2.148063898086548} +02/25/2022 21:29:38 - INFO - codeparrot_training - Step 33920: {'lr': 0.0001261353328918981, 'samples': 17367552, 'steps': 33920, 'loss/train': 1.9192047119140625} +02/25/2022 21:29:42 - INFO - codeparrot_training - Step 33921: {'lr': 0.0001261211202072878, 'samples': 17368064, 'steps': 33921, 'loss/train': 1.1880720853805542} +02/25/2022 21:29:47 - INFO - codeparrot_training - Step 33922: {'lr': 0.00012610690805333526, 'samples': 17368576, 'steps': 33922, 'loss/train': 1.4836456775665283} +02/25/2022 21:29:51 - INFO - codeparrot_training - Step 33923: {'lr': 0.0001260926964301014, 'samples': 17369088, 'steps': 33923, 'loss/train': 1.558600664138794} +02/25/2022 21:29:56 - INFO - codeparrot_training - Step 33924: {'lr': 0.00012607848533764698, 'samples': 17369600, 'steps': 33924, 'loss/train': 1.5515732765197754} +02/25/2022 21:30:00 - INFO - codeparrot_training - Step 33925: {'lr': 0.00012606427477603307, 'samples': 17370112, 'steps': 33925, 'loss/train': 1.622775912284851} +02/25/2022 21:30:06 - INFO - codeparrot_training - Step 33926: {'lr': 0.0001260500647453205, 'samples': 17370624, 'steps': 33926, 'loss/train': 0.9117430448532104} +02/25/2022 21:30:09 - INFO - codeparrot_training - Step 33927: {'lr': 0.00012603585524557004, 'samples': 17371136, 'steps': 33927, 'loss/train': 0.8037216663360596} +02/25/2022 21:30:15 - INFO - codeparrot_training - Step 33928: {'lr': 0.00012602164627684254, 'samples': 17371648, 'steps': 33928, 'loss/train': 1.9353673458099365} +02/25/2022 21:30:18 - INFO - codeparrot_training - Step 33929: {'lr': 0.000126007437839199, 'samples': 17372160, 'steps': 33929, 'loss/train': 1.9547228813171387} +02/25/2022 21:30:25 - INFO - codeparrot_training - Step 33930: {'lr': 0.0001259932299327003, 'samples': 17372672, 'steps': 33930, 'loss/train': 2.6104300022125244} +02/25/2022 21:30:28 - INFO - codeparrot_training - Step 33931: {'lr': 0.00012597902255740716, 'samples': 17373184, 'steps': 33931, 'loss/train': 0.9223966598510742} +02/25/2022 21:30:34 - INFO - codeparrot_training - Step 33932: {'lr': 0.00012596481571338042, 'samples': 17373696, 'steps': 33932, 'loss/train': 0.9656538963317871} +02/25/2022 21:30:37 - INFO - codeparrot_training - Step 33933: {'lr': 0.00012595060940068115, 'samples': 17374208, 'steps': 33933, 'loss/train': 1.844779133796692} +02/25/2022 21:30:43 - INFO - codeparrot_training - Step 33934: {'lr': 0.00012593640361937007, 'samples': 17374720, 'steps': 33934, 'loss/train': 1.202297568321228} +02/25/2022 21:30:46 - INFO - codeparrot_training - Step 33935: {'lr': 0.000125922198369508, 'samples': 17375232, 'steps': 33935, 'loss/train': 1.727132797241211} +02/25/2022 21:30:52 - INFO - codeparrot_training - Step 33936: {'lr': 0.0001259079936511558, 'samples': 17375744, 'steps': 33936, 'loss/train': 2.462247610092163} +02/25/2022 21:30:55 - INFO - codeparrot_training - Step 33937: {'lr': 0.0001258937894643744, 'samples': 17376256, 'steps': 33937, 'loss/train': 2.097827434539795} +02/25/2022 21:31:01 - INFO - codeparrot_training - Step 33938: {'lr': 0.00012587958580922453, 'samples': 17376768, 'steps': 33938, 'loss/train': 1.4040406942367554} +02/25/2022 21:31:04 - INFO - codeparrot_training - Step 33939: {'lr': 0.0001258653826857673, 'samples': 17377280, 'steps': 33939, 'loss/train': 3.076289415359497} +02/25/2022 21:31:10 - INFO - codeparrot_training - Step 33940: {'lr': 0.00012585118009406313, 'samples': 17377792, 'steps': 33940, 'loss/train': 1.3370944261550903} +02/25/2022 21:31:13 - INFO - codeparrot_training - Step 33941: {'lr': 0.00012583697803417317, 'samples': 17378304, 'steps': 33941, 'loss/train': 1.432740330696106} +02/25/2022 21:31:19 - INFO - codeparrot_training - Step 33942: {'lr': 0.0001258227765061581, 'samples': 17378816, 'steps': 33942, 'loss/train': 1.8163667917251587} +02/25/2022 21:31:22 - INFO - codeparrot_training - Step 33943: {'lr': 0.000125808575510079, 'samples': 17379328, 'steps': 33943, 'loss/train': 3.35750675201416} +02/25/2022 21:31:28 - INFO - codeparrot_training - Step 33944: {'lr': 0.00012579437504599638, 'samples': 17379840, 'steps': 33944, 'loss/train': 1.80765700340271} +02/25/2022 21:31:31 - INFO - codeparrot_training - Step 33945: {'lr': 0.00012578017511397126, 'samples': 17380352, 'steps': 33945, 'loss/train': 1.9311050176620483} +02/25/2022 21:31:38 - INFO - codeparrot_training - Step 33946: {'lr': 0.00012576597571406438, 'samples': 17380864, 'steps': 33946, 'loss/train': 1.7766954898834229} +02/25/2022 21:31:41 - INFO - codeparrot_training - Step 33947: {'lr': 0.00012575177684633675, 'samples': 17381376, 'steps': 33947, 'loss/train': 1.289225459098816} +02/25/2022 21:31:47 - INFO - codeparrot_training - Step 33948: {'lr': 0.0001257375785108489, 'samples': 17381888, 'steps': 33948, 'loss/train': 2.7758259773254395} +02/25/2022 21:31:50 - INFO - codeparrot_training - Step 33949: {'lr': 0.0001257233807076619, 'samples': 17382400, 'steps': 33949, 'loss/train': 1.406354546546936} +02/25/2022 21:31:56 - INFO - codeparrot_training - Step 33950: {'lr': 0.00012570918343683636, 'samples': 17382912, 'steps': 33950, 'loss/train': 1.9868816137313843} +02/25/2022 21:31:59 - INFO - codeparrot_training - Step 33951: {'lr': 0.00012569498669843333, 'samples': 17383424, 'steps': 33951, 'loss/train': 1.9443607330322266} +02/25/2022 21:32:05 - INFO - codeparrot_training - Step 33952: {'lr': 0.0001256807904925135, 'samples': 17383936, 'steps': 33952, 'loss/train': 1.1866079568862915} +02/25/2022 21:32:08 - INFO - codeparrot_training - Step 33953: {'lr': 0.00012566659481913766, 'samples': 17384448, 'steps': 33953, 'loss/train': 1.3708676099777222} +02/25/2022 21:32:14 - INFO - codeparrot_training - Step 33954: {'lr': 0.00012565239967836657, 'samples': 17384960, 'steps': 33954, 'loss/train': 1.6714774370193481} +02/25/2022 21:32:17 - INFO - codeparrot_training - Step 33955: {'lr': 0.00012563820507026122, 'samples': 17385472, 'steps': 33955, 'loss/train': 2.0897037982940674} +02/25/2022 21:32:24 - INFO - codeparrot_training - Step 33956: {'lr': 0.0001256240109948823, 'samples': 17385984, 'steps': 33956, 'loss/train': 1.468039631843567} +02/25/2022 21:32:28 - INFO - codeparrot_training - Step 33957: {'lr': 0.00012560981745229061, 'samples': 17386496, 'steps': 33957, 'loss/train': 1.7863770723342896} +02/25/2022 21:32:33 - INFO - codeparrot_training - Step 33958: {'lr': 0.000125595624442547, 'samples': 17387008, 'steps': 33958, 'loss/train': 1.113387107849121} +02/25/2022 21:32:37 - INFO - codeparrot_training - Step 33959: {'lr': 0.0001255814319657121, 'samples': 17387520, 'steps': 33959, 'loss/train': 1.5041394233703613} +02/25/2022 21:32:42 - INFO - codeparrot_training - Step 33960: {'lr': 0.00012556724002184697, 'samples': 17388032, 'steps': 33960, 'loss/train': 2.3950276374816895} +02/25/2022 21:32:46 - INFO - codeparrot_training - Step 33961: {'lr': 0.00012555304861101225, 'samples': 17388544, 'steps': 33961, 'loss/train': 1.5436636209487915} +02/25/2022 21:32:51 - INFO - codeparrot_training - Step 33962: {'lr': 0.00012553885773326873, 'samples': 17389056, 'steps': 33962, 'loss/train': 1.258392333984375} +02/25/2022 21:32:54 - INFO - codeparrot_training - Step 33963: {'lr': 0.00012552466738867719, 'samples': 17389568, 'steps': 33963, 'loss/train': 0.9962368607521057} +02/25/2022 21:33:00 - INFO - codeparrot_training - Step 33964: {'lr': 0.0001255104775772985, 'samples': 17390080, 'steps': 33964, 'loss/train': 1.6323280334472656} +02/25/2022 21:33:03 - INFO - codeparrot_training - Step 33965: {'lr': 0.00012549628829919342, 'samples': 17390592, 'steps': 33965, 'loss/train': 2.3690903186798096} +02/25/2022 21:33:11 - INFO - codeparrot_training - Step 33966: {'lr': 0.00012548209955442265, 'samples': 17391104, 'steps': 33966, 'loss/train': 2.231538772583008} +02/25/2022 21:33:14 - INFO - codeparrot_training - Step 33967: {'lr': 0.00012546791134304696, 'samples': 17391616, 'steps': 33967, 'loss/train': 2.023271322250366} +02/25/2022 21:33:20 - INFO - codeparrot_training - Step 33968: {'lr': 0.0001254537236651273, 'samples': 17392128, 'steps': 33968, 'loss/train': 1.204742670059204} +02/25/2022 21:33:25 - INFO - codeparrot_training - Step 33969: {'lr': 0.00012543953652072436, 'samples': 17392640, 'steps': 33969, 'loss/train': 0.6722031235694885} +02/25/2022 21:33:29 - INFO - codeparrot_training - Step 33970: {'lr': 0.00012542534990989885, 'samples': 17393152, 'steps': 33970, 'loss/train': 2.123962163925171} +02/25/2022 21:33:34 - INFO - codeparrot_training - Step 33971: {'lr': 0.0001254111638327115, 'samples': 17393664, 'steps': 33971, 'loss/train': 1.146430253982544} +02/25/2022 21:33:38 - INFO - codeparrot_training - Step 33972: {'lr': 0.00012539697828922332, 'samples': 17394176, 'steps': 33972, 'loss/train': 2.195979595184326} +02/25/2022 21:33:43 - INFO - codeparrot_training - Step 33973: {'lr': 0.00012538279327949475, 'samples': 17394688, 'steps': 33973, 'loss/train': 0.11000077426433563} +02/25/2022 21:33:47 - INFO - codeparrot_training - Step 33974: {'lr': 0.0001253686088035869, 'samples': 17395200, 'steps': 33974, 'loss/train': 1.9265820980072021} +02/25/2022 21:33:54 - INFO - codeparrot_training - Step 33975: {'lr': 0.00012535442486156023, 'samples': 17395712, 'steps': 33975, 'loss/train': 1.5206562280654907} +02/25/2022 21:33:57 - INFO - codeparrot_training - Step 33976: {'lr': 0.0001253402414534757, 'samples': 17396224, 'steps': 33976, 'loss/train': 1.1792259216308594} +02/25/2022 21:34:03 - INFO - codeparrot_training - Step 33977: {'lr': 0.0001253260585793939, 'samples': 17396736, 'steps': 33977, 'loss/train': 0.7375963926315308} +02/25/2022 21:34:06 - INFO - codeparrot_training - Step 33978: {'lr': 0.00012531187623937584, 'samples': 17397248, 'steps': 33978, 'loss/train': 2.053083658218384} +02/25/2022 21:34:12 - INFO - codeparrot_training - Step 33979: {'lr': 0.00012529769443348193, 'samples': 17397760, 'steps': 33979, 'loss/train': 2.0573718547821045} +02/25/2022 21:34:15 - INFO - codeparrot_training - Step 33980: {'lr': 0.0001252835131617732, 'samples': 17398272, 'steps': 33980, 'loss/train': 1.6052539348602295} +02/25/2022 21:34:21 - INFO - codeparrot_training - Step 33981: {'lr': 0.0001252693324243102, 'samples': 17398784, 'steps': 33981, 'loss/train': 1.567251443862915} +02/25/2022 21:34:24 - INFO - codeparrot_training - Step 33982: {'lr': 0.00012525515222115395, 'samples': 17399296, 'steps': 33982, 'loss/train': 1.0509614944458008} +02/25/2022 21:34:30 - INFO - codeparrot_training - Step 33983: {'lr': 0.00012524097255236483, 'samples': 17399808, 'steps': 33983, 'loss/train': 2.0293431282043457} +02/25/2022 21:34:33 - INFO - codeparrot_training - Step 33984: {'lr': 0.0001252267934180039, 'samples': 17400320, 'steps': 33984, 'loss/train': 1.4426987171173096} +02/25/2022 21:34:39 - INFO - codeparrot_training - Step 33985: {'lr': 0.00012521261481813163, 'samples': 17400832, 'steps': 33985, 'loss/train': 1.6442755460739136} +02/25/2022 21:34:42 - INFO - codeparrot_training - Step 33986: {'lr': 0.000125198436752809, 'samples': 17401344, 'steps': 33986, 'loss/train': 2.235156774520874} +02/25/2022 21:34:48 - INFO - codeparrot_training - Step 33987: {'lr': 0.00012518425922209664, 'samples': 17401856, 'steps': 33987, 'loss/train': 2.2326109409332275} +02/25/2022 21:34:51 - INFO - codeparrot_training - Step 33988: {'lr': 0.0001251700822260553, 'samples': 17402368, 'steps': 33988, 'loss/train': 1.9844497442245483} +02/25/2022 21:34:57 - INFO - codeparrot_training - Step 33989: {'lr': 0.00012515590576474555, 'samples': 17402880, 'steps': 33989, 'loss/train': 1.950125813484192} +02/25/2022 21:35:00 - INFO - codeparrot_training - Step 33990: {'lr': 0.0001251417298382284, 'samples': 17403392, 'steps': 33990, 'loss/train': 1.9854763746261597} +02/25/2022 21:35:07 - INFO - codeparrot_training - Step 33991: {'lr': 0.00012512755444656442, 'samples': 17403904, 'steps': 33991, 'loss/train': 2.283198356628418} +02/25/2022 21:35:10 - INFO - codeparrot_training - Step 33992: {'lr': 0.00012511337958981433, 'samples': 17404416, 'steps': 33992, 'loss/train': 2.3445448875427246} +02/25/2022 21:35:16 - INFO - codeparrot_training - Step 33993: {'lr': 0.00012509920526803878, 'samples': 17404928, 'steps': 33993, 'loss/train': 1.1401385068893433} +02/25/2022 21:35:19 - INFO - codeparrot_training - Step 33994: {'lr': 0.00012508503148129865, 'samples': 17405440, 'steps': 33994, 'loss/train': 2.314635992050171} +02/25/2022 21:35:25 - INFO - codeparrot_training - Step 33995: {'lr': 0.0001250708582296546, 'samples': 17405952, 'steps': 33995, 'loss/train': 2.2194597721099854} +02/25/2022 21:35:28 - INFO - codeparrot_training - Step 33996: {'lr': 0.00012505668551316735, 'samples': 17406464, 'steps': 33996, 'loss/train': 1.5003371238708496} +02/25/2022 21:35:34 - INFO - codeparrot_training - Step 33997: {'lr': 0.00012504251333189743, 'samples': 17406976, 'steps': 33997, 'loss/train': 1.3384119272232056} +02/25/2022 21:35:37 - INFO - codeparrot_training - Step 33998: {'lr': 0.00012502834168590582, 'samples': 17407488, 'steps': 33998, 'loss/train': 2.855187177658081} +02/25/2022 21:35:43 - INFO - codeparrot_training - Step 33999: {'lr': 0.00012501417057525317, 'samples': 17408000, 'steps': 33999, 'loss/train': 1.9884394407272339} +02/25/2022 21:35:43 - INFO - codeparrot_training - Evaluating and saving model checkpoint