diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -2313,3 +2313,1009 @@ Use FP16 precision: False 02/24/2022 04:09:39 - INFO - codeparrot_training - Step 1998: {'lr': 0.0004995, 'samples': 1023488, 'steps': 1998, 'loss/train': 2.887777805328369} 02/24/2022 04:09:45 - INFO - codeparrot_training - Step 1999: {'lr': 0.0004997500000000001, 'samples': 1024000, 'steps': 1999, 'loss/train': 9.467606544494629} 02/24/2022 04:09:45 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 04:10:02 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream. +02/24/2022 04:10:02 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 04:10:36 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + b1f5d23..0998bf2 floral-grass-11 -> floral-grass-11 + +02/24/2022 04:10:41 - INFO - codeparrot_training - Step 2000: {'lr': 0.0005, 'samples': 1024512, 'steps': 2000, 'loss/train': 2.800246477127075} +02/24/2022 04:10:44 - INFO - codeparrot_training - Step 2001: {'lr': 0.0004999999994645397, 'samples': 1025024, 'steps': 2001, 'loss/train': 0.9297313094139099} +02/24/2022 04:10:50 - INFO - codeparrot_training - Step 2002: {'lr': 0.0004999999978581587, 'samples': 1025536, 'steps': 2002, 'loss/train': 3.4139480590820312} +02/24/2022 04:10:53 - INFO - codeparrot_training - Step 2003: {'lr': 0.0004999999951808573, 'samples': 1026048, 'steps': 2003, 'loss/train': 1.5185656547546387} +02/24/2022 04:10:59 - INFO - codeparrot_training - Step 2004: {'lr': 0.0004999999914326351, 'samples': 1026560, 'steps': 2004, 'loss/train': 3.569647789001465} +02/24/2022 04:11:05 - INFO - codeparrot_training - Step 2005: {'lr': 0.0004999999866134924, 'samples': 1027072, 'steps': 2005, 'loss/train': 3.923659324645996} +02/24/2022 04:11:08 - INFO - codeparrot_training - Step 2006: {'lr': 0.0004999999807234292, 'samples': 1027584, 'steps': 2006, 'loss/train': 4.813257694244385} +02/24/2022 04:11:14 - INFO - codeparrot_training - Step 2007: {'lr': 0.0004999999737624453, 'samples': 1028096, 'steps': 2007, 'loss/train': 3.247771978378296} +02/24/2022 04:11:17 - INFO - codeparrot_training - Step 2008: {'lr': 0.0004999999657305411, 'samples': 1028608, 'steps': 2008, 'loss/train': 2.858778715133667} +02/24/2022 04:11:23 - INFO - codeparrot_training - Step 2009: {'lr': 0.0004999999566277163, 'samples': 1029120, 'steps': 2009, 'loss/train': 3.722787857055664} +02/24/2022 04:11:26 - INFO - codeparrot_training - Step 2010: {'lr': 0.0004999999464539711, 'samples': 1029632, 'steps': 2010, 'loss/train': 1.810246467590332} +02/24/2022 04:11:32 - INFO - codeparrot_training - Step 2011: {'lr': 0.0004999999352093055, 'samples': 1030144, 'steps': 2011, 'loss/train': 3.4813263416290283} +02/24/2022 04:11:35 - INFO - codeparrot_training - Step 2012: {'lr': 0.0004999999228937196, 'samples': 1030656, 'steps': 2012, 'loss/train': 3.003511667251587} +02/24/2022 04:11:41 - INFO - codeparrot_training - Step 2013: {'lr': 0.0004999999095072135, 'samples': 1031168, 'steps': 2013, 'loss/train': 3.13137149810791} +02/24/2022 04:11:44 - INFO - codeparrot_training - Step 2014: {'lr': 0.0004999998950497869, 'samples': 1031680, 'steps': 2014, 'loss/train': 2.7934157848358154} +02/24/2022 04:11:50 - INFO - codeparrot_training - Step 2015: {'lr': 0.0004999998795214404, 'samples': 1032192, 'steps': 2015, 'loss/train': 5.817224979400635} +02/24/2022 04:11:54 - INFO - codeparrot_training - Step 2016: {'lr': 0.0004999998629221736, 'samples': 1032704, 'steps': 2016, 'loss/train': 3.418705701828003} +02/24/2022 04:11:59 - INFO - codeparrot_training - Step 2017: {'lr': 0.0004999998452519869, 'samples': 1033216, 'steps': 2017, 'loss/train': 3.1369762420654297} +02/24/2022 04:12:03 - INFO - codeparrot_training - Step 2018: {'lr': 0.0004999998265108802, 'samples': 1033728, 'steps': 2018, 'loss/train': 3.6667239665985107} +02/24/2022 04:12:06 - INFO - codeparrot_training - Step 2019: {'lr': 0.0004999998066988537, 'samples': 1034240, 'steps': 2019, 'loss/train': 2.634143352508545} +02/24/2022 04:12:12 - INFO - codeparrot_training - Step 2020: {'lr': 0.0004999997858159073, 'samples': 1034752, 'steps': 2020, 'loss/train': 4.146056652069092} +02/24/2022 04:12:18 - INFO - codeparrot_training - Step 2021: {'lr': 0.0004999997638620412, 'samples': 1035264, 'steps': 2021, 'loss/train': 3.6164910793304443} +02/24/2022 04:12:21 - INFO - codeparrot_training - Step 2022: {'lr': 0.0004999997408372557, 'samples': 1035776, 'steps': 2022, 'loss/train': 4.4506402015686035} +02/24/2022 04:12:26 - INFO - codeparrot_training - Step 2023: {'lr': 0.0004999997167415504, 'samples': 1036288, 'steps': 2023, 'loss/train': 3.631222724914551} +02/24/2022 04:12:30 - INFO - codeparrot_training - Step 2024: {'lr': 0.0004999996915749259, 'samples': 1036800, 'steps': 2024, 'loss/train': 3.945448398590088} +02/24/2022 04:12:36 - INFO - codeparrot_training - Step 2025: {'lr': 0.0004999996653373821, 'samples': 1037312, 'steps': 2025, 'loss/train': 3.8391072750091553} +02/24/2022 04:12:39 - INFO - codeparrot_training - Step 2026: {'lr': 0.000499999638028919, 'samples': 1037824, 'steps': 2026, 'loss/train': 2.200242280960083} +02/24/2022 04:12:44 - INFO - codeparrot_training - Step 2027: {'lr': 0.0004999996096495369, 'samples': 1038336, 'steps': 2027, 'loss/train': 3.019376277923584} +02/24/2022 04:12:48 - INFO - codeparrot_training - Step 2028: {'lr': 0.0004999995801992359, 'samples': 1038848, 'steps': 2028, 'loss/train': 2.8726444244384766} +02/24/2022 04:12:53 - INFO - codeparrot_training - Step 2029: {'lr': 0.000499999549678016, 'samples': 1039360, 'steps': 2029, 'loss/train': 3.873802423477173} +02/24/2022 04:12:57 - INFO - codeparrot_training - Step 2030: {'lr': 0.0004999995180858774, 'samples': 1039872, 'steps': 2030, 'loss/train': 4.297247886657715} +02/24/2022 04:13:04 - INFO - codeparrot_training - Step 2031: {'lr': 0.0004999994854228203, 'samples': 1040384, 'steps': 2031, 'loss/train': 3.6109445095062256} +02/24/2022 04:13:07 - INFO - codeparrot_training - Step 2032: {'lr': 0.0004999994516888449, 'samples': 1040896, 'steps': 2032, 'loss/train': 5.221895694732666} +02/24/2022 04:13:13 - INFO - codeparrot_training - Step 2033: {'lr': 0.000499999416883951, 'samples': 1041408, 'steps': 2033, 'loss/train': 4.7753705978393555} +02/24/2022 04:13:16 - INFO - codeparrot_training - Step 2034: {'lr': 0.0004999993810081391, 'samples': 1041920, 'steps': 2034, 'loss/train': 3.140744686126709} +02/24/2022 04:13:22 - INFO - codeparrot_training - Step 2035: {'lr': 0.0004999993440614092, 'samples': 1042432, 'steps': 2035, 'loss/train': 3.2063350677490234} +02/24/2022 04:13:25 - INFO - codeparrot_training - Step 2036: {'lr': 0.0004999993060437616, 'samples': 1042944, 'steps': 2036, 'loss/train': 3.360121250152588} +02/24/2022 04:13:31 - INFO - codeparrot_training - Step 2037: {'lr': 0.0004999992669551962, 'samples': 1043456, 'steps': 2037, 'loss/train': 3.6778557300567627} +02/24/2022 04:13:34 - INFO - codeparrot_training - Step 2038: {'lr': 0.0004999992267957135, 'samples': 1043968, 'steps': 2038, 'loss/train': 3.849402666091919} +02/24/2022 04:13:40 - INFO - codeparrot_training - Step 2039: {'lr': 0.0004999991855653134, 'samples': 1044480, 'steps': 2039, 'loss/train': 3.1258656978607178} +02/24/2022 04:13:43 - INFO - codeparrot_training - Step 2040: {'lr': 0.0004999991432639963, 'samples': 1044992, 'steps': 2040, 'loss/train': 4.852616786956787} +02/24/2022 04:13:50 - INFO - codeparrot_training - Step 2041: {'lr': 0.0004999990998917621, 'samples': 1045504, 'steps': 2041, 'loss/train': 3.8511712551116943} +02/24/2022 04:13:53 - INFO - codeparrot_training - Step 2042: {'lr': 0.0004999990554486111, 'samples': 1046016, 'steps': 2042, 'loss/train': 4.233978748321533} +02/24/2022 04:13:59 - INFO - codeparrot_training - Step 2043: {'lr': 0.0004999990099345436, 'samples': 1046528, 'steps': 2043, 'loss/train': 3.5593159198760986} +02/24/2022 04:14:02 - INFO - codeparrot_training - Step 2044: {'lr': 0.0004999989633495597, 'samples': 1047040, 'steps': 2044, 'loss/train': 3.260753870010376} +02/24/2022 04:14:08 - INFO - codeparrot_training - Step 2045: {'lr': 0.0004999989156936597, 'samples': 1047552, 'steps': 2045, 'loss/train': 2.6524853706359863} +02/24/2022 04:14:12 - INFO - codeparrot_training - Step 2046: {'lr': 0.0004999988669668437, 'samples': 1048064, 'steps': 2046, 'loss/train': 3.4726810455322266} +02/24/2022 04:14:17 - INFO - codeparrot_training - Step 2047: {'lr': 0.0004999988171691119, 'samples': 1048576, 'steps': 2047, 'loss/train': 2.923299551010132} +02/24/2022 04:14:21 - INFO - codeparrot_training - Step 2048: {'lr': 0.0004999987663004646, 'samples': 1049088, 'steps': 2048, 'loss/train': 4.113481521606445} +02/24/2022 04:14:26 - INFO - codeparrot_training - Step 2049: {'lr': 0.0004999987143609019, 'samples': 1049600, 'steps': 2049, 'loss/train': 0.8320827484130859} +02/24/2022 04:14:30 - INFO - codeparrot_training - Step 2050: {'lr': 0.0004999986613504242, 'samples': 1050112, 'steps': 2050, 'loss/train': 4.12211275100708} +02/24/2022 04:14:36 - INFO - codeparrot_training - Step 2051: {'lr': 0.0004999986072690315, 'samples': 1050624, 'steps': 2051, 'loss/train': 4.0427093505859375} +02/24/2022 04:14:39 - INFO - codeparrot_training - Step 2052: {'lr': 0.0004999985521167242, 'samples': 1051136, 'steps': 2052, 'loss/train': 3.8508830070495605} +02/24/2022 04:14:45 - INFO - codeparrot_training - Step 2053: {'lr': 0.0004999984958935025, 'samples': 1051648, 'steps': 2053, 'loss/train': 3.516901969909668} +02/24/2022 04:14:48 - INFO - codeparrot_training - Step 2054: {'lr': 0.0004999984385993665, 'samples': 1052160, 'steps': 2054, 'loss/train': 3.1342296600341797} +02/24/2022 04:14:54 - INFO - codeparrot_training - Step 2055: {'lr': 0.0004999983802343168, 'samples': 1052672, 'steps': 2055, 'loss/train': 4.581167697906494} +02/24/2022 04:14:57 - INFO - codeparrot_training - Step 2056: {'lr': 0.0004999983207983532, 'samples': 1053184, 'steps': 2056, 'loss/train': 3.29864501953125} +02/24/2022 04:15:03 - INFO - codeparrot_training - Step 2057: {'lr': 0.0004999982602914763, 'samples': 1053696, 'steps': 2057, 'loss/train': 4.555438995361328} +02/24/2022 04:15:06 - INFO - codeparrot_training - Step 2058: {'lr': 0.0004999981987136862, 'samples': 1054208, 'steps': 2058, 'loss/train': 2.4252705574035645} +02/24/2022 04:15:12 - INFO - codeparrot_training - Step 2059: {'lr': 0.0004999981360649833, 'samples': 1054720, 'steps': 2059, 'loss/train': 4.049717426300049} +02/24/2022 04:15:15 - INFO - codeparrot_training - Step 2060: {'lr': 0.0004999980723453676, 'samples': 1055232, 'steps': 2060, 'loss/train': 3.7873756885528564} +02/24/2022 04:15:21 - INFO - codeparrot_training - Step 2061: {'lr': 0.0004999980075548397, 'samples': 1055744, 'steps': 2061, 'loss/train': 3.2403149604797363} +02/24/2022 04:15:24 - INFO - codeparrot_training - Step 2062: {'lr': 0.0004999979416933997, 'samples': 1056256, 'steps': 2062, 'loss/train': 4.091283321380615} +02/24/2022 04:15:31 - INFO - codeparrot_training - Step 2063: {'lr': 0.0004999978747610478, 'samples': 1056768, 'steps': 2063, 'loss/train': 3.3648386001586914} +02/24/2022 04:15:34 - INFO - codeparrot_training - Step 2064: {'lr': 0.0004999978067577843, 'samples': 1057280, 'steps': 2064, 'loss/train': 3.1449785232543945} +02/24/2022 04:15:40 - INFO - codeparrot_training - Step 2065: {'lr': 0.0004999977376836098, 'samples': 1057792, 'steps': 2065, 'loss/train': 2.325490951538086} +02/24/2022 04:15:43 - INFO - codeparrot_training - Step 2066: {'lr': 0.0004999976675385243, 'samples': 1058304, 'steps': 2066, 'loss/train': 3.4341890811920166} +02/24/2022 04:15:49 - INFO - codeparrot_training - Step 2067: {'lr': 0.0004999975963225282, 'samples': 1058816, 'steps': 2067, 'loss/train': 2.971423864364624} +02/24/2022 04:15:52 - INFO - codeparrot_training - Step 2068: {'lr': 0.0004999975240356217, 'samples': 1059328, 'steps': 2068, 'loss/train': 4.039858818054199} +02/24/2022 04:15:58 - INFO - codeparrot_training - Step 2069: {'lr': 0.0004999974506778053, 'samples': 1059840, 'steps': 2069, 'loss/train': 0.8792083859443665} +02/24/2022 04:16:01 - INFO - codeparrot_training - Step 2070: {'lr': 0.0004999973762490792, 'samples': 1060352, 'steps': 2070, 'loss/train': 2.9441537857055664} +02/24/2022 04:16:07 - INFO - codeparrot_training - Step 2071: {'lr': 0.0004999973007494436, 'samples': 1060864, 'steps': 2071, 'loss/train': 3.1561279296875} +02/24/2022 04:16:10 - INFO - codeparrot_training - Step 2072: {'lr': 0.000499997224178899, 'samples': 1061376, 'steps': 2072, 'loss/train': 5.323057651519775} +02/24/2022 04:16:16 - INFO - codeparrot_training - Step 2073: {'lr': 0.0004999971465374457, 'samples': 1061888, 'steps': 2073, 'loss/train': 3.414630651473999} +02/24/2022 04:16:19 - INFO - codeparrot_training - Step 2074: {'lr': 0.000499997067825084, 'samples': 1062400, 'steps': 2074, 'loss/train': 3.895528554916382} +02/24/2022 04:16:24 - INFO - codeparrot_training - Step 2075: {'lr': 0.0004999969880418142, 'samples': 1062912, 'steps': 2075, 'loss/train': 3.8276734352111816} +02/24/2022 04:16:28 - INFO - codeparrot_training - Step 2076: {'lr': 0.0004999969071876367, 'samples': 1063424, 'steps': 2076, 'loss/train': 3.8214621543884277} +02/24/2022 04:16:34 - INFO - codeparrot_training - Step 2077: {'lr': 0.0004999968252625519, 'samples': 1063936, 'steps': 2077, 'loss/train': 3.8029026985168457} +02/24/2022 04:16:38 - INFO - codeparrot_training - Step 2078: {'lr': 0.00049999674226656, 'samples': 1064448, 'steps': 2078, 'loss/train': 4.751585960388184} +02/24/2022 04:16:43 - INFO - codeparrot_training - Step 2079: {'lr': 0.0004999966581996616, 'samples': 1064960, 'steps': 2079, 'loss/train': 4.121493816375732} +02/24/2022 04:16:49 - INFO - codeparrot_training - Step 2080: {'lr': 0.0004999965730618567, 'samples': 1065472, 'steps': 2080, 'loss/train': 4.1601691246032715} +02/24/2022 04:16:52 - INFO - codeparrot_training - Step 2081: {'lr': 0.000499996486853146, 'samples': 1065984, 'steps': 2081, 'loss/train': 3.698397397994995} +02/24/2022 04:16:58 - INFO - codeparrot_training - Step 2082: {'lr': 0.0004999963995735296, 'samples': 1066496, 'steps': 2082, 'loss/train': 2.953812599182129} +02/24/2022 04:17:01 - INFO - codeparrot_training - Step 2083: {'lr': 0.0004999963112230081, 'samples': 1067008, 'steps': 2083, 'loss/train': 3.1523876190185547} +02/24/2022 04:17:05 - INFO - codeparrot_training - Step 2084: {'lr': 0.0004999962218015818, 'samples': 1067520, 'steps': 2084, 'loss/train': 1.5381730794906616} +02/24/2022 04:17:10 - INFO - codeparrot_training - Step 2085: {'lr': 0.0004999961313092511, 'samples': 1068032, 'steps': 2085, 'loss/train': 3.5067222118377686} +02/24/2022 04:17:16 - INFO - codeparrot_training - Step 2086: {'lr': 0.0004999960397460162, 'samples': 1068544, 'steps': 2086, 'loss/train': 1.5887590646743774} +02/24/2022 04:17:20 - INFO - codeparrot_training - Step 2087: {'lr': 0.0004999959471118778, 'samples': 1069056, 'steps': 2087, 'loss/train': 2.0652477741241455} +02/24/2022 04:17:25 - INFO - codeparrot_training - Step 2088: {'lr': 0.000499995853406836, 'samples': 1069568, 'steps': 2088, 'loss/train': 3.348379373550415} +02/24/2022 04:17:29 - INFO - codeparrot_training - Step 2089: {'lr': 0.0004999957586308914, 'samples': 1070080, 'steps': 2089, 'loss/train': 2.952920436859131} +02/24/2022 04:17:34 - INFO - codeparrot_training - Step 2090: {'lr': 0.0004999956627840445, 'samples': 1070592, 'steps': 2090, 'loss/train': 2.571131706237793} +02/24/2022 04:17:38 - INFO - codeparrot_training - Step 2091: {'lr': 0.0004999955658662954, 'samples': 1071104, 'steps': 2091, 'loss/train': 3.6060545444488525} +02/24/2022 04:17:43 - INFO - codeparrot_training - Step 2092: {'lr': 0.0004999954678776448, 'samples': 1071616, 'steps': 2092, 'loss/train': 3.0546963214874268} +02/24/2022 04:17:47 - INFO - codeparrot_training - Step 2093: {'lr': 0.0004999953688180929, 'samples': 1072128, 'steps': 2093, 'loss/train': 3.4562463760375977} +02/24/2022 04:17:52 - INFO - codeparrot_training - Step 2094: {'lr': 0.0004999952686876402, 'samples': 1072640, 'steps': 2094, 'loss/train': 3.625727653503418} +02/24/2022 04:17:56 - INFO - codeparrot_training - Step 2095: {'lr': 0.0004999951674862872, 'samples': 1073152, 'steps': 2095, 'loss/train': 1.858146071434021} +02/24/2022 04:18:02 - INFO - codeparrot_training - Step 2096: {'lr': 0.0004999950652140343, 'samples': 1073664, 'steps': 2096, 'loss/train': 4.236659526824951} +02/24/2022 04:18:05 - INFO - codeparrot_training - Step 2097: {'lr': 0.0004999949618708819, 'samples': 1074176, 'steps': 2097, 'loss/train': 3.1121630668640137} +02/24/2022 04:18:11 - INFO - codeparrot_training - Step 2098: {'lr': 0.0004999948574568305, 'samples': 1074688, 'steps': 2098, 'loss/train': 3.973454236984253} +02/24/2022 04:18:14 - INFO - codeparrot_training - Step 2099: {'lr': 0.0004999947519718805, 'samples': 1075200, 'steps': 2099, 'loss/train': 2.847280502319336} +02/24/2022 04:18:20 - INFO - codeparrot_training - Step 2100: {'lr': 0.0004999946454160324, 'samples': 1075712, 'steps': 2100, 'loss/train': 5.648211479187012} +02/24/2022 04:18:23 - INFO - codeparrot_training - Step 2101: {'lr': 0.0004999945377892865, 'samples': 1076224, 'steps': 2101, 'loss/train': 2.1025211811065674} +02/24/2022 04:18:29 - INFO - codeparrot_training - Step 2102: {'lr': 0.0004999944290916434, 'samples': 1076736, 'steps': 2102, 'loss/train': 3.2467496395111084} +02/24/2022 04:18:32 - INFO - codeparrot_training - Step 2103: {'lr': 0.0004999943193231037, 'samples': 1077248, 'steps': 2103, 'loss/train': 3.7901408672332764} +02/24/2022 04:18:38 - INFO - codeparrot_training - Step 2104: {'lr': 0.0004999942084836675, 'samples': 1077760, 'steps': 2104, 'loss/train': 1.9659900665283203} +02/24/2022 04:18:41 - INFO - codeparrot_training - Step 2105: {'lr': 0.0004999940965733356, 'samples': 1078272, 'steps': 2105, 'loss/train': 3.02752685546875} +02/24/2022 04:18:47 - INFO - codeparrot_training - Step 2106: {'lr': 0.0004999939835921085, 'samples': 1078784, 'steps': 2106, 'loss/train': 3.097346067428589} +02/24/2022 04:18:50 - INFO - codeparrot_training - Step 2107: {'lr': 0.0004999938695399864, 'samples': 1079296, 'steps': 2107, 'loss/train': 2.835118532180786} +02/24/2022 04:18:56 - INFO - codeparrot_training - Step 2108: {'lr': 0.00049999375441697, 'samples': 1079808, 'steps': 2108, 'loss/train': 3.388638496398926} +02/24/2022 04:18:59 - INFO - codeparrot_training - Step 2109: {'lr': 0.0004999936382230597, 'samples': 1080320, 'steps': 2109, 'loss/train': 4.3357648849487305} +02/24/2022 04:19:05 - INFO - codeparrot_training - Step 2110: {'lr': 0.000499993520958256, 'samples': 1080832, 'steps': 2110, 'loss/train': 3.3163487911224365} +02/24/2022 04:19:08 - INFO - codeparrot_training - Step 2111: {'lr': 0.0004999934026225595, 'samples': 1081344, 'steps': 2111, 'loss/train': 3.4119441509246826} +02/24/2022 04:19:15 - INFO - codeparrot_training - Step 2112: {'lr': 0.0004999932832159708, 'samples': 1081856, 'steps': 2112, 'loss/train': 2.135465621948242} +02/24/2022 04:19:18 - INFO - codeparrot_training - Step 2113: {'lr': 0.00049999316273849, 'samples': 1082368, 'steps': 2113, 'loss/train': 4.085206508636475} +02/24/2022 04:19:24 - INFO - codeparrot_training - Step 2114: {'lr': 0.0004999930411901181, 'samples': 1082880, 'steps': 2114, 'loss/train': 1.668155550956726} +02/24/2022 04:19:27 - INFO - codeparrot_training - Step 2115: {'lr': 0.0004999929185708551, 'samples': 1083392, 'steps': 2115, 'loss/train': 3.799992322921753} +02/24/2022 04:19:33 - INFO - codeparrot_training - Step 2116: {'lr': 0.000499992794880702, 'samples': 1083904, 'steps': 2116, 'loss/train': 4.510910511016846} +02/24/2022 04:19:36 - INFO - codeparrot_training - Step 2117: {'lr': 0.0004999926701196592, 'samples': 1084416, 'steps': 2117, 'loss/train': 2.6093909740448} +02/24/2022 04:19:42 - INFO - codeparrot_training - Step 2118: {'lr': 0.0004999925442877271, 'samples': 1084928, 'steps': 2118, 'loss/train': 5.3163018226623535} +02/24/2022 04:19:45 - INFO - codeparrot_training - Step 2119: {'lr': 0.0004999924173849063, 'samples': 1085440, 'steps': 2119, 'loss/train': 3.2637908458709717} +02/24/2022 04:19:51 - INFO - codeparrot_training - Step 2120: {'lr': 0.0004999922894111975, 'samples': 1085952, 'steps': 2120, 'loss/train': 0.513687014579773} +02/24/2022 04:19:54 - INFO - codeparrot_training - Step 2121: {'lr': 0.000499992160366601, 'samples': 1086464, 'steps': 2121, 'loss/train': 2.543294906616211} +02/24/2022 04:20:01 - INFO - codeparrot_training - Step 2122: {'lr': 0.0004999920302511175, 'samples': 1086976, 'steps': 2122, 'loss/train': 3.3987691402435303} +02/24/2022 04:20:04 - INFO - codeparrot_training - Step 2123: {'lr': 0.0004999918990647474, 'samples': 1087488, 'steps': 2123, 'loss/train': 4.51619291305542} +02/24/2022 04:20:09 - INFO - codeparrot_training - Step 2124: {'lr': 0.0004999917668074915, 'samples': 1088000, 'steps': 2124, 'loss/train': 1.8270645141601562} +02/24/2022 04:20:13 - INFO - codeparrot_training - Step 2125: {'lr': 0.0004999916334793503, 'samples': 1088512, 'steps': 2125, 'loss/train': 4.188164234161377} +02/24/2022 04:20:18 - INFO - codeparrot_training - Step 2126: {'lr': 0.0004999914990803242, 'samples': 1089024, 'steps': 2126, 'loss/train': 3.254211664199829} +02/24/2022 04:20:22 - INFO - codeparrot_training - Step 2127: {'lr': 0.000499991363610414, 'samples': 1089536, 'steps': 2127, 'loss/train': 3.083686351776123} +02/24/2022 04:20:27 - INFO - codeparrot_training - Step 2128: {'lr': 0.0004999912270696202, 'samples': 1090048, 'steps': 2128, 'loss/train': 4.00949239730835} +02/24/2022 04:20:31 - INFO - codeparrot_training - Step 2129: {'lr': 0.0004999910894579432, 'samples': 1090560, 'steps': 2129, 'loss/train': 2.9461610317230225} +02/24/2022 04:20:36 - INFO - codeparrot_training - Step 2130: {'lr': 0.000499990950775384, 'samples': 1091072, 'steps': 2130, 'loss/train': 3.2630045413970947} +02/24/2022 04:20:40 - INFO - codeparrot_training - Step 2131: {'lr': 0.0004999908110219428, 'samples': 1091584, 'steps': 2131, 'loss/train': 3.818962812423706} +02/24/2022 04:20:46 - INFO - codeparrot_training - Step 2132: {'lr': 0.0004999906701976203, 'samples': 1092096, 'steps': 2132, 'loss/train': 2.9355931282043457} +02/24/2022 04:20:49 - INFO - codeparrot_training - Step 2133: {'lr': 0.0004999905283024172, 'samples': 1092608, 'steps': 2133, 'loss/train': 4.199846267700195} +02/24/2022 04:20:55 - INFO - codeparrot_training - Step 2134: {'lr': 0.0004999903853363341, 'samples': 1093120, 'steps': 2134, 'loss/train': 4.1997551918029785} +02/24/2022 04:20:59 - INFO - codeparrot_training - Step 2135: {'lr': 0.0004999902412993715, 'samples': 1093632, 'steps': 2135, 'loss/train': 0.6705176830291748} +02/24/2022 04:21:04 - INFO - codeparrot_training - Step 2136: {'lr': 0.0004999900961915302, 'samples': 1094144, 'steps': 2136, 'loss/train': 3.620927572250366} +02/24/2022 04:21:08 - INFO - codeparrot_training - Step 2137: {'lr': 0.0004999899500128107, 'samples': 1094656, 'steps': 2137, 'loss/train': 5.4724955558776855} +02/24/2022 04:21:15 - INFO - codeparrot_training - Step 2138: {'lr': 0.0004999898027632135, 'samples': 1095168, 'steps': 2138, 'loss/train': 3.5491676330566406} +02/24/2022 04:21:19 - INFO - codeparrot_training - Step 2139: {'lr': 0.0004999896544427394, 'samples': 1095680, 'steps': 2139, 'loss/train': 2.685971736907959} +02/24/2022 04:21:24 - INFO - codeparrot_training - Step 2140: {'lr': 0.0004999895050513891, 'samples': 1096192, 'steps': 2140, 'loss/train': 3.04113507270813} +02/24/2022 04:21:28 - INFO - codeparrot_training - Step 2141: {'lr': 0.0004999893545891631, 'samples': 1096704, 'steps': 2141, 'loss/train': 3.4661216735839844} +02/24/2022 04:21:33 - INFO - codeparrot_training - Step 2142: {'lr': 0.000499989203056062, 'samples': 1097216, 'steps': 2142, 'loss/train': 4.2213544845581055} +02/24/2022 04:21:37 - INFO - codeparrot_training - Step 2143: {'lr': 0.0004999890504520866, 'samples': 1097728, 'steps': 2143, 'loss/train': 3.146622896194458} +02/24/2022 04:21:42 - INFO - codeparrot_training - Step 2144: {'lr': 0.0004999888967772375, 'samples': 1098240, 'steps': 2144, 'loss/train': 2.617624044418335} +02/24/2022 04:21:46 - INFO - codeparrot_training - Step 2145: {'lr': 0.0004999887420315153, 'samples': 1098752, 'steps': 2145, 'loss/train': 2.923344612121582} +02/24/2022 04:21:51 - INFO - codeparrot_training - Step 2146: {'lr': 0.0004999885862149207, 'samples': 1099264, 'steps': 2146, 'loss/train': 3.128354549407959} +02/24/2022 04:21:55 - INFO - codeparrot_training - Step 2147: {'lr': 0.0004999884293274545, 'samples': 1099776, 'steps': 2147, 'loss/train': 2.230713367462158} +02/24/2022 04:22:02 - INFO - codeparrot_training - Step 2148: {'lr': 0.0004999882713691171, 'samples': 1100288, 'steps': 2148, 'loss/train': 2.229236125946045} +02/24/2022 04:22:05 - INFO - codeparrot_training - Step 2149: {'lr': 0.0004999881123399093, 'samples': 1100800, 'steps': 2149, 'loss/train': 3.4961557388305664} +02/24/2022 04:22:11 - INFO - codeparrot_training - Step 2150: {'lr': 0.000499987952239832, 'samples': 1101312, 'steps': 2150, 'loss/train': 4.360259532928467} +02/24/2022 04:22:16 - INFO - codeparrot_training - Step 2151: {'lr': 0.0004999877910688856, 'samples': 1101824, 'steps': 2151, 'loss/train': 3.7019996643066406} +02/24/2022 04:22:20 - INFO - codeparrot_training - Step 2152: {'lr': 0.0004999876288270708, 'samples': 1102336, 'steps': 2152, 'loss/train': 1.0514161586761475} +02/24/2022 04:22:25 - INFO - codeparrot_training - Step 2153: {'lr': 0.0004999874655143886, 'samples': 1102848, 'steps': 2153, 'loss/train': 3.8178577423095703} +02/24/2022 04:22:29 - INFO - codeparrot_training - Step 2154: {'lr': 0.0004999873011308393, 'samples': 1103360, 'steps': 2154, 'loss/train': 4.491191387176514} +02/24/2022 04:22:34 - INFO - codeparrot_training - Step 2155: {'lr': 0.0004999871356764238, 'samples': 1103872, 'steps': 2155, 'loss/train': 3.949737787246704} +02/24/2022 04:22:38 - INFO - codeparrot_training - Step 2156: {'lr': 0.0004999869691511428, 'samples': 1104384, 'steps': 2156, 'loss/train': 4.077169418334961} +02/24/2022 04:22:45 - INFO - codeparrot_training - Step 2157: {'lr': 0.000499986801554997, 'samples': 1104896, 'steps': 2157, 'loss/train': 3.050219774246216} +02/24/2022 04:22:49 - INFO - codeparrot_training - Step 2158: {'lr': 0.0004999866328879871, 'samples': 1105408, 'steps': 2158, 'loss/train': 5.032126426696777} +02/24/2022 04:22:54 - INFO - codeparrot_training - Step 2159: {'lr': 0.0004999864631501139, 'samples': 1105920, 'steps': 2159, 'loss/train': 2.369687795639038} +02/24/2022 04:22:58 - INFO - codeparrot_training - Step 2160: {'lr': 0.000499986292341378, 'samples': 1106432, 'steps': 2160, 'loss/train': 3.0985145568847656} +02/24/2022 04:23:03 - INFO - codeparrot_training - Step 2161: {'lr': 0.0004999861204617803, 'samples': 1106944, 'steps': 2161, 'loss/train': 2.0317800045013428} +02/24/2022 04:23:07 - INFO - codeparrot_training - Step 2162: {'lr': 0.0004999859475113213, 'samples': 1107456, 'steps': 2162, 'loss/train': 1.1256765127182007} +02/24/2022 04:23:12 - INFO - codeparrot_training - Step 2163: {'lr': 0.0004999857734900021, 'samples': 1107968, 'steps': 2163, 'loss/train': 3.6377909183502197} +02/24/2022 04:23:16 - INFO - codeparrot_training - Step 2164: {'lr': 0.000499985598397823, 'samples': 1108480, 'steps': 2164, 'loss/train': 3.1779940128326416} +02/24/2022 04:23:21 - INFO - codeparrot_training - Step 2165: {'lr': 0.0004999854222347851, 'samples': 1108992, 'steps': 2165, 'loss/train': 4.294700622558594} +02/24/2022 04:23:25 - INFO - codeparrot_training - Step 2166: {'lr': 0.000499985245000889, 'samples': 1109504, 'steps': 2166, 'loss/train': 3.5069031715393066} +02/24/2022 04:23:30 - INFO - codeparrot_training - Step 2167: {'lr': 0.0004999850666961355, 'samples': 1110016, 'steps': 2167, 'loss/train': 2.9456210136413574} +02/24/2022 04:23:34 - INFO - codeparrot_training - Step 2168: {'lr': 0.0004999848873205254, 'samples': 1110528, 'steps': 2168, 'loss/train': 3.3173673152923584} +02/24/2022 04:23:37 - INFO - codeparrot_training - Step 2169: {'lr': 0.0004999847068740593, 'samples': 1111040, 'steps': 2169, 'loss/train': 1.4148213863372803} +02/24/2022 04:23:45 - INFO - codeparrot_training - Step 2170: {'lr': 0.0004999845253567382, 'samples': 1111552, 'steps': 2170, 'loss/train': 2.3650436401367188} +02/24/2022 04:23:48 - INFO - codeparrot_training - Step 2171: {'lr': 0.0004999843427685627, 'samples': 1112064, 'steps': 2171, 'loss/train': 3.2185676097869873} +02/24/2022 04:23:54 - INFO - codeparrot_training - Step 2172: {'lr': 0.0004999841591095337, 'samples': 1112576, 'steps': 2172, 'loss/train': 2.8058295249938965} +02/24/2022 04:23:57 - INFO - codeparrot_training - Step 2173: {'lr': 0.0004999839743796519, 'samples': 1113088, 'steps': 2173, 'loss/train': 1.9911302328109741} +02/24/2022 04:24:03 - INFO - codeparrot_training - Step 2174: {'lr': 0.0004999837885789182, 'samples': 1113600, 'steps': 2174, 'loss/train': 3.1840808391571045} +02/24/2022 04:24:06 - INFO - codeparrot_training - Step 2175: {'lr': 0.0004999836017073332, 'samples': 1114112, 'steps': 2175, 'loss/train': 3.3054006099700928} +02/24/2022 04:24:12 - INFO - codeparrot_training - Step 2176: {'lr': 0.000499983413764898, 'samples': 1114624, 'steps': 2176, 'loss/train': 3.170403480529785} +02/24/2022 04:24:17 - INFO - codeparrot_training - Step 2177: {'lr': 0.0004999832247516132, 'samples': 1115136, 'steps': 2177, 'loss/train': 3.2368595600128174} +02/24/2022 04:24:20 - INFO - codeparrot_training - Step 2178: {'lr': 0.0004999830346674796, 'samples': 1115648, 'steps': 2178, 'loss/train': 3.468134880065918} +02/24/2022 04:24:28 - INFO - codeparrot_training - Step 2179: {'lr': 0.000499982843512498, 'samples': 1116160, 'steps': 2179, 'loss/train': 3.5260887145996094} +02/24/2022 04:24:31 - INFO - codeparrot_training - Step 2180: {'lr': 0.0004999826512866693, 'samples': 1116672, 'steps': 2180, 'loss/train': 3.104518413543701} +02/24/2022 04:24:37 - INFO - codeparrot_training - Step 2181: {'lr': 0.0004999824579899944, 'samples': 1117184, 'steps': 2181, 'loss/train': 2.4731578826904297} +02/24/2022 04:24:40 - INFO - codeparrot_training - Step 2182: {'lr': 0.000499982263622474, 'samples': 1117696, 'steps': 2182, 'loss/train': 3.185030698776245} +02/24/2022 04:24:44 - INFO - codeparrot_training - Step 2183: {'lr': 0.0004999820681841088, 'samples': 1118208, 'steps': 2183, 'loss/train': 3.0085761547088623} +02/24/2022 04:24:49 - INFO - codeparrot_training - Step 2184: {'lr': 0.0004999818716748999, 'samples': 1118720, 'steps': 2184, 'loss/train': 3.471605062484741} +02/24/2022 04:24:55 - INFO - codeparrot_training - Step 2185: {'lr': 0.0004999816740948481, 'samples': 1119232, 'steps': 2185, 'loss/train': 3.210179328918457} +02/24/2022 04:24:58 - INFO - codeparrot_training - Step 2186: {'lr': 0.0004999814754439542, 'samples': 1119744, 'steps': 2186, 'loss/train': 4.003715515136719} +02/24/2022 04:25:04 - INFO - codeparrot_training - Step 2187: {'lr': 0.000499981275722219, 'samples': 1120256, 'steps': 2187, 'loss/train': 4.127617835998535} +02/24/2022 04:25:07 - INFO - codeparrot_training - Step 2188: {'lr': 0.0004999810749296434, 'samples': 1120768, 'steps': 2188, 'loss/train': 4.164938926696777} +02/24/2022 04:25:13 - INFO - codeparrot_training - Step 2189: {'lr': 0.0004999808730662282, 'samples': 1121280, 'steps': 2189, 'loss/train': 2.5026516914367676} +02/24/2022 04:25:16 - INFO - codeparrot_training - Step 2190: {'lr': 0.0004999806701319743, 'samples': 1121792, 'steps': 2190, 'loss/train': 2.7306406497955322} +02/24/2022 04:25:22 - INFO - codeparrot_training - Step 2191: {'lr': 0.0004999804661268827, 'samples': 1122304, 'steps': 2191, 'loss/train': 2.1373531818389893} +02/24/2022 04:25:25 - INFO - codeparrot_training - Step 2192: {'lr': 0.0004999802610509541, 'samples': 1122816, 'steps': 2192, 'loss/train': 3.4641823768615723} +02/24/2022 04:25:30 - INFO - codeparrot_training - Step 2193: {'lr': 0.0004999800549041894, 'samples': 1123328, 'steps': 2193, 'loss/train': 4.5540995597839355} +02/24/2022 04:25:34 - INFO - codeparrot_training - Step 2194: {'lr': 0.0004999798476865895, 'samples': 1123840, 'steps': 2194, 'loss/train': 3.797067165374756} +02/24/2022 04:25:41 - INFO - codeparrot_training - Step 2195: {'lr': 0.0004999796393981554, 'samples': 1124352, 'steps': 2195, 'loss/train': 3.8183090686798096} +02/24/2022 04:25:45 - INFO - codeparrot_training - Step 2196: {'lr': 0.0004999794300388879, 'samples': 1124864, 'steps': 2196, 'loss/train': 4.2558369636535645} +02/24/2022 04:25:50 - INFO - codeparrot_training - Step 2197: {'lr': 0.0004999792196087879, 'samples': 1125376, 'steps': 2197, 'loss/train': 4.3991289138793945} +02/24/2022 04:25:54 - INFO - codeparrot_training - Step 2198: {'lr': 0.0004999790081078562, 'samples': 1125888, 'steps': 2198, 'loss/train': 2.6684980392456055} +02/24/2022 04:26:00 - INFO - codeparrot_training - Step 2199: {'lr': 0.0004999787955360939, 'samples': 1126400, 'steps': 2199, 'loss/train': 6.624790668487549} +02/24/2022 04:26:03 - INFO - codeparrot_training - Step 2200: {'lr': 0.0004999785818935018, 'samples': 1126912, 'steps': 2200, 'loss/train': 2.3392221927642822} +02/24/2022 04:26:09 - INFO - codeparrot_training - Step 2201: {'lr': 0.0004999783671800808, 'samples': 1127424, 'steps': 2201, 'loss/train': 3.1122958660125732} +02/24/2022 04:26:12 - INFO - codeparrot_training - Step 2202: {'lr': 0.0004999781513958318, 'samples': 1127936, 'steps': 2202, 'loss/train': 3.565094470977783} +02/24/2022 04:26:17 - INFO - codeparrot_training - Step 2203: {'lr': 0.000499977934540756, 'samples': 1128448, 'steps': 2203, 'loss/train': 3.473228931427002} +02/24/2022 04:26:21 - INFO - codeparrot_training - Step 2204: {'lr': 0.0004999777166148539, 'samples': 1128960, 'steps': 2204, 'loss/train': 2.5281777381896973} +02/24/2022 04:26:28 - INFO - codeparrot_training - Step 2205: {'lr': 0.0004999774976181267, 'samples': 1129472, 'steps': 2205, 'loss/train': 3.73065185546875} +02/24/2022 04:26:32 - INFO - codeparrot_training - Step 2206: {'lr': 0.0004999772775505753, 'samples': 1129984, 'steps': 2206, 'loss/train': 3.899561882019043} +02/24/2022 04:26:37 - INFO - codeparrot_training - Step 2207: {'lr': 0.0004999770564122005, 'samples': 1130496, 'steps': 2207, 'loss/train': 3.804422616958618} +02/24/2022 04:26:41 - INFO - codeparrot_training - Step 2208: {'lr': 0.0004999768342030035, 'samples': 1131008, 'steps': 2208, 'loss/train': 4.123617172241211} +02/24/2022 04:26:46 - INFO - codeparrot_training - Step 2209: {'lr': 0.0004999766109229851, 'samples': 1131520, 'steps': 2209, 'loss/train': 3.929154634475708} +02/24/2022 04:26:49 - INFO - codeparrot_training - Step 2210: {'lr': 0.0004999763865721463, 'samples': 1132032, 'steps': 2210, 'loss/train': 4.022369861602783} +02/24/2022 04:26:55 - INFO - codeparrot_training - Step 2211: {'lr': 0.000499976161150488, 'samples': 1132544, 'steps': 2211, 'loss/train': 3.633319139480591} +02/24/2022 04:26:58 - INFO - codeparrot_training - Step 2212: {'lr': 0.0004999759346580111, 'samples': 1133056, 'steps': 2212, 'loss/train': 4.5179572105407715} +02/24/2022 04:27:04 - INFO - codeparrot_training - Step 2213: {'lr': 0.0004999757070947168, 'samples': 1133568, 'steps': 2213, 'loss/train': 3.564769744873047} +02/24/2022 04:27:07 - INFO - codeparrot_training - Step 2214: {'lr': 0.0004999754784606058, 'samples': 1134080, 'steps': 2214, 'loss/train': 3.7344629764556885} +02/24/2022 04:27:15 - INFO - codeparrot_training - Step 2215: {'lr': 0.0004999752487556794, 'samples': 1134592, 'steps': 2215, 'loss/train': 2.515857458114624} +02/24/2022 04:27:18 - INFO - codeparrot_training - Step 2216: {'lr': 0.0004999750179799383, 'samples': 1135104, 'steps': 2216, 'loss/train': 2.132930278778076} +02/24/2022 04:27:24 - INFO - codeparrot_training - Step 2217: {'lr': 0.0004999747861333838, 'samples': 1135616, 'steps': 2217, 'loss/train': 2.356292963027954} +02/24/2022 04:27:27 - INFO - codeparrot_training - Step 2218: {'lr': 0.0004999745532160164, 'samples': 1136128, 'steps': 2218, 'loss/train': 3.3841753005981445} +02/24/2022 04:27:33 - INFO - codeparrot_training - Step 2219: {'lr': 0.0004999743192278377, 'samples': 1136640, 'steps': 2219, 'loss/train': 1.976936936378479} +02/24/2022 04:27:36 - INFO - codeparrot_training - Step 2220: {'lr': 0.0004999740841688481, 'samples': 1137152, 'steps': 2220, 'loss/train': 2.2595696449279785} +02/24/2022 04:27:42 - INFO - codeparrot_training - Step 2221: {'lr': 0.000499973848039049, 'samples': 1137664, 'steps': 2221, 'loss/train': 3.168776750564575} +02/24/2022 04:27:45 - INFO - codeparrot_training - Step 2222: {'lr': 0.0004999736108384414, 'samples': 1138176, 'steps': 2222, 'loss/train': 3.64705491065979} +02/24/2022 04:27:51 - INFO - codeparrot_training - Step 2223: {'lr': 0.0004999733725670261, 'samples': 1138688, 'steps': 2223, 'loss/train': 1.1466491222381592} +02/24/2022 04:27:54 - INFO - codeparrot_training - Step 2224: {'lr': 0.0004999731332248044, 'samples': 1139200, 'steps': 2224, 'loss/train': 2.6170737743377686} +02/24/2022 04:28:01 - INFO - codeparrot_training - Step 2225: {'lr': 0.0004999728928117771, 'samples': 1139712, 'steps': 2225, 'loss/train': 2.871457576751709} +02/24/2022 04:28:05 - INFO - codeparrot_training - Step 2226: {'lr': 0.0004999726513279452, 'samples': 1140224, 'steps': 2226, 'loss/train': 4.05280065536499} +02/24/2022 04:28:10 - INFO - codeparrot_training - Step 2227: {'lr': 0.0004999724087733099, 'samples': 1140736, 'steps': 2227, 'loss/train': 3.5515193939208984} +02/24/2022 04:28:14 - INFO - codeparrot_training - Step 2228: {'lr': 0.0004999721651478723, 'samples': 1141248, 'steps': 2228, 'loss/train': 2.5241708755493164} +02/24/2022 04:28:19 - INFO - codeparrot_training - Step 2229: {'lr': 0.0004999719204516332, 'samples': 1141760, 'steps': 2229, 'loss/train': 3.4813103675842285} +02/24/2022 04:28:22 - INFO - codeparrot_training - Step 2230: {'lr': 0.0004999716746845937, 'samples': 1142272, 'steps': 2230, 'loss/train': 4.263186454772949} +02/24/2022 04:28:28 - INFO - codeparrot_training - Step 2231: {'lr': 0.0004999714278467551, 'samples': 1142784, 'steps': 2231, 'loss/train': 3.2247517108917236} +02/24/2022 04:28:33 - INFO - codeparrot_training - Step 2232: {'lr': 0.0004999711799381181, 'samples': 1143296, 'steps': 2232, 'loss/train': 4.176067352294922} +02/24/2022 04:28:37 - INFO - codeparrot_training - Step 2233: {'lr': 0.000499970930958684, 'samples': 1143808, 'steps': 2233, 'loss/train': 4.0900115966796875} +02/24/2022 04:28:44 - INFO - codeparrot_training - Step 2234: {'lr': 0.0004999706809084538, 'samples': 1144320, 'steps': 2234, 'loss/train': 2.9840047359466553} +02/24/2022 04:28:48 - INFO - codeparrot_training - Step 2235: {'lr': 0.0004999704297874287, 'samples': 1144832, 'steps': 2235, 'loss/train': 4.0390167236328125} +02/24/2022 04:28:53 - INFO - codeparrot_training - Step 2236: {'lr': 0.0004999701775956095, 'samples': 1145344, 'steps': 2236, 'loss/train': 2.902132749557495} +02/24/2022 04:28:57 - INFO - codeparrot_training - Step 2237: {'lr': 0.0004999699243329975, 'samples': 1145856, 'steps': 2237, 'loss/train': 3.204497814178467} +02/24/2022 04:29:02 - INFO - codeparrot_training - Step 2238: {'lr': 0.0004999696699995937, 'samples': 1146368, 'steps': 2238, 'loss/train': 0.8694908618927002} +02/24/2022 04:29:06 - INFO - codeparrot_training - Step 2239: {'lr': 0.0004999694145953992, 'samples': 1146880, 'steps': 2239, 'loss/train': 3.2490906715393066} +02/24/2022 04:29:11 - INFO - codeparrot_training - Step 2240: {'lr': 0.0004999691581204152, 'samples': 1147392, 'steps': 2240, 'loss/train': 3.2772016525268555} +02/24/2022 04:29:15 - INFO - codeparrot_training - Step 2241: {'lr': 0.0004999689005746426, 'samples': 1147904, 'steps': 2241, 'loss/train': 2.77553129196167} +02/24/2022 04:29:20 - INFO - codeparrot_training - Step 2242: {'lr': 0.0004999686419580827, 'samples': 1148416, 'steps': 2242, 'loss/train': 2.762347459793091} +02/24/2022 04:29:24 - INFO - codeparrot_training - Step 2243: {'lr': 0.0004999683822707364, 'samples': 1148928, 'steps': 2243, 'loss/train': 4.271193504333496} +02/24/2022 04:29:27 - INFO - codeparrot_training - Step 2244: {'lr': 0.0004999681215126049, 'samples': 1149440, 'steps': 2244, 'loss/train': 4.404847621917725} +02/24/2022 04:29:33 - INFO - codeparrot_training - Step 2245: {'lr': 0.0004999678596836894, 'samples': 1149952, 'steps': 2245, 'loss/train': 2.916435480117798} +02/24/2022 04:29:36 - INFO - codeparrot_training - Step 2246: {'lr': 0.000499967596783991, 'samples': 1150464, 'steps': 2246, 'loss/train': 4.588435649871826} +02/24/2022 04:29:42 - INFO - codeparrot_training - Step 2247: {'lr': 0.0004999673328135107, 'samples': 1150976, 'steps': 2247, 'loss/train': 4.4290642738342285} +02/24/2022 04:29:45 - INFO - codeparrot_training - Step 2248: {'lr': 0.0004999670677722498, 'samples': 1151488, 'steps': 2248, 'loss/train': 3.213757038116455} +02/24/2022 04:29:51 - INFO - codeparrot_training - Step 2249: {'lr': 0.0004999668016602094, 'samples': 1152000, 'steps': 2249, 'loss/train': 3.557638645172119} +02/24/2022 04:29:54 - INFO - codeparrot_training - Step 2250: {'lr': 0.0004999665344773905, 'samples': 1152512, 'steps': 2250, 'loss/train': 2.462035894393921} +02/24/2022 04:30:02 - INFO - codeparrot_training - Step 2251: {'lr': 0.0004999662662237943, 'samples': 1153024, 'steps': 2251, 'loss/train': 3.8249166011810303} +02/24/2022 04:30:05 - INFO - codeparrot_training - Step 2252: {'lr': 0.0004999659968994221, 'samples': 1153536, 'steps': 2252, 'loss/train': 3.099893569946289} +02/24/2022 04:30:11 - INFO - codeparrot_training - Step 2253: {'lr': 0.0004999657265042748, 'samples': 1154048, 'steps': 2253, 'loss/train': 3.291548013687134} +02/24/2022 04:30:14 - INFO - codeparrot_training - Step 2254: {'lr': 0.0004999654550383539, 'samples': 1154560, 'steps': 2254, 'loss/train': 2.822593927383423} +02/24/2022 04:30:20 - INFO - codeparrot_training - Step 2255: {'lr': 0.0004999651825016603, 'samples': 1155072, 'steps': 2255, 'loss/train': 4.550164222717285} +02/24/2022 04:30:25 - INFO - codeparrot_training - Step 2256: {'lr': 0.0004999649088941951, 'samples': 1155584, 'steps': 2256, 'loss/train': 3.122103452682495} +02/24/2022 04:30:29 - INFO - codeparrot_training - Step 2257: {'lr': 0.0004999646342159597, 'samples': 1156096, 'steps': 2257, 'loss/train': 2.8963000774383545} +02/24/2022 04:30:34 - INFO - codeparrot_training - Step 2258: {'lr': 0.0004999643584669552, 'samples': 1156608, 'steps': 2258, 'loss/train': 3.3787777423858643} +02/24/2022 04:30:38 - INFO - codeparrot_training - Step 2259: {'lr': 0.0004999640816471827, 'samples': 1157120, 'steps': 2259, 'loss/train': 3.468085289001465} +02/24/2022 04:30:43 - INFO - codeparrot_training - Step 2260: {'lr': 0.0004999638037566436, 'samples': 1157632, 'steps': 2260, 'loss/train': 3.627345323562622} +02/24/2022 04:30:46 - INFO - codeparrot_training - Step 2261: {'lr': 0.0004999635247953387, 'samples': 1158144, 'steps': 2261, 'loss/train': 4.028489112854004} +02/24/2022 04:30:54 - INFO - codeparrot_training - Step 2262: {'lr': 0.0004999632447632696, 'samples': 1158656, 'steps': 2262, 'loss/train': 2.716365337371826} +02/24/2022 04:30:57 - INFO - codeparrot_training - Step 2263: {'lr': 0.0004999629636604372, 'samples': 1159168, 'steps': 2263, 'loss/train': 3.508540630340576} +02/24/2022 04:31:03 - INFO - codeparrot_training - Step 2264: {'lr': 0.0004999626814868429, 'samples': 1159680, 'steps': 2264, 'loss/train': 3.9661898612976074} +02/24/2022 04:31:06 - INFO - codeparrot_training - Step 2265: {'lr': 0.0004999623982424879, 'samples': 1160192, 'steps': 2265, 'loss/train': 2.874356985092163} +02/24/2022 04:31:12 - INFO - codeparrot_training - Step 2266: {'lr': 0.0004999621139273733, 'samples': 1160704, 'steps': 2266, 'loss/train': 4.915088653564453} +02/24/2022 04:31:15 - INFO - codeparrot_training - Step 2267: {'lr': 0.0004999618285415004, 'samples': 1161216, 'steps': 2267, 'loss/train': 3.3333630561828613} +02/24/2022 04:31:21 - INFO - codeparrot_training - Step 2268: {'lr': 0.0004999615420848704, 'samples': 1161728, 'steps': 2268, 'loss/train': 3.9082980155944824} +02/24/2022 04:31:24 - INFO - codeparrot_training - Step 2269: {'lr': 0.0004999612545574845, 'samples': 1162240, 'steps': 2269, 'loss/train': 3.7582783699035645} +02/24/2022 04:31:31 - INFO - codeparrot_training - Step 2270: {'lr': 0.000499960965959344, 'samples': 1162752, 'steps': 2270, 'loss/train': 3.778946876525879} +02/24/2022 04:31:35 - INFO - codeparrot_training - Step 2271: {'lr': 0.0004999606762904501, 'samples': 1163264, 'steps': 2271, 'loss/train': 4.916256904602051} +02/24/2022 04:31:40 - INFO - codeparrot_training - Step 2272: {'lr': 0.000499960385550804, 'samples': 1163776, 'steps': 2272, 'loss/train': 2.7035017013549805} +02/24/2022 04:31:44 - INFO - codeparrot_training - Step 2273: {'lr': 0.000499960093740407, 'samples': 1164288, 'steps': 2273, 'loss/train': 2.951904535293579} +02/24/2022 04:31:49 - INFO - codeparrot_training - Step 2274: {'lr': 0.0004999598008592603, 'samples': 1164800, 'steps': 2274, 'loss/train': 3.863191843032837} +02/24/2022 04:31:53 - INFO - codeparrot_training - Step 2275: {'lr': 0.0004999595069073653, 'samples': 1165312, 'steps': 2275, 'loss/train': 3.578238010406494} +02/24/2022 04:31:58 - INFO - codeparrot_training - Step 2276: {'lr': 0.0004999592118847229, 'samples': 1165824, 'steps': 2276, 'loss/train': 3.3003287315368652} +02/24/2022 04:32:02 - INFO - codeparrot_training - Step 2277: {'lr': 0.0004999589157913348, 'samples': 1166336, 'steps': 2277, 'loss/train': 3.5974996089935303} +02/24/2022 04:32:07 - INFO - codeparrot_training - Step 2278: {'lr': 0.0004999586186272021, 'samples': 1166848, 'steps': 2278, 'loss/train': 3.835594654083252} +02/24/2022 04:32:11 - INFO - codeparrot_training - Step 2279: {'lr': 0.000499958320392326, 'samples': 1167360, 'steps': 2279, 'loss/train': 3.9866204261779785} +02/24/2022 04:32:16 - INFO - codeparrot_training - Step 2280: {'lr': 0.0004999580210867077, 'samples': 1167872, 'steps': 2280, 'loss/train': 2.536888599395752} +02/24/2022 04:32:20 - INFO - codeparrot_training - Step 2281: {'lr': 0.0004999577207103487, 'samples': 1168384, 'steps': 2281, 'loss/train': 4.099573135375977} +02/24/2022 04:32:25 - INFO - codeparrot_training - Step 2282: {'lr': 0.0004999574192632502, 'samples': 1168896, 'steps': 2282, 'loss/train': 2.6548068523406982} +02/24/2022 04:32:29 - INFO - codeparrot_training - Step 2283: {'lr': 0.0004999571167454135, 'samples': 1169408, 'steps': 2283, 'loss/train': 3.8463265895843506} +02/24/2022 04:32:34 - INFO - codeparrot_training - Step 2284: {'lr': 0.0004999568131568399, 'samples': 1169920, 'steps': 2284, 'loss/train': 0.6082117557525635} +02/24/2022 04:32:38 - INFO - codeparrot_training - Step 2285: {'lr': 0.0004999565084975306, 'samples': 1170432, 'steps': 2285, 'loss/train': 3.158193826675415} +02/24/2022 04:32:45 - INFO - codeparrot_training - Step 2286: {'lr': 0.0004999562027674871, 'samples': 1170944, 'steps': 2286, 'loss/train': 4.124403476715088} +02/24/2022 04:32:48 - INFO - codeparrot_training - Step 2287: {'lr': 0.0004999558959667105, 'samples': 1171456, 'steps': 2287, 'loss/train': 1.7018682956695557} +02/24/2022 04:32:54 - INFO - codeparrot_training - Step 2288: {'lr': 0.0004999555880952023, 'samples': 1171968, 'steps': 2288, 'loss/train': 4.156793117523193} +02/24/2022 04:32:57 - INFO - codeparrot_training - Step 2289: {'lr': 0.0004999552791529637, 'samples': 1172480, 'steps': 2289, 'loss/train': 2.108534812927246} +02/24/2022 04:33:03 - INFO - codeparrot_training - Step 2290: {'lr': 0.000499954969139996, 'samples': 1172992, 'steps': 2290, 'loss/train': 3.1388399600982666} +02/24/2022 04:33:06 - INFO - codeparrot_training - Step 2291: {'lr': 0.0004999546580563006, 'samples': 1173504, 'steps': 2291, 'loss/train': 4.162036418914795} +02/24/2022 04:33:12 - INFO - codeparrot_training - Step 2292: {'lr': 0.0004999543459018788, 'samples': 1174016, 'steps': 2292, 'loss/train': 3.205673933029175} +02/24/2022 04:33:15 - INFO - codeparrot_training - Step 2293: {'lr': 0.000499954032676732, 'samples': 1174528, 'steps': 2293, 'loss/train': 4.420656204223633} +02/24/2022 04:33:21 - INFO - codeparrot_training - Step 2294: {'lr': 0.0004999537183808614, 'samples': 1175040, 'steps': 2294, 'loss/train': 2.455386161804199} +02/24/2022 04:33:24 - INFO - codeparrot_training - Step 2295: {'lr': 0.0004999534030142686, 'samples': 1175552, 'steps': 2295, 'loss/train': 0.4072885811328888} +02/24/2022 04:33:32 - INFO - codeparrot_training - Step 2296: {'lr': 0.0004999530865769547, 'samples': 1176064, 'steps': 2296, 'loss/train': 2.254129409790039} +02/24/2022 04:33:35 - INFO - codeparrot_training - Step 2297: {'lr': 0.0004999527690689212, 'samples': 1176576, 'steps': 2297, 'loss/train': 3.1453521251678467} +02/24/2022 04:33:41 - INFO - codeparrot_training - Step 2298: {'lr': 0.0004999524504901694, 'samples': 1177088, 'steps': 2298, 'loss/train': 2.589794635772705} +02/24/2022 04:33:44 - INFO - codeparrot_training - Step 2299: {'lr': 0.0004999521308407006, 'samples': 1177600, 'steps': 2299, 'loss/train': 3.344181537628174} +02/24/2022 04:33:50 - INFO - codeparrot_training - Step 2300: {'lr': 0.0004999518101205162, 'samples': 1178112, 'steps': 2300, 'loss/train': 4.422452926635742} +02/24/2022 04:33:53 - INFO - codeparrot_training - Step 2301: {'lr': 0.0004999514883296176, 'samples': 1178624, 'steps': 2301, 'loss/train': 1.787958025932312} +02/24/2022 04:33:59 - INFO - codeparrot_training - Step 2302: {'lr': 0.0004999511654680064, 'samples': 1179136, 'steps': 2302, 'loss/train': 2.1183021068573} +02/24/2022 04:34:02 - INFO - codeparrot_training - Step 2303: {'lr': 0.0004999508415356836, 'samples': 1179648, 'steps': 2303, 'loss/train': 3.060662269592285} +02/24/2022 04:34:08 - INFO - codeparrot_training - Step 2304: {'lr': 0.0004999505165326509, 'samples': 1180160, 'steps': 2304, 'loss/train': 2.1927576065063477} +02/24/2022 04:34:11 - INFO - codeparrot_training - Step 2305: {'lr': 0.0004999501904589095, 'samples': 1180672, 'steps': 2305, 'loss/train': 2.572749137878418} +02/24/2022 04:34:18 - INFO - codeparrot_training - Step 2306: {'lr': 0.0004999498633144608, 'samples': 1181184, 'steps': 2306, 'loss/train': 2.5980634689331055} +02/24/2022 04:34:21 - INFO - codeparrot_training - Step 2307: {'lr': 0.0004999495350993062, 'samples': 1181696, 'steps': 2307, 'loss/train': 3.2687549591064453} +02/24/2022 04:34:27 - INFO - codeparrot_training - Step 2308: {'lr': 0.0004999492058134473, 'samples': 1182208, 'steps': 2308, 'loss/train': 5.166488170623779} +02/24/2022 04:34:30 - INFO - codeparrot_training - Step 2309: {'lr': 0.0004999488754568853, 'samples': 1182720, 'steps': 2309, 'loss/train': 3.4950520992279053} +02/24/2022 04:34:36 - INFO - codeparrot_training - Step 2310: {'lr': 0.0004999485440296216, 'samples': 1183232, 'steps': 2310, 'loss/train': 2.227299213409424} +02/24/2022 04:34:39 - INFO - codeparrot_training - Step 2311: {'lr': 0.0004999482115316579, 'samples': 1183744, 'steps': 2311, 'loss/train': 2.8919758796691895} +02/24/2022 04:34:45 - INFO - codeparrot_training - Step 2312: {'lr': 0.0004999478779629953, 'samples': 1184256, 'steps': 2312, 'loss/train': 2.2472426891326904} +02/24/2022 04:34:48 - INFO - codeparrot_training - Step 2313: {'lr': 0.0004999475433236354, 'samples': 1184768, 'steps': 2313, 'loss/train': 2.3866384029388428} +02/24/2022 04:34:54 - INFO - codeparrot_training - Step 2314: {'lr': 0.0004999472076135796, 'samples': 1185280, 'steps': 2314, 'loss/train': 3.3593130111694336} +02/24/2022 04:34:59 - INFO - codeparrot_training - Step 2315: {'lr': 0.0004999468708328293, 'samples': 1185792, 'steps': 2315, 'loss/train': 3.8776097297668457} +02/24/2022 04:35:02 - INFO - codeparrot_training - Step 2316: {'lr': 0.0004999465329813859, 'samples': 1186304, 'steps': 2316, 'loss/train': 3.015650749206543} +02/24/2022 04:35:09 - INFO - codeparrot_training - Step 2317: {'lr': 0.000499946194059251, 'samples': 1186816, 'steps': 2317, 'loss/train': 2.145082712173462} +02/24/2022 04:35:13 - INFO - codeparrot_training - Step 2318: {'lr': 0.000499945854066426, 'samples': 1187328, 'steps': 2318, 'loss/train': 2.344414472579956} +02/24/2022 04:35:18 - INFO - codeparrot_training - Step 2319: {'lr': 0.0004999455130029123, 'samples': 1187840, 'steps': 2319, 'loss/train': 3.8513545989990234} +02/24/2022 04:35:22 - INFO - codeparrot_training - Step 2320: {'lr': 0.0004999451708687113, 'samples': 1188352, 'steps': 2320, 'loss/train': 3.5533957481384277} +02/24/2022 04:35:27 - INFO - codeparrot_training - Step 2321: {'lr': 0.0004999448276638247, 'samples': 1188864, 'steps': 2321, 'loss/train': 3.5150415897369385} +02/24/2022 04:35:31 - INFO - codeparrot_training - Step 2322: {'lr': 0.0004999444833882538, 'samples': 1189376, 'steps': 2322, 'loss/train': 3.4148900508880615} +02/24/2022 04:35:36 - INFO - codeparrot_training - Step 2323: {'lr': 0.000499944138042, 'samples': 1189888, 'steps': 2323, 'loss/train': 3.8514246940612793} +02/24/2022 04:35:40 - INFO - codeparrot_training - Step 2324: {'lr': 0.000499943791625065, 'samples': 1190400, 'steps': 2324, 'loss/train': 2.228163480758667} +02/24/2022 04:35:46 - INFO - codeparrot_training - Step 2325: {'lr': 0.0004999434441374501, 'samples': 1190912, 'steps': 2325, 'loss/train': 1.3067525625228882} +02/24/2022 04:35:49 - INFO - codeparrot_training - Step 2326: {'lr': 0.0004999430955791569, 'samples': 1191424, 'steps': 2326, 'loss/train': 0.28261226415634155} +02/24/2022 04:35:55 - INFO - codeparrot_training - Step 2327: {'lr': 0.0004999427459501868, 'samples': 1191936, 'steps': 2327, 'loss/train': 6.6103057861328125} +02/24/2022 04:35:58 - INFO - codeparrot_training - Step 2328: {'lr': 0.0004999423952505414, 'samples': 1192448, 'steps': 2328, 'loss/train': 2.38521671295166} +02/24/2022 04:36:04 - INFO - codeparrot_training - Step 2329: {'lr': 0.000499942043480222, 'samples': 1192960, 'steps': 2329, 'loss/train': 4.625878810882568} +02/24/2022 04:36:07 - INFO - codeparrot_training - Step 2330: {'lr': 0.0004999416906392303, 'samples': 1193472, 'steps': 2330, 'loss/train': 1.787308931350708} +02/24/2022 04:36:13 - INFO - codeparrot_training - Step 2331: {'lr': 0.0004999413367275678, 'samples': 1193984, 'steps': 2331, 'loss/train': 3.660783529281616} +02/24/2022 04:36:17 - INFO - codeparrot_training - Step 2332: {'lr': 0.000499940981745236, 'samples': 1194496, 'steps': 2332, 'loss/train': 1.4107046127319336} +02/24/2022 04:36:23 - INFO - codeparrot_training - Step 2333: {'lr': 0.0004999406256922365, 'samples': 1195008, 'steps': 2333, 'loss/train': 3.45271635055542} +02/24/2022 04:36:27 - INFO - codeparrot_training - Step 2334: {'lr': 0.0004999402685685705, 'samples': 1195520, 'steps': 2334, 'loss/train': 3.881763458251953} +02/24/2022 04:36:30 - INFO - codeparrot_training - Step 2335: {'lr': 0.0004999399103742399, 'samples': 1196032, 'steps': 2335, 'loss/train': 2.9919068813323975} +02/24/2022 04:36:33 - INFO - codeparrot_training - Step 2336: {'lr': 0.000499939551109246, 'samples': 1196544, 'steps': 2336, 'loss/train': 4.510589599609375} +02/24/2022 04:36:39 - INFO - codeparrot_training - Step 2337: {'lr': 0.0004999391907735905, 'samples': 1197056, 'steps': 2337, 'loss/train': 3.028735637664795} +02/24/2022 04:36:42 - INFO - codeparrot_training - Step 2338: {'lr': 0.0004999388293672748, 'samples': 1197568, 'steps': 2338, 'loss/train': 3.2945544719696045} +02/24/2022 04:36:48 - INFO - codeparrot_training - Step 2339: {'lr': 0.0004999384668903006, 'samples': 1198080, 'steps': 2339, 'loss/train': 2.7194881439208984} +02/24/2022 04:36:52 - INFO - codeparrot_training - Step 2340: {'lr': 0.0004999381033426693, 'samples': 1198592, 'steps': 2340, 'loss/train': 2.4050533771514893} +02/24/2022 04:36:57 - INFO - codeparrot_training - Step 2341: {'lr': 0.0004999377387243827, 'samples': 1199104, 'steps': 2341, 'loss/train': 4.339027404785156} +02/24/2022 04:37:00 - INFO - codeparrot_training - Step 2342: {'lr': 0.0004999373730354419, 'samples': 1199616, 'steps': 2342, 'loss/train': 2.8714916706085205} +02/24/2022 04:37:07 - INFO - codeparrot_training - Step 2343: {'lr': 0.0004999370062758491, 'samples': 1200128, 'steps': 2343, 'loss/train': 3.7314019203186035} +02/24/2022 04:37:12 - INFO - codeparrot_training - Step 2344: {'lr': 0.0004999366384456052, 'samples': 1200640, 'steps': 2344, 'loss/train': 4.306217193603516} +02/24/2022 04:37:15 - INFO - codeparrot_training - Step 2345: {'lr': 0.0004999362695447123, 'samples': 1201152, 'steps': 2345, 'loss/train': 3.8925302028656006} +02/24/2022 04:37:21 - INFO - codeparrot_training - Step 2346: {'lr': 0.0004999358995731718, 'samples': 1201664, 'steps': 2346, 'loss/train': 3.634169816970825} +02/24/2022 04:37:24 - INFO - codeparrot_training - Step 2347: {'lr': 0.0004999355285309851, 'samples': 1202176, 'steps': 2347, 'loss/train': 3.4635636806488037} +02/24/2022 04:37:30 - INFO - codeparrot_training - Step 2348: {'lr': 0.0004999351564181541, 'samples': 1202688, 'steps': 2348, 'loss/train': 4.446084976196289} +02/24/2022 04:37:33 - INFO - codeparrot_training - Step 2349: {'lr': 0.0004999347832346802, 'samples': 1203200, 'steps': 2349, 'loss/train': 4.977412700653076} +02/24/2022 04:37:39 - INFO - codeparrot_training - Step 2350: {'lr': 0.0004999344089805651, 'samples': 1203712, 'steps': 2350, 'loss/train': 2.824918746948242} +02/24/2022 04:37:42 - INFO - codeparrot_training - Step 2351: {'lr': 0.0004999340336558104, 'samples': 1204224, 'steps': 2351, 'loss/train': 1.8706772327423096} +02/24/2022 04:37:48 - INFO - codeparrot_training - Step 2352: {'lr': 0.0004999336572604175, 'samples': 1204736, 'steps': 2352, 'loss/train': 3.604421854019165} +02/24/2022 04:37:52 - INFO - codeparrot_training - Step 2353: {'lr': 0.0004999332797943883, 'samples': 1205248, 'steps': 2353, 'loss/train': 3.882422924041748} +02/24/2022 04:37:57 - INFO - codeparrot_training - Step 2354: {'lr': 0.0004999329012577243, 'samples': 1205760, 'steps': 2354, 'loss/train': 3.8811521530151367} +02/24/2022 04:38:01 - INFO - codeparrot_training - Step 2355: {'lr': 0.000499932521650427, 'samples': 1206272, 'steps': 2355, 'loss/train': 3.0197741985321045} +02/24/2022 04:38:06 - INFO - codeparrot_training - Step 2356: {'lr': 0.0004999321409724982, 'samples': 1206784, 'steps': 2356, 'loss/train': 0.8928283452987671} +02/24/2022 04:38:10 - INFO - codeparrot_training - Step 2357: {'lr': 0.0004999317592239395, 'samples': 1207296, 'steps': 2357, 'loss/train': 3.751070261001587} +02/24/2022 04:38:15 - INFO - codeparrot_training - Step 2358: {'lr': 0.0004999313764047525, 'samples': 1207808, 'steps': 2358, 'loss/train': 4.281635761260986} +02/24/2022 04:38:19 - INFO - codeparrot_training - Step 2359: {'lr': 0.0004999309925149388, 'samples': 1208320, 'steps': 2359, 'loss/train': 3.909730911254883} +02/24/2022 04:38:24 - INFO - codeparrot_training - Step 2360: {'lr': 0.0004999306075545002, 'samples': 1208832, 'steps': 2360, 'loss/train': 5.49069881439209} +02/24/2022 04:38:28 - INFO - codeparrot_training - Step 2361: {'lr': 0.0004999302215234381, 'samples': 1209344, 'steps': 2361, 'loss/train': 3.3348541259765625} +02/24/2022 04:38:33 - INFO - codeparrot_training - Step 2362: {'lr': 0.0004999298344217543, 'samples': 1209856, 'steps': 2362, 'loss/train': 3.580683946609497} +02/24/2022 04:38:37 - INFO - codeparrot_training - Step 2363: {'lr': 0.0004999294462494506, 'samples': 1210368, 'steps': 2363, 'loss/train': 2.8717422485351562} +02/24/2022 04:38:43 - INFO - codeparrot_training - Step 2364: {'lr': 0.0004999290570065284, 'samples': 1210880, 'steps': 2364, 'loss/train': 0.5460531711578369} +02/24/2022 04:38:46 - INFO - codeparrot_training - Step 2365: {'lr': 0.0004999286666929895, 'samples': 1211392, 'steps': 2365, 'loss/train': 3.815709114074707} +02/24/2022 04:38:51 - INFO - codeparrot_training - Step 2366: {'lr': 0.0004999282753088356, 'samples': 1211904, 'steps': 2366, 'loss/train': 3.463447093963623} +02/24/2022 04:38:55 - INFO - codeparrot_training - Step 2367: {'lr': 0.0004999278828540682, 'samples': 1212416, 'steps': 2367, 'loss/train': 3.520075559616089} +02/24/2022 04:39:00 - INFO - codeparrot_training - Step 2368: {'lr': 0.0004999274893286893, 'samples': 1212928, 'steps': 2368, 'loss/train': 3.047358751296997} +02/24/2022 04:39:04 - INFO - codeparrot_training - Step 2369: {'lr': 0.0004999270947327003, 'samples': 1213440, 'steps': 2369, 'loss/train': 3.0929086208343506} +02/24/2022 04:39:09 - INFO - codeparrot_training - Step 2370: {'lr': 0.0004999266990661029, 'samples': 1213952, 'steps': 2370, 'loss/train': 3.5242362022399902} +02/24/2022 04:39:13 - INFO - codeparrot_training - Step 2371: {'lr': 0.0004999263023288989, 'samples': 1214464, 'steps': 2371, 'loss/train': 3.582456350326538} +02/24/2022 04:39:18 - INFO - codeparrot_training - Step 2372: {'lr': 0.0004999259045210901, 'samples': 1214976, 'steps': 2372, 'loss/train': 2.7310776710510254} +02/24/2022 04:39:22 - INFO - codeparrot_training - Step 2373: {'lr': 0.000499925505642678, 'samples': 1215488, 'steps': 2373, 'loss/train': 5.180909156799316} +02/24/2022 04:39:28 - INFO - codeparrot_training - Step 2374: {'lr': 0.0004999251056936645, 'samples': 1216000, 'steps': 2374, 'loss/train': 2.431812047958374} +02/24/2022 04:39:32 - INFO - codeparrot_training - Step 2375: {'lr': 0.000499924704674051, 'samples': 1216512, 'steps': 2375, 'loss/train': 4.3434953689575195} +02/24/2022 04:39:37 - INFO - codeparrot_training - Step 2376: {'lr': 0.0004999243025838396, 'samples': 1217024, 'steps': 2376, 'loss/train': 3.5757734775543213} +02/24/2022 04:39:40 - INFO - codeparrot_training - Step 2377: {'lr': 0.0004999238994230318, 'samples': 1217536, 'steps': 2377, 'loss/train': 0.8257812857627869} +02/24/2022 04:39:46 - INFO - codeparrot_training - Step 2378: {'lr': 0.0004999234951916293, 'samples': 1218048, 'steps': 2378, 'loss/train': 3.2002294063568115} +02/24/2022 04:39:49 - INFO - codeparrot_training - Step 2379: {'lr': 0.0004999230898896341, 'samples': 1218560, 'steps': 2379, 'loss/train': 2.6964566707611084} +02/24/2022 04:39:55 - INFO - codeparrot_training - Step 2380: {'lr': 0.0004999226835170476, 'samples': 1219072, 'steps': 2380, 'loss/train': 3.7121119499206543} +02/24/2022 04:39:58 - INFO - codeparrot_training - Step 2381: {'lr': 0.0004999222760738717, 'samples': 1219584, 'steps': 2381, 'loss/train': 2.8447868824005127} +02/24/2022 04:40:04 - INFO - codeparrot_training - Step 2382: {'lr': 0.0004999218675601081, 'samples': 1220096, 'steps': 2382, 'loss/train': 4.5413126945495605} +02/24/2022 04:40:07 - INFO - codeparrot_training - Step 2383: {'lr': 0.0004999214579757586, 'samples': 1220608, 'steps': 2383, 'loss/train': 2.644672155380249} +02/24/2022 04:40:13 - INFO - codeparrot_training - Step 2384: {'lr': 0.000499921047320825, 'samples': 1221120, 'steps': 2384, 'loss/train': 4.234299182891846} +02/24/2022 04:40:16 - INFO - codeparrot_training - Step 2385: {'lr': 0.000499920635595309, 'samples': 1221632, 'steps': 2385, 'loss/train': 4.1897759437561035} +02/24/2022 04:40:22 - INFO - codeparrot_training - Step 2386: {'lr': 0.0004999202227992122, 'samples': 1222144, 'steps': 2386, 'loss/train': 3.9284749031066895} +02/24/2022 04:40:25 - INFO - codeparrot_training - Step 2387: {'lr': 0.0004999198089325367, 'samples': 1222656, 'steps': 2387, 'loss/train': 3.6172170639038086} +02/24/2022 04:40:31 - INFO - codeparrot_training - Step 2388: {'lr': 0.0004999193939952839, 'samples': 1223168, 'steps': 2388, 'loss/train': 3.1974456310272217} +02/24/2022 04:40:34 - INFO - codeparrot_training - Step 2389: {'lr': 0.000499918977987456, 'samples': 1223680, 'steps': 2389, 'loss/train': 1.7424440383911133} +02/24/2022 04:40:40 - INFO - codeparrot_training - Step 2390: {'lr': 0.0004999185609090544, 'samples': 1224192, 'steps': 2390, 'loss/train': 3.1206183433532715} +02/24/2022 04:40:46 - INFO - codeparrot_training - Step 2391: {'lr': 0.0004999181427600811, 'samples': 1224704, 'steps': 2391, 'loss/train': 3.673250675201416} +02/24/2022 04:40:49 - INFO - codeparrot_training - Step 2392: {'lr': 0.0004999177235405378, 'samples': 1225216, 'steps': 2392, 'loss/train': 3.422680377960205} +02/24/2022 04:40:55 - INFO - codeparrot_training - Step 2393: {'lr': 0.0004999173032504264, 'samples': 1225728, 'steps': 2393, 'loss/train': 2.807474136352539} +02/24/2022 04:40:58 - INFO - codeparrot_training - Step 2394: {'lr': 0.0004999168818897486, 'samples': 1226240, 'steps': 2394, 'loss/train': 3.026815414428711} +02/24/2022 04:41:04 - INFO - codeparrot_training - Step 2395: {'lr': 0.0004999164594585062, 'samples': 1226752, 'steps': 2395, 'loss/train': 2.855661392211914} +02/24/2022 04:41:07 - INFO - codeparrot_training - Step 2396: {'lr': 0.0004999160359567011, 'samples': 1227264, 'steps': 2396, 'loss/train': 3.660255193710327} +02/24/2022 04:41:13 - INFO - codeparrot_training - Step 2397: {'lr': 0.000499915611384335, 'samples': 1227776, 'steps': 2397, 'loss/train': 2.143773317337036} +02/24/2022 04:41:16 - INFO - codeparrot_training - Step 2398: {'lr': 0.0004999151857414099, 'samples': 1228288, 'steps': 2398, 'loss/train': 4.035255432128906} +02/24/2022 04:41:22 - INFO - codeparrot_training - Step 2399: {'lr': 0.0004999147590279273, 'samples': 1228800, 'steps': 2399, 'loss/train': 0.5259669423103333} +02/24/2022 04:41:26 - INFO - codeparrot_training - Step 2400: {'lr': 0.0004999143312438893, 'samples': 1229312, 'steps': 2400, 'loss/train': 2.4696826934814453} +02/24/2022 04:41:31 - INFO - codeparrot_training - Step 2401: {'lr': 0.0004999139023892978, 'samples': 1229824, 'steps': 2401, 'loss/train': 3.858065605163574} +02/24/2022 04:41:35 - INFO - codeparrot_training - Step 2402: {'lr': 0.0004999134724641543, 'samples': 1230336, 'steps': 2402, 'loss/train': 3.43780517578125} +02/24/2022 04:41:40 - INFO - codeparrot_training - Step 2403: {'lr': 0.000499913041468461, 'samples': 1230848, 'steps': 2403, 'loss/train': 2.750088691711426} +02/24/2022 04:41:44 - INFO - codeparrot_training - Step 2404: {'lr': 0.0004999126094022195, 'samples': 1231360, 'steps': 2404, 'loss/train': 3.518949031829834} +02/24/2022 04:41:49 - INFO - codeparrot_training - Step 2405: {'lr': 0.0004999121762654318, 'samples': 1231872, 'steps': 2405, 'loss/train': 2.4769396781921387} +02/24/2022 04:41:53 - INFO - codeparrot_training - Step 2406: {'lr': 0.0004999117420580996, 'samples': 1232384, 'steps': 2406, 'loss/train': 4.040136814117432} +02/24/2022 04:41:58 - INFO - codeparrot_training - Step 2407: {'lr': 0.0004999113067802249, 'samples': 1232896, 'steps': 2407, 'loss/train': 3.6795735359191895} +02/24/2022 04:42:01 - INFO - codeparrot_training - Step 2408: {'lr': 0.0004999108704318095, 'samples': 1233408, 'steps': 2408, 'loss/train': 3.4916088581085205} +02/24/2022 04:42:08 - INFO - codeparrot_training - Step 2409: {'lr': 0.0004999104330128553, 'samples': 1233920, 'steps': 2409, 'loss/train': 3.254087209701538} +02/24/2022 04:42:12 - INFO - codeparrot_training - Step 2410: {'lr': 0.0004999099945233641, 'samples': 1234432, 'steps': 2410, 'loss/train': 3.413273572921753} +02/24/2022 04:42:17 - INFO - codeparrot_training - Step 2411: {'lr': 0.000499909554963338, 'samples': 1234944, 'steps': 2411, 'loss/train': 3.116635799407959} +02/24/2022 04:42:21 - INFO - codeparrot_training - Step 2412: {'lr': 0.0004999091143327786, 'samples': 1235456, 'steps': 2412, 'loss/train': 3.037827253341675} +02/24/2022 04:42:26 - INFO - codeparrot_training - Step 2413: {'lr': 0.000499908672631688, 'samples': 1235968, 'steps': 2413, 'loss/train': 4.417897701263428} +02/24/2022 04:42:30 - INFO - codeparrot_training - Step 2414: {'lr': 0.0004999082298600679, 'samples': 1236480, 'steps': 2414, 'loss/train': 3.452875852584839} +02/24/2022 04:42:35 - INFO - codeparrot_training - Step 2415: {'lr': 0.0004999077860179204, 'samples': 1236992, 'steps': 2415, 'loss/train': 2.8953804969787598} +02/24/2022 04:42:39 - INFO - codeparrot_training - Step 2416: {'lr': 0.0004999073411052472, 'samples': 1237504, 'steps': 2416, 'loss/train': 3.4752864837646484} +02/24/2022 04:42:44 - INFO - codeparrot_training - Step 2417: {'lr': 0.0004999068951220503, 'samples': 1238016, 'steps': 2417, 'loss/train': 3.343427896499634} +02/24/2022 04:42:47 - INFO - codeparrot_training - Step 2418: {'lr': 0.0004999064480683317, 'samples': 1238528, 'steps': 2418, 'loss/train': 2.558880567550659} +02/24/2022 04:42:54 - INFO - codeparrot_training - Step 2419: {'lr': 0.0004999059999440932, 'samples': 1239040, 'steps': 2419, 'loss/train': 4.296968936920166} +02/24/2022 04:42:57 - INFO - codeparrot_training - Step 2420: {'lr': 0.0004999055507493368, 'samples': 1239552, 'steps': 2420, 'loss/train': 3.0869860649108887} +02/24/2022 04:43:03 - INFO - codeparrot_training - Step 2421: {'lr': 0.0004999051004840642, 'samples': 1240064, 'steps': 2421, 'loss/train': 2.653878927230835} +02/24/2022 04:43:06 - INFO - codeparrot_training - Step 2422: {'lr': 0.0004999046491482777, 'samples': 1240576, 'steps': 2422, 'loss/train': 3.7434804439544678} +02/24/2022 04:43:12 - INFO - codeparrot_training - Step 2423: {'lr': 0.000499904196741979, 'samples': 1241088, 'steps': 2423, 'loss/train': 3.566549301147461} +02/24/2022 04:43:15 - INFO - codeparrot_training - Step 2424: {'lr': 0.00049990374326517, 'samples': 1241600, 'steps': 2424, 'loss/train': 3.7648866176605225} +02/24/2022 04:43:21 - INFO - codeparrot_training - Step 2425: {'lr': 0.0004999032887178527, 'samples': 1242112, 'steps': 2425, 'loss/train': 3.2500345706939697} +02/24/2022 04:43:24 - INFO - codeparrot_training - Step 2426: {'lr': 0.000499902833100029, 'samples': 1242624, 'steps': 2426, 'loss/train': 4.845691204071045} +02/24/2022 04:43:30 - INFO - codeparrot_training - Step 2427: {'lr': 0.0004999023764117011, 'samples': 1243136, 'steps': 2427, 'loss/train': 0.8710881471633911} +02/24/2022 04:43:33 - INFO - codeparrot_training - Step 2428: {'lr': 0.0004999019186528708, 'samples': 1243648, 'steps': 2428, 'loss/train': 4.0029497146606445} +02/24/2022 04:43:39 - INFO - codeparrot_training - Step 2429: {'lr': 0.0004999014598235399, 'samples': 1244160, 'steps': 2429, 'loss/train': 2.7371466159820557} +02/24/2022 04:43:42 - INFO - codeparrot_training - Step 2430: {'lr': 0.0004999009999237105, 'samples': 1244672, 'steps': 2430, 'loss/train': 4.078182220458984} +02/24/2022 04:43:48 - INFO - codeparrot_training - Step 2431: {'lr': 0.0004999005389533846, 'samples': 1245184, 'steps': 2431, 'loss/train': 3.9989120960235596} +02/24/2022 04:43:51 - INFO - codeparrot_training - Step 2432: {'lr': 0.0004999000769125642, 'samples': 1245696, 'steps': 2432, 'loss/train': 2.7581992149353027} +02/24/2022 04:43:57 - INFO - codeparrot_training - Step 2433: {'lr': 0.0004998996138012512, 'samples': 1246208, 'steps': 2433, 'loss/train': 1.4696320295333862} +02/24/2022 04:44:00 - INFO - codeparrot_training - Step 2434: {'lr': 0.0004998991496194475, 'samples': 1246720, 'steps': 2434, 'loss/train': 0.5218791365623474} +02/24/2022 04:44:06 - INFO - codeparrot_training - Step 2435: {'lr': 0.0004998986843671552, 'samples': 1247232, 'steps': 2435, 'loss/train': 1.8831707239151} +02/24/2022 04:44:10 - INFO - codeparrot_training - Step 2436: {'lr': 0.0004998982180443764, 'samples': 1247744, 'steps': 2436, 'loss/train': 3.620414972305298} +02/24/2022 04:44:15 - INFO - codeparrot_training - Step 2437: {'lr': 0.000499897750651113, 'samples': 1248256, 'steps': 2437, 'loss/train': 4.267504692077637} +02/24/2022 04:44:19 - INFO - codeparrot_training - Step 2438: {'lr': 0.0004998972821873668, 'samples': 1248768, 'steps': 2438, 'loss/train': 2.5739922523498535} +02/24/2022 04:44:24 - INFO - codeparrot_training - Step 2439: {'lr': 0.0004998968126531402, 'samples': 1249280, 'steps': 2439, 'loss/train': 3.807056188583374} +02/24/2022 04:44:28 - INFO - codeparrot_training - Step 2440: {'lr': 0.0004998963420484349, 'samples': 1249792, 'steps': 2440, 'loss/train': 3.8136444091796875} +02/24/2022 04:44:33 - INFO - codeparrot_training - Step 2441: {'lr': 0.0004998958703732532, 'samples': 1250304, 'steps': 2441, 'loss/train': 1.0674653053283691} +02/24/2022 04:44:37 - INFO - codeparrot_training - Step 2442: {'lr': 0.0004998953976275966, 'samples': 1250816, 'steps': 2442, 'loss/train': 3.5890421867370605} +02/24/2022 04:44:42 - INFO - codeparrot_training - Step 2443: {'lr': 0.0004998949238114677, 'samples': 1251328, 'steps': 2443, 'loss/train': 2.782356023788452} +02/24/2022 04:44:49 - INFO - codeparrot_training - Step 2444: {'lr': 0.0004998944489248683, 'samples': 1251840, 'steps': 2444, 'loss/train': 3.3370895385742188} +02/24/2022 04:44:52 - INFO - codeparrot_training - Step 2445: {'lr': 0.0004998939729678004, 'samples': 1252352, 'steps': 2445, 'loss/train': 2.900718927383423} +02/24/2022 04:44:57 - INFO - codeparrot_training - Step 2446: {'lr': 0.000499893495940266, 'samples': 1252864, 'steps': 2446, 'loss/train': 4.6819748878479} +02/24/2022 04:45:01 - INFO - codeparrot_training - Step 2447: {'lr': 0.0004998930178422673, 'samples': 1253376, 'steps': 2447, 'loss/train': 2.8547513484954834} +02/24/2022 04:45:06 - INFO - codeparrot_training - Step 2448: {'lr': 0.0004998925386738062, 'samples': 1253888, 'steps': 2448, 'loss/train': 4.188310146331787} +02/24/2022 04:45:10 - INFO - codeparrot_training - Step 2449: {'lr': 0.0004998920584348849, 'samples': 1254400, 'steps': 2449, 'loss/train': 3.1551120281219482} +02/24/2022 04:45:16 - INFO - codeparrot_training - Step 2450: {'lr': 0.0004998915771255053, 'samples': 1254912, 'steps': 2450, 'loss/train': 4.014820575714111} +02/24/2022 04:45:20 - INFO - codeparrot_training - Step 2451: {'lr': 0.0004998910947456696, 'samples': 1255424, 'steps': 2451, 'loss/train': 3.264758825302124} +02/24/2022 04:45:23 - INFO - codeparrot_training - Step 2452: {'lr': 0.0004998906112953797, 'samples': 1255936, 'steps': 2452, 'loss/train': 3.2256007194519043} +02/24/2022 04:45:29 - INFO - codeparrot_training - Step 2453: {'lr': 0.0004998901267746379, 'samples': 1256448, 'steps': 2453, 'loss/train': 2.6657519340515137} +02/24/2022 04:45:32 - INFO - codeparrot_training - Step 2454: {'lr': 0.0004998896411834461, 'samples': 1256960, 'steps': 2454, 'loss/train': 3.830369710922241} +02/24/2022 04:45:38 - INFO - codeparrot_training - Step 2455: {'lr': 0.0004998891545218063, 'samples': 1257472, 'steps': 2455, 'loss/train': 4.173727989196777} +02/24/2022 04:45:42 - INFO - codeparrot_training - Step 2456: {'lr': 0.0004998886667897209, 'samples': 1257984, 'steps': 2456, 'loss/train': 3.6768832206726074} +02/24/2022 04:45:47 - INFO - codeparrot_training - Step 2457: {'lr': 0.0004998881779871917, 'samples': 1258496, 'steps': 2457, 'loss/train': 2.9944214820861816} +02/24/2022 04:45:51 - INFO - codeparrot_training - Step 2458: {'lr': 0.0004998876881142208, 'samples': 1259008, 'steps': 2458, 'loss/train': 2.1139795780181885} +02/24/2022 04:45:56 - INFO - codeparrot_training - Step 2459: {'lr': 0.0004998871971708106, 'samples': 1259520, 'steps': 2459, 'loss/train': 3.3430514335632324} +02/24/2022 04:45:59 - INFO - codeparrot_training - Step 2460: {'lr': 0.0004998867051569627, 'samples': 1260032, 'steps': 2460, 'loss/train': 3.4140310287475586} +02/24/2022 04:46:05 - INFO - codeparrot_training - Step 2461: {'lr': 0.0004998862120726798, 'samples': 1260544, 'steps': 2461, 'loss/train': 2.1001102924346924} +02/24/2022 04:46:08 - INFO - codeparrot_training - Step 2462: {'lr': 0.0004998857179179636, 'samples': 1261056, 'steps': 2462, 'loss/train': 2.8193459510803223} +02/24/2022 04:46:14 - INFO - codeparrot_training - Step 2463: {'lr': 0.0004998852226928164, 'samples': 1261568, 'steps': 2463, 'loss/train': 1.900756597518921} +02/24/2022 04:46:17 - INFO - codeparrot_training - Step 2464: {'lr': 0.0004998847263972401, 'samples': 1262080, 'steps': 2464, 'loss/train': 3.9644336700439453} +02/24/2022 04:46:23 - INFO - codeparrot_training - Step 2465: {'lr': 0.0004998842290312371, 'samples': 1262592, 'steps': 2465, 'loss/train': 2.8959858417510986} +02/24/2022 04:46:26 - INFO - codeparrot_training - Step 2466: {'lr': 0.0004998837305948094, 'samples': 1263104, 'steps': 2466, 'loss/train': 2.9531450271606445} +02/24/2022 04:46:32 - INFO - codeparrot_training - Step 2467: {'lr': 0.0004998832310879591, 'samples': 1263616, 'steps': 2467, 'loss/train': 0.5945470929145813} +02/24/2022 04:46:35 - INFO - codeparrot_training - Step 2468: {'lr': 0.0004998827305106884, 'samples': 1264128, 'steps': 2468, 'loss/train': 3.9969992637634277} +02/24/2022 04:46:41 - INFO - codeparrot_training - Step 2469: {'lr': 0.0004998822288629995, 'samples': 1264640, 'steps': 2469, 'loss/train': 2.9935379028320312} +02/24/2022 04:46:44 - INFO - codeparrot_training - Step 2470: {'lr': 0.0004998817261448943, 'samples': 1265152, 'steps': 2470, 'loss/train': 3.9394543170928955} +02/24/2022 04:46:51 - INFO - codeparrot_training - Step 2471: {'lr': 0.0004998812223563754, 'samples': 1265664, 'steps': 2471, 'loss/train': 2.3813884258270264} +02/24/2022 04:46:54 - INFO - codeparrot_training - Step 2472: {'lr': 0.0004998807174974445, 'samples': 1266176, 'steps': 2472, 'loss/train': 2.9916625022888184} +02/24/2022 04:47:00 - INFO - codeparrot_training - Step 2473: {'lr': 0.0004998802115681039, 'samples': 1266688, 'steps': 2473, 'loss/train': 1.8967984914779663} +02/24/2022 04:47:03 - INFO - codeparrot_training - Step 2474: {'lr': 0.000499879704568356, 'samples': 1267200, 'steps': 2474, 'loss/train': 3.8154592514038086} +02/24/2022 04:47:09 - INFO - codeparrot_training - Step 2475: {'lr': 0.0004998791964982026, 'samples': 1267712, 'steps': 2475, 'loss/train': 0.8268159627914429} +02/24/2022 04:47:12 - INFO - codeparrot_training - Step 2476: {'lr': 0.0004998786873576462, 'samples': 1268224, 'steps': 2476, 'loss/train': 4.143904209136963} +02/24/2022 04:47:18 - INFO - codeparrot_training - Step 2477: {'lr': 0.0004998781771466889, 'samples': 1268736, 'steps': 2477, 'loss/train': 3.781113862991333} +02/24/2022 04:47:21 - INFO - codeparrot_training - Step 2478: {'lr': 0.0004998776658653327, 'samples': 1269248, 'steps': 2478, 'loss/train': 3.5197501182556152} +02/24/2022 04:47:27 - INFO - codeparrot_training - Step 2479: {'lr': 0.00049987715351358, 'samples': 1269760, 'steps': 2479, 'loss/train': 3.852661371231079} +02/24/2022 04:47:30 - INFO - codeparrot_training - Step 2480: {'lr': 0.0004998766400914329, 'samples': 1270272, 'steps': 2480, 'loss/train': 4.078242301940918} +02/24/2022 04:47:36 - INFO - codeparrot_training - Step 2481: {'lr': 0.0004998761255988936, 'samples': 1270784, 'steps': 2481, 'loss/train': 3.6569957733154297} +02/24/2022 04:47:40 - INFO - codeparrot_training - Step 2482: {'lr': 0.0004998756100359643, 'samples': 1271296, 'steps': 2482, 'loss/train': 4.2928595542907715} +02/24/2022 04:47:46 - INFO - codeparrot_training - Step 2483: {'lr': 0.0004998750934026474, 'samples': 1271808, 'steps': 2483, 'loss/train': 0.5672394037246704} +02/24/2022 04:47:49 - INFO - codeparrot_training - Step 2484: {'lr': 0.0004998745756989448, 'samples': 1272320, 'steps': 2484, 'loss/train': 0.7372508645057678} +02/24/2022 04:47:54 - INFO - codeparrot_training - Step 2485: {'lr': 0.0004998740569248588, 'samples': 1272832, 'steps': 2485, 'loss/train': 3.4258670806884766} +02/24/2022 04:47:58 - INFO - codeparrot_training - Step 2486: {'lr': 0.0004998735370803917, 'samples': 1273344, 'steps': 2486, 'loss/train': 3.582427978515625} +02/24/2022 04:48:03 - INFO - codeparrot_training - Step 2487: {'lr': 0.0004998730161655459, 'samples': 1273856, 'steps': 2487, 'loss/train': 2.98801851272583} +02/24/2022 04:48:07 - INFO - codeparrot_training - Step 2488: {'lr': 0.0004998724941803232, 'samples': 1274368, 'steps': 2488, 'loss/train': 2.776625633239746} +02/24/2022 04:48:12 - INFO - codeparrot_training - Step 2489: {'lr': 0.0004998719711247262, 'samples': 1274880, 'steps': 2489, 'loss/train': 2.3862781524658203} +02/24/2022 04:48:18 - INFO - codeparrot_training - Step 2490: {'lr': 0.0004998714469987571, 'samples': 1275392, 'steps': 2490, 'loss/train': 2.8626139163970947} +02/24/2022 04:48:22 - INFO - codeparrot_training - Step 2491: {'lr': 0.000499870921802418, 'samples': 1275904, 'steps': 2491, 'loss/train': 3.086052417755127} +02/24/2022 04:48:28 - INFO - codeparrot_training - Step 2492: {'lr': 0.0004998703955357111, 'samples': 1276416, 'steps': 2492, 'loss/train': 2.601979970932007} +02/24/2022 04:48:31 - INFO - codeparrot_training - Step 2493: {'lr': 0.0004998698681986389, 'samples': 1276928, 'steps': 2493, 'loss/train': 0.6036854982376099} +02/24/2022 04:48:37 - INFO - codeparrot_training - Step 2494: {'lr': 0.0004998693397912034, 'samples': 1277440, 'steps': 2494, 'loss/train': 3.421665668487549} +02/24/2022 04:48:40 - INFO - codeparrot_training - Step 2495: {'lr': 0.0004998688103134072, 'samples': 1277952, 'steps': 2495, 'loss/train': 3.5919039249420166} +02/24/2022 04:48:46 - INFO - codeparrot_training - Step 2496: {'lr': 0.0004998682797652522, 'samples': 1278464, 'steps': 2496, 'loss/train': 2.873520851135254} +02/24/2022 04:48:49 - INFO - codeparrot_training - Step 2497: {'lr': 0.0004998677481467408, 'samples': 1278976, 'steps': 2497, 'loss/train': 2.5793066024780273} +02/24/2022 04:48:55 - INFO - codeparrot_training - Step 2498: {'lr': 0.0004998672154578754, 'samples': 1279488, 'steps': 2498, 'loss/train': 3.4168808460235596} +02/24/2022 04:48:58 - INFO - codeparrot_training - Step 2499: {'lr': 0.0004998666816986582, 'samples': 1280000, 'steps': 2499, 'loss/train': 1.6697356700897217} +02/24/2022 04:49:04 - INFO - codeparrot_training - Step 2500: {'lr': 0.0004998661468690914, 'samples': 1280512, 'steps': 2500, 'loss/train': 4.405819892883301} +02/24/2022 04:49:07 - INFO - codeparrot_training - Step 2501: {'lr': 0.0004998656109691774, 'samples': 1281024, 'steps': 2501, 'loss/train': 3.3693747520446777} +02/24/2022 04:49:14 - INFO - codeparrot_training - Step 2502: {'lr': 0.0004998650739989185, 'samples': 1281536, 'steps': 2502, 'loss/train': 3.306903600692749} +02/24/2022 04:49:17 - INFO - codeparrot_training - Step 2503: {'lr': 0.0004998645359583169, 'samples': 1282048, 'steps': 2503, 'loss/train': 2.528913736343384} +02/24/2022 04:49:23 - INFO - codeparrot_training - Step 2504: {'lr': 0.0004998639968473751, 'samples': 1282560, 'steps': 2504, 'loss/train': 3.113189697265625} +02/24/2022 04:49:26 - INFO - codeparrot_training - Step 2505: {'lr': 0.0004998634566660952, 'samples': 1283072, 'steps': 2505, 'loss/train': 2.5438289642333984} +02/24/2022 04:49:32 - INFO - codeparrot_training - Step 2506: {'lr': 0.0004998629154144795, 'samples': 1283584, 'steps': 2506, 'loss/train': 3.474109649658203} +02/24/2022 04:49:35 - INFO - codeparrot_training - Step 2507: {'lr': 0.0004998623730925305, 'samples': 1284096, 'steps': 2507, 'loss/train': 4.123108863830566} +02/24/2022 04:49:41 - INFO - codeparrot_training - Step 2508: {'lr': 0.0004998618297002504, 'samples': 1284608, 'steps': 2508, 'loss/train': 2.341304063796997} +02/24/2022 04:49:44 - INFO - codeparrot_training - Step 2509: {'lr': 0.0004998612852376417, 'samples': 1285120, 'steps': 2509, 'loss/train': 2.595646858215332} +02/24/2022 04:49:50 - INFO - codeparrot_training - Step 2510: {'lr': 0.0004998607397047063, 'samples': 1285632, 'steps': 2510, 'loss/train': 2.518873691558838} +02/24/2022 04:49:53 - INFO - codeparrot_training - Step 2511: {'lr': 0.0004998601931014471, 'samples': 1286144, 'steps': 2511, 'loss/train': 3.6765215396881104} +02/24/2022 04:49:59 - INFO - codeparrot_training - Step 2512: {'lr': 0.0004998596454278661, 'samples': 1286656, 'steps': 2512, 'loss/train': 1.8189362287521362} +02/24/2022 04:50:03 - INFO - codeparrot_training - Step 2513: {'lr': 0.0004998590966839657, 'samples': 1287168, 'steps': 2513, 'loss/train': 5.485591888427734} +02/24/2022 04:50:08 - INFO - codeparrot_training - Step 2514: {'lr': 0.0004998585468697482, 'samples': 1287680, 'steps': 2514, 'loss/train': 3.153528928756714} +02/24/2022 04:50:12 - INFO - codeparrot_training - Step 2515: {'lr': 0.0004998579959852161, 'samples': 1288192, 'steps': 2515, 'loss/train': 3.1058945655822754} +02/24/2022 04:50:17 - INFO - codeparrot_training - Step 2516: {'lr': 0.0004998574440303718, 'samples': 1288704, 'steps': 2516, 'loss/train': 3.6950082778930664} +02/24/2022 04:50:21 - INFO - codeparrot_training - Step 2517: {'lr': 0.0004998568910052173, 'samples': 1289216, 'steps': 2517, 'loss/train': 2.8677847385406494} +02/24/2022 04:50:26 - INFO - codeparrot_training - Step 2518: {'lr': 0.0004998563369097554, 'samples': 1289728, 'steps': 2518, 'loss/train': 2.691347360610962} +02/24/2022 04:50:30 - INFO - codeparrot_training - Step 2519: {'lr': 0.0004998557817439882, 'samples': 1290240, 'steps': 2519, 'loss/train': 2.9570348262786865} +02/24/2022 04:50:35 - INFO - codeparrot_training - Step 2520: {'lr': 0.0004998552255079182, 'samples': 1290752, 'steps': 2520, 'loss/train': 3.1546807289123535} +02/24/2022 04:50:39 - INFO - codeparrot_training - Step 2521: {'lr': 0.0004998546682015478, 'samples': 1291264, 'steps': 2521, 'loss/train': 4.013919353485107} +02/24/2022 04:50:44 - INFO - codeparrot_training - Step 2522: {'lr': 0.0004998541098248793, 'samples': 1291776, 'steps': 2522, 'loss/train': 3.959960699081421} +02/24/2022 04:50:48 - INFO - codeparrot_training - Step 2523: {'lr': 0.0004998535503779151, 'samples': 1292288, 'steps': 2523, 'loss/train': 3.3066318035125732} +02/24/2022 04:50:53 - INFO - codeparrot_training - Step 2524: {'lr': 0.0004998529898606576, 'samples': 1292800, 'steps': 2524, 'loss/train': 0.5945372581481934} +02/24/2022 04:50:57 - INFO - codeparrot_training - Step 2525: {'lr': 0.0004998524282731093, 'samples': 1293312, 'steps': 2525, 'loss/train': 3.0410656929016113} +02/24/2022 04:51:02 - INFO - codeparrot_training - Step 2526: {'lr': 0.0004998518656152725, 'samples': 1293824, 'steps': 2526, 'loss/train': 3.017993927001953} +02/24/2022 04:51:06 - INFO - codeparrot_training - Step 2527: {'lr': 0.0004998513018871498, 'samples': 1294336, 'steps': 2527, 'loss/train': 3.1269800662994385} +02/24/2022 04:51:12 - INFO - codeparrot_training - Step 2528: {'lr': 0.0004998507370887433, 'samples': 1294848, 'steps': 2528, 'loss/train': 3.7678191661834717} +02/24/2022 04:51:15 - INFO - codeparrot_training - Step 2529: {'lr': 0.0004998501712200555, 'samples': 1295360, 'steps': 2529, 'loss/train': 0.6667803525924683} +02/24/2022 04:51:21 - INFO - codeparrot_training - Step 2530: {'lr': 0.000499849604281089, 'samples': 1295872, 'steps': 2530, 'loss/train': 3.203901529312134} +02/24/2022 04:51:24 - INFO - codeparrot_training - Step 2531: {'lr': 0.0004998490362718462, 'samples': 1296384, 'steps': 2531, 'loss/train': 3.729490041732788} +02/24/2022 04:51:30 - INFO - codeparrot_training - Step 2532: {'lr': 0.0004998484671923293, 'samples': 1296896, 'steps': 2532, 'loss/train': 4.017566680908203} +02/24/2022 04:51:34 - INFO - codeparrot_training - Step 2533: {'lr': 0.000499847897042541, 'samples': 1297408, 'steps': 2533, 'loss/train': 3.762479782104492} +02/24/2022 04:51:39 - INFO - codeparrot_training - Step 2534: {'lr': 0.0004998473258224837, 'samples': 1297920, 'steps': 2534, 'loss/train': 2.2608602046966553} +02/24/2022 04:51:42 - INFO - codeparrot_training - Step 2535: {'lr': 0.0004998467535321597, 'samples': 1298432, 'steps': 2535, 'loss/train': 3.631470203399658} +02/24/2022 04:51:48 - INFO - codeparrot_training - Step 2536: {'lr': 0.0004998461801715716, 'samples': 1298944, 'steps': 2536, 'loss/train': 3.0279645919799805} +02/24/2022 04:51:52 - INFO - codeparrot_training - Step 2537: {'lr': 0.0004998456057407218, 'samples': 1299456, 'steps': 2537, 'loss/train': 3.118901014328003} +02/24/2022 04:51:58 - INFO - codeparrot_training - Step 2538: {'lr': 0.0004998450302396127, 'samples': 1299968, 'steps': 2538, 'loss/train': 3.350200891494751} +02/24/2022 04:52:01 - INFO - codeparrot_training - Step 2539: {'lr': 0.0004998444536682469, 'samples': 1300480, 'steps': 2539, 'loss/train': 2.497511386871338} +02/24/2022 04:52:06 - INFO - codeparrot_training - Step 2540: {'lr': 0.0004998438760266267, 'samples': 1300992, 'steps': 2540, 'loss/train': 2.3810489177703857} +02/24/2022 04:52:10 - INFO - codeparrot_training - Step 2541: {'lr': 0.0004998432973147548, 'samples': 1301504, 'steps': 2541, 'loss/train': 2.9320499897003174} +02/24/2022 04:52:15 - INFO - codeparrot_training - Step 2542: {'lr': 0.0004998427175326335, 'samples': 1302016, 'steps': 2542, 'loss/train': 3.6699774265289307} +02/24/2022 04:52:19 - INFO - codeparrot_training - Step 2543: {'lr': 0.0004998421366802653, 'samples': 1302528, 'steps': 2543, 'loss/train': 3.641139030456543} +02/24/2022 04:52:24 - INFO - codeparrot_training - Step 2544: {'lr': 0.0004998415547576527, 'samples': 1303040, 'steps': 2544, 'loss/train': 3.030946731567383} +02/24/2022 04:52:28 - INFO - codeparrot_training - Step 2545: {'lr': 0.0004998409717647983, 'samples': 1303552, 'steps': 2545, 'loss/train': 3.1390645503997803} +02/24/2022 04:52:33 - INFO - codeparrot_training - Step 2546: {'lr': 0.0004998403877017044, 'samples': 1304064, 'steps': 2546, 'loss/train': 2.7460896968841553} +02/24/2022 04:52:39 - INFO - codeparrot_training - Step 2547: {'lr': 0.0004998398025683737, 'samples': 1304576, 'steps': 2547, 'loss/train': 2.8691656589508057} +02/24/2022 04:52:43 - INFO - codeparrot_training - Step 2548: {'lr': 0.0004998392163648085, 'samples': 1305088, 'steps': 2548, 'loss/train': 3.3751120567321777} +02/24/2022 04:52:48 - INFO - codeparrot_training - Step 2549: {'lr': 0.0004998386290910116, 'samples': 1305600, 'steps': 2549, 'loss/train': 3.710108995437622} +02/24/2022 04:52:52 - INFO - codeparrot_training - Step 2550: {'lr': 0.0004998380407469853, 'samples': 1306112, 'steps': 2550, 'loss/train': 3.1626763343811035} +02/24/2022 04:52:57 - INFO - codeparrot_training - Step 2551: {'lr': 0.0004998374513327321, 'samples': 1306624, 'steps': 2551, 'loss/train': 3.2368767261505127} +02/24/2022 04:53:01 - INFO - codeparrot_training - Step 2552: {'lr': 0.0004998368608482546, 'samples': 1307136, 'steps': 2552, 'loss/train': 2.595431327819824} +02/24/2022 04:53:06 - INFO - codeparrot_training - Step 2553: {'lr': 0.0004998362692935553, 'samples': 1307648, 'steps': 2553, 'loss/train': 4.266594886779785} +02/24/2022 04:53:10 - INFO - codeparrot_training - Step 2554: {'lr': 0.0004998356766686368, 'samples': 1308160, 'steps': 2554, 'loss/train': 3.5977730751037598} +02/24/2022 04:53:15 - INFO - codeparrot_training - Step 2555: {'lr': 0.0004998350829735016, 'samples': 1308672, 'steps': 2555, 'loss/train': 3.4415242671966553} +02/24/2022 04:53:19 - INFO - codeparrot_training - Step 2556: {'lr': 0.0004998344882081522, 'samples': 1309184, 'steps': 2556, 'loss/train': 1.8883965015411377} +02/24/2022 04:53:24 - INFO - codeparrot_training - Step 2557: {'lr': 0.0004998338923725913, 'samples': 1309696, 'steps': 2557, 'loss/train': 4.211737632751465} +02/24/2022 04:53:28 - INFO - codeparrot_training - Step 2558: {'lr': 0.0004998332954668211, 'samples': 1310208, 'steps': 2558, 'loss/train': 2.723522901535034} +02/24/2022 04:53:33 - INFO - codeparrot_training - Step 2559: {'lr': 0.0004998326974908446, 'samples': 1310720, 'steps': 2559, 'loss/train': 2.6041481494903564} +02/24/2022 04:53:37 - INFO - codeparrot_training - Step 2560: {'lr': 0.0004998320984446641, 'samples': 1311232, 'steps': 2560, 'loss/train': 3.5851409435272217} +02/24/2022 04:53:42 - INFO - codeparrot_training - Step 2561: {'lr': 0.0004998314983282821, 'samples': 1311744, 'steps': 2561, 'loss/train': 2.926137924194336} +02/24/2022 04:53:46 - INFO - codeparrot_training - Step 2562: {'lr': 0.0004998308971417015, 'samples': 1312256, 'steps': 2562, 'loss/train': 3.7162015438079834} +02/24/2022 04:53:52 - INFO - codeparrot_training - Step 2563: {'lr': 0.0004998302948849246, 'samples': 1312768, 'steps': 2563, 'loss/train': 3.186518669128418} +02/24/2022 04:53:55 - INFO - codeparrot_training - Step 2564: {'lr': 0.0004998296915579539, 'samples': 1313280, 'steps': 2564, 'loss/train': 3.566105365753174} +02/24/2022 04:54:01 - INFO - codeparrot_training - Step 2565: {'lr': 0.0004998290871607924, 'samples': 1313792, 'steps': 2565, 'loss/train': 3.6817195415496826} +02/24/2022 04:54:04 - INFO - codeparrot_training - Step 2566: {'lr': 0.0004998284816934422, 'samples': 1314304, 'steps': 2566, 'loss/train': 3.373403549194336} +02/24/2022 04:54:10 - INFO - codeparrot_training - Step 2567: {'lr': 0.0004998278751559062, 'samples': 1314816, 'steps': 2567, 'loss/train': 4.0760321617126465} +02/24/2022 04:54:13 - INFO - codeparrot_training - Step 2568: {'lr': 0.0004998272675481868, 'samples': 1315328, 'steps': 2568, 'loss/train': 4.096252918243408} +02/24/2022 04:54:19 - INFO - codeparrot_training - Step 2569: {'lr': 0.0004998266588702869, 'samples': 1315840, 'steps': 2569, 'loss/train': 2.4457826614379883} +02/24/2022 04:54:22 - INFO - codeparrot_training - Step 2570: {'lr': 0.0004998260491222088, 'samples': 1316352, 'steps': 2570, 'loss/train': 2.908329486846924} +02/24/2022 04:54:27 - INFO - codeparrot_training - Step 2571: {'lr': 0.0004998254383039552, 'samples': 1316864, 'steps': 2571, 'loss/train': 4.027587890625} +02/24/2022 04:54:31 - INFO - codeparrot_training - Step 2572: {'lr': 0.0004998248264155288, 'samples': 1317376, 'steps': 2572, 'loss/train': 3.6171653270721436} +02/24/2022 04:54:37 - INFO - codeparrot_training - Step 2573: {'lr': 0.0004998242134569322, 'samples': 1317888, 'steps': 2573, 'loss/train': 3.2733216285705566} +02/24/2022 04:54:41 - INFO - codeparrot_training - Step 2574: {'lr': 0.0004998235994281681, 'samples': 1318400, 'steps': 2574, 'loss/train': 4.285373687744141} +02/24/2022 04:54:46 - INFO - codeparrot_training - Step 2575: {'lr': 0.0004998229843292388, 'samples': 1318912, 'steps': 2575, 'loss/train': 2.5809576511383057} +02/24/2022 04:54:49 - INFO - codeparrot_training - Step 2576: {'lr': 0.0004998223681601474, 'samples': 1319424, 'steps': 2576, 'loss/train': 3.8315556049346924} +02/24/2022 04:54:55 - INFO - codeparrot_training - Step 2577: {'lr': 0.0004998217509208961, 'samples': 1319936, 'steps': 2577, 'loss/train': 3.1385366916656494} +02/24/2022 04:54:58 - INFO - codeparrot_training - Step 2578: {'lr': 0.0004998211326114878, 'samples': 1320448, 'steps': 2578, 'loss/train': 3.515986442565918} +02/24/2022 04:55:04 - INFO - codeparrot_training - Step 2579: {'lr': 0.0004998205132319252, 'samples': 1320960, 'steps': 2579, 'loss/train': 4.016100883483887} +02/24/2022 04:55:07 - INFO - codeparrot_training - Step 2580: {'lr': 0.0004998198927822108, 'samples': 1321472, 'steps': 2580, 'loss/train': 1.2040746212005615} +02/24/2022 04:55:13 - INFO - codeparrot_training - Step 2581: {'lr': 0.0004998192712623472, 'samples': 1321984, 'steps': 2581, 'loss/train': 2.934591770172119} +02/24/2022 04:55:16 - INFO - codeparrot_training - Step 2582: {'lr': 0.0004998186486723373, 'samples': 1322496, 'steps': 2582, 'loss/train': 2.857848644256592} +02/24/2022 04:55:23 - INFO - codeparrot_training - Step 2583: {'lr': 0.0004998180250121836, 'samples': 1323008, 'steps': 2583, 'loss/train': 3.1073317527770996} +02/24/2022 04:55:26 - INFO - codeparrot_training - Step 2584: {'lr': 0.0004998174002818887, 'samples': 1323520, 'steps': 2584, 'loss/train': 2.4528825283050537} +02/24/2022 04:55:32 - INFO - codeparrot_training - Step 2585: {'lr': 0.0004998167744814555, 'samples': 1324032, 'steps': 2585, 'loss/train': 3.731081962585449} +02/24/2022 04:55:35 - INFO - codeparrot_training - Step 2586: {'lr': 0.0004998161476108864, 'samples': 1324544, 'steps': 2586, 'loss/train': 6.180232048034668} +02/24/2022 04:55:40 - INFO - codeparrot_training - Step 2587: {'lr': 0.0004998155196701845, 'samples': 1325056, 'steps': 2587, 'loss/train': 3.3464818000793457} +02/24/2022 04:55:44 - INFO - codeparrot_training - Step 2588: {'lr': 0.000499814890659352, 'samples': 1325568, 'steps': 2588, 'loss/train': 3.4936163425445557} +02/24/2022 04:55:50 - INFO - codeparrot_training - Step 2589: {'lr': 0.000499814260578392, 'samples': 1326080, 'steps': 2589, 'loss/train': 3.774045944213867} +02/24/2022 04:55:53 - INFO - codeparrot_training - Step 2590: {'lr': 0.000499813629427307, 'samples': 1326592, 'steps': 2590, 'loss/train': 3.5812551975250244} +02/24/2022 04:55:59 - INFO - codeparrot_training - Step 2591: {'lr': 0.0004998129972060998, 'samples': 1327104, 'steps': 2591, 'loss/train': 2.8676490783691406} +02/24/2022 04:56:02 - INFO - codeparrot_training - Step 2592: {'lr': 0.000499812363914773, 'samples': 1327616, 'steps': 2592, 'loss/train': 2.083740472793579} +02/24/2022 04:56:09 - INFO - codeparrot_training - Step 2593: {'lr': 0.0004998117295533292, 'samples': 1328128, 'steps': 2593, 'loss/train': 3.420949935913086} +02/24/2022 04:56:12 - INFO - codeparrot_training - Step 2594: {'lr': 0.0004998110941217714, 'samples': 1328640, 'steps': 2594, 'loss/train': 3.7538506984710693} +02/24/2022 04:56:18 - INFO - codeparrot_training - Step 2595: {'lr': 0.0004998104576201022, 'samples': 1329152, 'steps': 2595, 'loss/train': 4.181107997894287} +02/24/2022 04:56:21 - INFO - codeparrot_training - Step 2596: {'lr': 0.0004998098200483243, 'samples': 1329664, 'steps': 2596, 'loss/train': 3.6615211963653564} +02/24/2022 04:56:27 - INFO - codeparrot_training - Step 2597: {'lr': 0.0004998091814064405, 'samples': 1330176, 'steps': 2597, 'loss/train': 4.06433629989624} +02/24/2022 04:56:30 - INFO - codeparrot_training - Step 2598: {'lr': 0.0004998085416944534, 'samples': 1330688, 'steps': 2598, 'loss/train': 3.4494833946228027} +02/24/2022 04:56:36 - INFO - codeparrot_training - Step 2599: {'lr': 0.000499807900912366, 'samples': 1331200, 'steps': 2599, 'loss/train': 3.337926149368286} +02/24/2022 04:56:39 - INFO - codeparrot_training - Step 2600: {'lr': 0.0004998072590601808, 'samples': 1331712, 'steps': 2600, 'loss/train': 3.3017685413360596} +02/24/2022 04:56:45 - INFO - codeparrot_training - Step 2601: {'lr': 0.0004998066161379006, 'samples': 1332224, 'steps': 2601, 'loss/train': 1.7428919076919556} +02/24/2022 04:56:48 - INFO - codeparrot_training - Step 2602: {'lr': 0.0004998059721455281, 'samples': 1332736, 'steps': 2602, 'loss/train': 3.700556755065918} +02/24/2022 04:56:54 - INFO - codeparrot_training - Step 2603: {'lr': 0.0004998053270830662, 'samples': 1333248, 'steps': 2603, 'loss/train': 3.682637929916382} +02/24/2022 04:56:57 - INFO - codeparrot_training - Step 2604: {'lr': 0.0004998046809505176, 'samples': 1333760, 'steps': 2604, 'loss/train': 2.8790128231048584} +02/24/2022 04:57:03 - INFO - codeparrot_training - Step 2605: {'lr': 0.0004998040337478851, 'samples': 1334272, 'steps': 2605, 'loss/train': 3.541307210922241} +02/24/2022 04:57:06 - INFO - codeparrot_training - Step 2606: {'lr': 0.0004998033854751715, 'samples': 1334784, 'steps': 2606, 'loss/train': 3.504415273666382} +02/24/2022 04:57:12 - INFO - codeparrot_training - Step 2607: {'lr': 0.0004998027361323794, 'samples': 1335296, 'steps': 2607, 'loss/train': 3.3908636569976807} +02/24/2022 04:57:15 - INFO - codeparrot_training - Step 2608: {'lr': 0.0004998020857195117, 'samples': 1335808, 'steps': 2608, 'loss/train': 2.3568642139434814} +02/24/2022 04:57:22 - INFO - codeparrot_training - Step 2609: {'lr': 0.0004998014342365712, 'samples': 1336320, 'steps': 2609, 'loss/train': 3.6828958988189697} +02/24/2022 04:57:25 - INFO - codeparrot_training - Step 2610: {'lr': 0.0004998007816835608, 'samples': 1336832, 'steps': 2610, 'loss/train': 3.263063669204712} +02/24/2022 04:57:31 - INFO - codeparrot_training - Step 2611: {'lr': 0.000499800128060483, 'samples': 1337344, 'steps': 2611, 'loss/train': 2.5680322647094727} +02/24/2022 04:57:34 - INFO - codeparrot_training - Step 2612: {'lr': 0.0004997994733673409, 'samples': 1337856, 'steps': 2612, 'loss/train': 3.1815738677978516} +02/24/2022 04:57:40 - INFO - codeparrot_training - Step 2613: {'lr': 0.000499798817604137, 'samples': 1338368, 'steps': 2613, 'loss/train': 1.6468865871429443} +02/24/2022 04:57:43 - INFO - codeparrot_training - Step 2614: {'lr': 0.0004997981607708745, 'samples': 1338880, 'steps': 2614, 'loss/train': 2.981415271759033} +02/24/2022 04:57:49 - INFO - codeparrot_training - Step 2615: {'lr': 0.0004997975028675558, 'samples': 1339392, 'steps': 2615, 'loss/train': 4.030995845794678} +02/24/2022 04:57:53 - INFO - codeparrot_training - Step 2616: {'lr': 0.0004997968438941841, 'samples': 1339904, 'steps': 2616, 'loss/train': 1.6613208055496216} +02/24/2022 04:57:58 - INFO - codeparrot_training - Step 2617: {'lr': 0.0004997961838507619, 'samples': 1340416, 'steps': 2617, 'loss/train': 3.0013906955718994} +02/24/2022 04:58:02 - INFO - codeparrot_training - Step 2618: {'lr': 0.0004997955227372923, 'samples': 1340928, 'steps': 2618, 'loss/train': 4.839288711547852} +02/24/2022 04:58:08 - INFO - codeparrot_training - Step 2619: {'lr': 0.000499794860553778, 'samples': 1341440, 'steps': 2619, 'loss/train': 3.9381189346313477} +02/24/2022 04:58:11 - INFO - codeparrot_training - Step 2620: {'lr': 0.0004997941973002216, 'samples': 1341952, 'steps': 2620, 'loss/train': 4.103443622589111} +02/24/2022 04:58:17 - INFO - codeparrot_training - Step 2621: {'lr': 0.0004997935329766265, 'samples': 1342464, 'steps': 2621, 'loss/train': 3.048365592956543} +02/24/2022 04:58:20 - INFO - codeparrot_training - Step 2622: {'lr': 0.000499792867582995, 'samples': 1342976, 'steps': 2622, 'loss/train': 3.6909701824188232} +02/24/2022 04:58:25 - INFO - codeparrot_training - Step 2623: {'lr': 0.0004997922011193303, 'samples': 1343488, 'steps': 2623, 'loss/train': 1.2625305652618408} +02/24/2022 04:58:29 - INFO - codeparrot_training - Step 2624: {'lr': 0.000499791533585635, 'samples': 1344000, 'steps': 2624, 'loss/train': 0.9226672649383545} +02/24/2022 04:58:34 - INFO - codeparrot_training - Step 2625: {'lr': 0.0004997908649819122, 'samples': 1344512, 'steps': 2625, 'loss/train': 3.233002185821533} +02/24/2022 04:58:38 - INFO - codeparrot_training - Step 2626: {'lr': 0.0004997901953081646, 'samples': 1345024, 'steps': 2626, 'loss/train': 1.8113363981246948} +02/24/2022 04:58:43 - INFO - codeparrot_training - Step 2627: {'lr': 0.0004997895245643951, 'samples': 1345536, 'steps': 2627, 'loss/train': 3.721475839614868} +02/24/2022 04:58:49 - INFO - codeparrot_training - Step 2628: {'lr': 0.0004997888527506067, 'samples': 1346048, 'steps': 2628, 'loss/train': 2.823287010192871} +02/24/2022 04:58:53 - INFO - codeparrot_training - Step 2629: {'lr': 0.000499788179866802, 'samples': 1346560, 'steps': 2629, 'loss/train': 3.3880221843719482} +02/24/2022 04:58:56 - INFO - codeparrot_training - Step 2630: {'lr': 0.0004997875059129843, 'samples': 1347072, 'steps': 2630, 'loss/train': 2.731090784072876} +02/24/2022 04:59:02 - INFO - codeparrot_training - Step 2631: {'lr': 0.000499786830889156, 'samples': 1347584, 'steps': 2631, 'loss/train': 3.708174228668213} +02/24/2022 04:59:06 - INFO - codeparrot_training - Step 2632: {'lr': 0.0004997861547953203, 'samples': 1348096, 'steps': 2632, 'loss/train': 3.243811845779419} +02/24/2022 04:59:11 - INFO - codeparrot_training - Step 2633: {'lr': 0.00049978547763148, 'samples': 1348608, 'steps': 2633, 'loss/train': 2.597032070159912} +02/24/2022 04:59:15 - INFO - codeparrot_training - Step 2634: {'lr': 0.0004997847993976381, 'samples': 1349120, 'steps': 2634, 'loss/train': 2.9924778938293457} +02/24/2022 04:59:20 - INFO - codeparrot_training - Step 2635: {'lr': 0.0004997841200937975, 'samples': 1349632, 'steps': 2635, 'loss/train': 4.333327770233154} +02/24/2022 04:59:24 - INFO - codeparrot_training - Step 2636: {'lr': 0.0004997834397199609, 'samples': 1350144, 'steps': 2636, 'loss/train': 3.035799741744995} +02/24/2022 04:59:29 - INFO - codeparrot_training - Step 2637: {'lr': 0.0004997827582761315, 'samples': 1350656, 'steps': 2637, 'loss/train': 4.465551853179932} +02/24/2022 04:59:33 - INFO - codeparrot_training - Step 2638: {'lr': 0.0004997820757623119, 'samples': 1351168, 'steps': 2638, 'loss/train': 3.823025703430176} +02/24/2022 04:59:38 - INFO - codeparrot_training - Step 2639: {'lr': 0.0004997813921785054, 'samples': 1351680, 'steps': 2639, 'loss/train': 4.360265254974365} +02/24/2022 04:59:42 - INFO - codeparrot_training - Step 2640: {'lr': 0.0004997807075247146, 'samples': 1352192, 'steps': 2640, 'loss/train': 2.638787269592285} +02/24/2022 04:59:48 - INFO - codeparrot_training - Step 2641: {'lr': 0.0004997800218009426, 'samples': 1352704, 'steps': 2641, 'loss/train': 2.7928121089935303} +02/24/2022 04:59:54 - INFO - codeparrot_training - Step 2642: {'lr': 0.0004997793350071923, 'samples': 1353216, 'steps': 2642, 'loss/train': 2.997230291366577} +02/24/2022 04:59:57 - INFO - codeparrot_training - Step 2643: {'lr': 0.0004997786471434666, 'samples': 1353728, 'steps': 2643, 'loss/train': 3.406938314437866} +02/24/2022 05:00:03 - INFO - codeparrot_training - Step 2644: {'lr': 0.0004997779582097686, 'samples': 1354240, 'steps': 2644, 'loss/train': 3.1807749271392822} +02/24/2022 05:00:06 - INFO - codeparrot_training - Step 2645: {'lr': 0.0004997772682061011, 'samples': 1354752, 'steps': 2645, 'loss/train': 3.620171070098877} +02/24/2022 05:00:12 - INFO - codeparrot_training - Step 2646: {'lr': 0.000499776577132467, 'samples': 1355264, 'steps': 2646, 'loss/train': 2.2439463138580322} +02/24/2022 05:00:15 - INFO - codeparrot_training - Step 2647: {'lr': 0.0004997758849888693, 'samples': 1355776, 'steps': 2647, 'loss/train': 3.4832112789154053} +02/24/2022 05:00:20 - INFO - codeparrot_training - Step 2648: {'lr': 0.0004997751917753113, 'samples': 1356288, 'steps': 2648, 'loss/train': 4.780531883239746} +02/24/2022 05:00:24 - INFO - codeparrot_training - Step 2649: {'lr': 0.0004997744974917955, 'samples': 1356800, 'steps': 2649, 'loss/train': 3.1612603664398193} +02/24/2022 05:00:29 - INFO - codeparrot_training - Step 2650: {'lr': 0.0004997738021383252, 'samples': 1357312, 'steps': 2650, 'loss/train': 3.9964981079101562} +02/24/2022 05:00:33 - INFO - codeparrot_training - Step 2651: {'lr': 0.000499773105714903, 'samples': 1357824, 'steps': 2651, 'loss/train': 2.747293472290039} +02/24/2022 05:00:38 - INFO - codeparrot_training - Step 2652: {'lr': 0.0004997724082215323, 'samples': 1358336, 'steps': 2652, 'loss/train': 2.8473522663116455} +02/24/2022 05:00:42 - INFO - codeparrot_training - Step 2653: {'lr': 0.0004997717096582159, 'samples': 1358848, 'steps': 2653, 'loss/train': 3.1151986122131348} +02/24/2022 05:00:48 - INFO - codeparrot_training - Step 2654: {'lr': 0.0004997710100249568, 'samples': 1359360, 'steps': 2654, 'loss/train': 0.9005175232887268} +02/24/2022 05:00:51 - INFO - codeparrot_training - Step 2655: {'lr': 0.000499770309321758, 'samples': 1359872, 'steps': 2655, 'loss/train': 3.2134785652160645} +02/24/2022 05:00:57 - INFO - codeparrot_training - Step 2656: {'lr': 0.0004997696075486225, 'samples': 1360384, 'steps': 2656, 'loss/train': 3.5971925258636475} +02/24/2022 05:01:00 - INFO - codeparrot_training - Step 2657: {'lr': 0.0004997689047055534, 'samples': 1360896, 'steps': 2657, 'loss/train': 3.3990397453308105} +02/24/2022 05:01:06 - INFO - codeparrot_training - Step 2658: {'lr': 0.0004997682007925535, 'samples': 1361408, 'steps': 2658, 'loss/train': 1.5455138683319092} +02/24/2022 05:01:09 - INFO - codeparrot_training - Step 2659: {'lr': 0.0004997674958096259, 'samples': 1361920, 'steps': 2659, 'loss/train': 4.938658714294434} +02/24/2022 05:01:15 - INFO - codeparrot_training - Step 2660: {'lr': 0.0004997667897567738, 'samples': 1362432, 'steps': 2660, 'loss/train': 1.8334852457046509} +02/24/2022 05:01:18 - INFO - codeparrot_training - Step 2661: {'lr': 0.000499766082634, 'samples': 1362944, 'steps': 2661, 'loss/train': 1.4658668041229248} +02/24/2022 05:01:24 - INFO - codeparrot_training - Step 2662: {'lr': 0.0004997653744413076, 'samples': 1363456, 'steps': 2662, 'loss/train': 3.3851983547210693} +02/24/2022 05:01:27 - INFO - codeparrot_training - Step 2663: {'lr': 0.0004997646651786996, 'samples': 1363968, 'steps': 2663, 'loss/train': 3.7666029930114746} +02/24/2022 05:01:34 - INFO - codeparrot_training - Step 2664: {'lr': 0.0004997639548461792, 'samples': 1364480, 'steps': 2664, 'loss/train': 1.4481109380722046} +02/24/2022 05:01:37 - INFO - codeparrot_training - Step 2665: {'lr': 0.0004997632434437493, 'samples': 1364992, 'steps': 2665, 'loss/train': 2.707970142364502} +02/24/2022 05:01:43 - INFO - codeparrot_training - Step 2666: {'lr': 0.0004997625309714129, 'samples': 1365504, 'steps': 2666, 'loss/train': 3.949827194213867} +02/24/2022 05:01:46 - INFO - codeparrot_training - Step 2667: {'lr': 0.0004997618174291732, 'samples': 1366016, 'steps': 2667, 'loss/train': 3.3576014041900635} +02/24/2022 05:01:51 - INFO - codeparrot_training - Step 2668: {'lr': 0.0004997611028170332, 'samples': 1366528, 'steps': 2668, 'loss/train': 3.399460554122925} +02/24/2022 05:01:55 - INFO - codeparrot_training - Step 2669: {'lr': 0.000499760387134996, 'samples': 1367040, 'steps': 2669, 'loss/train': 4.606263637542725} +02/24/2022 05:02:01 - INFO - codeparrot_training - Step 2670: {'lr': 0.0004997596703830645, 'samples': 1367552, 'steps': 2670, 'loss/train': 1.6423394680023193} +02/24/2022 05:02:04 - INFO - codeparrot_training - Step 2671: {'lr': 0.0004997589525612418, 'samples': 1368064, 'steps': 2671, 'loss/train': 3.123924493789673} +02/24/2022 05:02:09 - INFO - codeparrot_training - Step 2672: {'lr': 0.0004997582336695312, 'samples': 1368576, 'steps': 2672, 'loss/train': 3.1173181533813477} +02/24/2022 05:02:13 - INFO - codeparrot_training - Step 2673: {'lr': 0.0004997575137079355, 'samples': 1369088, 'steps': 2673, 'loss/train': 2.865905523300171} +02/24/2022 05:02:19 - INFO - codeparrot_training - Step 2674: {'lr': 0.0004997567926764581, 'samples': 1369600, 'steps': 2674, 'loss/train': 2.6781394481658936} +02/24/2022 05:02:22 - INFO - codeparrot_training - Step 2675: {'lr': 0.0004997560705751018, 'samples': 1370112, 'steps': 2675, 'loss/train': 2.5407214164733887} +02/24/2022 05:02:28 - INFO - codeparrot_training - Step 2676: {'lr': 0.0004997553474038698, 'samples': 1370624, 'steps': 2676, 'loss/train': 2.841752529144287} +02/24/2022 05:02:32 - INFO - codeparrot_training - Step 2677: {'lr': 0.0004997546231627652, 'samples': 1371136, 'steps': 2677, 'loss/train': 3.3782777786254883} +02/24/2022 05:02:37 - INFO - codeparrot_training - Step 2678: {'lr': 0.0004997538978517912, 'samples': 1371648, 'steps': 2678, 'loss/train': 4.038352012634277} +02/24/2022 05:02:40 - INFO - codeparrot_training - Step 2679: {'lr': 0.0004997531714709506, 'samples': 1372160, 'steps': 2679, 'loss/train': 3.6375372409820557} +02/24/2022 05:02:46 - INFO - codeparrot_training - Step 2680: {'lr': 0.0004997524440202469, 'samples': 1372672, 'steps': 2680, 'loss/train': 3.3535213470458984} +02/24/2022 05:02:49 - INFO - codeparrot_training - Step 2681: {'lr': 0.0004997517154996829, 'samples': 1373184, 'steps': 2681, 'loss/train': 3.3058862686157227} +02/24/2022 05:02:57 - INFO - codeparrot_training - Step 2682: {'lr': 0.000499750985909262, 'samples': 1373696, 'steps': 2682, 'loss/train': 2.548179864883423} +02/24/2022 05:03:01 - INFO - codeparrot_training - Step 2683: {'lr': 0.0004997502552489871, 'samples': 1374208, 'steps': 2683, 'loss/train': 3.678434371948242} +02/24/2022 05:03:06 - INFO - codeparrot_training - Step 2684: {'lr': 0.0004997495235188614, 'samples': 1374720, 'steps': 2684, 'loss/train': 0.5663205981254578} +02/24/2022 05:03:10 - INFO - codeparrot_training - Step 2685: {'lr': 0.0004997487907188881, 'samples': 1375232, 'steps': 2685, 'loss/train': 2.213472843170166} +02/24/2022 05:03:15 - INFO - codeparrot_training - Step 2686: {'lr': 0.0004997480568490702, 'samples': 1375744, 'steps': 2686, 'loss/train': 3.539717435836792} +02/24/2022 05:03:19 - INFO - codeparrot_training - Step 2687: {'lr': 0.0004997473219094111, 'samples': 1376256, 'steps': 2687, 'loss/train': 0.9934473037719727} +02/24/2022 05:03:24 - INFO - codeparrot_training - Step 2688: {'lr': 0.0004997465858999136, 'samples': 1376768, 'steps': 2688, 'loss/train': 2.6953916549682617} +02/24/2022 05:03:28 - INFO - codeparrot_training - Step 2689: {'lr': 0.0004997458488205811, 'samples': 1377280, 'steps': 2689, 'loss/train': 4.212241172790527} +02/24/2022 05:03:33 - INFO - codeparrot_training - Step 2690: {'lr': 0.0004997451106714166, 'samples': 1377792, 'steps': 2690, 'loss/train': 3.1946170330047607} +02/24/2022 05:03:37 - INFO - codeparrot_training - Step 2691: {'lr': 0.0004997443714524235, 'samples': 1378304, 'steps': 2691, 'loss/train': 1.975101351737976} +02/24/2022 05:03:44 - INFO - codeparrot_training - Step 2692: {'lr': 0.0004997436311636046, 'samples': 1378816, 'steps': 2692, 'loss/train': 2.9100875854492188} +02/24/2022 05:03:48 - INFO - codeparrot_training - Step 2693: {'lr': 0.0004997428898049635, 'samples': 1379328, 'steps': 2693, 'loss/train': 3.6551802158355713} +02/24/2022 05:03:53 - INFO - codeparrot_training - Step 2694: {'lr': 0.0004997421473765031, 'samples': 1379840, 'steps': 2694, 'loss/train': 2.9520838260650635} +02/24/2022 05:03:59 - INFO - codeparrot_training - Step 2695: {'lr': 0.0004997414038782266, 'samples': 1380352, 'steps': 2695, 'loss/train': 2.3361704349517822} +02/24/2022 05:04:02 - INFO - codeparrot_training - Step 2696: {'lr': 0.0004997406593101373, 'samples': 1380864, 'steps': 2696, 'loss/train': 3.740046501159668} +02/24/2022 05:04:08 - INFO - codeparrot_training - Step 2697: {'lr': 0.0004997399136722383, 'samples': 1381376, 'steps': 2697, 'loss/train': 1.9504197835922241} +02/24/2022 05:04:11 - INFO - codeparrot_training - Step 2698: {'lr': 0.0004997391669645327, 'samples': 1381888, 'steps': 2698, 'loss/train': 2.307004928588867} +02/24/2022 05:04:17 - INFO - codeparrot_training - Step 2699: {'lr': 0.0004997384191870239, 'samples': 1382400, 'steps': 2699, 'loss/train': 2.8918838500976562} +02/24/2022 05:04:20 - INFO - codeparrot_training - Step 2700: {'lr': 0.000499737670339715, 'samples': 1382912, 'steps': 2700, 'loss/train': 4.2642822265625} +02/24/2022 05:04:28 - INFO - codeparrot_training - Step 2701: {'lr': 0.0004997369204226093, 'samples': 1383424, 'steps': 2701, 'loss/train': 3.9701991081237793} +02/24/2022 05:04:31 - INFO - codeparrot_training - Step 2702: {'lr': 0.0004997361694357098, 'samples': 1383936, 'steps': 2702, 'loss/train': 1.7114585638046265} +02/24/2022 05:04:37 - INFO - codeparrot_training - Step 2703: {'lr': 0.00049973541737902, 'samples': 1384448, 'steps': 2703, 'loss/train': 3.1372931003570557} +02/24/2022 05:04:40 - INFO - codeparrot_training - Step 2704: {'lr': 0.0004997346642525428, 'samples': 1384960, 'steps': 2704, 'loss/train': 2.4286675453186035} +02/24/2022 05:04:46 - INFO - codeparrot_training - Step 2705: {'lr': 0.0004997339100562817, 'samples': 1385472, 'steps': 2705, 'loss/train': 3.4984283447265625} +02/24/2022 05:04:49 - INFO - codeparrot_training - Step 2706: {'lr': 0.0004997331547902398, 'samples': 1385984, 'steps': 2706, 'loss/train': 3.564594030380249} +02/24/2022 05:04:55 - INFO - codeparrot_training - Step 2707: {'lr': 0.0004997323984544204, 'samples': 1386496, 'steps': 2707, 'loss/train': 1.8964987993240356} +02/24/2022 05:04:58 - INFO - codeparrot_training - Step 2708: {'lr': 0.0004997316410488267, 'samples': 1387008, 'steps': 2708, 'loss/train': 3.7054755687713623} +02/24/2022 05:05:04 - INFO - codeparrot_training - Step 2709: {'lr': 0.0004997308825734619, 'samples': 1387520, 'steps': 2709, 'loss/train': 2.7727036476135254} +02/24/2022 05:05:07 - INFO - codeparrot_training - Step 2710: {'lr': 0.0004997301230283294, 'samples': 1388032, 'steps': 2710, 'loss/train': 4.350216865539551} +02/24/2022 05:05:15 - INFO - codeparrot_training - Step 2711: {'lr': 0.0004997293624134322, 'samples': 1388544, 'steps': 2711, 'loss/train': 3.0784571170806885} +02/24/2022 05:05:18 - INFO - codeparrot_training - Step 2712: {'lr': 0.0004997286007287738, 'samples': 1389056, 'steps': 2712, 'loss/train': 2.5586674213409424} +02/24/2022 05:05:22 - INFO - codeparrot_training - Step 2713: {'lr': 0.0004997278379743574, 'samples': 1389568, 'steps': 2713, 'loss/train': 0.2997039556503296} +02/24/2022 05:05:27 - INFO - codeparrot_training - Step 2714: {'lr': 0.0004997270741501861, 'samples': 1390080, 'steps': 2714, 'loss/train': 2.3912224769592285} +02/24/2022 05:05:33 - INFO - codeparrot_training - Step 2715: {'lr': 0.0004997263092562634, 'samples': 1390592, 'steps': 2715, 'loss/train': 2.553396463394165} +02/24/2022 05:05:36 - INFO - codeparrot_training - Step 2716: {'lr': 0.0004997255432925926, 'samples': 1391104, 'steps': 2716, 'loss/train': 2.2280917167663574} +02/24/2022 05:05:42 - INFO - codeparrot_training - Step 2717: {'lr': 0.0004997247762591766, 'samples': 1391616, 'steps': 2717, 'loss/train': 2.6942803859710693} +02/24/2022 05:05:45 - INFO - codeparrot_training - Step 2718: {'lr': 0.0004997240081560193, 'samples': 1392128, 'steps': 2718, 'loss/train': 7.449179172515869} +02/24/2022 05:05:51 - INFO - codeparrot_training - Step 2719: {'lr': 0.0004997232389831234, 'samples': 1392640, 'steps': 2719, 'loss/train': 2.6250391006469727} +02/24/2022 05:05:54 - INFO - codeparrot_training - Step 2720: {'lr': 0.0004997224687404926, 'samples': 1393152, 'steps': 2720, 'loss/train': 3.1600351333618164} +02/24/2022 05:06:00 - INFO - codeparrot_training - Step 2721: {'lr': 0.0004997216974281299, 'samples': 1393664, 'steps': 2721, 'loss/train': 3.197535276412964} +02/24/2022 05:06:03 - INFO - codeparrot_training - Step 2722: {'lr': 0.0004997209250460387, 'samples': 1394176, 'steps': 2722, 'loss/train': 2.6948647499084473} +02/24/2022 05:06:09 - INFO - codeparrot_training - Step 2723: {'lr': 0.0004997201515942225, 'samples': 1394688, 'steps': 2723, 'loss/train': 1.3640938997268677} +02/24/2022 05:06:12 - INFO - codeparrot_training - Step 2724: {'lr': 0.0004997193770726844, 'samples': 1395200, 'steps': 2724, 'loss/train': 4.225404739379883} +02/24/2022 05:06:17 - INFO - codeparrot_training - Step 2725: {'lr': 0.0004997186014814278, 'samples': 1395712, 'steps': 2725, 'loss/train': 2.874544382095337} +02/24/2022 05:06:21 - INFO - codeparrot_training - Step 2726: {'lr': 0.000499717824820456, 'samples': 1396224, 'steps': 2726, 'loss/train': 3.7581987380981445} +02/24/2022 05:06:28 - INFO - codeparrot_training - Step 2727: {'lr': 0.0004997170470897723, 'samples': 1396736, 'steps': 2727, 'loss/train': 3.2515029907226562} +02/24/2022 05:06:32 - INFO - codeparrot_training - Step 2728: {'lr': 0.0004997162682893801, 'samples': 1397248, 'steps': 2728, 'loss/train': 3.428917646408081} +02/24/2022 05:06:37 - INFO - codeparrot_training - Step 2729: {'lr': 0.0004997154884192827, 'samples': 1397760, 'steps': 2729, 'loss/train': 2.7987887859344482} +02/24/2022 05:06:41 - INFO - codeparrot_training - Step 2730: {'lr': 0.0004997147074794835, 'samples': 1398272, 'steps': 2730, 'loss/train': 2.8409271240234375} +02/24/2022 05:06:46 - INFO - codeparrot_training - Step 2731: {'lr': 0.0004997139254699856, 'samples': 1398784, 'steps': 2731, 'loss/train': 4.316927433013916} +02/24/2022 05:06:50 - INFO - codeparrot_training - Step 2732: {'lr': 0.0004997131423907927, 'samples': 1399296, 'steps': 2732, 'loss/train': 4.355013847351074} +02/24/2022 05:06:55 - INFO - codeparrot_training - Step 2733: {'lr': 0.000499712358241908, 'samples': 1399808, 'steps': 2733, 'loss/train': 3.3068785667419434} +02/24/2022 05:06:59 - INFO - codeparrot_training - Step 2734: {'lr': 0.0004997115730233349, 'samples': 1400320, 'steps': 2734, 'loss/train': 4.108318328857422} +02/24/2022 05:07:04 - INFO - codeparrot_training - Step 2735: {'lr': 0.0004997107867350765, 'samples': 1400832, 'steps': 2735, 'loss/train': 2.8441662788391113} +02/24/2022 05:07:08 - INFO - codeparrot_training - Step 2736: {'lr': 0.0004997099993771365, 'samples': 1401344, 'steps': 2736, 'loss/train': 2.534899950027466} +02/24/2022 05:07:15 - INFO - codeparrot_training - Step 2737: {'lr': 0.0004997092109495181, 'samples': 1401856, 'steps': 2737, 'loss/train': 3.0105106830596924} +02/24/2022 05:07:18 - INFO - codeparrot_training - Step 2738: {'lr': 0.0004997084214522249, 'samples': 1402368, 'steps': 2738, 'loss/train': 2.264688014984131} +02/24/2022 05:07:24 - INFO - codeparrot_training - Step 2739: {'lr': 0.0004997076308852599, 'samples': 1402880, 'steps': 2739, 'loss/train': 3.0359232425689697} +02/24/2022 05:07:27 - INFO - codeparrot_training - Step 2740: {'lr': 0.0004997068392486268, 'samples': 1403392, 'steps': 2740, 'loss/train': 1.9524973630905151} +02/24/2022 05:07:33 - INFO - codeparrot_training - Step 2741: {'lr': 0.0004997060465423288, 'samples': 1403904, 'steps': 2741, 'loss/train': 1.9212144613265991} +02/24/2022 05:07:36 - INFO - codeparrot_training - Step 2742: {'lr': 0.0004997052527663696, 'samples': 1404416, 'steps': 2742, 'loss/train': 0.3470495045185089} +02/24/2022 05:07:42 - INFO - codeparrot_training - Step 2743: {'lr': 0.0004997044579207522, 'samples': 1404928, 'steps': 2743, 'loss/train': 3.8068249225616455} +02/24/2022 05:07:46 - INFO - codeparrot_training - Step 2744: {'lr': 0.0004997036620054803, 'samples': 1405440, 'steps': 2744, 'loss/train': 3.403555393218994} +02/24/2022 05:07:51 - INFO - codeparrot_training - Step 2745: {'lr': 0.0004997028650205572, 'samples': 1405952, 'steps': 2745, 'loss/train': 9.212778091430664} +02/24/2022 05:07:54 - INFO - codeparrot_training - Step 2746: {'lr': 0.0004997020669659862, 'samples': 1406464, 'steps': 2746, 'loss/train': 2.6647017002105713} +02/24/2022 05:08:02 - INFO - codeparrot_training - Step 2747: {'lr': 0.000499701267841771, 'samples': 1406976, 'steps': 2747, 'loss/train': 3.697998046875} +02/24/2022 05:08:05 - INFO - codeparrot_training - Step 2748: {'lr': 0.0004997004676479147, 'samples': 1407488, 'steps': 2748, 'loss/train': 2.0773494243621826} +02/24/2022 05:08:11 - INFO - codeparrot_training - Step 2749: {'lr': 0.0004996996663844209, 'samples': 1408000, 'steps': 2749, 'loss/train': 1.2449995279312134} +02/24/2022 05:08:14 - INFO - codeparrot_training - Step 2750: {'lr': 0.0004996988640512931, 'samples': 1408512, 'steps': 2750, 'loss/train': 3.868187665939331} +02/24/2022 05:08:20 - INFO - codeparrot_training - Step 2751: {'lr': 0.0004996980606485346, 'samples': 1409024, 'steps': 2751, 'loss/train': 2.1135287284851074} +02/24/2022 05:08:23 - INFO - codeparrot_training - Step 2752: {'lr': 0.0004996972561761489, 'samples': 1409536, 'steps': 2752, 'loss/train': 3.0647342205047607} +02/24/2022 05:08:29 - INFO - codeparrot_training - Step 2753: {'lr': 0.0004996964506341395, 'samples': 1410048, 'steps': 2753, 'loss/train': 2.9392306804656982} +02/24/2022 05:08:32 - INFO - codeparrot_training - Step 2754: {'lr': 0.0004996956440225098, 'samples': 1410560, 'steps': 2754, 'loss/train': 4.008497714996338} +02/24/2022 05:08:38 - INFO - codeparrot_training - Step 2755: {'lr': 0.0004996948363412631, 'samples': 1411072, 'steps': 2755, 'loss/train': 2.9995925426483154} +02/24/2022 05:08:43 - INFO - codeparrot_training - Step 2756: {'lr': 0.0004996940275904031, 'samples': 1411584, 'steps': 2756, 'loss/train': 1.4026012420654297} +02/24/2022 05:08:46 - INFO - codeparrot_training - Step 2757: {'lr': 0.0004996932177699332, 'samples': 1412096, 'steps': 2757, 'loss/train': 3.455096483230591} +02/24/2022 05:08:52 - INFO - codeparrot_training - Step 2758: {'lr': 0.0004996924068798569, 'samples': 1412608, 'steps': 2758, 'loss/train': 2.144666910171509} +02/24/2022 05:08:56 - INFO - codeparrot_training - Step 2759: {'lr': 0.0004996915949201775, 'samples': 1413120, 'steps': 2759, 'loss/train': 3.213867664337158} +02/24/2022 05:09:01 - INFO - codeparrot_training - Step 2760: {'lr': 0.0004996907818908987, 'samples': 1413632, 'steps': 2760, 'loss/train': 3.3109800815582275} +02/24/2022 05:09:05 - INFO - codeparrot_training - Step 2761: {'lr': 0.0004996899677920238, 'samples': 1414144, 'steps': 2761, 'loss/train': 3.086364269256592} +02/24/2022 05:09:12 - INFO - codeparrot_training - Step 2762: {'lr': 0.0004996891526235564, 'samples': 1414656, 'steps': 2762, 'loss/train': 3.8656375408172607} +02/24/2022 05:09:15 - INFO - codeparrot_training - Step 2763: {'lr': 0.0004996883363854998, 'samples': 1415168, 'steps': 2763, 'loss/train': 2.529487133026123} +02/24/2022 05:09:21 - INFO - codeparrot_training - Step 2764: {'lr': 0.0004996875190778579, 'samples': 1415680, 'steps': 2764, 'loss/train': 1.996953010559082} +02/24/2022 05:09:24 - INFO - codeparrot_training - Step 2765: {'lr': 0.0004996867007006339, 'samples': 1416192, 'steps': 2765, 'loss/train': 3.3359246253967285} +02/24/2022 05:09:30 - INFO - codeparrot_training - Step 2766: {'lr': 0.0004996858812538312, 'samples': 1416704, 'steps': 2766, 'loss/train': 3.4091908931732178} +02/24/2022 05:09:33 - INFO - codeparrot_training - Step 2767: {'lr': 0.0004996850607374535, 'samples': 1417216, 'steps': 2767, 'loss/train': 3.3565618991851807} +02/24/2022 05:09:39 - INFO - codeparrot_training - Step 2768: {'lr': 0.0004996842391515044, 'samples': 1417728, 'steps': 2768, 'loss/train': 2.5190060138702393} +02/24/2022 05:09:42 - INFO - codeparrot_training - Step 2769: {'lr': 0.0004996834164959872, 'samples': 1418240, 'steps': 2769, 'loss/train': 3.107618570327759} +02/24/2022 05:09:48 - INFO - codeparrot_training - Step 2770: {'lr': 0.0004996825927709056, 'samples': 1418752, 'steps': 2770, 'loss/train': 3.328472852706909} +02/24/2022 05:09:51 - INFO - codeparrot_training - Step 2771: {'lr': 0.0004996817679762631, 'samples': 1419264, 'steps': 2771, 'loss/train': 3.195223808288574} +02/24/2022 05:09:58 - INFO - codeparrot_training - Step 2772: {'lr': 0.000499680942112063, 'samples': 1419776, 'steps': 2772, 'loss/train': 0.8028042316436768} +02/24/2022 05:10:02 - INFO - codeparrot_training - Step 2773: {'lr': 0.0004996801151783092, 'samples': 1420288, 'steps': 2773, 'loss/train': 3.554117202758789} +02/24/2022 05:10:07 - INFO - codeparrot_training - Step 2774: {'lr': 0.000499679287175005, 'samples': 1420800, 'steps': 2774, 'loss/train': 4.272123336791992} +02/24/2022 05:10:11 - INFO - codeparrot_training - Step 2775: {'lr': 0.000499678458102154, 'samples': 1421312, 'steps': 2775, 'loss/train': 1.5888338088989258} +02/24/2022 05:10:16 - INFO - codeparrot_training - Step 2776: {'lr': 0.0004996776279597598, 'samples': 1421824, 'steps': 2776, 'loss/train': 2.162257671356201} +02/24/2022 05:10:20 - INFO - codeparrot_training - Step 2777: {'lr': 0.0004996767967478259, 'samples': 1422336, 'steps': 2777, 'loss/train': 2.9495656490325928} +02/24/2022 05:10:25 - INFO - codeparrot_training - Step 2778: {'lr': 0.0004996759644663559, 'samples': 1422848, 'steps': 2778, 'loss/train': 2.884937286376953} +02/24/2022 05:10:29 - INFO - codeparrot_training - Step 2779: {'lr': 0.0004996751311153535, 'samples': 1423360, 'steps': 2779, 'loss/train': 3.068324565887451} +02/24/2022 05:10:34 - INFO - codeparrot_training - Step 2780: {'lr': 0.0004996742966948219, 'samples': 1423872, 'steps': 2780, 'loss/train': 2.154026985168457} +02/24/2022 05:10:38 - INFO - codeparrot_training - Step 2781: {'lr': 0.000499673461204765, 'samples': 1424384, 'steps': 2781, 'loss/train': 4.119757652282715} +02/24/2022 05:10:43 - INFO - codeparrot_training - Step 2782: {'lr': 0.0004996726246451862, 'samples': 1424896, 'steps': 2782, 'loss/train': 3.3859035968780518} +02/24/2022 05:10:47 - INFO - codeparrot_training - Step 2783: {'lr': 0.0004996717870160892, 'samples': 1425408, 'steps': 2783, 'loss/train': 2.421213388442993} +02/24/2022 05:10:54 - INFO - codeparrot_training - Step 2784: {'lr': 0.0004996709483174775, 'samples': 1425920, 'steps': 2784, 'loss/train': 2.6468234062194824} +02/24/2022 05:10:57 - INFO - codeparrot_training - Step 2785: {'lr': 0.0004996701085493547, 'samples': 1426432, 'steps': 2785, 'loss/train': 3.9776830673217773} +02/24/2022 05:11:03 - INFO - codeparrot_training - Step 2786: {'lr': 0.0004996692677117246, 'samples': 1426944, 'steps': 2786, 'loss/train': 2.395897150039673} +02/24/2022 05:11:06 - INFO - codeparrot_training - Step 2787: {'lr': 0.0004996684258045906, 'samples': 1427456, 'steps': 2787, 'loss/train': 2.4937233924865723} +02/24/2022 05:11:12 - INFO - codeparrot_training - Step 2788: {'lr': 0.0004996675828279562, 'samples': 1427968, 'steps': 2788, 'loss/train': 2.744579315185547} +02/24/2022 05:11:15 - INFO - codeparrot_training - Step 2789: {'lr': 0.0004996667387818254, 'samples': 1428480, 'steps': 2789, 'loss/train': 5.006727695465088} +02/24/2022 05:11:21 - INFO - codeparrot_training - Step 2790: {'lr': 0.0004996658936662013, 'samples': 1428992, 'steps': 2790, 'loss/train': 3.5648996829986572} +02/24/2022 05:11:24 - INFO - codeparrot_training - Step 2791: {'lr': 0.0004996650474810879, 'samples': 1429504, 'steps': 2791, 'loss/train': 3.327510356903076} +02/24/2022 05:11:30 - INFO - codeparrot_training - Step 2792: {'lr': 0.0004996642002264887, 'samples': 1430016, 'steps': 2792, 'loss/train': 3.139310836791992} +02/24/2022 05:11:33 - INFO - codeparrot_training - Step 2793: {'lr': 0.0004996633519024074, 'samples': 1430528, 'steps': 2793, 'loss/train': 3.672539710998535} +02/24/2022 05:11:41 - INFO - codeparrot_training - Step 2794: {'lr': 0.0004996625025088476, 'samples': 1431040, 'steps': 2794, 'loss/train': 3.735501766204834} +02/24/2022 05:11:44 - INFO - codeparrot_training - Step 2795: {'lr': 0.0004996616520458128, 'samples': 1431552, 'steps': 2795, 'loss/train': 3.2168426513671875} +02/24/2022 05:11:50 - INFO - codeparrot_training - Step 2796: {'lr': 0.0004996608005133068, 'samples': 1432064, 'steps': 2796, 'loss/train': 2.692532777786255} +02/24/2022 05:11:53 - INFO - codeparrot_training - Step 2797: {'lr': 0.0004996599479113333, 'samples': 1432576, 'steps': 2797, 'loss/train': 2.4207661151885986} +02/24/2022 05:11:59 - INFO - codeparrot_training - Step 2798: {'lr': 0.0004996590942398958, 'samples': 1433088, 'steps': 2798, 'loss/train': 3.100615978240967} +02/24/2022 05:12:02 - INFO - codeparrot_training - Step 2799: {'lr': 0.0004996582394989979, 'samples': 1433600, 'steps': 2799, 'loss/train': 1.690998911857605} +02/24/2022 05:12:08 - INFO - codeparrot_training - Step 2800: {'lr': 0.0004996573836886434, 'samples': 1434112, 'steps': 2800, 'loss/train': 2.2832417488098145} +02/24/2022 05:12:11 - INFO - codeparrot_training - Step 2801: {'lr': 0.0004996565268088362, 'samples': 1434624, 'steps': 2801, 'loss/train': 2.6704163551330566} +02/24/2022 05:12:17 - INFO - codeparrot_training - Step 2802: {'lr': 0.0004996556688595794, 'samples': 1435136, 'steps': 2802, 'loss/train': 1.6761455535888672} +02/24/2022 05:12:20 - INFO - codeparrot_training - Step 2803: {'lr': 0.0004996548098408772, 'samples': 1435648, 'steps': 2803, 'loss/train': 3.0169854164123535} +02/24/2022 05:12:26 - INFO - codeparrot_training - Step 2804: {'lr': 0.0004996539497527329, 'samples': 1436160, 'steps': 2804, 'loss/train': 3.3277251720428467} +02/24/2022 05:12:29 - INFO - codeparrot_training - Step 2805: {'lr': 0.0004996530885951505, 'samples': 1436672, 'steps': 2805, 'loss/train': 3.2999966144561768} +02/24/2022 05:12:35 - INFO - codeparrot_training - Step 2806: {'lr': 0.0004996522263681335, 'samples': 1437184, 'steps': 2806, 'loss/train': 3.297621965408325} +02/24/2022 05:12:38 - INFO - codeparrot_training - Step 2807: {'lr': 0.0004996513630716856, 'samples': 1437696, 'steps': 2807, 'loss/train': 3.0177981853485107} +02/24/2022 05:12:45 - INFO - codeparrot_training - Step 2808: {'lr': 0.0004996504987058105, 'samples': 1438208, 'steps': 2808, 'loss/train': 1.5601694583892822} +02/24/2022 05:12:51 - INFO - codeparrot_training - Step 2809: {'lr': 0.000499649633270512, 'samples': 1438720, 'steps': 2809, 'loss/train': 2.582554817199707} +02/24/2022 05:12:54 - INFO - codeparrot_training - Step 2810: {'lr': 0.0004996487667657938, 'samples': 1439232, 'steps': 2810, 'loss/train': 3.531783103942871} +02/24/2022 05:12:58 - INFO - codeparrot_training - Step 2811: {'lr': 0.0004996478991916595, 'samples': 1439744, 'steps': 2811, 'loss/train': 0.40621069073677063} +02/24/2022 05:13:03 - INFO - codeparrot_training - Step 2812: {'lr': 0.0004996470305481127, 'samples': 1440256, 'steps': 2812, 'loss/train': 1.916076421737671} +02/24/2022 05:13:07 - INFO - codeparrot_training - Step 2813: {'lr': 0.0004996461608351575, 'samples': 1440768, 'steps': 2813, 'loss/train': 2.6425673961639404} +02/24/2022 05:13:12 - INFO - codeparrot_training - Step 2814: {'lr': 0.0004996452900527974, 'samples': 1441280, 'steps': 2814, 'loss/train': 3.936494827270508} +02/24/2022 05:13:18 - INFO - codeparrot_training - Step 2815: {'lr': 0.0004996444182010361, 'samples': 1441792, 'steps': 2815, 'loss/train': 2.7605273723602295} +02/24/2022 05:13:21 - INFO - codeparrot_training - Step 2816: {'lr': 0.0004996435452798775, 'samples': 1442304, 'steps': 2816, 'loss/train': 2.6253724098205566} +02/24/2022 05:13:27 - INFO - codeparrot_training - Step 2817: {'lr': 0.000499642671289325, 'samples': 1442816, 'steps': 2817, 'loss/train': 1.991423487663269} +02/24/2022 05:13:30 - INFO - codeparrot_training - Step 2818: {'lr': 0.0004996417962293828, 'samples': 1443328, 'steps': 2818, 'loss/train': 1.931232213973999} +02/24/2022 05:13:38 - INFO - codeparrot_training - Step 2819: {'lr': 0.0004996409201000543, 'samples': 1443840, 'steps': 2819, 'loss/train': 1.2316335439682007} +02/24/2022 05:13:41 - INFO - codeparrot_training - Step 2820: {'lr': 0.0004996400429013434, 'samples': 1444352, 'steps': 2820, 'loss/train': 2.7481179237365723} +02/24/2022 05:13:47 - INFO - codeparrot_training - Step 2821: {'lr': 0.0004996391646332537, 'samples': 1444864, 'steps': 2821, 'loss/train': 3.186208963394165} +02/24/2022 05:13:50 - INFO - codeparrot_training - Step 2822: {'lr': 0.0004996382852957892, 'samples': 1445376, 'steps': 2822, 'loss/train': 3.020224094390869} +02/24/2022 05:13:56 - INFO - codeparrot_training - Step 2823: {'lr': 0.0004996374048889536, 'samples': 1445888, 'steps': 2823, 'loss/train': 0.28825661540031433} +02/24/2022 05:13:59 - INFO - codeparrot_training - Step 2824: {'lr': 0.0004996365234127506, 'samples': 1446400, 'steps': 2824, 'loss/train': 4.033950328826904} +02/24/2022 05:14:05 - INFO - codeparrot_training - Step 2825: {'lr': 0.000499635640867184, 'samples': 1446912, 'steps': 2825, 'loss/train': 2.2927799224853516} +02/24/2022 05:14:08 - INFO - codeparrot_training - Step 2826: {'lr': 0.0004996347572522575, 'samples': 1447424, 'steps': 2826, 'loss/train': 3.171036958694458} +02/24/2022 05:14:13 - INFO - codeparrot_training - Step 2827: {'lr': 0.000499633872567975, 'samples': 1447936, 'steps': 2827, 'loss/train': 2.999267578125} +02/24/2022 05:14:17 - INFO - codeparrot_training - Step 2828: {'lr': 0.0004996329868143404, 'samples': 1448448, 'steps': 2828, 'loss/train': 2.059232234954834} +02/24/2022 05:14:24 - INFO - codeparrot_training - Step 2829: {'lr': 0.0004996320999913572, 'samples': 1448960, 'steps': 2829, 'loss/train': 2.5450592041015625} +02/24/2022 05:14:28 - INFO - codeparrot_training - Step 2830: {'lr': 0.0004996312120990293, 'samples': 1449472, 'steps': 2830, 'loss/train': 3.0187501907348633} +02/24/2022 05:14:33 - INFO - codeparrot_training - Step 2831: {'lr': 0.0004996303231373607, 'samples': 1449984, 'steps': 2831, 'loss/train': 3.935559034347534} +02/24/2022 05:14:37 - INFO - codeparrot_training - Step 2832: {'lr': 0.000499629433106355, 'samples': 1450496, 'steps': 2832, 'loss/train': 3.5534451007843018} +02/24/2022 05:14:42 - INFO - codeparrot_training - Step 2833: {'lr': 0.000499628542006016, 'samples': 1451008, 'steps': 2833, 'loss/train': 3.1241888999938965} +02/24/2022 05:14:45 - INFO - codeparrot_training - Step 2834: {'lr': 0.0004996276498363477, 'samples': 1451520, 'steps': 2834, 'loss/train': 3.4773495197296143} +02/24/2022 05:14:51 - INFO - codeparrot_training - Step 2835: {'lr': 0.0004996267565973538, 'samples': 1452032, 'steps': 2835, 'loss/train': 3.3543875217437744} +02/24/2022 05:14:54 - INFO - codeparrot_training - Step 2836: {'lr': 0.0004996258622890381, 'samples': 1452544, 'steps': 2836, 'loss/train': 2.600266218185425} +02/24/2022 05:15:00 - INFO - codeparrot_training - Step 2837: {'lr': 0.0004996249669114045, 'samples': 1453056, 'steps': 2837, 'loss/train': 3.269315242767334} +02/24/2022 05:15:03 - INFO - codeparrot_training - Step 2838: {'lr': 0.0004996240704644568, 'samples': 1453568, 'steps': 2838, 'loss/train': 3.004892110824585} +02/24/2022 05:15:11 - INFO - codeparrot_training - Step 2839: {'lr': 0.0004996231729481989, 'samples': 1454080, 'steps': 2839, 'loss/train': 3.3889365196228027} +02/24/2022 05:15:14 - INFO - codeparrot_training - Step 2840: {'lr': 0.0004996222743626345, 'samples': 1454592, 'steps': 2840, 'loss/train': 3.0549867153167725} +02/24/2022 05:15:20 - INFO - codeparrot_training - Step 2841: {'lr': 0.0004996213747077675, 'samples': 1455104, 'steps': 2841, 'loss/train': 3.333037853240967} +02/24/2022 05:15:23 - INFO - codeparrot_training - Step 2842: {'lr': 0.0004996204739836019, 'samples': 1455616, 'steps': 2842, 'loss/train': 3.4401707649230957} +02/24/2022 05:15:29 - INFO - codeparrot_training - Step 2843: {'lr': 0.0004996195721901415, 'samples': 1456128, 'steps': 2843, 'loss/train': 2.8262827396392822} +02/24/2022 05:15:32 - INFO - codeparrot_training - Step 2844: {'lr': 0.00049961866932739, 'samples': 1456640, 'steps': 2844, 'loss/train': 4.0146684646606445} +02/24/2022 05:15:38 - INFO - codeparrot_training - Step 2845: {'lr': 0.0004996177653953514, 'samples': 1457152, 'steps': 2845, 'loss/train': 4.328563690185547} +02/24/2022 05:15:41 - INFO - codeparrot_training - Step 2846: {'lr': 0.0004996168603940296, 'samples': 1457664, 'steps': 2846, 'loss/train': 1.920601725578308} +02/24/2022 05:15:47 - INFO - codeparrot_training - Step 2847: {'lr': 0.0004996159543234285, 'samples': 1458176, 'steps': 2847, 'loss/train': 3.8002512454986572} +02/24/2022 05:15:50 - INFO - codeparrot_training - Step 2848: {'lr': 0.0004996150471835518, 'samples': 1458688, 'steps': 2848, 'loss/train': 2.7994513511657715} +02/24/2022 05:15:56 - INFO - codeparrot_training - Step 2849: {'lr': 0.0004996141389744035, 'samples': 1459200, 'steps': 2849, 'loss/train': 2.1357784271240234} +02/24/2022 05:15:59 - INFO - codeparrot_training - Step 2850: {'lr': 0.0004996132296959876, 'samples': 1459712, 'steps': 2850, 'loss/train': 2.8863883018493652} +02/24/2022 05:16:04 - INFO - codeparrot_training - Step 2851: {'lr': 0.0004996123193483076, 'samples': 1460224, 'steps': 2851, 'loss/train': 4.0175557136535645} +02/24/2022 05:16:08 - INFO - codeparrot_training - Step 2852: {'lr': 0.000499611407931368, 'samples': 1460736, 'steps': 2852, 'loss/train': 2.7112789154052734} +02/24/2022 05:16:14 - INFO - codeparrot_training - Step 2853: {'lr': 0.0004996104954451722, 'samples': 1461248, 'steps': 2853, 'loss/train': 3.1282994747161865} +02/24/2022 05:16:17 - INFO - codeparrot_training - Step 2854: {'lr': 0.0004996095818897245, 'samples': 1461760, 'steps': 2854, 'loss/train': 3.200387954711914} +02/24/2022 05:16:23 - INFO - codeparrot_training - Step 2855: {'lr': 0.0004996086672650284, 'samples': 1462272, 'steps': 2855, 'loss/train': 3.209383964538574} +02/24/2022 05:16:26 - INFO - codeparrot_training - Step 2856: {'lr': 0.0004996077515710881, 'samples': 1462784, 'steps': 2856, 'loss/train': 1.6988755464553833} +02/24/2022 05:16:32 - INFO - codeparrot_training - Step 2857: {'lr': 0.0004996068348079075, 'samples': 1463296, 'steps': 2857, 'loss/train': 5.365222454071045} +02/24/2022 05:16:35 - INFO - codeparrot_training - Step 2858: {'lr': 0.0004996059169754904, 'samples': 1463808, 'steps': 2858, 'loss/train': 4.547300338745117} +02/24/2022 05:16:41 - INFO - codeparrot_training - Step 2859: {'lr': 0.0004996049980738409, 'samples': 1464320, 'steps': 2859, 'loss/train': 2.6535141468048096} +02/24/2022 05:16:44 - INFO - codeparrot_training - Step 2860: {'lr': 0.0004996040781029629, 'samples': 1464832, 'steps': 2860, 'loss/train': 0.21506935358047485} +02/24/2022 05:16:50 - INFO - codeparrot_training - Step 2861: {'lr': 0.00049960315706286, 'samples': 1465344, 'steps': 2861, 'loss/train': 2.928163766860962} +02/24/2022 05:16:53 - INFO - codeparrot_training - Step 2862: {'lr': 0.0004996022349535367, 'samples': 1465856, 'steps': 2862, 'loss/train': 3.1556451320648193} +02/24/2022 05:16:59 - INFO - codeparrot_training - Step 2863: {'lr': 0.0004996013117749967, 'samples': 1466368, 'steps': 2863, 'loss/train': 2.578514575958252} +02/24/2022 05:17:02 - INFO - codeparrot_training - Step 2864: {'lr': 0.0004996003875272438, 'samples': 1466880, 'steps': 2864, 'loss/train': 2.881091356277466} +02/24/2022 05:17:08 - INFO - codeparrot_training - Step 2865: {'lr': 0.0004995994622102821, 'samples': 1467392, 'steps': 2865, 'loss/train': 2.6668548583984375} +02/24/2022 05:17:12 - INFO - codeparrot_training - Step 2866: {'lr': 0.0004995985358241156, 'samples': 1467904, 'steps': 2866, 'loss/train': 2.870293617248535} +02/24/2022 05:17:17 - INFO - codeparrot_training - Step 2867: {'lr': 0.0004995976083687482, 'samples': 1468416, 'steps': 2867, 'loss/train': 1.574471116065979} +02/24/2022 05:17:21 - INFO - codeparrot_training - Step 2868: {'lr': 0.000499596679844184, 'samples': 1468928, 'steps': 2868, 'loss/train': 2.1411514282226562} +02/24/2022 05:17:27 - INFO - codeparrot_training - Step 2869: {'lr': 0.0004995957502504268, 'samples': 1469440, 'steps': 2869, 'loss/train': 3.222968339920044} +02/24/2022 05:17:30 - INFO - codeparrot_training - Step 2870: {'lr': 0.0004995948195874807, 'samples': 1469952, 'steps': 2870, 'loss/train': 3.6101386547088623} +02/24/2022 05:17:36 - INFO - codeparrot_training - Step 2871: {'lr': 0.0004995938878553496, 'samples': 1470464, 'steps': 2871, 'loss/train': 4.255477428436279} +02/24/2022 05:17:39 - INFO - codeparrot_training - Step 2872: {'lr': 0.0004995929550540376, 'samples': 1470976, 'steps': 2872, 'loss/train': 2.392404556274414} +02/24/2022 05:17:45 - INFO - codeparrot_training - Step 2873: {'lr': 0.0004995920211835485, 'samples': 1471488, 'steps': 2873, 'loss/train': 2.3501245975494385} +02/24/2022 05:17:48 - INFO - codeparrot_training - Step 2874: {'lr': 0.0004995910862438866, 'samples': 1472000, 'steps': 2874, 'loss/train': 3.421520948410034} +02/24/2022 05:17:55 - INFO - codeparrot_training - Step 2875: {'lr': 0.0004995901502350556, 'samples': 1472512, 'steps': 2875, 'loss/train': 2.563455820083618} +02/24/2022 05:17:58 - INFO - codeparrot_training - Step 2876: {'lr': 0.0004995892131570598, 'samples': 1473024, 'steps': 2876, 'loss/train': 1.8721435070037842} +02/24/2022 05:18:04 - INFO - codeparrot_training - Step 2877: {'lr': 0.0004995882750099029, 'samples': 1473536, 'steps': 2877, 'loss/train': 2.436617136001587} +02/24/2022 05:18:07 - INFO - codeparrot_training - Step 2878: {'lr': 0.0004995873357935892, 'samples': 1474048, 'steps': 2878, 'loss/train': 4.313658237457275} +02/24/2022 05:18:13 - INFO - codeparrot_training - Step 2879: {'lr': 0.0004995863955081226, 'samples': 1474560, 'steps': 2879, 'loss/train': 2.886084794998169} +02/24/2022 05:18:16 - INFO - codeparrot_training - Step 2880: {'lr': 0.0004995854541535071, 'samples': 1475072, 'steps': 2880, 'loss/train': 2.8019585609436035} +02/24/2022 05:18:22 - INFO - codeparrot_training - Step 2881: {'lr': 0.0004995845117297468, 'samples': 1475584, 'steps': 2881, 'loss/train': 2.918586254119873} +02/24/2022 05:18:25 - INFO - codeparrot_training - Step 2882: {'lr': 0.0004995835682368457, 'samples': 1476096, 'steps': 2882, 'loss/train': 3.149017572402954} +02/24/2022 05:18:31 - INFO - codeparrot_training - Step 2883: {'lr': 0.0004995826236748078, 'samples': 1476608, 'steps': 2883, 'loss/train': 2.0591771602630615} +02/24/2022 05:18:34 - INFO - codeparrot_training - Step 2884: {'lr': 0.0004995816780436372, 'samples': 1477120, 'steps': 2884, 'loss/train': 3.566878318786621} +02/24/2022 05:18:40 - INFO - codeparrot_training - Step 2885: {'lr': 0.0004995807313433379, 'samples': 1477632, 'steps': 2885, 'loss/train': 4.372598171234131} +02/24/2022 05:18:44 - INFO - codeparrot_training - Step 2886: {'lr': 0.0004995797835739141, 'samples': 1478144, 'steps': 2886, 'loss/train': 1.5869191884994507} +02/24/2022 05:18:49 - INFO - codeparrot_training - Step 2887: {'lr': 0.0004995788347353697, 'samples': 1478656, 'steps': 2887, 'loss/train': 2.7778050899505615} +02/24/2022 05:18:55 - INFO - codeparrot_training - Step 2888: {'lr': 0.0004995778848277088, 'samples': 1479168, 'steps': 2888, 'loss/train': 3.9248712062835693} +02/24/2022 05:18:58 - INFO - codeparrot_training - Step 2889: {'lr': 0.0004995769338509357, 'samples': 1479680, 'steps': 2889, 'loss/train': 3.454069137573242} +02/24/2022 05:19:04 - INFO - codeparrot_training - Step 2890: {'lr': 0.000499575981805054, 'samples': 1480192, 'steps': 2890, 'loss/train': 2.604952812194824} +02/24/2022 05:19:07 - INFO - codeparrot_training - Step 2891: {'lr': 0.000499575028690068, 'samples': 1480704, 'steps': 2891, 'loss/train': 1.6018638610839844} +02/24/2022 05:19:13 - INFO - codeparrot_training - Step 2892: {'lr': 0.000499574074505982, 'samples': 1481216, 'steps': 2892, 'loss/train': 4.385003089904785} +02/24/2022 05:19:16 - INFO - codeparrot_training - Step 2893: {'lr': 0.0004995731192527999, 'samples': 1481728, 'steps': 2893, 'loss/train': 1.1551443338394165} +02/24/2022 05:19:22 - INFO - codeparrot_training - Step 2894: {'lr': 0.0004995721629305258, 'samples': 1482240, 'steps': 2894, 'loss/train': 2.4492008686065674} +02/24/2022 05:19:25 - INFO - codeparrot_training - Step 2895: {'lr': 0.0004995712055391638, 'samples': 1482752, 'steps': 2895, 'loss/train': 0.4130255877971649} +02/24/2022 05:19:30 - INFO - codeparrot_training - Step 2896: {'lr': 0.000499570247078718, 'samples': 1483264, 'steps': 2896, 'loss/train': 3.880509853363037} +02/24/2022 05:19:34 - INFO - codeparrot_training - Step 2897: {'lr': 0.0004995692875491925, 'samples': 1483776, 'steps': 2897, 'loss/train': 4.691442489624023} +02/24/2022 05:19:40 - INFO - codeparrot_training - Step 2898: {'lr': 0.0004995683269505914, 'samples': 1484288, 'steps': 2898, 'loss/train': 2.7149837017059326} +02/24/2022 05:19:43 - INFO - codeparrot_training - Step 2899: {'lr': 0.000499567365282919, 'samples': 1484800, 'steps': 2899, 'loss/train': 3.7517716884613037} +02/24/2022 05:19:49 - INFO - codeparrot_training - Step 2900: {'lr': 0.000499566402546179, 'samples': 1485312, 'steps': 2900, 'loss/train': 3.438077211380005} +02/24/2022 05:19:53 - INFO - codeparrot_training - Step 2901: {'lr': 0.0004995654387403758, 'samples': 1485824, 'steps': 2901, 'loss/train': 3.3144567012786865} +02/24/2022 05:19:58 - INFO - codeparrot_training - Step 2902: {'lr': 0.0004995644738655136, 'samples': 1486336, 'steps': 2902, 'loss/train': 2.5644874572753906} +02/24/2022 05:20:02 - INFO - codeparrot_training - Step 2903: {'lr': 0.0004995635079215965, 'samples': 1486848, 'steps': 2903, 'loss/train': 2.727362632751465} +02/24/2022 05:20:07 - INFO - codeparrot_training - Step 2904: {'lr': 0.0004995625409086285, 'samples': 1487360, 'steps': 2904, 'loss/train': 1.5900318622589111} +02/24/2022 05:20:11 - INFO - codeparrot_training - Step 2905: {'lr': 0.0004995615728266138, 'samples': 1487872, 'steps': 2905, 'loss/train': 3.285262107849121} +02/24/2022 05:20:16 - INFO - codeparrot_training - Step 2906: {'lr': 0.0004995606036755566, 'samples': 1488384, 'steps': 2906, 'loss/train': 3.105708122253418} +02/24/2022 05:20:20 - INFO - codeparrot_training - Step 2907: {'lr': 0.000499559633455461, 'samples': 1488896, 'steps': 2907, 'loss/train': 2.7195498943328857} +02/24/2022 05:20:25 - INFO - codeparrot_training - Step 2908: {'lr': 0.0004995586621663312, 'samples': 1489408, 'steps': 2908, 'loss/train': 2.7219977378845215} +02/24/2022 05:20:29 - INFO - codeparrot_training - Step 2909: {'lr': 0.0004995576898081713, 'samples': 1489920, 'steps': 2909, 'loss/train': 4.537677764892578} +02/24/2022 05:20:35 - INFO - codeparrot_training - Step 2910: {'lr': 0.0004995567163809855, 'samples': 1490432, 'steps': 2910, 'loss/train': 3.032747268676758} +02/24/2022 05:20:38 - INFO - codeparrot_training - Step 2911: {'lr': 0.000499555741884778, 'samples': 1490944, 'steps': 2911, 'loss/train': 3.2470128536224365} +02/24/2022 05:20:44 - INFO - codeparrot_training - Step 2912: {'lr': 0.000499554766319553, 'samples': 1491456, 'steps': 2912, 'loss/train': 2.6746344566345215} +02/24/2022 05:20:47 - INFO - codeparrot_training - Step 2913: {'lr': 0.0004995537896853146, 'samples': 1491968, 'steps': 2913, 'loss/train': 2.9834694862365723} +02/24/2022 05:20:53 - INFO - codeparrot_training - Step 2914: {'lr': 0.0004995528119820669, 'samples': 1492480, 'steps': 2914, 'loss/train': 2.5413784980773926} +02/24/2022 05:20:56 - INFO - codeparrot_training - Step 2915: {'lr': 0.0004995518332098143, 'samples': 1492992, 'steps': 2915, 'loss/train': 3.562969923019409} +02/24/2022 05:21:02 - INFO - codeparrot_training - Step 2916: {'lr': 0.0004995508533685608, 'samples': 1493504, 'steps': 2916, 'loss/train': 3.3154449462890625} +02/24/2022 05:21:05 - INFO - codeparrot_training - Step 2917: {'lr': 0.0004995498724583107, 'samples': 1494016, 'steps': 2917, 'loss/train': 3.7721385955810547} +02/24/2022 05:21:10 - INFO - codeparrot_training - Step 2918: {'lr': 0.0004995488904790682, 'samples': 1494528, 'steps': 2918, 'loss/train': 2.7396411895751953} +02/24/2022 05:21:14 - INFO - codeparrot_training - Step 2919: {'lr': 0.0004995479074308375, 'samples': 1495040, 'steps': 2919, 'loss/train': 3.9017858505249023} +02/24/2022 05:21:20 - INFO - codeparrot_training - Step 2920: {'lr': 0.0004995469233136228, 'samples': 1495552, 'steps': 2920, 'loss/train': 2.7082836627960205} +02/24/2022 05:21:23 - INFO - codeparrot_training - Step 2921: {'lr': 0.0004995459381274284, 'samples': 1496064, 'steps': 2921, 'loss/train': 3.10884952545166} +02/24/2022 05:21:29 - INFO - codeparrot_training - Step 2922: {'lr': 0.0004995449518722584, 'samples': 1496576, 'steps': 2922, 'loss/train': 2.602710485458374} +02/24/2022 05:21:32 - INFO - codeparrot_training - Step 2923: {'lr': 0.000499543964548117, 'samples': 1497088, 'steps': 2923, 'loss/train': 2.3507938385009766} +02/24/2022 05:21:38 - INFO - codeparrot_training - Step 2924: {'lr': 0.0004995429761550086, 'samples': 1497600, 'steps': 2924, 'loss/train': 2.5034258365631104} +02/24/2022 05:21:43 - INFO - codeparrot_training - Step 2925: {'lr': 0.0004995419866929373, 'samples': 1498112, 'steps': 2925, 'loss/train': 3.191732406616211} +02/24/2022 05:21:47 - INFO - codeparrot_training - Step 2926: {'lr': 0.0004995409961619073, 'samples': 1498624, 'steps': 2926, 'loss/train': 4.301843643188477} +02/24/2022 05:21:52 - INFO - codeparrot_training - Step 2927: {'lr': 0.0004995400045619229, 'samples': 1499136, 'steps': 2927, 'loss/train': 2.3361196517944336} +02/24/2022 05:21:56 - INFO - codeparrot_training - Step 2928: {'lr': 0.0004995390118929885, 'samples': 1499648, 'steps': 2928, 'loss/train': 2.6883368492126465} +02/24/2022 05:22:02 - INFO - codeparrot_training - Step 2929: {'lr': 0.0004995380181551081, 'samples': 1500160, 'steps': 2929, 'loss/train': 3.7777180671691895} +02/24/2022 05:22:05 - INFO - codeparrot_training - Step 2930: {'lr': 0.0004995370233482861, 'samples': 1500672, 'steps': 2930, 'loss/train': 3.621124267578125} +02/24/2022 05:22:11 - INFO - codeparrot_training - Step 2931: {'lr': 0.0004995360274725267, 'samples': 1501184, 'steps': 2931, 'loss/train': 2.3852555751800537} +02/24/2022 05:22:14 - INFO - codeparrot_training - Step 2932: {'lr': 0.0004995350305278342, 'samples': 1501696, 'steps': 2932, 'loss/train': 2.6886954307556152} +02/24/2022 05:22:20 - INFO - codeparrot_training - Step 2933: {'lr': 0.0004995340325142128, 'samples': 1502208, 'steps': 2933, 'loss/train': 3.690152883529663} +02/24/2022 05:22:23 - INFO - codeparrot_training - Step 2934: {'lr': 0.000499533033431667, 'samples': 1502720, 'steps': 2934, 'loss/train': 3.8104782104492188} +02/24/2022 05:22:29 - INFO - codeparrot_training - Step 2935: {'lr': 0.0004995320332802008, 'samples': 1503232, 'steps': 2935, 'loss/train': 2.6377949714660645} +02/24/2022 05:22:33 - INFO - codeparrot_training - Step 2936: {'lr': 0.0004995310320598187, 'samples': 1503744, 'steps': 2936, 'loss/train': 4.464433193206787} +02/24/2022 05:22:38 - INFO - codeparrot_training - Step 2937: {'lr': 0.0004995300297705248, 'samples': 1504256, 'steps': 2937, 'loss/train': 3.2768266201019287} +02/24/2022 05:22:42 - INFO - codeparrot_training - Step 2938: {'lr': 0.0004995290264123235, 'samples': 1504768, 'steps': 2938, 'loss/train': 4.1689324378967285} +02/24/2022 05:22:47 - INFO - codeparrot_training - Step 2939: {'lr': 0.0004995280219852192, 'samples': 1505280, 'steps': 2939, 'loss/train': 2.5472989082336426} +02/24/2022 05:22:51 - INFO - codeparrot_training - Step 2940: {'lr': 0.000499527016489216, 'samples': 1505792, 'steps': 2940, 'loss/train': 0.33765846490859985} +02/24/2022 05:22:57 - INFO - codeparrot_training - Step 2941: {'lr': 0.0004995260099243182, 'samples': 1506304, 'steps': 2941, 'loss/train': 2.321903944015503} +02/24/2022 05:23:00 - INFO - codeparrot_training - Step 2942: {'lr': 0.0004995250022905303, 'samples': 1506816, 'steps': 2942, 'loss/train': 4.7813334465026855} +02/24/2022 05:23:04 - INFO - codeparrot_training - Step 2943: {'lr': 0.0004995239935878565, 'samples': 1507328, 'steps': 2943, 'loss/train': 2.1477274894714355} +02/24/2022 05:23:09 - INFO - codeparrot_training - Step 2944: {'lr': 0.0004995229838163012, 'samples': 1507840, 'steps': 2944, 'loss/train': 3.494150400161743} +02/24/2022 05:23:12 - INFO - codeparrot_training - Step 2945: {'lr': 0.0004995219729758687, 'samples': 1508352, 'steps': 2945, 'loss/train': 2.7612357139587402} +02/24/2022 05:23:19 - INFO - codeparrot_training - Step 2946: {'lr': 0.0004995209610665632, 'samples': 1508864, 'steps': 2946, 'loss/train': 4.717722415924072} +02/24/2022 05:23:22 - INFO - codeparrot_training - Step 2947: {'lr': 0.0004995199480883892, 'samples': 1509376, 'steps': 2947, 'loss/train': 3.280971050262451} +02/24/2022 05:23:28 - INFO - codeparrot_training - Step 2948: {'lr': 0.0004995189340413509, 'samples': 1509888, 'steps': 2948, 'loss/train': 1.9670884609222412} +02/24/2022 05:23:31 - INFO - codeparrot_training - Step 2949: {'lr': 0.0004995179189254528, 'samples': 1510400, 'steps': 2949, 'loss/train': 3.457575798034668} +02/24/2022 05:23:37 - INFO - codeparrot_training - Step 2950: {'lr': 0.000499516902740699, 'samples': 1510912, 'steps': 2950, 'loss/train': 3.502692461013794} +02/24/2022 05:23:40 - INFO - codeparrot_training - Step 2951: {'lr': 0.0004995158854870942, 'samples': 1511424, 'steps': 2951, 'loss/train': 3.8437092304229736} +02/24/2022 05:23:46 - INFO - codeparrot_training - Step 2952: {'lr': 0.0004995148671646426, 'samples': 1511936, 'steps': 2952, 'loss/train': 1.654483675956726} +02/24/2022 05:23:49 - INFO - codeparrot_training - Step 2953: {'lr': 0.0004995138477733484, 'samples': 1512448, 'steps': 2953, 'loss/train': 4.670032024383545} +02/24/2022 05:23:55 - INFO - codeparrot_training - Step 2954: {'lr': 0.0004995128273132161, 'samples': 1512960, 'steps': 2954, 'loss/train': 3.2233850955963135} +02/24/2022 05:23:58 - INFO - codeparrot_training - Step 2955: {'lr': 0.0004995118057842502, 'samples': 1513472, 'steps': 2955, 'loss/train': 3.455089569091797} +02/24/2022 05:24:04 - INFO - codeparrot_training - Step 2956: {'lr': 0.0004995107831864549, 'samples': 1513984, 'steps': 2956, 'loss/train': 3.514132022857666} +02/24/2022 05:24:08 - INFO - codeparrot_training - Step 2957: {'lr': 0.0004995097595198346, 'samples': 1514496, 'steps': 2957, 'loss/train': 2.50236177444458} +02/24/2022 05:24:13 - INFO - codeparrot_training - Step 2958: {'lr': 0.0004995087347843938, 'samples': 1515008, 'steps': 2958, 'loss/train': 3.0795295238494873} +02/24/2022 05:24:17 - INFO - codeparrot_training - Step 2959: {'lr': 0.0004995077089801368, 'samples': 1515520, 'steps': 2959, 'loss/train': 2.86020565032959} +02/24/2022 05:24:22 - INFO - codeparrot_training - Step 2960: {'lr': 0.0004995066821070679, 'samples': 1516032, 'steps': 2960, 'loss/train': 2.777902126312256} +02/24/2022 05:24:26 - INFO - codeparrot_training - Step 2961: {'lr': 0.0004995056541651917, 'samples': 1516544, 'steps': 2961, 'loss/train': 3.6399078369140625} +02/24/2022 05:24:31 - INFO - codeparrot_training - Step 2962: {'lr': 0.0004995046251545125, 'samples': 1517056, 'steps': 2962, 'loss/train': 2.810255289077759} +02/24/2022 05:24:37 - INFO - codeparrot_training - Step 2963: {'lr': 0.0004995035950750346, 'samples': 1517568, 'steps': 2963, 'loss/train': 3.0590343475341797} +02/24/2022 05:24:40 - INFO - codeparrot_training - Step 2964: {'lr': 0.0004995025639267627, 'samples': 1518080, 'steps': 2964, 'loss/train': 4.054042339324951} +02/24/2022 05:24:46 - INFO - codeparrot_training - Step 2965: {'lr': 0.0004995015317097009, 'samples': 1518592, 'steps': 2965, 'loss/train': 2.602961778640747} +02/24/2022 05:24:50 - INFO - codeparrot_training - Step 2966: {'lr': 0.0004995004984238538, 'samples': 1519104, 'steps': 2966, 'loss/train': 2.1026241779327393} +02/24/2022 05:24:55 - INFO - codeparrot_training - Step 2967: {'lr': 0.0004994994640692258, 'samples': 1519616, 'steps': 2967, 'loss/train': 3.4214701652526855} +02/24/2022 05:24:59 - INFO - codeparrot_training - Step 2968: {'lr': 0.0004994984286458213, 'samples': 1520128, 'steps': 2968, 'loss/train': 1.6972887516021729} +02/24/2022 05:25:04 - INFO - codeparrot_training - Step 2969: {'lr': 0.0004994973921536447, 'samples': 1520640, 'steps': 2969, 'loss/train': 3.4412119388580322} +02/24/2022 05:25:08 - INFO - codeparrot_training - Step 2970: {'lr': 0.0004994963545927006, 'samples': 1521152, 'steps': 2970, 'loss/train': 2.8235559463500977} +02/24/2022 05:25:13 - INFO - codeparrot_training - Step 2971: {'lr': 0.0004994953159629934, 'samples': 1521664, 'steps': 2971, 'loss/train': 3.128722667694092} +02/24/2022 05:25:17 - INFO - codeparrot_training - Step 2972: {'lr': 0.0004994942762645274, 'samples': 1522176, 'steps': 2972, 'loss/train': 1.9544121026992798} +02/24/2022 05:25:22 - INFO - codeparrot_training - Step 2973: {'lr': 0.000499493235497307, 'samples': 1522688, 'steps': 2973, 'loss/train': 2.301903486251831} +02/24/2022 05:25:26 - INFO - codeparrot_training - Step 2974: {'lr': 0.000499492193661337, 'samples': 1523200, 'steps': 2974, 'loss/train': 2.6223721504211426} +02/24/2022 05:25:31 - INFO - codeparrot_training - Step 2975: {'lr': 0.0004994911507566216, 'samples': 1523712, 'steps': 2975, 'loss/train': 3.720458984375} +02/24/2022 05:25:35 - INFO - codeparrot_training - Step 2976: {'lr': 0.0004994901067831654, 'samples': 1524224, 'steps': 2976, 'loss/train': 3.7408065795898438} +02/24/2022 05:25:40 - INFO - codeparrot_training - Step 2977: {'lr': 0.0004994890617409728, 'samples': 1524736, 'steps': 2977, 'loss/train': 4.947940349578857} +02/24/2022 05:25:44 - INFO - codeparrot_training - Step 2978: {'lr': 0.0004994880156300482, 'samples': 1525248, 'steps': 2978, 'loss/train': 3.488401174545288} +02/24/2022 05:25:49 - INFO - codeparrot_training - Step 2979: {'lr': 0.0004994869684503962, 'samples': 1525760, 'steps': 2979, 'loss/train': 2.4275946617126465} +02/24/2022 05:25:53 - INFO - codeparrot_training - Step 2980: {'lr': 0.0004994859202020212, 'samples': 1526272, 'steps': 2980, 'loss/train': 1.637404203414917} +02/24/2022 05:26:00 - INFO - codeparrot_training - Step 2981: {'lr': 0.0004994848708849279, 'samples': 1526784, 'steps': 2981, 'loss/train': 1.961496114730835} +02/24/2022 05:26:03 - INFO - codeparrot_training - Step 2982: {'lr': 0.0004994838204991205, 'samples': 1527296, 'steps': 2982, 'loss/train': 2.5848701000213623} +02/24/2022 05:26:08 - INFO - codeparrot_training - Step 2983: {'lr': 0.0004994827690446036, 'samples': 1527808, 'steps': 2983, 'loss/train': 2.58817720413208} +02/24/2022 05:26:12 - INFO - codeparrot_training - Step 2984: {'lr': 0.0004994817165213817, 'samples': 1528320, 'steps': 2984, 'loss/train': 2.185640573501587} +02/24/2022 05:26:17 - INFO - codeparrot_training - Step 2985: {'lr': 0.0004994806629294594, 'samples': 1528832, 'steps': 2985, 'loss/train': 2.0475449562072754} +02/24/2022 05:26:21 - INFO - codeparrot_training - Step 2986: {'lr': 0.0004994796082688413, 'samples': 1529344, 'steps': 2986, 'loss/train': 2.093899965286255} +02/24/2022 05:26:26 - INFO - codeparrot_training - Step 2987: {'lr': 0.0004994785525395316, 'samples': 1529856, 'steps': 2987, 'loss/train': 2.775014877319336} +02/24/2022 05:26:30 - INFO - codeparrot_training - Step 2988: {'lr': 0.0004994774957415351, 'samples': 1530368, 'steps': 2988, 'loss/train': 4.215974807739258} +02/24/2022 05:26:35 - INFO - codeparrot_training - Step 2989: {'lr': 0.0004994764378748562, 'samples': 1530880, 'steps': 2989, 'loss/train': 3.640334129333496} +02/24/2022 05:26:39 - INFO - codeparrot_training - Step 2990: {'lr': 0.0004994753789394994, 'samples': 1531392, 'steps': 2990, 'loss/train': 1.996397614479065} +02/24/2022 05:26:45 - INFO - codeparrot_training - Step 2991: {'lr': 0.0004994743189354694, 'samples': 1531904, 'steps': 2991, 'loss/train': 2.74576735496521} +02/24/2022 05:26:49 - INFO - codeparrot_training - Step 2992: {'lr': 0.0004994732578627706, 'samples': 1532416, 'steps': 2992, 'loss/train': 2.9333362579345703} +02/24/2022 05:26:54 - INFO - codeparrot_training - Step 2993: {'lr': 0.0004994721957214076, 'samples': 1532928, 'steps': 2993, 'loss/train': 4.03203821182251} +02/24/2022 05:26:58 - INFO - codeparrot_training - Step 2994: {'lr': 0.0004994711325113849, 'samples': 1533440, 'steps': 2994, 'loss/train': 2.6782896518707275} +02/24/2022 05:27:03 - INFO - codeparrot_training - Step 2995: {'lr': 0.000499470068232707, 'samples': 1533952, 'steps': 2995, 'loss/train': 2.666483163833618} +02/24/2022 05:27:06 - INFO - codeparrot_training - Step 2996: {'lr': 0.0004994690028853787, 'samples': 1534464, 'steps': 2996, 'loss/train': 2.761693239212036} +02/24/2022 05:27:12 - INFO - codeparrot_training - Step 2997: {'lr': 0.0004994679364694043, 'samples': 1534976, 'steps': 2997, 'loss/train': 2.6543567180633545} +02/24/2022 05:27:15 - INFO - codeparrot_training - Step 2998: {'lr': 0.0004994668689847885, 'samples': 1535488, 'steps': 2998, 'loss/train': 3.5141983032226562} +02/24/2022 05:27:21 - INFO - codeparrot_training - Step 2999: {'lr': 0.0004994658004315358, 'samples': 1536000, 'steps': 2999, 'loss/train': 4.3175482749938965} +02/24/2022 05:27:21 - INFO - codeparrot_training - Evaluating and saving model checkpoint