diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -40076,3 +40076,2506 @@ Use FP16 precision: False 12/27/2021 21:39:19 - INFO - codeparrot_training - Step 37499: {'lr': 7.372280104417153e-05, 'samples': 19200000, 'steps': 37499, 'batch_loss/train': 0.808799035847187} 12/27/2021 21:39:19 - INFO - codeparrot_training - Evaluating and saving model checkpoint 12/27/2021 21:42:43 - INFO - codeparrot_training - Step 37500: {'loss/eval': 0.750774621963501, 'perplexity': 2.118640422821045} +12/27/2021 21:43:01 - WARNING - huggingface_hub.repository - Several commits (13) will be pushed upstream. +12/27/2021 21:43:15 - INFO - codeparrot_training - Step 37500: {'lr': 7.371162376970078e-05, 'samples': 19200512, 'steps': 37500, 'batch_loss/train': 0.7357386639341712} +12/27/2021 21:43:28 - INFO - codeparrot_training - Step 37501: {'lr': 7.370044719608602e-05, 'samples': 19201024, 'steps': 37501, 'batch_loss/train': 0.7435182514600456} +12/27/2021 21:43:39 - INFO - codeparrot_training - Step 37502: {'lr': 7.368927132337172e-05, 'samples': 19201536, 'steps': 37502, 'batch_loss/train': 0.6693274169228971} +12/27/2021 21:43:49 - INFO - codeparrot_training - Step 37503: {'lr': 7.367809615160231e-05, 'samples': 19202048, 'steps': 37503, 'batch_loss/train': 0.6421770723536611} +12/27/2021 21:44:01 - INFO - codeparrot_training - Step 37504: {'lr': 7.366692168082221e-05, 'samples': 19202560, 'steps': 37504, 'batch_loss/train': 0.6276878505013883} +12/27/2021 21:44:12 - INFO - codeparrot_training - Step 37505: {'lr': 7.365574791107584e-05, 'samples': 19203072, 'steps': 37505, 'batch_loss/train': 0.6686520266812295} +12/27/2021 21:44:23 - INFO - codeparrot_training - Step 37506: {'lr': 7.364457484240763e-05, 'samples': 19203584, 'steps': 37506, 'batch_loss/train': 0.6210176380118355} +12/27/2021 21:44:37 - INFO - codeparrot_training - Step 37507: {'lr': 7.36334024748621e-05, 'samples': 19204096, 'steps': 37507, 'batch_loss/train': 0.7733500618487597} +12/27/2021 21:44:47 - INFO - codeparrot_training - Step 37508: {'lr': 7.36222308084834e-05, 'samples': 19204608, 'steps': 37508, 'batch_loss/train': 0.9040505704469979} +12/27/2021 21:44:58 - INFO - codeparrot_training - Step 37509: {'lr': 7.361105984331628e-05, 'samples': 19205120, 'steps': 37509, 'batch_loss/train': 0.7905787447816692} +12/27/2021 21:45:09 - INFO - codeparrot_training - Step 37510: {'lr': 7.359988957940489e-05, 'samples': 19205632, 'steps': 37510, 'batch_loss/train': 0.6167054476682097} +12/27/2021 21:45:21 - INFO - codeparrot_training - Step 37511: {'lr': 7.358872001679373e-05, 'samples': 19206144, 'steps': 37511, 'batch_loss/train': 0.7456460346002132} +12/27/2021 21:45:31 - INFO - codeparrot_training - Step 37512: {'lr': 7.357755115552723e-05, 'samples': 19206656, 'steps': 37512, 'batch_loss/train': 0.6298953645746224} +12/27/2021 21:45:42 - INFO - codeparrot_training - Step 37513: {'lr': 7.356638299564977e-05, 'samples': 19207168, 'steps': 37513, 'batch_loss/train': 0.695612823124975} +12/27/2021 21:45:55 - INFO - codeparrot_training - Step 37514: {'lr': 7.355521553720573e-05, 'samples': 19207680, 'steps': 37514, 'batch_loss/train': 1.1674690479412675} +12/27/2021 21:46:05 - INFO - codeparrot_training - Step 37515: {'lr': 7.354404878023954e-05, 'samples': 19208192, 'steps': 37515, 'batch_loss/train': 0.7166361203417182} +12/27/2021 21:46:16 - INFO - codeparrot_training - Step 37516: {'lr': 7.353288272479558e-05, 'samples': 19208704, 'steps': 37516, 'batch_loss/train': 0.6063757427036762} +12/27/2021 21:46:26 - INFO - codeparrot_training - Step 37517: {'lr': 7.352171737091826e-05, 'samples': 19209216, 'steps': 37517, 'batch_loss/train': 0.6467999559827149} +12/27/2021 21:46:40 - INFO - codeparrot_training - Step 37518: {'lr': 7.351055271865204e-05, 'samples': 19209728, 'steps': 37518, 'batch_loss/train': 0.7181018507108092} +12/27/2021 21:46:51 - INFO - codeparrot_training - Step 37519: {'lr': 7.349938876804104e-05, 'samples': 19210240, 'steps': 37519, 'batch_loss/train': 0.696806118299719} +12/27/2021 21:47:01 - INFO - codeparrot_training - Step 37520: {'lr': 7.348822551912993e-05, 'samples': 19210752, 'steps': 37520, 'batch_loss/train': 0.6597581906244159} +12/27/2021 21:47:14 - INFO - codeparrot_training - Step 37521: {'lr': 7.347706297196305e-05, 'samples': 19211264, 'steps': 37521, 'batch_loss/train': 0.7901114020496607} +12/27/2021 21:47:24 - INFO - codeparrot_training - Step 37522: {'lr': 7.346590112658464e-05, 'samples': 19211776, 'steps': 37522, 'batch_loss/train': 0.7814147006720304} +12/27/2021 21:47:35 - INFO - codeparrot_training - Step 37523: {'lr': 7.345473998303908e-05, 'samples': 19212288, 'steps': 37523, 'batch_loss/train': 0.7556688571348786} +12/27/2021 21:47:47 - INFO - codeparrot_training - Step 37524: {'lr': 7.344357954137096e-05, 'samples': 19212800, 'steps': 37524, 'batch_loss/train': 0.7085003582760692} +12/27/2021 21:47:58 - INFO - codeparrot_training - Step 37525: {'lr': 7.343241980162444e-05, 'samples': 19213312, 'steps': 37525, 'batch_loss/train': 0.7335137655027211} +12/27/2021 21:48:08 - INFO - codeparrot_training - Step 37526: {'lr': 7.342126076384392e-05, 'samples': 19213824, 'steps': 37526, 'batch_loss/train': 0.7512008370831609} +12/27/2021 21:48:20 - INFO - codeparrot_training - Step 37527: {'lr': 7.341010242807381e-05, 'samples': 19214336, 'steps': 37527, 'batch_loss/train': 0.7200516937300563} +12/27/2021 21:48:31 - INFO - codeparrot_training - Step 37528: {'lr': 7.339894479435846e-05, 'samples': 19214848, 'steps': 37528, 'batch_loss/train': 0.7727027875371277} +12/27/2021 21:48:42 - INFO - codeparrot_training - Step 37529: {'lr': 7.338778786274222e-05, 'samples': 19215360, 'steps': 37529, 'batch_loss/train': 0.6985601978376508} +12/27/2021 21:48:52 - INFO - codeparrot_training - Step 37530: {'lr': 7.337663163326946e-05, 'samples': 19215872, 'steps': 37530, 'batch_loss/train': 0.6496484489180148} +12/27/2021 21:49:07 - INFO - codeparrot_training - Step 37531: {'lr': 7.336547610598451e-05, 'samples': 19216384, 'steps': 37531, 'batch_loss/train': 0.6876111882738769} +12/27/2021 21:49:17 - INFO - codeparrot_training - Step 37532: {'lr': 7.335432128093173e-05, 'samples': 19216896, 'steps': 37532, 'batch_loss/train': 0.6436452187190298} +12/27/2021 21:49:28 - INFO - codeparrot_training - Step 37533: {'lr': 7.334316715815555e-05, 'samples': 19217408, 'steps': 37533, 'batch_loss/train': 0.4624420494074002} +12/27/2021 21:49:40 - INFO - codeparrot_training - Step 37534: {'lr': 7.333201373770007e-05, 'samples': 19217920, 'steps': 37534, 'batch_loss/train': 0.7080879284767434} +12/27/2021 21:49:50 - INFO - codeparrot_training - Step 37535: {'lr': 7.332086101960996e-05, 'samples': 19218432, 'steps': 37535, 'batch_loss/train': 0.6884201869834214} +12/27/2021 21:50:01 - INFO - codeparrot_training - Step 37536: {'lr': 7.330970900392934e-05, 'samples': 19218944, 'steps': 37536, 'batch_loss/train': 0.7126073078252375} +12/27/2021 21:50:15 - INFO - codeparrot_training - Step 37537: {'lr': 7.329855769070257e-05, 'samples': 19219456, 'steps': 37537, 'batch_loss/train': 0.8198648225516081} +12/27/2021 21:50:26 - INFO - codeparrot_training - Step 37538: {'lr': 7.328740707997402e-05, 'samples': 19219968, 'steps': 37538, 'batch_loss/train': 0.6566177012282424} +12/27/2021 21:50:36 - INFO - codeparrot_training - Step 37539: {'lr': 7.327625717178801e-05, 'samples': 19220480, 'steps': 37539, 'batch_loss/train': 0.7726355723571032} +12/27/2021 21:50:47 - INFO - codeparrot_training - Step 37540: {'lr': 7.326510796618891e-05, 'samples': 19220992, 'steps': 37540, 'batch_loss/train': 0.658759337849915} +12/27/2021 21:50:59 - INFO - codeparrot_training - Step 37541: {'lr': 7.325395946322098e-05, 'samples': 19221504, 'steps': 37541, 'batch_loss/train': 0.6405524201691151} +12/27/2021 21:51:09 - INFO - codeparrot_training - Step 37542: {'lr': 7.324281166292856e-05, 'samples': 19222016, 'steps': 37542, 'batch_loss/train': 0.8007869776338339} +12/27/2021 21:51:20 - INFO - codeparrot_training - Step 37543: {'lr': 7.323166456535598e-05, 'samples': 19222528, 'steps': 37543, 'batch_loss/train': 0.7416593916714191} +12/27/2021 21:51:32 - INFO - codeparrot_training - Step 37544: {'lr': 7.322051817054764e-05, 'samples': 19223040, 'steps': 37544, 'batch_loss/train': 0.6863210353767499} +12/27/2021 21:51:43 - INFO - codeparrot_training - Step 37545: {'lr': 7.320937247854764e-05, 'samples': 19223552, 'steps': 37545, 'batch_loss/train': 0.7160296908114105} +12/27/2021 21:51:54 - INFO - codeparrot_training - Step 37546: {'lr': 7.319822748940047e-05, 'samples': 19224064, 'steps': 37546, 'batch_loss/train': 0.7288490128703415} +12/27/2021 21:52:07 - INFO - codeparrot_training - Step 37547: {'lr': 7.318708320315048e-05, 'samples': 19224576, 'steps': 37547, 'batch_loss/train': 0.779841941781342} +12/27/2021 21:52:18 - INFO - codeparrot_training - Step 37548: {'lr': 7.317593961984182e-05, 'samples': 19225088, 'steps': 37548, 'batch_loss/train': 0.6685237800702453} +12/27/2021 21:52:29 - INFO - codeparrot_training - Step 37549: {'lr': 7.316479673951875e-05, 'samples': 19225600, 'steps': 37549, 'batch_loss/train': 0.6598315038718283} +12/27/2021 21:52:39 - INFO - codeparrot_training - Step 37550: {'lr': 7.315365456222587e-05, 'samples': 19226112, 'steps': 37550, 'batch_loss/train': 0.6775889276759699} +12/27/2021 21:52:52 - INFO - codeparrot_training - Step 37551: {'lr': 7.314251308800718e-05, 'samples': 19226624, 'steps': 37551, 'batch_loss/train': 0.6785634085536003} +12/27/2021 21:53:02 - INFO - codeparrot_training - Step 37552: {'lr': 7.313137231690708e-05, 'samples': 19227136, 'steps': 37552, 'batch_loss/train': 0.7245842609554529} +12/27/2021 21:53:13 - INFO - codeparrot_training - Step 37553: {'lr': 7.312023224896986e-05, 'samples': 19227648, 'steps': 37553, 'batch_loss/train': 0.6148419806850143} +12/27/2021 21:53:25 - INFO - codeparrot_training - Step 37554: {'lr': 7.31090928842398e-05, 'samples': 19228160, 'steps': 37554, 'batch_loss/train': 0.6883994047529995} +12/27/2021 21:53:36 - INFO - codeparrot_training - Step 37555: {'lr': 7.309795422276123e-05, 'samples': 19228672, 'steps': 37555, 'batch_loss/train': 0.9209551746025681} +12/27/2021 21:53:47 - INFO - codeparrot_training - Step 37556: {'lr': 7.308681626457836e-05, 'samples': 19229184, 'steps': 37556, 'batch_loss/train': 0.5962865251058247} +12/27/2021 21:54:01 - INFO - codeparrot_training - Step 37557: {'lr': 7.307567900973555e-05, 'samples': 19229696, 'steps': 37557, 'batch_loss/train': 0.7018822133541107} +12/27/2021 21:54:11 - INFO - codeparrot_training - Step 37558: {'lr': 7.3064542458277e-05, 'samples': 19230208, 'steps': 37558, 'batch_loss/train': 0.7511423039250076} +12/27/2021 21:54:22 - INFO - codeparrot_training - Step 37559: {'lr': 7.305340661024712e-05, 'samples': 19230720, 'steps': 37559, 'batch_loss/train': 0.6464865081943572} +12/27/2021 21:54:32 - INFO - codeparrot_training - Step 37560: {'lr': 7.304227146568993e-05, 'samples': 19231232, 'steps': 37560, 'batch_loss/train': 0.7058068260084838} +12/27/2021 21:54:45 - INFO - codeparrot_training - Step 37561: {'lr': 7.303113702464991e-05, 'samples': 19231744, 'steps': 37561, 'batch_loss/train': 0.4866297041880898} +12/27/2021 21:54:55 - INFO - codeparrot_training - Step 37562: {'lr': 7.302000328717135e-05, 'samples': 19232256, 'steps': 37562, 'batch_loss/train': 0.7047883830964565} +12/27/2021 21:55:06 - INFO - codeparrot_training - Step 37563: {'lr': 7.300887025329835e-05, 'samples': 19232768, 'steps': 37563, 'batch_loss/train': 0.8859242666512728} +12/27/2021 21:55:18 - INFO - codeparrot_training - Step 37564: {'lr': 7.299773792307526e-05, 'samples': 19233280, 'steps': 37564, 'batch_loss/train': 0.5855559476767667} +12/27/2021 21:55:29 - INFO - codeparrot_training - Step 37565: {'lr': 7.298660629654636e-05, 'samples': 19233792, 'steps': 37565, 'batch_loss/train': 0.6790951592847705} +12/27/2021 21:55:39 - INFO - codeparrot_training - Step 37566: {'lr': 7.297547537375585e-05, 'samples': 19234304, 'steps': 37566, 'batch_loss/train': 0.6899379999376833} +12/27/2021 21:55:53 - INFO - codeparrot_training - Step 37567: {'lr': 7.296434515474798e-05, 'samples': 19234816, 'steps': 37567, 'batch_loss/train': 0.7386481831781566} +12/27/2021 21:56:04 - INFO - codeparrot_training - Step 37568: {'lr': 7.295321563956705e-05, 'samples': 19235328, 'steps': 37568, 'batch_loss/train': 0.7554440045496449} +12/27/2021 21:56:14 - INFO - codeparrot_training - Step 37569: {'lr': 7.294208682825732e-05, 'samples': 19235840, 'steps': 37569, 'batch_loss/train': 0.744614286813885} +12/27/2021 21:56:26 - INFO - codeparrot_training - Step 37570: {'lr': 7.293095872086295e-05, 'samples': 19236352, 'steps': 37570, 'batch_loss/train': 0.6846874626353383} +12/27/2021 21:56:37 - INFO - codeparrot_training - Step 37571: {'lr': 7.291983131742824e-05, 'samples': 19236864, 'steps': 37571, 'batch_loss/train': 0.6575920606846921} +12/27/2021 21:56:48 - INFO - codeparrot_training - Step 37572: {'lr': 7.290870461799743e-05, 'samples': 19237376, 'steps': 37572, 'batch_loss/train': 0.6942357271909714} +12/27/2021 21:56:58 - INFO - codeparrot_training - Step 37573: {'lr': 7.289757862261482e-05, 'samples': 19237888, 'steps': 37573, 'batch_loss/train': 0.6445488701574504} +12/27/2021 21:57:11 - INFO - codeparrot_training - Step 37574: {'lr': 7.288645333132449e-05, 'samples': 19238400, 'steps': 37574, 'batch_loss/train': 0.6611454666126519} +12/27/2021 21:57:21 - INFO - codeparrot_training - Step 37575: {'lr': 7.287532874417066e-05, 'samples': 19238912, 'steps': 37575, 'batch_loss/train': 0.8309986991807818} +12/27/2021 21:57:32 - INFO - codeparrot_training - Step 37576: {'lr': 7.28642048611978e-05, 'samples': 19239424, 'steps': 37576, 'batch_loss/train': 0.7361975880339742} +12/27/2021 21:57:46 - INFO - codeparrot_training - Step 37577: {'lr': 7.28530816824499e-05, 'samples': 19239936, 'steps': 37577, 'batch_loss/train': 0.7672217441722751} +12/27/2021 21:57:57 - INFO - codeparrot_training - Step 37578: {'lr': 7.284195920797118e-05, 'samples': 19240448, 'steps': 37578, 'batch_loss/train': 0.6942449612542987} +12/27/2021 21:58:07 - INFO - codeparrot_training - Step 37579: {'lr': 7.283083743780608e-05, 'samples': 19240960, 'steps': 37579, 'batch_loss/train': 0.7059304309077561} +12/27/2021 21:58:19 - INFO - codeparrot_training - Step 37580: {'lr': 7.281971637199861e-05, 'samples': 19241472, 'steps': 37580, 'batch_loss/train': 0.7555155249428935} +12/27/2021 21:58:30 - INFO - codeparrot_training - Step 37581: {'lr': 7.280859601059306e-05, 'samples': 19241984, 'steps': 37581, 'batch_loss/train': 0.6063072067336179} +12/27/2021 21:58:41 - INFO - codeparrot_training - Step 37582: {'lr': 7.279747635363362e-05, 'samples': 19242496, 'steps': 37582, 'batch_loss/train': 0.611586266575614} +12/27/2021 21:58:53 - INFO - codeparrot_training - Step 37583: {'lr': 7.27863574011645e-05, 'samples': 19243008, 'steps': 37583, 'batch_loss/train': 0.7405858935671858} +12/27/2021 21:59:04 - INFO - codeparrot_training - Step 37584: {'lr': 7.27752391532299e-05, 'samples': 19243520, 'steps': 37584, 'batch_loss/train': 0.6803420479409397} +12/27/2021 21:59:14 - INFO - codeparrot_training - Step 37585: {'lr': 7.276412160987412e-05, 'samples': 19244032, 'steps': 37585, 'batch_loss/train': 0.6188627304509282} +12/27/2021 21:59:25 - INFO - codeparrot_training - Step 37586: {'lr': 7.275300477114111e-05, 'samples': 19244544, 'steps': 37586, 'batch_loss/train': 0.9360009585507214} +12/27/2021 21:59:39 - INFO - codeparrot_training - Step 37587: {'lr': 7.274188863707532e-05, 'samples': 19245056, 'steps': 37587, 'batch_loss/train': 1.3166502742096782} +12/27/2021 21:59:50 - INFO - codeparrot_training - Step 37588: {'lr': 7.273077320772093e-05, 'samples': 19245568, 'steps': 37588, 'batch_loss/train': 0.7582199187017977} +12/27/2021 22:00:00 - INFO - codeparrot_training - Step 37589: {'lr': 7.271965848312196e-05, 'samples': 19246080, 'steps': 37589, 'batch_loss/train': 0.7833939585834742} +12/27/2021 22:00:12 - INFO - codeparrot_training - Step 37590: {'lr': 7.270854446332268e-05, 'samples': 19246592, 'steps': 37590, 'batch_loss/train': 0.6724503021687269} +12/27/2021 22:00:23 - INFO - codeparrot_training - Step 37591: {'lr': 7.26974311483673e-05, 'samples': 19247104, 'steps': 37591, 'batch_loss/train': 0.7752807168290019} +12/27/2021 22:00:33 - INFO - codeparrot_training - Step 37592: {'lr': 7.268631853829999e-05, 'samples': 19247616, 'steps': 37592, 'batch_loss/train': 0.6965697288978845} +12/27/2021 22:00:47 - INFO - codeparrot_training - Step 37593: {'lr': 7.26752066331649e-05, 'samples': 19248128, 'steps': 37593, 'batch_loss/train': 0.8069468680769205} +12/27/2021 22:00:58 - INFO - codeparrot_training - Step 37594: {'lr': 7.266409543300626e-05, 'samples': 19248640, 'steps': 37594, 'batch_loss/train': 0.7973256204277277} +12/27/2021 22:01:09 - INFO - codeparrot_training - Step 37595: {'lr': 7.265298493786821e-05, 'samples': 19249152, 'steps': 37595, 'batch_loss/train': 0.7728339759632945} +12/27/2021 22:01:19 - INFO - codeparrot_training - Step 37596: {'lr': 7.264187514779494e-05, 'samples': 19249664, 'steps': 37596, 'batch_loss/train': 0.7464733277447522} +12/27/2021 22:01:31 - INFO - codeparrot_training - Step 37597: {'lr': 7.263076606283059e-05, 'samples': 19250176, 'steps': 37597, 'batch_loss/train': 0.7524815900251269} +12/27/2021 22:01:42 - INFO - codeparrot_training - Step 37598: {'lr': 7.261965768301934e-05, 'samples': 19250688, 'steps': 37598, 'batch_loss/train': 0.7356204567477107} +12/27/2021 22:01:53 - INFO - codeparrot_training - Step 37599: {'lr': 7.260855000840544e-05, 'samples': 19251200, 'steps': 37599, 'batch_loss/train': 0.8037867397069931} +12/27/2021 22:02:05 - INFO - codeparrot_training - Step 37600: {'lr': 7.25974430390329e-05, 'samples': 19251712, 'steps': 37600, 'batch_loss/train': 0.6588559368974529} +12/27/2021 22:02:16 - INFO - codeparrot_training - Step 37601: {'lr': 7.258633677494583e-05, 'samples': 19252224, 'steps': 37601, 'batch_loss/train': 0.8099284525960684} +12/27/2021 22:02:26 - INFO - codeparrot_training - Step 37602: {'lr': 7.257523121618867e-05, 'samples': 19252736, 'steps': 37602, 'batch_loss/train': 0.8148653141688555} +12/27/2021 22:02:39 - INFO - codeparrot_training - Step 37603: {'lr': 7.256412636280532e-05, 'samples': 19253248, 'steps': 37603, 'batch_loss/train': 0.7645574510097504} +12/27/2021 22:02:50 - INFO - codeparrot_training - Step 37604: {'lr': 7.255302221483995e-05, 'samples': 19253760, 'steps': 37604, 'batch_loss/train': 1.0074940277263522} +12/27/2021 22:03:00 - INFO - codeparrot_training - Step 37605: {'lr': 7.25419187723369e-05, 'samples': 19254272, 'steps': 37605, 'batch_loss/train': 0.950430910103023} +12/27/2021 22:03:11 - INFO - codeparrot_training - Step 37606: {'lr': 7.253081603534009e-05, 'samples': 19254784, 'steps': 37606, 'batch_loss/train': 0.7351180650293827} +12/27/2021 22:03:25 - INFO - codeparrot_training - Step 37607: {'lr': 7.251971400389376e-05, 'samples': 19255296, 'steps': 37607, 'batch_loss/train': 0.7041830664966255} +12/27/2021 22:03:36 - INFO - codeparrot_training - Step 37608: {'lr': 7.250861267804201e-05, 'samples': 19255808, 'steps': 37608, 'batch_loss/train': 0.7318011987954378} +12/27/2021 22:03:46 - INFO - codeparrot_training - Step 37609: {'lr': 7.249751205782901e-05, 'samples': 19256320, 'steps': 37609, 'batch_loss/train': 0.6468901163898408} +12/27/2021 22:03:58 - INFO - codeparrot_training - Step 37610: {'lr': 7.24864121432989e-05, 'samples': 19256832, 'steps': 37610, 'batch_loss/train': 0.8126379838213325} +12/27/2021 22:04:09 - INFO - codeparrot_training - Step 37611: {'lr': 7.247531293449583e-05, 'samples': 19257344, 'steps': 37611, 'batch_loss/train': 0.7302650075871497} +12/27/2021 22:04:20 - INFO - codeparrot_training - Step 37612: {'lr': 7.246421443146376e-05, 'samples': 19257856, 'steps': 37612, 'batch_loss/train': 0.7382661742158234} +12/27/2021 22:04:32 - INFO - codeparrot_training - Step 37613: {'lr': 7.2453116634247e-05, 'samples': 19258368, 'steps': 37613, 'batch_loss/train': 0.6492467033676803} +12/27/2021 22:04:43 - INFO - codeparrot_training - Step 37614: {'lr': 7.244201954288971e-05, 'samples': 19258880, 'steps': 37614, 'batch_loss/train': 0.6949067420791835} +12/27/2021 22:04:53 - INFO - codeparrot_training - Step 37615: {'lr': 7.243092315743572e-05, 'samples': 19259392, 'steps': 37615, 'batch_loss/train': 0.6511425985372625} +12/27/2021 22:05:07 - INFO - codeparrot_training - Step 37616: {'lr': 7.241982747792941e-05, 'samples': 19259904, 'steps': 37616, 'batch_loss/train': 0.8011859068647027} +12/27/2021 22:05:18 - INFO - codeparrot_training - Step 37617: {'lr': 7.24087325044149e-05, 'samples': 19260416, 'steps': 37617, 'batch_loss/train': 0.6427650232799351} +12/27/2021 22:05:28 - INFO - codeparrot_training - Step 37618: {'lr': 7.239763823693615e-05, 'samples': 19260928, 'steps': 37618, 'batch_loss/train': 0.7219798604492098} +12/27/2021 22:05:39 - INFO - codeparrot_training - Step 37619: {'lr': 7.238654467553732e-05, 'samples': 19261440, 'steps': 37619, 'batch_loss/train': 0.7229154612869024} +12/27/2021 22:05:51 - INFO - codeparrot_training - Step 37620: {'lr': 7.237545182026253e-05, 'samples': 19261952, 'steps': 37620, 'batch_loss/train': 0.6866389036295004} +12/27/2021 22:06:02 - INFO - codeparrot_training - Step 37621: {'lr': 7.236435967115587e-05, 'samples': 19262464, 'steps': 37621, 'batch_loss/train': 0.704981941729784} +12/27/2021 22:06:12 - INFO - codeparrot_training - Step 37622: {'lr': 7.235326822826144e-05, 'samples': 19262976, 'steps': 37622, 'batch_loss/train': 0.7946238797158003} +12/27/2021 22:06:27 - INFO - codeparrot_training - Step 37623: {'lr': 7.234217749162336e-05, 'samples': 19263488, 'steps': 37623, 'batch_loss/train': 0.7669832520186901} +12/27/2021 22:06:37 - INFO - codeparrot_training - Step 37624: {'lr': 7.233108746128566e-05, 'samples': 19264000, 'steps': 37624, 'batch_loss/train': 0.6352514360332862} +12/27/2021 22:06:48 - INFO - codeparrot_training - Step 37625: {'lr': 7.23199981372926e-05, 'samples': 19264512, 'steps': 37625, 'batch_loss/train': 0.6720801843330264} +12/27/2021 22:07:00 - INFO - codeparrot_training - Step 37626: {'lr': 7.230890951968802e-05, 'samples': 19265024, 'steps': 37626, 'batch_loss/train': 0.4913104111328721} +12/27/2021 22:07:11 - INFO - codeparrot_training - Step 37627: {'lr': 7.229782160851606e-05, 'samples': 19265536, 'steps': 37627, 'batch_loss/train': 0.5031326857279055} +12/27/2021 22:07:21 - INFO - codeparrot_training - Step 37628: {'lr': 7.228673440382102e-05, 'samples': 19266048, 'steps': 37628, 'batch_loss/train': 0.5517230805126019} +12/27/2021 22:07:32 - INFO - codeparrot_training - Step 37629: {'lr': 7.227564790564676e-05, 'samples': 19266560, 'steps': 37629, 'batch_loss/train': 0.5019021203916054} +12/27/2021 22:07:44 - INFO - codeparrot_training - Step 37630: {'lr': 7.226456211403728e-05, 'samples': 19267072, 'steps': 37630, 'batch_loss/train': 0.6399957410467323} +12/27/2021 22:07:55 - INFO - codeparrot_training - Step 37631: {'lr': 7.225347702903701e-05, 'samples': 19267584, 'steps': 37631, 'batch_loss/train': 0.5975048750115093} +12/27/2021 22:08:05 - INFO - codeparrot_training - Step 37632: {'lr': 7.224239265068969e-05, 'samples': 19268096, 'steps': 37632, 'batch_loss/train': 0.7233954169787467} +12/27/2021 22:08:17 - INFO - codeparrot_training - Step 37633: {'lr': 7.223130897903952e-05, 'samples': 19268608, 'steps': 37633, 'batch_loss/train': 0.8030933570116758} +12/27/2021 22:08:28 - INFO - codeparrot_training - Step 37634: {'lr': 7.222022601413053e-05, 'samples': 19269120, 'steps': 37634, 'batch_loss/train': 0.5719752920267638} +12/27/2021 22:08:39 - INFO - codeparrot_training - Step 37635: {'lr': 7.220914375600679e-05, 'samples': 19269632, 'steps': 37635, 'batch_loss/train': 0.7456176830455661} +12/27/2021 22:08:53 - INFO - codeparrot_training - Step 37636: {'lr': 7.219806220471237e-05, 'samples': 19270144, 'steps': 37636, 'batch_loss/train': 0.7538625374436378} +12/27/2021 22:09:03 - INFO - codeparrot_training - Step 37637: {'lr': 7.218698136029139e-05, 'samples': 19270656, 'steps': 37637, 'batch_loss/train': 0.7327590929344296} +12/27/2021 22:09:14 - INFO - codeparrot_training - Step 37638: {'lr': 7.217590122278769e-05, 'samples': 19271168, 'steps': 37638, 'batch_loss/train': 0.7790881767868996} +12/27/2021 22:09:26 - INFO - codeparrot_training - Step 37639: {'lr': 7.216482179224554e-05, 'samples': 19271680, 'steps': 37639, 'batch_loss/train': 0.7020890226121992} +12/27/2021 22:09:36 - INFO - codeparrot_training - Step 37640: {'lr': 7.215374306870898e-05, 'samples': 19272192, 'steps': 37640, 'batch_loss/train': 0.7570943580940366} +12/27/2021 22:09:47 - INFO - codeparrot_training - Step 37641: {'lr': 7.214266505222183e-05, 'samples': 19272704, 'steps': 37641, 'batch_loss/train': 0.7487537865526974} +12/27/2021 22:09:58 - INFO - codeparrot_training - Step 37642: {'lr': 7.213158774282836e-05, 'samples': 19273216, 'steps': 37642, 'batch_loss/train': 0.745124832727015} +12/27/2021 22:10:10 - INFO - codeparrot_training - Step 37643: {'lr': 7.212051114057264e-05, 'samples': 19273728, 'steps': 37643, 'batch_loss/train': 0.7072840123437345} +12/27/2021 22:10:20 - INFO - codeparrot_training - Step 37644: {'lr': 7.210943524549848e-05, 'samples': 19274240, 'steps': 37644, 'batch_loss/train': 0.6029975588899106} +12/27/2021 22:10:31 - INFO - codeparrot_training - Step 37645: {'lr': 7.209836005765008e-05, 'samples': 19274752, 'steps': 37645, 'batch_loss/train': 0.826749118976295} +12/27/2021 22:10:45 - INFO - codeparrot_training - Step 37646: {'lr': 7.208728557707139e-05, 'samples': 19275264, 'steps': 37646, 'batch_loss/train': 0.6495822072029114} +12/27/2021 22:10:56 - INFO - codeparrot_training - Step 37647: {'lr': 7.207621180380653e-05, 'samples': 19275776, 'steps': 37647, 'batch_loss/train': 0.6877372634480707} +12/27/2021 22:11:06 - INFO - codeparrot_training - Step 37648: {'lr': 7.20651387378994e-05, 'samples': 19276288, 'steps': 37648, 'batch_loss/train': 0.8380974140018225} +12/27/2021 22:11:18 - INFO - codeparrot_training - Step 37649: {'lr': 7.205406637939415e-05, 'samples': 19276800, 'steps': 37649, 'batch_loss/train': 0.6473179887980223} +12/27/2021 22:11:29 - INFO - codeparrot_training - Step 37650: {'lr': 7.204299472833472e-05, 'samples': 19277312, 'steps': 37650, 'batch_loss/train': 0.6728856889531016} +12/27/2021 22:11:40 - INFO - codeparrot_training - Step 37651: {'lr': 7.203192378476514e-05, 'samples': 19277824, 'steps': 37651, 'batch_loss/train': 0.5991701949387789} +12/27/2021 22:11:52 - INFO - codeparrot_training - Step 37652: {'lr': 7.202085354872945e-05, 'samples': 19278336, 'steps': 37652, 'batch_loss/train': 0.7316712076426484} +12/27/2021 22:12:03 - INFO - codeparrot_training - Step 37653: {'lr': 7.200978402027165e-05, 'samples': 19278848, 'steps': 37653, 'batch_loss/train': 0.6700221430510283} +12/27/2021 22:12:13 - INFO - codeparrot_training - Step 37654: {'lr': 7.199871519943579e-05, 'samples': 19279360, 'steps': 37654, 'batch_loss/train': 0.7820111168548465} +12/27/2021 22:12:24 - INFO - codeparrot_training - Step 37655: {'lr': 7.198764708626576e-05, 'samples': 19279872, 'steps': 37655, 'batch_loss/train': 0.7692466708831489} +12/27/2021 22:12:38 - INFO - codeparrot_training - Step 37656: {'lr': 7.197657968080557e-05, 'samples': 19280384, 'steps': 37656, 'batch_loss/train': 0.6207343883579597} +12/27/2021 22:12:48 - INFO - codeparrot_training - Step 37657: {'lr': 7.196551298309942e-05, 'samples': 19280896, 'steps': 37657, 'batch_loss/train': 0.6026344685815275} +12/27/2021 22:12:59 - INFO - codeparrot_training - Step 37658: {'lr': 7.195444699319109e-05, 'samples': 19281408, 'steps': 37658, 'batch_loss/train': 0.6329897353425622} +12/27/2021 22:13:11 - INFO - codeparrot_training - Step 37659: {'lr': 7.194338171112466e-05, 'samples': 19281920, 'steps': 37659, 'batch_loss/train': 0.6560767454793677} +12/27/2021 22:13:22 - INFO - codeparrot_training - Step 37660: {'lr': 7.193231713694412e-05, 'samples': 19282432, 'steps': 37660, 'batch_loss/train': 0.8935585087165236} +12/27/2021 22:13:32 - INFO - codeparrot_training - Step 37661: {'lr': 7.192125327069343e-05, 'samples': 19282944, 'steps': 37661, 'batch_loss/train': 0.6987595846876502} +12/27/2021 22:13:43 - INFO - codeparrot_training - Step 37662: {'lr': 7.191019011241662e-05, 'samples': 19283456, 'steps': 37662, 'batch_loss/train': 0.9598379447124898} +12/27/2021 22:13:57 - INFO - codeparrot_training - Step 37663: {'lr': 7.18991276621577e-05, 'samples': 19283968, 'steps': 37663, 'batch_loss/train': 0.7476274846121669} +12/27/2021 22:14:08 - INFO - codeparrot_training - Step 37664: {'lr': 7.188806591996048e-05, 'samples': 19284480, 'steps': 37664, 'batch_loss/train': 0.7337194085121155} +12/27/2021 22:14:18 - INFO - codeparrot_training - Step 37665: {'lr': 7.18770048858691e-05, 'samples': 19284992, 'steps': 37665, 'batch_loss/train': 0.7539177192375064} +12/27/2021 22:14:30 - INFO - codeparrot_training - Step 37666: {'lr': 7.186594455992759e-05, 'samples': 19285504, 'steps': 37666, 'batch_loss/train': 0.6856266758404672} +12/27/2021 22:14:41 - INFO - codeparrot_training - Step 37667: {'lr': 7.185488494217968e-05, 'samples': 19286016, 'steps': 37667, 'batch_loss/train': 0.7234894565772265} +12/27/2021 22:14:52 - INFO - codeparrot_training - Step 37668: {'lr': 7.184382603266953e-05, 'samples': 19286528, 'steps': 37668, 'batch_loss/train': 0.704360100440681} +12/27/2021 22:15:04 - INFO - codeparrot_training - Step 37669: {'lr': 7.183276783144116e-05, 'samples': 19287040, 'steps': 37669, 'batch_loss/train': 0.7820108300074935} +12/27/2021 22:15:14 - INFO - codeparrot_training - Step 37670: {'lr': 7.182171033853832e-05, 'samples': 19287552, 'steps': 37670, 'batch_loss/train': 0.678535025101155} +12/27/2021 22:15:25 - INFO - codeparrot_training - Step 37671: {'lr': 7.181065355400512e-05, 'samples': 19288064, 'steps': 37671, 'batch_loss/train': 0.735181087278761} +12/27/2021 22:15:37 - INFO - codeparrot_training - Step 37672: {'lr': 7.179959747788545e-05, 'samples': 19288576, 'steps': 37672, 'batch_loss/train': 0.7054260922595859} +12/27/2021 22:15:48 - INFO - codeparrot_training - Step 37673: {'lr': 7.178854211022331e-05, 'samples': 19289088, 'steps': 37673, 'batch_loss/train': 0.6441681855358183} +12/27/2021 22:15:58 - INFO - codeparrot_training - Step 37674: {'lr': 7.177748745106263e-05, 'samples': 19289600, 'steps': 37674, 'batch_loss/train': 0.8051113532856107} +12/27/2021 22:16:09 - INFO - codeparrot_training - Step 37675: {'lr': 7.17664335004474e-05, 'samples': 19290112, 'steps': 37675, 'batch_loss/train': 0.7336748787201941} +12/27/2021 22:16:23 - INFO - codeparrot_training - Step 37676: {'lr': 7.175538025842149e-05, 'samples': 19290624, 'steps': 37676, 'batch_loss/train': 0.7427348792552948} +12/27/2021 22:16:34 - INFO - codeparrot_training - Step 37677: {'lr': 7.17443277250289e-05, 'samples': 19291136, 'steps': 37677, 'batch_loss/train': 0.677530046319589} +12/27/2021 22:16:44 - INFO - codeparrot_training - Step 37678: {'lr': 7.173327590031355e-05, 'samples': 19291648, 'steps': 37678, 'batch_loss/train': 0.7553304601460695} +12/27/2021 22:16:56 - INFO - codeparrot_training - Step 37679: {'lr': 7.172222478431939e-05, 'samples': 19292160, 'steps': 37679, 'batch_loss/train': 0.6300845937803388} +12/27/2021 22:17:07 - INFO - codeparrot_training - Step 37680: {'lr': 7.171117437709044e-05, 'samples': 19292672, 'steps': 37680, 'batch_loss/train': 0.6250209819991142} +12/27/2021 22:17:18 - INFO - codeparrot_training - Step 37681: {'lr': 7.170012467867046e-05, 'samples': 19293184, 'steps': 37681, 'batch_loss/train': 0.6950753229903057} +12/27/2021 22:17:30 - INFO - codeparrot_training - Step 37682: {'lr': 7.168907568910335e-05, 'samples': 19293696, 'steps': 37682, 'batch_loss/train': 0.7403335068374872} +12/27/2021 22:17:40 - INFO - codeparrot_training - Step 37683: {'lr': 7.167802740843335e-05, 'samples': 19294208, 'steps': 37683, 'batch_loss/train': 0.659871599636972} +12/27/2021 22:17:51 - INFO - codeparrot_training - Step 37684: {'lr': 7.166697983670406e-05, 'samples': 19294720, 'steps': 37684, 'batch_loss/train': 0.6643612897023559} +12/27/2021 22:18:04 - INFO - codeparrot_training - Step 37685: {'lr': 7.165593297395955e-05, 'samples': 19295232, 'steps': 37685, 'batch_loss/train': 0.7913331035524607} +12/27/2021 22:18:15 - INFO - codeparrot_training - Step 37686: {'lr': 7.16448868202437e-05, 'samples': 19295744, 'steps': 37686, 'batch_loss/train': 0.6877202056348324} +12/27/2021 22:18:25 - INFO - codeparrot_training - Step 37687: {'lr': 7.163384137560044e-05, 'samples': 19296256, 'steps': 37687, 'batch_loss/train': 0.7970444988459349} +12/27/2021 22:18:36 - INFO - codeparrot_training - Step 37688: {'lr': 7.162279664007368e-05, 'samples': 19296768, 'steps': 37688, 'batch_loss/train': 0.7114815809763968} +12/27/2021 22:18:48 - INFO - codeparrot_training - Step 37689: {'lr': 7.16117526137073e-05, 'samples': 19297280, 'steps': 37689, 'batch_loss/train': 0.7132863039150834} +12/27/2021 22:18:59 - INFO - codeparrot_training - Step 37690: {'lr': 7.160070929654528e-05, 'samples': 19297792, 'steps': 37690, 'batch_loss/train': 0.7599714146926999} +12/27/2021 22:19:09 - INFO - codeparrot_training - Step 37691: {'lr': 7.158966668863146e-05, 'samples': 19298304, 'steps': 37691, 'batch_loss/train': 0.7192160994745791} +12/27/2021 22:19:23 - INFO - codeparrot_training - Step 37692: {'lr': 7.157862479000985e-05, 'samples': 19298816, 'steps': 37692, 'batch_loss/train': 0.5993905893701594} +12/27/2021 22:19:34 - INFO - codeparrot_training - Step 37693: {'lr': 7.156758360072408e-05, 'samples': 19299328, 'steps': 37693, 'batch_loss/train': 0.7135406763118226} +12/27/2021 22:19:45 - INFO - codeparrot_training - Step 37694: {'lr': 7.15565431208183e-05, 'samples': 19299840, 'steps': 37694, 'batch_loss/train': 0.6708690254017711} +12/27/2021 22:19:57 - INFO - codeparrot_training - Step 37695: {'lr': 7.154550335033641e-05, 'samples': 19300352, 'steps': 37695, 'batch_loss/train': 0.6873482265509665} +12/27/2021 22:20:07 - INFO - codeparrot_training - Step 37696: {'lr': 7.153446428932214e-05, 'samples': 19300864, 'steps': 37696, 'batch_loss/train': 0.8050324595533311} +12/27/2021 22:20:18 - INFO - codeparrot_training - Step 37697: {'lr': 7.152342593781941e-05, 'samples': 19301376, 'steps': 37697, 'batch_loss/train': 0.6872134949080646} +12/27/2021 22:20:29 - INFO - codeparrot_training - Step 37698: {'lr': 7.151238829587225e-05, 'samples': 19301888, 'steps': 37698, 'batch_loss/train': 0.7039257753640413} +12/27/2021 22:20:41 - INFO - codeparrot_training - Step 37699: {'lr': 7.15013513635244e-05, 'samples': 19302400, 'steps': 37699, 'batch_loss/train': 0.7412661481648684} +12/27/2021 22:20:51 - INFO - codeparrot_training - Step 37700: {'lr': 7.149031514081977e-05, 'samples': 19302912, 'steps': 37700, 'batch_loss/train': 1.2497080489993095} +12/27/2021 22:21:02 - INFO - codeparrot_training - Step 37701: {'lr': 7.147927962780223e-05, 'samples': 19303424, 'steps': 37701, 'batch_loss/train': 1.0254977969452739} +12/27/2021 22:21:16 - INFO - codeparrot_training - Step 37702: {'lr': 7.14682448245157e-05, 'samples': 19303936, 'steps': 37702, 'batch_loss/train': 0.683778828009963} +12/27/2021 22:21:27 - INFO - codeparrot_training - Step 37703: {'lr': 7.145721073100398e-05, 'samples': 19304448, 'steps': 37703, 'batch_loss/train': 0.776597942225635} +12/27/2021 22:21:37 - INFO - codeparrot_training - Step 37704: {'lr': 7.144617734731098e-05, 'samples': 19304960, 'steps': 37704, 'batch_loss/train': 0.7421112223528326} +12/27/2021 22:21:49 - INFO - codeparrot_training - Step 37705: {'lr': 7.143514467348059e-05, 'samples': 19305472, 'steps': 37705, 'batch_loss/train': 0.7349345623515546} +12/27/2021 22:22:00 - INFO - codeparrot_training - Step 37706: {'lr': 7.142411270955663e-05, 'samples': 19305984, 'steps': 37706, 'batch_loss/train': 0.6893097334832419} +12/27/2021 22:22:11 - INFO - codeparrot_training - Step 37707: {'lr': 7.1413081455583e-05, 'samples': 19306496, 'steps': 37707, 'batch_loss/train': 0.762250856962055} +12/27/2021 22:22:21 - INFO - codeparrot_training - Step 37708: {'lr': 7.140205091160343e-05, 'samples': 19307008, 'steps': 37708, 'batch_loss/train': 0.7801965046674013} +12/27/2021 22:22:34 - INFO - codeparrot_training - Step 37709: {'lr': 7.139102107766199e-05, 'samples': 19307520, 'steps': 37709, 'batch_loss/train': 0.7736815912649035} +12/27/2021 22:22:44 - INFO - codeparrot_training - Step 37710: {'lr': 7.137999195380235e-05, 'samples': 19308032, 'steps': 37710, 'batch_loss/train': 0.757076499518007} +12/27/2021 22:22:55 - INFO - codeparrot_training - Step 37711: {'lr': 7.136896354006841e-05, 'samples': 19308544, 'steps': 37711, 'batch_loss/train': 0.6630944402422756} +12/27/2021 22:23:07 - INFO - codeparrot_training - Step 37712: {'lr': 7.135793583650405e-05, 'samples': 19309056, 'steps': 37712, 'batch_loss/train': 0.6876103011891246} +12/27/2021 22:23:18 - INFO - codeparrot_training - Step 37713: {'lr': 7.134690884315306e-05, 'samples': 19309568, 'steps': 37713, 'batch_loss/train': 0.651910643093288} +12/27/2021 22:23:29 - INFO - codeparrot_training - Step 37714: {'lr': 7.133588256005929e-05, 'samples': 19310080, 'steps': 37714, 'batch_loss/train': 0.6657103085890412} +12/27/2021 22:23:43 - INFO - codeparrot_training - Step 37715: {'lr': 7.132485698726663e-05, 'samples': 19310592, 'steps': 37715, 'batch_loss/train': 0.7529480773955584} +12/27/2021 22:23:54 - INFO - codeparrot_training - Step 37716: {'lr': 7.131383212481884e-05, 'samples': 19311104, 'steps': 37716, 'batch_loss/train': 0.7461924776434898} +12/27/2021 22:24:04 - INFO - codeparrot_training - Step 37717: {'lr': 7.130280797275979e-05, 'samples': 19311616, 'steps': 37717, 'batch_loss/train': 0.691386794205755} +12/27/2021 22:24:16 - INFO - codeparrot_training - Step 37718: {'lr': 7.129178453113338e-05, 'samples': 19312128, 'steps': 37718, 'batch_loss/train': 0.6811860461020842} +12/27/2021 22:24:27 - INFO - codeparrot_training - Step 37719: {'lr': 7.128076179998318e-05, 'samples': 19312640, 'steps': 37719, 'batch_loss/train': 0.7147763656685129} +12/27/2021 22:24:38 - INFO - codeparrot_training - Step 37720: {'lr': 7.126973977935331e-05, 'samples': 19313152, 'steps': 37720, 'batch_loss/train': 0.8041354706510901} +12/27/2021 22:24:48 - INFO - codeparrot_training - Step 37721: {'lr': 7.125871846928752e-05, 'samples': 19313664, 'steps': 37721, 'batch_loss/train': 0.806397957727313} +12/27/2021 22:25:00 - INFO - codeparrot_training - Step 37722: {'lr': 7.124769786982951e-05, 'samples': 19314176, 'steps': 37722, 'batch_loss/train': 0.69436871772632} +12/27/2021 22:25:11 - INFO - codeparrot_training - Step 37723: {'lr': 7.12366779810231e-05, 'samples': 19314688, 'steps': 37723, 'batch_loss/train': 0.7383195860311389} +12/27/2021 22:25:22 - INFO - codeparrot_training - Step 37724: {'lr': 7.122565880291229e-05, 'samples': 19315200, 'steps': 37724, 'batch_loss/train': 0.7881569415330887} +12/27/2021 22:25:35 - INFO - codeparrot_training - Step 37725: {'lr': 7.121464033554068e-05, 'samples': 19315712, 'steps': 37725, 'batch_loss/train': 0.7418952230364084} +12/27/2021 22:25:46 - INFO - codeparrot_training - Step 37726: {'lr': 7.120362257895215e-05, 'samples': 19316224, 'steps': 37726, 'batch_loss/train': 0.7400654200464487} +12/27/2021 22:25:57 - INFO - codeparrot_training - Step 37727: {'lr': 7.119260553319052e-05, 'samples': 19316736, 'steps': 37727, 'batch_loss/train': 0.644485330209136} +12/27/2021 22:26:09 - INFO - codeparrot_training - Step 37728: {'lr': 7.118158919829956e-05, 'samples': 19317248, 'steps': 37728, 'batch_loss/train': 1.5245349258184433} +12/27/2021 22:26:20 - INFO - codeparrot_training - Step 37729: {'lr': 7.117057357432307e-05, 'samples': 19317760, 'steps': 37729, 'batch_loss/train': 0.7088679615408182} +12/27/2021 22:26:30 - INFO - codeparrot_training - Step 37730: {'lr': 7.115955866130488e-05, 'samples': 19318272, 'steps': 37730, 'batch_loss/train': 0.7643063925206661} +12/27/2021 22:26:44 - INFO - codeparrot_training - Step 37731: {'lr': 7.114854445928876e-05, 'samples': 19318784, 'steps': 37731, 'batch_loss/train': 0.7451301638502628} +12/27/2021 22:26:55 - INFO - codeparrot_training - Step 37732: {'lr': 7.113753096831849e-05, 'samples': 19319296, 'steps': 37732, 'batch_loss/train': 0.7505029446911067} +12/27/2021 22:27:05 - INFO - codeparrot_training - Step 37733: {'lr': 7.112651818843793e-05, 'samples': 19319808, 'steps': 37733, 'batch_loss/train': 0.6421818550443277} +12/27/2021 22:27:16 - INFO - codeparrot_training - Step 37734: {'lr': 7.111550611969064e-05, 'samples': 19320320, 'steps': 37734, 'batch_loss/train': 0.7656396338716149} +12/27/2021 22:27:28 - INFO - codeparrot_training - Step 37735: {'lr': 7.110449476212072e-05, 'samples': 19320832, 'steps': 37735, 'batch_loss/train': 0.7247192589566112} +12/27/2021 22:27:39 - INFO - codeparrot_training - Step 37736: {'lr': 7.109348411577171e-05, 'samples': 19321344, 'steps': 37736, 'batch_loss/train': 0.7669390579685569} +12/27/2021 22:27:49 - INFO - codeparrot_training - Step 37737: {'lr': 7.108247418068744e-05, 'samples': 19321856, 'steps': 37737, 'batch_loss/train': 0.6649328228086233} +12/27/2021 22:28:01 - INFO - codeparrot_training - Step 37738: {'lr': 7.107146495691169e-05, 'samples': 19322368, 'steps': 37738, 'batch_loss/train': 0.6387833282351494} +12/27/2021 22:28:12 - INFO - codeparrot_training - Step 37739: {'lr': 7.106045644448825e-05, 'samples': 19322880, 'steps': 37739, 'batch_loss/train': 0.736434318125248} +12/27/2021 22:28:23 - INFO - codeparrot_training - Step 37740: {'lr': 7.104944864346086e-05, 'samples': 19323392, 'steps': 37740, 'batch_loss/train': 0.7671254118904471} +12/27/2021 22:28:37 - INFO - codeparrot_training - Step 37741: {'lr': 7.103844155387329e-05, 'samples': 19323904, 'steps': 37741, 'batch_loss/train': 0.6909619951620698} +12/27/2021 22:28:47 - INFO - codeparrot_training - Step 37742: {'lr': 7.102743517576932e-05, 'samples': 19324416, 'steps': 37742, 'batch_loss/train': 0.7159868897870183} +12/27/2021 22:28:58 - INFO - codeparrot_training - Step 37743: {'lr': 7.101642950919266e-05, 'samples': 19324928, 'steps': 37743, 'batch_loss/train': 0.7169725252315402} +12/27/2021 22:29:09 - INFO - codeparrot_training - Step 37744: {'lr': 7.10054245541872e-05, 'samples': 19325440, 'steps': 37744, 'batch_loss/train': 0.7780646746978164} +12/27/2021 22:29:21 - INFO - codeparrot_training - Step 37745: {'lr': 7.099442031079643e-05, 'samples': 19325952, 'steps': 37745, 'batch_loss/train': 0.7310085408389568} +12/27/2021 22:29:31 - INFO - codeparrot_training - Step 37746: {'lr': 7.098341677906431e-05, 'samples': 19326464, 'steps': 37746, 'batch_loss/train': 0.7828149297856726} +12/27/2021 22:29:42 - INFO - codeparrot_training - Step 37747: {'lr': 7.097241395903461e-05, 'samples': 19326976, 'steps': 37747, 'batch_loss/train': 0.6459057962056249} +12/27/2021 22:29:55 - INFO - codeparrot_training - Step 37748: {'lr': 7.096141185075095e-05, 'samples': 19327488, 'steps': 37748, 'batch_loss/train': 0.7539773648604751} +12/27/2021 22:30:06 - INFO - codeparrot_training - Step 37749: {'lr': 7.095041045425701e-05, 'samples': 19328000, 'steps': 37749, 'batch_loss/train': 0.6842559366486967} +12/27/2021 22:30:16 - INFO - codeparrot_training - Step 37750: {'lr': 7.093940976959679e-05, 'samples': 19328512, 'steps': 37750, 'batch_loss/train': 0.916247732937336} +12/27/2021 22:30:29 - INFO - codeparrot_training - Step 37751: {'lr': 7.092840979681382e-05, 'samples': 19329024, 'steps': 37751, 'batch_loss/train': 0.7756955958902836} +12/27/2021 22:30:39 - INFO - codeparrot_training - Step 37752: {'lr': 7.091741053595175e-05, 'samples': 19329536, 'steps': 37752, 'batch_loss/train': 0.6793171493336558} +12/27/2021 22:30:50 - INFO - codeparrot_training - Step 37753: {'lr': 7.090641198705461e-05, 'samples': 19330048, 'steps': 37753, 'batch_loss/train': 0.9156065515708178} +12/27/2021 22:31:00 - INFO - codeparrot_training - Step 37754: {'lr': 7.089541415016587e-05, 'samples': 19330560, 'steps': 37754, 'batch_loss/train': 0.5841875427868217} +12/27/2021 22:31:13 - INFO - codeparrot_training - Step 37755: {'lr': 7.088441702532936e-05, 'samples': 19331072, 'steps': 37755, 'batch_loss/train': 0.6692193299531937} +12/27/2021 22:31:23 - INFO - codeparrot_training - Step 37756: {'lr': 7.087342061258876e-05, 'samples': 19331584, 'steps': 37756, 'batch_loss/train': 0.7509391305502504} +12/27/2021 22:31:34 - INFO - codeparrot_training - Step 37757: {'lr': 7.08624249119878e-05, 'samples': 19332096, 'steps': 37757, 'batch_loss/train': 0.6732246353058144} +12/27/2021 22:31:46 - INFO - codeparrot_training - Step 37758: {'lr': 7.085142992357022e-05, 'samples': 19332608, 'steps': 37758, 'batch_loss/train': 0.7409005360677838} +12/27/2021 22:31:57 - INFO - codeparrot_training - Step 37759: {'lr': 7.084043564737978e-05, 'samples': 19333120, 'steps': 37759, 'batch_loss/train': 0.7652485594153404} +12/27/2021 22:32:07 - INFO - codeparrot_training - Step 37760: {'lr': 7.082944208345992e-05, 'samples': 19333632, 'steps': 37760, 'batch_loss/train': 0.7245217561721802} +12/27/2021 22:32:21 - INFO - codeparrot_training - Step 37761: {'lr': 7.081844923185477e-05, 'samples': 19334144, 'steps': 37761, 'batch_loss/train': 0.7215969683602452} +12/27/2021 22:32:32 - INFO - codeparrot_training - Step 37762: {'lr': 7.080745709260769e-05, 'samples': 19334656, 'steps': 37762, 'batch_loss/train': 0.7984918975271285} +12/27/2021 22:32:43 - INFO - codeparrot_training - Step 37763: {'lr': 7.079646566576242e-05, 'samples': 19335168, 'steps': 37763, 'batch_loss/train': 0.6860335287638009} +12/27/2021 22:32:55 - INFO - codeparrot_training - Step 37764: {'lr': 7.078547495136293e-05, 'samples': 19335680, 'steps': 37764, 'batch_loss/train': 0.6663749809376895} +12/27/2021 22:33:05 - INFO - codeparrot_training - Step 37765: {'lr': 7.07744849494526e-05, 'samples': 19336192, 'steps': 37765, 'batch_loss/train': 0.7525169705040753} +12/27/2021 22:33:16 - INFO - codeparrot_training - Step 37766: {'lr': 7.076349566007525e-05, 'samples': 19336704, 'steps': 37766, 'batch_loss/train': 0.6933405492454767} +12/27/2021 22:33:26 - INFO - codeparrot_training - Step 37767: {'lr': 7.07525070832746e-05, 'samples': 19337216, 'steps': 37767, 'batch_loss/train': 0.7870901552960277} +12/27/2021 22:33:39 - INFO - codeparrot_training - Step 37768: {'lr': 7.074151921909428e-05, 'samples': 19337728, 'steps': 37768, 'batch_loss/train': 0.7281772457063198} +12/27/2021 22:33:49 - INFO - codeparrot_training - Step 37769: {'lr': 7.073053206757798e-05, 'samples': 19338240, 'steps': 37769, 'batch_loss/train': 0.7815419272519648} +12/27/2021 22:34:00 - INFO - codeparrot_training - Step 37770: {'lr': 7.07195456287695e-05, 'samples': 19338752, 'steps': 37770, 'batch_loss/train': 0.7204481270164251} +12/27/2021 22:34:14 - INFO - codeparrot_training - Step 37771: {'lr': 7.070855990271225e-05, 'samples': 19339264, 'steps': 37771, 'batch_loss/train': 0.7573629673570395} +12/27/2021 22:34:24 - INFO - codeparrot_training - Step 37772: {'lr': 7.069757488945014e-05, 'samples': 19339776, 'steps': 37772, 'batch_loss/train': 0.6708491845056415} +12/27/2021 22:34:35 - INFO - codeparrot_training - Step 37773: {'lr': 7.068659058902685e-05, 'samples': 19340288, 'steps': 37773, 'batch_loss/train': 0.7069462821818888} +12/27/2021 22:34:47 - INFO - codeparrot_training - Step 37774: {'lr': 7.06756070014859e-05, 'samples': 19340800, 'steps': 37774, 'batch_loss/train': 0.782282548956573} +12/27/2021 22:34:57 - INFO - codeparrot_training - Step 37775: {'lr': 7.066462412687091e-05, 'samples': 19341312, 'steps': 37775, 'batch_loss/train': 0.7521203476935625} +12/27/2021 22:35:08 - INFO - codeparrot_training - Step 37776: {'lr': 7.065364196522587e-05, 'samples': 19341824, 'steps': 37776, 'batch_loss/train': 0.7386509501375258} +12/27/2021 22:35:19 - INFO - codeparrot_training - Step 37777: {'lr': 7.064266051659413e-05, 'samples': 19342336, 'steps': 37777, 'batch_loss/train': 0.6675137211568654} +12/27/2021 22:35:32 - INFO - codeparrot_training - Step 37778: {'lr': 7.063167978101937e-05, 'samples': 19342848, 'steps': 37778, 'batch_loss/train': 0.7471154672093689} +12/27/2021 22:35:43 - INFO - codeparrot_training - Step 37779: {'lr': 7.062069975854546e-05, 'samples': 19343360, 'steps': 37779, 'batch_loss/train': 0.7851870507001877} +12/27/2021 22:35:53 - INFO - codeparrot_training - Step 37780: {'lr': 7.060972044921587e-05, 'samples': 19343872, 'steps': 37780, 'batch_loss/train': 0.6631940993247554} +12/27/2021 22:36:06 - INFO - codeparrot_training - Step 37781: {'lr': 7.059874185307427e-05, 'samples': 19344384, 'steps': 37781, 'batch_loss/train': 0.9415081916376948} +12/27/2021 22:36:16 - INFO - codeparrot_training - Step 37782: {'lr': 7.058776397016436e-05, 'samples': 19344896, 'steps': 37782, 'batch_loss/train': 0.616702280472964} +12/27/2021 22:36:27 - INFO - codeparrot_training - Step 37783: {'lr': 7.057678680052975e-05, 'samples': 19345408, 'steps': 37783, 'batch_loss/train': 0.8048212407156825} +12/27/2021 22:36:41 - INFO - codeparrot_training - Step 37784: {'lr': 7.056581034421408e-05, 'samples': 19345920, 'steps': 37784, 'batch_loss/train': 0.6245708339847624} +12/27/2021 22:36:52 - INFO - codeparrot_training - Step 37785: {'lr': 7.055483460126108e-05, 'samples': 19346432, 'steps': 37785, 'batch_loss/train': 0.7392778806388378} +12/27/2021 22:37:02 - INFO - codeparrot_training - Step 37786: {'lr': 7.054385957171413e-05, 'samples': 19346944, 'steps': 37786, 'batch_loss/train': 0.6942450497299433} +12/27/2021 22:37:13 - INFO - codeparrot_training - Step 37787: {'lr': 7.053288525561712e-05, 'samples': 19347456, 'steps': 37787, 'batch_loss/train': 0.7757116421125829} +12/27/2021 22:37:25 - INFO - codeparrot_training - Step 37788: {'lr': 7.05219116530137e-05, 'samples': 19347968, 'steps': 37788, 'batch_loss/train': 0.7353953155688941} +12/27/2021 22:37:36 - INFO - codeparrot_training - Step 37789: {'lr': 7.051093876394721e-05, 'samples': 19348480, 'steps': 37789, 'batch_loss/train': 0.6867673736996949} +12/27/2021 22:37:46 - INFO - codeparrot_training - Step 37790: {'lr': 7.049996658846162e-05, 'samples': 19348992, 'steps': 37790, 'batch_loss/train': 0.7031579811591655} +12/27/2021 22:37:59 - INFO - codeparrot_training - Step 37791: {'lr': 7.048899512660028e-05, 'samples': 19349504, 'steps': 37791, 'batch_loss/train': 0.7448503328487277} +12/27/2021 22:38:09 - INFO - codeparrot_training - Step 37792: {'lr': 7.047802437840695e-05, 'samples': 19350016, 'steps': 37792, 'batch_loss/train': 0.7577698146924376} +12/27/2021 22:38:20 - INFO - codeparrot_training - Step 37793: {'lr': 7.04670543439252e-05, 'samples': 19350528, 'steps': 37793, 'batch_loss/train': 0.7790978591656312} +12/27/2021 22:38:34 - INFO - codeparrot_training - Step 37794: {'lr': 7.045608502319864e-05, 'samples': 19351040, 'steps': 37794, 'batch_loss/train': 0.7666647477308288} +12/27/2021 22:38:45 - INFO - codeparrot_training - Step 37795: {'lr': 7.04451164162709e-05, 'samples': 19351552, 'steps': 37795, 'batch_loss/train': 0.8199271792545915} +12/27/2021 22:38:55 - INFO - codeparrot_training - Step 37796: {'lr': 7.043414852318558e-05, 'samples': 19352064, 'steps': 37796, 'batch_loss/train': 0.7065826305188239} +12/27/2021 22:39:08 - INFO - codeparrot_training - Step 37797: {'lr': 7.042318134398628e-05, 'samples': 19352576, 'steps': 37797, 'batch_loss/train': 0.7468431573361158} +12/27/2021 22:39:18 - INFO - codeparrot_training - Step 37798: {'lr': 7.041221487871663e-05, 'samples': 19353088, 'steps': 37798, 'batch_loss/train': 0.7876054020598531} +12/27/2021 22:39:29 - INFO - codeparrot_training - Step 37799: {'lr': 7.040124912742024e-05, 'samples': 19353600, 'steps': 37799, 'batch_loss/train': 0.6793751202058047} +12/27/2021 22:39:40 - INFO - codeparrot_training - Step 37800: {'lr': 7.039028409014055e-05, 'samples': 19354112, 'steps': 37800, 'batch_loss/train': 0.9693438392132521} +12/27/2021 22:39:54 - INFO - codeparrot_training - Step 37801: {'lr': 7.037931976692133e-05, 'samples': 19354624, 'steps': 37801, 'batch_loss/train': 0.7531082837376744} +12/27/2021 22:40:04 - INFO - codeparrot_training - Step 37802: {'lr': 7.036835615780621e-05, 'samples': 19355136, 'steps': 37802, 'batch_loss/train': 0.7558936877176166} +12/27/2021 22:40:15 - INFO - codeparrot_training - Step 37803: {'lr': 7.035739326283857e-05, 'samples': 19355648, 'steps': 37803, 'batch_loss/train': 0.8634629156440496} +12/27/2021 22:40:27 - INFO - codeparrot_training - Step 37804: {'lr': 7.034643108206206e-05, 'samples': 19356160, 'steps': 37804, 'batch_loss/train': 0.8729320880956948} +12/27/2021 22:40:38 - INFO - codeparrot_training - Step 37805: {'lr': 7.033546961552046e-05, 'samples': 19356672, 'steps': 37805, 'batch_loss/train': 0.7730695409700274} +12/27/2021 22:40:48 - INFO - codeparrot_training - Step 37806: {'lr': 7.032450886325709e-05, 'samples': 19357184, 'steps': 37806, 'batch_loss/train': 0.9062175329308957} +12/27/2021 22:41:01 - INFO - codeparrot_training - Step 37807: {'lr': 7.031354882531565e-05, 'samples': 19357696, 'steps': 37807, 'batch_loss/train': 0.796833235071972} +12/27/2021 22:41:11 - INFO - codeparrot_training - Step 37808: {'lr': 7.030258950173968e-05, 'samples': 19358208, 'steps': 37808, 'batch_loss/train': 0.6456293524242938} +12/27/2021 22:41:22 - INFO - codeparrot_training - Step 37809: {'lr': 7.029163089257276e-05, 'samples': 19358720, 'steps': 37809, 'batch_loss/train': 0.6938849687576294} +12/27/2021 22:41:33 - INFO - codeparrot_training - Step 37810: {'lr': 7.028067299785848e-05, 'samples': 19359232, 'steps': 37810, 'batch_loss/train': 0.7306367112323642} +12/27/2021 22:41:47 - INFO - codeparrot_training - Step 37811: {'lr': 7.026971581764043e-05, 'samples': 19359744, 'steps': 37811, 'batch_loss/train': 0.4827926770085469} +12/27/2021 22:41:58 - INFO - codeparrot_training - Step 37812: {'lr': 7.025875935196202e-05, 'samples': 19360256, 'steps': 37812, 'batch_loss/train': 0.8973042070865631} +12/27/2021 22:42:08 - INFO - codeparrot_training - Step 37813: {'lr': 7.024780360086697e-05, 'samples': 19360768, 'steps': 37813, 'batch_loss/train': 1.1133705666288733} +12/27/2021 22:42:20 - INFO - codeparrot_training - Step 37814: {'lr': 7.023684856439885e-05, 'samples': 19361280, 'steps': 37814, 'batch_loss/train': 0.8125955946743488} +12/27/2021 22:42:31 - INFO - codeparrot_training - Step 37815: {'lr': 7.022589424260097e-05, 'samples': 19361792, 'steps': 37815, 'batch_loss/train': 0.7861744603142142} +12/27/2021 22:42:42 - INFO - codeparrot_training - Step 37816: {'lr': 7.021494063551726e-05, 'samples': 19362304, 'steps': 37816, 'batch_loss/train': 0.7536648670211434} +12/27/2021 22:42:52 - INFO - codeparrot_training - Step 37817: {'lr': 7.020398774319096e-05, 'samples': 19362816, 'steps': 37817, 'batch_loss/train': 0.5955741568468511} +12/27/2021 22:43:06 - INFO - codeparrot_training - Step 37818: {'lr': 7.01930355656657e-05, 'samples': 19363328, 'steps': 37818, 'batch_loss/train': 0.7867474602535367} +12/27/2021 22:43:17 - INFO - codeparrot_training - Step 37819: {'lr': 7.018208410298505e-05, 'samples': 19363840, 'steps': 37819, 'batch_loss/train': 0.7463323408737779} +12/27/2021 22:43:28 - INFO - codeparrot_training - Step 37820: {'lr': 7.017113335519255e-05, 'samples': 19364352, 'steps': 37820, 'batch_loss/train': 0.6658764358144253} +12/27/2021 22:43:40 - INFO - codeparrot_training - Step 37821: {'lr': 7.01601833223317e-05, 'samples': 19364864, 'steps': 37821, 'batch_loss/train': 0.7677059599664062} +12/27/2021 22:43:50 - INFO - codeparrot_training - Step 37822: {'lr': 7.014923400444606e-05, 'samples': 19365376, 'steps': 37822, 'batch_loss/train': 0.6303273297380656} +12/27/2021 22:44:01 - INFO - codeparrot_training - Step 37823: {'lr': 7.013828540157919e-05, 'samples': 19365888, 'steps': 37823, 'batch_loss/train': 0.8393730036914349} +12/27/2021 22:44:13 - INFO - codeparrot_training - Step 37824: {'lr': 7.012733751377454e-05, 'samples': 19366400, 'steps': 37824, 'batch_loss/train': 0.7789520910009742} +12/27/2021 22:44:24 - INFO - codeparrot_training - Step 37825: {'lr': 7.011639034107578e-05, 'samples': 19366912, 'steps': 37825, 'batch_loss/train': 0.7488711578771472} +12/27/2021 22:44:34 - INFO - codeparrot_training - Step 37826: {'lr': 7.010544388352616e-05, 'samples': 19367424, 'steps': 37826, 'batch_loss/train': 0.8507143743336201} +12/27/2021 22:44:45 - INFO - codeparrot_training - Step 37827: {'lr': 7.009449814116948e-05, 'samples': 19367936, 'steps': 37827, 'batch_loss/train': 0.7528636949136853} +12/27/2021 22:44:58 - INFO - codeparrot_training - Step 37828: {'lr': 7.008355311404918e-05, 'samples': 19368448, 'steps': 37828, 'batch_loss/train': 0.8268692968413234} +12/27/2021 22:45:08 - INFO - codeparrot_training - Step 37829: {'lr': 7.00726088022087e-05, 'samples': 19368960, 'steps': 37829, 'batch_loss/train': 0.6269708890467882} +12/27/2021 22:45:19 - INFO - codeparrot_training - Step 37830: {'lr': 7.00616652056915e-05, 'samples': 19369472, 'steps': 37830, 'batch_loss/train': 0.659323084866628} +12/27/2021 22:45:33 - INFO - codeparrot_training - Step 37831: {'lr': 7.005072232454132e-05, 'samples': 19369984, 'steps': 37831, 'batch_loss/train': 0.788163305958733} +12/27/2021 22:45:43 - INFO - codeparrot_training - Step 37832: {'lr': 7.003978015880144e-05, 'samples': 19370496, 'steps': 37832, 'batch_loss/train': 0.7407432820182294} +12/27/2021 22:45:54 - INFO - codeparrot_training - Step 37833: {'lr': 7.002883870851545e-05, 'samples': 19371008, 'steps': 37833, 'batch_loss/train': 0.8519901231629774} +12/27/2021 22:46:06 - INFO - codeparrot_training - Step 37834: {'lr': 7.001789797372685e-05, 'samples': 19371520, 'steps': 37834, 'batch_loss/train': 0.7144193844869733} +12/27/2021 22:46:17 - INFO - codeparrot_training - Step 37835: {'lr': 7.000695795447915e-05, 'samples': 19372032, 'steps': 37835, 'batch_loss/train': 0.7603738233447075} +12/27/2021 22:46:28 - INFO - codeparrot_training - Step 37836: {'lr': 6.999601865081578e-05, 'samples': 19372544, 'steps': 37836, 'batch_loss/train': 0.8201711941510439} +12/27/2021 22:46:38 - INFO - codeparrot_training - Step 37837: {'lr': 6.998508006278029e-05, 'samples': 19373056, 'steps': 37837, 'batch_loss/train': 0.7943526263698004} +12/27/2021 22:46:50 - INFO - codeparrot_training - Step 37838: {'lr': 6.997414219041614e-05, 'samples': 19373568, 'steps': 37838, 'batch_loss/train': 0.700132871279493} +12/27/2021 22:47:01 - INFO - codeparrot_training - Step 37839: {'lr': 6.996320503376685e-05, 'samples': 19374080, 'steps': 37839, 'batch_loss/train': 0.7567326310090721} +12/27/2021 22:47:12 - INFO - codeparrot_training - Step 37840: {'lr': 6.995226859287595e-05, 'samples': 19374592, 'steps': 37840, 'batch_loss/train': 0.7081367088248953} +12/27/2021 22:47:25 - INFO - codeparrot_training - Step 37841: {'lr': 6.994133286778667e-05, 'samples': 19375104, 'steps': 37841, 'batch_loss/train': 0.7405070713721216} +12/27/2021 22:47:36 - INFO - codeparrot_training - Step 37842: {'lr': 6.993039785854277e-05, 'samples': 19375616, 'steps': 37842, 'batch_loss/train': 0.7190625158837065} +12/27/2021 22:47:47 - INFO - codeparrot_training - Step 37843: {'lr': 6.991946356518767e-05, 'samples': 19376128, 'steps': 37843, 'batch_loss/train': 0.755460349842906} +12/27/2021 22:47:59 - INFO - codeparrot_training - Step 37844: {'lr': 6.990852998776473e-05, 'samples': 19376640, 'steps': 37844, 'batch_loss/train': 0.7305247560143471} +12/27/2021 22:48:10 - INFO - codeparrot_training - Step 37845: {'lr': 6.989759712631747e-05, 'samples': 19377152, 'steps': 37845, 'batch_loss/train': 0.6193144407006912} +12/27/2021 22:48:20 - INFO - codeparrot_training - Step 37846: {'lr': 6.988666498088932e-05, 'samples': 19377664, 'steps': 37846, 'batch_loss/train': 0.7173534715548158} +12/27/2021 22:48:31 - INFO - codeparrot_training - Step 37847: {'lr': 6.987573355152382e-05, 'samples': 19378176, 'steps': 37847, 'batch_loss/train': 0.736738262232393} +12/27/2021 22:48:45 - INFO - codeparrot_training - Step 37848: {'lr': 6.986480283826441e-05, 'samples': 19378688, 'steps': 37848, 'batch_loss/train': 0.8080068775452673} +12/27/2021 22:48:55 - INFO - codeparrot_training - Step 37849: {'lr': 6.98538728411545e-05, 'samples': 19379200, 'steps': 37849, 'batch_loss/train': 0.6672481913119555} +12/27/2021 22:49:06 - INFO - codeparrot_training - Step 37850: {'lr': 6.984294356023757e-05, 'samples': 19379712, 'steps': 37850, 'batch_loss/train': 0.666537307202816} +12/27/2021 22:49:18 - INFO - codeparrot_training - Step 37851: {'lr': 6.983201499555717e-05, 'samples': 19380224, 'steps': 37851, 'batch_loss/train': 0.6753679383546114} +12/27/2021 22:49:29 - INFO - codeparrot_training - Step 37852: {'lr': 6.982108714715646e-05, 'samples': 19380736, 'steps': 37852, 'batch_loss/train': 0.6100402818992734} +12/27/2021 22:49:39 - INFO - codeparrot_training - Step 37853: {'lr': 6.981016001507917e-05, 'samples': 19381248, 'steps': 37853, 'batch_loss/train': 0.8586727164220065} +12/27/2021 22:49:51 - INFO - codeparrot_training - Step 37854: {'lr': 6.979923359936874e-05, 'samples': 19381760, 'steps': 37854, 'batch_loss/train': 0.8093118106480688} +12/27/2021 22:50:02 - INFO - codeparrot_training - Step 37855: {'lr': 6.97883079000684e-05, 'samples': 19382272, 'steps': 37855, 'batch_loss/train': 0.7556246720487252} +12/27/2021 22:50:13 - INFO - codeparrot_training - Step 37856: {'lr': 6.977738291722166e-05, 'samples': 19382784, 'steps': 37856, 'batch_loss/train': 0.7868364588357508} +12/27/2021 22:50:26 - INFO - codeparrot_training - Step 37857: {'lr': 6.976645865087213e-05, 'samples': 19383296, 'steps': 37857, 'batch_loss/train': 0.6694024513708428} +12/27/2021 22:50:37 - INFO - codeparrot_training - Step 37858: {'lr': 6.975553510106305e-05, 'samples': 19383808, 'steps': 37858, 'batch_loss/train': 0.7136656045913696} +12/27/2021 22:50:48 - INFO - codeparrot_training - Step 37859: {'lr': 6.97446122678379e-05, 'samples': 19384320, 'steps': 37859, 'batch_loss/train': 0.7828045906499028} +12/27/2021 22:50:58 - INFO - codeparrot_training - Step 37860: {'lr': 6.973369015124009e-05, 'samples': 19384832, 'steps': 37860, 'batch_loss/train': 0.8119308948516846} +12/27/2021 22:51:11 - INFO - codeparrot_training - Step 37861: {'lr': 6.972276875131309e-05, 'samples': 19385344, 'steps': 37861, 'batch_loss/train': 0.6767142943572253} +12/27/2021 22:51:21 - INFO - codeparrot_training - Step 37862: {'lr': 6.971184806810027e-05, 'samples': 19385856, 'steps': 37862, 'batch_loss/train': 0.7275316771119833} +12/27/2021 22:51:32 - INFO - codeparrot_training - Step 37863: {'lr': 6.970092810164508e-05, 'samples': 19386368, 'steps': 37863, 'batch_loss/train': 0.8396244717296213} +12/27/2021 22:51:44 - INFO - codeparrot_training - Step 37864: {'lr': 6.969000885199092e-05, 'samples': 19386880, 'steps': 37864, 'batch_loss/train': 0.814521555788815} +12/27/2021 22:51:55 - INFO - codeparrot_training - Step 37865: {'lr': 6.967909031918118e-05, 'samples': 19387392, 'steps': 37865, 'batch_loss/train': 0.7548109409399331} +12/27/2021 22:52:05 - INFO - codeparrot_training - Step 37866: {'lr': 6.966817250325941e-05, 'samples': 19387904, 'steps': 37866, 'batch_loss/train': 0.7378581836819649} +12/27/2021 22:52:17 - INFO - codeparrot_training - Step 37867: {'lr': 6.96572554042687e-05, 'samples': 19388416, 'steps': 37867, 'batch_loss/train': 0.6920118387788534} +12/27/2021 22:52:28 - INFO - codeparrot_training - Step 37868: {'lr': 6.964633902225275e-05, 'samples': 19388928, 'steps': 37868, 'batch_loss/train': 0.7137281728209928} +12/27/2021 22:52:39 - INFO - codeparrot_training - Step 37869: {'lr': 6.963542335725493e-05, 'samples': 19389440, 'steps': 37869, 'batch_loss/train': 0.7526291598333046} +12/27/2021 22:52:49 - INFO - codeparrot_training - Step 37870: {'lr': 6.962450840931847e-05, 'samples': 19389952, 'steps': 37870, 'batch_loss/train': 0.6539668259210885} +12/27/2021 22:53:03 - INFO - codeparrot_training - Step 37871: {'lr': 6.961359417848689e-05, 'samples': 19390464, 'steps': 37871, 'batch_loss/train': 0.7330927485600114} +12/27/2021 22:53:14 - INFO - codeparrot_training - Step 37872: {'lr': 6.960268066480352e-05, 'samples': 19390976, 'steps': 37872, 'batch_loss/train': 0.7410589121282101} +12/27/2021 22:53:24 - INFO - codeparrot_training - Step 37873: {'lr': 6.959176786831179e-05, 'samples': 19391488, 'steps': 37873, 'batch_loss/train': 0.7643704107031226} +12/27/2021 22:53:37 - INFO - codeparrot_training - Step 37874: {'lr': 6.958085578905507e-05, 'samples': 19392000, 'steps': 37874, 'batch_loss/train': 0.6982631431892514} +12/27/2021 22:53:47 - INFO - codeparrot_training - Step 37875: {'lr': 6.956994442707673e-05, 'samples': 19392512, 'steps': 37875, 'batch_loss/train': 0.7102837548591197} +12/27/2021 22:53:58 - INFO - codeparrot_training - Step 37876: {'lr': 6.955903378242018e-05, 'samples': 19393024, 'steps': 37876, 'batch_loss/train': 0.7496464485302567} +12/27/2021 22:54:12 - INFO - codeparrot_training - Step 37877: {'lr': 6.954812385512877e-05, 'samples': 19393536, 'steps': 37877, 'batch_loss/train': 0.7714387681335211} +12/27/2021 22:54:22 - INFO - codeparrot_training - Step 37878: {'lr': 6.95372146452459e-05, 'samples': 19394048, 'steps': 37878, 'batch_loss/train': 0.7430885415524244} +12/27/2021 22:54:33 - INFO - codeparrot_training - Step 37879: {'lr': 6.952630615281492e-05, 'samples': 19394560, 'steps': 37879, 'batch_loss/train': 0.8036569515243173} +12/27/2021 22:54:44 - INFO - codeparrot_training - Step 37880: {'lr': 6.951539837787926e-05, 'samples': 19395072, 'steps': 37880, 'batch_loss/train': 0.7423954326659441} +12/27/2021 22:54:56 - INFO - codeparrot_training - Step 37881: {'lr': 6.950449132048215e-05, 'samples': 19395584, 'steps': 37881, 'batch_loss/train': 0.7288541770540178} +12/27/2021 22:55:07 - INFO - codeparrot_training - Step 37882: {'lr': 6.949358498066697e-05, 'samples': 19396096, 'steps': 37882, 'batch_loss/train': 0.739379667211324} +12/27/2021 22:55:17 - INFO - codeparrot_training - Step 37883: {'lr': 6.94826793584773e-05, 'samples': 19396608, 'steps': 37883, 'batch_loss/train': 0.7472455091774464} +12/27/2021 22:55:30 - INFO - codeparrot_training - Step 37884: {'lr': 6.947177445395622e-05, 'samples': 19397120, 'steps': 37884, 'batch_loss/train': 0.767727124504745} +12/27/2021 22:55:40 - INFO - codeparrot_training - Step 37885: {'lr': 6.946087026714723e-05, 'samples': 19397632, 'steps': 37885, 'batch_loss/train': 0.6993262423202395} +12/27/2021 22:55:51 - INFO - codeparrot_training - Step 37886: {'lr': 6.944996679809365e-05, 'samples': 19398144, 'steps': 37886, 'batch_loss/train': 0.7934499285183847} +12/27/2021 22:56:05 - INFO - codeparrot_training - Step 37887: {'lr': 6.943906404683883e-05, 'samples': 19398656, 'steps': 37887, 'batch_loss/train': 0.6229564732639119} +12/27/2021 22:56:15 - INFO - codeparrot_training - Step 37888: {'lr': 6.942816201342611e-05, 'samples': 19399168, 'steps': 37888, 'batch_loss/train': 0.6890851485077292} +12/27/2021 22:56:26 - INFO - codeparrot_training - Step 37889: {'lr': 6.941726069789886e-05, 'samples': 19399680, 'steps': 37889, 'batch_loss/train': 0.8567852098494768} +12/27/2021 22:56:37 - INFO - codeparrot_training - Step 37890: {'lr': 6.940636010030035e-05, 'samples': 19400192, 'steps': 37890, 'batch_loss/train': 0.7277032905258238} +12/27/2021 22:56:49 - INFO - codeparrot_training - Step 37891: {'lr': 6.9395460220674e-05, 'samples': 19400704, 'steps': 37891, 'batch_loss/train': 0.6719166298862547} +12/27/2021 22:57:00 - INFO - codeparrot_training - Step 37892: {'lr': 6.938456105906318e-05, 'samples': 19401216, 'steps': 37892, 'batch_loss/train': 0.7273709308356047} +12/27/2021 22:57:11 - INFO - codeparrot_training - Step 37893: {'lr': 6.937366261551098e-05, 'samples': 19401728, 'steps': 37893, 'batch_loss/train': 0.6415940169245005} +12/27/2021 22:57:23 - INFO - codeparrot_training - Step 37894: {'lr': 6.936276489006096e-05, 'samples': 19402240, 'steps': 37894, 'batch_loss/train': 0.7840650202706456} +12/27/2021 22:57:33 - INFO - codeparrot_training - Step 37895: {'lr': 6.935186788275649e-05, 'samples': 19402752, 'steps': 37895, 'batch_loss/train': 0.7246563397347927} +12/27/2021 22:57:44 - INFO - codeparrot_training - Step 37896: {'lr': 6.934097159364069e-05, 'samples': 19403264, 'steps': 37896, 'batch_loss/train': 0.8400200419127941} +12/27/2021 22:57:58 - INFO - codeparrot_training - Step 37897: {'lr': 6.933007602275698e-05, 'samples': 19403776, 'steps': 37897, 'batch_loss/train': 0.7412977712228894} +12/27/2021 22:58:09 - INFO - codeparrot_training - Step 37898: {'lr': 6.931918117014866e-05, 'samples': 19404288, 'steps': 37898, 'batch_loss/train': 0.7130647194571793} +12/27/2021 22:58:19 - INFO - codeparrot_training - Step 37899: {'lr': 6.930828703585906e-05, 'samples': 19404800, 'steps': 37899, 'batch_loss/train': 0.7220736879389733} +12/27/2021 22:58:30 - INFO - codeparrot_training - Step 37900: {'lr': 6.929739361993149e-05, 'samples': 19405312, 'steps': 37900, 'batch_loss/train': 0.7978745312429965} +12/27/2021 22:58:42 - INFO - codeparrot_training - Step 37901: {'lr': 6.928650092240924e-05, 'samples': 19405824, 'steps': 37901, 'batch_loss/train': 0.7740875091403723} +12/27/2021 22:58:52 - INFO - codeparrot_training - Step 37902: {'lr': 6.927560894333562e-05, 'samples': 19406336, 'steps': 37902, 'batch_loss/train': 0.7300789913279004} +12/27/2021 22:59:03 - INFO - codeparrot_training - Step 37903: {'lr': 6.926471768275394e-05, 'samples': 19406848, 'steps': 37903, 'batch_loss/train': 0.8345288028940558} +12/27/2021 22:59:15 - INFO - codeparrot_training - Step 37904: {'lr': 6.925382714070753e-05, 'samples': 19407360, 'steps': 37904, 'batch_loss/train': 0.7614617291837931} +12/27/2021 22:59:26 - INFO - codeparrot_training - Step 37905: {'lr': 6.924293731723962e-05, 'samples': 19407872, 'steps': 37905, 'batch_loss/train': 0.6943542119115591} +12/27/2021 22:59:36 - INFO - codeparrot_training - Step 37906: {'lr': 6.923204821239362e-05, 'samples': 19408384, 'steps': 37906, 'batch_loss/train': 0.799405668862164} +12/27/2021 22:59:50 - INFO - codeparrot_training - Step 37907: {'lr': 6.922115982621268e-05, 'samples': 19408896, 'steps': 37907, 'batch_loss/train': 0.8032886511646211} +12/27/2021 23:00:01 - INFO - codeparrot_training - Step 37908: {'lr': 6.921027215874007e-05, 'samples': 19409408, 'steps': 37908, 'batch_loss/train': 0.7212391961365938} +12/27/2021 23:00:12 - INFO - codeparrot_training - Step 37909: {'lr': 6.91993852100193e-05, 'samples': 19409920, 'steps': 37909, 'batch_loss/train': 0.7907876167446375} +12/27/2021 23:00:22 - INFO - codeparrot_training - Step 37910: {'lr': 6.918849898009344e-05, 'samples': 19410432, 'steps': 37910, 'batch_loss/train': 0.7592957811430097} +12/27/2021 23:00:35 - INFO - codeparrot_training - Step 37911: {'lr': 6.917761346900578e-05, 'samples': 19410944, 'steps': 37911, 'batch_loss/train': 0.7216945631662384} +12/27/2021 23:00:45 - INFO - codeparrot_training - Step 37912: {'lr': 6.916672867679976e-05, 'samples': 19411456, 'steps': 37912, 'batch_loss/train': 0.6995371337980032} +12/27/2021 23:00:56 - INFO - codeparrot_training - Step 37913: {'lr': 6.915584460351851e-05, 'samples': 19411968, 'steps': 37913, 'batch_loss/train': 0.6203677137964405} +12/27/2021 23:01:08 - INFO - codeparrot_training - Step 37914: {'lr': 6.914496124920533e-05, 'samples': 19412480, 'steps': 37914, 'batch_loss/train': 0.7222043075598776} +12/27/2021 23:01:19 - INFO - codeparrot_training - Step 37915: {'lr': 6.913407861390348e-05, 'samples': 19412992, 'steps': 37915, 'batch_loss/train': 1.5123331970535219} +12/27/2021 23:01:29 - INFO - codeparrot_training - Step 37916: {'lr': 6.912319669765623e-05, 'samples': 19413504, 'steps': 37916, 'batch_loss/train': 1.5889678439125419} +12/27/2021 23:01:40 - INFO - codeparrot_training - Step 37917: {'lr': 6.911231550050689e-05, 'samples': 19414016, 'steps': 37917, 'batch_loss/train': 0.7513351095840335} +12/27/2021 23:01:54 - INFO - codeparrot_training - Step 37918: {'lr': 6.910143502249874e-05, 'samples': 19414528, 'steps': 37918, 'batch_loss/train': 0.706303742248565} +12/27/2021 23:02:05 - INFO - codeparrot_training - Step 37919: {'lr': 6.909055526367483e-05, 'samples': 19415040, 'steps': 37919, 'batch_loss/train': 0.9012899480294436} +12/27/2021 23:02:15 - INFO - codeparrot_training - Step 37920: {'lr': 6.907967622407863e-05, 'samples': 19415552, 'steps': 37920, 'batch_loss/train': 0.8605451788753271} +12/27/2021 23:02:27 - INFO - codeparrot_training - Step 37921: {'lr': 6.90687979037534e-05, 'samples': 19416064, 'steps': 37921, 'batch_loss/train': 0.7502624969929457} +12/27/2021 23:02:38 - INFO - codeparrot_training - Step 37922: {'lr': 6.905792030274224e-05, 'samples': 19416576, 'steps': 37922, 'batch_loss/train': 0.7273565115174279} +12/27/2021 23:02:49 - INFO - codeparrot_training - Step 37923: {'lr': 6.904704342108841e-05, 'samples': 19417088, 'steps': 37923, 'batch_loss/train': 0.697665236890316} +12/27/2021 23:03:01 - INFO - codeparrot_training - Step 37924: {'lr': 6.903616725883536e-05, 'samples': 19417600, 'steps': 37924, 'batch_loss/train': 0.7229712216649204} +12/27/2021 23:03:11 - INFO - codeparrot_training - Step 37925: {'lr': 6.902529181602607e-05, 'samples': 19418112, 'steps': 37925, 'batch_loss/train': 0.7771627473994158} +12/27/2021 23:03:22 - INFO - codeparrot_training - Step 37926: {'lr': 6.901441709270392e-05, 'samples': 19418624, 'steps': 37926, 'batch_loss/train': 0.7503784724976867} +12/27/2021 23:03:36 - INFO - codeparrot_training - Step 37927: {'lr': 6.90035430889121e-05, 'samples': 19419136, 'steps': 37927, 'batch_loss/train': 0.720661393366754} +12/27/2021 23:03:47 - INFO - codeparrot_training - Step 37928: {'lr': 6.899266980469385e-05, 'samples': 19419648, 'steps': 37928, 'batch_loss/train': 0.6912085868534632} +12/27/2021 23:03:57 - INFO - codeparrot_training - Step 37929: {'lr': 6.89817972400924e-05, 'samples': 19420160, 'steps': 37929, 'batch_loss/train': 0.7557434597983956} +12/27/2021 23:04:08 - INFO - codeparrot_training - Step 37930: {'lr': 6.897092539515096e-05, 'samples': 19420672, 'steps': 37930, 'batch_loss/train': 0.7064168169745244} +12/27/2021 23:04:20 - INFO - codeparrot_training - Step 37931: {'lr': 6.896005426991279e-05, 'samples': 19421184, 'steps': 37931, 'batch_loss/train': 0.7784588830545545} +12/27/2021 23:04:31 - INFO - codeparrot_training - Step 37932: {'lr': 6.894918386442115e-05, 'samples': 19421696, 'steps': 37932, 'batch_loss/train': 0.6773410467430949} +12/27/2021 23:04:41 - INFO - codeparrot_training - Step 37933: {'lr': 6.893831417871913e-05, 'samples': 19422208, 'steps': 37933, 'batch_loss/train': 0.7467883881181479} +12/27/2021 23:04:55 - INFO - codeparrot_training - Step 37934: {'lr': 6.89274452128499e-05, 'samples': 19422720, 'steps': 37934, 'batch_loss/train': 0.7328744297847152} +12/27/2021 23:05:06 - INFO - codeparrot_training - Step 37935: {'lr': 6.891657696685697e-05, 'samples': 19423232, 'steps': 37935, 'batch_loss/train': 0.7712392134126276} +12/27/2021 23:05:17 - INFO - codeparrot_training - Step 37936: {'lr': 6.890570944078325e-05, 'samples': 19423744, 'steps': 37936, 'batch_loss/train': 0.7726200451143086} +12/27/2021 23:05:29 - INFO - codeparrot_training - Step 37937: {'lr': 6.889484263467197e-05, 'samples': 19424256, 'steps': 37937, 'batch_loss/train': 0.7576803583651781} +12/27/2021 23:05:39 - INFO - codeparrot_training - Step 37938: {'lr': 6.888397654856659e-05, 'samples': 19424768, 'steps': 37938, 'batch_loss/train': 0.7100480878725648} +12/27/2021 23:05:50 - INFO - codeparrot_training - Step 37939: {'lr': 6.887311118251005e-05, 'samples': 19425280, 'steps': 37939, 'batch_loss/train': 0.7062269276939332} +12/27/2021 23:06:01 - INFO - codeparrot_training - Step 37940: {'lr': 6.886224653654563e-05, 'samples': 19425792, 'steps': 37940, 'batch_loss/train': 0.7483000960201025} +12/27/2021 23:06:13 - INFO - codeparrot_training - Step 37941: {'lr': 6.88513826107165e-05, 'samples': 19426304, 'steps': 37941, 'batch_loss/train': 0.9585089161992073} +12/27/2021 23:06:24 - INFO - codeparrot_training - Step 37942: {'lr': 6.884051940506588e-05, 'samples': 19426816, 'steps': 37942, 'batch_loss/train': 0.7229402158409357} +12/27/2021 23:06:34 - INFO - codeparrot_training - Step 37943: {'lr': 6.882965691963696e-05, 'samples': 19427328, 'steps': 37943, 'batch_loss/train': 0.7252019718289375} +12/27/2021 23:06:48 - INFO - codeparrot_training - Step 37944: {'lr': 6.8818795154473e-05, 'samples': 19427840, 'steps': 37944, 'batch_loss/train': 0.9071295689791441} +12/27/2021 23:06:59 - INFO - codeparrot_training - Step 37945: {'lr': 6.880793410961695e-05, 'samples': 19428352, 'steps': 37945, 'batch_loss/train': 0.7793735433369875} +12/27/2021 23:07:09 - INFO - codeparrot_training - Step 37946: {'lr': 6.879707378511221e-05, 'samples': 19428864, 'steps': 37946, 'batch_loss/train': 0.8073192620649934} +12/27/2021 23:07:22 - INFO - codeparrot_training - Step 37947: {'lr': 6.878621418100196e-05, 'samples': 19429376, 'steps': 37947, 'batch_loss/train': 0.8301053766626865} +12/27/2021 23:07:32 - INFO - codeparrot_training - Step 37948: {'lr': 6.877535529732915e-05, 'samples': 19429888, 'steps': 37948, 'batch_loss/train': 0.5909987520426512} +12/27/2021 23:07:43 - INFO - codeparrot_training - Step 37949: {'lr': 6.876449713413715e-05, 'samples': 19430400, 'steps': 37949, 'batch_loss/train': 0.7925762934610248} +12/27/2021 23:07:54 - INFO - codeparrot_training - Step 37950: {'lr': 6.87536396914692e-05, 'samples': 19430912, 'steps': 37950, 'batch_loss/train': 0.7412500656209886} +12/27/2021 23:08:06 - INFO - codeparrot_training - Step 37951: {'lr': 6.874278296936823e-05, 'samples': 19431424, 'steps': 37951, 'batch_loss/train': 0.7215174483135343} +12/27/2021 23:08:17 - INFO - codeparrot_training - Step 37952: {'lr': 6.873192696787753e-05, 'samples': 19431936, 'steps': 37952, 'batch_loss/train': 0.7308021355420351} +12/27/2021 23:08:27 - INFO - codeparrot_training - Step 37953: {'lr': 6.872107168704022e-05, 'samples': 19432448, 'steps': 37953, 'batch_loss/train': 0.7274769954383373} +12/27/2021 23:08:40 - INFO - codeparrot_training - Step 37954: {'lr': 6.87102171268995e-05, 'samples': 19432960, 'steps': 37954, 'batch_loss/train': 0.7876533889211714} +12/27/2021 23:08:50 - INFO - codeparrot_training - Step 37955: {'lr': 6.869936328749851e-05, 'samples': 19433472, 'steps': 37955, 'batch_loss/train': 0.7825709274038672} +12/27/2021 23:09:01 - INFO - codeparrot_training - Step 37956: {'lr': 6.86885101688804e-05, 'samples': 19433984, 'steps': 37956, 'batch_loss/train': 0.6598970466293395} +12/27/2021 23:09:12 - INFO - codeparrot_training - Step 37957: {'lr': 6.867765777108831e-05, 'samples': 19434496, 'steps': 37957, 'batch_loss/train': 1.138306584674865} +12/27/2021 23:09:26 - INFO - codeparrot_training - Step 37958: {'lr': 6.866680609416537e-05, 'samples': 19435008, 'steps': 37958, 'batch_loss/train': 0.554873844870599} +12/27/2021 23:09:36 - INFO - codeparrot_training - Step 37959: {'lr': 6.865595513815482e-05, 'samples': 19435520, 'steps': 37959, 'batch_loss/train': 0.7135313488543034} +12/27/2021 23:09:47 - INFO - codeparrot_training - Step 37960: {'lr': 6.864510490309958e-05, 'samples': 19436032, 'steps': 37960, 'batch_loss/train': 0.7334209443069994} +12/27/2021 23:09:59 - INFO - codeparrot_training - Step 37961: {'lr': 6.863425538904308e-05, 'samples': 19436544, 'steps': 37961, 'batch_loss/train': 0.7787251207046211} +12/27/2021 23:10:10 - INFO - codeparrot_training - Step 37962: {'lr': 6.862340659602819e-05, 'samples': 19437056, 'steps': 37962, 'batch_loss/train': 0.6920170423109084} +12/27/2021 23:10:20 - INFO - codeparrot_training - Step 37963: {'lr': 6.861255852409812e-05, 'samples': 19437568, 'steps': 37963, 'batch_loss/train': 0.7900913180783391} +12/27/2021 23:10:34 - INFO - codeparrot_training - Step 37964: {'lr': 6.860171117329617e-05, 'samples': 19438080, 'steps': 37964, 'batch_loss/train': 0.7516879797913134} +12/27/2021 23:10:45 - INFO - codeparrot_training - Step 37965: {'lr': 6.859086454366523e-05, 'samples': 19438592, 'steps': 37965, 'batch_loss/train': 0.8771762317046523} +12/27/2021 23:10:55 - INFO - codeparrot_training - Step 37966: {'lr': 6.858001863524852e-05, 'samples': 19439104, 'steps': 37966, 'batch_loss/train': 0.7912834119051695} +12/27/2021 23:11:06 - INFO - codeparrot_training - Step 37967: {'lr': 6.856917344808916e-05, 'samples': 19439616, 'steps': 37967, 'batch_loss/train': 0.7396066873334348} +12/27/2021 23:11:18 - INFO - codeparrot_training - Step 37968: {'lr': 6.855832898223024e-05, 'samples': 19440128, 'steps': 37968, 'batch_loss/train': 0.9192312713712454} +12/27/2021 23:11:29 - INFO - codeparrot_training - Step 37969: {'lr': 6.854748523771492e-05, 'samples': 19440640, 'steps': 37969, 'batch_loss/train': 0.7035023542121053} +12/27/2021 23:11:40 - INFO - codeparrot_training - Step 37970: {'lr': 6.853664221458636e-05, 'samples': 19441152, 'steps': 37970, 'batch_loss/train': 1.13865324575454} +12/27/2021 23:11:52 - INFO - codeparrot_training - Step 37971: {'lr': 6.852579991288743e-05, 'samples': 19441664, 'steps': 37971, 'batch_loss/train': 0.7897201557643712} +12/27/2021 23:12:02 - INFO - codeparrot_training - Step 37972: {'lr': 6.851495833266147e-05, 'samples': 19442176, 'steps': 37972, 'batch_loss/train': 0.7990775257349014} +12/27/2021 23:12:13 - INFO - codeparrot_training - Step 37973: {'lr': 6.850411747395161e-05, 'samples': 19442688, 'steps': 37973, 'batch_loss/train': 0.8237691652029753} +12/27/2021 23:12:28 - INFO - codeparrot_training - Step 37974: {'lr': 6.849327733680067e-05, 'samples': 19443200, 'steps': 37974, 'batch_loss/train': 0.8079334637150168} +12/27/2021 23:12:38 - INFO - codeparrot_training - Step 37975: {'lr': 6.8482437921252e-05, 'samples': 19443712, 'steps': 37975, 'batch_loss/train': 0.6542597856605425} +12/27/2021 23:12:49 - INFO - codeparrot_training - Step 37976: {'lr': 6.847159922734872e-05, 'samples': 19444224, 'steps': 37976, 'batch_loss/train': 0.7305298196151853} +12/27/2021 23:13:00 - INFO - codeparrot_training - Step 37977: {'lr': 6.84607612551337e-05, 'samples': 19444736, 'steps': 37977, 'batch_loss/train': 2.124360516667366} +12/27/2021 23:13:12 - INFO - codeparrot_training - Step 37978: {'lr': 6.84499240046502e-05, 'samples': 19445248, 'steps': 37978, 'batch_loss/train': 0.80744943395257} +12/27/2021 23:13:22 - INFO - codeparrot_training - Step 37979: {'lr': 6.843908747594122e-05, 'samples': 19445760, 'steps': 37979, 'batch_loss/train': 0.7679471634328365} +12/27/2021 23:13:33 - INFO - codeparrot_training - Step 37980: {'lr': 6.842825166904987e-05, 'samples': 19446272, 'steps': 37980, 'batch_loss/train': 0.7673457898199558} +12/27/2021 23:13:47 - INFO - codeparrot_training - Step 37981: {'lr': 6.841741658401923e-05, 'samples': 19446784, 'steps': 37981, 'batch_loss/train': 0.7349571632221341} +12/27/2021 23:13:58 - INFO - codeparrot_training - Step 37982: {'lr': 6.840658222089241e-05, 'samples': 19447296, 'steps': 37982, 'batch_loss/train': 0.7216533473692834} +12/27/2021 23:14:08 - INFO - codeparrot_training - Step 37983: {'lr': 6.839574857971243e-05, 'samples': 19447808, 'steps': 37983, 'batch_loss/train': 0.9584372027311474} +12/27/2021 23:14:19 - INFO - codeparrot_training - Step 37984: {'lr': 6.838491566052238e-05, 'samples': 19448320, 'steps': 37984, 'batch_loss/train': 0.7660047390963882} +12/27/2021 23:14:31 - INFO - codeparrot_training - Step 37985: {'lr': 6.837408346336532e-05, 'samples': 19448832, 'steps': 37985, 'batch_loss/train': 0.8329620976001024} +12/27/2021 23:14:42 - INFO - codeparrot_training - Step 37986: {'lr': 6.836325198828436e-05, 'samples': 19449344, 'steps': 37986, 'batch_loss/train': 0.8148735472932458} +12/27/2021 23:14:52 - INFO - codeparrot_training - Step 37987: {'lr': 6.835242123532257e-05, 'samples': 19449856, 'steps': 37987, 'batch_loss/train': 0.7475591842085123} +12/27/2021 23:15:04 - INFO - codeparrot_training - Step 37988: {'lr': 6.834159120452291e-05, 'samples': 19450368, 'steps': 37988, 'batch_loss/train': 0.7608683332800865} +12/27/2021 23:15:15 - INFO - codeparrot_training - Step 37989: {'lr': 6.83307618959284e-05, 'samples': 19450880, 'steps': 37989, 'batch_loss/train': 0.7362184710800648} +12/27/2021 23:15:26 - INFO - codeparrot_training - Step 37990: {'lr': 6.831993330958236e-05, 'samples': 19451392, 'steps': 37990, 'batch_loss/train': 0.7729928700719029} +12/27/2021 23:15:38 - INFO - codeparrot_training - Step 37991: {'lr': 6.830910544552757e-05, 'samples': 19451904, 'steps': 37991, 'batch_loss/train': 0.7850967422127724} +12/27/2021 23:15:48 - INFO - codeparrot_training - Step 37992: {'lr': 6.829827830380717e-05, 'samples': 19452416, 'steps': 37992, 'batch_loss/train': 0.7885849406011403} +12/27/2021 23:15:59 - INFO - codeparrot_training - Step 37993: {'lr': 6.828745188446422e-05, 'samples': 19452928, 'steps': 37993, 'batch_loss/train': 0.6222189285326749} +12/27/2021 23:16:10 - INFO - codeparrot_training - Step 37994: {'lr': 6.827662618754174e-05, 'samples': 19453440, 'steps': 37994, 'batch_loss/train': 0.667322495020926} +12/27/2021 23:16:24 - INFO - codeparrot_training - Step 37995: {'lr': 6.826580121308279e-05, 'samples': 19453952, 'steps': 37995, 'batch_loss/train': 0.8027400281280279} +12/27/2021 23:16:34 - INFO - codeparrot_training - Step 37996: {'lr': 6.825497696113044e-05, 'samples': 19454464, 'steps': 37996, 'batch_loss/train': 0.716345677152276} +12/27/2021 23:16:45 - INFO - codeparrot_training - Step 37997: {'lr': 6.824415343172757e-05, 'samples': 19454976, 'steps': 37997, 'batch_loss/train': 0.5687897165771574} +12/27/2021 23:16:57 - INFO - codeparrot_training - Step 37998: {'lr': 6.823333062491737e-05, 'samples': 19455488, 'steps': 37998, 'batch_loss/train': 0.7134642873425037} +12/27/2021 23:17:08 - INFO - codeparrot_training - Step 37999: {'lr': 6.822250854074288e-05, 'samples': 19456000, 'steps': 37999, 'batch_loss/train': 0.8369523352012038} +12/27/2021 23:17:18 - INFO - codeparrot_training - Step 38000: {'lr': 6.821168717924695e-05, 'samples': 19456512, 'steps': 38000, 'batch_loss/train': 0.7143137902021408} +12/27/2021 23:17:30 - INFO - codeparrot_training - Step 38001: {'lr': 6.820086654047275e-05, 'samples': 19457024, 'steps': 38001, 'batch_loss/train': 0.7138719321228564} +12/27/2021 23:17:41 - INFO - codeparrot_training - Step 38002: {'lr': 6.819004662446335e-05, 'samples': 19457536, 'steps': 38002, 'batch_loss/train': 0.7121083745732903} +12/27/2021 23:17:52 - INFO - codeparrot_training - Step 38003: {'lr': 6.817922743126157e-05, 'samples': 19458048, 'steps': 38003, 'batch_loss/train': 0.7552255378104746} +12/27/2021 23:18:02 - INFO - codeparrot_training - Step 38004: {'lr': 6.816840896091048e-05, 'samples': 19458560, 'steps': 38004, 'batch_loss/train': 0.8184477486647666} +12/27/2021 23:18:16 - INFO - codeparrot_training - Step 38005: {'lr': 6.815759121345331e-05, 'samples': 19459072, 'steps': 38005, 'batch_loss/train': 0.5731506105512381} +12/27/2021 23:18:27 - INFO - codeparrot_training - Step 38006: {'lr': 6.814677418893278e-05, 'samples': 19459584, 'steps': 38006, 'batch_loss/train': 0.6828083067666739} +12/27/2021 23:18:37 - INFO - codeparrot_training - Step 38007: {'lr': 6.813595788739202e-05, 'samples': 19460096, 'steps': 38007, 'batch_loss/train': 0.7412035157904029} +12/27/2021 23:18:50 - INFO - codeparrot_training - Step 38008: {'lr': 6.812514230887404e-05, 'samples': 19460608, 'steps': 38008, 'batch_loss/train': 0.6493329824879766} +12/27/2021 23:19:01 - INFO - codeparrot_training - Step 38009: {'lr': 6.81143274534218e-05, 'samples': 19461120, 'steps': 38009, 'batch_loss/train': 0.7183527983725071} +12/27/2021 23:19:11 - INFO - codeparrot_training - Step 38010: {'lr': 6.810351332107833e-05, 'samples': 19461632, 'steps': 38010, 'batch_loss/train': 0.8651158325374126} +12/27/2021 23:19:25 - INFO - codeparrot_training - Step 38011: {'lr': 6.80926999118866e-05, 'samples': 19462144, 'steps': 38011, 'batch_loss/train': 0.7002102332189679} +12/27/2021 23:19:36 - INFO - codeparrot_training - Step 38012: {'lr': 6.808188722588963e-05, 'samples': 19462656, 'steps': 38012, 'batch_loss/train': 0.7955935318022966} +12/27/2021 23:19:46 - INFO - codeparrot_training - Step 38013: {'lr': 6.807107526313038e-05, 'samples': 19463168, 'steps': 38013, 'batch_loss/train': 0.6919260867871344} +12/27/2021 23:19:57 - INFO - codeparrot_training - Step 38014: {'lr': 6.80602640236519e-05, 'samples': 19463680, 'steps': 38014, 'batch_loss/train': 0.8581539066508412} +12/27/2021 23:20:09 - INFO - codeparrot_training - Step 38015: {'lr': 6.804945350749698e-05, 'samples': 19464192, 'steps': 38015, 'batch_loss/train': 0.682075566612184} +12/27/2021 23:20:20 - INFO - codeparrot_training - Step 38016: {'lr': 6.803864371470888e-05, 'samples': 19464704, 'steps': 38016, 'batch_loss/train': 0.747820658609271} +12/27/2021 23:20:30 - INFO - codeparrot_training - Step 38017: {'lr': 6.802783464533033e-05, 'samples': 19465216, 'steps': 38017, 'batch_loss/train': 0.6807028746698052} +12/27/2021 23:20:42 - INFO - codeparrot_training - Step 38018: {'lr': 6.801702629940442e-05, 'samples': 19465728, 'steps': 38018, 'batch_loss/train': 0.6882599787786603} +12/27/2021 23:20:53 - INFO - codeparrot_training - Step 38019: {'lr': 6.80062186769741e-05, 'samples': 19466240, 'steps': 38019, 'batch_loss/train': 0.6906061568297446} +12/27/2021 23:21:04 - INFO - codeparrot_training - Step 38020: {'lr': 6.799541177808232e-05, 'samples': 19466752, 'steps': 38020, 'batch_loss/train': 0.6645843591541052} +12/27/2021 23:21:16 - INFO - codeparrot_training - Step 38021: {'lr': 6.798460560277209e-05, 'samples': 19467264, 'steps': 38021, 'batch_loss/train': 0.8676742487587035} +12/27/2021 23:21:26 - INFO - codeparrot_training - Step 38022: {'lr': 6.79738001510863e-05, 'samples': 19467776, 'steps': 38022, 'batch_loss/train': 0.8486853186041117} +12/27/2021 23:21:37 - INFO - codeparrot_training - Step 38023: {'lr': 6.7962995423068e-05, 'samples': 19468288, 'steps': 38023, 'batch_loss/train': 0.6921036714920774} +12/27/2021 23:21:48 - INFO - codeparrot_training - Step 38024: {'lr': 6.795219141876005e-05, 'samples': 19468800, 'steps': 38024, 'batch_loss/train': 0.6688577681779861} +12/27/2021 23:22:01 - INFO - codeparrot_training - Step 38025: {'lr': 6.794138813820556e-05, 'samples': 19469312, 'steps': 38025, 'batch_loss/train': 0.7574055880540982} +12/27/2021 23:22:12 - INFO - codeparrot_training - Step 38026: {'lr': 6.793058558144719e-05, 'samples': 19469824, 'steps': 38026, 'batch_loss/train': 0.7432022895663977} +12/27/2021 23:22:23 - INFO - codeparrot_training - Step 38027: {'lr': 6.791978374852814e-05, 'samples': 19470336, 'steps': 38027, 'batch_loss/train': 0.7078547389246523} +12/27/2021 23:22:35 - INFO - codeparrot_training - Step 38028: {'lr': 6.790898263949136e-05, 'samples': 19470848, 'steps': 38028, 'batch_loss/train': 0.7135637564351782} +12/27/2021 23:22:46 - INFO - codeparrot_training - Step 38029: {'lr': 6.789818225437966e-05, 'samples': 19471360, 'steps': 38029, 'batch_loss/train': 0.8664136354345828} +12/27/2021 23:22:56 - INFO - codeparrot_training - Step 38030: {'lr': 6.788738259323592e-05, 'samples': 19471872, 'steps': 38030, 'batch_loss/train': 0.8323615617118776} +12/27/2021 23:23:08 - INFO - codeparrot_training - Step 38031: {'lr': 6.787658365610336e-05, 'samples': 19472384, 'steps': 38031, 'batch_loss/train': 0.8075231658294797} +12/27/2021 23:23:19 - INFO - codeparrot_training - Step 38032: {'lr': 6.786578544302463e-05, 'samples': 19472896, 'steps': 38032, 'batch_loss/train': 0.8379937834106386} +12/27/2021 23:23:30 - INFO - codeparrot_training - Step 38033: {'lr': 6.78549879540428e-05, 'samples': 19473408, 'steps': 38033, 'batch_loss/train': 0.6763877061894163} +12/27/2021 23:23:40 - INFO - codeparrot_training - Step 38034: {'lr': 6.784419118920074e-05, 'samples': 19473920, 'steps': 38034, 'batch_loss/train': 0.7866082938853651} +12/27/2021 23:23:54 - INFO - codeparrot_training - Step 38035: {'lr': 6.78333951485414e-05, 'samples': 19474432, 'steps': 38035, 'batch_loss/train': 0.8483129423111677} +12/27/2021 23:24:05 - INFO - codeparrot_training - Step 38036: {'lr': 6.78225998321077e-05, 'samples': 19474944, 'steps': 38036, 'batch_loss/train': 0.702086822129786} +12/27/2021 23:24:15 - INFO - codeparrot_training - Step 38037: {'lr': 6.781180523994255e-05, 'samples': 19475456, 'steps': 38037, 'batch_loss/train': 0.691686560632661} +12/27/2021 23:24:28 - INFO - codeparrot_training - Step 38038: {'lr': 6.780101137208888e-05, 'samples': 19475968, 'steps': 38038, 'batch_loss/train': 0.6383889203425497} +12/27/2021 23:24:38 - INFO - codeparrot_training - Step 38039: {'lr': 6.779021822858958e-05, 'samples': 19476480, 'steps': 38039, 'batch_loss/train': 0.7677472177892923} +12/27/2021 23:24:49 - INFO - codeparrot_training - Step 38040: {'lr': 6.777942580948765e-05, 'samples': 19476992, 'steps': 38040, 'batch_loss/train': 0.7147419803077355} +12/27/2021 23:25:03 - INFO - codeparrot_training - Step 38041: {'lr': 6.776863411482579e-05, 'samples': 19477504, 'steps': 38041, 'batch_loss/train': 0.7787148517090827} +12/27/2021 23:25:13 - INFO - codeparrot_training - Step 38042: {'lr': 6.775784314464717e-05, 'samples': 19478016, 'steps': 38042, 'batch_loss/train': 0.7475278284400702} +12/27/2021 23:25:24 - INFO - codeparrot_training - Step 38043: {'lr': 6.774705289899447e-05, 'samples': 19478528, 'steps': 38043, 'batch_loss/train': 0.6694553000852466} +12/27/2021 23:25:36 - INFO - codeparrot_training - Step 38044: {'lr': 6.77362633779107e-05, 'samples': 19479040, 'steps': 38044, 'batch_loss/train': 0.838444103486836} +12/27/2021 23:25:47 - INFO - codeparrot_training - Step 38045: {'lr': 6.772547458143869e-05, 'samples': 19479552, 'steps': 38045, 'batch_loss/train': 0.6285860827192664} +12/27/2021 23:25:57 - INFO - codeparrot_training - Step 38046: {'lr': 6.771468650962138e-05, 'samples': 19480064, 'steps': 38046, 'batch_loss/train': 0.745768828317523} +12/27/2021 23:26:08 - INFO - codeparrot_training - Step 38047: {'lr': 6.770389916250167e-05, 'samples': 19480576, 'steps': 38047, 'batch_loss/train': 0.7956162407062948} +12/27/2021 23:26:20 - INFO - codeparrot_training - Step 38048: {'lr': 6.76931125401224e-05, 'samples': 19481088, 'steps': 38048, 'batch_loss/train': 0.6165697281248868} +12/27/2021 23:26:31 - INFO - codeparrot_training - Step 38049: {'lr': 6.76823266425265e-05, 'samples': 19481600, 'steps': 38049, 'batch_loss/train': 0.7515297078061849} +12/27/2021 23:26:41 - INFO - codeparrot_training - Step 38050: {'lr': 6.767154146975679e-05, 'samples': 19482112, 'steps': 38050, 'batch_loss/train': 0.7276681839721277} +12/27/2021 23:26:55 - INFO - codeparrot_training - Step 38051: {'lr': 6.766075702185628e-05, 'samples': 19482624, 'steps': 38051, 'batch_loss/train': 0.7916579321026802} +12/27/2021 23:27:06 - INFO - codeparrot_training - Step 38052: {'lr': 6.764997329886762e-05, 'samples': 19483136, 'steps': 38052, 'batch_loss/train': 0.7021405091509223} +12/27/2021 23:27:16 - INFO - codeparrot_training - Step 38053: {'lr': 6.763919030083387e-05, 'samples': 19483648, 'steps': 38053, 'batch_loss/train': 0.7989082671701908} +12/27/2021 23:27:29 - INFO - codeparrot_training - Step 38054: {'lr': 6.762840802779793e-05, 'samples': 19484160, 'steps': 38054, 'batch_loss/train': 0.7563087644521147} +12/27/2021 23:27:39 - INFO - codeparrot_training - Step 38055: {'lr': 6.76176264798025e-05, 'samples': 19484672, 'steps': 38055, 'batch_loss/train': 0.692258627153933} +12/27/2021 23:27:50 - INFO - codeparrot_training - Step 38056: {'lr': 6.760684565689044e-05, 'samples': 19485184, 'steps': 38056, 'batch_loss/train': 0.7154818926937878} +12/27/2021 23:28:01 - INFO - codeparrot_training - Step 38057: {'lr': 6.759606555910485e-05, 'samples': 19485696, 'steps': 38057, 'batch_loss/train': 0.7899064132943749} +12/27/2021 23:28:13 - INFO - codeparrot_training - Step 38058: {'lr': 6.758528618648835e-05, 'samples': 19486208, 'steps': 38058, 'batch_loss/train': 0.7662624209187925} +12/27/2021 23:28:23 - INFO - codeparrot_training - Step 38059: {'lr': 6.757450753908381e-05, 'samples': 19486720, 'steps': 38059, 'batch_loss/train': 0.7453341474756598} +12/27/2021 23:28:34 - INFO - codeparrot_training - Step 38060: {'lr': 6.756372961693427e-05, 'samples': 19487232, 'steps': 38060, 'batch_loss/train': 0.8154143746942282} +12/27/2021 23:28:46 - INFO - codeparrot_training - Step 38061: {'lr': 6.755295242008241e-05, 'samples': 19487744, 'steps': 38061, 'batch_loss/train': 0.8447525375522673} +12/27/2021 23:28:57 - INFO - codeparrot_training - Step 38062: {'lr': 6.754217594857113e-05, 'samples': 19488256, 'steps': 38062, 'batch_loss/train': 0.7745918449945748} +12/27/2021 23:29:08 - INFO - codeparrot_training - Step 38063: {'lr': 6.753140020244322e-05, 'samples': 19488768, 'steps': 38063, 'batch_loss/train': 0.6901517147198319} +12/27/2021 23:29:22 - INFO - codeparrot_training - Step 38064: {'lr': 6.75206251817416e-05, 'samples': 19489280, 'steps': 38064, 'batch_loss/train': 0.7283762791194022} +12/27/2021 23:29:33 - INFO - codeparrot_training - Step 38065: {'lr': 6.750985088650907e-05, 'samples': 19489792, 'steps': 38065, 'batch_loss/train': 0.7423744010739028} +12/27/2021 23:29:43 - INFO - codeparrot_training - Step 38066: {'lr': 6.749907731678854e-05, 'samples': 19490304, 'steps': 38066, 'batch_loss/train': 0.758969129063189} +12/27/2021 23:29:54 - INFO - codeparrot_training - Step 38067: {'lr': 6.748830447262261e-05, 'samples': 19490816, 'steps': 38067, 'batch_loss/train': 0.705785698723048} +12/27/2021 23:30:06 - INFO - codeparrot_training - Step 38068: {'lr': 6.747753235405444e-05, 'samples': 19491328, 'steps': 38068, 'batch_loss/train': 0.6774579361081123} +12/27/2021 23:30:17 - INFO - codeparrot_training - Step 38069: {'lr': 6.746676096112658e-05, 'samples': 19491840, 'steps': 38069, 'batch_loss/train': 0.6586057655513287} +12/27/2021 23:30:27 - INFO - codeparrot_training - Step 38070: {'lr': 6.745599029388197e-05, 'samples': 19492352, 'steps': 38070, 'batch_loss/train': 0.7276484143803827} +12/27/2021 23:30:40 - INFO - codeparrot_training - Step 38071: {'lr': 6.744522035236342e-05, 'samples': 19492864, 'steps': 38071, 'batch_loss/train': 0.7556026941165328} +12/27/2021 23:30:50 - INFO - codeparrot_training - Step 38072: {'lr': 6.743445113661373e-05, 'samples': 19493376, 'steps': 38072, 'batch_loss/train': 0.7139811348170042} +12/27/2021 23:31:01 - INFO - codeparrot_training - Step 38073: {'lr': 6.742368264667573e-05, 'samples': 19493888, 'steps': 38073, 'batch_loss/train': 0.8139375812606886} +12/27/2021 23:31:15 - INFO - codeparrot_training - Step 38074: {'lr': 6.741291488259224e-05, 'samples': 19494400, 'steps': 38074, 'batch_loss/train': 0.8141045346856117} +12/27/2021 23:31:26 - INFO - codeparrot_training - Step 38075: {'lr': 6.740214784440607e-05, 'samples': 19494912, 'steps': 38075, 'batch_loss/train': 0.8047191211953759} +12/27/2021 23:31:36 - INFO - codeparrot_training - Step 38076: {'lr': 6.739138153215998e-05, 'samples': 19495424, 'steps': 38076, 'batch_loss/train': 0.7730138404294848} +12/27/2021 23:31:47 - INFO - codeparrot_training - Step 38077: {'lr': 6.738061594589692e-05, 'samples': 19495936, 'steps': 38077, 'batch_loss/train': 0.7608152609318495} +12/27/2021 23:31:59 - INFO - codeparrot_training - Step 38078: {'lr': 6.736985108565941e-05, 'samples': 19496448, 'steps': 38078, 'batch_loss/train': 0.7581360279582441} +12/27/2021 23:32:10 - INFO - codeparrot_training - Step 38079: {'lr': 6.735908695149049e-05, 'samples': 19496960, 'steps': 38079, 'batch_loss/train': 0.6992946867831051} +12/27/2021 23:32:20 - INFO - codeparrot_training - Step 38080: {'lr': 6.734832354343292e-05, 'samples': 19497472, 'steps': 38080, 'batch_loss/train': 0.757581832818687} +12/27/2021 23:32:34 - INFO - codeparrot_training - Step 38081: {'lr': 6.733756086152943e-05, 'samples': 19497984, 'steps': 38081, 'batch_loss/train': 0.7205653378041461} +12/27/2021 23:32:45 - INFO - codeparrot_training - Step 38082: {'lr': 6.732679890582272e-05, 'samples': 19498496, 'steps': 38082, 'batch_loss/train': 0.8055658536031842} +12/27/2021 23:32:55 - INFO - codeparrot_training - Step 38083: {'lr': 6.731603767635583e-05, 'samples': 19499008, 'steps': 38083, 'batch_loss/train': 1.6342855783877894} +12/27/2021 23:33:08 - INFO - codeparrot_training - Step 38084: {'lr': 6.730527717317133e-05, 'samples': 19499520, 'steps': 38084, 'batch_loss/train': 0.8505795346572995} +12/27/2021 23:33:18 - INFO - codeparrot_training - Step 38085: {'lr': 6.729451739631199e-05, 'samples': 19500032, 'steps': 38085, 'batch_loss/train': 0.7832476822659373} +12/27/2021 23:33:29 - INFO - codeparrot_training - Step 38086: {'lr': 6.728375834582082e-05, 'samples': 19500544, 'steps': 38086, 'batch_loss/train': 0.7139241797849536} +12/27/2021 23:33:40 - INFO - codeparrot_training - Step 38087: {'lr': 6.727300002174033e-05, 'samples': 19501056, 'steps': 38087, 'batch_loss/train': 0.7116087400354445} +12/27/2021 23:33:52 - INFO - codeparrot_training - Step 38088: {'lr': 6.726224242411344e-05, 'samples': 19501568, 'steps': 38088, 'batch_loss/train': 0.6330985588720068} +12/27/2021 23:34:03 - INFO - codeparrot_training - Step 38089: {'lr': 6.725148555298285e-05, 'samples': 19502080, 'steps': 38089, 'batch_loss/train': 0.8502793395891786} +12/27/2021 23:34:13 - INFO - codeparrot_training - Step 38090: {'lr': 6.724072940839135e-05, 'samples': 19502592, 'steps': 38090, 'batch_loss/train': 0.9925122796557844} +12/27/2021 23:34:27 - INFO - codeparrot_training - Step 38091: {'lr': 6.722997399038172e-05, 'samples': 19503104, 'steps': 38091, 'batch_loss/train': 0.7793173061218113} +12/27/2021 23:34:38 - INFO - codeparrot_training - Step 38092: {'lr': 6.721921929899674e-05, 'samples': 19503616, 'steps': 38092, 'batch_loss/train': 0.6986192509066314} +12/27/2021 23:34:49 - INFO - codeparrot_training - Step 38093: {'lr': 6.720846533427901e-05, 'samples': 19504128, 'steps': 38093, 'batch_loss/train': 0.7985102282837033} +12/27/2021 23:34:59 - INFO - codeparrot_training - Step 38094: {'lr': 6.719771209627149e-05, 'samples': 19504640, 'steps': 38094, 'batch_loss/train': 0.6654206307139248} +12/27/2021 23:35:11 - INFO - codeparrot_training - Step 38095: {'lr': 6.718695958501689e-05, 'samples': 19505152, 'steps': 38095, 'batch_loss/train': 0.8024751516059041} +12/27/2021 23:35:22 - INFO - codeparrot_training - Step 38096: {'lr': 6.717620780055775e-05, 'samples': 19505664, 'steps': 38096, 'batch_loss/train': 0.8177887117490172} +12/27/2021 23:35:33 - INFO - codeparrot_training - Step 38097: {'lr': 6.716545674293717e-05, 'samples': 19506176, 'steps': 38097, 'batch_loss/train': 1.4748151362873614} +12/27/2021 23:35:47 - INFO - codeparrot_training - Step 38098: {'lr': 6.715470641219759e-05, 'samples': 19506688, 'steps': 38098, 'batch_loss/train': 0.7057048599235713} +12/27/2021 23:35:57 - INFO - codeparrot_training - Step 38099: {'lr': 6.714395680838186e-05, 'samples': 19507200, 'steps': 38099, 'batch_loss/train': 0.7789980713278055} +12/27/2021 23:36:08 - INFO - codeparrot_training - Step 38100: {'lr': 6.713320793153272e-05, 'samples': 19507712, 'steps': 38100, 'batch_loss/train': 0.7295831055380404} +12/27/2021 23:36:20 - INFO - codeparrot_training - Step 38101: {'lr': 6.71224597816929e-05, 'samples': 19508224, 'steps': 38101, 'batch_loss/train': 0.7241937201470137} +12/27/2021 23:36:31 - INFO - codeparrot_training - Step 38102: {'lr': 6.711171235890511e-05, 'samples': 19508736, 'steps': 38102, 'batch_loss/train': 0.6947713457047939} +12/27/2021 23:36:41 - INFO - codeparrot_training - Step 38103: {'lr': 6.710096566321213e-05, 'samples': 19509248, 'steps': 38103, 'batch_loss/train': 0.7571607967838645} +12/27/2021 23:36:53 - INFO - codeparrot_training - Step 38104: {'lr': 6.709021969465661e-05, 'samples': 19509760, 'steps': 38104, 'batch_loss/train': 0.8941481318324804} +12/27/2021 23:37:04 - INFO - codeparrot_training - Step 38105: {'lr': 6.707947445328136e-05, 'samples': 19510272, 'steps': 38105, 'batch_loss/train': 0.71685706730932} +12/27/2021 23:37:15 - INFO - codeparrot_training - Step 38106: {'lr': 6.706872993912911e-05, 'samples': 19510784, 'steps': 38106, 'batch_loss/train': 0.7346579041332006} +12/27/2021 23:37:25 - INFO - codeparrot_training - Step 38107: {'lr': 6.705798615224243e-05, 'samples': 19511296, 'steps': 38107, 'batch_loss/train': 0.6891707540489733} +12/27/2021 23:37:37 - INFO - codeparrot_training - Step 38108: {'lr': 6.704724309266402e-05, 'samples': 19511808, 'steps': 38108, 'batch_loss/train': 0.7751633897423744} +12/27/2021 23:37:48 - INFO - codeparrot_training - Step 38109: {'lr': 6.703650076043687e-05, 'samples': 19512320, 'steps': 38109, 'batch_loss/train': 0.7125693676061928} +12/27/2021 23:37:59 - INFO - codeparrot_training - Step 38110: {'lr': 6.702575915560344e-05, 'samples': 19512832, 'steps': 38110, 'batch_loss/train': 0.5810492064338177} +12/27/2021 23:38:12 - INFO - codeparrot_training - Step 38111: {'lr': 6.701501827820644e-05, 'samples': 19513344, 'steps': 38111, 'batch_loss/train': 0.6521981096593663} +12/27/2021 23:38:23 - INFO - codeparrot_training - Step 38112: {'lr': 6.700427812828878e-05, 'samples': 19513856, 'steps': 38112, 'batch_loss/train': 0.7222411064431071} +12/27/2021 23:38:34 - INFO - codeparrot_training - Step 38113: {'lr': 6.699353870589292e-05, 'samples': 19514368, 'steps': 38113, 'batch_loss/train': 0.6939551767427474} +12/27/2021 23:38:46 - INFO - codeparrot_training - Step 38114: {'lr': 6.698280001106163e-05, 'samples': 19514880, 'steps': 38114, 'batch_loss/train': 0.707933540455997} +12/27/2021 23:38:56 - INFO - codeparrot_training - Step 38115: {'lr': 6.697206204383766e-05, 'samples': 19515392, 'steps': 38115, 'batch_loss/train': 0.7744143878808245} +12/27/2021 23:39:07 - INFO - codeparrot_training - Step 38116: {'lr': 6.696132480426364e-05, 'samples': 19515904, 'steps': 38116, 'batch_loss/train': 0.7877387991175056} +12/27/2021 23:39:18 - INFO - codeparrot_training - Step 38117: {'lr': 6.69505882923823e-05, 'samples': 19516416, 'steps': 38117, 'batch_loss/train': 0.7509747589938343} +12/27/2021 23:39:30 - INFO - codeparrot_training - Step 38118: {'lr': 6.693985250823637e-05, 'samples': 19516928, 'steps': 38118, 'batch_loss/train': 0.877484773285687} +12/27/2021 23:39:41 - INFO - codeparrot_training - Step 38119: {'lr': 6.692911745186831e-05, 'samples': 19517440, 'steps': 38119, 'batch_loss/train': 0.6741580655798316} +12/27/2021 23:39:51 - INFO - codeparrot_training - Step 38120: {'lr': 6.691838312332104e-05, 'samples': 19517952, 'steps': 38120, 'batch_loss/train': 0.7566275019198656} +12/27/2021 23:40:05 - INFO - codeparrot_training - Step 38121: {'lr': 6.690764952263723e-05, 'samples': 19518464, 'steps': 38121, 'batch_loss/train': 0.7213246617466211} +12/27/2021 23:40:16 - INFO - codeparrot_training - Step 38122: {'lr': 6.689691664985931e-05, 'samples': 19518976, 'steps': 38122, 'batch_loss/train': 0.6487273168459069} +12/27/2021 23:40:26 - INFO - codeparrot_training - Step 38123: {'lr': 6.688618450503026e-05, 'samples': 19519488, 'steps': 38123, 'batch_loss/train': 0.7360379965975881} +12/27/2021 23:40:38 - INFO - codeparrot_training - Step 38124: {'lr': 6.68754530881925e-05, 'samples': 19520000, 'steps': 38124, 'batch_loss/train': 0.7575803302461281} +12/27/2021 23:40:49 - INFO - codeparrot_training - Step 38125: {'lr': 6.686472239938885e-05, 'samples': 19520512, 'steps': 38125, 'batch_loss/train': 0.6691921236924827} +12/27/2021 23:41:00 - INFO - codeparrot_training - Step 38126: {'lr': 6.685399243866188e-05, 'samples': 19521024, 'steps': 38126, 'batch_loss/train': 0.7452910207211971} +12/27/2021 23:41:10 - INFO - codeparrot_training - Step 38127: {'lr': 6.68432632060543e-05, 'samples': 19521536, 'steps': 38127, 'batch_loss/train': 0.8335386021062732} +12/27/2021 23:41:24 - INFO - codeparrot_training - Step 38128: {'lr': 6.683253470160871e-05, 'samples': 19522048, 'steps': 38128, 'batch_loss/train': 0.5849158206256106} +12/27/2021 23:41:35 - INFO - codeparrot_training - Step 38129: {'lr': 6.682180692536785e-05, 'samples': 19522560, 'steps': 38129, 'batch_loss/train': 0.791334240231663} +12/27/2021 23:41:45 - INFO - codeparrot_training - Step 38130: {'lr': 6.681107987737431e-05, 'samples': 19523072, 'steps': 38130, 'batch_loss/train': 0.7581189121119678} +12/27/2021 23:41:57 - INFO - codeparrot_training - Step 38131: {'lr': 6.680035355767073e-05, 'samples': 19523584, 'steps': 38131, 'batch_loss/train': 0.6989191183820367} +12/27/2021 23:42:08 - INFO - codeparrot_training - Step 38132: {'lr': 6.678962796629984e-05, 'samples': 19524096, 'steps': 38132, 'batch_loss/train': 0.7551042698323727} +12/27/2021 23:42:19 - INFO - codeparrot_training - Step 38133: {'lr': 6.677890310330406e-05, 'samples': 19524608, 'steps': 38133, 'batch_loss/train': 0.8298066155984998} +12/27/2021 23:42:31 - INFO - codeparrot_training - Step 38134: {'lr': 6.676817896872628e-05, 'samples': 19525120, 'steps': 38134, 'batch_loss/train': 0.7614981709048152} +12/27/2021 23:42:41 - INFO - codeparrot_training - Step 38135: {'lr': 6.675745556260909e-05, 'samples': 19525632, 'steps': 38135, 'batch_loss/train': 0.7639945643022656} +12/27/2021 23:42:52 - INFO - codeparrot_training - Step 38136: {'lr': 6.6746732884995e-05, 'samples': 19526144, 'steps': 38136, 'batch_loss/train': 0.7379753682762384} +12/27/2021 23:43:06 - INFO - codeparrot_training - Step 38137: {'lr': 6.673601093592662e-05, 'samples': 19526656, 'steps': 38137, 'batch_loss/train': 0.738164346665144} +12/27/2021 23:43:16 - INFO - codeparrot_training - Step 38138: {'lr': 6.672528971544683e-05, 'samples': 19527168, 'steps': 38138, 'batch_loss/train': 0.639187048189342} +12/27/2021 23:43:27 - INFO - codeparrot_training - Step 38139: {'lr': 6.6714569223598e-05, 'samples': 19527680, 'steps': 38139, 'batch_loss/train': 0.7271018528845161} +12/27/2021 23:43:38 - INFO - codeparrot_training - Step 38140: {'lr': 6.670384946042285e-05, 'samples': 19528192, 'steps': 38140, 'batch_loss/train': 0.7149888621643186} +12/27/2021 23:43:50 - INFO - codeparrot_training - Step 38141: {'lr': 6.669313042596398e-05, 'samples': 19528704, 'steps': 38141, 'batch_loss/train': 0.7427125805988908} +12/27/2021 23:44:01 - INFO - codeparrot_training - Step 38142: {'lr': 6.668241212026402e-05, 'samples': 19529216, 'steps': 38142, 'batch_loss/train': 0.7008967010769993} +12/27/2021 23:44:11 - INFO - codeparrot_training - Step 38143: {'lr': 6.667169454336555e-05, 'samples': 19529728, 'steps': 38143, 'batch_loss/train': 0.7254847539588809} +12/27/2021 23:44:23 - INFO - codeparrot_training - Step 38144: {'lr': 6.666097769531127e-05, 'samples': 19530240, 'steps': 38144, 'batch_loss/train': 0.7852943018078804} +12/27/2021 23:44:34 - INFO - codeparrot_training - Step 38145: {'lr': 6.665026157614359e-05, 'samples': 19530752, 'steps': 38145, 'batch_loss/train': 0.7113714113365859} +12/27/2021 23:44:44 - INFO - codeparrot_training - Step 38146: {'lr': 6.663954618590528e-05, 'samples': 19531264, 'steps': 38146, 'batch_loss/train': 0.8156648334115744} +12/27/2021 23:44:57 - INFO - codeparrot_training - Step 38147: {'lr': 6.662883152463903e-05, 'samples': 19531776, 'steps': 38147, 'batch_loss/train': 0.7965196426957846} +12/27/2021 23:45:07 - INFO - codeparrot_training - Step 38148: {'lr': 6.661811759238711e-05, 'samples': 19532288, 'steps': 38148, 'batch_loss/train': 0.7578490697778761} +12/27/2021 23:45:18 - INFO - codeparrot_training - Step 38149: {'lr': 6.660740438919241e-05, 'samples': 19532800, 'steps': 38149, 'batch_loss/train': 0.6775715027470142} +12/27/2021 23:45:28 - INFO - codeparrot_training - Step 38150: {'lr': 6.659669191509748e-05, 'samples': 19533312, 'steps': 38150, 'batch_loss/train': 0.6566980835050344} +12/27/2021 23:45:42 - INFO - codeparrot_training - Step 38151: {'lr': 6.658598017014481e-05, 'samples': 19533824, 'steps': 38151, 'batch_loss/train': 0.7487490166677162} +12/27/2021 23:45:53 - INFO - codeparrot_training - Step 38152: {'lr': 6.6575269154377e-05, 'samples': 19534336, 'steps': 38152, 'batch_loss/train': 0.7505469135940075} +12/27/2021 23:46:04 - INFO - codeparrot_training - Step 38153: {'lr': 6.656455886783666e-05, 'samples': 19534848, 'steps': 38153, 'batch_loss/train': 0.6856183391064405} +12/27/2021 23:46:16 - INFO - codeparrot_training - Step 38154: {'lr': 6.655384931056637e-05, 'samples': 19535360, 'steps': 38154, 'batch_loss/train': 0.6972450804896653} +12/27/2021 23:46:26 - INFO - codeparrot_training - Step 38155: {'lr': 6.654314048260873e-05, 'samples': 19535872, 'steps': 38155, 'batch_loss/train': 0.7726530702784657} +12/27/2021 23:46:37 - INFO - codeparrot_training - Step 38156: {'lr': 6.653243238400628e-05, 'samples': 19536384, 'steps': 38156, 'batch_loss/train': 0.6028069651219994} +12/27/2021 23:46:51 - INFO - codeparrot_training - Step 38157: {'lr': 6.652172501480161e-05, 'samples': 19536896, 'steps': 38157, 'batch_loss/train': 0.6044069905765355} +12/27/2021 23:47:02 - INFO - codeparrot_training - Step 38158: {'lr': 6.651101837503737e-05, 'samples': 19537408, 'steps': 38158, 'batch_loss/train': 0.7623013517586514} +12/27/2021 23:47:12 - INFO - codeparrot_training - Step 38159: {'lr': 6.650031246475586e-05, 'samples': 19537920, 'steps': 38159, 'batch_loss/train': 0.7555267903953791} +12/27/2021 23:47:23 - INFO - codeparrot_training - Step 38160: {'lr': 6.648960728399991e-05, 'samples': 19538432, 'steps': 38160, 'batch_loss/train': 0.7160872402600944} +12/27/2021 23:47:35 - INFO - codeparrot_training - Step 38161: {'lr': 6.647890283281205e-05, 'samples': 19538944, 'steps': 38161, 'batch_loss/train': 0.6752011426724494} +12/27/2021 23:47:46 - INFO - codeparrot_training - Step 38162: {'lr': 6.646819911123472e-05, 'samples': 19539456, 'steps': 38162, 'batch_loss/train': 0.850629972293973} +12/27/2021 23:47:56 - INFO - codeparrot_training - Step 38163: {'lr': 6.645749611931046e-05, 'samples': 19539968, 'steps': 38163, 'batch_loss/train': 0.7164336594287306} +12/27/2021 23:48:08 - INFO - codeparrot_training - Step 38164: {'lr': 6.644679385708202e-05, 'samples': 19540480, 'steps': 38164, 'batch_loss/train': 0.7175742397084832} +12/27/2021 23:48:19 - INFO - codeparrot_training - Step 38165: {'lr': 6.643609232459178e-05, 'samples': 19540992, 'steps': 38165, 'batch_loss/train': 0.7517568037146702} +12/27/2021 23:48:30 - INFO - codeparrot_training - Step 38166: {'lr': 6.642539152188229e-05, 'samples': 19541504, 'steps': 38166, 'batch_loss/train': 0.7695483909919858} +12/27/2021 23:48:44 - INFO - codeparrot_training - Step 38167: {'lr': 6.641469144899614e-05, 'samples': 19542016, 'steps': 38167, 'batch_loss/train': 0.7216514605097473} +12/27/2021 23:48:55 - INFO - codeparrot_training - Step 38168: {'lr': 6.640399210597588e-05, 'samples': 19542528, 'steps': 38168, 'batch_loss/train': 0.6697763132397085} +12/27/2021 23:49:05 - INFO - codeparrot_training - Step 38169: {'lr': 6.639329349286399e-05, 'samples': 19543040, 'steps': 38169, 'batch_loss/train': 0.7360765342600644} +12/27/2021 23:49:16 - INFO - codeparrot_training - Step 38170: {'lr': 6.638259560970305e-05, 'samples': 19543552, 'steps': 38170, 'batch_loss/train': 0.6837659031152725} +12/27/2021 23:49:29 - INFO - codeparrot_training - Step 38171: {'lr': 6.63718984565356e-05, 'samples': 19544064, 'steps': 38171, 'batch_loss/train': 1.154404896311462} +12/27/2021 23:49:39 - INFO - codeparrot_training - Step 38172: {'lr': 6.636120203340412e-05, 'samples': 19544576, 'steps': 38172, 'batch_loss/train': 0.7409996800124645} +12/27/2021 23:49:50 - INFO - codeparrot_training - Step 38173: {'lr': 6.635050634035125e-05, 'samples': 19545088, 'steps': 38173, 'batch_loss/train': 0.7768778763711452} +12/27/2021 23:50:02 - INFO - codeparrot_training - Step 38174: {'lr': 6.633981137741929e-05, 'samples': 19545600, 'steps': 38174, 'batch_loss/train': 0.7541071875020862} +12/27/2021 23:50:12 - INFO - codeparrot_training - Step 38175: {'lr': 6.632911714465092e-05, 'samples': 19546112, 'steps': 38175, 'batch_loss/train': 0.8278282694518566} +12/27/2021 23:50:23 - INFO - codeparrot_training - Step 38176: {'lr': 6.631842364208873e-05, 'samples': 19546624, 'steps': 38176, 'batch_loss/train': 0.697361696511507} +12/27/2021 23:50:37 - INFO - codeparrot_training - Step 38177: {'lr': 6.630773086977507e-05, 'samples': 19547136, 'steps': 38177, 'batch_loss/train': 0.7426096927374601} +12/27/2021 23:50:47 - INFO - codeparrot_training - Step 38178: {'lr': 6.629703882775252e-05, 'samples': 19547648, 'steps': 38178, 'batch_loss/train': 0.7503580623306334} +12/27/2021 23:50:58 - INFO - codeparrot_training - Step 38179: {'lr': 6.628634751606356e-05, 'samples': 19548160, 'steps': 38179, 'batch_loss/train': 0.8031446812674403} +12/27/2021 23:51:09 - INFO - codeparrot_training - Step 38180: {'lr': 6.627565693475071e-05, 'samples': 19548672, 'steps': 38180, 'batch_loss/train': 0.7554416321218014} +12/27/2021 23:51:21 - INFO - codeparrot_training - Step 38181: {'lr': 6.626496708385646e-05, 'samples': 19549184, 'steps': 38181, 'batch_loss/train': 0.717901112511754} +12/27/2021 23:51:31 - INFO - codeparrot_training - Step 38182: {'lr': 6.625427796342335e-05, 'samples': 19549696, 'steps': 38182, 'batch_loss/train': 0.756148673593998} +12/27/2021 23:51:42 - INFO - codeparrot_training - Step 38183: {'lr': 6.624358957349386e-05, 'samples': 19550208, 'steps': 38183, 'batch_loss/train': 0.7061813119798899} +12/27/2021 23:51:54 - INFO - codeparrot_training - Step 38184: {'lr': 6.623290191411047e-05, 'samples': 19550720, 'steps': 38184, 'batch_loss/train': 0.6967371630016714} +12/27/2021 23:52:05 - INFO - codeparrot_training - Step 38185: {'lr': 6.622221498531566e-05, 'samples': 19551232, 'steps': 38185, 'batch_loss/train': 0.7380496421828866} +12/27/2021 23:52:15 - INFO - codeparrot_training - Step 38186: {'lr': 6.621152878715192e-05, 'samples': 19551744, 'steps': 38186, 'batch_loss/train': 0.6856209053657949} +12/27/2021 23:52:28 - INFO - codeparrot_training - Step 38187: {'lr': 6.620084331966183e-05, 'samples': 19552256, 'steps': 38187, 'batch_loss/train': 0.7341563836671412} +12/27/2021 23:52:38 - INFO - codeparrot_training - Step 38188: {'lr': 6.619015858288774e-05, 'samples': 19552768, 'steps': 38188, 'batch_loss/train': 0.7682216884568334} +12/27/2021 23:52:49 - INFO - codeparrot_training - Step 38189: {'lr': 6.617947457687207e-05, 'samples': 19553280, 'steps': 38189, 'batch_loss/train': 0.7565338155254722} +12/27/2021 23:53:02 - INFO - codeparrot_training - Step 38190: {'lr': 6.616879130165756e-05, 'samples': 19553792, 'steps': 38190, 'batch_loss/train': 0.7000937135890126} +12/27/2021 23:53:13 - INFO - codeparrot_training - Step 38191: {'lr': 6.615810875728645e-05, 'samples': 19554304, 'steps': 38191, 'batch_loss/train': 0.7628334967885166} +12/27/2021 23:53:24 - INFO - codeparrot_training - Step 38192: {'lr': 6.614742694380129e-05, 'samples': 19554816, 'steps': 38192, 'batch_loss/train': 0.6169973557116464} +12/27/2021 23:53:34 - INFO - codeparrot_training - Step 38193: {'lr': 6.613674586124454e-05, 'samples': 19555328, 'steps': 38193, 'batch_loss/train': 0.6988013100926764} +12/27/2021 23:53:46 - INFO - codeparrot_training - Step 38194: {'lr': 6.612606550965866e-05, 'samples': 19555840, 'steps': 38194, 'batch_loss/train': 0.6250166965182871} +12/27/2021 23:53:57 - INFO - codeparrot_training - Step 38195: {'lr': 6.61153858890861e-05, 'samples': 19556352, 'steps': 38195, 'batch_loss/train': 0.7993985074572265} +12/27/2021 23:54:08 - INFO - codeparrot_training - Step 38196: {'lr': 6.610470699956937e-05, 'samples': 19556864, 'steps': 38196, 'batch_loss/train': 0.7396250439342111} +12/27/2021 23:54:22 - INFO - codeparrot_training - Step 38197: {'lr': 6.609402884115087e-05, 'samples': 19557376, 'steps': 38197, 'batch_loss/train': 0.6713492607232183} +12/27/2021 23:54:32 - INFO - codeparrot_training - Step 38198: {'lr': 6.608335141387306e-05, 'samples': 19557888, 'steps': 38198, 'batch_loss/train': 0.7010870794765651} +12/27/2021 23:54:43 - INFO - codeparrot_training - Step 38199: {'lr': 6.607267471777847e-05, 'samples': 19558400, 'steps': 38199, 'batch_loss/train': 0.8469199473038316} +12/27/2021 23:54:55 - INFO - codeparrot_training - Step 38200: {'lr': 6.606199875290936e-05, 'samples': 19558912, 'steps': 38200, 'batch_loss/train': 0.7403491870500147} +12/27/2021 23:55:06 - INFO - codeparrot_training - Step 38201: {'lr': 6.605132351930834e-05, 'samples': 19559424, 'steps': 38201, 'batch_loss/train': 0.8122623143717647} +12/27/2021 23:55:16 - INFO - codeparrot_training - Step 38202: {'lr': 6.60406490170179e-05, 'samples': 19559936, 'steps': 38202, 'batch_loss/train': 0.7378783435560763} +12/27/2021 23:55:27 - INFO - codeparrot_training - Step 38203: {'lr': 6.602997524608026e-05, 'samples': 19560448, 'steps': 38203, 'batch_loss/train': 0.718228334793821} +12/27/2021 23:55:40 - INFO - codeparrot_training - Step 38204: {'lr': 6.601930220653801e-05, 'samples': 19560960, 'steps': 38204, 'batch_loss/train': 0.7608047695830464} +12/27/2021 23:55:50 - INFO - codeparrot_training - Step 38205: {'lr': 6.600862989843354e-05, 'samples': 19561472, 'steps': 38205, 'batch_loss/train': 0.8052284466102719} +12/27/2021 23:56:01 - INFO - codeparrot_training - Step 38206: {'lr': 6.599795832180928e-05, 'samples': 19561984, 'steps': 38206, 'batch_loss/train': 0.5075615612440743} +12/27/2021 23:56:13 - INFO - codeparrot_training - Step 38207: {'lr': 6.598728747670766e-05, 'samples': 19562496, 'steps': 38207, 'batch_loss/train': 0.7177411688026041} +12/27/2021 23:56:24 - INFO - codeparrot_training - Step 38208: {'lr': 6.597661736317112e-05, 'samples': 19563008, 'steps': 38208, 'batch_loss/train': 0.7854504412971437} +12/27/2021 23:56:35 - INFO - codeparrot_training - Step 38209: {'lr': 6.596594798124206e-05, 'samples': 19563520, 'steps': 38209, 'batch_loss/train': 0.6064290511421859} +12/27/2021 23:56:49 - INFO - codeparrot_training - Step 38210: {'lr': 6.595527933096288e-05, 'samples': 19564032, 'steps': 38210, 'batch_loss/train': 0.6457908172160387} +12/27/2021 23:57:00 - INFO - codeparrot_training - Step 38211: {'lr': 6.594461141237604e-05, 'samples': 19564544, 'steps': 38211, 'batch_loss/train': 0.615154932718724} +12/27/2021 23:57:11 - INFO - codeparrot_training - Step 38212: {'lr': 6.593394422552391e-05, 'samples': 19565056, 'steps': 38212, 'batch_loss/train': 0.7028532791882753} +12/27/2021 23:57:21 - INFO - codeparrot_training - Step 38213: {'lr': 6.592327777044902e-05, 'samples': 19565568, 'steps': 38213, 'batch_loss/train': 0.7196987282950431} +12/27/2021 23:57:33 - INFO - codeparrot_training - Step 38214: {'lr': 6.591261204719357e-05, 'samples': 19566080, 'steps': 38214, 'batch_loss/train': 0.7358425405691378} +12/27/2021 23:57:44 - INFO - codeparrot_training - Step 38215: {'lr': 6.59019470558e-05, 'samples': 19566592, 'steps': 38215, 'batch_loss/train': 0.7633813864085823} +12/27/2021 23:57:54 - INFO - codeparrot_training - Step 38216: {'lr': 6.589128279631093e-05, 'samples': 19567104, 'steps': 38216, 'batch_loss/train': 0.7792551862075925} +12/27/2021 23:58:08 - INFO - codeparrot_training - Step 38217: {'lr': 6.588061926876851e-05, 'samples': 19567616, 'steps': 38217, 'batch_loss/train': 0.9625776652246714} +12/27/2021 23:58:19 - INFO - codeparrot_training - Step 38218: {'lr': 6.586995647321525e-05, 'samples': 19568128, 'steps': 38218, 'batch_loss/train': 0.7419016245985404} +12/27/2021 23:58:30 - INFO - codeparrot_training - Step 38219: {'lr': 6.585929440969352e-05, 'samples': 19568640, 'steps': 38219, 'batch_loss/train': 0.6653996298555285} +12/27/2021 23:58:42 - INFO - codeparrot_training - Step 38220: {'lr': 6.584863307824568e-05, 'samples': 19569152, 'steps': 38220, 'batch_loss/train': 0.8016148703172803} +12/27/2021 23:58:53 - INFO - codeparrot_training - Step 38221: {'lr': 6.583797247891416e-05, 'samples': 19569664, 'steps': 38221, 'batch_loss/train': 0.6407985994592309} +12/27/2021 23:59:04 - INFO - codeparrot_training - Step 38222: {'lr': 6.582731261174132e-05, 'samples': 19570176, 'steps': 38222, 'batch_loss/train': 0.7170037301257253} +12/27/2021 23:59:14 - INFO - codeparrot_training - Step 38223: {'lr': 6.581665347676954e-05, 'samples': 19570688, 'steps': 38223, 'batch_loss/train': 0.9801486814394593} +12/27/2021 23:59:26 - INFO - codeparrot_training - Step 38224: {'lr': 6.580599507404123e-05, 'samples': 19571200, 'steps': 38224, 'batch_loss/train': 0.692960518412292} +12/27/2021 23:59:37 - INFO - codeparrot_training - Step 38225: {'lr': 6.579533740359877e-05, 'samples': 19571712, 'steps': 38225, 'batch_loss/train': 0.6958692679181695} +12/27/2021 23:59:48 - INFO - codeparrot_training - Step 38226: {'lr': 6.578468046548438e-05, 'samples': 19572224, 'steps': 38226, 'batch_loss/train': 0.7599851507693529} +12/28/2021 00:00:02 - INFO - codeparrot_training - Step 38227: {'lr': 6.57740242597406e-05, 'samples': 19572736, 'steps': 38227, 'batch_loss/train': 0.7430704317521304} +12/28/2021 00:00:12 - INFO - codeparrot_training - Step 38228: {'lr': 6.576336878640982e-05, 'samples': 19573248, 'steps': 38228, 'batch_loss/train': 0.7251427182927728} +12/28/2021 00:00:23 - INFO - codeparrot_training - Step 38229: {'lr': 6.575271404553424e-05, 'samples': 19573760, 'steps': 38229, 'batch_loss/train': 0.7160039134323597} +12/28/2021 00:00:34 - INFO - codeparrot_training - Step 38230: {'lr': 6.574206003715622e-05, 'samples': 19574272, 'steps': 38230, 'batch_loss/train': 0.8019186542369425} +12/28/2021 00:00:46 - INFO - codeparrot_training - Step 38231: {'lr': 6.573140676131836e-05, 'samples': 19574784, 'steps': 38231, 'batch_loss/train': 0.676960319513455} +12/28/2021 00:00:56 - INFO - codeparrot_training - Step 38232: {'lr': 6.572075421806276e-05, 'samples': 19575296, 'steps': 38232, 'batch_loss/train': 0.8609317666850984} +12/28/2021 00:01:07 - INFO - codeparrot_training - Step 38233: {'lr': 6.571010240743187e-05, 'samples': 19575808, 'steps': 38233, 'batch_loss/train': 0.6560929785482585} +12/28/2021 00:01:19 - INFO - codeparrot_training - Step 38234: {'lr': 6.569945132946803e-05, 'samples': 19576320, 'steps': 38234, 'batch_loss/train': 0.7261673368047923} +12/28/2021 00:01:30 - INFO - codeparrot_training - Step 38235: {'lr': 6.568880098421357e-05, 'samples': 19576832, 'steps': 38235, 'batch_loss/train': 0.7078453821595758} +12/28/2021 00:01:41 - INFO - codeparrot_training - Step 38236: {'lr': 6.567815137171085e-05, 'samples': 19577344, 'steps': 38236, 'batch_loss/train': 0.8043895922601223} +12/28/2021 00:01:55 - INFO - codeparrot_training - Step 38237: {'lr': 6.566750249200221e-05, 'samples': 19577856, 'steps': 38237, 'batch_loss/train': 0.7276739794760942} +12/28/2021 00:02:05 - INFO - codeparrot_training - Step 38238: {'lr': 6.565685434512997e-05, 'samples': 19578368, 'steps': 38238, 'batch_loss/train': 0.700192992342636} +12/28/2021 00:02:16 - INFO - codeparrot_training - Step 38239: {'lr': 6.564620693113647e-05, 'samples': 19578880, 'steps': 38239, 'batch_loss/train': 0.6810625027865171} +12/28/2021 00:02:27 - INFO - codeparrot_training - Step 38240: {'lr': 6.563556025006412e-05, 'samples': 19579392, 'steps': 38240, 'batch_loss/train': 0.7867755638435483} +12/28/2021 00:02:39 - INFO - codeparrot_training - Step 38241: {'lr': 6.562491430195502e-05, 'samples': 19579904, 'steps': 38241, 'batch_loss/train': 0.6703734518960118} +12/28/2021 00:02:49 - INFO - codeparrot_training - Step 38242: {'lr': 6.561426908685179e-05, 'samples': 19580416, 'steps': 38242, 'batch_loss/train': 0.7245019087567925} +12/28/2021 00:03:00 - INFO - codeparrot_training - Step 38243: {'lr': 6.560362460479654e-05, 'samples': 19580928, 'steps': 38243, 'batch_loss/train': 0.7434597158571705} +12/28/2021 00:03:12 - INFO - codeparrot_training - Step 38244: {'lr': 6.559298085583157e-05, 'samples': 19581440, 'steps': 38244, 'batch_loss/train': 0.7909061699174345} +12/28/2021 00:03:23 - INFO - codeparrot_training - Step 38245: {'lr': 6.558233783999942e-05, 'samples': 19581952, 'steps': 38245, 'batch_loss/train': 0.7355686957016587} +12/28/2021 00:03:33 - INFO - codeparrot_training - Step 38246: {'lr': 6.557169555734222e-05, 'samples': 19582464, 'steps': 38246, 'batch_loss/train': 0.8203853573650122} +12/28/2021 00:03:47 - INFO - codeparrot_training - Step 38247: {'lr': 6.556105400790227e-05, 'samples': 19582976, 'steps': 38247, 'batch_loss/train': 0.7661723620258272} +12/28/2021 00:03:58 - INFO - codeparrot_training - Step 38248: {'lr': 6.555041319172197e-05, 'samples': 19583488, 'steps': 38248, 'batch_loss/train': 0.7475724825635552} +12/28/2021 00:04:08 - INFO - codeparrot_training - Step 38249: {'lr': 6.553977310884357e-05, 'samples': 19584000, 'steps': 38249, 'batch_loss/train': 0.822636166587472} +12/28/2021 00:04:20 - INFO - codeparrot_training - Step 38250: {'lr': 6.552913375930939e-05, 'samples': 19584512, 'steps': 38250, 'batch_loss/train': 0.6913166884332895} +12/28/2021 00:04:31 - INFO - codeparrot_training - Step 38251: {'lr': 6.551849514316177e-05, 'samples': 19585024, 'steps': 38251, 'batch_loss/train': 0.7323337830603123} +12/28/2021 00:04:42 - INFO - codeparrot_training - Step 38252: {'lr': 6.550785726044282e-05, 'samples': 19585536, 'steps': 38252, 'batch_loss/train': 0.8059500241652131} +12/28/2021 00:04:52 - INFO - codeparrot_training - Step 38253: {'lr': 6.549722011119504e-05, 'samples': 19586048, 'steps': 38253, 'batch_loss/train': 0.8417808641679585} +12/28/2021 00:05:05 - INFO - codeparrot_training - Step 38254: {'lr': 6.548658369546073e-05, 'samples': 19586560, 'steps': 38254, 'batch_loss/train': 0.7861618297174573} +12/28/2021 00:05:15 - INFO - codeparrot_training - Step 38255: {'lr': 6.547594801328202e-05, 'samples': 19587072, 'steps': 38255, 'batch_loss/train': 0.8468660106882453} +12/28/2021 00:05:26 - INFO - codeparrot_training - Step 38256: {'lr': 6.546531306470118e-05, 'samples': 19587584, 'steps': 38256, 'batch_loss/train': 0.7944851126521826} +12/28/2021 00:05:40 - INFO - codeparrot_training - Step 38257: {'lr': 6.545467884976075e-05, 'samples': 19588096, 'steps': 38257, 'batch_loss/train': 0.7551068705506623} +12/28/2021 00:05:51 - INFO - codeparrot_training - Step 38258: {'lr': 6.544404536850273e-05, 'samples': 19588608, 'steps': 38258, 'batch_loss/train': 0.5950392212253064} +12/28/2021 00:06:01 - INFO - codeparrot_training - Step 38259: {'lr': 6.543341262096952e-05, 'samples': 19589120, 'steps': 38259, 'batch_loss/train': 0.6199569408781826} +12/28/2021 00:06:12 - INFO - codeparrot_training - Step 38260: {'lr': 6.542278060720336e-05, 'samples': 19589632, 'steps': 38260, 'batch_loss/train': 0.7204940365627408} +12/28/2021 00:06:24 - INFO - codeparrot_training - Step 38261: {'lr': 6.541214932724653e-05, 'samples': 19590144, 'steps': 38261, 'batch_loss/train': 0.7649514614604414} +12/28/2021 00:06:35 - INFO - codeparrot_training - Step 38262: {'lr': 6.540151878114132e-05, 'samples': 19590656, 'steps': 38262, 'batch_loss/train': 0.7119155586697161} +12/28/2021 00:06:45 - INFO - codeparrot_training - Step 38263: {'lr': 6.539088896892994e-05, 'samples': 19591168, 'steps': 38263, 'batch_loss/train': 0.39729731815168634} +12/28/2021 00:06:57 - INFO - codeparrot_training - Step 38264: {'lr': 6.538025989065472e-05, 'samples': 19591680, 'steps': 38264, 'batch_loss/train': 0.7150715626776218} +12/28/2021 00:07:08 - INFO - codeparrot_training - Step 38265: {'lr': 6.536963154635783e-05, 'samples': 19592192, 'steps': 38265, 'batch_loss/train': 0.8113149646669626} +12/28/2021 00:07:19 - INFO - codeparrot_training - Step 38266: {'lr': 6.535900393608168e-05, 'samples': 19592704, 'steps': 38266, 'batch_loss/train': 0.7865096526220441} +12/28/2021 00:07:33 - INFO - codeparrot_training - Step 38267: {'lr': 6.534837705986826e-05, 'samples': 19593216, 'steps': 38267, 'batch_loss/train': 0.7727668462321162} +12/28/2021 00:07:43 - INFO - codeparrot_training - Step 38268: {'lr': 6.533775091776009e-05, 'samples': 19593728, 'steps': 38268, 'batch_loss/train': 0.8263084962964058} +12/28/2021 00:07:54 - INFO - codeparrot_training - Step 38269: {'lr': 6.532712550979924e-05, 'samples': 19594240, 'steps': 38269, 'batch_loss/train': 0.7259215348167345} +12/28/2021 00:08:06 - INFO - codeparrot_training - Step 38270: {'lr': 6.531650083602795e-05, 'samples': 19594752, 'steps': 38270, 'batch_loss/train': 0.9146595895290375} +12/28/2021 00:08:17 - INFO - codeparrot_training - Step 38271: {'lr': 6.530587689648865e-05, 'samples': 19595264, 'steps': 38271, 'batch_loss/train': 0.67665906669572} +12/28/2021 00:08:27 - INFO - codeparrot_training - Step 38272: {'lr': 6.529525369122339e-05, 'samples': 19595776, 'steps': 38272, 'batch_loss/train': 0.7706516465987079} +12/28/2021 00:08:38 - INFO - codeparrot_training - Step 38273: {'lr': 6.528463122027445e-05, 'samples': 19596288, 'steps': 38273, 'batch_loss/train': 0.7459305040538311} +12/28/2021 00:08:52 - INFO - codeparrot_training - Step 38274: {'lr': 6.527400948368406e-05, 'samples': 19596800, 'steps': 38274, 'batch_loss/train': 0.7296036505140364} +12/28/2021 00:09:03 - INFO - codeparrot_training - Step 38275: {'lr': 6.526338848149447e-05, 'samples': 19597312, 'steps': 38275, 'batch_loss/train': 0.7390027069486678} +12/28/2021 00:09:13 - INFO - codeparrot_training - Step 38276: {'lr': 6.525276821374787e-05, 'samples': 19597824, 'steps': 38276, 'batch_loss/train': 0.679428106173873} +12/28/2021 00:09:26 - INFO - codeparrot_training - Step 38277: {'lr': 6.52421486804866e-05, 'samples': 19598336, 'steps': 38277, 'batch_loss/train': 0.7297203075140715} +12/28/2021 00:09:36 - INFO - codeparrot_training - Step 38278: {'lr': 6.523152988175262e-05, 'samples': 19598848, 'steps': 38278, 'batch_loss/train': 0.7505086148157716} +12/28/2021 00:09:47 - INFO - codeparrot_training - Step 38279: {'lr': 6.522091181758841e-05, 'samples': 19599360, 'steps': 38279, 'batch_loss/train': 1.1043040258809924} +12/28/2021 00:09:59 - INFO - codeparrot_training - Step 38280: {'lr': 6.521029448803616e-05, 'samples': 19599872, 'steps': 38280, 'batch_loss/train': 0.7818784039700404} +12/28/2021 00:10:10 - INFO - codeparrot_training - Step 38281: {'lr': 6.519967789313786e-05, 'samples': 19600384, 'steps': 38281, 'batch_loss/train': 0.8457827370148152} +12/28/2021 00:10:20 - INFO - codeparrot_training - Step 38282: {'lr': 6.518906203293589e-05, 'samples': 19600896, 'steps': 38282, 'batch_loss/train': 0.7525841114111245} +12/28/2021 00:10:31 - INFO - codeparrot_training - Step 38283: {'lr': 6.517844690747254e-05, 'samples': 19601408, 'steps': 38283, 'batch_loss/train': 0.7965667298994958} +12/28/2021 00:10:43 - INFO - codeparrot_training - Step 38284: {'lr': 6.516783251678985e-05, 'samples': 19601920, 'steps': 38284, 'batch_loss/train': 0.8583795625017956} +12/28/2021 00:10:53 - INFO - codeparrot_training - Step 38285: {'lr': 6.515721886093002e-05, 'samples': 19602432, 'steps': 38285, 'batch_loss/train': 0.6461964971385896} +12/28/2021 00:11:04 - INFO - codeparrot_training - Step 38286: {'lr': 6.514660593993532e-05, 'samples': 19602944, 'steps': 38286, 'batch_loss/train': 0.7654438498429954} +12/28/2021 00:11:18 - INFO - codeparrot_training - Step 38287: {'lr': 6.513599375384791e-05, 'samples': 19603456, 'steps': 38287, 'batch_loss/train': 0.7677399488165975} +12/28/2021 00:11:29 - INFO - codeparrot_training - Step 38288: {'lr': 6.512538230270998e-05, 'samples': 19603968, 'steps': 38288, 'batch_loss/train': 0.7322713094763458} +12/28/2021 00:11:39 - INFO - codeparrot_training - Step 38289: {'lr': 6.511477158656373e-05, 'samples': 19604480, 'steps': 38289, 'batch_loss/train': 0.7068144190125167} +12/28/2021 00:11:51 - INFO - codeparrot_training - Step 38290: {'lr': 6.510416160545132e-05, 'samples': 19604992, 'steps': 38290, 'batch_loss/train': 0.8023194121196866} +12/28/2021 00:12:02 - INFO - codeparrot_training - Step 38291: {'lr': 6.509355235941497e-05, 'samples': 19605504, 'steps': 38291, 'batch_loss/train': 0.7940723514184356} +12/28/2021 00:12:12 - INFO - codeparrot_training - Step 38292: {'lr': 6.508294384849692e-05, 'samples': 19606016, 'steps': 38292, 'batch_loss/train': 0.7273894269019365} +12/28/2021 00:12:23 - INFO - codeparrot_training - Step 38293: {'lr': 6.50723360727391e-05, 'samples': 19606528, 'steps': 38293, 'batch_loss/train': 0.7589011201635003} +12/28/2021 00:12:35 - INFO - codeparrot_training - Step 38294: {'lr': 6.506172903218399e-05, 'samples': 19607040, 'steps': 38294, 'batch_loss/train': 0.681872146204114} +12/28/2021 00:12:46 - INFO - codeparrot_training - Step 38295: {'lr': 6.505112272687356e-05, 'samples': 19607552, 'steps': 38295, 'batch_loss/train': 0.7638281132094562} +12/28/2021 00:12:57 - INFO - codeparrot_training - Step 38296: {'lr': 6.504051715684994e-05, 'samples': 19608064, 'steps': 38296, 'batch_loss/train': 0.7066401953343302} +12/28/2021 00:13:11 - INFO - codeparrot_training - Step 38297: {'lr': 6.502991232215554e-05, 'samples': 19608576, 'steps': 38297, 'batch_loss/train': 0.7373089231550694} +12/28/2021 00:13:21 - INFO - codeparrot_training - Step 38298: {'lr': 6.50193082228323e-05, 'samples': 19609088, 'steps': 38298, 'batch_loss/train': 0.6641963459551334} +12/28/2021 00:13:32 - INFO - codeparrot_training - Step 38299: {'lr': 6.500870485892243e-05, 'samples': 19609600, 'steps': 38299, 'batch_loss/train': 0.7542337737977505} +12/28/2021 00:13:44 - INFO - codeparrot_training - Step 38300: {'lr': 6.499810223046813e-05, 'samples': 19610112, 'steps': 38300, 'batch_loss/train': 0.659111617016606} +12/28/2021 00:13:55 - INFO - codeparrot_training - Step 38301: {'lr': 6.49875003375115e-05, 'samples': 19610624, 'steps': 38301, 'batch_loss/train': 0.8163775624707341} +12/28/2021 00:14:05 - INFO - codeparrot_training - Step 38302: {'lr': 6.497689918009469e-05, 'samples': 19611136, 'steps': 38302, 'batch_loss/train': 0.7661019610241055} +12/28/2021 00:14:16 - INFO - codeparrot_training - Step 38303: {'lr': 6.496629875825997e-05, 'samples': 19611648, 'steps': 38303, 'batch_loss/train': 0.8495402345433831} +12/28/2021 00:14:30 - INFO - codeparrot_training - Step 38304: {'lr': 6.495569907204924e-05, 'samples': 19612160, 'steps': 38304, 'batch_loss/train': 0.7801128569990396} +12/28/2021 00:14:40 - INFO - codeparrot_training - Step 38305: {'lr': 6.494510012150482e-05, 'samples': 19612672, 'steps': 38305, 'batch_loss/train': 0.841670008841902} +12/28/2021 00:14:51 - INFO - codeparrot_training - Step 38306: {'lr': 6.493450190666891e-05, 'samples': 19613184, 'steps': 38306, 'batch_loss/train': 0.7830355037003756} +12/28/2021 00:15:03 - INFO - codeparrot_training - Step 38307: {'lr': 6.492390442758336e-05, 'samples': 19613696, 'steps': 38307, 'batch_loss/train': 0.6977011598646641} +12/28/2021 00:15:14 - INFO - codeparrot_training - Step 38308: {'lr': 6.49133076842906e-05, 'samples': 19614208, 'steps': 38308, 'batch_loss/train': 0.806927002966404} +12/28/2021 00:15:25 - INFO - codeparrot_training - Step 38309: {'lr': 6.490271167683271e-05, 'samples': 19614720, 'steps': 38309, 'batch_loss/train': 0.7385310009121895} +12/28/2021 00:15:37 - INFO - codeparrot_training - Step 38310: {'lr': 6.489211640525164e-05, 'samples': 19615232, 'steps': 38310, 'batch_loss/train': 0.7425264166668057} +12/28/2021 00:15:48 - INFO - codeparrot_training - Step 38311: {'lr': 6.488152186958957e-05, 'samples': 19615744, 'steps': 38311, 'batch_loss/train': 0.7783099012449384} +12/28/2021 00:15:58 - INFO - codeparrot_training - Step 38312: {'lr': 6.487092806988884e-05, 'samples': 19616256, 'steps': 38312, 'batch_loss/train': 0.7977734599262476} +12/28/2021 00:16:09 - INFO - codeparrot_training - Step 38313: {'lr': 6.486033500619128e-05, 'samples': 19616768, 'steps': 38313, 'batch_loss/train': 0.7662871675565839} +12/28/2021 00:16:22 - INFO - codeparrot_training - Step 38314: {'lr': 6.484974267853915e-05, 'samples': 19617280, 'steps': 38314, 'batch_loss/train': 0.8042777333175763} +12/28/2021 00:16:33 - INFO - codeparrot_training - Step 38315: {'lr': 6.483915108697452e-05, 'samples': 19617792, 'steps': 38315, 'batch_loss/train': 0.7805380839854479} +12/28/2021 00:16:43 - INFO - codeparrot_training - Step 38316: {'lr': 6.482856023153954e-05, 'samples': 19618304, 'steps': 38316, 'batch_loss/train': 0.6925967128481716} +12/28/2021 00:16:55 - INFO - codeparrot_training - Step 38317: {'lr': 6.481797011227625e-05, 'samples': 19618816, 'steps': 38317, 'batch_loss/train': 0.6103768647881225} +12/28/2021 00:17:06 - INFO - codeparrot_training - Step 38318: {'lr': 6.480738072922682e-05, 'samples': 19619328, 'steps': 38318, 'batch_loss/train': 0.7465503020212054} +12/28/2021 00:17:17 - INFO - codeparrot_training - Step 38319: {'lr': 6.47967920824333e-05, 'samples': 19619840, 'steps': 38319, 'batch_loss/train': 0.8305830415338278} +12/28/2021 00:17:29 - INFO - codeparrot_training - Step 38320: {'lr': 6.47862041719378e-05, 'samples': 19620352, 'steps': 38320, 'batch_loss/train': 0.7853557167109102} +12/28/2021 00:17:39 - INFO - codeparrot_training - Step 38321: {'lr': 6.47756169977825e-05, 'samples': 19620864, 'steps': 38321, 'batch_loss/train': 0.5991459025535733} +12/28/2021 00:17:50 - INFO - codeparrot_training - Step 38322: {'lr': 6.476503056000927e-05, 'samples': 19621376, 'steps': 38322, 'batch_loss/train': 1.0907602738589048} +12/28/2021 00:18:01 - INFO - codeparrot_training - Step 38323: {'lr': 6.475444485866047e-05, 'samples': 19621888, 'steps': 38323, 'batch_loss/train': 0.7803448652848601} +12/28/2021 00:18:13 - INFO - codeparrot_training - Step 38324: {'lr': 6.474385989377798e-05, 'samples': 19622400, 'steps': 38324, 'batch_loss/train': 0.7460453528910875} +12/28/2021 00:18:23 - INFO - codeparrot_training - Step 38325: {'lr': 6.473327566540396e-05, 'samples': 19622912, 'steps': 38325, 'batch_loss/train': 0.7651059399358928} +12/28/2021 00:18:34 - INFO - codeparrot_training - Step 38326: {'lr': 6.472269217358048e-05, 'samples': 19623424, 'steps': 38326, 'batch_loss/train': 0.7672331392532215} +12/28/2021 00:18:48 - INFO - codeparrot_training - Step 38327: {'lr': 6.47121094183496e-05, 'samples': 19623936, 'steps': 38327, 'batch_loss/train': 0.8058913256973028} +12/28/2021 00:18:58 - INFO - codeparrot_training - Step 38328: {'lr': 6.470152739975343e-05, 'samples': 19624448, 'steps': 38328, 'batch_loss/train': 0.7495194706134498} +12/28/2021 00:19:09 - INFO - codeparrot_training - Step 38329: {'lr': 6.469094611783411e-05, 'samples': 19624960, 'steps': 38329, 'batch_loss/train': 0.6307265651412308} +12/28/2021 00:19:21 - INFO - codeparrot_training - Step 38330: {'lr': 6.468036557263343e-05, 'samples': 19625472, 'steps': 38330, 'batch_loss/train': 0.7361193988472223} +12/28/2021 00:19:32 - INFO - codeparrot_training - Step 38331: {'lr': 6.466978576419374e-05, 'samples': 19625984, 'steps': 38331, 'batch_loss/train': 0.7281195761752315} +12/28/2021 00:19:42 - INFO - codeparrot_training - Step 38332: {'lr': 6.465920669255709e-05, 'samples': 19626496, 'steps': 38332, 'batch_loss/train': 0.7042256176937371} +12/28/2021 00:19:53 - INFO - codeparrot_training - Step 38333: {'lr': 6.464862835776528e-05, 'samples': 19627008, 'steps': 38333, 'batch_loss/train': 0.6992180674569681} +12/28/2021 00:20:05 - INFO - codeparrot_training - Step 38334: {'lr': 6.463805075986062e-05, 'samples': 19627520, 'steps': 38334, 'batch_loss/train': 0.7122929248143919} +12/28/2021 00:20:15 - INFO - codeparrot_training - Step 38335: {'lr': 6.462747389888516e-05, 'samples': 19628032, 'steps': 38335, 'batch_loss/train': 0.7623554370948114} +12/28/2021 00:20:26 - INFO - codeparrot_training - Step 38336: {'lr': 6.461689777488078e-05, 'samples': 19628544, 'steps': 38336, 'batch_loss/train': 0.6748537914827466} +12/28/2021 00:20:40 - INFO - codeparrot_training - Step 38337: {'lr': 6.460632238788958e-05, 'samples': 19629056, 'steps': 38337, 'batch_loss/train': 0.7327154751401395} +12/28/2021 00:20:51 - INFO - codeparrot_training - Step 38338: {'lr': 6.459574773795374e-05, 'samples': 19629568, 'steps': 38338, 'batch_loss/train': 0.740249081980437} +12/28/2021 00:21:01 - INFO - codeparrot_training - Step 38339: {'lr': 6.458517382511517e-05, 'samples': 19630080, 'steps': 38339, 'batch_loss/train': 0.7761305519379675} +12/28/2021 00:21:13 - INFO - codeparrot_training - Step 38340: {'lr': 6.457460064941589e-05, 'samples': 19630592, 'steps': 38340, 'batch_loss/train': 0.7885264130309224} +12/28/2021 00:21:24 - INFO - codeparrot_training - Step 38341: {'lr': 6.456402821089801e-05, 'samples': 19631104, 'steps': 38341, 'batch_loss/train': 0.6643819874152541} +12/28/2021 00:21:35 - INFO - codeparrot_training - Step 38342: {'lr': 6.455345650960356e-05, 'samples': 19631616, 'steps': 38342, 'batch_loss/train': 0.7686267375247553} +12/28/2021 00:21:45 - INFO - codeparrot_training - Step 38343: {'lr': 6.454288554557453e-05, 'samples': 19632128, 'steps': 38343, 'batch_loss/train': 0.7542864098213613} +12/28/2021 00:21:59 - INFO - codeparrot_training - Step 38344: {'lr': 6.453231531885295e-05, 'samples': 19632640, 'steps': 38344, 'batch_loss/train': 0.8060074700042605} +12/28/2021 00:22:10 - INFO - codeparrot_training - Step 38345: {'lr': 6.452174582948083e-05, 'samples': 19633152, 'steps': 38345, 'batch_loss/train': 0.7734018703922629} +12/28/2021 00:22:20 - INFO - codeparrot_training - Step 38346: {'lr': 6.451117707750027e-05, 'samples': 19633664, 'steps': 38346, 'batch_loss/train': 0.806265520513989} +12/28/2021 00:22:32 - INFO - codeparrot_training - Step 38347: {'lr': 6.450060906295327e-05, 'samples': 19634176, 'steps': 38347, 'batch_loss/train': 0.8250538082793355} +12/28/2021 00:22:43 - INFO - codeparrot_training - Step 38348: {'lr': 6.449004178588167e-05, 'samples': 19634688, 'steps': 38348, 'batch_loss/train': 0.7327384087257087} +12/28/2021 00:22:54 - INFO - codeparrot_training - Step 38349: {'lr': 6.447947524632777e-05, 'samples': 19635200, 'steps': 38349, 'batch_loss/train': 0.7325956334825605} +12/28/2021 00:23:06 - INFO - codeparrot_training - Step 38350: {'lr': 6.446890944433334e-05, 'samples': 19635712, 'steps': 38350, 'batch_loss/train': 0.740712609142065} +12/28/2021 00:23:16 - INFO - codeparrot_training - Step 38351: {'lr': 6.445834437994047e-05, 'samples': 19636224, 'steps': 38351, 'batch_loss/train': 0.7457031607627869} +12/28/2021 00:23:27 - INFO - codeparrot_training - Step 38352: {'lr': 6.444778005319119e-05, 'samples': 19636736, 'steps': 38352, 'batch_loss/train': 0.8725676694884896} +12/28/2021 00:23:41 - INFO - codeparrot_training - Step 38353: {'lr': 6.443721646412745e-05, 'samples': 19637248, 'steps': 38353, 'batch_loss/train': 0.6855516275390983} +12/28/2021 00:23:52 - INFO - codeparrot_training - Step 38354: {'lr': 6.442665361279129e-05, 'samples': 19637760, 'steps': 38354, 'batch_loss/train': 0.4789983533555642} +12/28/2021 00:24:02 - INFO - codeparrot_training - Step 38355: {'lr': 6.441609149922468e-05, 'samples': 19638272, 'steps': 38355, 'batch_loss/train': 0.7945847045630217} +12/28/2021 00:24:13 - INFO - codeparrot_training - Step 38356: {'lr': 6.440553012346961e-05, 'samples': 19638784, 'steps': 38356, 'batch_loss/train': 0.7204820830374956} +12/28/2021 00:24:25 - INFO - codeparrot_training - Step 38357: {'lr': 6.439496948556809e-05, 'samples': 19639296, 'steps': 38357, 'batch_loss/train': 0.7125729853287339} +12/28/2021 00:24:36 - INFO - codeparrot_training - Step 38358: {'lr': 6.438440958556213e-05, 'samples': 19639808, 'steps': 38358, 'batch_loss/train': 0.7362029834184796} +12/28/2021 00:24:46 - INFO - codeparrot_training - Step 38359: {'lr': 6.437385042349356e-05, 'samples': 19640320, 'steps': 38359, 'batch_loss/train': 0.671988049056381} +12/28/2021 00:24:58 - INFO - codeparrot_training - Step 38360: {'lr': 6.436329199940452e-05, 'samples': 19640832, 'steps': 38360, 'batch_loss/train': 0.7985797375440598} +12/28/2021 00:25:09 - INFO - codeparrot_training - Step 38361: {'lr': 6.435273431333702e-05, 'samples': 19641344, 'steps': 38361, 'batch_loss/train': 0.7597738000331447} +12/28/2021 00:25:20 - INFO - codeparrot_training - Step 38362: {'lr': 6.434217736533287e-05, 'samples': 19641856, 'steps': 38362, 'batch_loss/train': 0.6770162108587101} +12/28/2021 00:25:32 - INFO - codeparrot_training - Step 38363: {'lr': 6.433162115543406e-05, 'samples': 19642368, 'steps': 38363, 'batch_loss/train': 0.7052792422473431} +12/28/2021 00:25:43 - INFO - codeparrot_training - Step 38364: {'lr': 6.432106568368276e-05, 'samples': 19642880, 'steps': 38364, 'batch_loss/train': 0.8148827631957829} +12/28/2021 00:25:53 - INFO - codeparrot_training - Step 38365: {'lr': 6.431051095012073e-05, 'samples': 19643392, 'steps': 38365, 'batch_loss/train': 0.7686430052854121} +12/28/2021 00:26:04 - INFO - codeparrot_training - Step 38366: {'lr': 6.429995695478996e-05, 'samples': 19643904, 'steps': 38366, 'batch_loss/train': 0.7403858588077128} +12/28/2021 00:26:18 - INFO - codeparrot_training - Step 38367: {'lr': 6.428940369773248e-05, 'samples': 19644416, 'steps': 38367, 'batch_loss/train': 0.7410845505073667} +12/28/2021 00:26:28 - INFO - codeparrot_training - Step 38368: {'lr': 6.427885117899019e-05, 'samples': 19644928, 'steps': 38368, 'batch_loss/train': 0.7493040142580867} +12/28/2021 00:26:39 - INFO - codeparrot_training - Step 38369: {'lr': 6.426829939860507e-05, 'samples': 19645440, 'steps': 38369, 'batch_loss/train': 0.6543682743795216} +12/28/2021 00:26:51 - INFO - codeparrot_training - Step 38370: {'lr': 6.425774835661907e-05, 'samples': 19645952, 'steps': 38370, 'batch_loss/train': 0.693428578786552} +12/28/2021 00:27:02 - INFO - codeparrot_training - Step 38371: {'lr': 6.424719805307411e-05, 'samples': 19646464, 'steps': 38371, 'batch_loss/train': 0.9033864689990878} +12/28/2021 00:27:12 - INFO - codeparrot_training - Step 38372: {'lr': 6.423664848801216e-05, 'samples': 19646976, 'steps': 38372, 'batch_loss/train': 0.6848810724914074} +12/28/2021 00:27:26 - INFO - codeparrot_training - Step 38373: {'lr': 6.422609966147524e-05, 'samples': 19647488, 'steps': 38373, 'batch_loss/train': 0.7827931270003319} +12/28/2021 00:27:37 - INFO - codeparrot_training - Step 38374: {'lr': 6.421555157350506e-05, 'samples': 19648000, 'steps': 38374, 'batch_loss/train': 0.6135439765639603} +12/28/2021 00:27:48 - INFO - codeparrot_training - Step 38375: {'lr': 6.420500422414382e-05, 'samples': 19648512, 'steps': 38375, 'batch_loss/train': 0.65390006871894} +12/28/2021 00:27:58 - INFO - codeparrot_training - Step 38376: {'lr': 6.419445761343326e-05, 'samples': 19649024, 'steps': 38376, 'batch_loss/train': 0.7094280873425305} +12/28/2021 00:28:10 - INFO - codeparrot_training - Step 38377: {'lr': 6.418391174141539e-05, 'samples': 19649536, 'steps': 38377, 'batch_loss/train': 0.814843999221921} +12/28/2021 00:28:21 - INFO - codeparrot_training - Step 38378: {'lr': 6.417336660813214e-05, 'samples': 19650048, 'steps': 38378, 'batch_loss/train': 0.7379936659708619} +12/28/2021 00:28:31 - INFO - codeparrot_training - Step 38379: {'lr': 6.416282221362538e-05, 'samples': 19650560, 'steps': 38379, 'batch_loss/train': 0.6340129630407318} +12/28/2021 00:28:44 - INFO - codeparrot_training - Step 38380: {'lr': 6.41522785579371e-05, 'samples': 19651072, 'steps': 38380, 'batch_loss/train': 0.683839307166636} +12/28/2021 00:28:54 - INFO - codeparrot_training - Step 38381: {'lr': 6.414173564110917e-05, 'samples': 19651584, 'steps': 38381, 'batch_loss/train': 0.8094533861149102} +12/28/2021 00:29:05 - INFO - codeparrot_training - Step 38382: {'lr': 6.413119346318353e-05, 'samples': 19652096, 'steps': 38382, 'batch_loss/train': 0.7290824940428138} +12/28/2021 00:29:19 - INFO - codeparrot_training - Step 38383: {'lr': 6.412065202420209e-05, 'samples': 19652608, 'steps': 38383, 'batch_loss/train': 0.6961955849546939} +12/28/2021 00:29:29 - INFO - codeparrot_training - Step 38384: {'lr': 6.411011132420683e-05, 'samples': 19653120, 'steps': 38384, 'batch_loss/train': 0.6858328119851649} +12/28/2021 00:29:40 - INFO - codeparrot_training - Step 38385: {'lr': 6.40995713632394e-05, 'samples': 19653632, 'steps': 38385, 'batch_loss/train': 0.8461006209254265} +12/28/2021 00:29:52 - INFO - codeparrot_training - Step 38386: {'lr': 6.408903214134199e-05, 'samples': 19654144, 'steps': 38386, 'batch_loss/train': 0.7455729246139526} +12/28/2021 00:30:03 - INFO - codeparrot_training - Step 38387: {'lr': 6.407849365855645e-05, 'samples': 19654656, 'steps': 38387, 'batch_loss/train': 0.7552090603858232} +12/28/2021 00:30:14 - INFO - codeparrot_training - Step 38388: {'lr': 6.406795591492454e-05, 'samples': 19655168, 'steps': 38388, 'batch_loss/train': 0.6562322383979335} +12/28/2021 00:30:24 - INFO - codeparrot_training - Step 38389: {'lr': 6.405741891048817e-05, 'samples': 19655680, 'steps': 38389, 'batch_loss/train': 0.8091207966208458} +12/28/2021 00:30:38 - INFO - codeparrot_training - Step 38390: {'lr': 6.404688264528941e-05, 'samples': 19656192, 'steps': 38390, 'batch_loss/train': 0.748350549954921} +12/28/2021 00:30:49 - INFO - codeparrot_training - Step 38391: {'lr': 6.403634711936999e-05, 'samples': 19656704, 'steps': 38391, 'batch_loss/train': 0.8258429793640971} +12/28/2021 00:30:59 - INFO - codeparrot_training - Step 38392: {'lr': 6.402581233277174e-05, 'samples': 19657216, 'steps': 38392, 'batch_loss/train': 0.63260652354802} +12/28/2021 00:31:12 - INFO - codeparrot_training - Step 38393: {'lr': 6.40152782855368e-05, 'samples': 19657728, 'steps': 38393, 'batch_loss/train': 0.7495514452457428} +12/28/2021 00:31:22 - INFO - codeparrot_training - Step 38394: {'lr': 6.400474497770681e-05, 'samples': 19658240, 'steps': 38394, 'batch_loss/train': 0.6700763031840324} +12/28/2021 00:31:33 - INFO - codeparrot_training - Step 38395: {'lr': 6.399421240932374e-05, 'samples': 19658752, 'steps': 38395, 'batch_loss/train': 0.8013367643579841} +12/28/2021 00:31:45 - INFO - codeparrot_training - Step 38396: {'lr': 6.398368058042944e-05, 'samples': 19659264, 'steps': 38396, 'batch_loss/train': 0.7473797844722867} +12/28/2021 00:31:56 - INFO - codeparrot_training - Step 38397: {'lr': 6.397314949106579e-05, 'samples': 19659776, 'steps': 38397, 'batch_loss/train': 0.7748925001360476} +12/28/2021 00:32:06 - INFO - codeparrot_training - Step 38398: {'lr': 6.396261914127463e-05, 'samples': 19660288, 'steps': 38398, 'batch_loss/train': 0.591587239439832} +12/28/2021 00:32:17 - INFO - codeparrot_training - Step 38399: {'lr': 6.395208953109796e-05, 'samples': 19660800, 'steps': 38399, 'batch_loss/train': 0.700949290767312} +12/28/2021 00:32:29 - INFO - codeparrot_training - Step 38400: {'lr': 6.394156066057738e-05, 'samples': 19661312, 'steps': 38400, 'batch_loss/train': 0.7353537338785827} +12/28/2021 00:32:40 - INFO - codeparrot_training - Step 38401: {'lr': 6.393103252975497e-05, 'samples': 19661824, 'steps': 38401, 'batch_loss/train': 0.6307841779198498} +12/28/2021 00:32:50 - INFO - codeparrot_training - Step 38402: {'lr': 6.392050513867259e-05, 'samples': 19662336, 'steps': 38402, 'batch_loss/train': 0.7597777795162983} +12/28/2021 00:33:04 - INFO - codeparrot_training - Step 38403: {'lr': 6.390997848737193e-05, 'samples': 19662848, 'steps': 38403, 'batch_loss/train': 0.8178857195889577} +12/28/2021 00:33:15 - INFO - codeparrot_training - Step 38404: {'lr': 6.389945257589497e-05, 'samples': 19663360, 'steps': 38404, 'batch_loss/train': 0.7620668532326818} +12/28/2021 00:33:25 - INFO - codeparrot_training - Step 38405: {'lr': 6.388892740428348e-05, 'samples': 19663872, 'steps': 38405, 'batch_loss/train': 0.8803630908951163} +12/28/2021 00:33:38 - INFO - codeparrot_training - Step 38406: {'lr': 6.387840297257935e-05, 'samples': 19664384, 'steps': 38406, 'batch_loss/train': 0.6754396988544613} +12/28/2021 00:33:49 - INFO - codeparrot_training - Step 38407: {'lr': 6.386787928082444e-05, 'samples': 19664896, 'steps': 38407, 'batch_loss/train': 0.6862713247537613} +12/28/2021 00:33:59 - INFO - codeparrot_training - Step 38408: {'lr': 6.385735632906053e-05, 'samples': 19665408, 'steps': 38408, 'batch_loss/train': 0.7175460597500205} +12/28/2021 00:34:10 - INFO - codeparrot_training - Step 38409: {'lr': 6.38468341173295e-05, 'samples': 19665920, 'steps': 38409, 'batch_loss/train': 0.6468212339095771} +12/28/2021 00:34:22 - INFO - codeparrot_training - Step 38410: {'lr': 6.383631264567316e-05, 'samples': 19666432, 'steps': 38410, 'batch_loss/train': 0.7507250646594912} +12/28/2021 00:34:32 - INFO - codeparrot_training - Step 38411: {'lr': 6.382579191413334e-05, 'samples': 19666944, 'steps': 38411, 'batch_loss/train': 0.7829470243304968} +12/28/2021 00:34:43 - INFO - codeparrot_training - Step 38412: {'lr': 6.38152719227519e-05, 'samples': 19667456, 'steps': 38412, 'batch_loss/train': 0.5927355559542775} +12/28/2021 00:34:56 - INFO - codeparrot_training - Step 38413: {'lr': 6.380475267157071e-05, 'samples': 19667968, 'steps': 38413, 'batch_loss/train': 0.8065078901126981} +12/28/2021 00:35:07 - INFO - codeparrot_training - Step 38414: {'lr': 6.379423416063143e-05, 'samples': 19668480, 'steps': 38414, 'batch_loss/train': 0.762964136665687} +12/28/2021 00:35:18 - INFO - codeparrot_training - Step 38415: {'lr': 6.37837163899759e-05, 'samples': 19668992, 'steps': 38415, 'batch_loss/train': 0.6692428891547024} +12/28/2021 00:35:30 - INFO - codeparrot_training - Step 38416: {'lr': 6.377319935964616e-05, 'samples': 19669504, 'steps': 38416, 'batch_loss/train': 0.7497809994965792} +12/28/2021 00:35:40 - INFO - codeparrot_training - Step 38417: {'lr': 6.376268306968377e-05, 'samples': 19670016, 'steps': 38417, 'batch_loss/train': 0.7780832159332931} +12/28/2021 00:35:51 - INFO - codeparrot_training - Step 38418: {'lr': 6.375216752013058e-05, 'samples': 19670528, 'steps': 38418, 'batch_loss/train': 0.7567772939801216} +12/28/2021 00:36:01 - INFO - codeparrot_training - Step 38419: {'lr': 6.374165271102858e-05, 'samples': 19671040, 'steps': 38419, 'batch_loss/train': 0.7838439894840121} +12/28/2021 00:36:13 - INFO - codeparrot_training - Step 38420: {'lr': 6.373113864241937e-05, 'samples': 19671552, 'steps': 38420, 'batch_loss/train': 0.6370644306298345} +12/28/2021 00:36:24 - INFO - codeparrot_training - Step 38421: {'lr': 6.37206253143448e-05, 'samples': 19672064, 'steps': 38421, 'batch_loss/train': 0.7482514674775302} +12/28/2021 00:36:35 - INFO - codeparrot_training - Step 38422: {'lr': 6.371011272684671e-05, 'samples': 19672576, 'steps': 38422, 'batch_loss/train': 0.7714666360989213} +12/28/2021 00:36:48 - INFO - codeparrot_training - Step 38423: {'lr': 6.369960087996688e-05, 'samples': 19673088, 'steps': 38423, 'batch_loss/train': 0.7327038571238518} +12/28/2021 00:36:59 - INFO - codeparrot_training - Step 38424: {'lr': 6.368908977374708e-05, 'samples': 19673600, 'steps': 38424, 'batch_loss/train': 0.7319432012736797} +12/28/2021 00:37:10 - INFO - codeparrot_training - Step 38425: {'lr': 6.367857940822915e-05, 'samples': 19674112, 'steps': 38425, 'batch_loss/train': 0.7264312393963337} +12/28/2021 00:37:22 - INFO - codeparrot_training - Step 38426: {'lr': 6.366806978345475e-05, 'samples': 19674624, 'steps': 38426, 'batch_loss/train': 0.5986033962108195} +12/28/2021 00:37:33 - INFO - codeparrot_training - Step 38427: {'lr': 6.365756089946578e-05, 'samples': 19675136, 'steps': 38427, 'batch_loss/train': 0.7111239922232926} +12/28/2021 00:37:43 - INFO - codeparrot_training - Step 38428: {'lr': 6.364705275630408e-05, 'samples': 19675648, 'steps': 38428, 'batch_loss/train': 0.656296726083383} +12/28/2021 00:37:54 - INFO - codeparrot_training - Step 38429: {'lr': 6.363654535401118e-05, 'samples': 19676160, 'steps': 38429, 'batch_loss/train': 0.7038709837943316} +12/28/2021 00:38:07 - INFO - codeparrot_training - Step 38430: {'lr': 6.362603869262914e-05, 'samples': 19676672, 'steps': 38430, 'batch_loss/train': 0.719716046587564} +12/28/2021 00:38:18 - INFO - codeparrot_training - Step 38431: {'lr': 6.361553277219956e-05, 'samples': 19677184, 'steps': 38431, 'batch_loss/train': 0.7527082925662398} +12/28/2021 00:38:28 - INFO - codeparrot_training - Step 38432: {'lr': 6.360502759276421e-05, 'samples': 19677696, 'steps': 38432, 'batch_loss/train': 0.7618648714851588} +12/28/2021 00:38:40 - INFO - codeparrot_training - Step 38433: {'lr': 6.359452315436492e-05, 'samples': 19678208, 'steps': 38433, 'batch_loss/train': 0.7802742589265108} +12/28/2021 00:38:51 - INFO - codeparrot_training - Step 38434: {'lr': 6.358401945704339e-05, 'samples': 19678720, 'steps': 38434, 'batch_loss/train': 0.7132532484829426} +12/28/2021 00:39:02 - INFO - codeparrot_training - Step 38435: {'lr': 6.357351650084145e-05, 'samples': 19679232, 'steps': 38435, 'batch_loss/train': 0.8491972051560879} +12/28/2021 00:39:14 - INFO - codeparrot_training - Step 38436: {'lr': 6.35630142858008e-05, 'samples': 19679744, 'steps': 38436, 'batch_loss/train': 0.6819310084683821} +12/28/2021 00:39:24 - INFO - codeparrot_training - Step 38437: {'lr': 6.35525128119632e-05, 'samples': 19680256, 'steps': 38437, 'batch_loss/train': 0.7964048692956567} +12/28/2021 00:39:35 - INFO - codeparrot_training - Step 38438: {'lr': 6.35420120793704e-05, 'samples': 19680768, 'steps': 38438, 'batch_loss/train': 0.7027354270685464} +12/28/2021 00:39:45 - INFO - codeparrot_training - Step 38439: {'lr': 6.353151208806427e-05, 'samples': 19681280, 'steps': 38439, 'batch_loss/train': 0.7646195278503001} +12/28/2021 00:39:58 - INFO - codeparrot_training - Step 38440: {'lr': 6.352101283808634e-05, 'samples': 19681792, 'steps': 38440, 'batch_loss/train': 0.7578284488990903} +12/28/2021 00:40:08 - INFO - codeparrot_training - Step 38441: {'lr': 6.351051432947837e-05, 'samples': 19682304, 'steps': 38441, 'batch_loss/train': 0.7898561218753457} +12/28/2021 00:40:19 - INFO - codeparrot_training - Step 38442: {'lr': 6.350001656228232e-05, 'samples': 19682816, 'steps': 38442, 'batch_loss/train': 0.8288913741707802} +12/28/2021 00:40:32 - INFO - codeparrot_training - Step 38443: {'lr': 6.348951953653975e-05, 'samples': 19683328, 'steps': 38443, 'batch_loss/train': 0.8095743153244257} +12/28/2021 00:40:43 - INFO - codeparrot_training - Step 38444: {'lr': 6.347902325229232e-05, 'samples': 19683840, 'steps': 38444, 'batch_loss/train': 0.7590812365524471} +12/28/2021 00:40:53 - INFO - codeparrot_training - Step 38445: {'lr': 6.346852770958203e-05, 'samples': 19684352, 'steps': 38445, 'batch_loss/train': 0.5931028816848993} +12/28/2021 00:41:05 - INFO - codeparrot_training - Step 38446: {'lr': 6.345803290845034e-05, 'samples': 19684864, 'steps': 38446, 'batch_loss/train': 0.8167167259380221} +12/28/2021 00:41:16 - INFO - codeparrot_training - Step 38447: {'lr': 6.34475388489391e-05, 'samples': 19685376, 'steps': 38447, 'batch_loss/train': 0.6866022716276348} +12/28/2021 00:41:27 - INFO - codeparrot_training - Step 38448: {'lr': 6.343704553109e-05, 'samples': 19685888, 'steps': 38448, 'batch_loss/train': 0.8194201989099383} +12/28/2021 00:41:41 - INFO - codeparrot_training - Step 38449: {'lr': 6.342655295494478e-05, 'samples': 19686400, 'steps': 38449, 'batch_loss/train': 0.6920407949946821} +12/28/2021 00:41:51 - INFO - codeparrot_training - Step 38450: {'lr': 6.341606112054513e-05, 'samples': 19686912, 'steps': 38450, 'batch_loss/train': 0.7803047086345032} +12/28/2021 00:42:02 - INFO - codeparrot_training - Step 38451: {'lr': 6.340557002793282e-05, 'samples': 19687424, 'steps': 38451, 'batch_loss/train': 0.8365876264870167} +12/28/2021 00:42:13 - INFO - codeparrot_training - Step 38452: {'lr': 6.339507967714938e-05, 'samples': 19687936, 'steps': 38452, 'batch_loss/train': 0.6642131232656538} +12/28/2021 00:42:25 - INFO - codeparrot_training - Step 38453: {'lr': 6.338459006823672e-05, 'samples': 19688448, 'steps': 38453, 'batch_loss/train': 0.7386783640831709} +12/28/2021 00:42:35 - INFO - codeparrot_training - Step 38454: {'lr': 6.337410120123654e-05, 'samples': 19688960, 'steps': 38454, 'batch_loss/train': 0.6320990797830746} +12/28/2021 00:42:46 - INFO - codeparrot_training - Step 38455: {'lr': 6.336361307619032e-05, 'samples': 19689472, 'steps': 38455, 'batch_loss/train': 0.7699910085648298} +12/28/2021 00:42:58 - INFO - codeparrot_training - Step 38456: {'lr': 6.335312569313998e-05, 'samples': 19689984, 'steps': 38456, 'batch_loss/train': 0.793927138671279} +12/28/2021 00:43:09 - INFO - codeparrot_training - Step 38457: {'lr': 6.334263905212719e-05, 'samples': 19690496, 'steps': 38457, 'batch_loss/train': 0.6866336888633668} +12/28/2021 00:43:19 - INFO - codeparrot_training - Step 38458: {'lr': 6.333215315319355e-05, 'samples': 19691008, 'steps': 38458, 'batch_loss/train': 0.6983337382553145} +12/28/2021 00:43:33 - INFO - codeparrot_training - Step 38459: {'lr': 6.332166799638076e-05, 'samples': 19691520, 'steps': 38459, 'batch_loss/train': 0.7664677398279309} +12/28/2021 00:43:44 - INFO - codeparrot_training - Step 38460: {'lr': 6.331118358173052e-05, 'samples': 19692032, 'steps': 38460, 'batch_loss/train': 0.6944252374814823} +12/28/2021 00:43:55 - INFO - codeparrot_training - Step 38461: {'lr': 6.330069990928452e-05, 'samples': 19692544, 'steps': 38461, 'batch_loss/train': 0.598271930415649} +12/28/2021 00:44:05 - INFO - codeparrot_training - Step 38462: {'lr': 6.329021697908447e-05, 'samples': 19693056, 'steps': 38462, 'batch_loss/train': 0.7775097172707319} +12/28/2021 00:44:17 - INFO - codeparrot_training - Step 38463: {'lr': 6.327973479117203e-05, 'samples': 19693568, 'steps': 38463, 'batch_loss/train': 0.7509335740469396} +12/28/2021 00:44:28 - INFO - codeparrot_training - Step 38464: {'lr': 6.326925334558883e-05, 'samples': 19694080, 'steps': 38464, 'batch_loss/train': 0.6825699554756284} +12/28/2021 00:44:39 - INFO - codeparrot_training - Step 38465: {'lr': 6.325877264237664e-05, 'samples': 19694592, 'steps': 38465, 'batch_loss/train': 0.7386633413843811} +12/28/2021 00:44:51 - INFO - codeparrot_training - Step 38466: {'lr': 6.324829268157695e-05, 'samples': 19695104, 'steps': 38466, 'batch_loss/train': 0.8703750874847174} +12/28/2021 00:45:01 - INFO - codeparrot_training - Step 38467: {'lr': 6.323781346323157e-05, 'samples': 19695616, 'steps': 38467, 'batch_loss/train': 0.7684224210679531} +12/28/2021 00:45:12 - INFO - codeparrot_training - Step 38468: {'lr': 6.322733498738223e-05, 'samples': 19696128, 'steps': 38468, 'batch_loss/train': 0.7938327835872769} +12/28/2021 00:45:25 - INFO - codeparrot_training - Step 38469: {'lr': 6.32168572540704e-05, 'samples': 19696640, 'steps': 38469, 'batch_loss/train': 0.710220139939338} +12/28/2021 00:45:36 - INFO - codeparrot_training - Step 38470: {'lr': 6.320638026333772e-05, 'samples': 19697152, 'steps': 38470, 'batch_loss/train': 0.7113888976164162} +12/28/2021 00:45:47 - INFO - codeparrot_training - Step 38471: {'lr': 6.31959040152261e-05, 'samples': 19697664, 'steps': 38471, 'batch_loss/train': 0.7626016344875097} +12/28/2021 00:45:57 - INFO - codeparrot_training - Step 38472: {'lr': 6.318542850977696e-05, 'samples': 19698176, 'steps': 38472, 'batch_loss/train': 0.8056299681775272} +12/28/2021 00:46:10 - INFO - codeparrot_training - Step 38473: {'lr': 6.317495374703202e-05, 'samples': 19698688, 'steps': 38473, 'batch_loss/train': 0.8087638337165117} +12/28/2021 00:46:20 - INFO - codeparrot_training - Step 38474: {'lr': 6.316447972703293e-05, 'samples': 19699200, 'steps': 38474, 'batch_loss/train': 0.6999473250471056} +12/28/2021 00:46:31 - INFO - codeparrot_training - Step 38475: {'lr': 6.315400644982133e-05, 'samples': 19699712, 'steps': 38475, 'batch_loss/train': 0.5576146920211613} +12/28/2021 00:46:43 - INFO - codeparrot_training - Step 38476: {'lr': 6.314353391543884e-05, 'samples': 19700224, 'steps': 38476, 'batch_loss/train': 0.6610268514486961} +12/28/2021 00:46:54 - INFO - codeparrot_training - Step 38477: {'lr': 6.313306212392717e-05, 'samples': 19700736, 'steps': 38477, 'batch_loss/train': 0.8445931347087026} +12/28/2021 00:47:04 - INFO - codeparrot_training - Step 38478: {'lr': 6.312259107532774e-05, 'samples': 19701248, 'steps': 38478, 'batch_loss/train': 0.79826316004619} +12/28/2021 00:47:16 - INFO - codeparrot_training - Step 38479: {'lr': 6.31121207696824e-05, 'samples': 19701760, 'steps': 38479, 'batch_loss/train': 0.9290223428979516} +12/28/2021 00:47:27 - INFO - codeparrot_training - Step 38480: {'lr': 6.310165120703279e-05, 'samples': 19702272, 'steps': 38480, 'batch_loss/train': 0.6400492573156953} +12/28/2021 00:47:38 - INFO - codeparrot_training - Step 38481: {'lr': 6.309118238742028e-05, 'samples': 19702784, 'steps': 38481, 'batch_loss/train': 0.7267181766219437} +12/28/2021 00:47:48 - INFO - codeparrot_training - Step 38482: {'lr': 6.308071431088674e-05, 'samples': 19703296, 'steps': 38482, 'batch_loss/train': 0.7869244773173705} +12/28/2021 00:48:02 - INFO - codeparrot_training - Step 38483: {'lr': 6.307024697747377e-05, 'samples': 19703808, 'steps': 38483, 'batch_loss/train': 0.7119268216192722} +12/28/2021 00:48:13 - INFO - codeparrot_training - Step 38484: {'lr': 6.305978038722285e-05, 'samples': 19704320, 'steps': 38484, 'batch_loss/train': 0.8163332706317306} +12/28/2021 00:48:23 - INFO - codeparrot_training - Step 38485: {'lr': 6.304931454017566e-05, 'samples': 19704832, 'steps': 38485, 'batch_loss/train': 0.6311103096231818} +12/28/2021 00:48:35 - INFO - codeparrot_training - Step 38486: {'lr': 6.303884943637378e-05, 'samples': 19705344, 'steps': 38486, 'batch_loss/train': 0.7645781370811164} +12/28/2021 00:48:46 - INFO - codeparrot_training - Step 38487: {'lr': 6.302838507585883e-05, 'samples': 19705856, 'steps': 38487, 'batch_loss/train': 0.7368362348061055} +12/28/2021 00:48:57 - INFO - codeparrot_training - Step 38488: {'lr': 6.301792145867246e-05, 'samples': 19706368, 'steps': 38488, 'batch_loss/train': 0.7685706210322678} +12/28/2021 00:49:11 - INFO - codeparrot_training - Step 38489: {'lr': 6.300745858485623e-05, 'samples': 19706880, 'steps': 38489, 'batch_loss/train': 0.6928879718761891} +12/28/2021 00:49:21 - INFO - codeparrot_training - Step 38490: {'lr': 6.299699645445173e-05, 'samples': 19707392, 'steps': 38490, 'batch_loss/train': 0.6925926632247865} +12/28/2021 00:49:32 - INFO - codeparrot_training - Step 38491: {'lr': 6.298653506750057e-05, 'samples': 19707904, 'steps': 38491, 'batch_loss/train': 0.7901623845100403} +12/28/2021 00:49:44 - INFO - codeparrot_training - Step 38492: {'lr': 6.297607442404432e-05, 'samples': 19708416, 'steps': 38492, 'batch_loss/train': 0.7654010131955147} +12/28/2021 00:49:55 - INFO - codeparrot_training - Step 38493: {'lr': 6.296561452412461e-05, 'samples': 19708928, 'steps': 38493, 'batch_loss/train': 0.6866954765282571} +12/28/2021 00:50:05 - INFO - codeparrot_training - Step 38494: {'lr': 6.295515536778303e-05, 'samples': 19709440, 'steps': 38494, 'batch_loss/train': 0.6141325477510691} +12/28/2021 00:50:16 - INFO - codeparrot_training - Step 38495: {'lr': 6.294469695506109e-05, 'samples': 19709952, 'steps': 38495, 'batch_loss/train': 0.7583265919238329} +12/28/2021 00:50:29 - INFO - codeparrot_training - Step 38496: {'lr': 6.293423928600028e-05, 'samples': 19710464, 'steps': 38496, 'batch_loss/train': 0.7957413461990654} +12/28/2021 00:50:40 - INFO - codeparrot_training - Step 38497: {'lr': 6.29237823606425e-05, 'samples': 19710976, 'steps': 38497, 'batch_loss/train': 0.7559268689365126} +12/28/2021 00:50:50 - INFO - codeparrot_training - Step 38498: {'lr': 6.291332617902903e-05, 'samples': 19711488, 'steps': 38498, 'batch_loss/train': 0.5950503406929784} +12/28/2021 00:51:03 - INFO - codeparrot_training - Step 38499: {'lr': 6.290287074120154e-05, 'samples': 19712000, 'steps': 38499, 'batch_loss/train': 0.8953142175450921} +12/28/2021 00:51:14 - INFO - codeparrot_training - Step 38500: {'lr': 6.289241604720158e-05, 'samples': 19712512, 'steps': 38500, 'batch_loss/train': 0.8528660968877375} +12/28/2021 00:51:24 - INFO - codeparrot_training - Step 38501: {'lr': 6.288196209707072e-05, 'samples': 19713024, 'steps': 38501, 'batch_loss/train': 0.4403483749483712} +12/28/2021 00:51:35 - INFO - codeparrot_training - Step 38502: {'lr': 6.287150889085055e-05, 'samples': 19713536, 'steps': 38502, 'batch_loss/train': 0.6731671155430377} +12/28/2021 00:51:47 - INFO - codeparrot_training - Step 38503: {'lr': 6.286105642858258e-05, 'samples': 19714048, 'steps': 38503, 'batch_loss/train': 0.6253696205094457} +12/28/2021 00:51:58 - INFO - codeparrot_training - Step 38504: {'lr': 6.285060471030841e-05, 'samples': 19714560, 'steps': 38504, 'batch_loss/train': 0.5360687880311161} +12/28/2021 00:52:09 - INFO - codeparrot_training - Step 38505: {'lr': 6.284015373606954e-05, 'samples': 19715072, 'steps': 38505, 'batch_loss/train': 0.9558635056018829} +12/28/2021 00:52:23 - INFO - codeparrot_training - Step 38506: {'lr': 6.282970350590767e-05, 'samples': 19715584, 'steps': 38506, 'batch_loss/train': 0.8980566733516753} +12/28/2021 00:52:33 - INFO - codeparrot_training - Step 38507: {'lr': 6.281925401986405e-05, 'samples': 19716096, 'steps': 38507, 'batch_loss/train': 0.857407289557159} +12/28/2021 00:52:44 - INFO - codeparrot_training - Step 38508: {'lr': 6.280880527798047e-05, 'samples': 19716608, 'steps': 38508, 'batch_loss/train': 0.7430108329281211} +12/28/2021 00:52:56 - INFO - codeparrot_training - Step 38509: {'lr': 6.279835728029848e-05, 'samples': 19717120, 'steps': 38509, 'batch_loss/train': 0.7090084319934249} +12/28/2021 00:53:07 - INFO - codeparrot_training - Step 38510: {'lr': 6.278791002685943e-05, 'samples': 19717632, 'steps': 38510, 'batch_loss/train': 0.8043275391682982} +12/28/2021 00:53:17 - INFO - codeparrot_training - Step 38511: {'lr': 6.2777463517705e-05, 'samples': 19718144, 'steps': 38511, 'batch_loss/train': 0.7395359249785542} +12/28/2021 00:53:28 - INFO - codeparrot_training - Step 38512: {'lr': 6.276701775287669e-05, 'samples': 19718656, 'steps': 38512, 'batch_loss/train': 0.7490957519039512} +12/28/2021 00:53:40 - INFO - codeparrot_training - Step 38513: {'lr': 6.2756572732416e-05, 'samples': 19719168, 'steps': 38513, 'batch_loss/train': 0.9049792131409049} +12/28/2021 00:53:51 - INFO - codeparrot_training - Step 38514: {'lr': 6.274612845636448e-05, 'samples': 19719680, 'steps': 38514, 'batch_loss/train': 1.6238095904700458} +12/28/2021 00:54:01 - INFO - codeparrot_training - Step 38515: {'lr': 6.273568492476364e-05, 'samples': 19720192, 'steps': 38515, 'batch_loss/train': 0.6786352186463773} +12/28/2021 00:54:14 - INFO - codeparrot_training - Step 38516: {'lr': 6.272524213765504e-05, 'samples': 19720704, 'steps': 38516, 'batch_loss/train': 0.6207741020480171} +12/28/2021 00:54:24 - INFO - codeparrot_training - Step 38517: {'lr': 6.271480009508012e-05, 'samples': 19721216, 'steps': 38517, 'batch_loss/train': 0.6435707774944603} +12/28/2021 00:54:35 - INFO - codeparrot_training - Step 38518: {'lr': 6.270435879708047e-05, 'samples': 19721728, 'steps': 38518, 'batch_loss/train': 0.7758291559293866} +12/28/2021 00:54:48 - INFO - codeparrot_training - Step 38519: {'lr': 6.269391824369758e-05, 'samples': 19722240, 'steps': 38519, 'batch_loss/train': 0.6319524459540844} +12/28/2021 00:54:59 - INFO - codeparrot_training - Step 38520: {'lr': 6.268347843497299e-05, 'samples': 19722752, 'steps': 38520, 'batch_loss/train': 0.7310259705409408} +12/28/2021 00:55:09 - INFO - codeparrot_training - Step 38521: {'lr': 6.267303937094813e-05, 'samples': 19723264, 'steps': 38521, 'batch_loss/train': 0.7670648219063878} +12/28/2021 00:55:20 - INFO - codeparrot_training - Step 38522: {'lr': 6.26626010516644e-05, 'samples': 19723776, 'steps': 38522, 'batch_loss/train': 0.7185894125141203} +12/28/2021 00:55:32 - INFO - codeparrot_training - Step 38523: {'lr': 6.265216347716363e-05, 'samples': 19724288, 'steps': 38523, 'batch_loss/train': 0.7206030728993937} +12/28/2021 00:55:43 - INFO - codeparrot_training - Step 38524: {'lr': 6.264172664748701e-05, 'samples': 19724800, 'steps': 38524, 'batch_loss/train': 0.6217481712810695} +12/28/2021 00:55:54 - INFO - codeparrot_training - Step 38525: {'lr': 6.263129056267616e-05, 'samples': 19725312, 'steps': 38525, 'batch_loss/train': 0.757339131552726} +12/28/2021 00:56:06 - INFO - codeparrot_training - Step 38526: {'lr': 6.262085522277256e-05, 'samples': 19725824, 'steps': 38526, 'batch_loss/train': 0.7803738475777209} +12/28/2021 00:56:17 - INFO - codeparrot_training - Step 38527: {'lr': 6.261042062781766e-05, 'samples': 19726336, 'steps': 38527, 'batch_loss/train': 0.5679833321482874} +12/28/2021 00:56:27 - INFO - codeparrot_training - Step 38528: {'lr': 6.259998677785301e-05, 'samples': 19726848, 'steps': 38528, 'batch_loss/train': 0.8099242490716279} +12/28/2021 00:56:41 - INFO - codeparrot_training - Step 38529: {'lr': 6.258955367292002e-05, 'samples': 19727360, 'steps': 38529, 'batch_loss/train': 0.6602439088746905} +12/28/2021 00:56:52 - INFO - codeparrot_training - Step 38530: {'lr': 6.257912131306021e-05, 'samples': 19727872, 'steps': 38530, 'batch_loss/train': 0.8143248131964356} +12/28/2021 00:57:02 - INFO - codeparrot_training - Step 38531: {'lr': 6.256868969831506e-05, 'samples': 19728384, 'steps': 38531, 'batch_loss/train': 0.7829656950198114} +12/28/2021 00:57:13 - INFO - codeparrot_training - Step 38532: {'lr': 6.255825882872606e-05, 'samples': 19728896, 'steps': 38532, 'batch_loss/train': 0.7390374080277979} +12/28/2021 00:57:25 - INFO - codeparrot_training - Step 38533: {'lr': 6.254782870433453e-05, 'samples': 19729408, 'steps': 38533, 'batch_loss/train': 0.7125545849557966} +12/28/2021 00:57:36 - INFO - codeparrot_training - Step 38534: {'lr': 6.253739932518215e-05, 'samples': 19729920, 'steps': 38534, 'batch_loss/train': 0.707880390342325} +12/28/2021 00:57:46 - INFO - codeparrot_training - Step 38535: {'lr': 6.252697069131034e-05, 'samples': 19730432, 'steps': 38535, 'batch_loss/train': 0.7527965493500233} +12/28/2021 00:58:01 - INFO - codeparrot_training - Step 38536: {'lr': 6.251654280276042e-05, 'samples': 19730944, 'steps': 38536, 'batch_loss/train': 0.6142848604358733} +12/28/2021 00:58:11 - INFO - codeparrot_training - Step 38537: {'lr': 6.250611565957387e-05, 'samples': 19731456, 'steps': 38537, 'batch_loss/train': 0.80760339461267} +12/28/2021 00:58:22 - INFO - codeparrot_training - Step 38538: {'lr': 6.249568926179237e-05, 'samples': 19731968, 'steps': 38538, 'batch_loss/train': 0.843337781727314} +12/28/2021 00:58:34 - INFO - codeparrot_training - Step 38539: {'lr': 6.248526360945713e-05, 'samples': 19732480, 'steps': 38539, 'batch_loss/train': 0.7361961849965155} +12/28/2021 00:58:44 - INFO - codeparrot_training - Step 38540: {'lr': 6.247483870260967e-05, 'samples': 19732992, 'steps': 38540, 'batch_loss/train': 0.8084354139864445} +12/28/2021 00:58:55 - INFO - codeparrot_training - Step 38541: {'lr': 6.246441454129145e-05, 'samples': 19733504, 'steps': 38541, 'batch_loss/train': 0.7410398635547608} +12/28/2021 00:59:06 - INFO - codeparrot_training - Step 38542: {'lr': 6.245399112554392e-05, 'samples': 19734016, 'steps': 38542, 'batch_loss/train': 0.773354725446552} +12/28/2021 00:59:18 - INFO - codeparrot_training - Step 38543: {'lr': 6.244356845540849e-05, 'samples': 19734528, 'steps': 38543, 'batch_loss/train': 0.6860448494553566} +12/28/2021 00:59:29 - INFO - codeparrot_training - Step 38544: {'lr': 6.243314653092661e-05, 'samples': 19735040, 'steps': 38544, 'batch_loss/train': 0.5387298739515245} +12/28/2021 00:59:39 - INFO - codeparrot_training - Step 38545: {'lr': 6.242272535213971e-05, 'samples': 19735552, 'steps': 38545, 'batch_loss/train': 0.6919528039870784} +12/28/2021 00:59:52 - INFO - codeparrot_training - Step 38546: {'lr': 6.241230491908926e-05, 'samples': 19736064, 'steps': 38546, 'batch_loss/train': 0.5432820612913929} +12/28/2021 01:00:02 - INFO - codeparrot_training - Step 38547: {'lr': 6.240188523181672e-05, 'samples': 19736576, 'steps': 38547, 'batch_loss/train': 0.7221894497051835} +12/28/2021 01:00:13 - INFO - codeparrot_training - Step 38548: {'lr': 6.23914662903633e-05, 'samples': 19737088, 'steps': 38548, 'batch_loss/train': 0.7475956198759377} +12/28/2021 01:00:27 - INFO - codeparrot_training - Step 38549: {'lr': 6.238104809477072e-05, 'samples': 19737600, 'steps': 38549, 'batch_loss/train': 0.7771667591296136} +12/28/2021 01:00:38 - INFO - codeparrot_training - Step 38550: {'lr': 6.237063064508014e-05, 'samples': 19738112, 'steps': 38550, 'batch_loss/train': 0.7804238549433649} +12/28/2021 01:00:48 - INFO - codeparrot_training - Step 38551: {'lr': 6.236021394133312e-05, 'samples': 19738624, 'steps': 38551, 'batch_loss/train': 0.8056922516552731} +12/28/2021 01:00:59 - INFO - codeparrot_training - Step 38552: {'lr': 6.234979798357105e-05, 'samples': 19739136, 'steps': 38552, 'batch_loss/train': 0.7167910602875054} +12/28/2021 01:01:11 - INFO - codeparrot_training - Step 38553: {'lr': 6.233938277183532e-05, 'samples': 19739648, 'steps': 38553, 'batch_loss/train': 0.717138968873769} +12/28/2021 01:01:21 - INFO - codeparrot_training - Step 38554: {'lr': 6.232896830616732e-05, 'samples': 19740160, 'steps': 38554, 'batch_loss/train': 0.7256405760999769} +12/28/2021 01:01:32 - INFO - codeparrot_training - Step 38555: {'lr': 6.231855458660851e-05, 'samples': 19740672, 'steps': 38555, 'batch_loss/train': 0.7291985177434981} +12/28/2021 01:01:45 - INFO - codeparrot_training - Step 38556: {'lr': 6.230814161320025e-05, 'samples': 19741184, 'steps': 38556, 'batch_loss/train': 0.9318191315978765} +12/28/2021 01:01:56 - INFO - codeparrot_training - Step 38557: {'lr': 6.229772938598396e-05, 'samples': 19741696, 'steps': 38557, 'batch_loss/train': 0.9974065995775163} +12/28/2021 01:02:06 - INFO - codeparrot_training - Step 38558: {'lr': 6.228731790500108e-05, 'samples': 19742208, 'steps': 38558, 'batch_loss/train': 1.1106393891386688} +12/28/2021 01:02:17 - INFO - codeparrot_training - Step 38559: {'lr': 6.227690717029284e-05, 'samples': 19742720, 'steps': 38559, 'batch_loss/train': 0.75277094473131} +12/28/2021 01:02:31 - INFO - codeparrot_training - Step 38560: {'lr': 6.226649718190076e-05, 'samples': 19743232, 'steps': 38560, 'batch_loss/train': 0.7417522789910436} +12/28/2021 01:02:42 - INFO - codeparrot_training - Step 38561: {'lr': 6.225608793986628e-05, 'samples': 19743744, 'steps': 38561, 'batch_loss/train': 0.8065208815969527} +12/28/2021 01:02:52 - INFO - codeparrot_training - Step 38562: {'lr': 6.224567944423065e-05, 'samples': 19744256, 'steps': 38562, 'batch_loss/train': 0.5413910199422389} +12/28/2021 01:03:04 - INFO - codeparrot_training - Step 38563: {'lr': 6.223527169503523e-05, 'samples': 19744768, 'steps': 38563, 'batch_loss/train': 0.778783957939595} +12/28/2021 01:03:15 - INFO - codeparrot_training - Step 38564: {'lr': 6.22248646923216e-05, 'samples': 19745280, 'steps': 38564, 'batch_loss/train': 0.6644877181388438} +12/28/2021 01:03:26 - INFO - codeparrot_training - Step 38565: {'lr': 6.221445843613096e-05, 'samples': 19745792, 'steps': 38565, 'batch_loss/train': 0.8251500213518739} +12/28/2021 01:03:40 - INFO - codeparrot_training - Step 38566: {'lr': 6.220405292650472e-05, 'samples': 19746304, 'steps': 38566, 'batch_loss/train': 0.6647642273455858} +12/28/2021 01:03:50 - INFO - codeparrot_training - Step 38567: {'lr': 6.219364816348428e-05, 'samples': 19746816, 'steps': 38567, 'batch_loss/train': 0.6265557520091534} +12/28/2021 01:04:01 - INFO - codeparrot_training - Step 38568: {'lr': 6.218324414711093e-05, 'samples': 19747328, 'steps': 38568, 'batch_loss/train': 0.7970314705744386} +12/28/2021 01:04:12 - INFO - codeparrot_training - Step 38569: {'lr': 6.217284087742611e-05, 'samples': 19747840, 'steps': 38569, 'batch_loss/train': 1.7908611111342907} +12/28/2021 01:04:24 - INFO - codeparrot_training - Step 38570: {'lr': 6.216243835447116e-05, 'samples': 19748352, 'steps': 38570, 'batch_loss/train': 0.9718086153734475} +12/28/2021 01:04:35 - INFO - codeparrot_training - Step 38571: {'lr': 6.215203657828745e-05, 'samples': 19748864, 'steps': 38571, 'batch_loss/train': 0.7160967001691461} +12/28/2021 01:04:45 - INFO - codeparrot_training - Step 38572: {'lr': 6.214163554891627e-05, 'samples': 19749376, 'steps': 38572, 'batch_loss/train': 0.8031385019421577} +12/28/2021 01:04:57 - INFO - codeparrot_training - Step 38573: {'lr': 6.21312352663991e-05, 'samples': 19749888, 'steps': 38573, 'batch_loss/train': 0.8722985750064254} +12/28/2021 01:05:08 - INFO - codeparrot_training - Step 38574: {'lr': 6.212083573077707e-05, 'samples': 19750400, 'steps': 38574, 'batch_loss/train': 0.8614976750686765} +12/28/2021 01:05:19 - INFO - codeparrot_training - Step 38575: {'lr': 6.211043694209179e-05, 'samples': 19750912, 'steps': 38575, 'batch_loss/train': 0.7631275597959757} +12/28/2021 01:05:32 - INFO - codeparrot_training - Step 38576: {'lr': 6.21000389003844e-05, 'samples': 19751424, 'steps': 38576, 'batch_loss/train': 0.7844944503158331} +12/28/2021 01:05:43 - INFO - codeparrot_training - Step 38577: {'lr': 6.208964160569625e-05, 'samples': 19751936, 'steps': 38577, 'batch_loss/train': 0.6183680053800344} +12/28/2021 01:05:54 - INFO - codeparrot_training - Step 38578: {'lr': 6.207924505806886e-05, 'samples': 19752448, 'steps': 38578, 'batch_loss/train': 0.7254118821583688} +12/28/2021 01:06:04 - INFO - codeparrot_training - Step 38579: {'lr': 6.206884925754336e-05, 'samples': 19752960, 'steps': 38579, 'batch_loss/train': 0.8172244727611542} +12/28/2021 01:06:16 - INFO - codeparrot_training - Step 38580: {'lr': 6.205845420416117e-05, 'samples': 19753472, 'steps': 38580, 'batch_loss/train': 0.8003630638122559} +12/28/2021 01:06:27 - INFO - codeparrot_training - Step 38581: {'lr': 6.204805989796358e-05, 'samples': 19753984, 'steps': 38581, 'batch_loss/train': 0.7306803925894201} +12/28/2021 01:06:38 - INFO - codeparrot_training - Step 38582: {'lr': 6.203766633899196e-05, 'samples': 19754496, 'steps': 38582, 'batch_loss/train': 0.7494755536317825} +12/28/2021 01:06:50 - INFO - codeparrot_training - Step 38583: {'lr': 6.202727352728759e-05, 'samples': 19755008, 'steps': 38583, 'batch_loss/train': 0.6699392859009095} +12/28/2021 01:07:01 - INFO - codeparrot_training - Step 38584: {'lr': 6.201688146289189e-05, 'samples': 19755520, 'steps': 38584, 'batch_loss/train': 0.6762027539662085} +12/28/2021 01:07:11 - INFO - codeparrot_training - Step 38585: {'lr': 6.200649014584595e-05, 'samples': 19756032, 'steps': 38585, 'batch_loss/train': 0.8261265624314547} +12/28/2021 01:07:22 - INFO - codeparrot_training - Step 38586: {'lr': 6.199609957619127e-05, 'samples': 19756544, 'steps': 38586, 'batch_loss/train': 1.1167248180136085} +12/28/2021 01:07:36 - INFO - codeparrot_training - Step 38587: {'lr': 6.198570975396919e-05, 'samples': 19757056, 'steps': 38587, 'batch_loss/train': 0.7816394474357367} +12/28/2021 01:07:47 - INFO - codeparrot_training - Step 38588: {'lr': 6.197532067922088e-05, 'samples': 19757568, 'steps': 38588, 'batch_loss/train': 0.7675547460094094} +12/28/2021 01:07:57 - INFO - codeparrot_training - Step 38589: {'lr': 6.196493235198761e-05, 'samples': 19758080, 'steps': 38589, 'batch_loss/train': 0.6204850105568767} +12/28/2021 01:08:09 - INFO - codeparrot_training - Step 38590: {'lr': 6.195454477231092e-05, 'samples': 19758592, 'steps': 38590, 'batch_loss/train': 0.6553027506452054} +12/28/2021 01:08:20 - INFO - codeparrot_training - Step 38591: {'lr': 6.19441579402319e-05, 'samples': 19759104, 'steps': 38591, 'batch_loss/train': 0.779658603714779} +12/28/2021 01:08:31 - INFO - codeparrot_training - Step 38592: {'lr': 6.19337718557918e-05, 'samples': 19759616, 'steps': 38592, 'batch_loss/train': 0.6965190693736076} +12/28/2021 01:08:43 - INFO - codeparrot_training - Step 38593: {'lr': 6.192338651903218e-05, 'samples': 19760128, 'steps': 38593, 'batch_loss/train': 0.8091543577611446} +12/28/2021 01:08:53 - INFO - codeparrot_training - Step 38594: {'lr': 6.191300192999408e-05, 'samples': 19760640, 'steps': 38594, 'batch_loss/train': 0.7313229739665985} +12/28/2021 01:09:04 - INFO - codeparrot_training - Step 38595: {'lr': 6.190261808871886e-05, 'samples': 19761152, 'steps': 38595, 'batch_loss/train': 0.794970735674724} +12/28/2021 01:09:18 - INFO - codeparrot_training - Step 38596: {'lr': 6.189223499524783e-05, 'samples': 19761664, 'steps': 38596, 'batch_loss/train': 0.648811215069145} +12/28/2021 01:09:29 - INFO - codeparrot_training - Step 38597: {'lr': 6.188185264962221e-05, 'samples': 19762176, 'steps': 38597, 'batch_loss/train': 0.6191862199921161} +12/28/2021 01:09:39 - INFO - codeparrot_training - Step 38598: {'lr': 6.187147105188335e-05, 'samples': 19762688, 'steps': 38598, 'batch_loss/train': 0.6543341684155166} +12/28/2021 01:09:50 - INFO - codeparrot_training - Step 38599: {'lr': 6.186109020207253e-05, 'samples': 19763200, 'steps': 38599, 'batch_loss/train': 0.7035743452725001} +12/28/2021 01:10:02 - INFO - codeparrot_training - Step 38600: {'lr': 6.185071010023083e-05, 'samples': 19763712, 'steps': 38600, 'batch_loss/train': 0.5914978196378797} +12/28/2021 01:10:13 - INFO - codeparrot_training - Step 38601: {'lr': 6.184033074639984e-05, 'samples': 19764224, 'steps': 38601, 'batch_loss/train': 0.9044416649267077} +12/28/2021 01:10:23 - INFO - codeparrot_training - Step 38602: {'lr': 6.182995214062053e-05, 'samples': 19764736, 'steps': 38602, 'batch_loss/train': 0.6889653401449323} +12/28/2021 01:10:35 - INFO - codeparrot_training - Step 38603: {'lr': 6.181957428293425e-05, 'samples': 19765248, 'steps': 38603, 'batch_loss/train': 0.723068630322814} +12/28/2021 01:10:46 - INFO - codeparrot_training - Step 38604: {'lr': 6.180919717338243e-05, 'samples': 19765760, 'steps': 38604, 'batch_loss/train': 0.9011699249967933} +12/28/2021 01:10:57 - INFO - codeparrot_training - Step 38605: {'lr': 6.179882081200609e-05, 'samples': 19766272, 'steps': 38605, 'batch_loss/train': 0.7294855704531074} +12/28/2021 01:11:07 - INFO - codeparrot_training - Step 38606: {'lr': 6.178844519884658e-05, 'samples': 19766784, 'steps': 38606, 'batch_loss/train': 0.7963156811892986} +12/28/2021 01:11:21 - INFO - codeparrot_training - Step 38607: {'lr': 6.177807033394517e-05, 'samples': 19767296, 'steps': 38607, 'batch_loss/train': 0.9892269414849579} +12/28/2021 01:11:31 - INFO - codeparrot_training - Step 38608: {'lr': 6.176769621734305e-05, 'samples': 19767808, 'steps': 38608, 'batch_loss/train': 0.7199160065501928} +12/28/2021 01:11:42 - INFO - codeparrot_training - Step 38609: {'lr': 6.175732284908153e-05, 'samples': 19768320, 'steps': 38609, 'batch_loss/train': 0.7478421991690993} +12/28/2021 01:11:54 - INFO - codeparrot_training - Step 38610: {'lr': 6.174695022920184e-05, 'samples': 19768832, 'steps': 38610, 'batch_loss/train': 0.8302606204524636} +12/28/2021 01:12:05 - INFO - codeparrot_training - Step 38611: {'lr': 6.17365783577451e-05, 'samples': 19769344, 'steps': 38611, 'batch_loss/train': 0.7372376315761358} +12/28/2021 01:12:15 - INFO - codeparrot_training - Step 38612: {'lr': 6.17262072347527e-05, 'samples': 19769856, 'steps': 38612, 'batch_loss/train': 0.7054348101373762} +12/28/2021 01:12:27 - INFO - codeparrot_training - Step 38613: {'lr': 6.171583686026586e-05, 'samples': 19770368, 'steps': 38613, 'batch_loss/train': 0.8489370420575142} +12/28/2021 01:12:38 - INFO - codeparrot_training - Step 38614: {'lr': 6.170546723432563e-05, 'samples': 19770880, 'steps': 38614, 'batch_loss/train': 0.8445638599805534} +12/28/2021 01:12:49 - INFO - codeparrot_training - Step 38615: {'lr': 6.169509835697346e-05, 'samples': 19771392, 'steps': 38615, 'batch_loss/train': 0.7939928006380796} +12/28/2021 01:12:59 - INFO - codeparrot_training - Step 38616: {'lr': 6.168473022825052e-05, 'samples': 19771904, 'steps': 38616, 'batch_loss/train': 0.814802622422576} +12/28/2021 01:13:13 - INFO - codeparrot_training - Step 38617: {'lr': 6.167436284819792e-05, 'samples': 19772416, 'steps': 38617, 'batch_loss/train': 0.7011732854880393} +12/28/2021 01:13:24 - INFO - codeparrot_training - Step 38618: {'lr': 6.166399621685686e-05, 'samples': 19772928, 'steps': 38618, 'batch_loss/train': 0.7313916649436578} +12/28/2021 01:13:35 - INFO - codeparrot_training - Step 38619: {'lr': 6.165363033426879e-05, 'samples': 19773440, 'steps': 38619, 'batch_loss/train': 0.8015617472119629} +12/28/2021 01:13:47 - INFO - codeparrot_training - Step 38620: {'lr': 6.164326520047472e-05, 'samples': 19773952, 'steps': 38620, 'batch_loss/train': 0.6499131377786398} +12/28/2021 01:13:57 - INFO - codeparrot_training - Step 38621: {'lr': 6.16329008155159e-05, 'samples': 19774464, 'steps': 38621, 'batch_loss/train': 0.737680222839117} +12/28/2021 01:14:08 - INFO - codeparrot_training - Step 38622: {'lr': 6.162253717943353e-05, 'samples': 19774976, 'steps': 38622, 'batch_loss/train': 0.740686247125268} +12/28/2021 01:14:22 - INFO - codeparrot_training - Step 38623: {'lr': 6.161217429226881e-05, 'samples': 19775488, 'steps': 38623, 'batch_loss/train': 0.7208714819862507} +12/28/2021 01:14:33 - INFO - codeparrot_training - Step 38624: {'lr': 6.160181215406296e-05, 'samples': 19776000, 'steps': 38624, 'batch_loss/train': 0.6787209392059594} +12/28/2021 01:14:43 - INFO - codeparrot_training - Step 38625: {'lr': 6.159145076485726e-05, 'samples': 19776512, 'steps': 38625, 'batch_loss/train': 0.7371227843686938} +12/28/2021 01:14:55 - INFO - codeparrot_training - Step 38626: {'lr': 6.158109012469265e-05, 'samples': 19777024, 'steps': 38626, 'batch_loss/train': 0.7128598405979574} +12/28/2021 01:15:06 - INFO - codeparrot_training - Step 38627: {'lr': 6.157073023361057e-05, 'samples': 19777536, 'steps': 38627, 'batch_loss/train': 0.7585228346288204} +12/28/2021 01:15:17 - INFO - codeparrot_training - Step 38628: {'lr': 6.156037109165219e-05, 'samples': 19778048, 'steps': 38628, 'batch_loss/train': 0.825270933099091} +12/28/2021 01:15:27 - INFO - codeparrot_training - Step 38629: {'lr': 6.155001269885846e-05, 'samples': 19778560, 'steps': 38629, 'batch_loss/train': 0.7335618590004742} +12/28/2021 01:15:40 - INFO - codeparrot_training - Step 38630: {'lr': 6.153965505527087e-05, 'samples': 19779072, 'steps': 38630, 'batch_loss/train': 0.7966350428760052} +12/28/2021 01:15:50 - INFO - codeparrot_training - Step 38631: {'lr': 6.15292981609304e-05, 'samples': 19779584, 'steps': 38631, 'batch_loss/train': 0.8570838836021721} +12/28/2021 01:16:01 - INFO - codeparrot_training - Step 38632: {'lr': 6.151894201587824e-05, 'samples': 19780096, 'steps': 38632, 'batch_loss/train': 0.7907201701309532} +12/28/2021 01:16:13 - INFO - codeparrot_training - Step 38633: {'lr': 6.150858662015563e-05, 'samples': 19780608, 'steps': 38633, 'batch_loss/train': 0.7726617471198551} +12/28/2021 01:16:24 - INFO - codeparrot_training - Step 38634: {'lr': 6.149823197380369e-05, 'samples': 19781120, 'steps': 38634, 'batch_loss/train': 0.6272670495673083} +12/28/2021 01:16:34 - INFO - codeparrot_training - Step 38635: {'lr': 6.148787807686362e-05, 'samples': 19781632, 'steps': 38635, 'batch_loss/train': 0.7532835580641404} +12/28/2021 01:16:48 - INFO - codeparrot_training - Step 38636: {'lr': 6.147752492937656e-05, 'samples': 19782144, 'steps': 38636, 'batch_loss/train': 0.661607435438782} +12/28/2021 01:16:59 - INFO - codeparrot_training - Step 38637: {'lr': 6.146717253138367e-05, 'samples': 19782656, 'steps': 38637, 'batch_loss/train': 0.6417313073761761} +12/28/2021 01:17:10 - INFO - codeparrot_training - Step 38638: {'lr': 6.145682088292614e-05, 'samples': 19783168, 'steps': 38638, 'batch_loss/train': 0.7465582666918635} +12/28/2021 01:17:20 - INFO - codeparrot_training - Step 38639: {'lr': 6.144646998404515e-05, 'samples': 19783680, 'steps': 38639, 'batch_loss/train': 0.7103332779370248} +12/28/2021 01:17:32 - INFO - codeparrot_training - Step 38640: {'lr': 6.143611983478165e-05, 'samples': 19784192, 'steps': 38640, 'batch_loss/train': 0.7732701641507447} +12/28/2021 01:17:43 - INFO - codeparrot_training - Step 38641: {'lr': 6.142577043517703e-05, 'samples': 19784704, 'steps': 38641, 'batch_loss/train': 0.7469205399975181} +12/28/2021 01:17:53 - INFO - codeparrot_training - Step 38642: {'lr': 6.14154217852724e-05, 'samples': 19785216, 'steps': 38642, 'batch_loss/train': 0.7541529033333063} +12/28/2021 01:18:06 - INFO - codeparrot_training - Step 38643: {'lr': 6.140507388510879e-05, 'samples': 19785728, 'steps': 38643, 'batch_loss/train': 0.787351957987994} +12/28/2021 01:18:16 - INFO - codeparrot_training - Step 38644: {'lr': 6.139472673472732e-05, 'samples': 19786240, 'steps': 38644, 'batch_loss/train': 0.9939497150480747} +12/28/2021 01:18:27 - INFO - codeparrot_training - Step 38645: {'lr': 6.138438033416932e-05, 'samples': 19786752, 'steps': 38645, 'batch_loss/train': 0.7314211265183985} +12/28/2021 01:18:39 - INFO - codeparrot_training - Step 38646: {'lr': 6.137403468347578e-05, 'samples': 19787264, 'steps': 38646, 'batch_loss/train': 0.727678531780839} +12/28/2021 01:18:50 - INFO - codeparrot_training - Step 38647: {'lr': 6.13636897826878e-05, 'samples': 19787776, 'steps': 38647, 'batch_loss/train': 0.6394073537085205} +12/28/2021 01:19:01 - INFO - codeparrot_training - Step 38648: {'lr': 6.135334563184661e-05, 'samples': 19788288, 'steps': 38648, 'batch_loss/train': 0.7118160864338279} +12/28/2021 01:19:11 - INFO - codeparrot_training - Step 38649: {'lr': 6.134300223099326e-05, 'samples': 19788800, 'steps': 38649, 'batch_loss/train': 0.6039572429144755} +12/28/2021 01:19:26 - INFO - codeparrot_training - Step 38650: {'lr': 6.133265958016893e-05, 'samples': 19789312, 'steps': 38650, 'batch_loss/train': 0.8268266348168254} +12/28/2021 01:19:36 - INFO - codeparrot_training - Step 38651: {'lr': 6.132231767941468e-05, 'samples': 19789824, 'steps': 38651, 'batch_loss/train': 0.690887127420865} +12/28/2021 01:19:47 - INFO - codeparrot_training - Step 38652: {'lr': 6.131197652877166e-05, 'samples': 19790336, 'steps': 38652, 'batch_loss/train': 0.6855646129697561} +12/28/2021 01:19:59 - INFO - codeparrot_training - Step 38653: {'lr': 6.130163612828097e-05, 'samples': 19790848, 'steps': 38653, 'batch_loss/train': 0.8310847263783216} +12/28/2021 01:20:09 - INFO - codeparrot_training - Step 38654: {'lr': 6.129129647798381e-05, 'samples': 19791360, 'steps': 38654, 'batch_loss/train': 0.7113352000014856} +12/28/2021 01:20:20 - INFO - codeparrot_training - Step 38655: {'lr': 6.128095757792105e-05, 'samples': 19791872, 'steps': 38655, 'batch_loss/train': 0.7063161362893879} +12/28/2021 01:20:34 - INFO - codeparrot_training - Step 38656: {'lr': 6.127061942813409e-05, 'samples': 19792384, 'steps': 38656, 'batch_loss/train': 0.6901949718594551} +12/28/2021 01:20:45 - INFO - codeparrot_training - Step 38657: {'lr': 6.126028202866379e-05, 'samples': 19792896, 'steps': 38657, 'batch_loss/train': 0.7430054023861885} +12/28/2021 01:20:55 - INFO - codeparrot_training - Step 38658: {'lr': 6.124994537955139e-05, 'samples': 19793408, 'steps': 38658, 'batch_loss/train': 0.7306178510189056} +12/28/2021 01:21:07 - INFO - codeparrot_training - Step 38659: {'lr': 6.12396094808379e-05, 'samples': 19793920, 'steps': 38659, 'batch_loss/train': 0.7673916202038527} +12/28/2021 01:21:18 - INFO - codeparrot_training - Step 38660: {'lr': 6.122927433256448e-05, 'samples': 19794432, 'steps': 38660, 'batch_loss/train': 0.8460995032219216} +12/28/2021 01:21:29 - INFO - codeparrot_training - Step 38661: {'lr': 6.121893993477215e-05, 'samples': 19794944, 'steps': 38661, 'batch_loss/train': 0.6821749382652342} +12/28/2021 01:21:39 - INFO - codeparrot_training - Step 38662: {'lr': 6.120860628750206e-05, 'samples': 19795456, 'steps': 38662, 'batch_loss/train': 0.7751642018556595} +12/28/2021 01:21:51 - INFO - codeparrot_training - Step 38663: {'lr': 6.119827339079525e-05, 'samples': 19795968, 'steps': 38663, 'batch_loss/train': 0.6022106766758952} +12/28/2021 01:22:02 - INFO - codeparrot_training - Step 38664: {'lr': 6.118794124469285e-05, 'samples': 19796480, 'steps': 38664, 'batch_loss/train': 0.7627492374740541} +12/28/2021 01:22:13 - INFO - codeparrot_training - Step 38665: {'lr': 6.117760984923596e-05, 'samples': 19796992, 'steps': 38665, 'batch_loss/train': 0.7323533548042178} +12/28/2021 01:22:27 - INFO - codeparrot_training - Step 38666: {'lr': 6.116727920446543e-05, 'samples': 19797504, 'steps': 38666, 'batch_loss/train': 0.6996463621035218} +12/28/2021 01:22:37 - INFO - codeparrot_training - Step 38667: {'lr': 6.11569493104226e-05, 'samples': 19798016, 'steps': 38667, 'batch_loss/train': 0.6231292639859021} +12/28/2021 01:22:48 - INFO - codeparrot_training - Step 38668: {'lr': 6.114662016714847e-05, 'samples': 19798528, 'steps': 38668, 'batch_loss/train': 0.8087078053504229} +12/28/2021 01:23:00 - INFO - codeparrot_training - Step 38669: {'lr': 6.113629177468402e-05, 'samples': 19799040, 'steps': 38669, 'batch_loss/train': 0.6839957822812721} +12/28/2021 01:23:11 - INFO - codeparrot_training - Step 38670: {'lr': 6.11259641330703e-05, 'samples': 19799552, 'steps': 38670, 'batch_loss/train': 0.8175992406904697} +12/28/2021 01:23:21 - INFO - codeparrot_training - Step 38671: {'lr': 6.111563724234856e-05, 'samples': 19800064, 'steps': 38671, 'batch_loss/train': 0.6570698505965993} +12/28/2021 01:23:32 - INFO - codeparrot_training - Step 38672: {'lr': 6.110531110255965e-05, 'samples': 19800576, 'steps': 38672, 'batch_loss/train': 0.665431709960103} +12/28/2021 01:23:46 - INFO - codeparrot_training - Step 38673: {'lr': 6.109498571374473e-05, 'samples': 19801088, 'steps': 38673, 'batch_loss/train': 0.782311346847564} +12/28/2021 01:23:57 - INFO - codeparrot_training - Step 38674: {'lr': 6.108466107594479e-05, 'samples': 19801600, 'steps': 38674, 'batch_loss/train': 0.6504009030759335} +12/28/2021 01:24:07 - INFO - codeparrot_training - Step 38675: {'lr': 6.107433718920091e-05, 'samples': 19802112, 'steps': 38675, 'batch_loss/train': 0.7596104890108109} +12/28/2021 01:24:20 - INFO - codeparrot_training - Step 38676: {'lr': 6.106401405355413e-05, 'samples': 19802624, 'steps': 38676, 'batch_loss/train': 0.7339260862208903} +12/28/2021 01:24:30 - INFO - codeparrot_training - Step 38677: {'lr': 6.105369166904551e-05, 'samples': 19803136, 'steps': 38677, 'batch_loss/train': 0.6118829536717385} +12/28/2021 01:24:41 - INFO - codeparrot_training - Step 38678: {'lr': 6.104337003571602e-05, 'samples': 19803648, 'steps': 38678, 'batch_loss/train': 0.7366061434149742} +12/28/2021 01:24:53 - INFO - codeparrot_training - Step 38679: {'lr': 6.10330491536068e-05, 'samples': 19804160, 'steps': 38679, 'batch_loss/train': 0.7410623910836875} +12/28/2021 01:25:04 - INFO - codeparrot_training - Step 38680: {'lr': 6.1022729022758874e-05, 'samples': 19804672, 'steps': 38680, 'batch_loss/train': 0.7859787247143686} +12/28/2021 01:25:14 - INFO - codeparrot_training - Step 38681: {'lr': 6.1012409643213086e-05, 'samples': 19805184, 'steps': 38681, 'batch_loss/train': 0.7075341688469052} +12/28/2021 01:25:25 - INFO - codeparrot_training - Step 38682: {'lr': 6.100209101501064e-05, 'samples': 19805696, 'steps': 38682, 'batch_loss/train': 0.6429586578160524} +12/28/2021 01:25:37 - INFO - codeparrot_training - Step 38683: {'lr': 6.0991773138192626e-05, 'samples': 19806208, 'steps': 38683, 'batch_loss/train': 0.6732246205210686} +12/28/2021 01:25:48 - INFO - codeparrot_training - Step 38684: {'lr': 6.0981456012799866e-05, 'samples': 19806720, 'steps': 38684, 'batch_loss/train': 0.7092171756085008} +12/28/2021 01:25:58 - INFO - codeparrot_training - Step 38685: {'lr': 6.0971139638873465e-05, 'samples': 19807232, 'steps': 38685, 'batch_loss/train': 0.7943690679967403} +12/28/2021 01:26:12 - INFO - codeparrot_training - Step 38686: {'lr': 6.096082401645445e-05, 'samples': 19807744, 'steps': 38686, 'batch_loss/train': 0.6991420686244965} +12/28/2021 01:26:23 - INFO - codeparrot_training - Step 38687: {'lr': 6.095050914558381e-05, 'samples': 19808256, 'steps': 38687, 'batch_loss/train': 0.727948535233736} +12/28/2021 01:26:33 - INFO - codeparrot_training - Step 38688: {'lr': 6.0940195026302544e-05, 'samples': 19808768, 'steps': 38688, 'batch_loss/train': 0.7923706704750657} +12/28/2021 01:26:46 - INFO - codeparrot_training - Step 38689: {'lr': 6.092988165865171e-05, 'samples': 19809280, 'steps': 38689, 'batch_loss/train': 0.7392986039631069} +12/28/2021 01:26:57 - INFO - codeparrot_training - Step 38690: {'lr': 6.091956904267226e-05, 'samples': 19809792, 'steps': 38690, 'batch_loss/train': 0.6783370021730661} +12/28/2021 01:27:07 - INFO - codeparrot_training - Step 38691: {'lr': 6.090925717840528e-05, 'samples': 19810304, 'steps': 38691, 'batch_loss/train': 0.7014932986348867} +12/28/2021 01:27:18 - INFO - codeparrot_training - Step 38692: {'lr': 6.089894606589158e-05, 'samples': 19810816, 'steps': 38692, 'batch_loss/train': 0.791485152207315} +12/28/2021 01:27:30 - INFO - codeparrot_training - Step 38693: {'lr': 6.088863570517231e-05, 'samples': 19811328, 'steps': 38693, 'batch_loss/train': 0.7008618488907814} +12/28/2021 01:27:41 - INFO - codeparrot_training - Step 38694: {'lr': 6.087832609628849e-05, 'samples': 19811840, 'steps': 38694, 'batch_loss/train': 0.7659913748502731} +12/28/2021 01:27:51 - INFO - codeparrot_training - Step 38695: {'lr': 6.086801723928098e-05, 'samples': 19812352, 'steps': 38695, 'batch_loss/train': 0.6519919675774872} +12/28/2021 01:28:05 - INFO - codeparrot_training - Step 38696: {'lr': 6.0857709134190723e-05, 'samples': 19812864, 'steps': 38696, 'batch_loss/train': 0.7178121310425922} +12/28/2021 01:28:16 - INFO - codeparrot_training - Step 38697: {'lr': 6.0847401781058966e-05, 'samples': 19813376, 'steps': 38697, 'batch_loss/train': 0.7392503833398223} +12/28/2021 01:28:27 - INFO - codeparrot_training - Step 38698: {'lr': 6.083709517992644e-05, 'samples': 19813888, 'steps': 38698, 'batch_loss/train': 0.7068426087498665} +12/28/2021 01:28:39 - INFO - codeparrot_training - Step 38699: {'lr': 6.082678933083421e-05, 'samples': 19814400, 'steps': 38699, 'batch_loss/train': 0.744864453561604} +12/28/2021 01:28:49 - INFO - codeparrot_training - Step 38700: {'lr': 6.0816484233823214e-05, 'samples': 19814912, 'steps': 38700, 'batch_loss/train': 0.7873950025532395} +12/28/2021 01:29:00 - INFO - codeparrot_training - Step 38701: {'lr': 6.0806179888934444e-05, 'samples': 19815424, 'steps': 38701, 'batch_loss/train': 0.6544127650558949} +12/28/2021 01:29:11 - INFO - codeparrot_training - Step 38702: {'lr': 6.079587629620889e-05, 'samples': 19815936, 'steps': 38702, 'batch_loss/train': 0.6764645967632532} +12/28/2021 01:29:23 - INFO - codeparrot_training - Step 38703: {'lr': 6.078557345568747e-05, 'samples': 19816448, 'steps': 38703, 'batch_loss/train': 0.7368596973828971} +12/28/2021 01:29:33 - INFO - codeparrot_training - Step 38704: {'lr': 6.077527136741118e-05, 'samples': 19816960, 'steps': 38704, 'batch_loss/train': 0.636621936224401} +12/28/2021 01:29:44 - INFO - codeparrot_training - Step 38705: {'lr': 6.0764970031420955e-05, 'samples': 19817472, 'steps': 38705, 'batch_loss/train': 0.72135567269288} +12/28/2021 01:29:58 - INFO - codeparrot_training - Step 38706: {'lr': 6.0754669447757816e-05, 'samples': 19817984, 'steps': 38706, 'batch_loss/train': 0.5955868819728494} +12/28/2021 01:30:09 - INFO - codeparrot_training - Step 38707: {'lr': 6.074436961646254e-05, 'samples': 19818496, 'steps': 38707, 'batch_loss/train': 0.7253710445947945} +12/28/2021 01:30:19 - INFO - codeparrot_training - Step 38708: {'lr': 6.073407053757623e-05, 'samples': 19819008, 'steps': 38708, 'batch_loss/train': 0.8302673622965813} +12/28/2021 01:30:31 - INFO - codeparrot_training - Step 38709: {'lr': 6.0723772211139896e-05, 'samples': 19819520, 'steps': 38709, 'batch_loss/train': 0.7382979956455529} +12/28/2021 01:30:42 - INFO - codeparrot_training - Step 38710: {'lr': 6.0713474637194287e-05, 'samples': 19820032, 'steps': 38710, 'batch_loss/train': 0.9048844929784536} +12/28/2021 01:30:53 - INFO - codeparrot_training - Step 38711: {'lr': 6.0703177815780415e-05, 'samples': 19820544, 'steps': 38711, 'batch_loss/train': 0.7959080226719379} +12/28/2021 01:31:03 - INFO - codeparrot_training - Step 38712: {'lr': 6.0692881746939265e-05, 'samples': 19821056, 'steps': 38712, 'batch_loss/train': 0.7648805100470781} +12/28/2021 01:31:17 - INFO - codeparrot_training - Step 38713: {'lr': 6.06825864307117e-05, 'samples': 19821568, 'steps': 38713, 'batch_loss/train': 0.7431241874583066} +12/28/2021 01:31:28 - INFO - codeparrot_training - Step 38714: {'lr': 6.067229186713871e-05, 'samples': 19822080, 'steps': 38714, 'batch_loss/train': 0.7053918289020658} +12/28/2021 01:31:39 - INFO - codeparrot_training - Step 38715: {'lr': 6.066199805626121e-05, 'samples': 19822592, 'steps': 38715, 'batch_loss/train': 0.7808208608767018} +12/28/2021 01:31:51 - INFO - codeparrot_training - Step 38716: {'lr': 6.065170499812009e-05, 'samples': 19823104, 'steps': 38716, 'batch_loss/train': 0.8255712119862437} +12/28/2021 01:32:01 - INFO - codeparrot_training - Step 38717: {'lr': 6.064141269275633e-05, 'samples': 19823616, 'steps': 38717, 'batch_loss/train': 0.688257472589612} +12/28/2021 01:32:12 - INFO - codeparrot_training - Step 38718: {'lr': 6.0631121140210774e-05, 'samples': 19824128, 'steps': 38718, 'batch_loss/train': 0.7936273562954739} +12/28/2021 01:32:24 - INFO - codeparrot_training - Step 38719: {'lr': 6.0620830340524405e-05, 'samples': 19824640, 'steps': 38719, 'batch_loss/train': 0.7717887260951102} +12/28/2021 01:32:35 - INFO - codeparrot_training - Step 38720: {'lr': 6.0610540293738166e-05, 'samples': 19825152, 'steps': 38720, 'batch_loss/train': 1.276128716301173} +12/28/2021 01:32:46 - INFO - codeparrot_training - Step 38721: {'lr': 6.0600250999892856e-05, 'samples': 19825664, 'steps': 38721, 'batch_loss/train': 1.0045360807562247} +12/28/2021 01:33:00 - INFO - codeparrot_training - Step 38722: {'lr': 6.058996245902934e-05, 'samples': 19826176, 'steps': 38722, 'batch_loss/train': 0.7333298311568797} +12/28/2021 01:33:11 - INFO - codeparrot_training - Step 38723: {'lr': 6.057967467118877e-05, 'samples': 19826688, 'steps': 38723, 'batch_loss/train': 1.7297862078994513} +12/28/2021 01:33:21 - INFO - codeparrot_training - Step 38724: {'lr': 6.0569387636411816e-05, 'samples': 19827200, 'steps': 38724, 'batch_loss/train': 0.7518463972955942} +12/28/2021 01:33:32 - INFO - codeparrot_training - Step 38725: {'lr': 6.055910135473938e-05, 'samples': 19827712, 'steps': 38725, 'batch_loss/train': 0.7472869753837585} +12/28/2021 01:33:44 - INFO - codeparrot_training - Step 38726: {'lr': 6.054881582621255e-05, 'samples': 19828224, 'steps': 38726, 'batch_loss/train': 0.7217438519001007} +12/28/2021 01:33:55 - INFO - codeparrot_training - Step 38727: {'lr': 6.053853105087206e-05, 'samples': 19828736, 'steps': 38727, 'batch_loss/train': 0.7655802257359028} +12/28/2021 01:34:05 - INFO - codeparrot_training - Step 38728: {'lr': 6.052824702875881e-05, 'samples': 19829248, 'steps': 38728, 'batch_loss/train': 0.7964593444485217} +12/28/2021 01:34:17 - INFO - codeparrot_training - Step 38729: {'lr': 6.0517963759913695e-05, 'samples': 19829760, 'steps': 38729, 'batch_loss/train': 0.7451753828208894} +12/28/2021 01:34:28 - INFO - codeparrot_training - Step 38730: {'lr': 6.0507681244377636e-05, 'samples': 19830272, 'steps': 38730, 'batch_loss/train': 0.7729161984752864} +12/28/2021 01:34:39 - INFO - codeparrot_training - Step 38731: {'lr': 6.049739948219149e-05, 'samples': 19830784, 'steps': 38731, 'batch_loss/train': 0.6521641805302352} +12/28/2021 01:34:49 - INFO - codeparrot_training - Step 38732: {'lr': 6.048711847339619e-05, 'samples': 19831296, 'steps': 38732, 'batch_loss/train': 0.8994287014938891} +12/28/2021 01:35:01 - INFO - codeparrot_training - Step 38733: {'lr': 6.047683821803241e-05, 'samples': 19831808, 'steps': 38733, 'batch_loss/train': 0.7558269817382097} +12/28/2021 01:35:12 - INFO - codeparrot_training - Step 38734: {'lr': 6.046655871614121e-05, 'samples': 19832320, 'steps': 38734, 'batch_loss/train': 0.9024644764140248} +12/28/2021 01:35:23 - INFO - codeparrot_training - Step 38735: {'lr': 6.045627996776351e-05, 'samples': 19832832, 'steps': 38735, 'batch_loss/train': 0.808067130856216} +12/28/2021 01:35:37 - INFO - codeparrot_training - Step 38736: {'lr': 6.0446001972939976e-05, 'samples': 19833344, 'steps': 38736, 'batch_loss/train': 0.7319668701966293} +12/28/2021 01:35:48 - INFO - codeparrot_training - Step 38737: {'lr': 6.043572473171158e-05, 'samples': 19833856, 'steps': 38737, 'batch_loss/train': 0.8679178757593036} +12/28/2021 01:35:59 - INFO - codeparrot_training - Step 38738: {'lr': 6.042544824411916e-05, 'samples': 19834368, 'steps': 38738, 'batch_loss/train': 0.8553459160029888} +12/28/2021 01:36:09 - INFO - codeparrot_training - Step 38739: {'lr': 6.0415172510203595e-05, 'samples': 19834880, 'steps': 38739, 'batch_loss/train': 1.2584313983097672} +12/28/2021 01:36:21 - INFO - codeparrot_training - Step 38740: {'lr': 6.040489753000569e-05, 'samples': 19835392, 'steps': 38740, 'batch_loss/train': 0.8857108657248318} +12/28/2021 01:36:32 - INFO - codeparrot_training - Step 38741: {'lr': 6.039462330356635e-05, 'samples': 19835904, 'steps': 38741, 'batch_loss/train': 0.648717840667814} +12/28/2021 01:36:43 - INFO - codeparrot_training - Step 38742: {'lr': 6.038434983092636e-05, 'samples': 19836416, 'steps': 38742, 'batch_loss/train': 0.8296328405849636} +12/28/2021 01:36:57 - INFO - codeparrot_training - Step 38743: {'lr': 6.037407711212664e-05, 'samples': 19836928, 'steps': 38743, 'batch_loss/train': 0.7138881469145417} +12/28/2021 01:37:08 - INFO - codeparrot_training - Step 38744: {'lr': 6.036380514720796e-05, 'samples': 19837440, 'steps': 38744, 'batch_loss/train': 0.7651556637138128} +12/28/2021 01:37:19 - INFO - codeparrot_training - Step 38745: {'lr': 6.0353533936211206e-05, 'samples': 19837952, 'steps': 38745, 'batch_loss/train': 1.3095074417069554} +12/28/2021 01:37:29 - INFO - codeparrot_training - Step 38746: {'lr': 6.0343263479177266e-05, 'samples': 19838464, 'steps': 38746, 'batch_loss/train': 0.7070842101238668} +12/28/2021 01:37:41 - INFO - codeparrot_training - Step 38747: {'lr': 6.0332993776146814e-05, 'samples': 19838976, 'steps': 38747, 'batch_loss/train': 0.7129671429283917} +12/28/2021 01:37:52 - INFO - codeparrot_training - Step 38748: {'lr': 6.03227248271607e-05, 'samples': 19839488, 'steps': 38748, 'batch_loss/train': 0.7917311107739806} +12/28/2021 01:38:03 - INFO - codeparrot_training - Step 38749: {'lr': 6.031245663225995e-05, 'samples': 19840000, 'steps': 38749, 'batch_loss/train': 0.8059442164376378} +12/28/2021 01:38:15 - INFO - codeparrot_training - Step 38750: {'lr': 6.030218919148517e-05, 'samples': 19840512, 'steps': 38750, 'batch_loss/train': 0.6573063067626208} +12/28/2021 01:38:25 - INFO - codeparrot_training - Step 38751: {'lr': 6.029192250487719e-05, 'samples': 19841024, 'steps': 38751, 'batch_loss/train': 0.6295698472531512} +12/28/2021 01:38:36 - INFO - codeparrot_training - Step 38752: {'lr': 6.028165657247706e-05, 'samples': 19841536, 'steps': 38752, 'batch_loss/train': 0.8329634480178356} +12/28/2021 01:38:49 - INFO - codeparrot_training - Step 38753: {'lr': 6.027139139432533e-05, 'samples': 19842048, 'steps': 38753, 'batch_loss/train': 0.7431095178471878} +12/28/2021 01:39:00 - INFO - codeparrot_training - Step 38754: {'lr': 6.026112697046293e-05, 'samples': 19842560, 'steps': 38754, 'batch_loss/train': 0.711303245741874} +12/28/2021 01:39:11 - INFO - codeparrot_training - Step 38755: {'lr': 6.025086330093063e-05, 'samples': 19843072, 'steps': 38755, 'batch_loss/train': 0.7635028371587396} +12/28/2021 01:39:23 - INFO - codeparrot_training - Step 38756: {'lr': 6.0240600385769284e-05, 'samples': 19843584, 'steps': 38756, 'batch_loss/train': 0.7833646675571799} +12/28/2021 01:39:34 - INFO - codeparrot_training - Step 38757: {'lr': 6.023033822501961e-05, 'samples': 19844096, 'steps': 38757, 'batch_loss/train': 0.7038839519955218} +12/28/2021 01:39:44 - INFO - codeparrot_training - Step 38758: {'lr': 6.0220076818722576e-05, 'samples': 19844608, 'steps': 38758, 'batch_loss/train': 0.7483367726672441} +12/28/2021 01:39:55 - INFO - codeparrot_training - Step 38759: {'lr': 6.0209816166918675e-05, 'samples': 19845120, 'steps': 38759, 'batch_loss/train': 0.6035371313337237} +12/28/2021 01:40:08 - INFO - codeparrot_training - Step 38760: {'lr': 6.0199556269648987e-05, 'samples': 19845632, 'steps': 38760, 'batch_loss/train': 0.578038819367066} +12/28/2021 01:40:18 - INFO - codeparrot_training - Step 38761: {'lr': 6.0189297126954255e-05, 'samples': 19846144, 'steps': 38761, 'batch_loss/train': 0.6332105241017416} +12/28/2021 01:40:29 - INFO - codeparrot_training - Step 38762: {'lr': 6.0179038738875065e-05, 'samples': 19846656, 'steps': 38762, 'batch_loss/train': 0.9965695794671774} +12/28/2021 01:40:43 - INFO - codeparrot_training - Step 38763: {'lr': 6.0168781105452404e-05, 'samples': 19847168, 'steps': 38763, 'batch_loss/train': 0.7301886667264625} +12/28/2021 01:40:54 - INFO - codeparrot_training - Step 38764: {'lr': 6.015852422672707e-05, 'samples': 19847680, 'steps': 38764, 'batch_loss/train': 0.6989765977486968} +12/28/2021 01:41:04 - INFO - codeparrot_training - Step 38765: {'lr': 6.014826810273971e-05, 'samples': 19848192, 'steps': 38765, 'batch_loss/train': 0.7778428922174498} +12/28/2021 01:41:15 - INFO - codeparrot_training - Step 38766: {'lr': 6.013801273353115e-05, 'samples': 19848704, 'steps': 38766, 'batch_loss/train': 0.7406296054832637} +12/28/2021 01:41:27 - INFO - codeparrot_training - Step 38767: {'lr': 6.0127758119142146e-05, 'samples': 19849216, 'steps': 38767, 'batch_loss/train': 0.8596357628703117} +12/28/2021 01:41:38 - INFO - codeparrot_training - Step 38768: {'lr': 6.0117504259613456e-05, 'samples': 19849728, 'steps': 38768, 'batch_loss/train': 0.7078367155045271} +12/28/2021 01:41:48 - INFO - codeparrot_training - Step 38769: {'lr': 6.0107251154985913e-05, 'samples': 19850240, 'steps': 38769, 'batch_loss/train': 0.7544995984062552} +12/28/2021 01:42:00 - INFO - codeparrot_training - Step 38770: {'lr': 6.009699880530023e-05, 'samples': 19850752, 'steps': 38770, 'batch_loss/train': 0.7930959332734346} +12/28/2021 01:42:11 - INFO - codeparrot_training - Step 38771: {'lr': 6.0086747210597175e-05, 'samples': 19851264, 'steps': 38771, 'batch_loss/train': 0.7301190867146943} +12/28/2021 01:42:21 - INFO - codeparrot_training - Step 38772: {'lr': 6.007649637091758e-05, 'samples': 19851776, 'steps': 38772, 'batch_loss/train': 0.7858914257958531} +12/28/2021 01:42:35 - INFO - codeparrot_training - Step 38773: {'lr': 6.0066246286302025e-05, 'samples': 19852288, 'steps': 38773, 'batch_loss/train': 0.813948676455766} +12/28/2021 01:42:46 - INFO - codeparrot_training - Step 38774: {'lr': 6.005599695679131e-05, 'samples': 19852800, 'steps': 38774, 'batch_loss/train': 0.7616707002744079} +12/28/2021 01:42:57 - INFO - codeparrot_training - Step 38775: {'lr': 6.004574838242638e-05, 'samples': 19853312, 'steps': 38775, 'batch_loss/train': 0.7692202758044004} +12/28/2021 01:43:09 - INFO - codeparrot_training - Step 38776: {'lr': 6.003550056324775e-05, 'samples': 19853824, 'steps': 38776, 'batch_loss/train': 0.7593022435903549} +12/28/2021 01:43:19 - INFO - codeparrot_training - Step 38777: {'lr': 6.002525349929616e-05, 'samples': 19854336, 'steps': 38777, 'batch_loss/train': 0.7605140879750252} +12/28/2021 01:43:30 - INFO - codeparrot_training - Step 38778: {'lr': 6.0015007190612594e-05, 'samples': 19854848, 'steps': 38778, 'batch_loss/train': 0.7254862524569035} +12/28/2021 01:43:41 - INFO - codeparrot_training - Step 38779: {'lr': 6.000476163723753e-05, 'samples': 19855360, 'steps': 38779, 'batch_loss/train': 0.6624912023544312} +12/28/2021 01:43:53 - INFO - codeparrot_training - Step 38780: {'lr': 5.999451683921181e-05, 'samples': 19855872, 'steps': 38780, 'batch_loss/train': 0.6577369277365506} +12/28/2021 01:44:03 - INFO - codeparrot_training - Step 38781: {'lr': 5.998427279657615e-05, 'samples': 19856384, 'steps': 38781, 'batch_loss/train': 0.6859686067327857} +12/28/2021 01:44:14 - INFO - codeparrot_training - Step 38782: {'lr': 5.997402950937125e-05, 'samples': 19856896, 'steps': 38782, 'batch_loss/train': 0.7310810433700681} +12/28/2021 01:44:28 - INFO - codeparrot_training - Step 38783: {'lr': 5.996378697763788e-05, 'samples': 19857408, 'steps': 38783, 'batch_loss/train': 0.7760753370821476} +12/28/2021 01:44:38 - INFO - codeparrot_training - Step 38784: {'lr': 5.995354520141682e-05, 'samples': 19857920, 'steps': 38784, 'batch_loss/train': 0.7425950631732121} +12/28/2021 01:44:49 - INFO - codeparrot_training - Step 38785: {'lr': 5.994330418074856e-05, 'samples': 19858432, 'steps': 38785, 'batch_loss/train': 0.7254392034374177} +12/28/2021 01:45:01 - INFO - codeparrot_training - Step 38786: {'lr': 5.993306391567402e-05, 'samples': 19858944, 'steps': 38786, 'batch_loss/train': 0.7204031131113879} +12/28/2021 01:45:12 - INFO - codeparrot_training - Step 38787: {'lr': 5.992282440623389e-05, 'samples': 19859456, 'steps': 38787, 'batch_loss/train': 0.585219016764313} +12/28/2021 01:45:23 - INFO - codeparrot_training - Step 38788: {'lr': 5.991258565246874e-05, 'samples': 19859968, 'steps': 38788, 'batch_loss/train': 0.7677024444565177} +12/28/2021 01:45:33 - INFO - codeparrot_training - Step 38789: {'lr': 5.990234765441943e-05, 'samples': 19860480, 'steps': 38789, 'batch_loss/train': 0.7853798884898424} +12/28/2021 01:45:47 - INFO - codeparrot_training - Step 38790: {'lr': 5.9892110412126695e-05, 'samples': 19860992, 'steps': 38790, 'batch_loss/train': 0.7907055742107332} +12/28/2021 01:45:58 - INFO - codeparrot_training - Step 38791: {'lr': 5.988187392563107e-05, 'samples': 19861504, 'steps': 38791, 'batch_loss/train': 1.6244096769951284} +12/28/2021 01:46:09 - INFO - codeparrot_training - Step 38792: {'lr': 5.987163819497332e-05, 'samples': 19862016, 'steps': 38792, 'batch_loss/train': 0.8136013529729098} +12/28/2021 01:46:21 - INFO - codeparrot_training - Step 38793: {'lr': 5.986140322019415e-05, 'samples': 19862528, 'steps': 38793, 'batch_loss/train': 0.7031417516991496} +12/28/2021 01:46:31 - INFO - codeparrot_training - Step 38794: {'lr': 5.985116900133425e-05, 'samples': 19863040, 'steps': 38794, 'batch_loss/train': 0.7314954306930304} +12/28/2021 01:46:42 - INFO - codeparrot_training - Step 38795: {'lr': 5.984093553843431e-05, 'samples': 19863552, 'steps': 38795, 'batch_loss/train': 0.8289083261042833} +12/28/2021 01:46:54 - INFO - codeparrot_training - Step 38796: {'lr': 5.9830702831535e-05, 'samples': 19864064, 'steps': 38796, 'batch_loss/train': 0.7228858014568686} +12/28/2021 01:47:05 - INFO - codeparrot_training - Step 38797: {'lr': 5.982047088067702e-05, 'samples': 19864576, 'steps': 38797, 'batch_loss/train': 0.7555120307952166} +12/28/2021 01:47:15 - INFO - codeparrot_training - Step 38798: {'lr': 5.981023968590102e-05, 'samples': 19865088, 'steps': 38798, 'batch_loss/train': 0.7590641397982836} +12/28/2021 01:47:26 - INFO - codeparrot_training - Step 38799: {'lr': 5.980000924724771e-05, 'samples': 19865600, 'steps': 38799, 'batch_loss/train': 1.2569686165079474} +12/28/2021 01:47:39 - INFO - codeparrot_training - Step 38800: {'lr': 5.978977956475773e-05, 'samples': 19866112, 'steps': 38800, 'batch_loss/train': 0.933321843855083} +12/28/2021 01:47:49 - INFO - codeparrot_training - Step 38801: {'lr': 5.977955063847185e-05, 'samples': 19866624, 'steps': 38801, 'batch_loss/train': 0.7414567391388118} +12/28/2021 01:48:00 - INFO - codeparrot_training - Step 38802: {'lr': 5.976932246843059e-05, 'samples': 19867136, 'steps': 38802, 'batch_loss/train': 0.6423656221013516} +12/28/2021 01:48:12 - INFO - codeparrot_training - Step 38803: {'lr': 5.975909505467458e-05, 'samples': 19867648, 'steps': 38803, 'batch_loss/train': 0.7694938462227583} +12/28/2021 01:48:23 - INFO - codeparrot_training - Step 38804: {'lr': 5.974886839724475e-05, 'samples': 19868160, 'steps': 38804, 'batch_loss/train': 0.7471473291516304} +12/28/2021 01:48:33 - INFO - codeparrot_training - Step 38805: {'lr': 5.973864249618149e-05, 'samples': 19868672, 'steps': 38805, 'batch_loss/train': 0.811741316691041} +12/28/2021 01:48:45 - INFO - codeparrot_training - Step 38806: {'lr': 5.972841735152554e-05, 'samples': 19869184, 'steps': 38806, 'batch_loss/train': 0.7464410702232271} +12/28/2021 01:48:56 - INFO - codeparrot_training - Step 38807: {'lr': 5.971819296331757e-05, 'samples': 19869696, 'steps': 38807, 'batch_loss/train': 0.7077734469203278} +12/28/2021 01:49:06 - INFO - codeparrot_training - Step 38808: {'lr': 5.970796933159822e-05, 'samples': 19870208, 'steps': 38808, 'batch_loss/train': 0.7795674952794798} +12/28/2021 01:49:17 - INFO - codeparrot_training - Step 38809: {'lr': 5.9697746456408134e-05, 'samples': 19870720, 'steps': 38809, 'batch_loss/train': 0.7233541025780141} +12/28/2021 01:49:30 - INFO - codeparrot_training - Step 38810: {'lr': 5.968752433778801e-05, 'samples': 19871232, 'steps': 38810, 'batch_loss/train': 0.6496308841742575} +12/28/2021 01:49:40 - INFO - codeparrot_training - Step 38811: {'lr': 5.967730297577831e-05, 'samples': 19871744, 'steps': 38811, 'batch_loss/train': 0.772645550314337} +12/28/2021 01:49:51 - INFO - codeparrot_training - Step 38812: {'lr': 5.966708237041985e-05, 'samples': 19872256, 'steps': 38812, 'batch_loss/train': 0.7367213377729058} +12/28/2021 01:50:03 - INFO - codeparrot_training - Step 38813: {'lr': 5.9656862521753285e-05, 'samples': 19872768, 'steps': 38813, 'batch_loss/train': 0.7238905010744929} +12/28/2021 01:50:14 - INFO - codeparrot_training - Step 38814: {'lr': 5.9646643429818985e-05, 'samples': 19873280, 'steps': 38814, 'batch_loss/train': 0.7329961503855884} +12/28/2021 01:50:24 - INFO - codeparrot_training - Step 38815: {'lr': 5.963642509465786e-05, 'samples': 19873792, 'steps': 38815, 'batch_loss/train': 0.8072380991652608} +12/28/2021 01:50:35 - INFO - codeparrot_training - Step 38816: {'lr': 5.962620751631051e-05, 'samples': 19874304, 'steps': 38816, 'batch_loss/train': 0.7906304346397519} +12/28/2021 01:50:47 - INFO - codeparrot_training - Step 38817: {'lr': 5.961599069481738e-05, 'samples': 19874816, 'steps': 38817, 'batch_loss/train': 0.8015043156920001} +12/28/2021 01:50:58 - INFO - codeparrot_training - Step 38818: {'lr': 5.9605774630219184e-05, 'samples': 19875328, 'steps': 38818, 'batch_loss/train': 0.666825981810689} +12/28/2021 01:51:09 - INFO - codeparrot_training - Step 38819: {'lr': 5.959555932255656e-05, 'samples': 19875840, 'steps': 38819, 'batch_loss/train': 0.7791667096316814} +12/28/2021 01:51:21 - INFO - codeparrot_training - Step 38820: {'lr': 5.958534477187008e-05, 'samples': 19876352, 'steps': 38820, 'batch_loss/train': 0.7748953141272068} +12/28/2021 01:51:32 - INFO - codeparrot_training - Step 38821: {'lr': 5.9575130978200384e-05, 'samples': 19876864, 'steps': 38821, 'batch_loss/train': 0.6771548292599618} +12/28/2021 01:51:43 - INFO - codeparrot_training - Step 38822: {'lr': 5.956491794158805e-05, 'samples': 19877376, 'steps': 38822, 'batch_loss/train': 0.7230031164363027} +12/28/2021 01:51:55 - INFO - codeparrot_training - Step 38823: {'lr': 5.9554705662073716e-05, 'samples': 19877888, 'steps': 38823, 'batch_loss/train': 0.78884730162099} +12/28/2021 01:52:05 - INFO - codeparrot_training - Step 38824: {'lr': 5.9544494139697955e-05, 'samples': 19878400, 'steps': 38824, 'batch_loss/train': 1.381239715963602} +12/28/2021 01:52:16 - INFO - codeparrot_training - Step 38825: {'lr': 5.953428337450137e-05, 'samples': 19878912, 'steps': 38825, 'batch_loss/train': 0.6654192993883044} +12/28/2021 01:52:27 - INFO - codeparrot_training - Step 38826: {'lr': 5.9524073366524574e-05, 'samples': 19879424, 'steps': 38826, 'batch_loss/train': 0.8366665989160538} +12/28/2021 01:52:39 - INFO - codeparrot_training - Step 38827: {'lr': 5.951386411580817e-05, 'samples': 19879936, 'steps': 38827, 'batch_loss/train': 0.8525454485788941} +12/28/2021 01:52:49 - INFO - codeparrot_training - Step 38828: {'lr': 5.950365562239268e-05, 'samples': 19880448, 'steps': 38828, 'batch_loss/train': 0.7345959600061178} +12/28/2021 01:53:00 - INFO - codeparrot_training - Step 38829: {'lr': 5.9493447886318665e-05, 'samples': 19880960, 'steps': 38829, 'batch_loss/train': 0.7401075195521116} +12/28/2021 01:53:13 - INFO - codeparrot_training - Step 38830: {'lr': 5.948324090762688e-05, 'samples': 19881472, 'steps': 38830, 'batch_loss/train': 0.6799955039750785} +12/28/2021 01:53:24 - INFO - codeparrot_training - Step 38831: {'lr': 5.947303468635773e-05, 'samples': 19881984, 'steps': 38831, 'batch_loss/train': 0.7294881027191877} +12/28/2021 01:53:34 - INFO - codeparrot_training - Step 38832: {'lr': 5.9462829222551864e-05, 'samples': 19882496, 'steps': 38832, 'batch_loss/train': 0.7140337214805186} +12/28/2021 01:53:46 - INFO - codeparrot_training - Step 38833: {'lr': 5.945262451624983e-05, 'samples': 19883008, 'steps': 38833, 'batch_loss/train': 0.8686894020065665} +12/28/2021 01:53:57 - INFO - codeparrot_training - Step 38834: {'lr': 5.944242056749225e-05, 'samples': 19883520, 'steps': 38834, 'batch_loss/train': 0.7286101607605815} +12/28/2021 01:54:08 - INFO - codeparrot_training - Step 38835: {'lr': 5.9432217376319625e-05, 'samples': 19884032, 'steps': 38835, 'batch_loss/train': 0.7661595577374101} +12/28/2021 01:54:19 - INFO - codeparrot_training - Step 38836: {'lr': 5.942201494277255e-05, 'samples': 19884544, 'steps': 38836, 'batch_loss/train': 0.5893681399757043} +12/28/2021 01:54:30 - INFO - codeparrot_training - Step 38837: {'lr': 5.941181326689157e-05, 'samples': 19885056, 'steps': 38837, 'batch_loss/train': 0.8442475516349077} +12/28/2021 01:54:41 - INFO - codeparrot_training - Step 38838: {'lr': 5.9401612348717274e-05, 'samples': 19885568, 'steps': 38838, 'batch_loss/train': 0.6974439970217645} +12/28/2021 01:54:51 - INFO - codeparrot_training - Step 38839: {'lr': 5.9391412188290276e-05, 'samples': 19886080, 'steps': 38839, 'batch_loss/train': 0.6679541133344173} +12/28/2021 01:55:04 - INFO - codeparrot_training - Step 38840: {'lr': 5.9381212785650895e-05, 'samples': 19886592, 'steps': 38840, 'batch_loss/train': 0.611889187246561} +12/28/2021 01:55:15 - INFO - codeparrot_training - Step 38841: {'lr': 5.9371014140839925e-05, 'samples': 19887104, 'steps': 38841, 'batch_loss/train': 0.5553254275582731} +12/28/2021 01:55:26 - INFO - codeparrot_training - Step 38842: {'lr': 5.936081625389789e-05, 'samples': 19887616, 'steps': 38842, 'batch_loss/train': 0.7669845055788755} +12/28/2021 01:55:38 - INFO - codeparrot_training - Step 38843: {'lr': 5.93506191248652e-05, 'samples': 19888128, 'steps': 38843, 'batch_loss/train': 0.763154296670109} +12/28/2021 01:55:48 - INFO - codeparrot_training - Step 38844: {'lr': 5.934042275378238e-05, 'samples': 19888640, 'steps': 38844, 'batch_loss/train': 0.6971698282286525} +12/28/2021 01:55:59 - INFO - codeparrot_training - Step 38845: {'lr': 5.93302271406902e-05, 'samples': 19889152, 'steps': 38845, 'batch_loss/train': 0.653616669587791} +12/28/2021 01:56:11 - INFO - codeparrot_training - Step 38846: {'lr': 5.932003228562896e-05, 'samples': 19889664, 'steps': 38846, 'batch_loss/train': 0.7813394935801625} +12/28/2021 01:56:21 - INFO - codeparrot_training - Step 38847: {'lr': 5.9309838188639275e-05, 'samples': 19890176, 'steps': 38847, 'batch_loss/train': 0.7199290576390922} +12/28/2021 01:56:32 - INFO - codeparrot_training - Step 38848: {'lr': 5.9299644849761665e-05, 'samples': 19890688, 'steps': 38848, 'batch_loss/train': 0.7992496425285935} +12/28/2021 01:56:43 - INFO - codeparrot_training - Step 38849: {'lr': 5.9289452269036676e-05, 'samples': 19891200, 'steps': 38849, 'batch_loss/train': 0.726200407370925} +12/28/2021 01:56:55 - INFO - codeparrot_training - Step 38850: {'lr': 5.927926044650481e-05, 'samples': 19891712, 'steps': 38850, 'batch_loss/train': 0.7334647345705889} +12/28/2021 01:57:05 - INFO - codeparrot_training - Step 38851: {'lr': 5.926906938220661e-05, 'samples': 19892224, 'steps': 38851, 'batch_loss/train': 0.6966672316193581} +12/28/2021 01:57:16 - INFO - codeparrot_training - Step 38852: {'lr': 5.925887907618255e-05, 'samples': 19892736, 'steps': 38852, 'batch_loss/train': 0.7602341647725552} +12/28/2021 01:57:29 - INFO - codeparrot_training - Step 38853: {'lr': 5.924868952847318e-05, 'samples': 19893248, 'steps': 38853, 'batch_loss/train': 0.7756495005451143} +12/28/2021 01:57:40 - INFO - codeparrot_training - Step 38854: {'lr': 5.9238500739119044e-05, 'samples': 19893760, 'steps': 38854, 'batch_loss/train': 0.7549344958970323} +12/28/2021 01:57:50 - INFO - codeparrot_training - Step 38855: {'lr': 5.922831270816048e-05, 'samples': 19894272, 'steps': 38855, 'batch_loss/train': 0.7839827528223395} +12/28/2021 01:58:02 - INFO - codeparrot_training - Step 38856: {'lr': 5.921812543563823e-05, 'samples': 19894784, 'steps': 38856, 'batch_loss/train': 0.8407323993742466} +12/28/2021 01:58:13 - INFO - codeparrot_training - Step 38857: {'lr': 5.9207938921592626e-05, 'samples': 19895296, 'steps': 38857, 'batch_loss/train': 0.6385821080766618} +12/28/2021 01:58:24 - INFO - codeparrot_training - Step 38858: {'lr': 5.919775316606424e-05, 'samples': 19895808, 'steps': 38858, 'batch_loss/train': 0.8099682908505201} +12/28/2021 01:58:36 - INFO - codeparrot_training - Step 38859: {'lr': 5.9187568169093514e-05, 'samples': 19896320, 'steps': 38859, 'batch_loss/train': 0.7838155466597527} +12/28/2021 01:58:47 - INFO - codeparrot_training - Step 38860: {'lr': 5.917738393072095e-05, 'samples': 19896832, 'steps': 38860, 'batch_loss/train': 1.3276017657481134} +12/28/2021 01:58:58 - INFO - codeparrot_training - Step 38861: {'lr': 5.9167200450987085e-05, 'samples': 19897344, 'steps': 38861, 'batch_loss/train': 0.7084209523163736} +12/28/2021 01:59:08 - INFO - codeparrot_training - Step 38862: {'lr': 5.915701772993237e-05, 'samples': 19897856, 'steps': 38862, 'batch_loss/train': 0.7289353720843792} +12/28/2021 01:59:20 - INFO - codeparrot_training - Step 38863: {'lr': 5.91468357675973e-05, 'samples': 19898368, 'steps': 38863, 'batch_loss/train': 0.7665232196450233} +12/28/2021 01:59:31 - INFO - codeparrot_training - Step 38864: {'lr': 5.913665456402234e-05, 'samples': 19898880, 'steps': 38864, 'batch_loss/train': 0.599003522656858} +12/28/2021 01:59:42 - INFO - codeparrot_training - Step 38865: {'lr': 5.912647411924804e-05, 'samples': 19899392, 'steps': 38865, 'batch_loss/train': 0.8108168105245568} +12/28/2021 01:59:54 - INFO - codeparrot_training - Step 38866: {'lr': 5.911629443331465e-05, 'samples': 19899904, 'steps': 38866, 'batch_loss/train': 0.7376165273599327} +12/28/2021 02:00:04 - INFO - codeparrot_training - Step 38867: {'lr': 5.9106115506262906e-05, 'samples': 19900416, 'steps': 38867, 'batch_loss/train': 0.7129457327537239} +12/28/2021 02:00:15 - INFO - codeparrot_training - Step 38868: {'lr': 5.909593733813321e-05, 'samples': 19900928, 'steps': 38868, 'batch_loss/train': 0.7725452352315187} +12/28/2021 02:00:28 - INFO - codeparrot_training - Step 38869: {'lr': 5.908575992896592e-05, 'samples': 19901440, 'steps': 38869, 'batch_loss/train': 0.8224568562582135} +12/28/2021 02:00:38 - INFO - codeparrot_training - Step 38870: {'lr': 5.90755832788015e-05, 'samples': 19901952, 'steps': 38870, 'batch_loss/train': 0.8729995894245803} +12/28/2021 02:00:49 - INFO - codeparrot_training - Step 38871: {'lr': 5.906540738768063e-05, 'samples': 19902464, 'steps': 38871, 'batch_loss/train': 0.7762418678030372} +12/28/2021 02:01:00 - INFO - codeparrot_training - Step 38872: {'lr': 5.905523225564349e-05, 'samples': 19902976, 'steps': 38872, 'batch_loss/train': 0.7796810436993837} +12/28/2021 02:01:12 - INFO - codeparrot_training - Step 38873: {'lr': 5.904505788273068e-05, 'samples': 19903488, 'steps': 38873, 'batch_loss/train': 0.6841107485815883} +12/28/2021 02:01:23 - INFO - codeparrot_training - Step 38874: {'lr': 5.9034884268982626e-05, 'samples': 19904000, 'steps': 38874, 'batch_loss/train': 0.5672952672466636} +12/28/2021 02:01:33 - INFO - codeparrot_training - Step 38875: {'lr': 5.9024711414439744e-05, 'samples': 19904512, 'steps': 38875, 'batch_loss/train': 0.7164795164717361} +12/28/2021 02:01:46 - INFO - codeparrot_training - Step 38876: {'lr': 5.9014539319142494e-05, 'samples': 19905024, 'steps': 38876, 'batch_loss/train': 1.0846300888806581} +12/28/2021 02:01:57 - INFO - codeparrot_training - Step 38877: {'lr': 5.900436798313136e-05, 'samples': 19905536, 'steps': 38877, 'batch_loss/train': 0.7545069130137563} +12/28/2021 02:02:08 - INFO - codeparrot_training - Step 38878: {'lr': 5.899419740644671e-05, 'samples': 19906048, 'steps': 38878, 'batch_loss/train': 0.7713282713666558} +12/28/2021 02:02:20 - INFO - codeparrot_training - Step 38879: {'lr': 5.8984027589129016e-05, 'samples': 19906560, 'steps': 38879, 'batch_loss/train': 0.745905906194821} +12/28/2021 02:02:30 - INFO - codeparrot_training - Step 38880: {'lr': 5.897385853121878e-05, 'samples': 19907072, 'steps': 38880, 'batch_loss/train': 0.810807743575424} +12/28/2021 02:02:41 - INFO - codeparrot_training - Step 38881: {'lr': 5.896369023275619e-05, 'samples': 19907584, 'steps': 38881, 'batch_loss/train': 0.7618870753794909} +12/28/2021 02:02:52 - INFO - codeparrot_training - Step 38882: {'lr': 5.895352269378201e-05, 'samples': 19908096, 'steps': 38882, 'batch_loss/train': 0.6563027920201421} +12/28/2021 02:03:04 - INFO - codeparrot_training - Step 38883: {'lr': 5.894335591433639e-05, 'samples': 19908608, 'steps': 38883, 'batch_loss/train': 0.7977256111335009} +12/28/2021 02:03:15 - INFO - codeparrot_training - Step 38884: {'lr': 5.893318989445978e-05, 'samples': 19909120, 'steps': 38884, 'batch_loss/train': 0.7159161258023232} +12/28/2021 02:03:25 - INFO - codeparrot_training - Step 38885: {'lr': 5.892302463419283e-05, 'samples': 19909632, 'steps': 38885, 'batch_loss/train': 0.9951320020772982} +12/28/2021 02:03:38 - INFO - codeparrot_training - Step 38886: {'lr': 5.891286013357569e-05, 'samples': 19910144, 'steps': 38886, 'batch_loss/train': 0.9006099388934672} +12/28/2021 02:03:48 - INFO - codeparrot_training - Step 38887: {'lr': 5.890269639264886e-05, 'samples': 19910656, 'steps': 38887, 'batch_loss/train': 0.7320769582875073} +12/28/2021 02:03:59 - INFO - codeparrot_training - Step 38888: {'lr': 5.889253341145279e-05, 'samples': 19911168, 'steps': 38888, 'batch_loss/train': 0.9140479024499655} +12/28/2021 02:04:11 - INFO - codeparrot_training - Step 38889: {'lr': 5.8882371190027814e-05, 'samples': 19911680, 'steps': 38889, 'batch_loss/train': 0.835481708869338} +12/28/2021 02:04:22 - INFO - codeparrot_training - Step 38890: {'lr': 5.8872209728414365e-05, 'samples': 19912192, 'steps': 38890, 'batch_loss/train': 0.8013046605046839} +12/28/2021 02:04:33 - INFO - codeparrot_training - Step 38891: {'lr': 5.886204902665293e-05, 'samples': 19912704, 'steps': 38891, 'batch_loss/train': 0.7372593968175352} +12/28/2021 02:04:45 - INFO - codeparrot_training - Step 38892: {'lr': 5.885188908478367e-05, 'samples': 19913216, 'steps': 38892, 'batch_loss/train': 0.7445608861744404} +12/28/2021 02:04:55 - INFO - codeparrot_training - Step 38893: {'lr': 5.884172990284722e-05, 'samples': 19913728, 'steps': 38893, 'batch_loss/train': 0.8835845813155174} +12/28/2021 02:05:06 - INFO - codeparrot_training - Step 38894: {'lr': 5.8831571480883916e-05, 'samples': 19914240, 'steps': 38894, 'batch_loss/train': 0.8169951569288969} +12/28/2021 02:05:17 - INFO - codeparrot_training - Step 38895: {'lr': 5.882141381893402e-05, 'samples': 19914752, 'steps': 38895, 'batch_loss/train': 0.7605839520692825} +12/28/2021 02:05:29 - INFO - codeparrot_training - Step 38896: {'lr': 5.8811256917037954e-05, 'samples': 19915264, 'steps': 38896, 'batch_loss/train': 0.6633371226489544} +12/28/2021 02:05:39 - INFO - codeparrot_training - Step 38897: {'lr': 5.880110077523626e-05, 'samples': 19915776, 'steps': 38897, 'batch_loss/train': 0.5991840711794794} +12/28/2021 02:05:50 - INFO - codeparrot_training - Step 38898: {'lr': 5.879094539356913e-05, 'samples': 19916288, 'steps': 38898, 'batch_loss/train': 0.8221656689420342} +12/28/2021 02:06:03 - INFO - codeparrot_training - Step 38899: {'lr': 5.878079077207696e-05, 'samples': 19916800, 'steps': 38899, 'batch_loss/train': 0.6623728199629113} +12/28/2021 02:06:13 - INFO - codeparrot_training - Step 38900: {'lr': 5.8770636910800286e-05, 'samples': 19917312, 'steps': 38900, 'batch_loss/train': 0.6857699257088825} +12/28/2021 02:06:24 - INFO - codeparrot_training - Step 38901: {'lr': 5.8760483809779274e-05, 'samples': 19917824, 'steps': 38901, 'batch_loss/train': 0.6419986761175096} +12/28/2021 02:06:36 - INFO - codeparrot_training - Step 38902: {'lr': 5.875033146905437e-05, 'samples': 19918336, 'steps': 38902, 'batch_loss/train': 0.7975829299539328} +12/28/2021 02:06:47 - INFO - codeparrot_training - Step 38903: {'lr': 5.874017988866595e-05, 'samples': 19918848, 'steps': 38903, 'batch_loss/train': 0.6889579384587705} +12/28/2021 02:06:57 - INFO - codeparrot_training - Step 38904: {'lr': 5.873002906865435e-05, 'samples': 19919360, 'steps': 38904, 'batch_loss/train': 0.8137488844804466} +12/28/2021 02:07:08 - INFO - codeparrot_training - Step 38905: {'lr': 5.871987900905992e-05, 'samples': 19919872, 'steps': 38905, 'batch_loss/train': 0.6281972462311387} +12/28/2021 02:07:21 - INFO - codeparrot_training - Step 38906: {'lr': 5.870972970992311e-05, 'samples': 19920384, 'steps': 38906, 'batch_loss/train': 0.7626373274251819} +12/28/2021 02:07:32 - INFO - codeparrot_training - Step 38907: {'lr': 5.8699581171284046e-05, 'samples': 19920896, 'steps': 38907, 'batch_loss/train': 1.361049523577094} +12/28/2021 02:07:42 - INFO - codeparrot_training - Step 38908: {'lr': 5.868943339318336e-05, 'samples': 19921408, 'steps': 38908, 'batch_loss/train': 0.9095083484426141} +12/28/2021 02:07:55 - INFO - codeparrot_training - Step 38909: {'lr': 5.8679286375661166e-05, 'samples': 19921920, 'steps': 38909, 'batch_loss/train': 0.7304031775565818} +12/28/2021 02:08:05 - INFO - codeparrot_training - Step 38910: {'lr': 5.866914011875782e-05, 'samples': 19922432, 'steps': 38910, 'batch_loss/train': 0.7883678581565619} +12/28/2021 02:08:16 - INFO - codeparrot_training - Step 38911: {'lr': 5.8658994622513894e-05, 'samples': 19922944, 'steps': 38911, 'batch_loss/train': 0.6284033651463687} +12/28/2021 02:08:27 - INFO - codeparrot_training - Step 38912: {'lr': 5.8648849886969474e-05, 'samples': 19923456, 'steps': 38912, 'batch_loss/train': 0.5996469082310796} +12/28/2021 02:08:39 - INFO - codeparrot_training - Step 38913: {'lr': 5.863870591216497e-05, 'samples': 19923968, 'steps': 38913, 'batch_loss/train': 0.6934993928298354} +12/28/2021 02:08:49 - INFO - codeparrot_training - Step 38914: {'lr': 5.862856269814071e-05, 'samples': 19924480, 'steps': 38914, 'batch_loss/train': 0.8294217772781849} +12/28/2021 02:09:00 - INFO - codeparrot_training - Step 38915: {'lr': 5.861842024493705e-05, 'samples': 19924992, 'steps': 38915, 'batch_loss/train': 0.6173024796880782} +12/28/2021 02:09:13 - INFO - codeparrot_training - Step 38916: {'lr': 5.8608278552594265e-05, 'samples': 19925504, 'steps': 38916, 'batch_loss/train': 0.6793924970552325} +12/28/2021 02:09:23 - INFO - codeparrot_training - Step 38917: {'lr': 5.8598137621152766e-05, 'samples': 19926016, 'steps': 38917, 'batch_loss/train': 0.8962904778309166} +12/28/2021 02:09:34 - INFO - codeparrot_training - Step 38918: {'lr': 5.8587997450652686e-05, 'samples': 19926528, 'steps': 38918, 'batch_loss/train': 0.920258960686624} +12/28/2021 02:09:46 - INFO - codeparrot_training - Step 38919: {'lr': 5.8577858041134494e-05, 'samples': 19927040, 'steps': 38919, 'batch_loss/train': 0.7260876779910177} +12/28/2021 02:09:57 - INFO - codeparrot_training - Step 38920: {'lr': 5.8567719392638555e-05, 'samples': 19927552, 'steps': 38920, 'batch_loss/train': 0.7125096749514341} +12/28/2021 02:10:07 - INFO - codeparrot_training - Step 38921: {'lr': 5.855758150520493e-05, 'samples': 19928064, 'steps': 38921, 'batch_loss/train': 0.7646785406395793} +12/28/2021 02:10:18 - INFO - codeparrot_training - Step 38922: {'lr': 5.854744437887413e-05, 'samples': 19928576, 'steps': 38922, 'batch_loss/train': 0.7818533470854163} +12/28/2021 02:10:30 - INFO - codeparrot_training - Step 38923: {'lr': 5.853730801368651e-05, 'samples': 19929088, 'steps': 38923, 'batch_loss/train': 0.7272936915978789} +12/28/2021 02:10:41 - INFO - codeparrot_training - Step 38924: {'lr': 5.852717240968216e-05, 'samples': 19929600, 'steps': 38924, 'batch_loss/train': 0.7588916667737067} +12/28/2021 02:10:51 - INFO - codeparrot_training - Step 38925: {'lr': 5.851703756690141e-05, 'samples': 19930112, 'steps': 38925, 'batch_loss/train': 0.8576550208963454} +12/28/2021 02:11:04 - INFO - codeparrot_training - Step 38926: {'lr': 5.850690348538476e-05, 'samples': 19930624, 'steps': 38926, 'batch_loss/train': 0.7033779537305236} +12/28/2021 02:11:15 - INFO - codeparrot_training - Step 38927: {'lr': 5.8496770165172295e-05, 'samples': 19931136, 'steps': 38927, 'batch_loss/train': 0.8145476386416703} +12/28/2021 02:11:25 - INFO - codeparrot_training - Step 38928: {'lr': 5.848663760630435e-05, 'samples': 19931648, 'steps': 38928, 'batch_loss/train': 0.6935524800792336} +12/28/2021 02:11:37 - INFO - codeparrot_training - Step 38929: {'lr': 5.847650580882122e-05, 'samples': 19932160, 'steps': 38929, 'batch_loss/train': 0.6838181735947728} +12/28/2021 02:11:48 - INFO - codeparrot_training - Step 38930: {'lr': 5.84663747727632e-05, 'samples': 19932672, 'steps': 38930, 'batch_loss/train': 0.6445499290130101} +12/28/2021 02:11:59 - INFO - codeparrot_training - Step 38931: {'lr': 5.8456244498170554e-05, 'samples': 19933184, 'steps': 38931, 'batch_loss/train': 0.7109343875199556} +12/28/2021 02:12:09 - INFO - codeparrot_training - Step 38932: {'lr': 5.844611498508362e-05, 'samples': 19933696, 'steps': 38932, 'batch_loss/train': 0.7128542610444129} +12/28/2021 02:12:22 - INFO - codeparrot_training - Step 38933: {'lr': 5.8435986233542456e-05, 'samples': 19934208, 'steps': 38933, 'batch_loss/train': 0.80335792619735} +12/28/2021 02:12:32 - INFO - codeparrot_training - Step 38934: {'lr': 5.842585824358754e-05, 'samples': 19934720, 'steps': 38934, 'batch_loss/train': 0.8799339402467012} +12/28/2021 02:12:43 - INFO - codeparrot_training - Step 38935: {'lr': 5.8415731015259185e-05, 'samples': 19935232, 'steps': 38935, 'batch_loss/train': 0.754775661509484} +12/28/2021 02:12:56 - INFO - codeparrot_training - Step 38936: {'lr': 5.840560454859736e-05, 'samples': 19935744, 'steps': 38936, 'batch_loss/train': 0.7517993391957134} +12/28/2021 02:13:06 - INFO - codeparrot_training - Step 38937: {'lr': 5.839547884364269e-05, 'samples': 19936256, 'steps': 38937, 'batch_loss/train': 0.7749404646456242} +12/28/2021 02:13:17 - INFO - codeparrot_training - Step 38938: {'lr': 5.8385353900435146e-05, 'samples': 19936768, 'steps': 38938, 'batch_loss/train': 0.6484752336982638} +12/28/2021 02:13:29 - INFO - codeparrot_training - Step 38939: {'lr': 5.83752297190151e-05, 'samples': 19937280, 'steps': 38939, 'batch_loss/train': 0.7580258338712156} +12/28/2021 02:13:40 - INFO - codeparrot_training - Step 38940: {'lr': 5.836510629942276e-05, 'samples': 19937792, 'steps': 38940, 'batch_loss/train': 0.5833626193343662} +12/28/2021 02:13:51 - INFO - codeparrot_training - Step 38941: {'lr': 5.8354983641698445e-05, 'samples': 19938304, 'steps': 38941, 'batch_loss/train': 0.6750135456677526} +12/28/2021 02:14:01 - INFO - codeparrot_training - Step 38942: {'lr': 5.834486174588233e-05, 'samples': 19938816, 'steps': 38942, 'batch_loss/train': 0.724767139647156} +12/28/2021 02:14:13 - INFO - codeparrot_training - Step 38943: {'lr': 5.8334740612014656e-05, 'samples': 19939328, 'steps': 38943, 'batch_loss/train': 0.7900272887200117} +12/28/2021 02:14:24 - INFO - codeparrot_training - Step 38944: {'lr': 5.832462024013571e-05, 'samples': 19939840, 'steps': 38944, 'batch_loss/train': 0.6468957148026675} +12/28/2021 02:14:35 - INFO - codeparrot_training - Step 38945: {'lr': 5.8314500630285705e-05, 'samples': 19940352, 'steps': 38945, 'batch_loss/train': 0.6623841619584709} +12/28/2021 02:14:48 - INFO - codeparrot_training - Step 38946: {'lr': 5.830438178250491e-05, 'samples': 19940864, 'steps': 38946, 'batch_loss/train': 0.7803747630678117} +12/28/2021 02:14:59 - INFO - codeparrot_training - Step 38947: {'lr': 5.829426369683341e-05, 'samples': 19941376, 'steps': 38947, 'batch_loss/train': 0.5436883964575827} +12/28/2021 02:15:09 - INFO - codeparrot_training - Step 38948: {'lr': 5.828414637331158e-05, 'samples': 19941888, 'steps': 38948, 'batch_loss/train': 0.7300732512958348} +12/28/2021 02:15:21 - INFO - codeparrot_training - Step 38949: {'lr': 5.827402981197966e-05, 'samples': 19942400, 'steps': 38949, 'batch_loss/train': 0.6945686156395823} +12/28/2021 02:15:32 - INFO - codeparrot_training - Step 38950: {'lr': 5.8263914012877755e-05, 'samples': 19942912, 'steps': 38950, 'batch_loss/train': 0.7090977104380727} +12/28/2021 02:15:43 - INFO - codeparrot_training - Step 38951: {'lr': 5.825379897604605e-05, 'samples': 19943424, 'steps': 38951, 'batch_loss/train': 0.730247707106173} +12/28/2021 02:15:55 - INFO - codeparrot_training - Step 38952: {'lr': 5.824368470152497e-05, 'samples': 19943936, 'steps': 38952, 'batch_loss/train': 0.6962484261021018} +12/28/2021 02:16:05 - INFO - codeparrot_training - Step 38953: {'lr': 5.823357118935452e-05, 'samples': 19944448, 'steps': 38953, 'batch_loss/train': 0.8092547622509301} +12/28/2021 02:16:16 - INFO - codeparrot_training - Step 38954: {'lr': 5.822345843957499e-05, 'samples': 19944960, 'steps': 38954, 'batch_loss/train': 0.6588497923221439} +12/28/2021 02:16:26 - INFO - codeparrot_training - Step 38955: {'lr': 5.821334645222659e-05, 'samples': 19945472, 'steps': 38955, 'batch_loss/train': 0.6019646821077913} +12/28/2021 02:16:39 - INFO - codeparrot_training - Step 38956: {'lr': 5.820323522734949e-05, 'samples': 19945984, 'steps': 38956, 'batch_loss/train': 0.7519201007671654} +12/28/2021 02:16:50 - INFO - codeparrot_training - Step 38957: {'lr': 5.8193124764983924e-05, 'samples': 19946496, 'steps': 38957, 'batch_loss/train': 0.7987021167064086} +12/28/2021 02:17:00 - INFO - codeparrot_training - Step 38958: {'lr': 5.818301506517007e-05, 'samples': 19947008, 'steps': 38958, 'batch_loss/train': 0.8151186485774815} +12/28/2021 02:17:13 - INFO - codeparrot_training - Step 38959: {'lr': 5.8172906127948125e-05, 'samples': 19947520, 'steps': 38959, 'batch_loss/train': 0.755776546895504} +12/28/2021 02:17:23 - INFO - codeparrot_training - Step 38960: {'lr': 5.8162797953358245e-05, 'samples': 19948032, 'steps': 38960, 'batch_loss/train': 0.7254821372916922} +12/28/2021 02:17:34 - INFO - codeparrot_training - Step 38961: {'lr': 5.8152690541440735e-05, 'samples': 19948544, 'steps': 38961, 'batch_loss/train': 0.7464268170297146} +12/28/2021 02:17:46 - INFO - codeparrot_training - Step 38962: {'lr': 5.814258389223556e-05, 'samples': 19949056, 'steps': 38962, 'batch_loss/train': 0.6244916282594204} +12/28/2021 02:17:57 - INFO - codeparrot_training - Step 38963: {'lr': 5.813247800578314e-05, 'samples': 19949568, 'steps': 38963, 'batch_loss/train': 0.8174523927737027} +12/28/2021 02:18:07 - INFO - codeparrot_training - Step 38964: {'lr': 5.812237288212349e-05, 'samples': 19950080, 'steps': 38964, 'batch_loss/train': 0.6573232733644545} +12/28/2021 02:18:18 - INFO - codeparrot_training - Step 38965: {'lr': 5.8112268521296834e-05, 'samples': 19950592, 'steps': 38965, 'batch_loss/train': 0.7207604278810322} +12/28/2021 02:18:31 - INFO - codeparrot_training - Step 38966: {'lr': 5.810216492334336e-05, 'samples': 19951104, 'steps': 38966, 'batch_loss/train': 0.6564660184085369} +12/28/2021 02:18:42 - INFO - codeparrot_training - Step 38967: {'lr': 5.80920620883032e-05, 'samples': 19951616, 'steps': 38967, 'batch_loss/train': 0.7251774608157575} +12/28/2021 02:18:52 - INFO - codeparrot_training - Step 38968: {'lr': 5.808196001621654e-05, 'samples': 19952128, 'steps': 38968, 'batch_loss/train': 0.7116457913070917} +12/28/2021 02:19:04 - INFO - codeparrot_training - Step 38969: {'lr': 5.807185870712356e-05, 'samples': 19952640, 'steps': 38969, 'batch_loss/train': 0.5018366044387221} +12/28/2021 02:19:15 - INFO - codeparrot_training - Step 38970: {'lr': 5.806175816106438e-05, 'samples': 19953152, 'steps': 38970, 'batch_loss/train': 0.7952136322855949} +12/28/2021 02:19:26 - INFO - codeparrot_training - Step 38971: {'lr': 5.8051658378079184e-05, 'samples': 19953664, 'steps': 38971, 'batch_loss/train': 0.8492340594530106} +12/28/2021 02:19:38 - INFO - codeparrot_training - Step 38972: {'lr': 5.804155935820818e-05, 'samples': 19954176, 'steps': 38972, 'batch_loss/train': 0.6582739867735654} +12/28/2021 02:19:49 - INFO - codeparrot_training - Step 38973: {'lr': 5.803146110149132e-05, 'samples': 19954688, 'steps': 38973, 'batch_loss/train': 0.7093100640922785} +12/28/2021 02:19:59 - INFO - codeparrot_training - Step 38974: {'lr': 5.802136360796895e-05, 'samples': 19955200, 'steps': 38974, 'batch_loss/train': 0.7261002147570252} +12/28/2021 02:20:10 - INFO - codeparrot_training - Step 38975: {'lr': 5.801126687768124e-05, 'samples': 19955712, 'steps': 38975, 'batch_loss/train': 0.7064434089697897} +12/28/2021 02:20:23 - INFO - codeparrot_training - Step 38976: {'lr': 5.800117091066814e-05, 'samples': 19956224, 'steps': 38976, 'batch_loss/train': 0.7817306099459529} +12/28/2021 02:20:33 - INFO - codeparrot_training - Step 38977: {'lr': 5.799107570696985e-05, 'samples': 19956736, 'steps': 38977, 'batch_loss/train': 0.6807959484867752} +12/28/2021 02:20:44 - INFO - codeparrot_training - Step 38978: {'lr': 5.798098126662665e-05, 'samples': 19957248, 'steps': 38978, 'batch_loss/train': 0.6300896471366286} +12/28/2021 02:20:56 - INFO - codeparrot_training - Step 38979: {'lr': 5.797088758967853e-05, 'samples': 19957760, 'steps': 38979, 'batch_loss/train': 0.7736022057943046} +12/28/2021 02:21:07 - INFO - codeparrot_training - Step 38980: {'lr': 5.796079467616563e-05, 'samples': 19958272, 'steps': 38980, 'batch_loss/train': 0.7935903551988304} +12/28/2021 02:21:17 - INFO - codeparrot_training - Step 38981: {'lr': 5.7950702526128104e-05, 'samples': 19958784, 'steps': 38981, 'batch_loss/train': 1.0308968294411898} +12/28/2021 02:21:29 - INFO - codeparrot_training - Step 38982: {'lr': 5.794061113960608e-05, 'samples': 19959296, 'steps': 38982, 'batch_loss/train': 0.7609772710129619} +12/28/2021 02:21:40 - INFO - codeparrot_training - Step 38983: {'lr': 5.793052051663966e-05, 'samples': 19959808, 'steps': 38983, 'batch_loss/train': 0.8116559288464487} +12/28/2021 02:21:51 - INFO - codeparrot_training - Step 38984: {'lr': 5.792043065726898e-05, 'samples': 19960320, 'steps': 38984, 'batch_loss/train': 0.7857253029942513} +12/28/2021 02:22:01 - INFO - codeparrot_training - Step 38985: {'lr': 5.791034156153413e-05, 'samples': 19960832, 'steps': 38985, 'batch_loss/train': 0.7978682722896338} +12/28/2021 02:22:14 - INFO - codeparrot_training - Step 38986: {'lr': 5.790025322947523e-05, 'samples': 19961344, 'steps': 38986, 'batch_loss/train': 0.7780062509700656} +12/28/2021 02:22:25 - INFO - codeparrot_training - Step 38987: {'lr': 5.789016566113245e-05, 'samples': 19961856, 'steps': 38987, 'batch_loss/train': 0.7410834105685353} +12/28/2021 02:22:35 - INFO - codeparrot_training - Step 38988: {'lr': 5.78800788565457e-05, 'samples': 19962368, 'steps': 38988, 'batch_loss/train': 0.7889512377441861} +12/28/2021 02:22:47 - INFO - codeparrot_training - Step 38989: {'lr': 5.7869992815755305e-05, 'samples': 19962880, 'steps': 38989, 'batch_loss/train': 0.7537519251927733} +12/28/2021 02:22:58 - INFO - codeparrot_training - Step 38990: {'lr': 5.785990753880133e-05, 'samples': 19963392, 'steps': 38990, 'batch_loss/train': 0.6963763313833624} +12/28/2021 02:23:09 - INFO - codeparrot_training - Step 38991: {'lr': 5.7849823025723726e-05, 'samples': 19963904, 'steps': 38991, 'batch_loss/train': 0.7478000009432435} +12/28/2021 02:23:22 - INFO - codeparrot_training - Step 38992: {'lr': 5.783973927656269e-05, 'samples': 19964416, 'steps': 38992, 'batch_loss/train': 0.6848917035385966} +12/28/2021 02:23:32 - INFO - codeparrot_training - Step 38993: {'lr': 5.782965629135828e-05, 'samples': 19964928, 'steps': 38993, 'batch_loss/train': 0.7143084367271513} +12/28/2021 02:23:43 - INFO - codeparrot_training - Step 38994: {'lr': 5.781957407015062e-05, 'samples': 19965440, 'steps': 38994, 'batch_loss/train': 0.7902106279507279} +12/28/2021 02:23:54 - INFO - codeparrot_training - Step 38995: {'lr': 5.780949261297974e-05, 'samples': 19965952, 'steps': 38995, 'batch_loss/train': 0.5597991977119818} +12/28/2021 02:24:06 - INFO - codeparrot_training - Step 38996: {'lr': 5.7799411919885774e-05, 'samples': 19966464, 'steps': 38996, 'batch_loss/train': 0.7303906921297312} +12/28/2021 02:24:16 - INFO - codeparrot_training - Step 38997: {'lr': 5.7789331990908775e-05, 'samples': 19966976, 'steps': 38997, 'batch_loss/train': 0.7292778645642102} +12/28/2021 02:24:27 - INFO - codeparrot_training - Step 38998: {'lr': 5.777925282608887e-05, 'samples': 19967488, 'steps': 38998, 'batch_loss/train': 0.9318590294569731} +12/28/2021 02:24:39 - INFO - codeparrot_training - Step 38999: {'lr': 5.776917442546595e-05, 'samples': 19968000, 'steps': 38999, 'batch_loss/train': 0.7915007802657783} +12/28/2021 02:24:50 - INFO - codeparrot_training - Step 39000: {'lr': 5.775909678908026e-05, 'samples': 19968512, 'steps': 39000, 'batch_loss/train': 0.7988896872848272} +12/28/2021 02:25:00 - INFO - codeparrot_training - Step 39001: {'lr': 5.77490199169719e-05, 'samples': 19969024, 'steps': 39001, 'batch_loss/train': 0.7061845031566918} +12/28/2021 02:25:12 - INFO - codeparrot_training - Step 39002: {'lr': 5.773894380918077e-05, 'samples': 19969536, 'steps': 39002, 'batch_loss/train': 0.7446090672165155} +12/28/2021 02:25:23 - INFO - codeparrot_training - Step 39003: {'lr': 5.772886846574693e-05, 'samples': 19970048, 'steps': 39003, 'batch_loss/train': 0.6968137905932963} +12/28/2021 02:25:34 - INFO - codeparrot_training - Step 39004: {'lr': 5.771879388671064e-05, 'samples': 19970560, 'steps': 39004, 'batch_loss/train': 0.5897177751176059} +12/28/2021 02:25:44 - INFO - codeparrot_training - Step 39005: {'lr': 5.7708720072111784e-05, 'samples': 19971072, 'steps': 39005, 'batch_loss/train': 0.5682974413502961} +12/28/2021 02:25:57 - INFO - codeparrot_training - Step 39006: {'lr': 5.769864702199043e-05, 'samples': 19971584, 'steps': 39006, 'batch_loss/train': 0.7232973081991076} +12/28/2021 02:26:08 - INFO - codeparrot_training - Step 39007: {'lr': 5.768857473638664e-05, 'samples': 19972096, 'steps': 39007, 'batch_loss/train': 0.7652281054761261} +12/28/2021 02:26:18 - INFO - codeparrot_training - Step 39008: {'lr': 5.767850321534049e-05, 'samples': 19972608, 'steps': 39008, 'batch_loss/train': 0.7344508320093155} +12/28/2021 02:26:31 - INFO - codeparrot_training - Step 39009: {'lr': 5.766843245889197e-05, 'samples': 19973120, 'steps': 39009, 'batch_loss/train': 0.686232260428369} +12/28/2021 02:26:41 - INFO - codeparrot_training - Step 39010: {'lr': 5.7658362467081146e-05, 'samples': 19973632, 'steps': 39010, 'batch_loss/train': 0.7058380823582411} +12/28/2021 02:26:52 - INFO - codeparrot_training - Step 39011: {'lr': 5.764829323994805e-05, 'samples': 19974144, 'steps': 39011, 'batch_loss/train': 0.7202324038371444} +12/28/2021 02:27:04 - INFO - codeparrot_training - Step 39012: {'lr': 5.763822477753269e-05, 'samples': 19974656, 'steps': 39012, 'batch_loss/train': 0.8386450242251158} +12/28/2021 02:27:15 - INFO - codeparrot_training - Step 39013: {'lr': 5.7628157079875235e-05, 'samples': 19975168, 'steps': 39013, 'batch_loss/train': 0.8295177679974586} +12/28/2021 02:27:25 - INFO - codeparrot_training - Step 39014: {'lr': 5.761809014701541e-05, 'samples': 19975680, 'steps': 39014, 'batch_loss/train': 0.7948974017053843} +12/28/2021 02:27:36 - INFO - codeparrot_training - Step 39015: {'lr': 5.760802397899348e-05, 'samples': 19976192, 'steps': 39015, 'batch_loss/train': 0.7246670797467232} +12/28/2021 02:27:49 - INFO - codeparrot_training - Step 39016: {'lr': 5.759795857584951e-05, 'samples': 19976704, 'steps': 39016, 'batch_loss/train': 0.7858442701399326} +12/28/2021 02:28:00 - INFO - codeparrot_training - Step 39017: {'lr': 5.7587893937623316e-05, 'samples': 19977216, 'steps': 39017, 'batch_loss/train': 0.7908126343972981} +12/28/2021 02:28:10 - INFO - codeparrot_training - Step 39018: {'lr': 5.757783006435499e-05, 'samples': 19977728, 'steps': 39018, 'batch_loss/train': 0.8211064422503114} +12/28/2021 02:28:22 - INFO - codeparrot_training - Step 39019: {'lr': 5.756776695608459e-05, 'samples': 19978240, 'steps': 39019, 'batch_loss/train': 0.7214486096054316} +12/28/2021 02:28:33 - INFO - codeparrot_training - Step 39020: {'lr': 5.7557704612852065e-05, 'samples': 19978752, 'steps': 39020, 'batch_loss/train': 0.7154511697590351} +12/28/2021 02:28:43 - INFO - codeparrot_training - Step 39021: {'lr': 5.754764303469745e-05, 'samples': 19979264, 'steps': 39021, 'batch_loss/train': 0.7008446189574897} +12/28/2021 02:28:56 - INFO - codeparrot_training - Step 39022: {'lr': 5.753758222166072e-05, 'samples': 19979776, 'steps': 39022, 'batch_loss/train': 0.7345029781572521} +12/28/2021 02:29:07 - INFO - codeparrot_training - Step 39023: {'lr': 5.752752217378193e-05, 'samples': 19980288, 'steps': 39023, 'batch_loss/train': 0.715320291928947} +12/28/2021 02:29:18 - INFO - codeparrot_training - Step 39024: {'lr': 5.751746289110102e-05, 'samples': 19980800, 'steps': 39024, 'batch_loss/train': 0.7005163319408894} +12/28/2021 02:29:30 - INFO - codeparrot_training - Step 39025: {'lr': 5.750740437365798e-05, 'samples': 19981312, 'steps': 39025, 'batch_loss/train': 0.719032846391201} +12/28/2021 02:29:40 - INFO - codeparrot_training - Step 39026: {'lr': 5.7497346621492836e-05, 'samples': 19981824, 'steps': 39026, 'batch_loss/train': 0.7662445800378919} +12/28/2021 02:29:51 - INFO - codeparrot_training - Step 39027: {'lr': 5.748728963464564e-05, 'samples': 19982336, 'steps': 39027, 'batch_loss/train': 0.6881159152835608} +12/28/2021 02:30:02 - INFO - codeparrot_training - Step 39028: {'lr': 5.747723341315619e-05, 'samples': 19982848, 'steps': 39028, 'batch_loss/train': 0.6125962869264185} +12/28/2021 02:30:14 - INFO - codeparrot_training - Step 39029: {'lr': 5.746717795706452e-05, 'samples': 19983360, 'steps': 39029, 'batch_loss/train': 0.7205719528719783} +12/28/2021 02:30:24 - INFO - codeparrot_training - Step 39030: {'lr': 5.745712326641078e-05, 'samples': 19983872, 'steps': 39030, 'batch_loss/train': 0.6836889665573835} +12/28/2021 02:30:35 - INFO - codeparrot_training - Step 39031: {'lr': 5.7447069341234754e-05, 'samples': 19984384, 'steps': 39031, 'batch_loss/train': 0.7169577945023775} +12/28/2021 02:30:48 - INFO - codeparrot_training - Step 39032: {'lr': 5.74370161815764e-05, 'samples': 19984896, 'steps': 39032, 'batch_loss/train': 0.7066601142287254} +12/28/2021 02:30:58 - INFO - codeparrot_training - Step 39033: {'lr': 5.742696378747589e-05, 'samples': 19985408, 'steps': 39033, 'batch_loss/train': 0.7465839218348265} +12/28/2021 02:31:09 - INFO - codeparrot_training - Step 39034: {'lr': 5.7416912158973016e-05, 'samples': 19985920, 'steps': 39034, 'batch_loss/train': 0.7310161869972944} +12/28/2021 02:31:21 - INFO - codeparrot_training - Step 39035: {'lr': 5.740686129610776e-05, 'samples': 19986432, 'steps': 39035, 'batch_loss/train': 0.6674642886500806} +12/28/2021 02:31:32 - INFO - codeparrot_training - Step 39036: {'lr': 5.739681119892013e-05, 'samples': 19986944, 'steps': 39036, 'batch_loss/train': 1.1293489960953593} +12/28/2021 02:31:42 - INFO - codeparrot_training - Step 39037: {'lr': 5.738676186745001e-05, 'samples': 19987456, 'steps': 39037, 'batch_loss/train': 0.809371842071414} +12/28/2021 02:31:53 - INFO - codeparrot_training - Step 39038: {'lr': 5.737671330173744e-05, 'samples': 19987968, 'steps': 39038, 'batch_loss/train': 0.7530388967134058} +12/28/2021 02:32:05 - INFO - codeparrot_training - Step 39039: {'lr': 5.736666550182237e-05, 'samples': 19988480, 'steps': 39039, 'batch_loss/train': 0.6820602947846055} +12/28/2021 02:32:16 - INFO - codeparrot_training - Step 39040: {'lr': 5.7356618467744565e-05, 'samples': 19988992, 'steps': 39040, 'batch_loss/train': 0.7259715888649225} +12/28/2021 02:32:26 - INFO - codeparrot_training - Step 39041: {'lr': 5.73465721995442e-05, 'samples': 19989504, 'steps': 39041, 'batch_loss/train': 0.6936422679573298} +12/28/2021 02:32:39 - INFO - codeparrot_training - Step 39042: {'lr': 5.7336526697261164e-05, 'samples': 19990016, 'steps': 39042, 'batch_loss/train': 0.7342496966011822} +12/28/2021 02:32:49 - INFO - codeparrot_training - Step 39043: {'lr': 5.732648196093529e-05, 'samples': 19990528, 'steps': 39043, 'batch_loss/train': 0.689577161683701} +12/28/2021 02:33:00 - INFO - codeparrot_training - Step 39044: {'lr': 5.731643799060657e-05, 'samples': 19991040, 'steps': 39044, 'batch_loss/train': 0.6413693577633239} +12/28/2021 02:33:13 - INFO - codeparrot_training - Step 39045: {'lr': 5.730639478631494e-05, 'samples': 19991552, 'steps': 39045, 'batch_loss/train': 0.6111188475042582} +12/28/2021 02:33:23 - INFO - codeparrot_training - Step 39046: {'lr': 5.7296352348100324e-05, 'samples': 19992064, 'steps': 39046, 'batch_loss/train': 0.71412233565934} +12/28/2021 02:33:34 - INFO - codeparrot_training - Step 39047: {'lr': 5.728631067600265e-05, 'samples': 19992576, 'steps': 39047, 'batch_loss/train': 0.676537390681915} +12/28/2021 02:33:46 - INFO - codeparrot_training - Step 39048: {'lr': 5.727626977006184e-05, 'samples': 19993088, 'steps': 39048, 'batch_loss/train': 0.8802973758429289} +12/28/2021 02:33:57 - INFO - codeparrot_training - Step 39049: {'lr': 5.726622963031783e-05, 'samples': 19993600, 'steps': 39049, 'batch_loss/train': 0.687555665615946} +12/28/2021 02:34:08 - INFO - codeparrot_training - Step 39050: {'lr': 5.725619025681048e-05, 'samples': 19994112, 'steps': 39050, 'batch_loss/train': 0.6066041670273989} +12/28/2021 02:34:18 - INFO - codeparrot_training - Step 39051: {'lr': 5.7246151649579776e-05, 'samples': 19994624, 'steps': 39051, 'batch_loss/train': 0.7209881348535419} +12/28/2021 02:34:31 - INFO - codeparrot_training - Step 39052: {'lr': 5.7236113808665554e-05, 'samples': 19995136, 'steps': 39052, 'batch_loss/train': 0.6771249966695905} +12/28/2021 02:34:42 - INFO - codeparrot_training - Step 39053: {'lr': 5.7226076734107875e-05, 'samples': 19995648, 'steps': 39053, 'batch_loss/train': 0.7578171142376959} +12/28/2021 02:34:52 - INFO - codeparrot_training - Step 39054: {'lr': 5.721604042594641e-05, 'samples': 19996160, 'steps': 39054, 'batch_loss/train': 0.7031766835134476} +12/28/2021 02:35:04 - INFO - codeparrot_training - Step 39055: {'lr': 5.720600488422115e-05, 'samples': 19996672, 'steps': 39055, 'batch_loss/train': 0.7847476839087903} +12/28/2021 02:35:15 - INFO - codeparrot_training - Step 39056: {'lr': 5.719597010897212e-05, 'samples': 19997184, 'steps': 39056, 'batch_loss/train': 0.8297790249343961} +12/28/2021 02:35:26 - INFO - codeparrot_training - Step 39057: {'lr': 5.718593610023906e-05, 'samples': 19997696, 'steps': 39057, 'batch_loss/train': 0.7425177227705717} +12/28/2021 02:35:38 - INFO - codeparrot_training - Step 39058: {'lr': 5.7175902858061846e-05, 'samples': 19998208, 'steps': 39058, 'batch_loss/train': 0.7645022207871079} +12/28/2021 02:35:49 - INFO - codeparrot_training - Step 39059: {'lr': 5.7165870382480586e-05, 'samples': 19998720, 'steps': 39059, 'batch_loss/train': 0.7496865149587393} +12/28/2021 02:35:59 - INFO - codeparrot_training - Step 39060: {'lr': 5.715583867353491e-05, 'samples': 19999232, 'steps': 39060, 'batch_loss/train': 0.7210771018872038} +12/28/2021 02:36:10 - INFO - codeparrot_training - Step 39061: {'lr': 5.714580773126482e-05, 'samples': 19999744, 'steps': 39061, 'batch_loss/train': 0.7269144912716001} +12/28/2021 02:36:23 - INFO - codeparrot_training - Step 39062: {'lr': 5.71357775557102e-05, 'samples': 20000256, 'steps': 39062, 'batch_loss/train': 0.7169050709344447} +12/28/2021 02:36:34 - INFO - codeparrot_training - Step 39063: {'lr': 5.712574814691088e-05, 'samples': 20000768, 'steps': 39063, 'batch_loss/train': 0.7519657616503537} +12/28/2021 02:36:44 - INFO - codeparrot_training - Step 39064: {'lr': 5.711571950490676e-05, 'samples': 20001280, 'steps': 39064, 'batch_loss/train': 0.786591480486095} +12/28/2021 02:36:56 - INFO - codeparrot_training - Step 39065: {'lr': 5.7105691629737805e-05, 'samples': 20001792, 'steps': 39065, 'batch_loss/train': 2.4883801080286503} +12/28/2021 02:37:07 - INFO - codeparrot_training - Step 39066: {'lr': 5.709566452144363e-05, 'samples': 20002304, 'steps': 39066, 'batch_loss/train': 0.7733284472487867} +12/28/2021 02:37:18 - INFO - codeparrot_training - Step 39067: {'lr': 5.708563818006432e-05, 'samples': 20002816, 'steps': 39067, 'batch_loss/train': 0.7624649195931852} +12/28/2021 02:37:31 - INFO - codeparrot_training - Step 39068: {'lr': 5.707561260563973e-05, 'samples': 20003328, 'steps': 39068, 'batch_loss/train': 0.7149886712431908} +12/28/2021 02:37:42 - INFO - codeparrot_training - Step 39069: {'lr': 5.706558779820956e-05, 'samples': 20003840, 'steps': 39069, 'batch_loss/train': 0.7628435073420405} +12/28/2021 02:37:52 - INFO - codeparrot_training - Step 39070: {'lr': 5.705556375781379e-05, 'samples': 20004352, 'steps': 39070, 'batch_loss/train': 1.2716845432296395} +12/28/2021 02:38:03 - INFO - codeparrot_training - Step 39071: {'lr': 5.704554048449234e-05, 'samples': 20004864, 'steps': 39071, 'batch_loss/train': 0.6892978553951252} +12/28/2021 02:38:15 - INFO - codeparrot_training - Step 39072: {'lr': 5.7035517978284876e-05, 'samples': 20005376, 'steps': 39072, 'batch_loss/train': 0.7094128026801627} +12/28/2021 02:38:26 - INFO - codeparrot_training - Step 39073: {'lr': 5.702549623923134e-05, 'samples': 20005888, 'steps': 39073, 'batch_loss/train': 0.7301355414092541} +12/28/2021 02:38:36 - INFO - codeparrot_training - Step 39074: {'lr': 5.701547526737158e-05, 'samples': 20006400, 'steps': 39074, 'batch_loss/train': 0.7844263240695} +12/28/2021 02:38:49 - INFO - codeparrot_training - Step 39075: {'lr': 5.70054550627454e-05, 'samples': 20006912, 'steps': 39075, 'batch_loss/train': 0.6926543549634516} +12/28/2021 02:38:59 - INFO - codeparrot_training - Step 39076: {'lr': 5.6995435625392664e-05, 'samples': 20007424, 'steps': 39076, 'batch_loss/train': 0.7904582098126411} +12/28/2021 02:39:10 - INFO - codeparrot_training - Step 39077: {'lr': 5.6985416955353223e-05, 'samples': 20007936, 'steps': 39077, 'batch_loss/train': 0.7208088338375092} +12/28/2021 02:39:21 - INFO - codeparrot_training - Step 39078: {'lr': 5.6975399052666856e-05, 'samples': 20008448, 'steps': 39078, 'batch_loss/train': 0.7370007480494678} +12/28/2021 02:39:33 - INFO - codeparrot_training - Step 39079: {'lr': 5.696538191737344e-05, 'samples': 20008960, 'steps': 39079, 'batch_loss/train': 0.5895890505053103} +12/28/2021 02:39:43 - INFO - codeparrot_training - Step 39080: {'lr': 5.695536554951283e-05, 'samples': 20009472, 'steps': 39080, 'batch_loss/train': 0.7190752467140555} +12/28/2021 02:39:54 - INFO - codeparrot_training - Step 39081: {'lr': 5.6945349949124676e-05, 'samples': 20009984, 'steps': 39081, 'batch_loss/train': 0.8872180634643883} +12/28/2021 02:40:09 - INFO - codeparrot_training - Step 39082: {'lr': 5.6935335116249044e-05, 'samples': 20010496, 'steps': 39082, 'batch_loss/train': 1.465199787169695} +12/28/2021 02:40:19 - INFO - codeparrot_training - Step 39083: {'lr': 5.6925321050925545e-05, 'samples': 20011008, 'steps': 39083, 'batch_loss/train': 0.7064515573438257} +12/28/2021 02:40:30 - INFO - codeparrot_training - Step 39084: {'lr': 5.691530775319401e-05, 'samples': 20011520, 'steps': 39084, 'batch_loss/train': 0.7416319982148707} +12/28/2021 02:40:40 - INFO - codeparrot_training - Step 39085: {'lr': 5.690529522309446e-05, 'samples': 20012032, 'steps': 39085, 'batch_loss/train': 0.7521039955317974} +12/28/2021 02:40:53 - INFO - codeparrot_training - Step 39086: {'lr': 5.689528346066644e-05, 'samples': 20012544, 'steps': 39086, 'batch_loss/train': 0.8023230582475662} +12/28/2021 02:41:04 - INFO - codeparrot_training - Step 39087: {'lr': 5.6885272465949876e-05, 'samples': 20013056, 'steps': 39087, 'batch_loss/train': 0.6588184274733067} +12/28/2021 02:41:14 - INFO - codeparrot_training - Step 39088: {'lr': 5.687526223898456e-05, 'samples': 20013568, 'steps': 39088, 'batch_loss/train': 0.9240426411852241} +12/28/2021 02:41:26 - INFO - codeparrot_training - Step 39089: {'lr': 5.686525277981028e-05, 'samples': 20014080, 'steps': 39089, 'batch_loss/train': 1.0042285593226552} +12/28/2021 02:41:37 - INFO - codeparrot_training - Step 39090: {'lr': 5.6855244088466816e-05, 'samples': 20014592, 'steps': 39090, 'batch_loss/train': 0.7582023201975971} +12/28/2021 02:41:48 - INFO - codeparrot_training - Step 39091: {'lr': 5.6845236164994043e-05, 'samples': 20015104, 'steps': 39091, 'batch_loss/train': 0.7122832788154483} +12/28/2021 02:42:02 - INFO - codeparrot_training - Step 39092: {'lr': 5.6835229009431556e-05, 'samples': 20015616, 'steps': 39092, 'batch_loss/train': 0.663698447868228} +12/28/2021 02:42:12 - INFO - codeparrot_training - Step 39093: {'lr': 5.68252226218193e-05, 'samples': 20016128, 'steps': 39093, 'batch_loss/train': 0.6419235775247216} +12/28/2021 02:42:23 - INFO - codeparrot_training - Step 39094: {'lr': 5.681521700219713e-05, 'samples': 20016640, 'steps': 39094, 'batch_loss/train': 0.7134287776425481} +12/28/2021 02:42:34 - INFO - codeparrot_training - Step 39095: {'lr': 5.6805212150604554e-05, 'samples': 20017152, 'steps': 39095, 'batch_loss/train': 0.6403492949903011} +12/28/2021 02:42:46 - INFO - codeparrot_training - Step 39096: {'lr': 5.679520806708158e-05, 'samples': 20017664, 'steps': 39096, 'batch_loss/train': 0.7076187252532691} +12/28/2021 02:42:57 - INFO - codeparrot_training - Step 39097: {'lr': 5.678520475166796e-05, 'samples': 20018176, 'steps': 39097, 'batch_loss/train': 0.7785665476694703} +12/28/2021 02:43:07 - INFO - codeparrot_training - Step 39098: {'lr': 5.677520220440335e-05, 'samples': 20018688, 'steps': 39098, 'batch_loss/train': 0.7536584921181202} +12/28/2021 02:43:20 - INFO - codeparrot_training - Step 39099: {'lr': 5.6765200425327596e-05, 'samples': 20019200, 'steps': 39099, 'batch_loss/train': 0.8052341609727591} +12/28/2021 02:43:30 - INFO - codeparrot_training - Step 39100: {'lr': 5.675519941448043e-05, 'samples': 20019712, 'steps': 39100, 'batch_loss/train': 0.7993134071002714} +12/28/2021 02:43:41 - INFO - codeparrot_training - Step 39101: {'lr': 5.67451991719016e-05, 'samples': 20020224, 'steps': 39101, 'batch_loss/train': 0.722289364784956} +12/28/2021 02:43:55 - INFO - codeparrot_training - Step 39102: {'lr': 5.6735199697630914e-05, 'samples': 20020736, 'steps': 39102, 'batch_loss/train': 0.6629281316418201} +12/28/2021 02:44:06 - INFO - codeparrot_training - Step 39103: {'lr': 5.672520099170811e-05, 'samples': 20021248, 'steps': 39103, 'batch_loss/train': 0.726734041236341} +12/28/2021 02:44:16 - INFO - codeparrot_training - Step 39104: {'lr': 5.671520305417291e-05, 'samples': 20021760, 'steps': 39104, 'batch_loss/train': 0.7363632686901838} +12/28/2021 02:44:27 - INFO - codeparrot_training - Step 39105: {'lr': 5.6705205885065087e-05, 'samples': 20022272, 'steps': 39105, 'batch_loss/train': 0.6647102814167738} +12/28/2021 02:44:39 - INFO - codeparrot_training - Step 39106: {'lr': 5.669520948442439e-05, 'samples': 20022784, 'steps': 39106, 'batch_loss/train': 0.6965447305701673} +12/28/2021 02:44:50 - INFO - codeparrot_training - Step 39107: {'lr': 5.668521385229053e-05, 'samples': 20023296, 'steps': 39107, 'batch_loss/train': 0.6690406058914959} +12/28/2021 02:45:01 - INFO - codeparrot_training - Step 39108: {'lr': 5.667521898870334e-05, 'samples': 20023808, 'steps': 39108, 'batch_loss/train': 0.6031132033094764} +12/28/2021 02:45:15 - INFO - codeparrot_training - Step 39109: {'lr': 5.666522489370243e-05, 'samples': 20024320, 'steps': 39109, 'batch_loss/train': 0.8675164701417089} +12/28/2021 02:45:25 - INFO - codeparrot_training - Step 39110: {'lr': 5.665523156732752e-05, 'samples': 20024832, 'steps': 39110, 'batch_loss/train': 0.6868296600878239} +12/28/2021 02:45:36 - INFO - codeparrot_training - Step 39111: {'lr': 5.664523900961852e-05, 'samples': 20025344, 'steps': 39111, 'batch_loss/train': 0.7672839937731624} +12/28/2021 02:45:47 - INFO - codeparrot_training - Step 39112: {'lr': 5.6635247220614974e-05, 'samples': 20025856, 'steps': 39112, 'batch_loss/train': 0.6296299035893753} +12/28/2021 02:45:59 - INFO - codeparrot_training - Step 39113: {'lr': 5.6625256200356687e-05, 'samples': 20026368, 'steps': 39113, 'batch_loss/train': 0.7362305582500994} +12/28/2021 02:46:09 - INFO - codeparrot_training - Step 39114: {'lr': 5.661526594888336e-05, 'samples': 20026880, 'steps': 39114, 'batch_loss/train': 0.746927672997117} +12/28/2021 02:46:20 - INFO - codeparrot_training - Step 39115: {'lr': 5.660527646623473e-05, 'samples': 20027392, 'steps': 39115, 'batch_loss/train': 0.7959609311074018} +12/28/2021 02:46:32 - INFO - codeparrot_training - Step 39116: {'lr': 5.6595287752450485e-05, 'samples': 20027904, 'steps': 39116, 'batch_loss/train': 0.7087512463913299} +12/28/2021 02:46:43 - INFO - codeparrot_training - Step 39117: {'lr': 5.6585299807570406e-05, 'samples': 20028416, 'steps': 39117, 'batch_loss/train': 0.7851991504430771} +12/28/2021 02:46:53 - INFO - codeparrot_training - Step 39118: {'lr': 5.657531263163404e-05, 'samples': 20028928, 'steps': 39118, 'batch_loss/train': 0.9075112221762538} +12/28/2021 02:47:07 - INFO - codeparrot_training - Step 39119: {'lr': 5.6565326224681245e-05, 'samples': 20029440, 'steps': 39119, 'batch_loss/train': 0.9487456511706114} +12/28/2021 02:47:18 - INFO - codeparrot_training - Step 39120: {'lr': 5.6555340586751743e-05, 'samples': 20029952, 'steps': 39120, 'batch_loss/train': 1.3595380205661058} +12/28/2021 02:47:28 - INFO - codeparrot_training - Step 39121: {'lr': 5.654535571788502e-05, 'samples': 20030464, 'steps': 39121, 'batch_loss/train': 0.7637912314385176} +12/28/2021 02:47:39 - INFO - codeparrot_training - Step 39122: {'lr': 5.6535371618121e-05, 'samples': 20030976, 'steps': 39122, 'batch_loss/train': 0.7998102982528508} +12/28/2021 02:47:51 - INFO - codeparrot_training - Step 39123: {'lr': 5.652538828749934e-05, 'samples': 20031488, 'steps': 39123, 'batch_loss/train': 0.7372973309829831} +12/28/2021 02:48:02 - INFO - codeparrot_training - Step 39124: {'lr': 5.6515405726059646e-05, 'samples': 20032000, 'steps': 39124, 'batch_loss/train': 2.118893824517727} +12/28/2021 02:48:13 - INFO - codeparrot_training - Step 39125: {'lr': 5.650542393384161e-05, 'samples': 20032512, 'steps': 39125, 'batch_loss/train': 0.8197911288589239} +12/28/2021 02:48:25 - INFO - codeparrot_training - Step 39126: {'lr': 5.649544291088496e-05, 'samples': 20033024, 'steps': 39126, 'batch_loss/train': 0.7527726045809686} +12/28/2021 02:48:35 - INFO - codeparrot_training - Step 39127: {'lr': 5.6485462657229367e-05, 'samples': 20033536, 'steps': 39127, 'batch_loss/train': 0.7638858039863408} +12/28/2021 02:48:46 - INFO - codeparrot_training - Step 39128: {'lr': 5.647548317291451e-05, 'samples': 20034048, 'steps': 39128, 'batch_loss/train': 0.6879172393819317} +12/28/2021 02:48:58 - INFO - codeparrot_training - Step 39129: {'lr': 5.646550445798007e-05, 'samples': 20034560, 'steps': 39129, 'batch_loss/train': 0.646985711529851} +12/28/2021 02:49:09 - INFO - codeparrot_training - Step 39130: {'lr': 5.6455526512465687e-05, 'samples': 20035072, 'steps': 39130, 'batch_loss/train': 0.7899009403772652} +12/28/2021 02:49:19 - INFO - codeparrot_training - Step 39131: {'lr': 5.644554933641105e-05, 'samples': 20035584, 'steps': 39131, 'batch_loss/train': 0.7494337495299987} +12/28/2021 02:49:30 - INFO - codeparrot_training - Step 39132: {'lr': 5.643557292985585e-05, 'samples': 20036096, 'steps': 39132, 'batch_loss/train': 0.6983427956001833} +12/28/2021 02:49:44 - INFO - codeparrot_training - Step 39133: {'lr': 5.642559729283972e-05, 'samples': 20036608, 'steps': 39133, 'batch_loss/train': 1.4299851216783281} +12/28/2021 02:49:55 - INFO - codeparrot_training - Step 39134: {'lr': 5.641562242540238e-05, 'samples': 20037120, 'steps': 39134, 'batch_loss/train': 0.7367000617086887} +12/28/2021 02:50:05 - INFO - codeparrot_training - Step 39135: {'lr': 5.640564832758335e-05, 'samples': 20037632, 'steps': 39135, 'batch_loss/train': 0.7711104564368725} +12/28/2021 02:50:17 - INFO - codeparrot_training - Step 39136: {'lr': 5.63956749994223e-05, 'samples': 20038144, 'steps': 39136, 'batch_loss/train': 0.7112706899642944} +12/28/2021 02:50:28 - INFO - codeparrot_training - Step 39137: {'lr': 5.6385702440959115e-05, 'samples': 20038656, 'steps': 39137, 'batch_loss/train': 0.7594265290535986} +12/28/2021 02:50:39 - INFO - codeparrot_training - Step 39138: {'lr': 5.637573065223317e-05, 'samples': 20039168, 'steps': 39138, 'batch_loss/train': 0.7740198606625199} +12/28/2021 02:50:53 - INFO - codeparrot_training - Step 39139: {'lr': 5.636575963328422e-05, 'samples': 20039680, 'steps': 39139, 'batch_loss/train': 0.7230961916502565} +12/28/2021 02:51:03 - INFO - codeparrot_training - Step 39140: {'lr': 5.6355789384151895e-05, 'samples': 20040192, 'steps': 39140, 'batch_loss/train': 0.7220613472163677} +12/28/2021 02:51:14 - INFO - codeparrot_training - Step 39141: {'lr': 5.6345819904875835e-05, 'samples': 20040704, 'steps': 39141, 'batch_loss/train': 0.7275355746969581} +12/28/2021 02:51:24 - INFO - codeparrot_training - Step 39142: {'lr': 5.633585119549567e-05, 'samples': 20041216, 'steps': 39142, 'batch_loss/train': 0.7147778326179832} +12/28/2021 02:51:37 - INFO - codeparrot_training - Step 39143: {'lr': 5.6325883256051035e-05, 'samples': 20041728, 'steps': 39143, 'batch_loss/train': 0.6062137135304511} +12/28/2021 02:51:48 - INFO - codeparrot_training - Step 39144: {'lr': 5.631591608658157e-05, 'samples': 20042240, 'steps': 39144, 'batch_loss/train': 0.6736447624862194} +12/28/2021 02:51:58 - INFO - codeparrot_training - Step 39145: {'lr': 5.6305949687126876e-05, 'samples': 20042752, 'steps': 39145, 'batch_loss/train': 0.5904196421615779} +12/28/2021 02:52:10 - INFO - codeparrot_training - Step 39146: {'lr': 5.629598405772668e-05, 'samples': 20043264, 'steps': 39146, 'batch_loss/train': 0.6555983740836382} +12/28/2021 02:52:21 - INFO - codeparrot_training - Step 39147: {'lr': 5.6286019198420356e-05, 'samples': 20043776, 'steps': 39147, 'batch_loss/train': 0.7216521929949522} +12/28/2021 02:52:32 - INFO - codeparrot_training - Step 39148: {'lr': 5.627605510924774e-05, 'samples': 20044288, 'steps': 39148, 'batch_loss/train': 0.7461178579251282} +12/28/2021 02:52:45 - INFO - codeparrot_training - Step 39149: {'lr': 5.626609179024847e-05, 'samples': 20044800, 'steps': 39149, 'batch_loss/train': 0.6269946614047512} +12/28/2021 02:52:56 - INFO - codeparrot_training - Step 39150: {'lr': 5.6256129241462006e-05, 'samples': 20045312, 'steps': 39150, 'batch_loss/train': 0.7345927763963118} +12/28/2021 02:53:07 - INFO - codeparrot_training - Step 39151: {'lr': 5.6246167462927934e-05, 'samples': 20045824, 'steps': 39151, 'batch_loss/train': 0.7017435752786696} +12/28/2021 02:53:17 - INFO - codeparrot_training - Step 39152: {'lr': 5.623620645468608e-05, 'samples': 20046336, 'steps': 39152, 'batch_loss/train': 0.7930043372325599} +12/28/2021 02:53:30 - INFO - codeparrot_training - Step 39153: {'lr': 5.622624621677583e-05, 'samples': 20046848, 'steps': 39153, 'batch_loss/train': 0.7817897265776992} +12/28/2021 02:53:40 - INFO - codeparrot_training - Step 39154: {'lr': 5.621628674923684e-05, 'samples': 20047360, 'steps': 39154, 'batch_loss/train': 0.6823260644450784} +12/28/2021 02:53:51 - INFO - codeparrot_training - Step 39155: {'lr': 5.620632805210876e-05, 'samples': 20047872, 'steps': 39155, 'batch_loss/train': 0.8303641565144062} +12/28/2021 02:54:03 - INFO - codeparrot_training - Step 39156: {'lr': 5.619637012543113e-05, 'samples': 20048384, 'steps': 39156, 'batch_loss/train': 0.7641572426073253} +12/28/2021 02:54:14 - INFO - codeparrot_training - Step 39157: {'lr': 5.6186412969243566e-05, 'samples': 20048896, 'steps': 39157, 'batch_loss/train': 0.6384455407969654} +12/28/2021 02:54:24 - INFO - codeparrot_training - Step 39158: {'lr': 5.6176456583585636e-05, 'samples': 20049408, 'steps': 39158, 'batch_loss/train': 0.7418220650870353} +12/28/2021 02:54:38 - INFO - codeparrot_training - Step 39159: {'lr': 5.616650096849693e-05, 'samples': 20049920, 'steps': 39159, 'batch_loss/train': 0.7925353394821286} +12/28/2021 02:54:49 - INFO - codeparrot_training - Step 39160: {'lr': 5.6156546124017024e-05, 'samples': 20050432, 'steps': 39160, 'batch_loss/train': 0.6954695032909513} +12/28/2021 02:54:59 - INFO - codeparrot_training - Step 39161: {'lr': 5.614659205018558e-05, 'samples': 20050944, 'steps': 39161, 'batch_loss/train': 0.787446585483849} +12/28/2021 02:55:10 - INFO - codeparrot_training - Step 39162: {'lr': 5.6136638747041946e-05, 'samples': 20051456, 'steps': 39162, 'batch_loss/train': 0.6652501919306815} +12/28/2021 02:55:22 - INFO - codeparrot_training - Step 39163: {'lr': 5.6126686214625966e-05, 'samples': 20051968, 'steps': 39163, 'batch_loss/train': 0.763237671693787} +12/28/2021 02:55:33 - INFO - codeparrot_training - Step 39164: {'lr': 5.611673445297702e-05, 'samples': 20052480, 'steps': 39164, 'batch_loss/train': 0.7590137803927064} +12/28/2021 02:55:43 - INFO - codeparrot_training - Step 39165: {'lr': 5.6106783462134715e-05, 'samples': 20052992, 'steps': 39165, 'batch_loss/train': 0.8453309866599739} +12/28/2021 02:55:56 - INFO - codeparrot_training - Step 39166: {'lr': 5.609683324213863e-05, 'samples': 20053504, 'steps': 39166, 'batch_loss/train': 0.7180120686534792} +12/28/2021 02:56:06 - INFO - codeparrot_training - Step 39167: {'lr': 5.608688379302834e-05, 'samples': 20054016, 'steps': 39167, 'batch_loss/train': 0.6773507674224675} +12/28/2021 02:56:17 - INFO - codeparrot_training - Step 39168: {'lr': 5.607693511484335e-05, 'samples': 20054528, 'steps': 39168, 'batch_loss/train': 0.7580087664537132} +12/28/2021 02:56:31 - INFO - codeparrot_training - Step 39169: {'lr': 5.606698720762327e-05, 'samples': 20055040, 'steps': 39169, 'batch_loss/train': 0.8091659133788198} +12/28/2021 02:56:41 - INFO - codeparrot_training - Step 39170: {'lr': 5.6057040071407606e-05, 'samples': 20055552, 'steps': 39170, 'batch_loss/train': 0.7885321588255465} +12/28/2021 02:56:52 - INFO - codeparrot_training - Step 39171: {'lr': 5.6047093706235954e-05, 'samples': 20056064, 'steps': 39171, 'batch_loss/train': 0.7913047322072089} +12/28/2021 02:57:03 - INFO - codeparrot_training - Step 39172: {'lr': 5.603714811214786e-05, 'samples': 20056576, 'steps': 39172, 'batch_loss/train': 0.7515617134049535} +12/28/2021 02:57:15 - INFO - codeparrot_training - Step 39173: {'lr': 5.6027203289182706e-05, 'samples': 20057088, 'steps': 39173, 'batch_loss/train': 0.9370538215152919} +12/28/2021 02:57:26 - INFO - codeparrot_training - Step 39174: {'lr': 5.6017259237380215e-05, 'samples': 20057600, 'steps': 39174, 'batch_loss/train': 0.7360365078784525} +12/28/2021 02:57:36 - INFO - codeparrot_training - Step 39175: {'lr': 5.6007315956779905e-05, 'samples': 20058112, 'steps': 39175, 'batch_loss/train': 0.8339464361779392} +12/28/2021 02:57:48 - INFO - codeparrot_training - Step 39176: {'lr': 5.59973734474212e-05, 'samples': 20058624, 'steps': 39176, 'batch_loss/train': 0.7675507105886936} +12/28/2021 02:57:59 - INFO - codeparrot_training - Step 39177: {'lr': 5.598743170934362e-05, 'samples': 20059136, 'steps': 39177, 'batch_loss/train': 0.6837025727145374} +12/28/2021 02:58:10 - INFO - codeparrot_training - Step 39178: {'lr': 5.5977490742586904e-05, 'samples': 20059648, 'steps': 39178, 'batch_loss/train': 0.7424450716935098} +12/28/2021 02:58:23 - INFO - codeparrot_training - Step 39179: {'lr': 5.596755054719036e-05, 'samples': 20060160, 'steps': 39179, 'batch_loss/train': 0.7340706191025674} +12/28/2021 02:58:34 - INFO - codeparrot_training - Step 39180: {'lr': 5.595761112319356e-05, 'samples': 20060672, 'steps': 39180, 'batch_loss/train': 0.7507338719442487} +12/28/2021 02:58:45 - INFO - codeparrot_training - Step 39181: {'lr': 5.5947672470636005e-05, 'samples': 20061184, 'steps': 39181, 'batch_loss/train': 0.7839977703988552} +12/28/2021 02:58:55 - INFO - codeparrot_training - Step 39182: {'lr': 5.593773458955726e-05, 'samples': 20061696, 'steps': 39182, 'batch_loss/train': 0.7723514903336763} +12/28/2021 02:59:07 - INFO - codeparrot_training - Step 39183: {'lr': 5.592779747999682e-05, 'samples': 20062208, 'steps': 39183, 'batch_loss/train': 0.6724197115399875} +12/28/2021 02:59:18 - INFO - codeparrot_training - Step 39184: {'lr': 5.591786114199415e-05, 'samples': 20062720, 'steps': 39184, 'batch_loss/train': 0.7344922749325633} +12/28/2021 02:59:29 - INFO - codeparrot_training - Step 39185: {'lr': 5.59079255755888e-05, 'samples': 20063232, 'steps': 39185, 'batch_loss/train': 0.7533183414489031} +12/28/2021 02:59:42 - INFO - codeparrot_training - Step 39186: {'lr': 5.589799078082025e-05, 'samples': 20063744, 'steps': 39186, 'batch_loss/train': 0.7892286698333919} +12/28/2021 02:59:53 - INFO - codeparrot_training - Step 39187: {'lr': 5.5888056757728065e-05, 'samples': 20064256, 'steps': 39187, 'batch_loss/train': 0.7746480424539186} +12/28/2021 03:00:04 - INFO - codeparrot_training - Step 39188: {'lr': 5.587812350635152e-05, 'samples': 20064768, 'steps': 39188, 'batch_loss/train': 0.7565336632542312} +12/28/2021 03:00:16 - INFO - codeparrot_training - Step 39189: {'lr': 5.5868191026730405e-05, 'samples': 20065280, 'steps': 39189, 'batch_loss/train': 0.6934180217795074} +12/28/2021 03:00:27 - INFO - codeparrot_training - Step 39190: {'lr': 5.585825931890401e-05, 'samples': 20065792, 'steps': 39190, 'batch_loss/train': 0.7567882658913732} +12/28/2021 03:00:37 - INFO - codeparrot_training - Step 39191: {'lr': 5.5848328382911844e-05, 'samples': 20066304, 'steps': 39191, 'batch_loss/train': 0.7474022302776575} +12/28/2021 03:00:48 - INFO - codeparrot_training - Step 39192: {'lr': 5.583839821879344e-05, 'samples': 20066816, 'steps': 39192, 'batch_loss/train': 0.8671368565410376} +12/28/2021 03:01:00 - INFO - codeparrot_training - Step 39193: {'lr': 5.582846882658824e-05, 'samples': 20067328, 'steps': 39193, 'batch_loss/train': 0.799350316170603} +12/28/2021 03:01:11 - INFO - codeparrot_training - Step 39194: {'lr': 5.5818540206335725e-05, 'samples': 20067840, 'steps': 39194, 'batch_loss/train': 0.7486234353855252} +12/28/2021 03:01:21 - INFO - codeparrot_training - Step 39195: {'lr': 5.5808612358075385e-05, 'samples': 20068352, 'steps': 39195, 'batch_loss/train': 0.6865922480355948} +12/28/2021 03:01:33 - INFO - codeparrot_training - Step 39196: {'lr': 5.579868528184667e-05, 'samples': 20068864, 'steps': 39196, 'batch_loss/train': 0.7432649163529277} +12/28/2021 03:01:44 - INFO - codeparrot_training - Step 39197: {'lr': 5.578875897768906e-05, 'samples': 20069376, 'steps': 39197, 'batch_loss/train': 0.7629923298954964} +12/28/2021 03:01:54 - INFO - codeparrot_training - Step 39198: {'lr': 5.577883344564208e-05, 'samples': 20069888, 'steps': 39198, 'batch_loss/train': 0.6995450358372182} +12/28/2021 03:02:08 - INFO - codeparrot_training - Step 39199: {'lr': 5.5768908685744995e-05, 'samples': 20070400, 'steps': 39199, 'batch_loss/train': 0.7073991019278765} +12/28/2021 03:02:19 - INFO - codeparrot_training - Step 39200: {'lr': 5.575898469803745e-05, 'samples': 20070912, 'steps': 39200, 'batch_loss/train': 0.829474757425487} +12/28/2021 03:02:30 - INFO - codeparrot_training - Step 39201: {'lr': 5.5749061482558896e-05, 'samples': 20071424, 'steps': 39201, 'batch_loss/train': 0.6903760866262019} +12/28/2021 03:02:42 - INFO - codeparrot_training - Step 39202: {'lr': 5.573913903934869e-05, 'samples': 20071936, 'steps': 39202, 'batch_loss/train': 0.7384287554305047} +12/28/2021 03:02:52 - INFO - codeparrot_training - Step 39203: {'lr': 5.5729217368446215e-05, 'samples': 20072448, 'steps': 39203, 'batch_loss/train': 0.6782749886624515} +12/28/2021 03:03:03 - INFO - codeparrot_training - Step 39204: {'lr': 5.5719296469891185e-05, 'samples': 20072960, 'steps': 39204, 'batch_loss/train': 0.6864196147071198} +12/28/2021 03:03:14 - INFO - codeparrot_training - Step 39205: {'lr': 5.570937634372278e-05, 'samples': 20073472, 'steps': 39205, 'batch_loss/train': 0.7555100531317294} +12/28/2021 03:03:26 - INFO - codeparrot_training - Step 39206: {'lr': 5.5699456989980495e-05, 'samples': 20073984, 'steps': 39206, 'batch_loss/train': 0.7965959068387747} +12/28/2021 03:03:36 - INFO - codeparrot_training - Step 39207: {'lr': 5.5689538408703914e-05, 'samples': 20074496, 'steps': 39207, 'batch_loss/train': 0.7768170861527324} +12/28/2021 03:03:47 - INFO - codeparrot_training - Step 39208: {'lr': 5.5679620599932284e-05, 'samples': 20075008, 'steps': 39208, 'batch_loss/train': 0.6433663727948442} +12/28/2021 03:04:01 - INFO - codeparrot_training - Step 39209: {'lr': 5.5669703563705124e-05, 'samples': 20075520, 'steps': 39209, 'batch_loss/train': 0.7623039954341948} +12/28/2021 03:04:12 - INFO - codeparrot_training - Step 39210: {'lr': 5.565978730006185e-05, 'samples': 20076032, 'steps': 39210, 'batch_loss/train': 0.7175253860186785} +12/28/2021 03:04:22 - INFO - codeparrot_training - Step 39211: {'lr': 5.564987180904188e-05, 'samples': 20076544, 'steps': 39211, 'batch_loss/train': 0.8367907995707355} +12/28/2021 03:04:35 - INFO - codeparrot_training - Step 39212: {'lr': 5.563995709068465e-05, 'samples': 20077056, 'steps': 39212, 'batch_loss/train': 0.7077854431699961} +12/28/2021 03:04:45 - INFO - codeparrot_training - Step 39213: {'lr': 5.563004314502962e-05, 'samples': 20077568, 'steps': 39213, 'batch_loss/train': 0.8541156072169542} +12/28/2021 03:04:56 - INFO - codeparrot_training - Step 39214: {'lr': 5.562012997211599e-05, 'samples': 20078080, 'steps': 39214, 'batch_loss/train': 0.6983298109844327} +12/28/2021 03:05:06 - INFO - codeparrot_training - Step 39215: {'lr': 5.561021757198348e-05, 'samples': 20078592, 'steps': 39215, 'batch_loss/train': 0.8039443893358111} +12/28/2021 03:05:20 - INFO - codeparrot_training - Step 39216: {'lr': 5.5600305944671306e-05, 'samples': 20079104, 'steps': 39216, 'batch_loss/train': 0.6996858888305724} +12/28/2021 03:05:31 - INFO - codeparrot_training - Step 39217: {'lr': 5.5590395090218796e-05, 'samples': 20079616, 'steps': 39217, 'batch_loss/train': 0.7758933529257774} +12/28/2021 03:05:42 - INFO - codeparrot_training - Step 39218: {'lr': 5.5580485008665635e-05, 'samples': 20080128, 'steps': 39218, 'batch_loss/train': 0.7471621014992706} +12/28/2021 03:05:54 - INFO - codeparrot_training - Step 39219: {'lr': 5.557057570005098e-05, 'samples': 20080640, 'steps': 39219, 'batch_loss/train': 0.7119061393896118} +12/28/2021 03:06:05 - INFO - codeparrot_training - Step 39220: {'lr': 5.5560667164414294e-05, 'samples': 20081152, 'steps': 39220, 'batch_loss/train': 0.7556946561671793} +12/28/2021 03:06:15 - INFO - codeparrot_training - Step 39221: {'lr': 5.555075940179499e-05, 'samples': 20081664, 'steps': 39221, 'batch_loss/train': 0.7306016732472926} +12/28/2021 03:06:27 - INFO - codeparrot_training - Step 39222: {'lr': 5.554085241223245e-05, 'samples': 20082176, 'steps': 39222, 'batch_loss/train': 0.7649238770827651} +12/28/2021 03:06:38 - INFO - codeparrot_training - Step 39223: {'lr': 5.5530946195766044e-05, 'samples': 20082688, 'steps': 39223, 'batch_loss/train': 0.5017632628441788} +12/28/2021 03:06:49 - INFO - codeparrot_training - Step 39224: {'lr': 5.5521040752435246e-05, 'samples': 20083200, 'steps': 39224, 'batch_loss/train': 0.7575158448889852} +12/28/2021 03:06:59 - INFO - codeparrot_training - Step 39225: {'lr': 5.55111360822792e-05, 'samples': 20083712, 'steps': 39225, 'batch_loss/train': 0.739603815600276} +12/28/2021 03:07:12 - INFO - codeparrot_training - Step 39226: {'lr': 5.5501232185337535e-05, 'samples': 20084224, 'steps': 39226, 'batch_loss/train': 0.7185442727059126} +12/28/2021 03:07:23 - INFO - codeparrot_training - Step 39227: {'lr': 5.549132906164958e-05, 'samples': 20084736, 'steps': 39227, 'batch_loss/train': 0.702891044318676} +12/28/2021 03:07:33 - INFO - codeparrot_training - Step 39228: {'lr': 5.5481426711254614e-05, 'samples': 20085248, 'steps': 39228, 'batch_loss/train': 0.6256917355931364} +12/28/2021 03:07:46 - INFO - codeparrot_training - Step 39229: {'lr': 5.547152513419196e-05, 'samples': 20085760, 'steps': 39229, 'batch_loss/train': 0.7841019220650196} +12/28/2021 03:07:56 - INFO - codeparrot_training - Step 39230: {'lr': 5.54616243305012e-05, 'samples': 20086272, 'steps': 39230, 'batch_loss/train': 0.762813794775866} +12/28/2021 03:08:07 - INFO - codeparrot_training - Step 39231: {'lr': 5.5451724300221526e-05, 'samples': 20086784, 'steps': 39231, 'batch_loss/train': 0.6740454523824155} +12/28/2021 03:08:19 - INFO - codeparrot_training - Step 39232: {'lr': 5.5441825043392234e-05, 'samples': 20087296, 'steps': 39232, 'batch_loss/train': 0.7401770411524922} +12/28/2021 03:08:29 - INFO - codeparrot_training - Step 39233: {'lr': 5.5431926560052945e-05, 'samples': 20087808, 'steps': 39233, 'batch_loss/train': 0.7620372241362929} +12/28/2021 03:08:40 - INFO - codeparrot_training - Step 39234: {'lr': 5.542202885024278e-05, 'samples': 20088320, 'steps': 39234, 'batch_loss/train': 0.7038419148884714} +12/28/2021 03:08:51 - INFO - codeparrot_training - Step 39235: {'lr': 5.541213191400113e-05, 'samples': 20088832, 'steps': 39235, 'batch_loss/train': 0.6857101153582335} +12/28/2021 03:09:03 - INFO - codeparrot_training - Step 39236: {'lr': 5.5402235751367405e-05, 'samples': 20089344, 'steps': 39236, 'batch_loss/train': 0.8378484947606921} +12/28/2021 03:09:13 - INFO - codeparrot_training - Step 39237: {'lr': 5.5392340362380886e-05, 'samples': 20089856, 'steps': 39237, 'batch_loss/train': 0.752046367386356} +12/28/2021 03:09:24 - INFO - codeparrot_training - Step 39238: {'lr': 5.538244574708098e-05, 'samples': 20090368, 'steps': 39238, 'batch_loss/train': 0.8612436940893531} +12/28/2021 03:09:37 - INFO - codeparrot_training - Step 39239: {'lr': 5.5372551905507025e-05, 'samples': 20090880, 'steps': 39239, 'batch_loss/train': 0.7008555894717574} +12/28/2021 03:09:48 - INFO - codeparrot_training - Step 39240: {'lr': 5.536265883769817e-05, 'samples': 20091392, 'steps': 39240, 'batch_loss/train': 0.7511526802554727} +12/28/2021 03:09:58 - INFO - codeparrot_training - Step 39241: {'lr': 5.535276654369398e-05, 'samples': 20091904, 'steps': 39241, 'batch_loss/train': 0.9825930539518595} +12/28/2021 03:10:11 - INFO - codeparrot_training - Step 39242: {'lr': 5.5342875023533746e-05, 'samples': 20092416, 'steps': 39242, 'batch_loss/train': 0.6689064018428326} +12/28/2021 03:10:21 - INFO - codeparrot_training - Step 39243: {'lr': 5.533298427725661e-05, 'samples': 20092928, 'steps': 39243, 'batch_loss/train': 0.7115756273269653} +12/28/2021 03:10:32 - INFO - codeparrot_training - Step 39244: {'lr': 5.532309430490218e-05, 'samples': 20093440, 'steps': 39244, 'batch_loss/train': 1.2759842614468653} +12/28/2021 03:10:42 - INFO - codeparrot_training - Step 39245: {'lr': 5.5313205106509526e-05, 'samples': 20093952, 'steps': 39245, 'batch_loss/train': 0.6271579238818958} +12/28/2021 03:10:55 - INFO - codeparrot_training - Step 39246: {'lr': 5.530331668211808e-05, 'samples': 20094464, 'steps': 39246, 'batch_loss/train': 0.7027442641556263} +12/28/2021 03:11:06 - INFO - codeparrot_training - Step 39247: {'lr': 5.5293429031767125e-05, 'samples': 20094976, 'steps': 39247, 'batch_loss/train': 0.7730758134275675} +12/28/2021 03:11:16 - INFO - codeparrot_training - Step 39248: {'lr': 5.528354215549597e-05, 'samples': 20095488, 'steps': 39248, 'batch_loss/train': 0.8474413473159075} +12/28/2021 03:11:29 - INFO - codeparrot_training - Step 39249: {'lr': 5.527365605334392e-05, 'samples': 20096000, 'steps': 39249, 'batch_loss/train': 0.660292228567414} +12/28/2021 03:11:39 - INFO - codeparrot_training - Step 39250: {'lr': 5.52637707253503e-05, 'samples': 20096512, 'steps': 39250, 'batch_loss/train': 0.7222648758906871} +12/28/2021 03:11:50 - INFO - codeparrot_training - Step 39251: {'lr': 5.525388617155441e-05, 'samples': 20097024, 'steps': 39251, 'batch_loss/train': 0.746161880902946} +12/28/2021 03:12:02 - INFO - codeparrot_training - Step 39252: {'lr': 5.524400239199553e-05, 'samples': 20097536, 'steps': 39252, 'batch_loss/train': 0.755346660502255} +12/28/2021 03:12:13 - INFO - codeparrot_training - Step 39253: {'lr': 5.523411938671302e-05, 'samples': 20098048, 'steps': 39253, 'batch_loss/train': 0.8916336316615343} +12/28/2021 03:12:24 - INFO - codeparrot_training - Step 39254: {'lr': 5.522423715574598e-05, 'samples': 20098560, 'steps': 39254, 'batch_loss/train': 0.6954328084830195} +12/28/2021 03:12:36 - INFO - codeparrot_training - Step 39255: {'lr': 5.521435569913388e-05, 'samples': 20099072, 'steps': 39255, 'batch_loss/train': 0.7629971075803041} +12/28/2021 03:12:47 - INFO - codeparrot_training - Step 39256: {'lr': 5.520447501691603e-05, 'samples': 20099584, 'steps': 39256, 'batch_loss/train': 0.7692915709340014} +12/28/2021 03:12:58 - INFO - codeparrot_training - Step 39257: {'lr': 5.519459510913158e-05, 'samples': 20100096, 'steps': 39257, 'batch_loss/train': 0.8326587174087763} +12/28/2021 03:13:08 - INFO - codeparrot_training - Step 39258: {'lr': 5.518471597581978e-05, 'samples': 20100608, 'steps': 39258, 'batch_loss/train': 0.7293431838043034} +12/28/2021 03:13:21 - INFO - codeparrot_training - Step 39259: {'lr': 5.5174837617020134e-05, 'samples': 20101120, 'steps': 39259, 'batch_loss/train': 0.7504305806942284} +12/28/2021 03:13:31 - INFO - codeparrot_training - Step 39260: {'lr': 5.516496003277171e-05, 'samples': 20101632, 'steps': 39260, 'batch_loss/train': 0.7060175822116435} +12/28/2021 03:13:42 - INFO - codeparrot_training - Step 39261: {'lr': 5.515508322311383e-05, 'samples': 20102144, 'steps': 39261, 'batch_loss/train': 0.9126290725544095} +12/28/2021 03:13:54 - INFO - codeparrot_training - Step 39262: {'lr': 5.5145207188085774e-05, 'samples': 20102656, 'steps': 39262, 'batch_loss/train': 0.847923006862402} +12/28/2021 03:14:05 - INFO - codeparrot_training - Step 39263: {'lr': 5.513533192772679e-05, 'samples': 20103168, 'steps': 39263, 'batch_loss/train': 0.5230202204547822} +12/28/2021 03:14:16 - INFO - codeparrot_training - Step 39264: {'lr': 5.5125457442076146e-05, 'samples': 20103680, 'steps': 39264, 'batch_loss/train': 0.7849086017813534} +12/28/2021 03:14:28 - INFO - codeparrot_training - Step 39265: {'lr': 5.511558373117318e-05, 'samples': 20104192, 'steps': 39265, 'batch_loss/train': 0.7251571016386151} +12/28/2021 03:14:39 - INFO - codeparrot_training - Step 39266: {'lr': 5.510571079505694e-05, 'samples': 20104704, 'steps': 39266, 'batch_loss/train': 0.7654148442670703} +12/28/2021 03:14:50 - INFO - codeparrot_training - Step 39267: {'lr': 5.5095838633766855e-05, 'samples': 20105216, 'steps': 39267, 'batch_loss/train': 0.7496810406446457} +12/28/2021 03:15:00 - INFO - codeparrot_training - Step 39268: {'lr': 5.508596724734219e-05, 'samples': 20105728, 'steps': 39268, 'batch_loss/train': 0.6779345800168812} +12/28/2021 03:15:12 - INFO - codeparrot_training - Step 39269: {'lr': 5.5076096635822e-05, 'samples': 20106240, 'steps': 39269, 'batch_loss/train': 0.7833258796017617} +12/28/2021 03:15:23 - INFO - codeparrot_training - Step 39270: {'lr': 5.506622679924578e-05, 'samples': 20106752, 'steps': 39270, 'batch_loss/train': 0.7677215384319425} +12/28/2021 03:15:33 - INFO - codeparrot_training - Step 39271: {'lr': 5.505635773765255e-05, 'samples': 20107264, 'steps': 39271, 'batch_loss/train': 0.7346278210170567} +12/28/2021 03:15:46 - INFO - codeparrot_training - Step 39272: {'lr': 5.504648945108165e-05, 'samples': 20107776, 'steps': 39272, 'batch_loss/train': 0.7360728038474917} +12/28/2021 03:15:57 - INFO - codeparrot_training - Step 39273: {'lr': 5.503662193957232e-05, 'samples': 20108288, 'steps': 39273, 'batch_loss/train': 0.6914215761935338} +12/28/2021 03:16:07 - INFO - codeparrot_training - Step 39274: {'lr': 5.502675520316372e-05, 'samples': 20108800, 'steps': 39274, 'batch_loss/train': 0.6230258294381201} +12/28/2021 03:16:20 - INFO - codeparrot_training - Step 39275: {'lr': 5.5016889241895156e-05, 'samples': 20109312, 'steps': 39275, 'batch_loss/train': 0.6315879989415407} +12/28/2021 03:16:31 - INFO - codeparrot_training - Step 39276: {'lr': 5.500702405580582e-05, 'samples': 20109824, 'steps': 39276, 'batch_loss/train': 0.5189997056731954} +12/28/2021 03:16:41 - INFO - codeparrot_training - Step 39277: {'lr': 5.49971596449349e-05, 'samples': 20110336, 'steps': 39277, 'batch_loss/train': 0.7872863374650478} +12/28/2021 03:16:52 - INFO - codeparrot_training - Step 39278: {'lr': 5.4987296009321676e-05, 'samples': 20110848, 'steps': 39278, 'batch_loss/train': 0.651611088309437} +12/28/2021 03:17:05 - INFO - codeparrot_training - Step 39279: {'lr': 5.4977433149005384e-05, 'samples': 20111360, 'steps': 39279, 'batch_loss/train': 0.7116173021495342} +12/28/2021 03:17:15 - INFO - codeparrot_training - Step 39280: {'lr': 5.496757106402503e-05, 'samples': 20111872, 'steps': 39280, 'batch_loss/train': 1.095536969602108} +12/28/2021 03:17:26 - INFO - codeparrot_training - Step 39281: {'lr': 5.495770975442005e-05, 'samples': 20112384, 'steps': 39281, 'batch_loss/train': 0.8595628961920738} +12/28/2021 03:17:38 - INFO - codeparrot_training - Step 39282: {'lr': 5.4947849220229634e-05, 'samples': 20112896, 'steps': 39282, 'batch_loss/train': 0.784038903657347} +12/28/2021 03:17:49 - INFO - codeparrot_training - Step 39283: {'lr': 5.493798946149287e-05, 'samples': 20113408, 'steps': 39283, 'batch_loss/train': 0.9069003965705633} +12/28/2021 03:17:59 - INFO - codeparrot_training - Step 39284: {'lr': 5.492813047824893e-05, 'samples': 20113920, 'steps': 39284, 'batch_loss/train': 0.7228540587238967} +12/28/2021 03:18:10 - INFO - codeparrot_training - Step 39285: {'lr': 5.491827227053722e-05, 'samples': 20114432, 'steps': 39285, 'batch_loss/train': 0.7365232175216079} +12/28/2021 03:18:23 - INFO - codeparrot_training - Step 39286: {'lr': 5.490841483839673e-05, 'samples': 20114944, 'steps': 39286, 'batch_loss/train': 0.6454261259641498} +12/28/2021 03:18:33 - INFO - codeparrot_training - Step 39287: {'lr': 5.489855818186673e-05, 'samples': 20115456, 'steps': 39287, 'batch_loss/train': 0.7790150241926312} +12/28/2021 03:18:44 - INFO - codeparrot_training - Step 39288: {'lr': 5.488870230098639e-05, 'samples': 20115968, 'steps': 39288, 'batch_loss/train': 0.6397516429424286} +12/28/2021 03:18:56 - INFO - codeparrot_training - Step 39289: {'lr': 5.487884719579489e-05, 'samples': 20116480, 'steps': 39289, 'batch_loss/train': 0.7827336424961686} +12/28/2021 03:19:07 - INFO - codeparrot_training - Step 39290: {'lr': 5.486899286633143e-05, 'samples': 20116992, 'steps': 39290, 'batch_loss/train': 0.6821238123811781} +12/28/2021 03:19:17 - INFO - codeparrot_training - Step 39291: {'lr': 5.485913931263517e-05, 'samples': 20117504, 'steps': 39291, 'batch_loss/train': 0.6521650210488588} +12/28/2021 03:19:30 - INFO - codeparrot_training - Step 39292: {'lr': 5.48492865347453e-05, 'samples': 20118016, 'steps': 39292, 'batch_loss/train': 0.7070273926947266} +12/28/2021 03:19:41 - INFO - codeparrot_training - Step 39293: {'lr': 5.483943453270096e-05, 'samples': 20118528, 'steps': 39293, 'batch_loss/train': 0.7033117841929197} +12/28/2021 03:19:51 - INFO - codeparrot_training - Step 39294: {'lr': 5.482958330654142e-05, 'samples': 20119040, 'steps': 39294, 'batch_loss/train': 0.7219500308856368} +12/28/2021 03:20:02 - INFO - codeparrot_training - Step 39295: {'lr': 5.481973285630562e-05, 'samples': 20119552, 'steps': 39295, 'batch_loss/train': 0.6957695400342345} +12/28/2021 03:20:14 - INFO - codeparrot_training - Step 39296: {'lr': 5.480988318203295e-05, 'samples': 20120064, 'steps': 39296, 'batch_loss/train': 0.6784625570289791} +12/28/2021 03:20:24 - INFO - codeparrot_training - Step 39297: {'lr': 5.480003428376254e-05, 'samples': 20120576, 'steps': 39297, 'batch_loss/train': 0.723709539975971} +12/28/2021 03:20:35 - INFO - codeparrot_training - Step 39298: {'lr': 5.4790186161533416e-05, 'samples': 20121088, 'steps': 39298, 'batch_loss/train': 0.8584524672478437} +12/28/2021 03:20:47 - INFO - codeparrot_training - Step 39299: {'lr': 5.47803388153848e-05, 'samples': 20121600, 'steps': 39299, 'batch_loss/train': 0.8395007462240756} +12/28/2021 03:20:58 - INFO - codeparrot_training - Step 39300: {'lr': 5.477049224535585e-05, 'samples': 20122112, 'steps': 39300, 'batch_loss/train': 0.7754421019926667} +12/28/2021 03:21:08 - INFO - codeparrot_training - Step 39301: {'lr': 5.476064645148571e-05, 'samples': 20122624, 'steps': 39301, 'batch_loss/train': 0.6521977399243042} +12/28/2021 03:21:21 - INFO - codeparrot_training - Step 39302: {'lr': 5.475080143381353e-05, 'samples': 20123136, 'steps': 39302, 'batch_loss/train': 0.6962668932974339} +12/28/2021 03:21:32 - INFO - codeparrot_training - Step 39303: {'lr': 5.474095719237843e-05, 'samples': 20123648, 'steps': 39303, 'batch_loss/train': 0.6798448404297233} +12/28/2021 03:21:42 - INFO - codeparrot_training - Step 39304: {'lr': 5.473111372721956e-05, 'samples': 20124160, 'steps': 39304, 'batch_loss/train': 0.7251331657171249} +12/28/2021 03:21:53 - INFO - codeparrot_training - Step 39305: {'lr': 5.4721271038376096e-05, 'samples': 20124672, 'steps': 39305, 'batch_loss/train': 0.6981123983860016} +12/28/2021 03:22:05 - INFO - codeparrot_training - Step 39306: {'lr': 5.471142912588703e-05, 'samples': 20125184, 'steps': 39306, 'batch_loss/train': 0.6940828757360578} +12/28/2021 03:22:16 - INFO - codeparrot_training - Step 39307: {'lr': 5.4701587989791605e-05, 'samples': 20125696, 'steps': 39307, 'batch_loss/train': 0.7008584425784647} +12/28/2021 03:22:26 - INFO - codeparrot_training - Step 39308: {'lr': 5.4691747630129045e-05, 'samples': 20126208, 'steps': 39308, 'batch_loss/train': 0.8377436078153551} +12/28/2021 03:22:39 - INFO - codeparrot_training - Step 39309: {'lr': 5.4681908046938233e-05, 'samples': 20126720, 'steps': 39309, 'batch_loss/train': 0.5978302725125104} +12/28/2021 03:22:49 - INFO - codeparrot_training - Step 39310: {'lr': 5.467206924025836e-05, 'samples': 20127232, 'steps': 39310, 'batch_loss/train': 0.8154808850958943} +12/28/2021 03:23:00 - INFO - codeparrot_training - Step 39311: {'lr': 5.4662231210128726e-05, 'samples': 20127744, 'steps': 39311, 'batch_loss/train': 0.7616565343923867} +12/28/2021 03:23:12 - INFO - codeparrot_training - Step 39312: {'lr': 5.465239395658825e-05, 'samples': 20128256, 'steps': 39312, 'batch_loss/train': 0.7957565168617293} +12/28/2021 03:23:23 - INFO - codeparrot_training - Step 39313: {'lr': 5.464255747967606e-05, 'samples': 20128768, 'steps': 39313, 'batch_loss/train': 0.7691554138436913} +12/28/2021 03:23:34 - INFO - codeparrot_training - Step 39314: {'lr': 5.463272177943132e-05, 'samples': 20129280, 'steps': 39314, 'batch_loss/train': 0.7531825415790081} +12/28/2021 03:23:44 - INFO - codeparrot_training - Step 39315: {'lr': 5.462288685589312e-05, 'samples': 20129792, 'steps': 39315, 'batch_loss/train': 0.5904778777621686} +12/28/2021 03:23:57 - INFO - codeparrot_training - Step 39316: {'lr': 5.4613052709100526e-05, 'samples': 20130304, 'steps': 39316, 'batch_loss/train': 0.740202916553244} +12/28/2021 03:24:08 - INFO - codeparrot_training - Step 39317: {'lr': 5.460321933909265e-05, 'samples': 20130816, 'steps': 39317, 'batch_loss/train': 0.7597636259160936} +12/28/2021 03:24:18 - INFO - codeparrot_training - Step 39318: {'lr': 5.459338674590864e-05, 'samples': 20131328, 'steps': 39318, 'batch_loss/train': 0.8290556268766522} +12/28/2021 03:24:30 - INFO - codeparrot_training - Step 39319: {'lr': 5.45835549295875e-05, 'samples': 20131840, 'steps': 39319, 'batch_loss/train': 0.739656075835228} +12/28/2021 03:24:41 - INFO - codeparrot_training - Step 39320: {'lr': 5.457372389016846e-05, 'samples': 20132352, 'steps': 39320, 'batch_loss/train': 0.6986919195624068} +12/28/2021 03:24:52 - INFO - codeparrot_training - Step 39321: {'lr': 5.4563893627690346e-05, 'samples': 20132864, 'steps': 39321, 'batch_loss/train': 0.795779419131577} +12/28/2021 03:25:04 - INFO - codeparrot_training - Step 39322: {'lr': 5.4554064142192466e-05, 'samples': 20133376, 'steps': 39322, 'batch_loss/train': 0.7519307159818709} +12/28/2021 03:25:15 - INFO - codeparrot_training - Step 39323: {'lr': 5.45442354337139e-05, 'samples': 20133888, 'steps': 39323, 'batch_loss/train': 0.7373403459787369} +12/28/2021 03:25:26 - INFO - codeparrot_training - Step 39324: {'lr': 5.453440750229358e-05, 'samples': 20134400, 'steps': 39324, 'batch_loss/train': 0.709995049983263} +12/28/2021 03:25:36 - INFO - codeparrot_training - Step 39325: {'lr': 5.452458034797067e-05, 'samples': 20134912, 'steps': 39325, 'batch_loss/train': 0.6698522182414308} +12/28/2021 03:25:48 - INFO - codeparrot_training - Step 39326: {'lr': 5.45147539707842e-05, 'samples': 20135424, 'steps': 39326, 'batch_loss/train': 0.6979041143786162} +12/28/2021 03:25:59 - INFO - codeparrot_training - Step 39327: {'lr': 5.4504928370773254e-05, 'samples': 20135936, 'steps': 39327, 'batch_loss/train': 0.7409681244753301} +12/28/2021 03:26:10 - INFO - codeparrot_training - Step 39328: {'lr': 5.449510354797691e-05, 'samples': 20136448, 'steps': 39328, 'batch_loss/train': 0.862577274441719} +12/28/2021 03:26:22 - INFO - codeparrot_training - Step 39329: {'lr': 5.44852795024342e-05, 'samples': 20136960, 'steps': 39329, 'batch_loss/train': 0.7847136841155589} +12/28/2021 03:26:32 - INFO - codeparrot_training - Step 39330: {'lr': 5.44754562341842e-05, 'samples': 20137472, 'steps': 39330, 'batch_loss/train': 0.8183312087785453} +12/28/2021 03:26:43 - INFO - codeparrot_training - Step 39331: {'lr': 5.446563374326596e-05, 'samples': 20137984, 'steps': 39331, 'batch_loss/train': 0.7529418069170788} +12/28/2021 03:26:56 - INFO - codeparrot_training - Step 39332: {'lr': 5.445581202971855e-05, 'samples': 20138496, 'steps': 39332, 'batch_loss/train': 0.790756743401289} +12/28/2021 03:27:06 - INFO - codeparrot_training - Step 39333: {'lr': 5.4445991093580975e-05, 'samples': 20139008, 'steps': 39333, 'batch_loss/train': 0.8084138052072376} +12/28/2021 03:27:17 - INFO - codeparrot_training - Step 39334: {'lr': 5.4436170934892365e-05, 'samples': 20139520, 'steps': 39334, 'batch_loss/train': 0.6368199265561998} +12/28/2021 03:27:29 - INFO - codeparrot_training - Step 39335: {'lr': 5.442635155369166e-05, 'samples': 20140032, 'steps': 39335, 'batch_loss/train': 0.6565889311023057} +12/28/2021 03:27:40 - INFO - codeparrot_training - Step 39336: {'lr': 5.441653295001783e-05, 'samples': 20140544, 'steps': 39336, 'batch_loss/train': 0.8225039590615779} +12/28/2021 03:27:50 - INFO - codeparrot_training - Step 39337: {'lr': 5.440671512391018e-05, 'samples': 20141056, 'steps': 39337, 'batch_loss/train': 0.7199473921209574} +12/28/2021 03:28:01 - INFO - codeparrot_training - Step 39338: {'lr': 5.439689807540748e-05, 'samples': 20141568, 'steps': 39338, 'batch_loss/train': 0.7278546569868922} +12/28/2021 03:28:13 - INFO - codeparrot_training - Step 39339: {'lr': 5.438708180454888e-05, 'samples': 20142080, 'steps': 39339, 'batch_loss/train': 0.9168185824528337} +12/28/2021 03:28:24 - INFO - codeparrot_training - Step 39340: {'lr': 5.437726631137338e-05, 'samples': 20142592, 'steps': 39340, 'batch_loss/train': 0.7477435655891895} +12/28/2021 03:28:34 - INFO - codeparrot_training - Step 39341: {'lr': 5.436745159591999e-05, 'samples': 20143104, 'steps': 39341, 'batch_loss/train': 0.580434930510819} +12/28/2021 03:28:47 - INFO - codeparrot_training - Step 39342: {'lr': 5.435763765822776e-05, 'samples': 20143616, 'steps': 39342, 'batch_loss/train': 0.6251293580280617} +12/28/2021 03:28:58 - INFO - codeparrot_training - Step 39343: {'lr': 5.4347824498335694e-05, 'samples': 20144128, 'steps': 39343, 'batch_loss/train': 0.7158258492127061} +12/28/2021 03:29:09 - INFO - codeparrot_training - Step 39344: {'lr': 5.4338012116282784e-05, 'samples': 20144640, 'steps': 39344, 'batch_loss/train': 0.7156038420507684} +12/28/2021 03:29:21 - INFO - codeparrot_training - Step 39345: {'lr': 5.4328200512108076e-05, 'samples': 20145152, 'steps': 39345, 'batch_loss/train': 0.7983291166601703} +12/28/2021 03:29:31 - INFO - codeparrot_training - Step 39346: {'lr': 5.431838968585062e-05, 'samples': 20145664, 'steps': 39346, 'batch_loss/train': 0.6625806582160294} +12/28/2021 03:29:42 - INFO - codeparrot_training - Step 39347: {'lr': 5.430857963754923e-05, 'samples': 20146176, 'steps': 39347, 'batch_loss/train': 0.7165312321740203} +12/28/2021 03:29:53 - INFO - codeparrot_training - Step 39348: {'lr': 5.4298770367243095e-05, 'samples': 20146688, 'steps': 39348, 'batch_loss/train': 1.660647178068757} +12/28/2021 03:30:05 - INFO - codeparrot_training - Step 39349: {'lr': 5.428896187497123e-05, 'samples': 20147200, 'steps': 39349, 'batch_loss/train': 0.7617213084595278} +12/28/2021 03:30:15 - INFO - codeparrot_training - Step 39350: {'lr': 5.427915416077248e-05, 'samples': 20147712, 'steps': 39350, 'batch_loss/train': 0.728268408216536} +12/28/2021 03:30:26 - INFO - codeparrot_training - Step 39351: {'lr': 5.4269347224685905e-05, 'samples': 20148224, 'steps': 39351, 'batch_loss/train': 0.7281312271370552} +12/28/2021 03:30:39 - INFO - codeparrot_training - Step 39352: {'lr': 5.4259541066750516e-05, 'samples': 20148736, 'steps': 39352, 'batch_loss/train': 0.9394921418279409} +12/28/2021 03:30:50 - INFO - codeparrot_training - Step 39353: {'lr': 5.424973568700528e-05, 'samples': 20149248, 'steps': 39353, 'batch_loss/train': 0.758407891029492} +12/28/2021 03:31:00 - INFO - codeparrot_training - Step 39354: {'lr': 5.423993108548919e-05, 'samples': 20149760, 'steps': 39354, 'batch_loss/train': 0.7141364044509828} +12/28/2021 03:31:12 - INFO - codeparrot_training - Step 39355: {'lr': 5.423012726224119e-05, 'samples': 20150272, 'steps': 39355, 'batch_loss/train': 0.8177954736165702} +12/28/2021 03:31:23 - INFO - codeparrot_training - Step 39356: {'lr': 5.422032421730033e-05, 'samples': 20150784, 'steps': 39356, 'batch_loss/train': 0.7170545170083642} +12/28/2021 03:31:34 - INFO - codeparrot_training - Step 39357: {'lr': 5.42105219507055e-05, 'samples': 20151296, 'steps': 39357, 'batch_loss/train': 0.7615496544167399} +12/28/2021 03:31:44 - INFO - codeparrot_training - Step 39358: {'lr': 5.4200720462495716e-05, 'samples': 20151808, 'steps': 39358, 'batch_loss/train': 0.7820789879187942} +12/28/2021 03:31:56 - INFO - codeparrot_training - Step 39359: {'lr': 5.419091975270993e-05, 'samples': 20152320, 'steps': 39359, 'batch_loss/train': 0.591106356587261} +12/28/2021 03:32:07 - INFO - codeparrot_training - Step 39360: {'lr': 5.418111982138721e-05, 'samples': 20152832, 'steps': 39360, 'batch_loss/train': 0.6852802275680006} +12/28/2021 03:32:18 - INFO - codeparrot_training - Step 39361: {'lr': 5.417132066856631e-05, 'samples': 20153344, 'steps': 39361, 'batch_loss/train': 0.7910247007384896} +12/28/2021 03:32:31 - INFO - codeparrot_training - Step 39362: {'lr': 5.4161522294286256e-05, 'samples': 20153856, 'steps': 39362, 'batch_loss/train': 0.7804459021426737} +12/28/2021 03:32:42 - INFO - codeparrot_training - Step 39363: {'lr': 5.415172469858617e-05, 'samples': 20154368, 'steps': 39363, 'batch_loss/train': 0.6767643947387114} +12/28/2021 03:32:52 - INFO - codeparrot_training - Step 39364: {'lr': 5.414192788150482e-05, 'samples': 20154880, 'steps': 39364, 'batch_loss/train': 0.8365609222091734} +12/28/2021 03:33:03 - INFO - codeparrot_training - Step 39365: {'lr': 5.413213184308113e-05, 'samples': 20155392, 'steps': 39365, 'batch_loss/train': 0.6511794272810221} +12/28/2021 03:33:15 - INFO - codeparrot_training - Step 39366: {'lr': 5.4122336583354275e-05, 'samples': 20155904, 'steps': 39366, 'batch_loss/train': 0.6701450924156234} +12/28/2021 03:33:25 - INFO - codeparrot_training - Step 39367: {'lr': 5.4112542102362965e-05, 'samples': 20156416, 'steps': 39367, 'batch_loss/train': 0.7877148590050638} +12/28/2021 03:33:36 - INFO - codeparrot_training - Step 39368: {'lr': 5.410274840014623e-05, 'samples': 20156928, 'steps': 39368, 'batch_loss/train': 0.8056942410767078} +12/28/2021 03:33:48 - INFO - codeparrot_training - Step 39369: {'lr': 5.4092955476743e-05, 'samples': 20157440, 'steps': 39369, 'batch_loss/train': 0.6912134727463126} +12/28/2021 03:33:59 - INFO - codeparrot_training - Step 39370: {'lr': 5.408316333219224e-05, 'samples': 20157952, 'steps': 39370, 'batch_loss/train': 0.7021306874230504} +12/28/2021 03:34:09 - INFO - codeparrot_training - Step 39371: {'lr': 5.4073371966532806e-05, 'samples': 20158464, 'steps': 39371, 'batch_loss/train': 0.765987410210073} +12/28/2021 03:34:22 - INFO - codeparrot_training - Step 39372: {'lr': 5.406358137980374e-05, 'samples': 20158976, 'steps': 39372, 'batch_loss/train': 0.8496917695738375} +12/28/2021 03:34:33 - INFO - codeparrot_training - Step 39373: {'lr': 5.4053791572043765e-05, 'samples': 20159488, 'steps': 39373, 'batch_loss/train': 0.723912678193301} +12/28/2021 03:34:43 - INFO - codeparrot_training - Step 39374: {'lr': 5.4044002543292006e-05, 'samples': 20160000, 'steps': 39374, 'batch_loss/train': 0.7449505040422082} +12/28/2021 03:34:55 - INFO - codeparrot_training - Step 39375: {'lr': 5.403421429358735e-05, 'samples': 20160512, 'steps': 39375, 'batch_loss/train': 0.7325804140418768} +12/28/2021 03:35:06 - INFO - codeparrot_training - Step 39376: {'lr': 5.40244268229686e-05, 'samples': 20161024, 'steps': 39376, 'batch_loss/train': 0.6922009016270749} +12/28/2021 03:35:17 - INFO - codeparrot_training - Step 39377: {'lr': 5.401464013147467e-05, 'samples': 20161536, 'steps': 39377, 'batch_loss/train': 0.7928176121786237} +12/28/2021 03:35:27 - INFO - codeparrot_training - Step 39378: {'lr': 5.4004854219144665e-05, 'samples': 20162048, 'steps': 39378, 'batch_loss/train': 0.6444073719903827} +12/28/2021 03:35:40 - INFO - codeparrot_training - Step 39379: {'lr': 5.399506908601728e-05, 'samples': 20162560, 'steps': 39379, 'batch_loss/train': 0.7647204040549695} +12/28/2021 03:35:51 - INFO - codeparrot_training - Step 39380: {'lr': 5.398528473213149e-05, 'samples': 20163072, 'steps': 39380, 'batch_loss/train': 0.865391593426466} +12/28/2021 03:36:02 - INFO - codeparrot_training - Step 39381: {'lr': 5.3975501157526195e-05, 'samples': 20163584, 'steps': 39381, 'batch_loss/train': 0.7122541816788726} +12/28/2021 03:36:14 - INFO - codeparrot_training - Step 39382: {'lr': 5.396571836224032e-05, 'samples': 20164096, 'steps': 39382, 'batch_loss/train': 0.8927709013223648} +12/28/2021 03:36:24 - INFO - codeparrot_training - Step 39383: {'lr': 5.395593634631268e-05, 'samples': 20164608, 'steps': 39383, 'batch_loss/train': 0.7774208765476942} +12/28/2021 03:36:35 - INFO - codeparrot_training - Step 39384: {'lr': 5.3946155109782265e-05, 'samples': 20165120, 'steps': 39384, 'batch_loss/train': 0.6154045120347291} +12/28/2021 03:36:47 - INFO - codeparrot_training - Step 39385: {'lr': 5.393637465268789e-05, 'samples': 20165632, 'steps': 39385, 'batch_loss/train': 0.6547950087115169} +12/28/2021 03:36:58 - INFO - codeparrot_training - Step 39386: {'lr': 5.392659497506844e-05, 'samples': 20166144, 'steps': 39386, 'batch_loss/train': 0.8555777240544558} +12/28/2021 03:37:08 - INFO - codeparrot_training - Step 39387: {'lr': 5.3916816076962886e-05, 'samples': 20166656, 'steps': 39387, 'batch_loss/train': 0.7775980444857851} +12/28/2021 03:37:19 - INFO - codeparrot_training - Step 39388: {'lr': 5.3907037958409925e-05, 'samples': 20167168, 'steps': 39388, 'batch_loss/train': 0.7323806337080896} +12/28/2021 03:37:31 - INFO - codeparrot_training - Step 39389: {'lr': 5.3897260619448666e-05, 'samples': 20167680, 'steps': 39389, 'batch_loss/train': 0.7204460557550192} +12/28/2021 03:37:42 - INFO - codeparrot_training - Step 39390: {'lr': 5.388748406011776e-05, 'samples': 20168192, 'steps': 39390, 'batch_loss/train': 0.7372444663196802} +12/28/2021 03:37:52 - INFO - codeparrot_training - Step 39391: {'lr': 5.387770828045613e-05, 'samples': 20168704, 'steps': 39391, 'batch_loss/train': 0.6702423873357475} +12/28/2021 03:38:05 - INFO - codeparrot_training - Step 39392: {'lr': 5.38679332805028e-05, 'samples': 20169216, 'steps': 39392, 'batch_loss/train': 0.8153108800761402} +12/28/2021 03:38:16 - INFO - codeparrot_training - Step 39393: {'lr': 5.3858159060296427e-05, 'samples': 20169728, 'steps': 39393, 'batch_loss/train': 0.7746766554191709} +12/28/2021 03:38:26 - INFO - codeparrot_training - Step 39394: {'lr': 5.384838561987599e-05, 'samples': 20170240, 'steps': 39394, 'batch_loss/train': 0.7054853980662301} +12/28/2021 03:38:39 - INFO - codeparrot_training - Step 39395: {'lr': 5.383861295928027e-05, 'samples': 20170752, 'steps': 39395, 'batch_loss/train': 0.7113927068421617} +12/28/2021 03:38:49 - INFO - codeparrot_training - Step 39396: {'lr': 5.3828841078548185e-05, 'samples': 20171264, 'steps': 39396, 'batch_loss/train': 0.7225595540367067} +12/28/2021 03:39:00 - INFO - codeparrot_training - Step 39397: {'lr': 5.381906997771855e-05, 'samples': 20171776, 'steps': 39397, 'batch_loss/train': 0.6008086476940662} +12/28/2021 03:39:12 - INFO - codeparrot_training - Step 39398: {'lr': 5.3809299656830293e-05, 'samples': 20172288, 'steps': 39398, 'batch_loss/train': 0.6633574180305004} +12/28/2021 03:39:23 - INFO - codeparrot_training - Step 39399: {'lr': 5.3799530115922037e-05, 'samples': 20172800, 'steps': 39399, 'batch_loss/train': 0.7911602072417736} +12/28/2021 03:39:33 - INFO - codeparrot_training - Step 39400: {'lr': 5.378976135503283e-05, 'samples': 20173312, 'steps': 39400, 'batch_loss/train': 0.7631060783751309} +12/28/2021 03:39:44 - INFO - codeparrot_training - Step 39401: {'lr': 5.37799933742015e-05, 'samples': 20173824, 'steps': 39401, 'batch_loss/train': 0.6705792983993888} +12/28/2021 03:39:57 - INFO - codeparrot_training - Step 39402: {'lr': 5.3770226173466717e-05, 'samples': 20174336, 'steps': 39402, 'batch_loss/train': 0.8356925444677472} +12/28/2021 03:40:07 - INFO - codeparrot_training - Step 39403: {'lr': 5.376045975286748e-05, 'samples': 20174848, 'steps': 39403, 'batch_loss/train': 0.813334334641695} +12/28/2021 03:40:18 - INFO - codeparrot_training - Step 39404: {'lr': 5.3750694112442625e-05, 'samples': 20175360, 'steps': 39404, 'batch_loss/train': 0.7842542789876461} +12/28/2021 03:40:30 - INFO - codeparrot_training - Step 39405: {'lr': 5.374092925223084e-05, 'samples': 20175872, 'steps': 39405, 'batch_loss/train': 0.8909340412355959} +12/28/2021 03:40:41 - INFO - codeparrot_training - Step 39406: {'lr': 5.373116517227103e-05, 'samples': 20176384, 'steps': 39406, 'batch_loss/train': 0.6795357305090874} +12/28/2021 03:40:51 - INFO - codeparrot_training - Step 39407: {'lr': 5.372140187260199e-05, 'samples': 20176896, 'steps': 39407, 'batch_loss/train': 0.9007132411934435} +12/28/2021 03:41:04 - INFO - codeparrot_training - Step 39408: {'lr': 5.3711639353262534e-05, 'samples': 20177408, 'steps': 39408, 'batch_loss/train': 0.7600457733497024} +12/28/2021 03:41:15 - INFO - codeparrot_training - Step 39409: {'lr': 5.370187761429149e-05, 'samples': 20177920, 'steps': 39409, 'batch_loss/train': 0.7187704727984965} +12/28/2021 03:41:25 - INFO - codeparrot_training - Step 39410: {'lr': 5.369211665572768e-05, 'samples': 20178432, 'steps': 39410, 'batch_loss/train': 0.7440506359562278} +12/28/2021 03:41:36 - INFO - codeparrot_training - Step 39411: {'lr': 5.3682356477609853e-05, 'samples': 20178944, 'steps': 39411, 'batch_loss/train': 0.7333939950913191} +12/28/2021 03:41:48 - INFO - codeparrot_training - Step 39412: {'lr': 5.3672597079976875e-05, 'samples': 20179456, 'steps': 39412, 'batch_loss/train': 0.6842220886610448} +12/28/2021 03:41:59 - INFO - codeparrot_training - Step 39413: {'lr': 5.36628384628676e-05, 'samples': 20179968, 'steps': 39413, 'batch_loss/train': 0.555921696126461} +12/28/2021 03:42:10 - INFO - codeparrot_training - Step 39414: {'lr': 5.36530806263206e-05, 'samples': 20180480, 'steps': 39414, 'batch_loss/train': 0.7547425997909158} +12/28/2021 03:42:22 - INFO - codeparrot_training - Step 39415: {'lr': 5.3643323570374936e-05, 'samples': 20180992, 'steps': 39415, 'batch_loss/train': 0.6621133610606194} +12/28/2021 03:42:33 - INFO - codeparrot_training - Step 39416: {'lr': 5.363356729506921e-05, 'samples': 20181504, 'steps': 39416, 'batch_loss/train': 0.7693030573427677} +12/28/2021 03:42:43 - INFO - codeparrot_training - Step 39417: {'lr': 5.362381180044221e-05, 'samples': 20182016, 'steps': 39417, 'batch_loss/train': 0.8069823402911425} +12/28/2021 03:42:56 - INFO - codeparrot_training - Step 39418: {'lr': 5.361405708653294e-05, 'samples': 20182528, 'steps': 39418, 'batch_loss/train': 0.7614073029253632} +12/28/2021 03:43:07 - INFO - codeparrot_training - Step 39419: {'lr': 5.360430315337994e-05, 'samples': 20183040, 'steps': 39419, 'batch_loss/train': 0.7453854456543922} +12/28/2021 03:43:17 - INFO - codeparrot_training - Step 39420: {'lr': 5.359455000102209e-05, 'samples': 20183552, 'steps': 39420, 'batch_loss/train': 0.7062373943626881} +12/28/2021 03:43:28 - INFO - codeparrot_training - Step 39421: {'lr': 5.3584797629498136e-05, 'samples': 20184064, 'steps': 39421, 'batch_loss/train': 0.7360385390929878} +12/28/2021 03:43:40 - INFO - codeparrot_training - Step 39422: {'lr': 5.3575046038846856e-05, 'samples': 20184576, 'steps': 39422, 'batch_loss/train': 0.7581028379499912} +12/28/2021 03:43:51 - INFO - codeparrot_training - Step 39423: {'lr': 5.356529522910702e-05, 'samples': 20185088, 'steps': 39423, 'batch_loss/train': 0.7054392085410655} +12/28/2021 03:44:01 - INFO - codeparrot_training - Step 39424: {'lr': 5.3555545200317494e-05, 'samples': 20185600, 'steps': 39424, 'batch_loss/train': 0.801244561560452} +12/28/2021 03:44:13 - INFO - codeparrot_training - Step 39425: {'lr': 5.35457959525168e-05, 'samples': 20186112, 'steps': 39425, 'batch_loss/train': 0.7290278421714902} +12/28/2021 03:44:24 - INFO - codeparrot_training - Step 39426: {'lr': 5.3536047485743924e-05, 'samples': 20186624, 'steps': 39426, 'batch_loss/train': 0.7292734319344163} +12/28/2021 03:44:34 - INFO - codeparrot_training - Step 39427: {'lr': 5.3526299800037594e-05, 'samples': 20187136, 'steps': 39427, 'batch_loss/train': 0.7371703959070146} +12/28/2021 03:44:46 - INFO - codeparrot_training - Step 39428: {'lr': 5.351655289543636e-05, 'samples': 20187648, 'steps': 39428, 'batch_loss/train': 0.80770742171444} +12/28/2021 03:44:57 - INFO - codeparrot_training - Step 39429: {'lr': 5.350680677197919e-05, 'samples': 20188160, 'steps': 39429, 'batch_loss/train': 1.2996415947563946} +12/28/2021 03:45:08 - INFO - codeparrot_training - Step 39430: {'lr': 5.349706142970487e-05, 'samples': 20188672, 'steps': 39430, 'batch_loss/train': 0.697851316537708} +12/28/2021 03:45:21 - INFO - codeparrot_training - Step 39431: {'lr': 5.3487316868651934e-05, 'samples': 20189184, 'steps': 39431, 'batch_loss/train': 0.7207304290495813} +12/28/2021 03:45:31 - INFO - codeparrot_training - Step 39432: {'lr': 5.3477573088859174e-05, 'samples': 20189696, 'steps': 39432, 'batch_loss/train': 0.5863667842932045} +12/28/2021 03:45:42 - INFO - codeparrot_training - Step 39433: {'lr': 5.346783009036552e-05, 'samples': 20190208, 'steps': 39433, 'batch_loss/train': 0.723654449917376} +12/28/2021 03:45:53 - INFO - codeparrot_training - Step 39434: {'lr': 5.3458087873209464e-05, 'samples': 20190720, 'steps': 39434, 'batch_loss/train': 0.7606234066188335} +12/28/2021 03:46:05 - INFO - codeparrot_training - Step 39435: {'lr': 5.344834643742988e-05, 'samples': 20191232, 'steps': 39435, 'batch_loss/train': 0.671953622251749} +12/28/2021 03:46:15 - INFO - codeparrot_training - Step 39436: {'lr': 5.3438605783065447e-05, 'samples': 20191744, 'steps': 39436, 'batch_loss/train': 0.7555265463888645} +12/28/2021 03:46:26 - INFO - codeparrot_training - Step 39437: {'lr': 5.342886591015489e-05, 'samples': 20192256, 'steps': 39437, 'batch_loss/train': 0.7214535223320127} +12/28/2021 03:46:39 - INFO - codeparrot_training - Step 39438: {'lr': 5.341912681873693e-05, 'samples': 20192768, 'steps': 39438, 'batch_loss/train': 0.7911840984597802} +12/28/2021 03:46:49 - INFO - codeparrot_training - Step 39439: {'lr': 5.34093885088503e-05, 'samples': 20193280, 'steps': 39439, 'batch_loss/train': 0.7856487259268761} +12/28/2021 03:47:00 - INFO - codeparrot_training - Step 39440: {'lr': 5.339965098053373e-05, 'samples': 20193792, 'steps': 39440, 'batch_loss/train': 0.6575935987057164} +12/28/2021 03:47:12 - INFO - codeparrot_training - Step 39441: {'lr': 5.338991423382597e-05, 'samples': 20194304, 'steps': 39441, 'batch_loss/train': 0.866133195348084} +12/28/2021 03:47:23 - INFO - codeparrot_training - Step 39442: {'lr': 5.3380178268765595e-05, 'samples': 20194816, 'steps': 39442, 'batch_loss/train': 0.674976222217083} +12/28/2021 03:47:33 - INFO - codeparrot_training - Step 39443: {'lr': 5.3370443085391354e-05, 'samples': 20195328, 'steps': 39443, 'batch_loss/train': 0.7927540550008416} +12/28/2021 03:47:44 - INFO - codeparrot_training - Step 39444: {'lr': 5.33607086837421e-05, 'samples': 20195840, 'steps': 39444, 'batch_loss/train': 0.7096011554822326} +12/28/2021 03:47:57 - INFO - codeparrot_training - Step 39445: {'lr': 5.335097506385633e-05, 'samples': 20196352, 'steps': 39445, 'batch_loss/train': 0.8216638413723558} +12/28/2021 03:48:07 - INFO - codeparrot_training - Step 39446: {'lr': 5.334124222577288e-05, 'samples': 20196864, 'steps': 39446, 'batch_loss/train': 0.8768046479672194} +12/28/2021 03:48:18 - INFO - codeparrot_training - Step 39447: {'lr': 5.333151016953036e-05, 'samples': 20197376, 'steps': 39447, 'batch_loss/train': 0.7923226119019091} +12/28/2021 03:48:30 - INFO - codeparrot_training - Step 39448: {'lr': 5.3321778895167514e-05, 'samples': 20197888, 'steps': 39448, 'batch_loss/train': 0.7977863007690758} +12/28/2021 03:48:41 - INFO - codeparrot_training - Step 39449: {'lr': 5.3312048402723e-05, 'samples': 20198400, 'steps': 39449, 'batch_loss/train': 0.7947170315310359} +12/28/2021 03:48:52 - INFO - codeparrot_training - Step 39450: {'lr': 5.330231869223559e-05, 'samples': 20198912, 'steps': 39450, 'batch_loss/train': 0.817412176169455} +12/28/2021 03:49:04 - INFO - codeparrot_training - Step 39451: {'lr': 5.3292589763743766e-05, 'samples': 20199424, 'steps': 39451, 'batch_loss/train': 0.7836900002439506} +12/28/2021 03:49:15 - INFO - codeparrot_training - Step 39452: {'lr': 5.328286161728638e-05, 'samples': 20199936, 'steps': 39452, 'batch_loss/train': 0.832715867087245} +12/28/2021 03:49:25 - INFO - codeparrot_training - Step 39453: {'lr': 5.327313425290215e-05, 'samples': 20200448, 'steps': 39453, 'batch_loss/train': 0.7242568549700081} +12/28/2021 03:49:36 - INFO - codeparrot_training - Step 39454: {'lr': 5.326340767062948e-05, 'samples': 20200960, 'steps': 39454, 'batch_loss/train': 0.9082569624297321} +12/28/2021 03:49:48 - INFO - codeparrot_training - Step 39455: {'lr': 5.32536818705073e-05, 'samples': 20201472, 'steps': 39455, 'batch_loss/train': 0.6768544069491327} +12/28/2021 03:49:59 - INFO - codeparrot_training - Step 39456: {'lr': 5.32439568525743e-05, 'samples': 20201984, 'steps': 39456, 'batch_loss/train': 0.6968962103128433} +12/28/2021 03:50:09 - INFO - codeparrot_training - Step 39457: {'lr': 5.323423261686891e-05, 'samples': 20202496, 'steps': 39457, 'batch_loss/train': 0.6600030197296292} +12/28/2021 03:50:22 - INFO - codeparrot_training - Step 39458: {'lr': 5.322450916342989e-05, 'samples': 20203008, 'steps': 39458, 'batch_loss/train': 0.7658739178441465} +12/28/2021 03:50:33 - INFO - codeparrot_training - Step 39459: {'lr': 5.321478649229605e-05, 'samples': 20203520, 'steps': 39459, 'batch_loss/train': 0.7794425114989281} +12/28/2021 03:50:43 - INFO - codeparrot_training - Step 39460: {'lr': 5.320506460350582e-05, 'samples': 20204032, 'steps': 39460, 'batch_loss/train': 0.7390720979310572} +12/28/2021 03:50:55 - INFO - codeparrot_training - Step 39461: {'lr': 5.319534349709798e-05, 'samples': 20204544, 'steps': 39461, 'batch_loss/train': 0.7812855243682861} +12/28/2021 03:51:06 - INFO - codeparrot_training - Step 39462: {'lr': 5.318562317311112e-05, 'samples': 20205056, 'steps': 39462, 'batch_loss/train': 0.8252431834116578} +12/28/2021 03:51:17 - INFO - codeparrot_training - Step 39463: {'lr': 5.317590363158392e-05, 'samples': 20205568, 'steps': 39463, 'batch_loss/train': 0.9619421968236566} +12/28/2021 03:51:27 - INFO - codeparrot_training - Step 39464: {'lr': 5.316618487255501e-05, 'samples': 20206080, 'steps': 39464, 'batch_loss/train': 0.7328809965401888} +12/28/2021 03:51:39 - INFO - codeparrot_training - Step 39465: {'lr': 5.3156466896063014e-05, 'samples': 20206592, 'steps': 39465, 'batch_loss/train': 0.7762683797627687} +12/28/2021 03:51:50 - INFO - codeparrot_training - Step 39466: {'lr': 5.314674970214661e-05, 'samples': 20207104, 'steps': 39466, 'batch_loss/train': 0.789859258569777} +12/28/2021 03:52:01 - INFO - codeparrot_training - Step 39467: {'lr': 5.3137033290844375e-05, 'samples': 20207616, 'steps': 39467, 'batch_loss/train': 0.7961180275306106} +12/28/2021 03:52:14 - INFO - codeparrot_training - Step 39468: {'lr': 5.312731766219503e-05, 'samples': 20208128, 'steps': 39468, 'batch_loss/train': 0.7739022010937333} +12/28/2021 03:52:24 - INFO - codeparrot_training - Step 39469: {'lr': 5.311760281623701e-05, 'samples': 20208640, 'steps': 39469, 'batch_loss/train': 0.7133411024697125} +12/28/2021 03:52:35 - INFO - codeparrot_training - Step 39470: {'lr': 5.31078887530092e-05, 'samples': 20209152, 'steps': 39470, 'batch_loss/train': 0.6681692483834922} +12/28/2021 03:52:47 - INFO - codeparrot_training - Step 39471: {'lr': 5.3098175472549996e-05, 'samples': 20209664, 'steps': 39471, 'batch_loss/train': 0.753307712264359} +12/28/2021 03:52:57 - INFO - codeparrot_training - Step 39472: {'lr': 5.308846297489811e-05, 'samples': 20210176, 'steps': 39472, 'batch_loss/train': 0.7991049312986434} +12/28/2021 03:53:08 - INFO - codeparrot_training - Step 39473: {'lr': 5.307875126009215e-05, 'samples': 20210688, 'steps': 39473, 'batch_loss/train': 0.8328766617923975} +12/28/2021 03:53:19 - INFO - codeparrot_training - Step 39474: {'lr': 5.30690403281707e-05, 'samples': 20211200, 'steps': 39474, 'batch_loss/train': 0.7671182518824935} +12/28/2021 03:53:31 - INFO - codeparrot_training - Step 39475: {'lr': 5.3059330179172416e-05, 'samples': 20211712, 'steps': 39475, 'batch_loss/train': 0.7624864378012717} +12/28/2021 03:53:42 - INFO - codeparrot_training - Step 39476: {'lr': 5.304962081313586e-05, 'samples': 20212224, 'steps': 39476, 'batch_loss/train': 0.7009523280430585} +12/28/2021 03:53:52 - INFO - codeparrot_training - Step 39477: {'lr': 5.303991223009963e-05, 'samples': 20212736, 'steps': 39477, 'batch_loss/train': 0.7281534804496914} +12/28/2021 03:54:05 - INFO - codeparrot_training - Step 39478: {'lr': 5.303020443010237e-05, 'samples': 20213248, 'steps': 39478, 'batch_loss/train': 0.7090143738314509} +12/28/2021 03:54:16 - INFO - codeparrot_training - Step 39479: {'lr': 5.302049741318268e-05, 'samples': 20213760, 'steps': 39479, 'batch_loss/train': 0.7509523617336527} +12/28/2021 03:54:26 - INFO - codeparrot_training - Step 39480: {'lr': 5.3010791179378986e-05, 'samples': 20214272, 'steps': 39480, 'batch_loss/train': 0.7893891409039497} +12/28/2021 03:54:39 - INFO - codeparrot_training - Step 39481: {'lr': 5.300108572873011e-05, 'samples': 20214784, 'steps': 39481, 'batch_loss/train': 0.795395475346595} +12/28/2021 03:54:50 - INFO - codeparrot_training - Step 39482: {'lr': 5.299138106127458e-05, 'samples': 20215296, 'steps': 39482, 'batch_loss/train': 0.8057125471532345} +12/28/2021 03:55:00 - INFO - codeparrot_training - Step 39483: {'lr': 5.298167717705085e-05, 'samples': 20215808, 'steps': 39483, 'batch_loss/train': 0.7741200644522905} +12/28/2021 03:55:11 - INFO - codeparrot_training - Step 39484: {'lr': 5.2971974076097524e-05, 'samples': 20216320, 'steps': 39484, 'batch_loss/train': 0.8375154016539454} +12/28/2021 03:55:24 - INFO - codeparrot_training - Step 39485: {'lr': 5.296227175845336e-05, 'samples': 20216832, 'steps': 39485, 'batch_loss/train': 1.1451807075645775} +12/28/2021 03:55:35 - INFO - codeparrot_training - Step 39486: {'lr': 5.295257022415673e-05, 'samples': 20217344, 'steps': 39486, 'batch_loss/train': 0.5663501154631376} +12/28/2021 03:55:45 - INFO - codeparrot_training - Step 39487: {'lr': 5.2942869473246306e-05, 'samples': 20217856, 'steps': 39487, 'batch_loss/train': 0.5451652510091662} +12/28/2021 03:55:57 - INFO - codeparrot_training - Step 39488: {'lr': 5.2933169505760607e-05, 'samples': 20218368, 'steps': 39488, 'batch_loss/train': 0.6872301953844726} +12/28/2021 03:56:08 - INFO - codeparrot_training - Step 39489: {'lr': 5.292347032173822e-05, 'samples': 20218880, 'steps': 39489, 'batch_loss/train': 0.6846832782030106} +12/28/2021 03:56:19 - INFO - codeparrot_training - Step 39490: {'lr': 5.2913771921217714e-05, 'samples': 20219392, 'steps': 39490, 'batch_loss/train': 0.6886759297922254} +12/28/2021 03:56:29 - INFO - codeparrot_training - Step 39491: {'lr': 5.29040743042376e-05, 'samples': 20219904, 'steps': 39491, 'batch_loss/train': 0.7797865075990558} +12/28/2021 03:56:42 - INFO - codeparrot_training - Step 39492: {'lr': 5.28943774708365e-05, 'samples': 20220416, 'steps': 39492, 'batch_loss/train': 0.733067914377898} +12/28/2021 03:56:52 - INFO - codeparrot_training - Step 39493: {'lr': 5.288468142105293e-05, 'samples': 20220928, 'steps': 39493, 'batch_loss/train': 0.976993944728747} +12/28/2021 03:57:03 - INFO - codeparrot_training - Step 39494: {'lr': 5.2874986154925504e-05, 'samples': 20221440, 'steps': 39494, 'batch_loss/train': 0.6689965697005391} +12/28/2021 03:57:16 - INFO - codeparrot_training - Step 39495: {'lr': 5.2865291672492544e-05, 'samples': 20221952, 'steps': 39495, 'batch_loss/train': 0.6948798093944788} +12/28/2021 03:57:26 - INFO - codeparrot_training - Step 39496: {'lr': 5.285559797379291e-05, 'samples': 20222464, 'steps': 39496, 'batch_loss/train': 0.7858500145375729} +12/28/2021 03:57:37 - INFO - codeparrot_training - Step 39497: {'lr': 5.2845905058864916e-05, 'samples': 20222976, 'steps': 39497, 'batch_loss/train': 0.7580390367656946} +12/28/2021 03:57:47 - INFO - codeparrot_training - Step 39498: {'lr': 5.2836212927747124e-05, 'samples': 20223488, 'steps': 39498, 'batch_loss/train': 0.7396728296298534} +12/28/2021 03:58:00 - INFO - codeparrot_training - Step 39499: {'lr': 5.282652158047813e-05, 'samples': 20224000, 'steps': 39499, 'batch_loss/train': 0.6015521096996963} +12/28/2021 03:58:10 - INFO - codeparrot_training - Step 39500: {'lr': 5.281683101709644e-05, 'samples': 20224512, 'steps': 39500, 'batch_loss/train': 0.6730036404915154} +12/28/2021 03:58:21 - INFO - codeparrot_training - Step 39501: {'lr': 5.280714123764058e-05, 'samples': 20225024, 'steps': 39501, 'batch_loss/train': 0.7437726520001888} +12/28/2021 03:58:33 - INFO - codeparrot_training - Step 39502: {'lr': 5.2797452242149046e-05, 'samples': 20225536, 'steps': 39502, 'batch_loss/train': 0.5893693719990551} +12/28/2021 03:58:44 - INFO - codeparrot_training - Step 39503: {'lr': 5.278776403066041e-05, 'samples': 20226048, 'steps': 39503, 'batch_loss/train': 0.5687245114240795} +12/28/2021 03:58:55 - INFO - codeparrot_training - Step 39504: {'lr': 5.277807660321313e-05, 'samples': 20226560, 'steps': 39504, 'batch_loss/train': 0.8163926750421524} +12/28/2021 03:59:05 - INFO - codeparrot_training - Step 39505: {'lr': 5.276838995984584e-05, 'samples': 20227072, 'steps': 39505, 'batch_loss/train': 0.7564628190593794} +12/28/2021 03:59:17 - INFO - codeparrot_training - Step 39506: {'lr': 5.27587041005968e-05, 'samples': 20227584, 'steps': 39506, 'batch_loss/train': 0.8105967100709677} +12/28/2021 03:59:28 - INFO - codeparrot_training - Step 39507: {'lr': 5.2749019025504777e-05, 'samples': 20228096, 'steps': 39507, 'batch_loss/train': 0.7663383865728974} +12/28/2021 03:59:39 - INFO - codeparrot_training - Step 39508: {'lr': 5.273933473460821e-05, 'samples': 20228608, 'steps': 39508, 'batch_loss/train': 0.8066987665370107} +12/28/2021 03:59:51 - INFO - codeparrot_training - Step 39509: {'lr': 5.2729651227945514e-05, 'samples': 20229120, 'steps': 39509, 'batch_loss/train': 0.7933130403980613} +12/28/2021 04:00:02 - INFO - codeparrot_training - Step 39510: {'lr': 5.271996850555516e-05, 'samples': 20229632, 'steps': 39510, 'batch_loss/train': 0.7127966834232211} +12/28/2021 04:00:12 - INFO - codeparrot_training - Step 39511: {'lr': 5.2710286567475867e-05, 'samples': 20230144, 'steps': 39511, 'batch_loss/train': 0.7647283058613539} +12/28/2021 04:00:24 - INFO - codeparrot_training - Step 39512: {'lr': 5.270060541374594e-05, 'samples': 20230656, 'steps': 39512, 'batch_loss/train': 0.8309175898320973} +12/28/2021 04:00:35 - INFO - codeparrot_training - Step 39513: {'lr': 5.269092504440379e-05, 'samples': 20231168, 'steps': 39513, 'batch_loss/train': 0.7005123142153025} +12/28/2021 04:00:46 - INFO - codeparrot_training - Step 39514: {'lr': 5.2681245459488184e-05, 'samples': 20231680, 'steps': 39514, 'batch_loss/train': 0.786739761591889} +12/28/2021 04:01:00 - INFO - codeparrot_training - Step 39515: {'lr': 5.2671566659037386e-05, 'samples': 20232192, 'steps': 39515, 'batch_loss/train': 0.7465428072027862} +12/28/2021 04:01:11 - INFO - codeparrot_training - Step 39516: {'lr': 5.26618886430899e-05, 'samples': 20232704, 'steps': 39516, 'batch_loss/train': 0.7741858903318644} +12/28/2021 04:01:21 - INFO - codeparrot_training - Step 39517: {'lr': 5.265221141168425e-05, 'samples': 20233216, 'steps': 39517, 'batch_loss/train': 0.8053018320351839} +12/28/2021 04:01:32 - INFO - codeparrot_training - Step 39518: {'lr': 5.264253496485888e-05, 'samples': 20233728, 'steps': 39518, 'batch_loss/train': 0.7997505096718669} +12/28/2021 04:01:44 - INFO - codeparrot_training - Step 39519: {'lr': 5.2632859302652295e-05, 'samples': 20234240, 'steps': 39519, 'batch_loss/train': 0.7403130661696196} +12/28/2021 04:01:55 - INFO - codeparrot_training - Step 39520: {'lr': 5.2623184425103e-05, 'samples': 20234752, 'steps': 39520, 'batch_loss/train': 0.703240207512863} +12/28/2021 04:02:06 - INFO - codeparrot_training - Step 39521: {'lr': 5.261351033224923e-05, 'samples': 20235264, 'steps': 39521, 'batch_loss/train': 0.8045481871813536} +12/28/2021 04:02:19 - INFO - codeparrot_training - Step 39522: {'lr': 5.260383702412971e-05, 'samples': 20235776, 'steps': 39522, 'batch_loss/train': 0.8855097591876984} +12/28/2021 04:02:30 - INFO - codeparrot_training - Step 39523: {'lr': 5.259416450078286e-05, 'samples': 20236288, 'steps': 39523, 'batch_loss/train': 0.700821787584573} +12/28/2021 04:02:41 - INFO - codeparrot_training - Step 39524: {'lr': 5.2584492762247025e-05, 'samples': 20236800, 'steps': 39524, 'batch_loss/train': 0.819702785462141} +12/28/2021 04:02:53 - INFO - codeparrot_training - Step 39525: {'lr': 5.2574821808560685e-05, 'samples': 20237312, 'steps': 39525, 'batch_loss/train': 0.677205074345693} +12/28/2021 04:03:04 - INFO - codeparrot_training - Step 39526: {'lr': 5.256515163976231e-05, 'samples': 20237824, 'steps': 39526, 'batch_loss/train': 0.7097938568331301} +12/28/2021 04:03:14 - INFO - codeparrot_training - Step 39527: {'lr': 5.2555482255890353e-05, 'samples': 20238336, 'steps': 39527, 'batch_loss/train': 0.8950821693288162} +12/28/2021 04:03:25 - INFO - codeparrot_training - Step 39528: {'lr': 5.2545813656983247e-05, 'samples': 20238848, 'steps': 39528, 'batch_loss/train': 0.8221970946760848} +12/28/2021 04:03:37 - INFO - codeparrot_training - Step 39529: {'lr': 5.253614584307942e-05, 'samples': 20239360, 'steps': 39529, 'batch_loss/train': 0.717199434991926} +12/28/2021 04:03:48 - INFO - codeparrot_training - Step 39530: {'lr': 5.252647881421732e-05, 'samples': 20239872, 'steps': 39530, 'batch_loss/train': 0.6191679565235972} +12/28/2021 04:03:59 - INFO - codeparrot_training - Step 39531: {'lr': 5.251681257043545e-05, 'samples': 20240384, 'steps': 39531, 'batch_loss/train': 0.9427189854905009} +12/28/2021 04:04:13 - INFO - codeparrot_training - Step 39532: {'lr': 5.2507147111772014e-05, 'samples': 20240896, 'steps': 39532, 'batch_loss/train': 0.7077612257562578} +12/28/2021 04:04:24 - INFO - codeparrot_training - Step 39533: {'lr': 5.2497482438265656e-05, 'samples': 20241408, 'steps': 39533, 'batch_loss/train': 0.7485678996890783} +12/28/2021 04:04:34 - INFO - codeparrot_training - Step 39534: {'lr': 5.248781854995482e-05, 'samples': 20241920, 'steps': 39534, 'batch_loss/train': 0.6064744120230898} +12/28/2021 04:04:47 - INFO - codeparrot_training - Step 39535: {'lr': 5.247815544687778e-05, 'samples': 20242432, 'steps': 39535, 'batch_loss/train': 0.7415142427198589} +12/28/2021 04:04:57 - INFO - codeparrot_training - Step 39536: {'lr': 5.246849312907292e-05, 'samples': 20242944, 'steps': 39536, 'batch_loss/train': 0.7681648316793144} +12/28/2021 04:05:08 - INFO - codeparrot_training - Step 39537: {'lr': 5.245883159657891e-05, 'samples': 20243456, 'steps': 39537, 'batch_loss/train': 0.8556383970426396} +12/28/2021 04:05:19 - INFO - codeparrot_training - Step 39538: {'lr': 5.24491708494339e-05, 'samples': 20243968, 'steps': 39538, 'batch_loss/train': 0.6816891863709316} +12/28/2021 04:05:32 - INFO - codeparrot_training - Step 39539: {'lr': 5.2439510887676345e-05, 'samples': 20244480, 'steps': 39539, 'batch_loss/train': 0.7694361675530672} +12/28/2021 04:05:43 - INFO - codeparrot_training - Step 39540: {'lr': 5.2429851711344846e-05, 'samples': 20244992, 'steps': 39540, 'batch_loss/train': 0.7293865485116839} +12/28/2021 04:05:54 - INFO - codeparrot_training - Step 39541: {'lr': 5.242019332047757e-05, 'samples': 20245504, 'steps': 39541, 'batch_loss/train': 0.6256009312346578} +12/28/2021 04:06:06 - INFO - codeparrot_training - Step 39542: {'lr': 5.2410535715113005e-05, 'samples': 20246016, 'steps': 39542, 'batch_loss/train': 0.7398149874061346} +12/28/2021 04:06:16 - INFO - codeparrot_training - Step 39543: {'lr': 5.240087889528952e-05, 'samples': 20246528, 'steps': 39543, 'batch_loss/train': 0.7173384143970907} +12/28/2021 04:06:27 - INFO - codeparrot_training - Step 39544: {'lr': 5.2391222861045557e-05, 'samples': 20247040, 'steps': 39544, 'batch_loss/train': 0.7675345093011856} +12/28/2021 04:06:39 - INFO - codeparrot_training - Step 39545: {'lr': 5.2381567612419495e-05, 'samples': 20247552, 'steps': 39545, 'batch_loss/train': 0.6943133411696181} +12/28/2021 04:06:50 - INFO - codeparrot_training - Step 39546: {'lr': 5.237191314944975e-05, 'samples': 20248064, 'steps': 39546, 'batch_loss/train': 0.7671464122831821} +12/28/2021 04:07:00 - INFO - codeparrot_training - Step 39547: {'lr': 5.236225947217452e-05, 'samples': 20248576, 'steps': 39547, 'batch_loss/train': 0.7169099416350946} +12/28/2021 04:07:11 - INFO - codeparrot_training - Step 39548: {'lr': 5.235260658063237e-05, 'samples': 20249088, 'steps': 39548, 'batch_loss/train': 0.6591742495074868} +12/28/2021 04:07:25 - INFO - codeparrot_training - Step 39549: {'lr': 5.234295447486173e-05, 'samples': 20249600, 'steps': 39549, 'batch_loss/train': 0.8073120112530887} +12/28/2021 04:07:36 - INFO - codeparrot_training - Step 39550: {'lr': 5.23333031549007e-05, 'samples': 20250112, 'steps': 39550, 'batch_loss/train': 0.8019711021333933} +12/28/2021 04:07:46 - INFO - codeparrot_training - Step 39551: {'lr': 5.232365262078798e-05, 'samples': 20250624, 'steps': 39551, 'batch_loss/train': 0.7972091233823448} +12/28/2021 04:07:58 - INFO - codeparrot_training - Step 39552: {'lr': 5.2314002872561714e-05, 'samples': 20251136, 'steps': 39552, 'batch_loss/train': 0.7372803362086415} +12/28/2021 04:08:09 - INFO - codeparrot_training - Step 39553: {'lr': 5.230435391026031e-05, 'samples': 20251648, 'steps': 39553, 'batch_loss/train': 0.7310261745005846} +12/28/2021 04:08:20 - INFO - codeparrot_training - Step 39554: {'lr': 5.229470573392217e-05, 'samples': 20252160, 'steps': 39554, 'batch_loss/train': 0.6623462662100792} +12/28/2021 04:08:32 - INFO - codeparrot_training - Step 39555: {'lr': 5.228505834358563e-05, 'samples': 20252672, 'steps': 39555, 'batch_loss/train': 0.7733089118264616} +12/28/2021 04:08:42 - INFO - codeparrot_training - Step 39556: {'lr': 5.2275411739289016e-05, 'samples': 20253184, 'steps': 39556, 'batch_loss/train': 0.7210176941007376} +12/28/2021 04:08:53 - INFO - codeparrot_training - Step 39557: {'lr': 5.226576592107074e-05, 'samples': 20253696, 'steps': 39557, 'batch_loss/train': 0.7121625049039721} +12/28/2021 04:09:06 - INFO - codeparrot_training - Step 39558: {'lr': 5.2256120888969085e-05, 'samples': 20254208, 'steps': 39558, 'batch_loss/train': 0.768906332552433} +12/28/2021 04:09:17 - INFO - codeparrot_training - Step 39559: {'lr': 5.2246476643022456e-05, 'samples': 20254720, 'steps': 39559, 'batch_loss/train': 1.3456422664457932} +12/28/2021 04:09:27 - INFO - codeparrot_training - Step 39560: {'lr': 5.223683318326922e-05, 'samples': 20255232, 'steps': 39560, 'batch_loss/train': 1.2541991639882326} +12/28/2021 04:09:38 - INFO - codeparrot_training - Step 39561: {'lr': 5.2227190509747585e-05, 'samples': 20255744, 'steps': 39561, 'batch_loss/train': 0.7395387664437294} +12/28/2021 04:09:52 - INFO - codeparrot_training - Step 39562: {'lr': 5.221754862249592e-05, 'samples': 20256256, 'steps': 39562, 'batch_loss/train': 0.7635279502719641} +12/28/2021 04:10:03 - INFO - codeparrot_training - Step 39563: {'lr': 5.2207907521552715e-05, 'samples': 20256768, 'steps': 39563, 'batch_loss/train': 0.5678895735181868} +12/28/2021 04:10:13 - INFO - codeparrot_training - Step 39564: {'lr': 5.219826720695614e-05, 'samples': 20257280, 'steps': 39564, 'batch_loss/train': 0.7383971717208624} +12/28/2021 04:10:26 - INFO - codeparrot_training - Step 39565: {'lr': 5.218862767874449e-05, 'samples': 20257792, 'steps': 39565, 'batch_loss/train': 0.6920982759911567} +12/28/2021 04:10:36 - INFO - codeparrot_training - Step 39566: {'lr': 5.21789889369563e-05, 'samples': 20258304, 'steps': 39566, 'batch_loss/train': 0.6707998330239207} +12/28/2021 04:10:47 - INFO - codeparrot_training - Step 39567: {'lr': 5.216935098162967e-05, 'samples': 20258816, 'steps': 39567, 'batch_loss/train': 0.770027888007462} +12/28/2021 04:10:57 - INFO - codeparrot_training - Step 39568: {'lr': 5.215971381280304e-05, 'samples': 20259328, 'steps': 39568, 'batch_loss/train': 0.764484285376966} +12/28/2021 04:11:11 - INFO - codeparrot_training - Step 39569: {'lr': 5.215007743051467e-05, 'samples': 20259840, 'steps': 39569, 'batch_loss/train': 0.7969617573544383} +12/28/2021 04:11:22 - INFO - codeparrot_training - Step 39570: {'lr': 5.2140441834802874e-05, 'samples': 20260352, 'steps': 39570, 'batch_loss/train': 0.6881572604179382} +12/28/2021 04:11:33 - INFO - codeparrot_training - Step 39571: {'lr': 5.2130807025706e-05, 'samples': 20260864, 'steps': 39571, 'batch_loss/train': 0.694445324363187} +12/28/2021 04:11:45 - INFO - codeparrot_training - Step 39572: {'lr': 5.2121173003262355e-05, 'samples': 20261376, 'steps': 39572, 'batch_loss/train': 0.7845599967986345} +12/28/2021 04:11:55 - INFO - codeparrot_training - Step 39573: {'lr': 5.21115397675101e-05, 'samples': 20261888, 'steps': 39573, 'batch_loss/train': 0.7332054583821446} +12/28/2021 04:12:06 - INFO - codeparrot_training - Step 39574: {'lr': 5.210190731848771e-05, 'samples': 20262400, 'steps': 39574, 'batch_loss/train': 0.751812526024878} +12/28/2021 04:12:18 - INFO - codeparrot_training - Step 39575: {'lr': 5.209227565623348e-05, 'samples': 20262912, 'steps': 39575, 'batch_loss/train': 0.8098681080155075} +12/28/2021 04:12:29 - INFO - codeparrot_training - Step 39576: {'lr': 5.208264478078548e-05, 'samples': 20263424, 'steps': 39576, 'batch_loss/train': 0.6588932536542416} +12/28/2021 04:12:39 - INFO - codeparrot_training - Step 39577: {'lr': 5.207301469218231e-05, 'samples': 20263936, 'steps': 39577, 'batch_loss/train': 0.6943796155974269} +12/28/2021 04:12:50 - INFO - codeparrot_training - Step 39578: {'lr': 5.206338539046204e-05, 'samples': 20264448, 'steps': 39578, 'batch_loss/train': 0.6054997416213155} +12/28/2021 04:13:04 - INFO - codeparrot_training - Step 39579: {'lr': 5.205375687566299e-05, 'samples': 20264960, 'steps': 39579, 'batch_loss/train': 0.6770995217375457} +12/28/2021 04:13:15 - INFO - codeparrot_training - Step 39580: {'lr': 5.204412914782347e-05, 'samples': 20265472, 'steps': 39580, 'batch_loss/train': 0.6087929778150283} +12/28/2021 04:13:25 - INFO - codeparrot_training - Step 39581: {'lr': 5.203450220698175e-05, 'samples': 20265984, 'steps': 39581, 'batch_loss/train': 0.7493968333583325} +12/28/2021 04:13:37 - INFO - codeparrot_training - Step 39582: {'lr': 5.20248760531761e-05, 'samples': 20266496, 'steps': 39582, 'batch_loss/train': 0.8489007772877812} +12/28/2021 04:13:48 - INFO - codeparrot_training - Step 39583: {'lr': 5.201525068644478e-05, 'samples': 20267008, 'steps': 39583, 'batch_loss/train': 0.7359315338544548} +12/28/2021 04:13:58 - INFO - codeparrot_training - Step 39584: {'lr': 5.2005626106826074e-05, 'samples': 20267520, 'steps': 39584, 'batch_loss/train': 0.6875851983204484} +12/28/2021 04:14:10 - INFO - codeparrot_training - Step 39585: {'lr': 5.199600231435825e-05, 'samples': 20268032, 'steps': 39585, 'batch_loss/train': 0.7023877198225819} +12/28/2021 04:14:21 - INFO - codeparrot_training - Step 39586: {'lr': 5.1986379309079595e-05, 'samples': 20268544, 'steps': 39586, 'batch_loss/train': 0.7152821407653391} +12/28/2021 04:14:32 - INFO - codeparrot_training - Step 39587: {'lr': 5.197675709102823e-05, 'samples': 20269056, 'steps': 39587, 'batch_loss/train': 0.7999163055792451} +12/28/2021 04:14:46 - INFO - codeparrot_training - Step 39588: {'lr': 5.196713566024255e-05, 'samples': 20269568, 'steps': 39588, 'batch_loss/train': 0.769162273965776} +12/28/2021 04:14:56 - INFO - codeparrot_training - Step 39589: {'lr': 5.195751501676085e-05, 'samples': 20270080, 'steps': 39589, 'batch_loss/train': 0.7438411233015358} +12/28/2021 04:15:07 - INFO - codeparrot_training - Step 39590: {'lr': 5.194789516062121e-05, 'samples': 20270592, 'steps': 39590, 'batch_loss/train': 0.7379464539699256} +12/28/2021 04:15:18 - INFO - codeparrot_training - Step 39591: {'lr': 5.193827609186189e-05, 'samples': 20271104, 'steps': 39591, 'batch_loss/train': 0.7672349782660604} +12/28/2021 04:15:30 - INFO - codeparrot_training - Step 39592: {'lr': 5.192865781052133e-05, 'samples': 20271616, 'steps': 39592, 'batch_loss/train': 0.75081807654351} +12/28/2021 04:15:40 - INFO - codeparrot_training - Step 39593: {'lr': 5.191904031663758e-05, 'samples': 20272128, 'steps': 39593, 'batch_loss/train': 0.6515283366898075} +12/28/2021 04:15:51 - INFO - codeparrot_training - Step 39594: {'lr': 5.1909423610248943e-05, 'samples': 20272640, 'steps': 39594, 'batch_loss/train': 0.7152600893750787} +12/28/2021 04:16:03 - INFO - codeparrot_training - Step 39595: {'lr': 5.189980769139363e-05, 'samples': 20273152, 'steps': 39595, 'batch_loss/train': 0.8139065532013774} +12/28/2021 04:16:14 - INFO - codeparrot_training - Step 39596: {'lr': 5.189019256010988e-05, 'samples': 20273664, 'steps': 39596, 'batch_loss/train': 0.6639184164814651} +12/28/2021 04:16:24 - INFO - codeparrot_training - Step 39597: {'lr': 5.188057821643594e-05, 'samples': 20274176, 'steps': 39597, 'batch_loss/train': 0.7342574633657932} +12/28/2021 04:16:38 - INFO - codeparrot_training - Step 39598: {'lr': 5.1870964660410054e-05, 'samples': 20274688, 'steps': 39598, 'batch_loss/train': 0.7287344094365835} +12/28/2021 04:16:49 - INFO - codeparrot_training - Step 39599: {'lr': 5.186135189207028e-05, 'samples': 20275200, 'steps': 39599, 'batch_loss/train': 0.7585508488118649} +12/28/2021 04:16:59 - INFO - codeparrot_training - Step 39600: {'lr': 5.185173991145503e-05, 'samples': 20275712, 'steps': 39600, 'batch_loss/train': 0.7730108662508428} +12/28/2021 04:17:10 - INFO - codeparrot_training - Step 39601: {'lr': 5.18421287186025e-05, 'samples': 20276224, 'steps': 39601, 'batch_loss/train': 0.6897345795296133} +12/28/2021 04:17:22 - INFO - codeparrot_training - Step 39602: {'lr': 5.183251831355071e-05, 'samples': 20276736, 'steps': 39602, 'batch_loss/train': 0.6653158687986434} +12/28/2021 04:17:33 - INFO - codeparrot_training - Step 39603: {'lr': 5.182290869633807e-05, 'samples': 20277248, 'steps': 39603, 'batch_loss/train': 0.9142854269593954} +12/28/2021 04:17:43 - INFO - codeparrot_training - Step 39604: {'lr': 5.1813299867002784e-05, 'samples': 20277760, 'steps': 39604, 'batch_loss/train': 0.795382262673229} +12/28/2021 04:17:56 - INFO - codeparrot_training - Step 39605: {'lr': 5.1803691825582924e-05, 'samples': 20278272, 'steps': 39605, 'batch_loss/train': 0.8666467778384686} +12/28/2021 04:18:06 - INFO - codeparrot_training - Step 39606: {'lr': 5.1794084572116765e-05, 'samples': 20278784, 'steps': 39606, 'batch_loss/train': 0.6971340943127871} +12/28/2021 04:18:17 - INFO - codeparrot_training - Step 39607: {'lr': 5.178447810664244e-05, 'samples': 20279296, 'steps': 39607, 'batch_loss/train': 0.662275439593941} +12/28/2021 04:18:31 - INFO - codeparrot_training - Step 39608: {'lr': 5.1774872429198236e-05, 'samples': 20279808, 'steps': 39608, 'batch_loss/train': 0.7480289028026164} +12/28/2021 04:18:42 - INFO - codeparrot_training - Step 39609: {'lr': 5.1765267539822277e-05, 'samples': 20280320, 'steps': 39609, 'batch_loss/train': 0.691581305116415} +12/28/2021 04:18:52 - INFO - codeparrot_training - Step 39610: {'lr': 5.1755663438552733e-05, 'samples': 20280832, 'steps': 39610, 'batch_loss/train': 1.4826431386172771} +12/28/2021 04:19:03 - INFO - codeparrot_training - Step 39611: {'lr': 5.1746060125427846e-05, 'samples': 20281344, 'steps': 39611, 'batch_loss/train': 0.7383647351525724} +12/28/2021 04:19:15 - INFO - codeparrot_training - Step 39612: {'lr': 5.1736457600485845e-05, 'samples': 20281856, 'steps': 39612, 'batch_loss/train': 0.7275293837301433} +12/28/2021 04:19:26 - INFO - codeparrot_training - Step 39613: {'lr': 5.172685586376466e-05, 'samples': 20282368, 'steps': 39613, 'batch_loss/train': 0.7694696311373264} +12/28/2021 04:19:37 - INFO - codeparrot_training - Step 39614: {'lr': 5.171725491530271e-05, 'samples': 20282880, 'steps': 39614, 'batch_loss/train': 0.8144179163500667} +12/28/2021 04:19:49 - INFO - codeparrot_training - Step 39615: {'lr': 5.170765475513817e-05, 'samples': 20283392, 'steps': 39615, 'batch_loss/train': 0.7570091024972498} +12/28/2021 04:19:59 - INFO - codeparrot_training - Step 39616: {'lr': 5.169805538330904e-05, 'samples': 20283904, 'steps': 39616, 'batch_loss/train': 0.7317489363485947} +12/28/2021 04:20:10 - INFO - codeparrot_training - Step 39617: {'lr': 5.1688456799853515e-05, 'samples': 20284416, 'steps': 39617, 'batch_loss/train': 0.6716732583008707} +12/28/2021 04:20:24 - INFO - codeparrot_training - Step 39618: {'lr': 5.1678859004809926e-05, 'samples': 20284928, 'steps': 39618, 'batch_loss/train': 0.6830583205446601} +12/28/2021 04:20:35 - INFO - codeparrot_training - Step 39619: {'lr': 5.166926199821623e-05, 'samples': 20285440, 'steps': 39619, 'batch_loss/train': 0.6768754990771413} +12/28/2021 04:20:45 - INFO - codeparrot_training - Step 39620: {'lr': 5.165966578011069e-05, 'samples': 20285952, 'steps': 39620, 'batch_loss/train': 0.8053790563717484} +12/28/2021 04:20:56 - INFO - codeparrot_training - Step 39621: {'lr': 5.1650070350531405e-05, 'samples': 20286464, 'steps': 39621, 'batch_loss/train': 1.5687153005274013} +12/28/2021 04:21:08 - INFO - codeparrot_training - Step 39622: {'lr': 5.164047570951655e-05, 'samples': 20286976, 'steps': 39622, 'batch_loss/train': 0.7284638904966414} +12/28/2021 04:21:19 - INFO - codeparrot_training - Step 39623: {'lr': 5.1630881857104266e-05, 'samples': 20287488, 'steps': 39623, 'batch_loss/train': 0.7600716883316636} +12/28/2021 04:21:29 - INFO - codeparrot_training - Step 39624: {'lr': 5.162128879333272e-05, 'samples': 20288000, 'steps': 39624, 'batch_loss/train': 0.781382367014885} +12/28/2021 04:21:43 - INFO - codeparrot_training - Step 39625: {'lr': 5.1611696518239994e-05, 'samples': 20288512, 'steps': 39625, 'batch_loss/train': 0.8082688609138131} +12/28/2021 04:21:54 - INFO - codeparrot_training - Step 39626: {'lr': 5.1602105031864274e-05, 'samples': 20289024, 'steps': 39626, 'batch_loss/train': 0.8433557678945363} +12/28/2021 04:22:04 - INFO - codeparrot_training - Step 39627: {'lr': 5.1592514334243724e-05, 'samples': 20289536, 'steps': 39627, 'batch_loss/train': 0.7461097706109285} +12/28/2021 04:22:16 - INFO - codeparrot_training - Step 39628: {'lr': 5.158292442541629e-05, 'samples': 20290048, 'steps': 39628, 'batch_loss/train': 0.6623764494433999} +12/28/2021 04:22:27 - INFO - codeparrot_training - Step 39629: {'lr': 5.15733353054203e-05, 'samples': 20290560, 'steps': 39629, 'batch_loss/train': 0.8051587454974651} +12/28/2021 04:22:38 - INFO - codeparrot_training - Step 39630: {'lr': 5.156374697429389e-05, 'samples': 20291072, 'steps': 39630, 'batch_loss/train': 0.7394818935426883} +12/28/2021 04:22:48 - INFO - codeparrot_training - Step 39631: {'lr': 5.1554159432075006e-05, 'samples': 20291584, 'steps': 39631, 'batch_loss/train': 0.6912846127524972} +12/28/2021 04:23:00 - INFO - codeparrot_training - Step 39632: {'lr': 5.154457267880186e-05, 'samples': 20292096, 'steps': 39632, 'batch_loss/train': 0.747750504873693} +12/28/2021 04:23:11 - INFO - codeparrot_training - Step 39633: {'lr': 5.153498671451254e-05, 'samples': 20292608, 'steps': 39633, 'batch_loss/train': 0.7268418396124616} +12/28/2021 04:23:22 - INFO - codeparrot_training - Step 39634: {'lr': 5.1525401539245205e-05, 'samples': 20293120, 'steps': 39634, 'batch_loss/train': 0.6669947896152735} +12/28/2021 04:23:35 - INFO - codeparrot_training - Step 39635: {'lr': 5.151581715303791e-05, 'samples': 20293632, 'steps': 39635, 'batch_loss/train': 0.7268478516489267} +12/28/2021 04:23:46 - INFO - codeparrot_training - Step 39636: {'lr': 5.1506233555928786e-05, 'samples': 20294144, 'steps': 39636, 'batch_loss/train': 0.7868985859677196} +12/28/2021 04:23:57 - INFO - codeparrot_training - Step 39637: {'lr': 5.149665074795595e-05, 'samples': 20294656, 'steps': 39637, 'batch_loss/train': 0.7289516888558865} +12/28/2021 04:24:09 - INFO - codeparrot_training - Step 39638: {'lr': 5.148706872915745e-05, 'samples': 20295168, 'steps': 39638, 'batch_loss/train': 0.7530101051088423} +12/28/2021 04:24:19 - INFO - codeparrot_training - Step 39639: {'lr': 5.147748749957143e-05, 'samples': 20295680, 'steps': 39639, 'batch_loss/train': 0.7121061543002725} +12/28/2021 04:24:30 - INFO - codeparrot_training - Step 39640: {'lr': 5.1467907059235956e-05, 'samples': 20296192, 'steps': 39640, 'batch_loss/train': 0.7346760043874383} +12/28/2021 04:24:41 - INFO - codeparrot_training - Step 39641: {'lr': 5.1458327408189205e-05, 'samples': 20296704, 'steps': 39641, 'batch_loss/train': 0.8713387306779623} +12/28/2021 04:24:53 - INFO - codeparrot_training - Step 39642: {'lr': 5.1448748546469085e-05, 'samples': 20297216, 'steps': 39642, 'batch_loss/train': 0.6795435920357704} +12/28/2021 04:25:04 - INFO - codeparrot_training - Step 39643: {'lr': 5.14391704741137e-05, 'samples': 20297728, 'steps': 39643, 'batch_loss/train': 0.7367433030158281} +12/28/2021 04:25:14 - INFO - codeparrot_training - Step 39644: {'lr': 5.142959319116133e-05, 'samples': 20298240, 'steps': 39644, 'batch_loss/train': 0.7975420886650681} +12/28/2021 04:25:27 - INFO - codeparrot_training - Step 39645: {'lr': 5.142001669764987e-05, 'samples': 20298752, 'steps': 39645, 'batch_loss/train': 0.6187227219343185} +12/28/2021 04:25:37 - INFO - codeparrot_training - Step 39646: {'lr': 5.141044099361744e-05, 'samples': 20299264, 'steps': 39646, 'batch_loss/train': 0.82148152962327} +12/28/2021 04:25:48 - INFO - codeparrot_training - Step 39647: {'lr': 5.1400866079102135e-05, 'samples': 20299776, 'steps': 39647, 'batch_loss/train': 0.6870973228942603} +12/28/2021 04:26:02 - INFO - codeparrot_training - Step 39648: {'lr': 5.139129195414196e-05, 'samples': 20300288, 'steps': 39648, 'batch_loss/train': 0.6994103563483804} +12/28/2021 04:26:13 - INFO - codeparrot_training - Step 39649: {'lr': 5.138171861877502e-05, 'samples': 20300800, 'steps': 39649, 'batch_loss/train': 0.8051293268799782} +12/28/2021 04:26:23 - INFO - codeparrot_training - Step 39650: {'lr': 5.137214607303939e-05, 'samples': 20301312, 'steps': 39650, 'batch_loss/train': 0.7993084769695997} +12/28/2021 04:26:34 - INFO - codeparrot_training - Step 39651: {'lr': 5.136257431697311e-05, 'samples': 20301824, 'steps': 39651, 'batch_loss/train': 0.5789559172117151} +12/28/2021 04:26:46 - INFO - codeparrot_training - Step 39652: {'lr': 5.1353003350614214e-05, 'samples': 20302336, 'steps': 39652, 'batch_loss/train': 0.5658015960943885} +12/28/2021 04:26:57 - INFO - codeparrot_training - Step 39653: {'lr': 5.1343433174000866e-05, 'samples': 20302848, 'steps': 39653, 'batch_loss/train': 0.7830357691273093} +12/28/2021 04:27:07 - INFO - codeparrot_training - Step 39654: {'lr': 5.133386378717084e-05, 'samples': 20303360, 'steps': 39654, 'batch_loss/train': 0.8100753370672464} +12/28/2021 04:27:20 - INFO - codeparrot_training - Step 39655: {'lr': 5.132429519016246e-05, 'samples': 20303872, 'steps': 39655, 'batch_loss/train': 0.7296365089714527} +12/28/2021 04:27:31 - INFO - codeparrot_training - Step 39656: {'lr': 5.131472738301371e-05, 'samples': 20304384, 'steps': 39656, 'batch_loss/train': 0.8258457621559501} +12/28/2021 04:27:41 - INFO - codeparrot_training - Step 39657: {'lr': 5.130516036576255e-05, 'samples': 20304896, 'steps': 39657, 'batch_loss/train': 0.8143079364672303} +12/28/2021 04:27:53 - INFO - codeparrot_training - Step 39658: {'lr': 5.129559413844701e-05, 'samples': 20305408, 'steps': 39658, 'batch_loss/train': 0.6054388731718063} +12/28/2021 04:28:04 - INFO - codeparrot_training - Step 39659: {'lr': 5.128602870110519e-05, 'samples': 20305920, 'steps': 39659, 'batch_loss/train': 0.6691978343296796} +12/28/2021 04:28:15 - INFO - codeparrot_training - Step 39660: {'lr': 5.127646405377506e-05, 'samples': 20306432, 'steps': 39660, 'batch_loss/train': 0.6746454895474017} +12/28/2021 04:28:25 - INFO - codeparrot_training - Step 39661: {'lr': 5.1266900196494685e-05, 'samples': 20306944, 'steps': 39661, 'batch_loss/train': 0.7539047719910741} +12/28/2021 04:28:38 - INFO - codeparrot_training - Step 39662: {'lr': 5.125733712930208e-05, 'samples': 20307456, 'steps': 39662, 'batch_loss/train': 0.7677418082021177} +12/28/2021 04:28:49 - INFO - codeparrot_training - Step 39663: {'lr': 5.124777485223525e-05, 'samples': 20307968, 'steps': 39663, 'batch_loss/train': 1.6005069811362773} +12/28/2021 04:28:59 - INFO - codeparrot_training - Step 39664: {'lr': 5.1238213365332245e-05, 'samples': 20308480, 'steps': 39664, 'batch_loss/train': 0.7367983208969235} +12/28/2021 04:29:13 - INFO - codeparrot_training - Step 39665: {'lr': 5.122865266863105e-05, 'samples': 20308992, 'steps': 39665, 'batch_loss/train': 0.6889164033345878} +12/28/2021 04:29:23 - INFO - codeparrot_training - Step 39666: {'lr': 5.1219092762169655e-05, 'samples': 20309504, 'steps': 39666, 'batch_loss/train': 0.7066614113282412} +12/28/2021 04:29:34 - INFO - codeparrot_training - Step 39667: {'lr': 5.120953364598618e-05, 'samples': 20310016, 'steps': 39667, 'batch_loss/train': 0.7094760332256556} +12/28/2021 04:29:46 - INFO - codeparrot_training - Step 39668: {'lr': 5.1199975320118467e-05, 'samples': 20310528, 'steps': 39668, 'batch_loss/train': 0.5545350525062531} +12/28/2021 04:29:57 - INFO - codeparrot_training - Step 39669: {'lr': 5.119041778460451e-05, 'samples': 20311040, 'steps': 39669, 'batch_loss/train': 0.6602079742588103} +12/28/2021 04:30:07 - INFO - codeparrot_training - Step 39670: {'lr': 5.1180861039482526e-05, 'samples': 20311552, 'steps': 39670, 'batch_loss/train': 0.765222018584609} +12/28/2021 04:30:19 - INFO - codeparrot_training - Step 39671: {'lr': 5.1171305084790284e-05, 'samples': 20312064, 'steps': 39671, 'batch_loss/train': 0.6978749255649745} +12/28/2021 04:30:30 - INFO - codeparrot_training - Step 39672: {'lr': 5.116174992056588e-05, 'samples': 20312576, 'steps': 39672, 'batch_loss/train': 0.72997931484133} +12/28/2021 04:30:40 - INFO - codeparrot_training - Step 39673: {'lr': 5.115219554684728e-05, 'samples': 20313088, 'steps': 39673, 'batch_loss/train': 0.7458478999324143} +12/28/2021 04:30:51 - INFO - codeparrot_training - Step 39674: {'lr': 5.114264196367247e-05, 'samples': 20313600, 'steps': 39674, 'batch_loss/train': 0.7742873746901751} +12/28/2021 04:31:04 - INFO - codeparrot_training - Step 39675: {'lr': 5.113308917107942e-05, 'samples': 20314112, 'steps': 39675, 'batch_loss/train': 0.675979760941118} +12/28/2021 04:31:15 - INFO - codeparrot_training - Step 39676: {'lr': 5.112353716910612e-05, 'samples': 20314624, 'steps': 39676, 'batch_loss/train': 0.6857357671833597} +12/28/2021 04:31:25 - INFO - codeparrot_training - Step 39677: {'lr': 5.1113985957790546e-05, 'samples': 20315136, 'steps': 39677, 'batch_loss/train': 0.7814164651790634} +12/28/2021 04:31:38 - INFO - codeparrot_training - Step 39678: {'lr': 5.1104435537170644e-05, 'samples': 20315648, 'steps': 39678, 'batch_loss/train': 0.6770315776811913} +12/28/2021 04:31:49 - INFO - codeparrot_training - Step 39679: {'lr': 5.1094885907284505e-05, 'samples': 20316160, 'steps': 39679, 'batch_loss/train': 0.7506450917571783} +12/28/2021 04:31:59 - INFO - codeparrot_training - Step 39680: {'lr': 5.108533706816987e-05, 'samples': 20316672, 'steps': 39680, 'batch_loss/train': 0.7587979503441602} +12/28/2021 04:32:11 - INFO - codeparrot_training - Step 39681: {'lr': 5.1075789019864874e-05, 'samples': 20317184, 'steps': 39681, 'batch_loss/train': 0.6533469976857305} +12/28/2021 04:32:22 - INFO - codeparrot_training - Step 39682: {'lr': 5.1066241762407486e-05, 'samples': 20317696, 'steps': 39682, 'batch_loss/train': 0.6978532541543245} +12/28/2021 04:32:33 - INFO - codeparrot_training - Step 39683: {'lr': 5.105669529583556e-05, 'samples': 20318208, 'steps': 39683, 'batch_loss/train': 0.7809240389615297} +12/28/2021 04:32:43 - INFO - codeparrot_training - Step 39684: {'lr': 5.1047149620187e-05, 'samples': 20318720, 'steps': 39684, 'batch_loss/train': 0.745190987829119} +12/28/2021 04:32:56 - INFO - codeparrot_training - Step 39685: {'lr': 5.103760473550001e-05, 'samples': 20319232, 'steps': 39685, 'batch_loss/train': 0.7341688266023993} +12/28/2021 04:33:07 - INFO - codeparrot_training - Step 39686: {'lr': 5.102806064181231e-05, 'samples': 20319744, 'steps': 39686, 'batch_loss/train': 0.7314262269064784} +12/28/2021 04:33:18 - INFO - codeparrot_training - Step 39687: {'lr': 5.1018517339161905e-05, 'samples': 20320256, 'steps': 39687, 'batch_loss/train': 0.8017358900979161} +12/28/2021 04:33:30 - INFO - codeparrot_training - Step 39688: {'lr': 5.100897482758676e-05, 'samples': 20320768, 'steps': 39688, 'batch_loss/train': 0.6232454550918192} +12/28/2021 04:33:40 - INFO - codeparrot_training - Step 39689: {'lr': 5.0999433107124794e-05, 'samples': 20321280, 'steps': 39689, 'batch_loss/train': 0.7667644200846553} +12/28/2021 04:33:51 - INFO - codeparrot_training - Step 39690: {'lr': 5.098989217781391e-05, 'samples': 20321792, 'steps': 39690, 'batch_loss/train': 0.8533526044338942} +12/28/2021 04:34:03 - INFO - codeparrot_training - Step 39691: {'lr': 5.098035203969209e-05, 'samples': 20322304, 'steps': 39691, 'batch_loss/train': 0.774764153175056} +12/28/2021 04:34:14 - INFO - codeparrot_training - Step 39692: {'lr': 5.097081269279724e-05, 'samples': 20322816, 'steps': 39692, 'batch_loss/train': 0.733292636461556} +12/28/2021 04:34:24 - INFO - codeparrot_training - Step 39693: {'lr': 5.096127413716731e-05, 'samples': 20323328, 'steps': 39693, 'batch_loss/train': 0.6494915883522481} +12/28/2021 04:34:35 - INFO - codeparrot_training - Step 39694: {'lr': 5.0951736372840255e-05, 'samples': 20323840, 'steps': 39694, 'batch_loss/train': 0.7339739948511124} +12/28/2021 04:34:49 - INFO - codeparrot_training - Step 39695: {'lr': 5.094219939985378e-05, 'samples': 20324352, 'steps': 39695, 'batch_loss/train': 0.7131502823904157} +12/28/2021 04:34:59 - INFO - codeparrot_training - Step 39696: {'lr': 5.0932663218246136e-05, 'samples': 20324864, 'steps': 39696, 'batch_loss/train': 0.7928963676095009} +12/28/2021 04:35:10 - INFO - codeparrot_training - Step 39697: {'lr': 5.0923127828054945e-05, 'samples': 20325376, 'steps': 39697, 'batch_loss/train': 0.9259353033266962} +12/28/2021 04:35:22 - INFO - codeparrot_training - Step 39698: {'lr': 5.0913593229318186e-05, 'samples': 20325888, 'steps': 39698, 'batch_loss/train': 0.6910555954091251} +12/28/2021 04:35:33 - INFO - codeparrot_training - Step 39699: {'lr': 5.0904059422073964e-05, 'samples': 20326400, 'steps': 39699, 'batch_loss/train': 0.7807842651382089} +12/28/2021 04:35:43 - INFO - codeparrot_training - Step 39700: {'lr': 5.089452640635991e-05, 'samples': 20326912, 'steps': 39700, 'batch_loss/train': 0.7532022586092353} +12/28/2021 04:35:54 - INFO - codeparrot_training - Step 39701: {'lr': 5.088499418221407e-05, 'samples': 20327424, 'steps': 39701, 'batch_loss/train': 0.7520043560070917} +12/28/2021 04:36:06 - INFO - codeparrot_training - Step 39702: {'lr': 5.08754627496743e-05, 'samples': 20327936, 'steps': 39702, 'batch_loss/train': 0.7325256809126586} +12/28/2021 04:36:16 - INFO - codeparrot_training - Step 39703: {'lr': 5.086593210877852e-05, 'samples': 20328448, 'steps': 39703, 'batch_loss/train': 0.8377211084589362} +12/28/2021 04:36:27 - INFO - codeparrot_training - Step 39704: {'lr': 5.085640225956459e-05, 'samples': 20328960, 'steps': 39704, 'batch_loss/train': 0.7649906938895583} +12/28/2021 04:36:40 - INFO - codeparrot_training - Step 39705: {'lr': 5.0846873202070486e-05, 'samples': 20329472, 'steps': 39705, 'batch_loss/train': 0.7677501016296446} +12/28/2021 04:36:50 - INFO - codeparrot_training - Step 39706: {'lr': 5.083734493633388e-05, 'samples': 20329984, 'steps': 39706, 'batch_loss/train': 0.8074010710697621} +12/28/2021 04:37:01 - INFO - codeparrot_training - Step 39707: {'lr': 5.082781746239285e-05, 'samples': 20330496, 'steps': 39707, 'batch_loss/train': 0.6933404211886227} +12/28/2021 04:37:13 - INFO - codeparrot_training - Step 39708: {'lr': 5.08182907802853e-05, 'samples': 20331008, 'steps': 39708, 'batch_loss/train': 0.653140138136223} +12/28/2021 04:37:24 - INFO - codeparrot_training - Step 39709: {'lr': 5.080876489004893e-05, 'samples': 20331520, 'steps': 39709, 'batch_loss/train': 0.7836046000011265} +12/28/2021 04:37:34 - INFO - codeparrot_training - Step 39710: {'lr': 5.079923979172163e-05, 'samples': 20332032, 'steps': 39710, 'batch_loss/train': 0.7895966839860193} +12/28/2021 04:37:47 - INFO - codeparrot_training - Step 39711: {'lr': 5.0789715485341477e-05, 'samples': 20332544, 'steps': 39711, 'batch_loss/train': 0.7994383564218879} +12/28/2021 04:37:58 - INFO - codeparrot_training - Step 39712: {'lr': 5.078019197094613e-05, 'samples': 20333056, 'steps': 39712, 'batch_loss/train': 0.7797977887094021} +12/28/2021 04:38:08 - INFO - codeparrot_training - Step 39713: {'lr': 5.0770669248573514e-05, 'samples': 20333568, 'steps': 39713, 'batch_loss/train': 0.7854027841240168} +12/28/2021 04:38:19 - INFO - codeparrot_training - Step 39714: {'lr': 5.076114731826151e-05, 'samples': 20334080, 'steps': 39714, 'batch_loss/train': 0.6859970227815211} +12/28/2021 04:38:31 - INFO - codeparrot_training - Step 39715: {'lr': 5.075162618004795e-05, 'samples': 20334592, 'steps': 39715, 'batch_loss/train': 0.7183624561876059} +12/28/2021 04:38:42 - INFO - codeparrot_training - Step 39716: {'lr': 5.074210583397068e-05, 'samples': 20335104, 'steps': 39716, 'batch_loss/train': 0.6956932513276115} +12/28/2021 04:38:53 - INFO - codeparrot_training - Step 39717: {'lr': 5.073258628006755e-05, 'samples': 20335616, 'steps': 39717, 'batch_loss/train': 0.7742658290080726} +12/28/2021 04:39:05 - INFO - codeparrot_training - Step 39718: {'lr': 5.072306751837644e-05, 'samples': 20336128, 'steps': 39718, 'batch_loss/train': 0.7182571869343519} +12/28/2021 04:39:16 - INFO - codeparrot_training - Step 39719: {'lr': 5.071354954893515e-05, 'samples': 20336640, 'steps': 39719, 'batch_loss/train': 0.7647685795091093} +12/28/2021 04:39:26 - INFO - codeparrot_training - Step 39720: {'lr': 5.0704032371781614e-05, 'samples': 20337152, 'steps': 39720, 'batch_loss/train': 0.8209935538470745} +12/28/2021 04:39:38 - INFO - codeparrot_training - Step 39721: {'lr': 5.069451598695346e-05, 'samples': 20337664, 'steps': 39721, 'batch_loss/train': 0.7504037320613861} +12/28/2021 04:39:49 - INFO - codeparrot_training - Step 39722: {'lr': 5.068500039448878e-05, 'samples': 20338176, 'steps': 39722, 'batch_loss/train': 0.7910297320922837} +12/28/2021 04:40:00 - INFO - codeparrot_training - Step 39723: {'lr': 5.0675485594425224e-05, 'samples': 20338688, 'steps': 39723, 'batch_loss/train': 0.8079738868400455} +12/28/2021 04:40:10 - INFO - codeparrot_training - Step 39724: {'lr': 5.0665971586800614e-05, 'samples': 20339200, 'steps': 39724, 'batch_loss/train': 0.6965051302686334} +12/28/2021 04:40:23 - INFO - codeparrot_training - Step 39725: {'lr': 5.0656458371652975e-05, 'samples': 20339712, 'steps': 39725, 'batch_loss/train': 0.7764860829338431} +12/28/2021 04:40:34 - INFO - codeparrot_training - Step 39726: {'lr': 5.064694594901989e-05, 'samples': 20340224, 'steps': 39726, 'batch_loss/train': 0.7272054459899664} +12/28/2021 04:40:44 - INFO - codeparrot_training - Step 39727: {'lr': 5.06374343189393e-05, 'samples': 20340736, 'steps': 39727, 'batch_loss/train': 0.6896307705901563} +12/28/2021 04:40:57 - INFO - codeparrot_training - Step 39728: {'lr': 5.062792348144899e-05, 'samples': 20341248, 'steps': 39728, 'batch_loss/train': 0.6899226435925812} +12/28/2021 04:41:07 - INFO - codeparrot_training - Step 39729: {'lr': 5.0618413436586767e-05, 'samples': 20341760, 'steps': 39729, 'batch_loss/train': 0.8545089745894074} +12/28/2021 04:41:18 - INFO - codeparrot_training - Step 39730: {'lr': 5.060890418439046e-05, 'samples': 20342272, 'steps': 39730, 'batch_loss/train': 0.7304475172422826} +12/28/2021 04:41:30 - INFO - codeparrot_training - Step 39731: {'lr': 5.059939572489794e-05, 'samples': 20342784, 'steps': 39731, 'batch_loss/train': 0.7346817152574658} +12/28/2021 04:41:41 - INFO - codeparrot_training - Step 39732: {'lr': 5.058988805814677e-05, 'samples': 20343296, 'steps': 39732, 'batch_loss/train': 0.631346293259412} +12/28/2021 04:41:51 - INFO - codeparrot_training - Step 39733: {'lr': 5.058038118417499e-05, 'samples': 20343808, 'steps': 39733, 'batch_loss/train': 0.7176552200689912} +12/28/2021 04:42:04 - INFO - codeparrot_training - Step 39734: {'lr': 5.057087510302039e-05, 'samples': 20344320, 'steps': 39734, 'batch_loss/train': 0.7818622312042862} +12/28/2021 04:42:15 - INFO - codeparrot_training - Step 39735: {'lr': 5.056136981472057e-05, 'samples': 20344832, 'steps': 39735, 'batch_loss/train': 0.7108720196411014} +12/28/2021 04:42:25 - INFO - codeparrot_training - Step 39736: {'lr': 5.055186531931347e-05, 'samples': 20345344, 'steps': 39736, 'batch_loss/train': 0.9735381733626127} +12/28/2021 04:42:36 - INFO - codeparrot_training - Step 39737: {'lr': 5.054236161683692e-05, 'samples': 20345856, 'steps': 39737, 'batch_loss/train': 0.82792554423213} +12/28/2021 04:42:48 - INFO - codeparrot_training - Step 39738: {'lr': 5.0532858707328575e-05, 'samples': 20346368, 'steps': 39738, 'batch_loss/train': 0.8212988143786788} +12/28/2021 04:42:59 - INFO - codeparrot_training - Step 39739: {'lr': 5.052335659082619e-05, 'samples': 20346880, 'steps': 39739, 'batch_loss/train': 0.7255764249712229} +12/28/2021 04:43:10 - INFO - codeparrot_training - Step 39740: {'lr': 5.051385526736777e-05, 'samples': 20347392, 'steps': 39740, 'batch_loss/train': 0.7419421775266528} +12/28/2021 04:43:22 - INFO - codeparrot_training - Step 39741: {'lr': 5.0504354736990856e-05, 'samples': 20347904, 'steps': 39741, 'batch_loss/train': 1.1137117072939873} +12/28/2021 04:43:33 - INFO - codeparrot_training - Step 39742: {'lr': 5.04948549997333e-05, 'samples': 20348416, 'steps': 39742, 'batch_loss/train': 0.6608170173713006} +12/28/2021 04:43:44 - INFO - codeparrot_training - Step 39743: {'lr': 5.048535605563287e-05, 'samples': 20348928, 'steps': 39743, 'batch_loss/train': 0.6216358547098935} +12/28/2021 04:43:56 - INFO - codeparrot_training - Step 39744: {'lr': 5.0475857904727334e-05, 'samples': 20349440, 'steps': 39744, 'batch_loss/train': 0.746854430064559} +12/28/2021 04:44:06 - INFO - codeparrot_training - Step 39745: {'lr': 5.046636054705447e-05, 'samples': 20349952, 'steps': 39745, 'batch_loss/train': 0.7288057929836214} +12/28/2021 04:44:17 - INFO - codeparrot_training - Step 39746: {'lr': 5.0456863982652063e-05, 'samples': 20350464, 'steps': 39746, 'batch_loss/train': 0.7604599501937628} +12/28/2021 04:44:27 - INFO - codeparrot_training - Step 39747: {'lr': 5.044736821155768e-05, 'samples': 20350976, 'steps': 39747, 'batch_loss/train': 0.6768269825261086} +12/28/2021 04:44:40 - INFO - codeparrot_training - Step 39748: {'lr': 5.043787323380936e-05, 'samples': 20351488, 'steps': 39748, 'batch_loss/train': 0.9991639945656061} +12/28/2021 04:44:50 - INFO - codeparrot_training - Step 39749: {'lr': 5.042837904944461e-05, 'samples': 20352000, 'steps': 39749, 'batch_loss/train': 0.6718710745917633} +12/28/2021 04:45:01 - INFO - codeparrot_training - Step 39750: {'lr': 5.0418885658501226e-05, 'samples': 20352512, 'steps': 39750, 'batch_loss/train': 0.6270379375200719} +12/28/2021 04:45:14 - INFO - codeparrot_training - Step 39751: {'lr': 5.0409393061017136e-05, 'samples': 20353024, 'steps': 39751, 'batch_loss/train': 0.7381706379819661} +12/28/2021 04:45:24 - INFO - codeparrot_training - Step 39752: {'lr': 5.0399901257029865e-05, 'samples': 20353536, 'steps': 39752, 'batch_loss/train': 0.7562128135468811} +12/28/2021 04:45:35 - INFO - codeparrot_training - Step 39753: {'lr': 5.03904102465772e-05, 'samples': 20354048, 'steps': 39753, 'batch_loss/train': 0.759062435477972} +12/28/2021 04:45:47 - INFO - codeparrot_training - Step 39754: {'lr': 5.0380920029696906e-05, 'samples': 20354560, 'steps': 39754, 'batch_loss/train': 0.6560955638997257} +12/28/2021 04:45:58 - INFO - codeparrot_training - Step 39755: {'lr': 5.037143060642671e-05, 'samples': 20355072, 'steps': 39755, 'batch_loss/train': 0.9190838998183608} +12/28/2021 04:46:09 - INFO - codeparrot_training - Step 39756: {'lr': 5.0361941976804326e-05, 'samples': 20355584, 'steps': 39756, 'batch_loss/train': 0.6910492079332471} +12/28/2021 04:46:19 - INFO - codeparrot_training - Step 39757: {'lr': 5.035245414086753e-05, 'samples': 20356096, 'steps': 39757, 'batch_loss/train': 0.7313598245382309} +12/28/2021 04:46:32 - INFO - codeparrot_training - Step 39758: {'lr': 5.034296709865388e-05, 'samples': 20356608, 'steps': 39758, 'batch_loss/train': 0.6483521088957787} +12/28/2021 04:46:42 - INFO - codeparrot_training - Step 39759: {'lr': 5.0333480850201285e-05, 'samples': 20357120, 'steps': 39759, 'batch_loss/train': 0.6653044098056853} +12/28/2021 04:46:53 - INFO - codeparrot_training - Step 39760: {'lr': 5.032399539554741e-05, 'samples': 20357632, 'steps': 39760, 'batch_loss/train': 0.8537539206445217} +12/28/2021 04:47:05 - INFO - codeparrot_training - Step 39761: {'lr': 5.031451073472981e-05, 'samples': 20358144, 'steps': 39761, 'batch_loss/train': 0.7028817769605666} +12/28/2021 04:47:15 - INFO - codeparrot_training - Step 39762: {'lr': 5.0305026867786405e-05, 'samples': 20358656, 'steps': 39762, 'batch_loss/train': 0.6612722160643898} +12/28/2021 04:47:26 - INFO - codeparrot_training - Step 39763: {'lr': 5.0295543794754844e-05, 'samples': 20359168, 'steps': 39763, 'batch_loss/train': 0.7122567687183619} +12/28/2021 04:47:39 - INFO - codeparrot_training - Step 39764: {'lr': 5.028606151567275e-05, 'samples': 20359680, 'steps': 39764, 'batch_loss/train': 0.7123064855113626} +12/28/2021 04:47:50 - INFO - codeparrot_training - Step 39765: {'lr': 5.0276580030577806e-05, 'samples': 20360192, 'steps': 39765, 'batch_loss/train': 0.6432053820462897} +12/28/2021 04:48:00 - INFO - codeparrot_training - Step 39766: {'lr': 5.0267099339507904e-05, 'samples': 20360704, 'steps': 39766, 'batch_loss/train': 0.7383444863371551} +12/28/2021 04:48:11 - INFO - codeparrot_training - Step 39767: {'lr': 5.025761944250051e-05, 'samples': 20361216, 'steps': 39767, 'batch_loss/train': 0.5691069369204342} +12/28/2021 04:48:23 - INFO - codeparrot_training - Step 39768: {'lr': 5.0248140339593405e-05, 'samples': 20361728, 'steps': 39768, 'batch_loss/train': 0.6893642018549144} +12/28/2021 04:48:34 - INFO - codeparrot_training - Step 39769: {'lr': 5.023866203082428e-05, 'samples': 20362240, 'steps': 39769, 'batch_loss/train': 0.8177347593009472} +12/28/2021 04:48:44 - INFO - codeparrot_training - Step 39770: {'lr': 5.0229184516230816e-05, 'samples': 20362752, 'steps': 39770, 'batch_loss/train': 0.6641706889495254} +12/28/2021 04:48:57 - INFO - codeparrot_training - Step 39771: {'lr': 5.0219707795850665e-05, 'samples': 20363264, 'steps': 39771, 'batch_loss/train': 0.7218467378988862} +12/28/2021 04:49:08 - INFO - codeparrot_training - Step 39772: {'lr': 5.021023186972154e-05, 'samples': 20363776, 'steps': 39772, 'batch_loss/train': 0.7177609419450164} +12/28/2021 04:49:18 - INFO - codeparrot_training - Step 39773: {'lr': 5.020075673788108e-05, 'samples': 20364288, 'steps': 39773, 'batch_loss/train': 0.688247271347791} +12/28/2021 04:49:30 - INFO - codeparrot_training - Step 39774: {'lr': 5.019128240036697e-05, 'samples': 20364800, 'steps': 39774, 'batch_loss/train': 0.7240539803169668} +12/28/2021 04:49:41 - INFO - codeparrot_training - Step 39775: {'lr': 5.0181808857216956e-05, 'samples': 20365312, 'steps': 39775, 'batch_loss/train': 0.7940141563303769} +12/28/2021 04:49:52 - INFO - codeparrot_training - Step 39776: {'lr': 5.017233610846847e-05, 'samples': 20365824, 'steps': 39776, 'batch_loss/train': 0.711829187348485} +12/28/2021 04:50:02 - INFO - codeparrot_training - Step 39777: {'lr': 5.016286415415946e-05, 'samples': 20366336, 'steps': 39777, 'batch_loss/train': 0.6623277934268117} +12/28/2021 04:50:14 - INFO - codeparrot_training - Step 39778: {'lr': 5.015339299432739e-05, 'samples': 20366848, 'steps': 39778, 'batch_loss/train': 0.7839710447005928} +12/28/2021 04:50:25 - INFO - codeparrot_training - Step 39779: {'lr': 5.0143922629009966e-05, 'samples': 20367360, 'steps': 39779, 'batch_loss/train': 0.795794123550877} +12/28/2021 04:50:35 - INFO - codeparrot_training - Step 39780: {'lr': 5.013445305824485e-05, 'samples': 20367872, 'steps': 39780, 'batch_loss/train': 0.7642777375876904} +12/28/2021 04:50:48 - INFO - codeparrot_training - Step 39781: {'lr': 5.0124984282069684e-05, 'samples': 20368384, 'steps': 39781, 'batch_loss/train': 0.4431344246841036} +12/28/2021 04:50:59 - INFO - codeparrot_training - Step 39782: {'lr': 5.011551630052211e-05, 'samples': 20368896, 'steps': 39782, 'batch_loss/train': 0.7656749924644828} +12/28/2021 04:51:10 - INFO - codeparrot_training - Step 39783: {'lr': 5.010604911363975e-05, 'samples': 20369408, 'steps': 39783, 'batch_loss/train': 0.7348893592134118} +12/28/2021 04:51:22 - INFO - codeparrot_training - Step 39784: {'lr': 5.009658272146028e-05, 'samples': 20369920, 'steps': 39784, 'batch_loss/train': 0.6924156388267875} +12/28/2021 04:51:32 - INFO - codeparrot_training - Step 39785: {'lr': 5.008711712402131e-05, 'samples': 20370432, 'steps': 39785, 'batch_loss/train': 0.7947660475037992} +12/28/2021 04:51:43 - INFO - codeparrot_training - Step 39786: {'lr': 5.007765232136055e-05, 'samples': 20370944, 'steps': 39786, 'batch_loss/train': 0.7219466683454812} +12/28/2021 04:51:55 - INFO - codeparrot_training - Step 39787: {'lr': 5.00681883135154e-05, 'samples': 20371456, 'steps': 39787, 'batch_loss/train': 0.7146967765875161} +12/28/2021 04:52:06 - INFO - codeparrot_training - Step 39788: {'lr': 5.0058725100523726e-05, 'samples': 20371968, 'steps': 39788, 'batch_loss/train': 0.7513910047709942} +12/28/2021 04:52:16 - INFO - codeparrot_training - Step 39789: {'lr': 5.004926268242313e-05, 'samples': 20372480, 'steps': 39789, 'batch_loss/train': 0.8639144456246868} +12/28/2021 04:52:27 - INFO - codeparrot_training - Step 39790: {'lr': 5.0039801059251125e-05, 'samples': 20372992, 'steps': 39790, 'batch_loss/train': 0.6865161065943539} +12/28/2021 04:52:40 - INFO - codeparrot_training - Step 39791: {'lr': 5.0030340231045276e-05, 'samples': 20373504, 'steps': 39791, 'batch_loss/train': 0.7515451656654477} +12/28/2021 04:52:50 - INFO - codeparrot_training - Step 39792: {'lr': 5.002088019784343e-05, 'samples': 20374016, 'steps': 39792, 'batch_loss/train': 0.6583125074976124} +12/28/2021 04:53:01 - INFO - codeparrot_training - Step 39793: {'lr': 5.001142095968297e-05, 'samples': 20374528, 'steps': 39793, 'batch_loss/train': 0.715933442581445} +12/28/2021 04:53:13 - INFO - codeparrot_training - Step 39794: {'lr': 5.000196251660161e-05, 'samples': 20375040, 'steps': 39794, 'batch_loss/train': 0.9800722101936117} +12/28/2021 04:53:24 - INFO - codeparrot_training - Step 39795: {'lr': 4.999250486863693e-05, 'samples': 20375552, 'steps': 39795, 'batch_loss/train': 1.1365963323041797} +12/28/2021 04:53:35 - INFO - codeparrot_training - Step 39796: {'lr': 4.9983048015826537e-05, 'samples': 20376064, 'steps': 39796, 'batch_loss/train': 0.753644231474027} +12/28/2021 04:53:45 - INFO - codeparrot_training - Step 39797: {'lr': 4.9973591958208e-05, 'samples': 20376576, 'steps': 39797, 'batch_loss/train': 0.6403349335305393} +12/28/2021 04:53:57 - INFO - codeparrot_training - Step 39798: {'lr': 4.996413669581895e-05, 'samples': 20377088, 'steps': 39798, 'batch_loss/train': 0.6380948266014457} +12/28/2021 04:54:08 - INFO - codeparrot_training - Step 39799: {'lr': 4.995468222869695e-05, 'samples': 20377600, 'steps': 39799, 'batch_loss/train': 0.6719279219396412} +12/28/2021 04:54:19 - INFO - codeparrot_training - Step 39800: {'lr': 4.994522855687961e-05, 'samples': 20378112, 'steps': 39800, 'batch_loss/train': 0.8265840623062104} +12/28/2021 04:54:31 - INFO - codeparrot_training - Step 39801: {'lr': 4.9935775680404554e-05, 'samples': 20378624, 'steps': 39801, 'batch_loss/train': 0.728833005297929} +12/28/2021 04:54:42 - INFO - codeparrot_training - Step 39802: {'lr': 4.992632359930921e-05, 'samples': 20379136, 'steps': 39802, 'batch_loss/train': 0.7408273234032094} +12/28/2021 04:54:53 - INFO - codeparrot_training - Step 39803: {'lr': 4.991687231363137e-05, 'samples': 20379648, 'steps': 39803, 'batch_loss/train': 0.6991786288563162} +12/28/2021 04:55:05 - INFO - codeparrot_training - Step 39804: {'lr': 4.990742182340843e-05, 'samples': 20380160, 'steps': 39804, 'batch_loss/train': 0.8360333554446697} +12/28/2021 04:55:15 - INFO - codeparrot_training - Step 39805: {'lr': 4.9897972128678026e-05, 'samples': 20380672, 'steps': 39805, 'batch_loss/train': 0.7433055061846972} +12/28/2021 04:55:26 - INFO - codeparrot_training - Step 39806: {'lr': 4.988852322947773e-05, 'samples': 20381184, 'steps': 39806, 'batch_loss/train': 0.7587762866169214} +12/28/2021 04:55:38 - INFO - codeparrot_training - Step 39807: {'lr': 4.987907512584511e-05, 'samples': 20381696, 'steps': 39807, 'batch_loss/train': 0.7211737306788564} +12/28/2021 04:55:49 - INFO - codeparrot_training - Step 39808: {'lr': 4.986962781781773e-05, 'samples': 20382208, 'steps': 39808, 'batch_loss/train': 0.736599646625109} +12/28/2021 04:55:59 - INFO - codeparrot_training - Step 39809: {'lr': 4.986018130543316e-05, 'samples': 20382720, 'steps': 39809, 'batch_loss/train': 0.8070330210030079} +12/28/2021 04:56:10 - INFO - codeparrot_training - Step 39810: {'lr': 4.9850735588728904e-05, 'samples': 20383232, 'steps': 39810, 'batch_loss/train': 0.7934607099741697} +12/28/2021 04:56:23 - INFO - codeparrot_training - Step 39811: {'lr': 4.984129066774257e-05, 'samples': 20383744, 'steps': 39811, 'batch_loss/train': 0.8413771723862737} +12/28/2021 04:56:33 - INFO - codeparrot_training - Step 39812: {'lr': 4.983184654251175e-05, 'samples': 20384256, 'steps': 39812, 'batch_loss/train': 0.7958390507847071} +12/28/2021 04:56:44 - INFO - codeparrot_training - Step 39813: {'lr': 4.9822403213073795e-05, 'samples': 20384768, 'steps': 39813, 'batch_loss/train': 0.7250747121870518} +12/28/2021 04:56:56 - INFO - codeparrot_training - Step 39814: {'lr': 4.981296067946645e-05, 'samples': 20385280, 'steps': 39814, 'batch_loss/train': 0.7380621936172247} +12/28/2021 04:57:07 - INFO - codeparrot_training - Step 39815: {'lr': 4.980351894172724e-05, 'samples': 20385792, 'steps': 39815, 'batch_loss/train': 0.6155534646240994} +12/28/2021 04:57:18 - INFO - codeparrot_training - Step 39816: {'lr': 4.979407799989358e-05, 'samples': 20386304, 'steps': 39816, 'batch_loss/train': 0.8064520820043981} +12/28/2021 04:57:30 - INFO - codeparrot_training - Step 39817: {'lr': 4.978463785400303e-05, 'samples': 20386816, 'steps': 39817, 'batch_loss/train': 0.7061855751089752} +12/28/2021 04:57:40 - INFO - codeparrot_training - Step 39818: {'lr': 4.977519850409326e-05, 'samples': 20387328, 'steps': 39818, 'batch_loss/train': 0.7361379065550864} +12/28/2021 04:57:51 - INFO - codeparrot_training - Step 39819: {'lr': 4.976575995020166e-05, 'samples': 20387840, 'steps': 39819, 'batch_loss/train': 0.7112049299757928} +12/28/2021 04:58:01 - INFO - codeparrot_training - Step 39820: {'lr': 4.975632219236578e-05, 'samples': 20388352, 'steps': 39820, 'batch_loss/train': 0.6798792658373713} +12/28/2021 04:58:14 - INFO - codeparrot_training - Step 39821: {'lr': 4.974688523062315e-05, 'samples': 20388864, 'steps': 39821, 'batch_loss/train': 0.7578471945598722} +12/28/2021 04:58:25 - INFO - codeparrot_training - Step 39822: {'lr': 4.973744906501129e-05, 'samples': 20389376, 'steps': 39822, 'batch_loss/train': 0.7402013633400202} +12/28/2021 04:58:35 - INFO - codeparrot_training - Step 39823: {'lr': 4.972801369556773e-05, 'samples': 20389888, 'steps': 39823, 'batch_loss/train': 0.750357611104846} +12/28/2021 04:58:48 - INFO - codeparrot_training - Step 39824: {'lr': 4.9718579122329967e-05, 'samples': 20390400, 'steps': 39824, 'batch_loss/train': 0.5647259424440563} +12/28/2021 04:58:58 - INFO - codeparrot_training - Step 39825: {'lr': 4.970914534533552e-05, 'samples': 20390912, 'steps': 39825, 'batch_loss/train': 0.6370991412550211} +12/28/2021 04:59:09 - INFO - codeparrot_training - Step 39826: {'lr': 4.969971236462187e-05, 'samples': 20391424, 'steps': 39826, 'batch_loss/train': 0.7458902378566563} +12/28/2021 04:59:21 - INFO - codeparrot_training - Step 39827: {'lr': 4.969028018022659e-05, 'samples': 20391936, 'steps': 39827, 'batch_loss/train': 0.8265282995998859} +12/28/2021 04:59:32 - INFO - codeparrot_training - Step 39828: {'lr': 4.9680848792187013e-05, 'samples': 20392448, 'steps': 39828, 'batch_loss/train': 0.5846214359626174} +12/28/2021 04:59:43 - INFO - codeparrot_training - Step 39829: {'lr': 4.967141820054077e-05, 'samples': 20392960, 'steps': 39829, 'batch_loss/train': 0.7691222685389221} +12/28/2021 04:59:53 - INFO - codeparrot_training - Step 39830: {'lr': 4.966198840532543e-05, 'samples': 20393472, 'steps': 39830, 'batch_loss/train': 0.773607611656189} +12/28/2021 05:00:06 - INFO - codeparrot_training - Step 39831: {'lr': 4.9652559406578316e-05, 'samples': 20393984, 'steps': 39831, 'batch_loss/train': 0.6831269297399558} +12/28/2021 05:00:17 - INFO - codeparrot_training - Step 39832: {'lr': 4.9643131204336945e-05, 'samples': 20394496, 'steps': 39832, 'batch_loss/train': 0.48173614748520777} +12/28/2021 05:00:28 - INFO - codeparrot_training - Step 39833: {'lr': 4.963370379863885e-05, 'samples': 20395008, 'steps': 39833, 'batch_loss/train': 0.9394326624460518} +12/28/2021 05:00:40 - INFO - codeparrot_training - Step 39834: {'lr': 4.96242771895215e-05, 'samples': 20395520, 'steps': 39834, 'batch_loss/train': 0.660415331250988} +12/28/2021 05:00:50 - INFO - codeparrot_training - Step 39835: {'lr': 4.961485137702237e-05, 'samples': 20396032, 'steps': 39835, 'batch_loss/train': 0.8097616462036967} +12/28/2021 05:01:01 - INFO - codeparrot_training - Step 39836: {'lr': 4.96054263611789e-05, 'samples': 20396544, 'steps': 39836, 'batch_loss/train': 0.8355239061638713} +12/28/2021 05:01:13 - INFO - codeparrot_training - Step 39837: {'lr': 4.959600214202861e-05, 'samples': 20397056, 'steps': 39837, 'batch_loss/train': 0.7024563648737967} +12/28/2021 05:01:24 - INFO - codeparrot_training - Step 39838: {'lr': 4.958657871960901e-05, 'samples': 20397568, 'steps': 39838, 'batch_loss/train': 0.7373758647590876} +12/28/2021 05:01:34 - INFO - codeparrot_training - Step 39839: {'lr': 4.9577156093957384e-05, 'samples': 20398080, 'steps': 39839, 'batch_loss/train': 0.6431780674029142} +12/28/2021 05:01:45 - INFO - codeparrot_training - Step 39840: {'lr': 4.9567734265111354e-05, 'samples': 20398592, 'steps': 39840, 'batch_loss/train': 0.6619913543108851} +12/28/2021 05:01:58 - INFO - codeparrot_training - Step 39841: {'lr': 4.9558313233108385e-05, 'samples': 20399104, 'steps': 39841, 'batch_loss/train': 0.7218330255709589} +12/28/2021 05:02:08 - INFO - codeparrot_training - Step 39842: {'lr': 4.954889299798585e-05, 'samples': 20399616, 'steps': 39842, 'batch_loss/train': 0.8011735803447664} +12/28/2021 05:02:19 - INFO - codeparrot_training - Step 39843: {'lr': 4.953947355978114e-05, 'samples': 20400128, 'steps': 39843, 'batch_loss/train': 0.7518304942641407} +12/28/2021 05:02:31 - INFO - codeparrot_training - Step 39844: {'lr': 4.953005491853191e-05, 'samples': 20400640, 'steps': 39844, 'batch_loss/train': 0.771454282104969} +12/28/2021 05:02:42 - INFO - codeparrot_training - Step 39845: {'lr': 4.9520637074275455e-05, 'samples': 20401152, 'steps': 39845, 'batch_loss/train': 0.7881674477830529} +12/28/2021 05:02:52 - INFO - codeparrot_training - Step 39846: {'lr': 4.9511220027049166e-05, 'samples': 20401664, 'steps': 39846, 'batch_loss/train': 0.7169817630201578} +12/28/2021 05:03:03 - INFO - codeparrot_training - Step 39847: {'lr': 4.950180377689067e-05, 'samples': 20402176, 'steps': 39847, 'batch_loss/train': 0.8663263898342848} +12/28/2021 05:03:15 - INFO - codeparrot_training - Step 39848: {'lr': 4.9492388323837265e-05, 'samples': 20402688, 'steps': 39848, 'batch_loss/train': 0.690076083294116} +12/28/2021 05:03:26 - INFO - codeparrot_training - Step 39849: {'lr': 4.948297366792642e-05, 'samples': 20403200, 'steps': 39849, 'batch_loss/train': 0.730540378484875} +12/28/2021 05:03:36 - INFO - codeparrot_training - Step 39850: {'lr': 4.9473559809195546e-05, 'samples': 20403712, 'steps': 39850, 'batch_loss/train': 0.6886882432736456} +12/28/2021 05:03:49 - INFO - codeparrot_training - Step 39851: {'lr': 4.946414674768207e-05, 'samples': 20404224, 'steps': 39851, 'batch_loss/train': 0.6837566918693483} +12/28/2021 05:04:00 - INFO - codeparrot_training - Step 39852: {'lr': 4.945473448342344e-05, 'samples': 20404736, 'steps': 39852, 'batch_loss/train': 0.6485180892050266} +12/28/2021 05:04:11 - INFO - codeparrot_training - Step 39853: {'lr': 4.944532301645713e-05, 'samples': 20405248, 'steps': 39853, 'batch_loss/train': 0.6500386514235288} +12/28/2021 05:04:23 - INFO - codeparrot_training - Step 39854: {'lr': 4.943591234682035e-05, 'samples': 20405760, 'steps': 39854, 'batch_loss/train': 0.828183725476265} +12/28/2021 05:04:33 - INFO - codeparrot_training - Step 39855: {'lr': 4.942650247455074e-05, 'samples': 20406272, 'steps': 39855, 'batch_loss/train': 0.7558549987152219} +12/28/2021 05:04:44 - INFO - codeparrot_training - Step 39856: {'lr': 4.941709339968567e-05, 'samples': 20406784, 'steps': 39856, 'batch_loss/train': 0.7619363917037845} +12/28/2021 05:04:57 - INFO - codeparrot_training - Step 39857: {'lr': 4.940768512226243e-05, 'samples': 20407296, 'steps': 39857, 'batch_loss/train': 0.7657769601792097} +12/28/2021 05:05:08 - INFO - codeparrot_training - Step 39858: {'lr': 4.9398277642318496e-05, 'samples': 20407808, 'steps': 39858, 'batch_loss/train': 0.6369429999031126} +12/28/2021 05:05:18 - INFO - codeparrot_training - Step 39859: {'lr': 4.938887095989128e-05, 'samples': 20408320, 'steps': 39859, 'batch_loss/train': 0.746681297197938} +12/28/2021 05:05:29 - INFO - codeparrot_training - Step 39860: {'lr': 4.9379465075018145e-05, 'samples': 20408832, 'steps': 39860, 'batch_loss/train': 0.7824710458517075} +12/28/2021 05:05:41 - INFO - codeparrot_training - Step 39861: {'lr': 4.937005998773653e-05, 'samples': 20409344, 'steps': 39861, 'batch_loss/train': 0.5884156879037619} +12/28/2021 05:05:52 - INFO - codeparrot_training - Step 39862: {'lr': 4.936065569808379e-05, 'samples': 20409856, 'steps': 39862, 'batch_loss/train': 0.7188476109877229} +12/28/2021 05:06:02 - INFO - codeparrot_training - Step 39863: {'lr': 4.935125220609732e-05, 'samples': 20410368, 'steps': 39863, 'batch_loss/train': 0.7795562222599983} +12/28/2021 05:06:15 - INFO - codeparrot_training - Step 39864: {'lr': 4.9341849511814505e-05, 'samples': 20410880, 'steps': 39864, 'batch_loss/train': 0.8405674798414111} +12/28/2021 05:06:25 - INFO - codeparrot_training - Step 39865: {'lr': 4.933244761527272e-05, 'samples': 20411392, 'steps': 39865, 'batch_loss/train': 0.716137545183301} +12/28/2021 05:06:36 - INFO - codeparrot_training - Step 39866: {'lr': 4.9323046516509376e-05, 'samples': 20411904, 'steps': 39866, 'batch_loss/train': 0.7245471007190645} +12/28/2021 05:06:46 - INFO - codeparrot_training - Step 39867: {'lr': 4.931364621556189e-05, 'samples': 20412416, 'steps': 39867, 'batch_loss/train': 0.7262518396601081} +12/28/2021 05:07:00 - INFO - codeparrot_training - Step 39868: {'lr': 4.93042467124675e-05, 'samples': 20412928, 'steps': 39868, 'batch_loss/train': 0.4915123031241819} +12/28/2021 05:07:10 - INFO - codeparrot_training - Step 39869: {'lr': 4.9294848007263584e-05, 'samples': 20413440, 'steps': 39869, 'batch_loss/train': 0.8148463163524866} +12/28/2021 05:07:21 - INFO - codeparrot_training - Step 39870: {'lr': 4.92854500999877e-05, 'samples': 20413952, 'steps': 39870, 'batch_loss/train': 0.8619000297039747} +12/28/2021 05:07:33 - INFO - codeparrot_training - Step 39871: {'lr': 4.9276052990677e-05, 'samples': 20414464, 'steps': 39871, 'batch_loss/train': 0.7833787677809596} +12/28/2021 05:07:43 - INFO - codeparrot_training - Step 39872: {'lr': 4.926665667936889e-05, 'samples': 20414976, 'steps': 39872, 'batch_loss/train': 0.7840820904821157} +12/28/2021 05:07:54 - INFO - codeparrot_training - Step 39873: {'lr': 4.925726116610085e-05, 'samples': 20415488, 'steps': 39873, 'batch_loss/train': 0.833738328423351} +12/28/2021 05:08:06 - INFO - codeparrot_training - Step 39874: {'lr': 4.9247866450910116e-05, 'samples': 20416000, 'steps': 39874, 'batch_loss/train': 0.727863809093833} +12/28/2021 05:08:17 - INFO - codeparrot_training - Step 39875: {'lr': 4.923847253383404e-05, 'samples': 20416512, 'steps': 39875, 'batch_loss/train': 0.8411591777112335} +12/28/2021 05:08:28 - INFO - codeparrot_training - Step 39876: {'lr': 4.922907941490998e-05, 'samples': 20417024, 'steps': 39876, 'batch_loss/train': 0.77149255014956} +12/28/2021 05:08:38 - INFO - codeparrot_training - Step 39877: {'lr': 4.921968709417532e-05, 'samples': 20417536, 'steps': 39877, 'batch_loss/train': 0.7150974697433412} +12/28/2021 05:08:51 - INFO - codeparrot_training - Step 39878: {'lr': 4.9210295571667366e-05, 'samples': 20418048, 'steps': 39878, 'batch_loss/train': 0.746669284068048} +12/28/2021 05:09:02 - INFO - codeparrot_training - Step 39879: {'lr': 4.9200904847423514e-05, 'samples': 20418560, 'steps': 39879, 'batch_loss/train': 0.6559899556450546} +12/28/2021 05:09:12 - INFO - codeparrot_training - Step 39880: {'lr': 4.919151492148091e-05, 'samples': 20419072, 'steps': 39880, 'batch_loss/train': 0.755777764134109} +12/28/2021 05:09:25 - INFO - codeparrot_training - Step 39881: {'lr': 4.9182125793877096e-05, 'samples': 20419584, 'steps': 39881, 'batch_loss/train': 0.7089682864025235} +12/28/2021 05:09:35 - INFO - codeparrot_training - Step 39882: {'lr': 4.9172737464649395e-05, 'samples': 20420096, 'steps': 39882, 'batch_loss/train': 0.7699264555703849} +12/28/2021 05:09:46 - INFO - codeparrot_training - Step 39883: {'lr': 4.9163349933834923e-05, 'samples': 20420608, 'steps': 39883, 'batch_loss/train': 0.6184877984924242} +12/28/2021 05:09:58 - INFO - codeparrot_training - Step 39884: {'lr': 4.915396320147128e-05, 'samples': 20421120, 'steps': 39884, 'batch_loss/train': 0.5573945879004896} +12/28/2021 05:10:09 - INFO - codeparrot_training - Step 39885: {'lr': 4.914457726759558e-05, 'samples': 20421632, 'steps': 39885, 'batch_loss/train': 0.6796557670459151} +12/28/2021 05:10:19 - INFO - codeparrot_training - Step 39886: {'lr': 4.913519213224518e-05, 'samples': 20422144, 'steps': 39886, 'batch_loss/train': 0.7077162365894765} +12/28/2021 05:10:32 - INFO - codeparrot_training - Step 39887: {'lr': 4.912580779545742e-05, 'samples': 20422656, 'steps': 39887, 'batch_loss/train': 0.711505691986531} +12/28/2021 05:10:43 - INFO - codeparrot_training - Step 39888: {'lr': 4.911642425726962e-05, 'samples': 20423168, 'steps': 39888, 'batch_loss/train': 0.8020911471685395} +12/28/2021 05:10:53 - INFO - codeparrot_training - Step 39889: {'lr': 4.9107041517719055e-05, 'samples': 20423680, 'steps': 39889, 'batch_loss/train': 0.7065968783572316} +12/28/2021 05:11:04 - INFO - codeparrot_training - Step 39890: {'lr': 4.909765957684301e-05, 'samples': 20424192, 'steps': 39890, 'batch_loss/train': 0.7949862158857286} +12/28/2021 05:11:16 - INFO - codeparrot_training - Step 39891: {'lr': 4.908827843467883e-05, 'samples': 20424704, 'steps': 39891, 'batch_loss/train': 0.7171989595517516} +12/28/2021 05:11:27 - INFO - codeparrot_training - Step 39892: {'lr': 4.907889809126381e-05, 'samples': 20425216, 'steps': 39892, 'batch_loss/train': 0.8670126684010029} +12/28/2021 05:11:37 - INFO - codeparrot_training - Step 39893: {'lr': 4.90695185466353e-05, 'samples': 20425728, 'steps': 39893, 'batch_loss/train': 0.8427414791658521} +12/28/2021 05:11:50 - INFO - codeparrot_training - Step 39894: {'lr': 4.906013980083043e-05, 'samples': 20426240, 'steps': 39894, 'batch_loss/train': 0.7511925660073757} +12/28/2021 05:12:00 - INFO - codeparrot_training - Step 39895: {'lr': 4.905076185388649e-05, 'samples': 20426752, 'steps': 39895, 'batch_loss/train': 0.9598741484805942} +12/28/2021 05:12:11 - INFO - codeparrot_training - Step 39896: {'lr': 4.9041384705841016e-05, 'samples': 20427264, 'steps': 39896, 'batch_loss/train': 0.7220978578552604} +12/28/2021 05:12:22 - INFO - codeparrot_training - Step 39897: {'lr': 4.9032008356731025e-05, 'samples': 20427776, 'steps': 39897, 'batch_loss/train': 0.6954730511642992} +12/28/2021 05:12:35 - INFO - codeparrot_training - Step 39898: {'lr': 4.9022632806593834e-05, 'samples': 20428288, 'steps': 39898, 'batch_loss/train': 0.7235852954909205} +12/28/2021 05:12:45 - INFO - codeparrot_training - Step 39899: {'lr': 4.901325805546689e-05, 'samples': 20428800, 'steps': 39899, 'batch_loss/train': 0.8040645150467753} +12/28/2021 05:12:56 - INFO - codeparrot_training - Step 39900: {'lr': 4.900388410338727e-05, 'samples': 20429312, 'steps': 39900, 'batch_loss/train': 0.6110126823186874} +12/28/2021 05:13:08 - INFO - codeparrot_training - Step 39901: {'lr': 4.899451095039231e-05, 'samples': 20429824, 'steps': 39901, 'batch_loss/train': 0.7023674710653722} +12/28/2021 05:13:18 - INFO - codeparrot_training - Step 39902: {'lr': 4.898513859651932e-05, 'samples': 20430336, 'steps': 39902, 'batch_loss/train': 0.7920525397639722} +12/28/2021 05:13:29 - INFO - codeparrot_training - Step 39903: {'lr': 4.897576704180548e-05, 'samples': 20430848, 'steps': 39903, 'batch_loss/train': 0.8222892489284277} +12/28/2021 05:13:42 - INFO - codeparrot_training - Step 39904: {'lr': 4.89663962862881e-05, 'samples': 20431360, 'steps': 39904, 'batch_loss/train': 0.6772895148023963} +12/28/2021 05:13:52 - INFO - codeparrot_training - Step 39905: {'lr': 4.895702633000449e-05, 'samples': 20431872, 'steps': 39905, 'batch_loss/train': 0.680454833433032} +12/28/2021 05:14:03 - INFO - codeparrot_training - Step 39906: {'lr': 4.894765717299168e-05, 'samples': 20432384, 'steps': 39906, 'batch_loss/train': 0.7552986267255619} +12/28/2021 05:14:14 - INFO - codeparrot_training - Step 39907: {'lr': 4.893828881528714e-05, 'samples': 20432896, 'steps': 39907, 'batch_loss/train': 0.7628909889608622} +12/28/2021 05:14:26 - INFO - codeparrot_training - Step 39908: {'lr': 4.8928921256928135e-05, 'samples': 20433408, 'steps': 39908, 'batch_loss/train': 0.7112678055418655} +12/28/2021 05:14:36 - INFO - codeparrot_training - Step 39909: {'lr': 4.8919554497951655e-05, 'samples': 20433920, 'steps': 39909, 'batch_loss/train': 0.7648602807894349} +12/28/2021 05:14:47 - INFO - codeparrot_training - Step 39910: {'lr': 4.8910188538395214e-05, 'samples': 20434432, 'steps': 39910, 'batch_loss/train': 0.6217182218679227} +12/28/2021 05:14:59 - INFO - codeparrot_training - Step 39911: {'lr': 4.890082337829596e-05, 'samples': 20434944, 'steps': 39911, 'batch_loss/train': 0.7597804742399603} +12/28/2021 05:15:10 - INFO - codeparrot_training - Step 39912: {'lr': 4.889145901769104e-05, 'samples': 20435456, 'steps': 39912, 'batch_loss/train': 0.7225272833602503} +12/28/2021 05:15:20 - INFO - codeparrot_training - Step 39913: {'lr': 4.888209545661776e-05, 'samples': 20435968, 'steps': 39913, 'batch_loss/train': 0.7936659283004701} +12/28/2021 05:15:32 - INFO - codeparrot_training - Step 39914: {'lr': 4.8872732695113304e-05, 'samples': 20436480, 'steps': 39914, 'batch_loss/train': 1.0987494746223092} +12/28/2021 05:15:43 - INFO - codeparrot_training - Step 39915: {'lr': 4.886337073321495e-05, 'samples': 20436992, 'steps': 39915, 'batch_loss/train': 0.824276955332607} +12/28/2021 05:15:54 - INFO - codeparrot_training - Step 39916: {'lr': 4.8854009570959866e-05, 'samples': 20437504, 'steps': 39916, 'batch_loss/train': 0.6910157076781616} +12/28/2021 05:16:07 - INFO - codeparrot_training - Step 39917: {'lr': 4.88446492083853e-05, 'samples': 20438016, 'steps': 39917, 'batch_loss/train': 0.7049756972119212} +12/28/2021 05:16:17 - INFO - codeparrot_training - Step 39918: {'lr': 4.8835289645528444e-05, 'samples': 20438528, 'steps': 39918, 'batch_loss/train': 0.5298192466143519} +12/28/2021 05:16:28 - INFO - codeparrot_training - Step 39919: {'lr': 4.882593088242654e-05, 'samples': 20439040, 'steps': 39919, 'batch_loss/train': 0.7223300149198622} +12/28/2021 05:16:39 - INFO - codeparrot_training - Step 39920: {'lr': 4.881657291911676e-05, 'samples': 20439552, 'steps': 39920, 'batch_loss/train': 0.6698145315749571} +12/28/2021 05:16:51 - INFO - codeparrot_training - Step 39921: {'lr': 4.880721575563632e-05, 'samples': 20440064, 'steps': 39921, 'batch_loss/train': 0.6787695731036365} +12/28/2021 05:17:01 - INFO - codeparrot_training - Step 39922: {'lr': 4.87978593920225e-05, 'samples': 20440576, 'steps': 39922, 'batch_loss/train': 0.7478927788324654} +12/28/2021 05:17:12 - INFO - codeparrot_training - Step 39923: {'lr': 4.878850382831235e-05, 'samples': 20441088, 'steps': 39923, 'batch_loss/train': 0.6917288331314921} +12/28/2021 05:17:24 - INFO - codeparrot_training - Step 39924: {'lr': 4.877914906454306e-05, 'samples': 20441600, 'steps': 39924, 'batch_loss/train': 0.8207361027598381} +12/28/2021 05:17:35 - INFO - codeparrot_training - Step 39925: {'lr': 4.876979510075202e-05, 'samples': 20442112, 'steps': 39925, 'batch_loss/train': 0.8016135627403855} +12/28/2021 05:17:45 - INFO - codeparrot_training - Step 39926: {'lr': 4.876044193697623e-05, 'samples': 20442624, 'steps': 39926, 'batch_loss/train': 0.67627636436373} +12/28/2021 05:17:58 - INFO - codeparrot_training - Step 39927: {'lr': 4.875108957325297e-05, 'samples': 20443136, 'steps': 39927, 'batch_loss/train': 0.72848389018327} +12/28/2021 05:18:09 - INFO - codeparrot_training - Step 39928: {'lr': 4.8741738009619365e-05, 'samples': 20443648, 'steps': 39928, 'batch_loss/train': 0.7004497654270381} +12/28/2021 05:18:19 - INFO - codeparrot_training - Step 39929: {'lr': 4.873238724611262e-05, 'samples': 20444160, 'steps': 39929, 'batch_loss/train': 0.7511444021947682} +12/28/2021 05:18:30 - INFO - codeparrot_training - Step 39930: {'lr': 4.872303728276989e-05, 'samples': 20444672, 'steps': 39930, 'batch_loss/train': 0.7761231092736125} +12/28/2021 05:18:42 - INFO - codeparrot_training - Step 39931: {'lr': 4.871368811962845e-05, 'samples': 20445184, 'steps': 39931, 'batch_loss/train': 0.760146792512387} +12/28/2021 05:18:53 - INFO - codeparrot_training - Step 39932: {'lr': 4.870433975672525e-05, 'samples': 20445696, 'steps': 39932, 'batch_loss/train': 0.6883700140751898} +12/28/2021 05:19:03 - INFO - codeparrot_training - Step 39933: {'lr': 4.8694992194097646e-05, 'samples': 20446208, 'steps': 39933, 'batch_loss/train': 0.7810357119888067} +12/28/2021 05:19:17 - INFO - codeparrot_training - Step 39934: {'lr': 4.8685645431782834e-05, 'samples': 20446720, 'steps': 39934, 'batch_loss/train': 0.781963643617928} +12/28/2021 05:19:27 - INFO - codeparrot_training - Step 39935: {'lr': 4.867629946981772e-05, 'samples': 20447232, 'steps': 39935, 'batch_loss/train': 0.7267339611425996} +12/28/2021 05:19:38 - INFO - codeparrot_training - Step 39936: {'lr': 4.8666954308239695e-05, 'samples': 20447744, 'steps': 39936, 'batch_loss/train': 0.6614659721963108} +12/28/2021 05:19:50 - INFO - codeparrot_training - Step 39937: {'lr': 4.865760994708593e-05, 'samples': 20448256, 'steps': 39937, 'batch_loss/train': 0.4342554225586355} +12/28/2021 05:20:01 - INFO - codeparrot_training - Step 39938: {'lr': 4.8648266386393385e-05, 'samples': 20448768, 'steps': 39938, 'batch_loss/train': 0.6929652998223901} +12/28/2021 05:20:11 - INFO - codeparrot_training - Step 39939: {'lr': 4.863892362619932e-05, 'samples': 20449280, 'steps': 39939, 'batch_loss/train': 0.5728589989012107} +12/28/2021 05:20:22 - INFO - codeparrot_training - Step 39940: {'lr': 4.862958166654086e-05, 'samples': 20449792, 'steps': 39940, 'batch_loss/train': 0.7457765117287636} +12/28/2021 05:20:34 - INFO - codeparrot_training - Step 39941: {'lr': 4.862024050745517e-05, 'samples': 20450304, 'steps': 39941, 'batch_loss/train': 0.7637727474793792} +12/28/2021 05:20:45 - INFO - codeparrot_training - Step 39942: {'lr': 4.8610900148979345e-05, 'samples': 20450816, 'steps': 39942, 'batch_loss/train': 0.7957620844244957} +12/28/2021 05:20:55 - INFO - codeparrot_training - Step 39943: {'lr': 4.860156059115053e-05, 'samples': 20451328, 'steps': 39943, 'batch_loss/train': 0.79855886567384} +12/28/2021 05:21:07 - INFO - codeparrot_training - Step 39944: {'lr': 4.859222183400588e-05, 'samples': 20451840, 'steps': 39944, 'batch_loss/train': 0.7167785735800862} +12/28/2021 05:21:18 - INFO - codeparrot_training - Step 39945: {'lr': 4.8582883877582504e-05, 'samples': 20452352, 'steps': 39945, 'batch_loss/train': 0.6854241940309294} +12/28/2021 05:21:28 - INFO - codeparrot_training - Step 39946: {'lr': 4.8573546721917525e-05, 'samples': 20452864, 'steps': 39946, 'batch_loss/train': 0.7750622876919806} +12/28/2021 05:21:41 - INFO - codeparrot_training - Step 39947: {'lr': 4.8564210367048086e-05, 'samples': 20453376, 'steps': 39947, 'batch_loss/train': 0.6676519424654543} +12/28/2021 05:21:52 - INFO - codeparrot_training - Step 39948: {'lr': 4.8554874813011354e-05, 'samples': 20453888, 'steps': 39948, 'batch_loss/train': 0.6413571778684855} +12/28/2021 05:22:02 - INFO - codeparrot_training - Step 39949: {'lr': 4.8545540059844295e-05, 'samples': 20454400, 'steps': 39949, 'batch_loss/train': 0.8233997211791575} +12/28/2021 05:22:17 - INFO - codeparrot_training - Step 39950: {'lr': 4.853620610758403e-05, 'samples': 20454912, 'steps': 39950, 'batch_loss/train': 0.7307212052401155} +12/28/2021 05:22:27 - INFO - codeparrot_training - Step 39951: {'lr': 4.852687295626787e-05, 'samples': 20455424, 'steps': 39951, 'batch_loss/train': 0.648082387400791} +12/28/2021 05:22:38 - INFO - codeparrot_training - Step 39952: {'lr': 4.8517540605932744e-05, 'samples': 20455936, 'steps': 39952, 'batch_loss/train': 0.7062582885846496} +12/28/2021 05:22:49 - INFO - codeparrot_training - Step 39953: {'lr': 4.850820905661579e-05, 'samples': 20456448, 'steps': 39953, 'batch_loss/train': 0.697128121741116} +12/28/2021 05:23:01 - INFO - codeparrot_training - Step 39954: {'lr': 4.8498878308354115e-05, 'samples': 20456960, 'steps': 39954, 'batch_loss/train': 0.6733671426773071} +12/28/2021 05:23:11 - INFO - codeparrot_training - Step 39955: {'lr': 4.8489548361184825e-05, 'samples': 20457472, 'steps': 39955, 'batch_loss/train': 0.7609276305884123} +12/28/2021 05:23:22 - INFO - codeparrot_training - Step 39956: {'lr': 4.8480219215145e-05, 'samples': 20457984, 'steps': 39956, 'batch_loss/train': 0.9310565600171685} +12/28/2021 05:23:34 - INFO - codeparrot_training - Step 39957: {'lr': 4.847089087027173e-05, 'samples': 20458496, 'steps': 39957, 'batch_loss/train': 0.7762394770979881} +12/28/2021 05:23:45 - INFO - codeparrot_training - Step 39958: {'lr': 4.846156332660209e-05, 'samples': 20459008, 'steps': 39958, 'batch_loss/train': 0.648093085270375} +12/28/2021 05:23:56 - INFO - codeparrot_training - Step 39959: {'lr': 4.8452236584173194e-05, 'samples': 20459520, 'steps': 39959, 'batch_loss/train': 0.6069926344789565} +12/28/2021 05:24:09 - INFO - codeparrot_training - Step 39960: {'lr': 4.844291064302217e-05, 'samples': 20460032, 'steps': 39960, 'batch_loss/train': 0.8443022691644728} +12/28/2021 05:24:20 - INFO - codeparrot_training - Step 39961: {'lr': 4.843358550318588e-05, 'samples': 20460544, 'steps': 39961, 'batch_loss/train': 0.7688412666320801} +12/28/2021 05:24:31 - INFO - codeparrot_training - Step 39962: {'lr': 4.8424261164701636e-05, 'samples': 20461056, 'steps': 39962, 'batch_loss/train': 0.7510229385225102} +12/28/2021 05:24:41 - INFO - codeparrot_training - Step 39963: {'lr': 4.841493762760646e-05, 'samples': 20461568, 'steps': 39963, 'batch_loss/train': 0.7107068495824933} +12/28/2021 05:24:53 - INFO - codeparrot_training - Step 39964: {'lr': 4.8405614891937316e-05, 'samples': 20462080, 'steps': 39964, 'batch_loss/train': 0.7134862234815955} +12/28/2021 05:25:04 - INFO - codeparrot_training - Step 39965: {'lr': 4.8396292957731345e-05, 'samples': 20462592, 'steps': 39965, 'batch_loss/train': 0.7644257177598774} +12/28/2021 05:25:15 - INFO - codeparrot_training - Step 39966: {'lr': 4.838697182502558e-05, 'samples': 20463104, 'steps': 39966, 'batch_loss/train': 0.7466722251847386} +12/28/2021 05:25:29 - INFO - codeparrot_training - Step 39967: {'lr': 4.837765149385709e-05, 'samples': 20463616, 'steps': 39967, 'batch_loss/train': 0.7462595161050558} +12/28/2021 05:25:39 - INFO - codeparrot_training - Step 39968: {'lr': 4.8368331964262904e-05, 'samples': 20464128, 'steps': 39968, 'batch_loss/train': 0.7238707341020927} +12/28/2021 05:25:50 - INFO - codeparrot_training - Step 39969: {'lr': 4.8359013236280143e-05, 'samples': 20464640, 'steps': 39969, 'batch_loss/train': 0.7272126708412543} +12/28/2021 05:26:02 - INFO - codeparrot_training - Step 39970: {'lr': 4.834969530994576e-05, 'samples': 20465152, 'steps': 39970, 'batch_loss/train': 0.7447898250538856} +12/28/2021 05:26:13 - INFO - codeparrot_training - Step 39971: {'lr': 4.834037818529688e-05, 'samples': 20465664, 'steps': 39971, 'batch_loss/train': 0.6997296211775392} +12/28/2021 05:26:23 - INFO - codeparrot_training - Step 39972: {'lr': 4.833106186237052e-05, 'samples': 20466176, 'steps': 39972, 'batch_loss/train': 0.7551518427208066} +12/28/2021 05:26:34 - INFO - codeparrot_training - Step 39973: {'lr': 4.8321746341203676e-05, 'samples': 20466688, 'steps': 39973, 'batch_loss/train': 0.7469924897886813} +12/28/2021 05:26:46 - INFO - codeparrot_training - Step 39974: {'lr': 4.831243162183352e-05, 'samples': 20467200, 'steps': 39974, 'batch_loss/train': 1.461985974572599} +12/28/2021 05:26:57 - INFO - codeparrot_training - Step 39975: {'lr': 4.830311770429688e-05, 'samples': 20467712, 'steps': 39975, 'batch_loss/train': 0.7708878261037171} +12/28/2021 05:27:07 - INFO - codeparrot_training - Step 39976: {'lr': 4.829380458863086e-05, 'samples': 20468224, 'steps': 39976, 'batch_loss/train': 0.7244509160518646} +12/28/2021 05:27:21 - INFO - codeparrot_training - Step 39977: {'lr': 4.828449227487261e-05, 'samples': 20468736, 'steps': 39977, 'batch_loss/train': 0.7400825284421444} +12/28/2021 05:27:32 - INFO - codeparrot_training - Step 39978: {'lr': 4.827518076305901e-05, 'samples': 20469248, 'steps': 39978, 'batch_loss/train': 0.7957007819786668} +12/28/2021 05:27:42 - INFO - codeparrot_training - Step 39979: {'lr': 4.826587005322711e-05, 'samples': 20469760, 'steps': 39979, 'batch_loss/train': 0.7145046535879374} +12/28/2021 05:27:55 - INFO - codeparrot_training - Step 39980: {'lr': 4.825656014541393e-05, 'samples': 20470272, 'steps': 39980, 'batch_loss/train': 0.6844358865637332} +12/28/2021 05:28:06 - INFO - codeparrot_training - Step 39981: {'lr': 4.824725103965652e-05, 'samples': 20470784, 'steps': 39981, 'batch_loss/train': 0.630889365915209} +12/28/2021 05:28:17 - INFO - codeparrot_training - Step 39982: {'lr': 4.823794273599183e-05, 'samples': 20471296, 'steps': 39982, 'batch_loss/train': 0.9450323438504711} +12/28/2021 05:28:27 - INFO - codeparrot_training - Step 39983: {'lr': 4.822863523445692e-05, 'samples': 20471808, 'steps': 39983, 'batch_loss/train': 0.8667396260425448} +12/28/2021 05:28:41 - INFO - codeparrot_training - Step 39984: {'lr': 4.8219328535088755e-05, 'samples': 20472320, 'steps': 39984, 'batch_loss/train': 0.7474055122584105} +12/28/2021 05:28:52 - INFO - codeparrot_training - Step 39985: {'lr': 4.821002263792437e-05, 'samples': 20472832, 'steps': 39985, 'batch_loss/train': 0.9616368832066655} +12/28/2021 05:29:03 - INFO - codeparrot_training - Step 39986: {'lr': 4.8200717543000785e-05, 'samples': 20473344, 'steps': 39986, 'batch_loss/train': 0.8350311927497387} +12/28/2021 05:29:15 - INFO - codeparrot_training - Step 39987: {'lr': 4.8191413250354834e-05, 'samples': 20473856, 'steps': 39987, 'batch_loss/train': 0.7804884095676243} +12/28/2021 05:29:25 - INFO - codeparrot_training - Step 39988: {'lr': 4.818210976002368e-05, 'samples': 20474368, 'steps': 39988, 'batch_loss/train': 0.7445569094270468} +12/28/2021 05:29:36 - INFO - codeparrot_training - Step 39989: {'lr': 4.8172807072044326e-05, 'samples': 20474880, 'steps': 39989, 'batch_loss/train': 0.665947736473754} +12/28/2021 05:29:47 - INFO - codeparrot_training - Step 39990: {'lr': 4.816350518645363e-05, 'samples': 20475392, 'steps': 39990, 'batch_loss/train': 0.5766502392943949} +12/28/2021 05:29:59 - INFO - codeparrot_training - Step 39991: {'lr': 4.815420410328855e-05, 'samples': 20475904, 'steps': 39991, 'batch_loss/train': 0.6910803131759167} +12/28/2021 05:30:10 - INFO - codeparrot_training - Step 39992: {'lr': 4.8144903822586264e-05, 'samples': 20476416, 'steps': 39992, 'batch_loss/train': 0.8380065956152976} +12/28/2021 05:30:20 - INFO - codeparrot_training - Step 39993: {'lr': 4.813560434438358e-05, 'samples': 20476928, 'steps': 39993, 'batch_loss/train': 0.7213678297703154} +12/28/2021 05:30:32 - INFO - codeparrot_training - Step 39994: {'lr': 4.8126305668717476e-05, 'samples': 20477440, 'steps': 39994, 'batch_loss/train': 0.7722652526572347} +12/28/2021 05:30:43 - INFO - codeparrot_training - Step 39995: {'lr': 4.811700779562497e-05, 'samples': 20477952, 'steps': 39995, 'batch_loss/train': 0.7667667725472711} +12/28/2021 05:30:54 - INFO - codeparrot_training - Step 39996: {'lr': 4.810771072514303e-05, 'samples': 20478464, 'steps': 39996, 'batch_loss/train': 0.6698966561816633} +12/28/2021 05:31:08 - INFO - codeparrot_training - Step 39997: {'lr': 4.809841445730859e-05, 'samples': 20478976, 'steps': 39997, 'batch_loss/train': 0.6796284410520457} +12/28/2021 05:31:18 - INFO - codeparrot_training - Step 39998: {'lr': 4.808911899215862e-05, 'samples': 20479488, 'steps': 39998, 'batch_loss/train': 0.6249241037294269} +12/28/2021 05:31:29 - INFO - codeparrot_training - Step 39999: {'lr': 4.807982432973007e-05, 'samples': 20480000, 'steps': 39999, 'batch_loss/train': 0.7455571456812322} +12/28/2021 05:31:29 - INFO - codeparrot_training - Evaluating and saving model checkpoint +12/28/2021 05:34:50 - INFO - codeparrot_training - Step 40000: {'loss/eval': 0.7470561265945435, 'perplexity': 2.110776901245117}