diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -42579,3 +42579,2506 @@ Use FP16 precision: False 12/28/2021 05:31:29 - INFO - codeparrot_training - Step 39999: {'lr': 4.807982432973007e-05, 'samples': 20480000, 'steps': 39999, 'batch_loss/train': 0.7455571456812322} 12/28/2021 05:31:29 - INFO - codeparrot_training - Evaluating and saving model checkpoint 12/28/2021 05:34:50 - INFO - codeparrot_training - Step 40000: {'loss/eval': 0.7470561265945435, 'perplexity': 2.110776901245117} +12/28/2021 05:35:08 - WARNING - huggingface_hub.repository - Several commits (14) will be pushed upstream. +12/28/2021 05:35:22 - INFO - codeparrot_training - Step 40000: {'lr': 4.8070530470059906e-05, 'samples': 20480512, 'steps': 40000, 'batch_loss/train': 0.7726424112915993} +12/28/2021 05:35:34 - INFO - codeparrot_training - Step 40001: {'lr': 4.806123741318513e-05, 'samples': 20481024, 'steps': 40001, 'batch_loss/train': 0.800928701646626} +12/28/2021 05:35:45 - INFO - codeparrot_training - Step 40002: {'lr': 4.805194515914249e-05, 'samples': 20481536, 'steps': 40002, 'batch_loss/train': 0.8114345027133822} +12/28/2021 05:35:55 - INFO - codeparrot_training - Step 40003: {'lr': 4.804265370796918e-05, 'samples': 20482048, 'steps': 40003, 'batch_loss/train': 0.7743837628513575} +12/28/2021 05:36:07 - INFO - codeparrot_training - Step 40004: {'lr': 4.803336305970199e-05, 'samples': 20482560, 'steps': 40004, 'batch_loss/train': 0.7756875240593217} +12/28/2021 05:36:18 - INFO - codeparrot_training - Step 40005: {'lr': 4.802407321437785e-05, 'samples': 20483072, 'steps': 40005, 'batch_loss/train': 0.7255599570926279} +12/28/2021 05:36:29 - INFO - codeparrot_training - Step 40006: {'lr': 4.8014784172033756e-05, 'samples': 20483584, 'steps': 40006, 'batch_loss/train': 0.7071920777671039} +12/28/2021 05:36:42 - INFO - codeparrot_training - Step 40007: {'lr': 4.8005495932706616e-05, 'samples': 20484096, 'steps': 40007, 'batch_loss/train': 0.7917331256903708} +12/28/2021 05:36:53 - INFO - codeparrot_training - Step 40008: {'lr': 4.7996208496433354e-05, 'samples': 20484608, 'steps': 40008, 'batch_loss/train': 0.815594139508903} +12/28/2021 05:37:04 - INFO - codeparrot_training - Step 40009: {'lr': 4.7986921863250874e-05, 'samples': 20485120, 'steps': 40009, 'batch_loss/train': 0.7863869220018387} +12/28/2021 05:37:14 - INFO - codeparrot_training - Step 40010: {'lr': 4.7977636033196134e-05, 'samples': 20485632, 'steps': 40010, 'batch_loss/train': 0.9276824467815459} +12/28/2021 05:37:27 - INFO - codeparrot_training - Step 40011: {'lr': 4.796835100630603e-05, 'samples': 20486144, 'steps': 40011, 'batch_loss/train': 0.6679785071173683} +12/28/2021 05:37:37 - INFO - codeparrot_training - Step 40012: {'lr': 4.795906678261755e-05, 'samples': 20486656, 'steps': 40012, 'batch_loss/train': 0.9688804498873651} +12/28/2021 05:37:48 - INFO - codeparrot_training - Step 40013: {'lr': 4.7949783362167375e-05, 'samples': 20487168, 'steps': 40013, 'batch_loss/train': 0.7663782855379395} +12/28/2021 05:38:01 - INFO - codeparrot_training - Step 40014: {'lr': 4.794050074499265e-05, 'samples': 20487680, 'steps': 40014, 'batch_loss/train': 0.6950382841750979} +12/28/2021 05:38:12 - INFO - codeparrot_training - Step 40015: {'lr': 4.7931218931130283e-05, 'samples': 20488192, 'steps': 40015, 'batch_loss/train': 0.6488298668409698} +12/28/2021 05:38:22 - INFO - codeparrot_training - Step 40016: {'lr': 4.7921937920617e-05, 'samples': 20488704, 'steps': 40016, 'batch_loss/train': 0.8357140067964792} +12/28/2021 05:38:34 - INFO - codeparrot_training - Step 40017: {'lr': 4.7912657713489735e-05, 'samples': 20489216, 'steps': 40017, 'batch_loss/train': 0.7229162203148007} +12/28/2021 05:38:45 - INFO - codeparrot_training - Step 40018: {'lr': 4.790337830978558e-05, 'samples': 20489728, 'steps': 40018, 'batch_loss/train': 1.463360167806968} +12/28/2021 05:38:56 - INFO - codeparrot_training - Step 40019: {'lr': 4.78940997095412e-05, 'samples': 20490240, 'steps': 40019, 'batch_loss/train': 0.7067258744500577} +12/28/2021 05:39:06 - INFO - codeparrot_training - Step 40020: {'lr': 4.788482191279359e-05, 'samples': 20490752, 'steps': 40020, 'batch_loss/train': 0.6605885562021285} +12/28/2021 05:39:18 - INFO - codeparrot_training - Step 40021: {'lr': 4.78755449195796e-05, 'samples': 20491264, 'steps': 40021, 'batch_loss/train': 0.7995709789101966} +12/28/2021 05:39:29 - INFO - codeparrot_training - Step 40022: {'lr': 4.78662687299361e-05, 'samples': 20491776, 'steps': 40022, 'batch_loss/train': 1.5829131617210805} +12/28/2021 05:39:40 - INFO - codeparrot_training - Step 40023: {'lr': 4.785699334390003e-05, 'samples': 20492288, 'steps': 40023, 'batch_loss/train': 0.8458932721987367} +12/28/2021 05:39:54 - INFO - codeparrot_training - Step 40024: {'lr': 4.78477187615082e-05, 'samples': 20492800, 'steps': 40024, 'batch_loss/train': 0.7342158211395144} +12/28/2021 05:40:04 - INFO - codeparrot_training - Step 40025: {'lr': 4.7838444982797515e-05, 'samples': 20493312, 'steps': 40025, 'batch_loss/train': 0.6796498766634613} +12/28/2021 05:40:15 - INFO - codeparrot_training - Step 40026: {'lr': 4.782917200780487e-05, 'samples': 20493824, 'steps': 40026, 'batch_loss/train': 0.745524940546602} +12/28/2021 05:40:27 - INFO - codeparrot_training - Step 40027: {'lr': 4.781989983656712e-05, 'samples': 20494336, 'steps': 40027, 'batch_loss/train': 0.6622004909440875} +12/28/2021 05:40:38 - INFO - codeparrot_training - Step 40028: {'lr': 4.7810628469121015e-05, 'samples': 20494848, 'steps': 40028, 'batch_loss/train': 0.6874584686011076} +12/28/2021 05:40:49 - INFO - codeparrot_training - Step 40029: {'lr': 4.7801357905503633e-05, 'samples': 20495360, 'steps': 40029, 'batch_loss/train': 0.817433814983815} +12/28/2021 05:40:59 - INFO - codeparrot_training - Step 40030: {'lr': 4.7792088145751615e-05, 'samples': 20495872, 'steps': 40030, 'batch_loss/train': 0.8729924038052559} +12/28/2021 05:41:11 - INFO - codeparrot_training - Step 40031: {'lr': 4.778281918990185e-05, 'samples': 20496384, 'steps': 40031, 'batch_loss/train': 0.811583916656673} +12/28/2021 05:41:22 - INFO - codeparrot_training - Step 40032: {'lr': 4.7773551037991385e-05, 'samples': 20496896, 'steps': 40032, 'batch_loss/train': 0.6568457271787338} +12/28/2021 05:41:33 - INFO - codeparrot_training - Step 40033: {'lr': 4.7764283690056864e-05, 'samples': 20497408, 'steps': 40033, 'batch_loss/train': 0.7364921532571316} +12/28/2021 05:41:47 - INFO - codeparrot_training - Step 40034: {'lr': 4.775501714613517e-05, 'samples': 20497920, 'steps': 40034, 'batch_loss/train': 0.7001315681263804} +12/28/2021 05:41:57 - INFO - codeparrot_training - Step 40035: {'lr': 4.7745751406263163e-05, 'samples': 20498432, 'steps': 40035, 'batch_loss/train': 0.7493197584990412} +12/28/2021 05:42:08 - INFO - codeparrot_training - Step 40036: {'lr': 4.773648647047768e-05, 'samples': 20498944, 'steps': 40036, 'batch_loss/train': 0.8795939201954752} +12/28/2021 05:42:20 - INFO - codeparrot_training - Step 40037: {'lr': 4.772722233881557e-05, 'samples': 20499456, 'steps': 40037, 'batch_loss/train': 0.8135405406355858} +12/28/2021 05:42:31 - INFO - codeparrot_training - Step 40038: {'lr': 4.7717959011313705e-05, 'samples': 20499968, 'steps': 40038, 'batch_loss/train': 0.6592048453167081} +12/28/2021 05:42:41 - INFO - codeparrot_training - Step 40039: {'lr': 4.770869648800871e-05, 'samples': 20500480, 'steps': 40039, 'batch_loss/train': 0.7735336166806519} +12/28/2021 05:42:52 - INFO - codeparrot_training - Step 40040: {'lr': 4.769943476893765e-05, 'samples': 20500992, 'steps': 40040, 'batch_loss/train': 0.8419398432597518} +12/28/2021 05:43:04 - INFO - codeparrot_training - Step 40041: {'lr': 4.769017385413729e-05, 'samples': 20501504, 'steps': 40041, 'batch_loss/train': 0.6829155758023262} +12/28/2021 05:43:15 - INFO - codeparrot_training - Step 40042: {'lr': 4.768091374364436e-05, 'samples': 20502016, 'steps': 40042, 'batch_loss/train': 0.6927287201397121} +12/28/2021 05:43:25 - INFO - codeparrot_training - Step 40043: {'lr': 4.767165443749566e-05, 'samples': 20502528, 'steps': 40043, 'batch_loss/train': 0.7168359580682591} +12/28/2021 05:43:39 - INFO - codeparrot_training - Step 40044: {'lr': 4.7662395935728186e-05, 'samples': 20503040, 'steps': 40044, 'batch_loss/train': 0.7643388365395367} +12/28/2021 05:43:50 - INFO - codeparrot_training - Step 40045: {'lr': 4.765313823837858e-05, 'samples': 20503552, 'steps': 40045, 'batch_loss/train': 0.7368368022143841} +12/28/2021 05:44:01 - INFO - codeparrot_training - Step 40046: {'lr': 4.764388134548361e-05, 'samples': 20504064, 'steps': 40046, 'batch_loss/train': 0.8131799167022109} +12/28/2021 05:44:13 - INFO - codeparrot_training - Step 40047: {'lr': 4.763462525708029e-05, 'samples': 20504576, 'steps': 40047, 'batch_loss/train': 0.6907230252400041} +12/28/2021 05:44:23 - INFO - codeparrot_training - Step 40048: {'lr': 4.762536997320524e-05, 'samples': 20505088, 'steps': 40048, 'batch_loss/train': 0.6759302476420999} +12/28/2021 05:44:34 - INFO - codeparrot_training - Step 40049: {'lr': 4.7616115493895304e-05, 'samples': 20505600, 'steps': 40049, 'batch_loss/train': 0.7495749788358808} +12/28/2021 05:44:45 - INFO - codeparrot_training - Step 40050: {'lr': 4.760686181918725e-05, 'samples': 20506112, 'steps': 40050, 'batch_loss/train': 0.6996154943481088} +12/28/2021 05:44:57 - INFO - codeparrot_training - Step 40051: {'lr': 4.759760894911791e-05, 'samples': 20506624, 'steps': 40051, 'batch_loss/train': 0.677469982067123} +12/28/2021 05:45:08 - INFO - codeparrot_training - Step 40052: {'lr': 4.758835688372406e-05, 'samples': 20507136, 'steps': 40052, 'batch_loss/train': 0.6714246098417789} +12/28/2021 05:45:18 - INFO - codeparrot_training - Step 40053: {'lr': 4.757910562304252e-05, 'samples': 20507648, 'steps': 40053, 'batch_loss/train': 0.6989887170493603} +12/28/2021 05:45:32 - INFO - codeparrot_training - Step 40054: {'lr': 4.756985516710991e-05, 'samples': 20508160, 'steps': 40054, 'batch_loss/train': 0.6315414141863585} +12/28/2021 05:45:43 - INFO - codeparrot_training - Step 40055: {'lr': 4.756060551596325e-05, 'samples': 20508672, 'steps': 40055, 'batch_loss/train': 0.6698095900937915} +12/28/2021 05:45:54 - INFO - codeparrot_training - Step 40056: {'lr': 4.7551356669639115e-05, 'samples': 20509184, 'steps': 40056, 'batch_loss/train': 0.7954205675050616} +12/28/2021 05:46:06 - INFO - codeparrot_training - Step 40057: {'lr': 4.754210862817429e-05, 'samples': 20509696, 'steps': 40057, 'batch_loss/train': 0.8081500781700015} +12/28/2021 05:46:17 - INFO - codeparrot_training - Step 40058: {'lr': 4.753286139160573e-05, 'samples': 20510208, 'steps': 40058, 'batch_loss/train': 0.6933741185348481} +12/28/2021 05:46:27 - INFO - codeparrot_training - Step 40059: {'lr': 4.752361495996998e-05, 'samples': 20510720, 'steps': 40059, 'batch_loss/train': 0.7685907443519682} +12/28/2021 05:46:38 - INFO - codeparrot_training - Step 40060: {'lr': 4.751436933330389e-05, 'samples': 20511232, 'steps': 40060, 'batch_loss/train': 0.7637361148372293} +12/28/2021 05:46:50 - INFO - codeparrot_training - Step 40061: {'lr': 4.7505124511644207e-05, 'samples': 20511744, 'steps': 40061, 'batch_loss/train': 0.7806929033249617} +12/28/2021 05:47:01 - INFO - codeparrot_training - Step 40062: {'lr': 4.7495880495027704e-05, 'samples': 20512256, 'steps': 40062, 'batch_loss/train': 0.680672881193459} +12/28/2021 05:47:11 - INFO - codeparrot_training - Step 40063: {'lr': 4.748663728349112e-05, 'samples': 20512768, 'steps': 40063, 'batch_loss/train': 0.6434937492012978} +12/28/2021 05:47:25 - INFO - codeparrot_training - Step 40064: {'lr': 4.7477394877071254e-05, 'samples': 20513280, 'steps': 40064, 'batch_loss/train': 0.6964603730011731} +12/28/2021 05:47:36 - INFO - codeparrot_training - Step 40065: {'lr': 4.7468153275804673e-05, 'samples': 20513792, 'steps': 40065, 'batch_loss/train': 0.7641759514808655} +12/28/2021 05:47:47 - INFO - codeparrot_training - Step 40066: {'lr': 4.7458912479728304e-05, 'samples': 20514304, 'steps': 40066, 'batch_loss/train': 0.6509502637200058} +12/28/2021 05:47:59 - INFO - codeparrot_training - Step 40067: {'lr': 4.744967248887888e-05, 'samples': 20514816, 'steps': 40067, 'batch_loss/train': 0.7363841221667826} +12/28/2021 05:48:09 - INFO - codeparrot_training - Step 40068: {'lr': 4.744043330329295e-05, 'samples': 20515328, 'steps': 40068, 'batch_loss/train': 0.9020786052569747} +12/28/2021 05:48:20 - INFO - codeparrot_training - Step 40069: {'lr': 4.743119492300746e-05, 'samples': 20515840, 'steps': 40069, 'batch_loss/train': 0.6640748856589198} +12/28/2021 05:48:31 - INFO - codeparrot_training - Step 40070: {'lr': 4.7421957348059096e-05, 'samples': 20516352, 'steps': 40070, 'batch_loss/train': 0.6751219858415425} +12/28/2021 05:48:43 - INFO - codeparrot_training - Step 40071: {'lr': 4.7412720578484474e-05, 'samples': 20516864, 'steps': 40071, 'batch_loss/train': 0.7810753248631954} +12/28/2021 05:48:54 - INFO - codeparrot_training - Step 40072: {'lr': 4.740348461432031e-05, 'samples': 20517376, 'steps': 40072, 'batch_loss/train': 0.6616936158388853} +12/28/2021 05:49:04 - INFO - codeparrot_training - Step 40073: {'lr': 4.739424945560353e-05, 'samples': 20517888, 'steps': 40073, 'batch_loss/train': 0.5942631929647177} +12/28/2021 05:49:16 - INFO - codeparrot_training - Step 40074: {'lr': 4.738501510237067e-05, 'samples': 20518400, 'steps': 40074, 'batch_loss/train': 0.701023300876841} +12/28/2021 05:49:27 - INFO - codeparrot_training - Step 40075: {'lr': 4.737578155465847e-05, 'samples': 20518912, 'steps': 40075, 'batch_loss/train': 0.6186836792621762} +12/28/2021 05:49:38 - INFO - codeparrot_training - Step 40076: {'lr': 4.736654881250363e-05, 'samples': 20519424, 'steps': 40076, 'batch_loss/train': 0.7556325769983232} +12/28/2021 05:49:52 - INFO - codeparrot_training - Step 40077: {'lr': 4.735731687594291e-05, 'samples': 20519936, 'steps': 40077, 'batch_loss/train': 0.8123668618500233} +12/28/2021 05:50:02 - INFO - codeparrot_training - Step 40078: {'lr': 4.734808574501298e-05, 'samples': 20520448, 'steps': 40078, 'batch_loss/train': 0.7661389694549143} +12/28/2021 05:50:13 - INFO - codeparrot_training - Step 40079: {'lr': 4.733885541975061e-05, 'samples': 20520960, 'steps': 40079, 'batch_loss/train': 0.6948056421242654} +12/28/2021 05:50:24 - INFO - codeparrot_training - Step 40080: {'lr': 4.73296259001923e-05, 'samples': 20521472, 'steps': 40080, 'batch_loss/train': 0.6982705732807517} +12/28/2021 05:50:36 - INFO - codeparrot_training - Step 40081: {'lr': 4.73203971863749e-05, 'samples': 20521984, 'steps': 40081, 'batch_loss/train': 0.8135706642642617} +12/28/2021 05:50:47 - INFO - codeparrot_training - Step 40082: {'lr': 4.731116927833517e-05, 'samples': 20522496, 'steps': 40082, 'batch_loss/train': 0.6406755186617374} +12/28/2021 05:50:57 - INFO - codeparrot_training - Step 40083: {'lr': 4.7301942176109585e-05, 'samples': 20523008, 'steps': 40083, 'batch_loss/train': 1.0748769277706742} +12/28/2021 05:51:11 - INFO - codeparrot_training - Step 40084: {'lr': 4.729271587973505e-05, 'samples': 20523520, 'steps': 40084, 'batch_loss/train': 0.6860901620239019} +12/28/2021 05:51:22 - INFO - codeparrot_training - Step 40085: {'lr': 4.728349038924806e-05, 'samples': 20524032, 'steps': 40085, 'batch_loss/train': 0.7516537010669708} +12/28/2021 05:51:32 - INFO - codeparrot_training - Step 40086: {'lr': 4.72742657046854e-05, 'samples': 20524544, 'steps': 40086, 'batch_loss/train': 0.8911277304869145} +12/28/2021 05:51:44 - INFO - codeparrot_training - Step 40087: {'lr': 4.726504182608368e-05, 'samples': 20525056, 'steps': 40087, 'batch_loss/train': 0.781242098659277} +12/28/2021 05:51:55 - INFO - codeparrot_training - Step 40088: {'lr': 4.725581875347962e-05, 'samples': 20525568, 'steps': 40088, 'batch_loss/train': 0.7306082248687744} +12/28/2021 05:52:06 - INFO - codeparrot_training - Step 40089: {'lr': 4.7246596486909875e-05, 'samples': 20526080, 'steps': 40089, 'batch_loss/train': 0.7065264428965747} +12/28/2021 05:52:16 - INFO - codeparrot_training - Step 40090: {'lr': 4.7237375026411125e-05, 'samples': 20526592, 'steps': 40090, 'batch_loss/train': 0.8349707731977105} +12/28/2021 05:52:28 - INFO - codeparrot_training - Step 40091: {'lr': 4.7228154372019964e-05, 'samples': 20527104, 'steps': 40091, 'batch_loss/train': 0.7017207755707204} +12/28/2021 05:52:39 - INFO - codeparrot_training - Step 40092: {'lr': 4.7218934523773125e-05, 'samples': 20527616, 'steps': 40092, 'batch_loss/train': 0.704219285864383} +12/28/2021 05:52:50 - INFO - codeparrot_training - Step 40093: {'lr': 4.72097154817073e-05, 'samples': 20528128, 'steps': 40093, 'batch_loss/train': 0.7250997233204544} +12/28/2021 05:53:02 - INFO - codeparrot_training - Step 40094: {'lr': 4.720049724585895e-05, 'samples': 20528640, 'steps': 40094, 'batch_loss/train': 0.7830549101345241} +12/28/2021 05:53:13 - INFO - codeparrot_training - Step 40095: {'lr': 4.719127981626492e-05, 'samples': 20529152, 'steps': 40095, 'batch_loss/train': 0.814109405502677} +12/28/2021 05:53:24 - INFO - codeparrot_training - Step 40096: {'lr': 4.718206319296184e-05, 'samples': 20529664, 'steps': 40096, 'batch_loss/train': 0.7858811332844198} +12/28/2021 05:53:36 - INFO - codeparrot_training - Step 40097: {'lr': 4.717284737598621e-05, 'samples': 20530176, 'steps': 40097, 'batch_loss/train': 0.7556718182750046} +12/28/2021 05:53:46 - INFO - codeparrot_training - Step 40098: {'lr': 4.7163632365374714e-05, 'samples': 20530688, 'steps': 40098, 'batch_loss/train': 0.6868764595128596} +12/28/2021 05:53:57 - INFO - codeparrot_training - Step 40099: {'lr': 4.7154418161164145e-05, 'samples': 20531200, 'steps': 40099, 'batch_loss/train': 0.7651700675487518} +12/28/2021 05:54:08 - INFO - codeparrot_training - Step 40100: {'lr': 4.714520476339096e-05, 'samples': 20531712, 'steps': 40100, 'batch_loss/train': 0.7147667082026601} +12/28/2021 05:54:21 - INFO - codeparrot_training - Step 40101: {'lr': 4.7135992172091855e-05, 'samples': 20532224, 'steps': 40101, 'batch_loss/train': 0.6884416316170245} +12/28/2021 05:54:31 - INFO - codeparrot_training - Step 40102: {'lr': 4.712678038730342e-05, 'samples': 20532736, 'steps': 40102, 'batch_loss/train': 0.805068395100534} +12/28/2021 05:54:42 - INFO - codeparrot_training - Step 40103: {'lr': 4.711756940906234e-05, 'samples': 20533248, 'steps': 40103, 'batch_loss/train': 0.8814319018274546} +12/28/2021 05:54:54 - INFO - codeparrot_training - Step 40104: {'lr': 4.710835923740517e-05, 'samples': 20533760, 'steps': 40104, 'batch_loss/train': 0.7640890548937023} +12/28/2021 05:55:05 - INFO - codeparrot_training - Step 40105: {'lr': 4.709914987236855e-05, 'samples': 20534272, 'steps': 40105, 'batch_loss/train': 0.7451956603908911} +12/28/2021 05:55:15 - INFO - codeparrot_training - Step 40106: {'lr': 4.7089941313989106e-05, 'samples': 20534784, 'steps': 40106, 'batch_loss/train': 0.7695491751655936} +12/28/2021 05:55:27 - INFO - codeparrot_training - Step 40107: {'lr': 4.708073356230347e-05, 'samples': 20535296, 'steps': 40107, 'batch_loss/train': 0.6959663246525452} +12/28/2021 05:55:38 - INFO - codeparrot_training - Step 40108: {'lr': 4.707152661734826e-05, 'samples': 20535808, 'steps': 40108, 'batch_loss/train': 0.6380113493651152} +12/28/2021 05:55:49 - INFO - codeparrot_training - Step 40109: {'lr': 4.70623204791599e-05, 'samples': 20536320, 'steps': 40109, 'batch_loss/train': 0.8995381304994226} +12/28/2021 05:55:59 - INFO - codeparrot_training - Step 40110: {'lr': 4.7053115147775245e-05, 'samples': 20536832, 'steps': 40110, 'batch_loss/train': 0.7362576564773917} +12/28/2021 05:56:11 - INFO - codeparrot_training - Step 40111: {'lr': 4.704391062323074e-05, 'samples': 20537344, 'steps': 40111, 'batch_loss/train': 0.7188084074296057} +12/28/2021 05:56:22 - INFO - codeparrot_training - Step 40112: {'lr': 4.7034706905562994e-05, 'samples': 20537856, 'steps': 40112, 'batch_loss/train': 0.691858347505331} +12/28/2021 05:56:33 - INFO - codeparrot_training - Step 40113: {'lr': 4.702550399480862e-05, 'samples': 20538368, 'steps': 40113, 'batch_loss/train': 0.9134210618212819} +12/28/2021 05:56:46 - INFO - codeparrot_training - Step 40114: {'lr': 4.70163018910042e-05, 'samples': 20538880, 'steps': 40114, 'batch_loss/train': 0.7380348790320568} +12/28/2021 05:56:56 - INFO - codeparrot_training - Step 40115: {'lr': 4.7007100594186334e-05, 'samples': 20539392, 'steps': 40115, 'batch_loss/train': 0.7254260383779183} +12/28/2021 05:57:07 - INFO - codeparrot_training - Step 40116: {'lr': 4.6997900104391586e-05, 'samples': 20539904, 'steps': 40116, 'batch_loss/train': 0.6438292134553194} +12/28/2021 05:57:19 - INFO - codeparrot_training - Step 40117: {'lr': 4.698870042165651e-05, 'samples': 20540416, 'steps': 40117, 'batch_loss/train': 0.7268433021381497} +12/28/2021 05:57:30 - INFO - codeparrot_training - Step 40118: {'lr': 4.697950154601774e-05, 'samples': 20540928, 'steps': 40118, 'batch_loss/train': 0.792010001372546} +12/28/2021 05:57:40 - INFO - codeparrot_training - Step 40119: {'lr': 4.697030347751186e-05, 'samples': 20541440, 'steps': 40119, 'batch_loss/train': 0.7561102127656341} +12/28/2021 05:57:51 - INFO - codeparrot_training - Step 40120: {'lr': 4.696110621617527e-05, 'samples': 20541952, 'steps': 40120, 'batch_loss/train': 0.7290724692866206} +12/28/2021 05:58:03 - INFO - codeparrot_training - Step 40121: {'lr': 4.695190976204472e-05, 'samples': 20542464, 'steps': 40121, 'batch_loss/train': 0.8543835822492838} +12/28/2021 05:58:14 - INFO - codeparrot_training - Step 40122: {'lr': 4.694271411515677e-05, 'samples': 20542976, 'steps': 40122, 'batch_loss/train': 0.6849077336955816} +12/28/2021 05:58:25 - INFO - codeparrot_training - Step 40123: {'lr': 4.693351927554787e-05, 'samples': 20543488, 'steps': 40123, 'batch_loss/train': 0.593883607420139} +12/28/2021 05:58:37 - INFO - codeparrot_training - Step 40124: {'lr': 4.692432524325454e-05, 'samples': 20544000, 'steps': 40124, 'batch_loss/train': 0.7160068085649982} +12/28/2021 05:58:48 - INFO - codeparrot_training - Step 40125: {'lr': 4.6915132018313545e-05, 'samples': 20544512, 'steps': 40125, 'batch_loss/train': 0.7517594691598788} +12/28/2021 05:58:59 - INFO - codeparrot_training - Step 40126: {'lr': 4.6905939600761256e-05, 'samples': 20545024, 'steps': 40126, 'batch_loss/train': 0.7075189519673586} +12/28/2021 05:59:11 - INFO - codeparrot_training - Step 40127: {'lr': 4.6896747990634234e-05, 'samples': 20545536, 'steps': 40127, 'batch_loss/train': 0.8129744082689285} +12/28/2021 05:59:21 - INFO - codeparrot_training - Step 40128: {'lr': 4.688755718796908e-05, 'samples': 20546048, 'steps': 40128, 'batch_loss/train': 0.7152688959613442} +12/28/2021 05:59:32 - INFO - codeparrot_training - Step 40129: {'lr': 4.687836719280231e-05, 'samples': 20546560, 'steps': 40129, 'batch_loss/train': 0.7336066528223455} +12/28/2021 05:59:43 - INFO - codeparrot_training - Step 40130: {'lr': 4.686917800517043e-05, 'samples': 20547072, 'steps': 40130, 'batch_loss/train': 0.7710396586917341} +12/28/2021 05:59:56 - INFO - codeparrot_training - Step 40131: {'lr': 4.685998962511001e-05, 'samples': 20547584, 'steps': 40131, 'batch_loss/train': 1.2028237571939826} +12/28/2021 06:00:07 - INFO - codeparrot_training - Step 40132: {'lr': 4.685080205265757e-05, 'samples': 20548096, 'steps': 40132, 'batch_loss/train': 0.6674700700677931} +12/28/2021 06:00:17 - INFO - codeparrot_training - Step 40133: {'lr': 4.6841615287849636e-05, 'samples': 20548608, 'steps': 40133, 'batch_loss/train': 0.7251418391242623} +12/28/2021 06:00:29 - INFO - codeparrot_training - Step 40134: {'lr': 4.683242933072279e-05, 'samples': 20549120, 'steps': 40134, 'batch_loss/train': 0.7454051394015551} +12/28/2021 06:00:40 - INFO - codeparrot_training - Step 40135: {'lr': 4.682324418131337e-05, 'samples': 20549632, 'steps': 40135, 'batch_loss/train': 0.7820845819078386} +12/28/2021 06:00:51 - INFO - codeparrot_training - Step 40136: {'lr': 4.6814059839658086e-05, 'samples': 20550144, 'steps': 40136, 'batch_loss/train': 0.7339739399030805} +12/28/2021 06:01:01 - INFO - codeparrot_training - Step 40137: {'lr': 4.680487630579344e-05, 'samples': 20550656, 'steps': 40137, 'batch_loss/train': 0.8311495203524828} +12/28/2021 06:01:13 - INFO - codeparrot_training - Step 40138: {'lr': 4.67956935797558e-05, 'samples': 20551168, 'steps': 40138, 'batch_loss/train': 0.7540168245323002} +12/28/2021 06:01:24 - INFO - codeparrot_training - Step 40139: {'lr': 4.6786511661581765e-05, 'samples': 20551680, 'steps': 40139, 'batch_loss/train': 0.797098396345973} +12/28/2021 06:01:35 - INFO - codeparrot_training - Step 40140: {'lr': 4.677733055130784e-05, 'samples': 20552192, 'steps': 40140, 'batch_loss/train': 0.7272909586317837} +12/28/2021 06:01:48 - INFO - codeparrot_training - Step 40141: {'lr': 4.676815024897052e-05, 'samples': 20552704, 'steps': 40141, 'batch_loss/train': 0.7645660217385739} +12/28/2021 06:01:58 - INFO - codeparrot_training - Step 40142: {'lr': 4.675897075460628e-05, 'samples': 20553216, 'steps': 40142, 'batch_loss/train': 1.5937109971418977} +12/28/2021 06:02:09 - INFO - codeparrot_training - Step 40143: {'lr': 4.674979206825164e-05, 'samples': 20553728, 'steps': 40143, 'batch_loss/train': 0.8191686095669866} +12/28/2021 06:02:21 - INFO - codeparrot_training - Step 40144: {'lr': 4.6740614189943075e-05, 'samples': 20554240, 'steps': 40144, 'batch_loss/train': 0.7516524894163013} +12/28/2021 06:02:32 - INFO - codeparrot_training - Step 40145: {'lr': 4.673143711971714e-05, 'samples': 20554752, 'steps': 40145, 'batch_loss/train': 0.9289237782359123} +12/28/2021 06:02:42 - INFO - codeparrot_training - Step 40146: {'lr': 4.672226085761014e-05, 'samples': 20555264, 'steps': 40146, 'batch_loss/train': 0.7269920455291867} +12/28/2021 06:02:53 - INFO - codeparrot_training - Step 40147: {'lr': 4.671308540365873e-05, 'samples': 20555776, 'steps': 40147, 'batch_loss/train': 0.7842437736690044} +12/28/2021 06:03:06 - INFO - codeparrot_training - Step 40148: {'lr': 4.6703910757899425e-05, 'samples': 20556288, 'steps': 40148, 'batch_loss/train': 0.7669282825663686} +12/28/2021 06:03:16 - INFO - codeparrot_training - Step 40149: {'lr': 4.669473692036852e-05, 'samples': 20556800, 'steps': 40149, 'batch_loss/train': 0.7780309577938169} +12/28/2021 06:03:27 - INFO - codeparrot_training - Step 40150: {'lr': 4.668556389110251e-05, 'samples': 20557312, 'steps': 40150, 'batch_loss/train': 0.623958622221835} +12/28/2021 06:03:39 - INFO - codeparrot_training - Step 40151: {'lr': 4.6676391670138066e-05, 'samples': 20557824, 'steps': 40151, 'batch_loss/train': 0.7381182019598782} +12/28/2021 06:03:50 - INFO - codeparrot_training - Step 40152: {'lr': 4.666722025751147e-05, 'samples': 20558336, 'steps': 40152, 'batch_loss/train': 0.6137489060638472} +12/28/2021 06:04:00 - INFO - codeparrot_training - Step 40153: {'lr': 4.6658049653259206e-05, 'samples': 20558848, 'steps': 40153, 'batch_loss/train': 0.8699547164142132} +12/28/2021 06:04:13 - INFO - codeparrot_training - Step 40154: {'lr': 4.6648879857417784e-05, 'samples': 20559360, 'steps': 40154, 'batch_loss/train': 0.6783642712980509} +12/28/2021 06:04:23 - INFO - codeparrot_training - Step 40155: {'lr': 4.663971087002361e-05, 'samples': 20559872, 'steps': 40155, 'batch_loss/train': 0.7100474620237947} +12/28/2021 06:04:34 - INFO - codeparrot_training - Step 40156: {'lr': 4.6630542691113164e-05, 'samples': 20560384, 'steps': 40156, 'batch_loss/train': 0.6898130043409765} +12/28/2021 06:04:45 - INFO - codeparrot_training - Step 40157: {'lr': 4.6621375320722885e-05, 'samples': 20560896, 'steps': 40157, 'batch_loss/train': 0.9339557671919465} +12/28/2021 06:04:57 - INFO - codeparrot_training - Step 40158: {'lr': 4.6612208758889244e-05, 'samples': 20561408, 'steps': 40158, 'batch_loss/train': 0.5800666536670178} +12/28/2021 06:05:07 - INFO - codeparrot_training - Step 40159: {'lr': 4.660304300564863e-05, 'samples': 20561920, 'steps': 40159, 'batch_loss/train': 0.7479292238131166} +12/28/2021 06:05:18 - INFO - codeparrot_training - Step 40160: {'lr': 4.659387806103763e-05, 'samples': 20562432, 'steps': 40160, 'batch_loss/train': 0.7104186154901981} +12/28/2021 06:05:31 - INFO - codeparrot_training - Step 40161: {'lr': 4.658471392509239e-05, 'samples': 20562944, 'steps': 40161, 'batch_loss/train': 0.674330823123455} +12/28/2021 06:05:41 - INFO - codeparrot_training - Step 40162: {'lr': 4.657555059784963e-05, 'samples': 20563456, 'steps': 40162, 'batch_loss/train': 0.6603686662856489} +12/28/2021 06:05:52 - INFO - codeparrot_training - Step 40163: {'lr': 4.656638807934571e-05, 'samples': 20563968, 'steps': 40163, 'batch_loss/train': 0.7181301829405129} +12/28/2021 06:06:04 - INFO - codeparrot_training - Step 40164: {'lr': 4.6557226369616955e-05, 'samples': 20564480, 'steps': 40164, 'batch_loss/train': 0.7908923365175724} +12/28/2021 06:06:15 - INFO - codeparrot_training - Step 40165: {'lr': 4.654806546869986e-05, 'samples': 20564992, 'steps': 40165, 'batch_loss/train': 0.6979473950341344} +12/28/2021 06:06:26 - INFO - codeparrot_training - Step 40166: {'lr': 4.653890537663083e-05, 'samples': 20565504, 'steps': 40166, 'batch_loss/train': 0.6934539144858718} +12/28/2021 06:06:38 - INFO - codeparrot_training - Step 40167: {'lr': 4.652974609344629e-05, 'samples': 20566016, 'steps': 40167, 'batch_loss/train': 0.5567430946975946} +12/28/2021 06:06:49 - INFO - codeparrot_training - Step 40168: {'lr': 4.6520587619182624e-05, 'samples': 20566528, 'steps': 40168, 'batch_loss/train': 0.7653859951533377} +12/28/2021 06:06:59 - INFO - codeparrot_training - Step 40169: {'lr': 4.6511429953876303e-05, 'samples': 20567040, 'steps': 40169, 'batch_loss/train': 0.7245139258448035} +12/28/2021 06:07:10 - INFO - codeparrot_training - Step 40170: {'lr': 4.650227309756369e-05, 'samples': 20567552, 'steps': 40170, 'batch_loss/train': 0.7280717361718416} +12/28/2021 06:07:22 - INFO - codeparrot_training - Step 40171: {'lr': 4.649311705028123e-05, 'samples': 20568064, 'steps': 40171, 'batch_loss/train': 0.8020648611709476} +12/28/2021 06:07:33 - INFO - codeparrot_training - Step 40172: {'lr': 4.648396181206527e-05, 'samples': 20568576, 'steps': 40172, 'batch_loss/train': 0.8018746161833405} +12/28/2021 06:07:44 - INFO - codeparrot_training - Step 40173: {'lr': 4.647480738295223e-05, 'samples': 20569088, 'steps': 40173, 'batch_loss/train': 0.8006339520215988} +12/28/2021 06:07:56 - INFO - codeparrot_training - Step 40174: {'lr': 4.6465653762978564e-05, 'samples': 20569600, 'steps': 40174, 'batch_loss/train': 0.6263004555366933} +12/28/2021 06:08:07 - INFO - codeparrot_training - Step 40175: {'lr': 4.6456500952180566e-05, 'samples': 20570112, 'steps': 40175, 'batch_loss/train': 0.6879673732910305} +12/28/2021 06:08:17 - INFO - codeparrot_training - Step 40176: {'lr': 4.64473489505946e-05, 'samples': 20570624, 'steps': 40176, 'batch_loss/train': 0.7384785963222384} +12/28/2021 06:08:29 - INFO - codeparrot_training - Step 40177: {'lr': 4.6438197758257224e-05, 'samples': 20571136, 'steps': 40177, 'batch_loss/train': 0.5585901099257171} +12/28/2021 06:08:40 - INFO - codeparrot_training - Step 40178: {'lr': 4.6429047375204676e-05, 'samples': 20571648, 'steps': 40178, 'batch_loss/train': 0.7729799980297685} +12/28/2021 06:08:51 - INFO - codeparrot_training - Step 40179: {'lr': 4.6419897801473265e-05, 'samples': 20572160, 'steps': 40179, 'batch_loss/train': 0.8727727336809039} +12/28/2021 06:09:01 - INFO - codeparrot_training - Step 40180: {'lr': 4.641074903709963e-05, 'samples': 20572672, 'steps': 40180, 'batch_loss/train': 0.7291929626371711} +12/28/2021 06:09:14 - INFO - codeparrot_training - Step 40181: {'lr': 4.640160108211991e-05, 'samples': 20573184, 'steps': 40181, 'batch_loss/train': 0.7603638991713524} +12/28/2021 06:09:25 - INFO - codeparrot_training - Step 40182: {'lr': 4.639245393657057e-05, 'samples': 20573696, 'steps': 40182, 'batch_loss/train': 0.7337395520880818} +12/28/2021 06:09:35 - INFO - codeparrot_training - Step 40183: {'lr': 4.638330760048792e-05, 'samples': 20574208, 'steps': 40183, 'batch_loss/train': 0.7593145728460513} +12/28/2021 06:09:47 - INFO - codeparrot_training - Step 40184: {'lr': 4.637416207390838e-05, 'samples': 20574720, 'steps': 40184, 'batch_loss/train': 0.7211215626448393} +12/28/2021 06:09:58 - INFO - codeparrot_training - Step 40185: {'lr': 4.63650173568683e-05, 'samples': 20575232, 'steps': 40185, 'batch_loss/train': 0.9008082249201834} +12/28/2021 06:10:08 - INFO - codeparrot_training - Step 40186: {'lr': 4.635587344940404e-05, 'samples': 20575744, 'steps': 40186, 'batch_loss/train': 0.854013180360198} +12/28/2021 06:10:21 - INFO - codeparrot_training - Step 40187: {'lr': 4.6346730351551846e-05, 'samples': 20576256, 'steps': 40187, 'batch_loss/train': 0.7564545031636953} +12/28/2021 06:10:31 - INFO - codeparrot_training - Step 40188: {'lr': 4.63375880633482e-05, 'samples': 20576768, 'steps': 40188, 'batch_loss/train': 0.7453168296488002} +12/28/2021 06:10:42 - INFO - codeparrot_training - Step 40189: {'lr': 4.632844658482946e-05, 'samples': 20577280, 'steps': 40189, 'batch_loss/train': 0.7270262502133846} +12/28/2021 06:10:52 - INFO - codeparrot_training - Step 40190: {'lr': 4.631930591603187e-05, 'samples': 20577792, 'steps': 40190, 'batch_loss/train': 0.7835988942533731} +12/28/2021 06:11:05 - INFO - codeparrot_training - Step 40191: {'lr': 4.631016605699179e-05, 'samples': 20578304, 'steps': 40191, 'batch_loss/train': 0.7808220256119967} +12/28/2021 06:11:16 - INFO - codeparrot_training - Step 40192: {'lr': 4.6301027007745605e-05, 'samples': 20578816, 'steps': 40192, 'batch_loss/train': 0.7259357664734125} +12/28/2021 06:11:26 - INFO - codeparrot_training - Step 40193: {'lr': 4.62918887683296e-05, 'samples': 20579328, 'steps': 40193, 'batch_loss/train': 0.8264507446438074} +12/28/2021 06:11:39 - INFO - codeparrot_training - Step 40194: {'lr': 4.6282751338780154e-05, 'samples': 20579840, 'steps': 40194, 'batch_loss/train': 0.7525029049720615} +12/28/2021 06:11:49 - INFO - codeparrot_training - Step 40195: {'lr': 4.627361471913355e-05, 'samples': 20580352, 'steps': 40195, 'batch_loss/train': 0.7075708326883614} +12/28/2021 06:12:00 - INFO - codeparrot_training - Step 40196: {'lr': 4.6264478909426116e-05, 'samples': 20580864, 'steps': 40196, 'batch_loss/train': 0.7560378313064575} +12/28/2021 06:12:12 - INFO - codeparrot_training - Step 40197: {'lr': 4.625534390969422e-05, 'samples': 20581376, 'steps': 40197, 'batch_loss/train': 0.7362106170621701} +12/28/2021 06:12:23 - INFO - codeparrot_training - Step 40198: {'lr': 4.624620971997412e-05, 'samples': 20581888, 'steps': 40198, 'batch_loss/train': 0.8208184936083853} +12/28/2021 06:12:33 - INFO - codeparrot_training - Step 40199: {'lr': 4.623707634030217e-05, 'samples': 20582400, 'steps': 40199, 'batch_loss/train': 0.6917662369087338} +12/28/2021 06:12:46 - INFO - codeparrot_training - Step 40200: {'lr': 4.622794377071474e-05, 'samples': 20582912, 'steps': 40200, 'batch_loss/train': 0.7291523981839418} +12/28/2021 06:12:57 - INFO - codeparrot_training - Step 40201: {'lr': 4.621881201124797e-05, 'samples': 20583424, 'steps': 40201, 'batch_loss/train': 0.7691547808935866} +12/28/2021 06:13:07 - INFO - codeparrot_training - Step 40202: {'lr': 4.620968106193821e-05, 'samples': 20583936, 'steps': 40202, 'batch_loss/train': 0.8545965468510985} +12/28/2021 06:13:18 - INFO - codeparrot_training - Step 40203: {'lr': 4.620055092282194e-05, 'samples': 20584448, 'steps': 40203, 'batch_loss/train': 0.8856263877823949} +12/28/2021 06:13:30 - INFO - codeparrot_training - Step 40204: {'lr': 4.6191421593935244e-05, 'samples': 20584960, 'steps': 40204, 'batch_loss/train': 0.6945271869190037} +12/28/2021 06:13:41 - INFO - codeparrot_training - Step 40205: {'lr': 4.6182293075314464e-05, 'samples': 20585472, 'steps': 40205, 'batch_loss/train': 0.794737797928974} +12/28/2021 06:13:52 - INFO - codeparrot_training - Step 40206: {'lr': 4.6173165366996064e-05, 'samples': 20585984, 'steps': 40206, 'batch_loss/train': 0.7196602271869779} +12/28/2021 06:14:05 - INFO - codeparrot_training - Step 40207: {'lr': 4.616403846901609e-05, 'samples': 20586496, 'steps': 40207, 'batch_loss/train': 0.5729159782058559} +12/28/2021 06:14:15 - INFO - codeparrot_training - Step 40208: {'lr': 4.615491238141098e-05, 'samples': 20587008, 'steps': 40208, 'batch_loss/train': 0.7088658227585256} +12/28/2021 06:14:26 - INFO - codeparrot_training - Step 40209: {'lr': 4.6145787104216934e-05, 'samples': 20587520, 'steps': 40209, 'batch_loss/train': 0.6995396646670997} +12/28/2021 06:14:38 - INFO - codeparrot_training - Step 40210: {'lr': 4.613666263747029e-05, 'samples': 20588032, 'steps': 40210, 'batch_loss/train': 1.024068814702332} +12/28/2021 06:14:49 - INFO - codeparrot_training - Step 40211: {'lr': 4.612753898120728e-05, 'samples': 20588544, 'steps': 40211, 'batch_loss/train': 0.6899694236926734} +12/28/2021 06:15:00 - INFO - codeparrot_training - Step 40212: {'lr': 4.611841613546428e-05, 'samples': 20589056, 'steps': 40212, 'batch_loss/train': 0.7376313647255301} +12/28/2021 06:15:10 - INFO - codeparrot_training - Step 40213: {'lr': 4.6109294100277337e-05, 'samples': 20589568, 'steps': 40213, 'batch_loss/train': 0.7735328169655986} +12/28/2021 06:15:22 - INFO - codeparrot_training - Step 40214: {'lr': 4.610017287568291e-05, 'samples': 20590080, 'steps': 40214, 'batch_loss/train': 0.7333654477261007} +12/28/2021 06:15:33 - INFO - codeparrot_training - Step 40215: {'lr': 4.609105246171727e-05, 'samples': 20590592, 'steps': 40215, 'batch_loss/train': 0.6854652261827141} +12/28/2021 06:15:44 - INFO - codeparrot_training - Step 40216: {'lr': 4.60819328584165e-05, 'samples': 20591104, 'steps': 40216, 'batch_loss/train': 0.7847662142012268} +12/28/2021 06:15:57 - INFO - codeparrot_training - Step 40217: {'lr': 4.607281406581701e-05, 'samples': 20591616, 'steps': 40217, 'batch_loss/train': 0.727213722653687} +12/28/2021 06:16:07 - INFO - codeparrot_training - Step 40218: {'lr': 4.606369608395511e-05, 'samples': 20592128, 'steps': 40218, 'batch_loss/train': 0.748876495170407} +12/28/2021 06:16:18 - INFO - codeparrot_training - Step 40219: {'lr': 4.605457891286685e-05, 'samples': 20592640, 'steps': 40219, 'batch_loss/train': 0.7545107076875865} +12/28/2021 06:16:30 - INFO - codeparrot_training - Step 40220: {'lr': 4.6045462552588595e-05, 'samples': 20593152, 'steps': 40220, 'batch_loss/train': 0.6624656743369997} +12/28/2021 06:16:41 - INFO - codeparrot_training - Step 40221: {'lr': 4.603634700315657e-05, 'samples': 20593664, 'steps': 40221, 'batch_loss/train': 0.7996667558327317} +12/28/2021 06:16:51 - INFO - codeparrot_training - Step 40222: {'lr': 4.6027232264607034e-05, 'samples': 20594176, 'steps': 40222, 'batch_loss/train': 0.7159662208869122} +12/28/2021 06:17:02 - INFO - codeparrot_training - Step 40223: {'lr': 4.601811833697617e-05, 'samples': 20594688, 'steps': 40223, 'batch_loss/train': 0.8244710927829146} +12/28/2021 06:17:14 - INFO - codeparrot_training - Step 40224: {'lr': 4.600900522030027e-05, 'samples': 20595200, 'steps': 40224, 'batch_loss/train': 0.6315179578959942} +12/28/2021 06:17:25 - INFO - codeparrot_training - Step 40225: {'lr': 4.5999892914615536e-05, 'samples': 20595712, 'steps': 40225, 'batch_loss/train': 0.708378707524389} +12/28/2021 06:17:35 - INFO - codeparrot_training - Step 40226: {'lr': 4.599078141995819e-05, 'samples': 20596224, 'steps': 40226, 'batch_loss/train': 0.676982197444886} +12/28/2021 06:17:47 - INFO - codeparrot_training - Step 40227: {'lr': 4.5981670736364554e-05, 'samples': 20596736, 'steps': 40227, 'batch_loss/train': 0.7785528632812202} +12/28/2021 06:17:58 - INFO - codeparrot_training - Step 40228: {'lr': 4.597256086387061e-05, 'samples': 20597248, 'steps': 40228, 'batch_loss/train': 0.8378266557119787} +12/28/2021 06:18:09 - INFO - codeparrot_training - Step 40229: {'lr': 4.596345180251285e-05, 'samples': 20597760, 'steps': 40229, 'batch_loss/train': 0.715711553581059} +12/28/2021 06:18:22 - INFO - codeparrot_training - Step 40230: {'lr': 4.5954343552327334e-05, 'samples': 20598272, 'steps': 40230, 'batch_loss/train': 0.7938433862291276} +12/28/2021 06:18:32 - INFO - codeparrot_training - Step 40231: {'lr': 4.594523611335022e-05, 'samples': 20598784, 'steps': 40231, 'batch_loss/train': 0.6824783040210605} +12/28/2021 06:18:43 - INFO - codeparrot_training - Step 40232: {'lr': 4.593612948561793e-05, 'samples': 20599296, 'steps': 40232, 'batch_loss/train': 0.6440543931676075} +12/28/2021 06:18:54 - INFO - codeparrot_training - Step 40233: {'lr': 4.5927023669166456e-05, 'samples': 20599808, 'steps': 40233, 'batch_loss/train': 0.7693226784467697} +12/28/2021 06:19:06 - INFO - codeparrot_training - Step 40234: {'lr': 4.591791866403208e-05, 'samples': 20600320, 'steps': 40234, 'batch_loss/train': 0.9088359652087092} +12/28/2021 06:19:17 - INFO - codeparrot_training - Step 40235: {'lr': 4.5908814470251023e-05, 'samples': 20600832, 'steps': 40235, 'batch_loss/train': 0.7398194647394121} +12/28/2021 06:19:27 - INFO - codeparrot_training - Step 40236: {'lr': 4.5899711087859455e-05, 'samples': 20601344, 'steps': 40236, 'batch_loss/train': 0.6731044850312173} +12/28/2021 06:19:39 - INFO - codeparrot_training - Step 40237: {'lr': 4.5890608516893554e-05, 'samples': 20601856, 'steps': 40237, 'batch_loss/train': 0.7905208840966225} +12/28/2021 06:19:50 - INFO - codeparrot_training - Step 40238: {'lr': 4.5881506757389615e-05, 'samples': 20602368, 'steps': 40238, 'batch_loss/train': 0.7808189447969198} +12/28/2021 06:20:01 - INFO - codeparrot_training - Step 40239: {'lr': 4.587240580938359e-05, 'samples': 20602880, 'steps': 40239, 'batch_loss/train': 0.5924371783621609} +12/28/2021 06:20:13 - INFO - codeparrot_training - Step 40240: {'lr': 4.586330567291186e-05, 'samples': 20603392, 'steps': 40240, 'batch_loss/train': 0.6454531333874911} +12/28/2021 06:20:24 - INFO - codeparrot_training - Step 40241: {'lr': 4.585420634801063e-05, 'samples': 20603904, 'steps': 40241, 'batch_loss/train': 0.9963292954489589} +12/28/2021 06:20:35 - INFO - codeparrot_training - Step 40242: {'lr': 4.5845107834715846e-05, 'samples': 20604416, 'steps': 40242, 'batch_loss/train': 0.7926731873303652} +12/28/2021 06:20:45 - INFO - codeparrot_training - Step 40243: {'lr': 4.58360101330639e-05, 'samples': 20604928, 'steps': 40243, 'batch_loss/train': 0.7621364649385214} +12/28/2021 06:20:57 - INFO - codeparrot_training - Step 40244: {'lr': 4.5826913243090964e-05, 'samples': 20605440, 'steps': 40244, 'batch_loss/train': 0.7518322910182178} +12/28/2021 06:21:08 - INFO - codeparrot_training - Step 40245: {'lr': 4.5817817164833045e-05, 'samples': 20605952, 'steps': 40245, 'batch_loss/train': 0.7710496479412541} +12/28/2021 06:21:19 - INFO - codeparrot_training - Step 40246: {'lr': 4.580872189832638e-05, 'samples': 20606464, 'steps': 40246, 'batch_loss/train': 0.743881743401289} +12/28/2021 06:21:32 - INFO - codeparrot_training - Step 40247: {'lr': 4.579962744360716e-05, 'samples': 20606976, 'steps': 40247, 'batch_loss/train': 0.8423977182246745} +12/28/2021 06:21:43 - INFO - codeparrot_training - Step 40248: {'lr': 4.579053380071152e-05, 'samples': 20607488, 'steps': 40248, 'batch_loss/train': 0.7508388366550207} +12/28/2021 06:21:53 - INFO - codeparrot_training - Step 40249: {'lr': 4.57814409696756e-05, 'samples': 20608000, 'steps': 40249, 'batch_loss/train': 0.4346784856170416} +12/28/2021 06:22:04 - INFO - codeparrot_training - Step 40250: {'lr': 4.5772348950535556e-05, 'samples': 20608512, 'steps': 40250, 'batch_loss/train': 1.7507757423445582} +12/28/2021 06:22:16 - INFO - codeparrot_training - Step 40251: {'lr': 4.576325774332757e-05, 'samples': 20609024, 'steps': 40251, 'batch_loss/train': 0.7039974995423108} +12/28/2021 06:22:27 - INFO - codeparrot_training - Step 40252: {'lr': 4.575416734808771e-05, 'samples': 20609536, 'steps': 40252, 'batch_loss/train': 0.7581706494092941} +12/28/2021 06:22:37 - INFO - codeparrot_training - Step 40253: {'lr': 4.574507776485218e-05, 'samples': 20610048, 'steps': 40253, 'batch_loss/train': 0.6803680490702391} +12/28/2021 06:22:49 - INFO - codeparrot_training - Step 40254: {'lr': 4.5735988993657106e-05, 'samples': 20610560, 'steps': 40254, 'batch_loss/train': 0.7942255488596857} +12/28/2021 06:23:00 - INFO - codeparrot_training - Step 40255: {'lr': 4.572690103453866e-05, 'samples': 20611072, 'steps': 40255, 'batch_loss/train': 0.6663732214365155} +12/28/2021 06:23:11 - INFO - codeparrot_training - Step 40256: {'lr': 4.571781388753288e-05, 'samples': 20611584, 'steps': 40256, 'batch_loss/train': 0.5522491771262139} +12/28/2021 06:23:24 - INFO - codeparrot_training - Step 40257: {'lr': 4.570872755267586e-05, 'samples': 20612096, 'steps': 40257, 'batch_loss/train': 0.863415963947773} +12/28/2021 06:23:34 - INFO - codeparrot_training - Step 40258: {'lr': 4.5699642030003927e-05, 'samples': 20612608, 'steps': 40258, 'batch_loss/train': 0.6366765582934022} +12/28/2021 06:23:45 - INFO - codeparrot_training - Step 40259: {'lr': 4.569055731955305e-05, 'samples': 20613120, 'steps': 40259, 'batch_loss/train': 0.7557388048153371} +12/28/2021 06:23:55 - INFO - codeparrot_training - Step 40260: {'lr': 4.568147342135934e-05, 'samples': 20613632, 'steps': 40260, 'batch_loss/train': 0.6848362879827619} +12/28/2021 06:24:08 - INFO - codeparrot_training - Step 40261: {'lr': 4.567239033545895e-05, 'samples': 20614144, 'steps': 40261, 'batch_loss/train': 0.6847449662163854} +12/28/2021 06:24:18 - INFO - codeparrot_training - Step 40262: {'lr': 4.5663308061887956e-05, 'samples': 20614656, 'steps': 40262, 'batch_loss/train': 0.6524062184616923} +12/28/2021 06:24:29 - INFO - codeparrot_training - Step 40263: {'lr': 4.565422660068253e-05, 'samples': 20615168, 'steps': 40263, 'batch_loss/train': 0.6466153231449425} +12/28/2021 06:24:41 - INFO - codeparrot_training - Step 40264: {'lr': 4.56451459518788e-05, 'samples': 20615680, 'steps': 40264, 'batch_loss/train': 0.7250316496938467} +12/28/2021 06:24:52 - INFO - codeparrot_training - Step 40265: {'lr': 4.5636066115512657e-05, 'samples': 20616192, 'steps': 40265, 'batch_loss/train': 0.7624264745973051} +12/28/2021 06:25:02 - INFO - codeparrot_training - Step 40266: {'lr': 4.5626987091620443e-05, 'samples': 20616704, 'steps': 40266, 'batch_loss/train': 0.9628012962639332} +12/28/2021 06:25:15 - INFO - codeparrot_training - Step 40267: {'lr': 4.561790888023823e-05, 'samples': 20617216, 'steps': 40267, 'batch_loss/train': 0.7458269856870174} +12/28/2021 06:25:25 - INFO - codeparrot_training - Step 40268: {'lr': 4.560883148140188e-05, 'samples': 20617728, 'steps': 40268, 'batch_loss/train': 0.6666088649071753} +12/28/2021 06:25:36 - INFO - codeparrot_training - Step 40269: {'lr': 4.559975489514773e-05, 'samples': 20618240, 'steps': 40269, 'batch_loss/train': 0.8917230033548549} +12/28/2021 06:25:49 - INFO - codeparrot_training - Step 40270: {'lr': 4.559067912151182e-05, 'samples': 20618752, 'steps': 40270, 'batch_loss/train': 0.7990964390337467} +12/28/2021 06:25:59 - INFO - codeparrot_training - Step 40271: {'lr': 4.558160416053015e-05, 'samples': 20619264, 'steps': 40271, 'batch_loss/train': 0.6644421459641308} +12/28/2021 06:26:10 - INFO - codeparrot_training - Step 40272: {'lr': 4.557253001223874e-05, 'samples': 20619776, 'steps': 40272, 'batch_loss/train': 0.8393463417887688} +12/28/2021 06:26:21 - INFO - codeparrot_training - Step 40273: {'lr': 4.556345667667391e-05, 'samples': 20620288, 'steps': 40273, 'batch_loss/train': 0.7972837402485311} +12/28/2021 06:26:33 - INFO - codeparrot_training - Step 40274: {'lr': 4.5554384153871505e-05, 'samples': 20620800, 'steps': 40274, 'batch_loss/train': 0.7739929230883718} +12/28/2021 06:26:43 - INFO - codeparrot_training - Step 40275: {'lr': 4.55453124438677e-05, 'samples': 20621312, 'steps': 40275, 'batch_loss/train': 0.7953910250216722} +12/28/2021 06:26:54 - INFO - codeparrot_training - Step 40276: {'lr': 4.553624154669853e-05, 'samples': 20621824, 'steps': 40276, 'batch_loss/train': 0.7936930847354233} +12/28/2021 06:27:07 - INFO - codeparrot_training - Step 40277: {'lr': 4.5527171462400044e-05, 'samples': 20622336, 'steps': 40277, 'batch_loss/train': 0.7505458142259158} +12/28/2021 06:27:18 - INFO - codeparrot_training - Step 40278: {'lr': 4.551810219100833e-05, 'samples': 20622848, 'steps': 40278, 'batch_loss/train': 0.696972418227233} +12/28/2021 06:27:28 - INFO - codeparrot_training - Step 40279: {'lr': 4.550903373255941e-05, 'samples': 20623360, 'steps': 40279, 'batch_loss/train': 0.772690094076097} +12/28/2021 06:27:40 - INFO - codeparrot_training - Step 40280: {'lr': 4.549996608708937e-05, 'samples': 20623872, 'steps': 40280, 'batch_loss/train': 0.6245235684327781} +12/28/2021 06:27:51 - INFO - codeparrot_training - Step 40281: {'lr': 4.549089925463432e-05, 'samples': 20624384, 'steps': 40281, 'batch_loss/train': 0.6893124175257981} +12/28/2021 06:28:02 - INFO - codeparrot_training - Step 40282: {'lr': 4.548183323523017e-05, 'samples': 20624896, 'steps': 40282, 'batch_loss/train': 0.6867762189358473} +12/28/2021 06:28:12 - INFO - codeparrot_training - Step 40283: {'lr': 4.547276802891295e-05, 'samples': 20625408, 'steps': 40283, 'batch_loss/train': 0.7544021687936038} +12/28/2021 06:28:25 - INFO - codeparrot_training - Step 40284: {'lr': 4.546370363571892e-05, 'samples': 20625920, 'steps': 40284, 'batch_loss/train': 0.7631574403494596} +12/28/2021 06:28:35 - INFO - codeparrot_training - Step 40285: {'lr': 4.545464005568392e-05, 'samples': 20626432, 'steps': 40285, 'batch_loss/train': 0.6919530352461152} +12/28/2021 06:28:46 - INFO - codeparrot_training - Step 40286: {'lr': 4.544557728884402e-05, 'samples': 20626944, 'steps': 40286, 'batch_loss/train': 0.6514772013761103} +12/28/2021 06:28:59 - INFO - codeparrot_training - Step 40287: {'lr': 4.543651533523527e-05, 'samples': 20627456, 'steps': 40287, 'batch_loss/train': 0.785541869699955} +12/28/2021 06:29:09 - INFO - codeparrot_training - Step 40288: {'lr': 4.542745419489372e-05, 'samples': 20627968, 'steps': 40288, 'batch_loss/train': 0.7480772412382066} +12/28/2021 06:29:20 - INFO - codeparrot_training - Step 40289: {'lr': 4.541839386785535e-05, 'samples': 20628480, 'steps': 40289, 'batch_loss/train': 0.792255156673491} +12/28/2021 06:29:32 - INFO - codeparrot_training - Step 40290: {'lr': 4.540933435415617e-05, 'samples': 20628992, 'steps': 40290, 'batch_loss/train': 0.6860524406656623} +12/28/2021 06:29:43 - INFO - codeparrot_training - Step 40291: {'lr': 4.540027565383226e-05, 'samples': 20629504, 'steps': 40291, 'batch_loss/train': 0.680097867269069} +12/28/2021 06:29:54 - INFO - codeparrot_training - Step 40292: {'lr': 4.5391217766919595e-05, 'samples': 20630016, 'steps': 40292, 'batch_loss/train': 0.681931477971375} +12/28/2021 06:30:04 - INFO - codeparrot_training - Step 40293: {'lr': 4.538216069345427e-05, 'samples': 20630528, 'steps': 40293, 'batch_loss/train': 0.7966600758954883} +12/28/2021 06:30:17 - INFO - codeparrot_training - Step 40294: {'lr': 4.5373104433472064e-05, 'samples': 20631040, 'steps': 40294, 'batch_loss/train': 0.7557394681498408} +12/28/2021 06:30:28 - INFO - codeparrot_training - Step 40295: {'lr': 4.5364048987009187e-05, 'samples': 20631552, 'steps': 40295, 'batch_loss/train': 0.564502936322242} +12/28/2021 06:30:38 - INFO - codeparrot_training - Step 40296: {'lr': 4.5354994354101675e-05, 'samples': 20632064, 'steps': 40296, 'batch_loss/train': 0.7572728506056592} +12/28/2021 06:30:50 - INFO - codeparrot_training - Step 40297: {'lr': 4.5345940534785356e-05, 'samples': 20632576, 'steps': 40297, 'batch_loss/train': 0.7275639334693551} +12/28/2021 06:31:01 - INFO - codeparrot_training - Step 40298: {'lr': 4.533688752909623e-05, 'samples': 20633088, 'steps': 40298, 'batch_loss/train': 0.6040925492416136} +12/28/2021 06:31:12 - INFO - codeparrot_training - Step 40299: {'lr': 4.532783533707049e-05, 'samples': 20633600, 'steps': 40299, 'batch_loss/train': 0.6393568261992186} +12/28/2021 06:31:24 - INFO - codeparrot_training - Step 40300: {'lr': 4.5318783958743946e-05, 'samples': 20634112, 'steps': 40300, 'batch_loss/train': 0.6654426899040118} +12/28/2021 06:31:34 - INFO - codeparrot_training - Step 40301: {'lr': 4.530973339415262e-05, 'samples': 20634624, 'steps': 40301, 'batch_loss/train': 0.7517715645954013} +12/28/2021 06:31:45 - INFO - codeparrot_training - Step 40302: {'lr': 4.5300683643332506e-05, 'samples': 20635136, 'steps': 40302, 'batch_loss/train': 0.7570774261839688} +12/28/2021 06:31:56 - INFO - codeparrot_training - Step 40303: {'lr': 4.5291634706319583e-05, 'samples': 20635648, 'steps': 40303, 'batch_loss/train': 0.6969921824056655} +12/28/2021 06:32:08 - INFO - codeparrot_training - Step 40304: {'lr': 4.528258658314985e-05, 'samples': 20636160, 'steps': 40304, 'batch_loss/train': 0.6894847080111504} +12/28/2021 06:32:18 - INFO - codeparrot_training - Step 40305: {'lr': 4.527353927385924e-05, 'samples': 20636672, 'steps': 40305, 'batch_loss/train': 0.644744984805584} +12/28/2021 06:32:29 - INFO - codeparrot_training - Step 40306: {'lr': 4.5264492778483714e-05, 'samples': 20637184, 'steps': 40306, 'batch_loss/train': 0.7197618130594492} +12/28/2021 06:32:42 - INFO - codeparrot_training - Step 40307: {'lr': 4.5255447097059285e-05, 'samples': 20637696, 'steps': 40307, 'batch_loss/train': 0.7548554753884673} +12/28/2021 06:32:52 - INFO - codeparrot_training - Step 40308: {'lr': 4.524640222962195e-05, 'samples': 20638208, 'steps': 40308, 'batch_loss/train': 0.7267511712852865} +12/28/2021 06:33:03 - INFO - codeparrot_training - Step 40309: {'lr': 4.523735817620747e-05, 'samples': 20638720, 'steps': 40309, 'batch_loss/train': 0.7412265175953507} +12/28/2021 06:33:15 - INFO - codeparrot_training - Step 40310: {'lr': 4.5228314936852064e-05, 'samples': 20639232, 'steps': 40310, 'batch_loss/train': 0.6586296865716577} +12/28/2021 06:33:26 - INFO - codeparrot_training - Step 40311: {'lr': 4.5219272511591494e-05, 'samples': 20639744, 'steps': 40311, 'batch_loss/train': 0.668377501424402} +12/28/2021 06:33:37 - INFO - codeparrot_training - Step 40312: {'lr': 4.5210230900461805e-05, 'samples': 20640256, 'steps': 40312, 'batch_loss/train': 0.6921990740811452} +12/28/2021 06:33:49 - INFO - codeparrot_training - Step 40313: {'lr': 4.520119010349888e-05, 'samples': 20640768, 'steps': 40313, 'batch_loss/train': 0.6982037564739585} +12/28/2021 06:33:59 - INFO - codeparrot_training - Step 40314: {'lr': 4.519215012073871e-05, 'samples': 20641280, 'steps': 40314, 'batch_loss/train': 0.6607424018438905} +12/28/2021 06:34:10 - INFO - codeparrot_training - Step 40315: {'lr': 4.518311095221722e-05, 'samples': 20641792, 'steps': 40315, 'batch_loss/train': 0.7692979229614139} +12/28/2021 06:34:20 - INFO - codeparrot_training - Step 40316: {'lr': 4.5174072597970324e-05, 'samples': 20642304, 'steps': 40316, 'batch_loss/train': 0.7494422132149339} +12/28/2021 06:34:33 - INFO - codeparrot_training - Step 40317: {'lr': 4.516503505803399e-05, 'samples': 20642816, 'steps': 40317, 'batch_loss/train': 0.7915421957150102} +12/28/2021 06:34:44 - INFO - codeparrot_training - Step 40318: {'lr': 4.515599833244413e-05, 'samples': 20643328, 'steps': 40318, 'batch_loss/train': 0.7614111355505884} +12/28/2021 06:34:55 - INFO - codeparrot_training - Step 40319: {'lr': 4.514696242123675e-05, 'samples': 20643840, 'steps': 40319, 'batch_loss/train': 0.8241767575964332} +12/28/2021 06:35:07 - INFO - codeparrot_training - Step 40320: {'lr': 4.5137927324447575e-05, 'samples': 20644352, 'steps': 40320, 'batch_loss/train': 0.7231407444924116} +12/28/2021 06:35:18 - INFO - codeparrot_training - Step 40321: {'lr': 4.512889304211268e-05, 'samples': 20644864, 'steps': 40321, 'batch_loss/train': 0.8183887973427773} +12/28/2021 06:35:28 - INFO - codeparrot_training - Step 40322: {'lr': 4.511985957426803e-05, 'samples': 20645376, 'steps': 40322, 'batch_loss/train': 0.8192988319788128} +12/28/2021 06:35:41 - INFO - codeparrot_training - Step 40323: {'lr': 4.511082692094942e-05, 'samples': 20645888, 'steps': 40323, 'batch_loss/train': 0.8066581599414349} +12/28/2021 06:35:52 - INFO - codeparrot_training - Step 40324: {'lr': 4.51017950821927e-05, 'samples': 20646400, 'steps': 40324, 'batch_loss/train': 0.6354812462814152} +12/28/2021 06:36:02 - INFO - codeparrot_training - Step 40325: {'lr': 4.509276405803403e-05, 'samples': 20646912, 'steps': 40325, 'batch_loss/train': 0.726825232617557} +12/28/2021 06:36:13 - INFO - codeparrot_training - Step 40326: {'lr': 4.508373384850908e-05, 'samples': 20647424, 'steps': 40326, 'batch_loss/train': 0.7103064735420048} +12/28/2021 06:36:25 - INFO - codeparrot_training - Step 40327: {'lr': 4.507470445365383e-05, 'samples': 20647936, 'steps': 40327, 'batch_loss/train': 0.7486369023099542} +12/28/2021 06:36:36 - INFO - codeparrot_training - Step 40328: {'lr': 4.5065675873504187e-05, 'samples': 20648448, 'steps': 40328, 'batch_loss/train': 0.6423563386779279} +12/28/2021 06:36:46 - INFO - codeparrot_training - Step 40329: {'lr': 4.505664810809604e-05, 'samples': 20648960, 'steps': 40329, 'batch_loss/train': 0.6429315954446793} +12/28/2021 06:36:59 - INFO - codeparrot_training - Step 40330: {'lr': 4.5047621157465244e-05, 'samples': 20649472, 'steps': 40330, 'batch_loss/train': 0.713623245479539} +12/28/2021 06:37:09 - INFO - codeparrot_training - Step 40331: {'lr': 4.5038595021647756e-05, 'samples': 20649984, 'steps': 40331, 'batch_loss/train': 0.601132112351479} +12/28/2021 06:37:20 - INFO - codeparrot_training - Step 40332: {'lr': 4.5029569700679425e-05, 'samples': 20650496, 'steps': 40332, 'batch_loss/train': 0.7046808041632175} +12/28/2021 06:37:31 - INFO - codeparrot_training - Step 40333: {'lr': 4.502054519459611e-05, 'samples': 20651008, 'steps': 40333, 'batch_loss/train': 0.6328116709191818} +12/28/2021 06:37:43 - INFO - codeparrot_training - Step 40334: {'lr': 4.5011521503433766e-05, 'samples': 20651520, 'steps': 40334, 'batch_loss/train': 0.6843619390856475} +12/28/2021 06:37:53 - INFO - codeparrot_training - Step 40335: {'lr': 4.5002498627228103e-05, 'samples': 20652032, 'steps': 40335, 'batch_loss/train': 0.6055632308125496} +12/28/2021 06:38:04 - INFO - codeparrot_training - Step 40336: {'lr': 4.499347656601524e-05, 'samples': 20652544, 'steps': 40336, 'batch_loss/train': 0.7485259051900357} +12/28/2021 06:38:17 - INFO - codeparrot_training - Step 40337: {'lr': 4.498445531983081e-05, 'samples': 20653056, 'steps': 40337, 'batch_loss/train': 0.7878321474418044} +12/28/2021 06:38:27 - INFO - codeparrot_training - Step 40338: {'lr': 4.4975434888710806e-05, 'samples': 20653568, 'steps': 40338, 'batch_loss/train': 0.7925006484147161} +12/28/2021 06:38:38 - INFO - codeparrot_training - Step 40339: {'lr': 4.496641527269105e-05, 'samples': 20654080, 'steps': 40339, 'batch_loss/train': 0.6955664535053074} +12/28/2021 06:38:50 - INFO - codeparrot_training - Step 40340: {'lr': 4.495739647180741e-05, 'samples': 20654592, 'steps': 40340, 'batch_loss/train': 0.7116742162033916} +12/28/2021 06:39:01 - INFO - codeparrot_training - Step 40341: {'lr': 4.494837848609573e-05, 'samples': 20655104, 'steps': 40341, 'batch_loss/train': 0.7096735220984556} +12/28/2021 06:39:12 - INFO - codeparrot_training - Step 40342: {'lr': 4.493936131559187e-05, 'samples': 20655616, 'steps': 40342, 'batch_loss/train': 0.7167394198477268} +12/28/2021 06:39:24 - INFO - codeparrot_training - Step 40343: {'lr': 4.493034496033169e-05, 'samples': 20656128, 'steps': 40343, 'batch_loss/train': 0.7696335611399263} +12/28/2021 06:39:35 - INFO - codeparrot_training - Step 40344: {'lr': 4.492132942035104e-05, 'samples': 20656640, 'steps': 40344, 'batch_loss/train': 0.8068884732201695} +12/28/2021 06:39:45 - INFO - codeparrot_training - Step 40345: {'lr': 4.49123146956858e-05, 'samples': 20657152, 'steps': 40345, 'batch_loss/train': 0.6970208091661334} +12/28/2021 06:39:56 - INFO - codeparrot_training - Step 40346: {'lr': 4.490330078637161e-05, 'samples': 20657664, 'steps': 40346, 'batch_loss/train': 0.8049480179324746} +12/28/2021 06:40:09 - INFO - codeparrot_training - Step 40347: {'lr': 4.4894287692444555e-05, 'samples': 20658176, 'steps': 40347, 'batch_loss/train': 0.686192176071927} +12/28/2021 06:40:19 - INFO - codeparrot_training - Step 40348: {'lr': 4.4885275413940414e-05, 'samples': 20658688, 'steps': 40348, 'batch_loss/train': 0.6632946697063744} +12/28/2021 06:40:30 - INFO - codeparrot_training - Step 40349: {'lr': 4.4876263950894916e-05, 'samples': 20659200, 'steps': 40349, 'batch_loss/train': 0.76527691911906} +12/28/2021 06:40:42 - INFO - codeparrot_training - Step 40350: {'lr': 4.486725330334387e-05, 'samples': 20659712, 'steps': 40350, 'batch_loss/train': 0.6463152086362243} +12/28/2021 06:40:53 - INFO - codeparrot_training - Step 40351: {'lr': 4.485824347132328e-05, 'samples': 20660224, 'steps': 40351, 'batch_loss/train': 0.7435352597385645} +12/28/2021 06:41:03 - INFO - codeparrot_training - Step 40352: {'lr': 4.4849234454868826e-05, 'samples': 20660736, 'steps': 40352, 'batch_loss/train': 0.6508435532450676} +12/28/2021 06:41:15 - INFO - codeparrot_training - Step 40353: {'lr': 4.4840226254016266e-05, 'samples': 20661248, 'steps': 40353, 'batch_loss/train': 0.8452808046713471} +12/28/2021 06:41:26 - INFO - codeparrot_training - Step 40354: {'lr': 4.483121886880162e-05, 'samples': 20661760, 'steps': 40354, 'batch_loss/train': 0.7802135436795652} +12/28/2021 06:41:37 - INFO - codeparrot_training - Step 40355: {'lr': 4.4822212299260534e-05, 'samples': 20662272, 'steps': 40355, 'batch_loss/train': 0.7926875883713365} +12/28/2021 06:41:47 - INFO - codeparrot_training - Step 40356: {'lr': 4.4813206545428865e-05, 'samples': 20662784, 'steps': 40356, 'batch_loss/train': 0.702887165651191} +12/28/2021 06:42:00 - INFO - codeparrot_training - Step 40357: {'lr': 4.4804201607342386e-05, 'samples': 20663296, 'steps': 40357, 'batch_loss/train': 0.7504991827299818} +12/28/2021 06:42:11 - INFO - codeparrot_training - Step 40358: {'lr': 4.4795197485036964e-05, 'samples': 20663808, 'steps': 40358, 'batch_loss/train': 0.7805438591167331} +12/28/2021 06:42:21 - INFO - codeparrot_training - Step 40359: {'lr': 4.4786194178548316e-05, 'samples': 20664320, 'steps': 40359, 'batch_loss/train': 0.7715212716720998} +12/28/2021 06:42:34 - INFO - codeparrot_training - Step 40360: {'lr': 4.477719168791236e-05, 'samples': 20664832, 'steps': 40360, 'batch_loss/train': 0.7841995302587748} +12/28/2021 06:42:44 - INFO - codeparrot_training - Step 40361: {'lr': 4.476819001316465e-05, 'samples': 20665344, 'steps': 40361, 'batch_loss/train': 0.6795927661005408} +12/28/2021 06:42:55 - INFO - codeparrot_training - Step 40362: {'lr': 4.4759189154341214e-05, 'samples': 20665856, 'steps': 40362, 'batch_loss/train': 0.7366246108431369} +12/28/2021 06:43:07 - INFO - codeparrot_training - Step 40363: {'lr': 4.47501891114778e-05, 'samples': 20666368, 'steps': 40363, 'batch_loss/train': 0.8261316968128085} +12/28/2021 06:43:18 - INFO - codeparrot_training - Step 40364: {'lr': 4.4741189884609986e-05, 'samples': 20666880, 'steps': 40364, 'batch_loss/train': 0.7273365771397948} +12/28/2021 06:43:29 - INFO - codeparrot_training - Step 40365: {'lr': 4.473219147377386e-05, 'samples': 20667392, 'steps': 40365, 'batch_loss/train': 0.721334948670119} +12/28/2021 06:43:41 - INFO - codeparrot_training - Step 40366: {'lr': 4.4723193879004944e-05, 'samples': 20667904, 'steps': 40366, 'batch_loss/train': 0.745970893651247} +12/28/2021 06:43:51 - INFO - codeparrot_training - Step 40367: {'lr': 4.471419710033911e-05, 'samples': 20668416, 'steps': 40367, 'batch_loss/train': 0.8160071412567049} +12/28/2021 06:44:02 - INFO - codeparrot_training - Step 40368: {'lr': 4.470520113781212e-05, 'samples': 20668928, 'steps': 40368, 'batch_loss/train': 0.7659455267712474} +12/28/2021 06:44:13 - INFO - codeparrot_training - Step 40369: {'lr': 4.4696205991459737e-05, 'samples': 20669440, 'steps': 40369, 'batch_loss/train': 0.6840039702365175} +12/28/2021 06:44:25 - INFO - codeparrot_training - Step 40370: {'lr': 4.46872116613177e-05, 'samples': 20669952, 'steps': 40370, 'batch_loss/train': 0.742690893355757} +12/28/2021 06:44:36 - INFO - codeparrot_training - Step 40371: {'lr': 4.4678218147421874e-05, 'samples': 20670464, 'steps': 40371, 'batch_loss/train': 0.7508225957863033} +12/28/2021 06:44:47 - INFO - codeparrot_training - Step 40372: {'lr': 4.4669225449807784e-05, 'samples': 20670976, 'steps': 40372, 'batch_loss/train': 0.5897313749883324} +12/28/2021 06:44:59 - INFO - codeparrot_training - Step 40373: {'lr': 4.46602335685114e-05, 'samples': 20671488, 'steps': 40373, 'batch_loss/train': 0.7224359293468297} +12/28/2021 06:45:09 - INFO - codeparrot_training - Step 40374: {'lr': 4.4651242503568445e-05, 'samples': 20672000, 'steps': 40374, 'batch_loss/train': 0.7400741763412952} +12/28/2021 06:45:20 - INFO - codeparrot_training - Step 40375: {'lr': 4.464225225501456e-05, 'samples': 20672512, 'steps': 40375, 'batch_loss/train': 0.6779354461468756} +12/28/2021 06:45:33 - INFO - codeparrot_training - Step 40376: {'lr': 4.463326282288546e-05, 'samples': 20673024, 'steps': 40376, 'batch_loss/train': 0.8140461007133126} +12/28/2021 06:45:43 - INFO - codeparrot_training - Step 40377: {'lr': 4.46242742072171e-05, 'samples': 20673536, 'steps': 40377, 'batch_loss/train': 0.7223074636422098} +12/28/2021 06:45:54 - INFO - codeparrot_training - Step 40378: {'lr': 4.461528640804502e-05, 'samples': 20674048, 'steps': 40378, 'batch_loss/train': 0.7753790849819779} +12/28/2021 06:46:05 - INFO - codeparrot_training - Step 40379: {'lr': 4.460629942540495e-05, 'samples': 20674560, 'steps': 40379, 'batch_loss/train': 0.6548361635068431} +12/28/2021 06:46:17 - INFO - codeparrot_training - Step 40380: {'lr': 4.459731325933278e-05, 'samples': 20675072, 'steps': 40380, 'batch_loss/train': 0.7644127840176225} +12/28/2021 06:46:27 - INFO - codeparrot_training - Step 40381: {'lr': 4.458832790986411e-05, 'samples': 20675584, 'steps': 40381, 'batch_loss/train': 0.7622699518688023} +12/28/2021 06:46:38 - INFO - codeparrot_training - Step 40382: {'lr': 4.4579343377034646e-05, 'samples': 20676096, 'steps': 40382, 'batch_loss/train': 0.7808735091239214} +12/28/2021 06:46:52 - INFO - codeparrot_training - Step 40383: {'lr': 4.457035966088019e-05, 'samples': 20676608, 'steps': 40383, 'batch_loss/train': 0.7398498728871346} +12/28/2021 06:47:02 - INFO - codeparrot_training - Step 40384: {'lr': 4.45613767614364e-05, 'samples': 20677120, 'steps': 40384, 'batch_loss/train': 0.7479553013108671} +12/28/2021 06:47:13 - INFO - codeparrot_training - Step 40385: {'lr': 4.455239467873901e-05, 'samples': 20677632, 'steps': 40385, 'batch_loss/train': 0.7058447608724236} +12/28/2021 06:47:25 - INFO - codeparrot_training - Step 40386: {'lr': 4.454341341282378e-05, 'samples': 20678144, 'steps': 40386, 'batch_loss/train': 0.5964519984554499} +12/28/2021 06:47:36 - INFO - codeparrot_training - Step 40387: {'lr': 4.453443296372622e-05, 'samples': 20678656, 'steps': 40387, 'batch_loss/train': 0.5950711362529546} +12/28/2021 06:47:46 - INFO - codeparrot_training - Step 40388: {'lr': 4.452545333148228e-05, 'samples': 20679168, 'steps': 40388, 'batch_loss/train': 0.671034696046263} +12/28/2021 06:47:57 - INFO - codeparrot_training - Step 40389: {'lr': 4.451647451612759e-05, 'samples': 20679680, 'steps': 40389, 'batch_loss/train': 0.6680516800843179} +12/28/2021 06:48:10 - INFO - codeparrot_training - Step 40390: {'lr': 4.450749651769767e-05, 'samples': 20680192, 'steps': 40390, 'batch_loss/train': 0.7849270151928067} +12/28/2021 06:48:21 - INFO - codeparrot_training - Step 40391: {'lr': 4.44985193362285e-05, 'samples': 20680704, 'steps': 40391, 'batch_loss/train': 0.799812157638371} +12/28/2021 06:48:32 - INFO - codeparrot_training - Step 40392: {'lr': 4.448954297175556e-05, 'samples': 20681216, 'steps': 40392, 'batch_loss/train': 0.7023670710623264} +12/28/2021 06:48:44 - INFO - codeparrot_training - Step 40393: {'lr': 4.448056742431458e-05, 'samples': 20681728, 'steps': 40393, 'batch_loss/train': 0.7365585044026375} +12/28/2021 06:48:54 - INFO - codeparrot_training - Step 40394: {'lr': 4.447159269394127e-05, 'samples': 20682240, 'steps': 40394, 'batch_loss/train': 0.8058910807594657} +12/28/2021 06:49:05 - INFO - codeparrot_training - Step 40395: {'lr': 4.446261878067132e-05, 'samples': 20682752, 'steps': 40395, 'batch_loss/train': 0.7472696108743548} +12/28/2021 06:49:17 - INFO - codeparrot_training - Step 40396: {'lr': 4.4453645684540356e-05, 'samples': 20683264, 'steps': 40396, 'batch_loss/train': 0.7356691532768309} +12/28/2021 06:49:28 - INFO - codeparrot_training - Step 40397: {'lr': 4.444467340558411e-05, 'samples': 20683776, 'steps': 40397, 'batch_loss/train': 0.6935667032375932} +12/28/2021 06:49:38 - INFO - codeparrot_training - Step 40398: {'lr': 4.443570194383822e-05, 'samples': 20684288, 'steps': 40398, 'batch_loss/train': 0.6918642777018249} +12/28/2021 06:49:49 - INFO - codeparrot_training - Step 40399: {'lr': 4.4426731299338385e-05, 'samples': 20684800, 'steps': 40399, 'batch_loss/train': 0.664076185785234} +12/28/2021 06:50:02 - INFO - codeparrot_training - Step 40400: {'lr': 4.441776147212029e-05, 'samples': 20685312, 'steps': 40400, 'batch_loss/train': 0.7744817081838846} +12/28/2021 06:50:13 - INFO - codeparrot_training - Step 40401: {'lr': 4.4408792462219434e-05, 'samples': 20685824, 'steps': 40401, 'batch_loss/train': 0.8001825036481023} +12/28/2021 06:50:23 - INFO - codeparrot_training - Step 40402: {'lr': 4.439982426967162e-05, 'samples': 20686336, 'steps': 40402, 'batch_loss/train': 0.7999228937551379} +12/28/2021 06:50:36 - INFO - codeparrot_training - Step 40403: {'lr': 4.439085689451258e-05, 'samples': 20686848, 'steps': 40403, 'batch_loss/train': 0.6971823237836361} +12/28/2021 06:50:46 - INFO - codeparrot_training - Step 40404: {'lr': 4.4381890336777754e-05, 'samples': 20687360, 'steps': 40404, 'batch_loss/train': 0.6616303997579962} +12/28/2021 06:50:57 - INFO - codeparrot_training - Step 40405: {'lr': 4.437292459650286e-05, 'samples': 20687872, 'steps': 40405, 'batch_loss/train': 0.5457518079783767} +12/28/2021 06:51:09 - INFO - codeparrot_training - Step 40406: {'lr': 4.4363959673723696e-05, 'samples': 20688384, 'steps': 40406, 'batch_loss/train': 0.7405124353244901} +12/28/2021 06:51:20 - INFO - codeparrot_training - Step 40407: {'lr': 4.435499556847572e-05, 'samples': 20688896, 'steps': 40407, 'batch_loss/train': 0.7502556978724897} +12/28/2021 06:51:30 - INFO - codeparrot_training - Step 40408: {'lr': 4.434603228079462e-05, 'samples': 20689408, 'steps': 40408, 'batch_loss/train': 0.7432135734707117} +12/28/2021 06:51:41 - INFO - codeparrot_training - Step 40409: {'lr': 4.433706981071606e-05, 'samples': 20689920, 'steps': 40409, 'batch_loss/train': 0.8127118023112416} +12/28/2021 06:51:54 - INFO - codeparrot_training - Step 40410: {'lr': 4.432810815827565e-05, 'samples': 20690432, 'steps': 40410, 'batch_loss/train': 0.7959068352356553} +12/28/2021 06:52:04 - INFO - codeparrot_training - Step 40411: {'lr': 4.431914732350903e-05, 'samples': 20690944, 'steps': 40411, 'batch_loss/train': 0.7859764881432056} +12/28/2021 06:52:15 - INFO - codeparrot_training - Step 40412: {'lr': 4.4310187306451864e-05, 'samples': 20691456, 'steps': 40412, 'batch_loss/train': 0.7002678123535588} +12/28/2021 06:52:27 - INFO - codeparrot_training - Step 40413: {'lr': 4.4301228107139604e-05, 'samples': 20691968, 'steps': 40413, 'batch_loss/train': 0.6074316906742752} +12/28/2021 06:52:38 - INFO - codeparrot_training - Step 40414: {'lr': 4.429226972560807e-05, 'samples': 20692480, 'steps': 40414, 'batch_loss/train': 0.9278447104152292} +12/28/2021 06:52:48 - INFO - codeparrot_training - Step 40415: {'lr': 4.4283312161892854e-05, 'samples': 20692992, 'steps': 40415, 'batch_loss/train': 0.8009730060584843} +12/28/2021 06:53:01 - INFO - codeparrot_training - Step 40416: {'lr': 4.4274355416029395e-05, 'samples': 20693504, 'steps': 40416, 'batch_loss/train': 0.7178189416881651} +12/28/2021 06:53:11 - INFO - codeparrot_training - Step 40417: {'lr': 4.426539948805353e-05, 'samples': 20694016, 'steps': 40417, 'batch_loss/train': 0.6820188593119383} +12/28/2021 06:53:22 - INFO - codeparrot_training - Step 40418: {'lr': 4.4256444378000724e-05, 'samples': 20694528, 'steps': 40418, 'batch_loss/train': 0.7452272624941543} +12/28/2021 06:53:32 - INFO - codeparrot_training - Step 40419: {'lr': 4.424749008590656e-05, 'samples': 20695040, 'steps': 40419, 'batch_loss/train': 0.7192664025351405} +12/28/2021 06:53:46 - INFO - codeparrot_training - Step 40420: {'lr': 4.423853661180671e-05, 'samples': 20695552, 'steps': 40420, 'batch_loss/train': 0.6509690617676824} +12/28/2021 06:53:56 - INFO - codeparrot_training - Step 40421: {'lr': 4.422958395573676e-05, 'samples': 20696064, 'steps': 40421, 'batch_loss/train': 0.6503997256513685} +12/28/2021 06:54:07 - INFO - codeparrot_training - Step 40422: {'lr': 4.4220632117732276e-05, 'samples': 20696576, 'steps': 40422, 'batch_loss/train': 0.696329802274704} +12/28/2021 06:54:20 - INFO - codeparrot_training - Step 40423: {'lr': 4.421168109782886e-05, 'samples': 20697088, 'steps': 40423, 'batch_loss/train': 0.7269235127605498} +12/28/2021 06:54:30 - INFO - codeparrot_training - Step 40424: {'lr': 4.420273089606211e-05, 'samples': 20697600, 'steps': 40424, 'batch_loss/train': 0.6996766175143421} +12/28/2021 06:54:41 - INFO - codeparrot_training - Step 40425: {'lr': 4.419378151246758e-05, 'samples': 20698112, 'steps': 40425, 'batch_loss/train': 0.44220740560558625} +12/28/2021 06:54:53 - INFO - codeparrot_training - Step 40426: {'lr': 4.418483294708092e-05, 'samples': 20698624, 'steps': 40426, 'batch_loss/train': 0.6988136784057133} +12/28/2021 06:55:04 - INFO - codeparrot_training - Step 40427: {'lr': 4.417588519993754e-05, 'samples': 20699136, 'steps': 40427, 'batch_loss/train': 0.7363351709209383} +12/28/2021 06:55:14 - INFO - codeparrot_training - Step 40428: {'lr': 4.416693827107318e-05, 'samples': 20699648, 'steps': 40428, 'batch_loss/train': 0.6852467516437173} +12/28/2021 06:55:25 - INFO - codeparrot_training - Step 40429: {'lr': 4.4157992160523434e-05, 'samples': 20700160, 'steps': 40429, 'batch_loss/train': 0.7354586706496775} +12/28/2021 06:55:38 - INFO - codeparrot_training - Step 40430: {'lr': 4.4149046868323716e-05, 'samples': 20700672, 'steps': 40430, 'batch_loss/train': 0.7491131192073226} +12/28/2021 06:55:48 - INFO - codeparrot_training - Step 40431: {'lr': 4.414010239450958e-05, 'samples': 20701184, 'steps': 40431, 'batch_loss/train': 0.6438946039415896} +12/28/2021 06:55:59 - INFO - codeparrot_training - Step 40432: {'lr': 4.41311587391168e-05, 'samples': 20701696, 'steps': 40432, 'batch_loss/train': 0.6673587433760986} +12/28/2021 06:56:11 - INFO - codeparrot_training - Step 40433: {'lr': 4.4122215902180766e-05, 'samples': 20702208, 'steps': 40433, 'batch_loss/train': 0.7638094639405608} +12/28/2021 06:56:22 - INFO - codeparrot_training - Step 40434: {'lr': 4.4113273883737036e-05, 'samples': 20702720, 'steps': 40434, 'batch_loss/train': 0.8434046627953649} +12/28/2021 06:56:32 - INFO - codeparrot_training - Step 40435: {'lr': 4.410433268382119e-05, 'samples': 20703232, 'steps': 40435, 'batch_loss/train': 0.7132640369236469} +12/28/2021 06:56:45 - INFO - codeparrot_training - Step 40436: {'lr': 4.409539230246879e-05, 'samples': 20703744, 'steps': 40436, 'batch_loss/train': 0.7270356491208076} +12/28/2021 06:56:55 - INFO - codeparrot_training - Step 40437: {'lr': 4.4086452739715344e-05, 'samples': 20704256, 'steps': 40437, 'batch_loss/train': 0.6987936076475307} +12/28/2021 06:57:06 - INFO - codeparrot_training - Step 40438: {'lr': 4.407751399559642e-05, 'samples': 20704768, 'steps': 40438, 'batch_loss/train': 0.7039797168690711} +12/28/2021 06:57:19 - INFO - codeparrot_training - Step 40439: {'lr': 4.4068576070147557e-05, 'samples': 20705280, 'steps': 40439, 'batch_loss/train': 0.8111632717773318} +12/28/2021 06:57:29 - INFO - codeparrot_training - Step 40440: {'lr': 4.405963896340426e-05, 'samples': 20705792, 'steps': 40440, 'batch_loss/train': 0.8001940255053341} +12/28/2021 06:57:40 - INFO - codeparrot_training - Step 40441: {'lr': 4.405070267540218e-05, 'samples': 20706304, 'steps': 40441, 'batch_loss/train': 0.7207147390581667} +12/28/2021 06:57:51 - INFO - codeparrot_training - Step 40442: {'lr': 4.4041767206176587e-05, 'samples': 20706816, 'steps': 40442, 'batch_loss/train': 0.8295932505279779} +12/28/2021 06:58:03 - INFO - codeparrot_training - Step 40443: {'lr': 4.403283255576321e-05, 'samples': 20707328, 'steps': 40443, 'batch_loss/train': 0.7354676341637969} +12/28/2021 06:58:13 - INFO - codeparrot_training - Step 40444: {'lr': 4.4023898724197596e-05, 'samples': 20707840, 'steps': 40444, 'batch_loss/train': 1.3388256477192044} +12/28/2021 06:58:24 - INFO - codeparrot_training - Step 40445: {'lr': 4.4014965711515145e-05, 'samples': 20708352, 'steps': 40445, 'batch_loss/train': 0.8282366618514061} +12/28/2021 06:58:37 - INFO - codeparrot_training - Step 40446: {'lr': 4.40060335177514e-05, 'samples': 20708864, 'steps': 40446, 'batch_loss/train': 0.7577597787603736} +12/28/2021 06:58:47 - INFO - codeparrot_training - Step 40447: {'lr': 4.399710214294186e-05, 'samples': 20709376, 'steps': 40447, 'batch_loss/train': 0.7728798631578684} +12/28/2021 06:58:58 - INFO - codeparrot_training - Step 40448: {'lr': 4.3988171587122094e-05, 'samples': 20709888, 'steps': 40448, 'batch_loss/train': 0.8089292179793119} +12/28/2021 06:59:10 - INFO - codeparrot_training - Step 40449: {'lr': 4.397924185032756e-05, 'samples': 20710400, 'steps': 40449, 'batch_loss/train': 0.7089675484457985} +12/28/2021 06:59:21 - INFO - codeparrot_training - Step 40450: {'lr': 4.3970312932593766e-05, 'samples': 20710912, 'steps': 40450, 'batch_loss/train': 0.6706904349848628} +12/28/2021 06:59:31 - INFO - codeparrot_training - Step 40451: {'lr': 4.39613848339562e-05, 'samples': 20711424, 'steps': 40451, 'batch_loss/train': 0.896645063534379} +12/28/2021 06:59:42 - INFO - codeparrot_training - Step 40452: {'lr': 4.395245755445046e-05, 'samples': 20711936, 'steps': 40452, 'batch_loss/train': 0.6920712045393884} +12/28/2021 06:59:54 - INFO - codeparrot_training - Step 40453: {'lr': 4.394353109411181e-05, 'samples': 20712448, 'steps': 40453, 'batch_loss/train': 0.7074859770946205} +12/28/2021 07:00:05 - INFO - codeparrot_training - Step 40454: {'lr': 4.393460545297595e-05, 'samples': 20712960, 'steps': 40454, 'batch_loss/train': 0.7303616921417415} +12/28/2021 07:00:15 - INFO - codeparrot_training - Step 40455: {'lr': 4.3925680631078323e-05, 'samples': 20713472, 'steps': 40455, 'batch_loss/train': 0.7463876297697425} +12/28/2021 07:00:27 - INFO - codeparrot_training - Step 40456: {'lr': 4.3916756628454346e-05, 'samples': 20713984, 'steps': 40456, 'batch_loss/train': 0.7606421448290348} +12/28/2021 07:00:38 - INFO - codeparrot_training - Step 40457: {'lr': 4.390783344513946e-05, 'samples': 20714496, 'steps': 40457, 'batch_loss/train': 0.8395556687028147} +12/28/2021 07:00:49 - INFO - codeparrot_training - Step 40458: {'lr': 4.3898911081169335e-05, 'samples': 20715008, 'steps': 40458, 'batch_loss/train': 0.8595121167600155} +12/28/2021 07:01:02 - INFO - codeparrot_training - Step 40459: {'lr': 4.388998953657927e-05, 'samples': 20715520, 'steps': 40459, 'batch_loss/train': 0.7806896581314504} +12/28/2021 07:01:12 - INFO - codeparrot_training - Step 40460: {'lr': 4.388106881140477e-05, 'samples': 20716032, 'steps': 40460, 'batch_loss/train': 0.7423249213024974} +12/28/2021 07:01:23 - INFO - codeparrot_training - Step 40461: {'lr': 4.3872148905681306e-05, 'samples': 20716544, 'steps': 40461, 'batch_loss/train': 0.8029386037960649} +12/28/2021 07:01:35 - INFO - codeparrot_training - Step 40462: {'lr': 4.386322981944435e-05, 'samples': 20717056, 'steps': 40462, 'batch_loss/train': 0.7612905083224177} +12/28/2021 07:01:45 - INFO - codeparrot_training - Step 40463: {'lr': 4.385431155272937e-05, 'samples': 20717568, 'steps': 40463, 'batch_loss/train': 0.7350390052888542} +12/28/2021 07:01:56 - INFO - codeparrot_training - Step 40464: {'lr': 4.384539410557178e-05, 'samples': 20718080, 'steps': 40464, 'batch_loss/train': 0.7063028584234416} +12/28/2021 07:02:07 - INFO - codeparrot_training - Step 40465: {'lr': 4.383647747800709e-05, 'samples': 20718592, 'steps': 40465, 'batch_loss/train': 0.6657015336677432} +12/28/2021 07:02:19 - INFO - codeparrot_training - Step 40466: {'lr': 4.382756167007071e-05, 'samples': 20719104, 'steps': 40466, 'batch_loss/train': 0.7736846497282386} +12/28/2021 07:02:30 - INFO - codeparrot_training - Step 40467: {'lr': 4.3818646681798165e-05, 'samples': 20719616, 'steps': 40467, 'batch_loss/train': 0.7118331221863627} +12/28/2021 07:02:40 - INFO - codeparrot_training - Step 40468: {'lr': 4.3809732513224706e-05, 'samples': 20720128, 'steps': 40468, 'batch_loss/train': 0.915772573556751} +12/28/2021 07:02:53 - INFO - codeparrot_training - Step 40469: {'lr': 4.380081916438597e-05, 'samples': 20720640, 'steps': 40469, 'batch_loss/train': 0.7623628964647651} +12/28/2021 07:03:04 - INFO - codeparrot_training - Step 40470: {'lr': 4.3791906635317373e-05, 'samples': 20721152, 'steps': 40470, 'batch_loss/train': 0.7596438234904781} +12/28/2021 07:03:14 - INFO - codeparrot_training - Step 40471: {'lr': 4.3782994926054224e-05, 'samples': 20721664, 'steps': 40471, 'batch_loss/train': 0.8092542166123167} +12/28/2021 07:03:26 - INFO - codeparrot_training - Step 40472: {'lr': 4.377408403663202e-05, 'samples': 20722176, 'steps': 40472, 'batch_loss/train': 0.7622175207361579} +12/28/2021 07:03:37 - INFO - codeparrot_training - Step 40473: {'lr': 4.3765173967086205e-05, 'samples': 20722688, 'steps': 40473, 'batch_loss/train': 0.7788681564852595} +12/28/2021 07:03:48 - INFO - codeparrot_training - Step 40474: {'lr': 4.375626471745217e-05, 'samples': 20723200, 'steps': 40474, 'batch_loss/train': 0.6657405537553132} +12/28/2021 07:03:58 - INFO - codeparrot_training - Step 40475: {'lr': 4.3747356287765356e-05, 'samples': 20723712, 'steps': 40475, 'batch_loss/train': 0.7730306498706341} +12/28/2021 07:04:11 - INFO - codeparrot_training - Step 40476: {'lr': 4.3738448678061176e-05, 'samples': 20724224, 'steps': 40476, 'batch_loss/train': 0.8383334465324879} +12/28/2021 07:04:22 - INFO - codeparrot_training - Step 40477: {'lr': 4.3729541888375054e-05, 'samples': 20724736, 'steps': 40477, 'batch_loss/train': 0.7063796282745898} +12/28/2021 07:04:32 - INFO - codeparrot_training - Step 40478: {'lr': 4.372063591874237e-05, 'samples': 20725248, 'steps': 40478, 'batch_loss/train': 0.790394798386842} +12/28/2021 07:04:44 - INFO - codeparrot_training - Step 40479: {'lr': 4.3711730769198574e-05, 'samples': 20725760, 'steps': 40479, 'batch_loss/train': 0.6721185715869069} +12/28/2021 07:04:55 - INFO - codeparrot_training - Step 40480: {'lr': 4.370282643977902e-05, 'samples': 20726272, 'steps': 40480, 'batch_loss/train': 0.714354227297008} +12/28/2021 07:05:05 - INFO - codeparrot_training - Step 40481: {'lr': 4.369392293051922e-05, 'samples': 20726784, 'steps': 40481, 'batch_loss/train': 0.7196159912273288} +12/28/2021 07:05:18 - INFO - codeparrot_training - Step 40482: {'lr': 4.3685020241454406e-05, 'samples': 20727296, 'steps': 40482, 'batch_loss/train': 0.5290621293534059} +12/28/2021 07:05:29 - INFO - codeparrot_training - Step 40483: {'lr': 4.3676118372619974e-05, 'samples': 20727808, 'steps': 40483, 'batch_loss/train': 0.8130447221919894} +12/28/2021 07:05:39 - INFO - codeparrot_training - Step 40484: {'lr': 4.3667217324051543e-05, 'samples': 20728320, 'steps': 40484, 'batch_loss/train': 0.6883433223702013} +12/28/2021 07:05:52 - INFO - codeparrot_training - Step 40485: {'lr': 4.365831709578427e-05, 'samples': 20728832, 'steps': 40485, 'batch_loss/train': 0.759350418113172} +12/28/2021 07:06:03 - INFO - codeparrot_training - Step 40486: {'lr': 4.364941768785363e-05, 'samples': 20729344, 'steps': 40486, 'batch_loss/train': 0.6457134396769106} +12/28/2021 07:06:13 - INFO - codeparrot_training - Step 40487: {'lr': 4.3640519100294985e-05, 'samples': 20729856, 'steps': 40487, 'batch_loss/train': 0.7223708657547832} +12/28/2021 07:06:24 - INFO - codeparrot_training - Step 40488: {'lr': 4.363162133314369e-05, 'samples': 20730368, 'steps': 40488, 'batch_loss/train': 0.7724894904531538} +12/28/2021 07:06:36 - INFO - codeparrot_training - Step 40489: {'lr': 4.362272438643519e-05, 'samples': 20730880, 'steps': 40489, 'batch_loss/train': 0.7460242919623852} +12/28/2021 07:06:47 - INFO - codeparrot_training - Step 40490: {'lr': 4.36138282602048e-05, 'samples': 20731392, 'steps': 40490, 'batch_loss/train': 0.6393421785905957} +12/28/2021 07:06:57 - INFO - codeparrot_training - Step 40491: {'lr': 4.360493295448789e-05, 'samples': 20731904, 'steps': 40491, 'batch_loss/train': 0.7945715775713325} +12/28/2021 07:07:09 - INFO - codeparrot_training - Step 40492: {'lr': 4.359603846931984e-05, 'samples': 20732416, 'steps': 40492, 'batch_loss/train': 0.7069477783516049} +12/28/2021 07:07:20 - INFO - codeparrot_training - Step 40493: {'lr': 4.3587144804736054e-05, 'samples': 20732928, 'steps': 40493, 'batch_loss/train': 0.7622345332056284} +12/28/2021 07:07:31 - INFO - codeparrot_training - Step 40494: {'lr': 4.357825196077173e-05, 'samples': 20733440, 'steps': 40494, 'batch_loss/train': 0.6526105371303856} +12/28/2021 07:07:43 - INFO - codeparrot_training - Step 40495: {'lr': 4.3569359937462386e-05, 'samples': 20733952, 'steps': 40495, 'batch_loss/train': 0.7509161569178104} +12/28/2021 07:07:54 - INFO - codeparrot_training - Step 40496: {'lr': 4.35604687348434e-05, 'samples': 20734464, 'steps': 40496, 'batch_loss/train': 0.6733111045323312} +12/28/2021 07:08:04 - INFO - codeparrot_training - Step 40497: {'lr': 4.355157835294998e-05, 'samples': 20734976, 'steps': 40497, 'batch_loss/train': 0.750083816004917} +12/28/2021 07:08:15 - INFO - codeparrot_training - Step 40498: {'lr': 4.3542688791817525e-05, 'samples': 20735488, 'steps': 40498, 'batch_loss/train': 0.6876324848271906} +12/28/2021 07:08:28 - INFO - codeparrot_training - Step 40499: {'lr': 4.353380005148139e-05, 'samples': 20736000, 'steps': 40499, 'batch_loss/train': 0.5949320029467344} +12/28/2021 07:08:39 - INFO - codeparrot_training - Step 40500: {'lr': 4.35249121319769e-05, 'samples': 20736512, 'steps': 40500, 'batch_loss/train': 0.7940413439646363} +12/28/2021 07:08:49 - INFO - codeparrot_training - Step 40501: {'lr': 4.35160250333394e-05, 'samples': 20737024, 'steps': 40501, 'batch_loss/train': 0.7636195430532098} +12/28/2021 07:09:01 - INFO - codeparrot_training - Step 40502: {'lr': 4.350713875560422e-05, 'samples': 20737536, 'steps': 40502, 'batch_loss/train': 0.7588082896545529} +12/28/2021 07:09:12 - INFO - codeparrot_training - Step 40503: {'lr': 4.349825329880669e-05, 'samples': 20738048, 'steps': 40503, 'batch_loss/train': 0.7338974387967028} +12/28/2021 07:09:22 - INFO - codeparrot_training - Step 40504: {'lr': 4.348936866298212e-05, 'samples': 20738560, 'steps': 40504, 'batch_loss/train': 0.7638389370404184} +12/28/2021 07:09:35 - INFO - codeparrot_training - Step 40505: {'lr': 4.348048484816586e-05, 'samples': 20739072, 'steps': 40505, 'batch_loss/train': 0.7735584378242493} +12/28/2021 07:09:45 - INFO - codeparrot_training - Step 40506: {'lr': 4.3471601854393225e-05, 'samples': 20739584, 'steps': 40506, 'batch_loss/train': 0.5759111801162362} +12/28/2021 07:09:56 - INFO - codeparrot_training - Step 40507: {'lr': 4.346271968169954e-05, 'samples': 20740096, 'steps': 40507, 'batch_loss/train': 0.7154236133210361} +12/28/2021 07:10:06 - INFO - codeparrot_training - Step 40508: {'lr': 4.3453838330120064e-05, 'samples': 20740608, 'steps': 40508, 'batch_loss/train': 0.6520477507729083} +12/28/2021 07:10:19 - INFO - codeparrot_training - Step 40509: {'lr': 4.344495779969007e-05, 'samples': 20741120, 'steps': 40509, 'batch_loss/train': 0.8365994347259402} +12/28/2021 07:10:30 - INFO - codeparrot_training - Step 40510: {'lr': 4.3436078090445056e-05, 'samples': 20741632, 'steps': 40510, 'batch_loss/train': 0.726845424156636} +12/28/2021 07:10:40 - INFO - codeparrot_training - Step 40511: {'lr': 4.342719920242014e-05, 'samples': 20742144, 'steps': 40511, 'batch_loss/train': 0.7402559109032154} +12/28/2021 07:10:53 - INFO - codeparrot_training - Step 40512: {'lr': 4.3418321135650614e-05, 'samples': 20742656, 'steps': 40512, 'batch_loss/train': 0.7335935081355274} +12/28/2021 07:11:03 - INFO - codeparrot_training - Step 40513: {'lr': 4.3409443890171985e-05, 'samples': 20743168, 'steps': 40513, 'batch_loss/train': 0.8512417776510119} +12/28/2021 07:11:14 - INFO - codeparrot_training - Step 40514: {'lr': 4.340056746601931e-05, 'samples': 20743680, 'steps': 40514, 'batch_loss/train': 0.7390624778345227} +12/28/2021 07:11:27 - INFO - codeparrot_training - Step 40515: {'lr': 4.3391691863227975e-05, 'samples': 20744192, 'steps': 40515, 'batch_loss/train': 0.6833853693678975} +12/28/2021 07:11:37 - INFO - codeparrot_training - Step 40516: {'lr': 4.3382817081833283e-05, 'samples': 20744704, 'steps': 40516, 'batch_loss/train': 0.76112678530626} +12/28/2021 07:11:48 - INFO - codeparrot_training - Step 40517: {'lr': 4.337394312187048e-05, 'samples': 20745216, 'steps': 40517, 'batch_loss/train': 0.7513343128375709} +12/28/2021 07:12:01 - INFO - codeparrot_training - Step 40518: {'lr': 4.336506998337486e-05, 'samples': 20745728, 'steps': 40518, 'batch_loss/train': 0.7123119598254561} +12/28/2021 07:12:12 - INFO - codeparrot_training - Step 40519: {'lr': 4.335619766638174e-05, 'samples': 20746240, 'steps': 40519, 'batch_loss/train': 0.7237182650715113} +12/28/2021 07:12:22 - INFO - codeparrot_training - Step 40520: {'lr': 4.3347326170926263e-05, 'samples': 20746752, 'steps': 40520, 'batch_loss/train': 0.7479345677420497} +12/28/2021 07:12:33 - INFO - codeparrot_training - Step 40521: {'lr': 4.333845549704385e-05, 'samples': 20747264, 'steps': 40521, 'batch_loss/train': 0.6260703318403102} +12/28/2021 07:12:45 - INFO - codeparrot_training - Step 40522: {'lr': 4.3329585644769745e-05, 'samples': 20747776, 'steps': 40522, 'batch_loss/train': 0.7309683738276362} +12/28/2021 07:12:55 - INFO - codeparrot_training - Step 40523: {'lr': 4.332071661413911e-05, 'samples': 20748288, 'steps': 40523, 'batch_loss/train': 0.7408329956233501} +12/28/2021 07:13:06 - INFO - codeparrot_training - Step 40524: {'lr': 4.3311848405187235e-05, 'samples': 20748800, 'steps': 40524, 'batch_loss/train': 0.7186753489077091} +12/28/2021 07:13:19 - INFO - codeparrot_training - Step 40525: {'lr': 4.330298101794949e-05, 'samples': 20749312, 'steps': 40525, 'batch_loss/train': 0.7416467727161944} +12/28/2021 07:13:30 - INFO - codeparrot_training - Step 40526: {'lr': 4.329411445246101e-05, 'samples': 20749824, 'steps': 40526, 'batch_loss/train': 0.7296819197945297} +12/28/2021 07:13:40 - INFO - codeparrot_training - Step 40527: {'lr': 4.3285248708757083e-05, 'samples': 20750336, 'steps': 40527, 'batch_loss/train': 0.7110558720305562} +12/28/2021 07:13:51 - INFO - codeparrot_training - Step 40528: {'lr': 4.327638378687296e-05, 'samples': 20750848, 'steps': 40528, 'batch_loss/train': 0.8256549974903464} +12/28/2021 07:14:03 - INFO - codeparrot_training - Step 40529: {'lr': 4.326751968684386e-05, 'samples': 20751360, 'steps': 40529, 'batch_loss/train': 0.7253541597165167} +12/28/2021 07:14:13 - INFO - codeparrot_training - Step 40530: {'lr': 4.325865640870505e-05, 'samples': 20751872, 'steps': 40530, 'batch_loss/train': 0.7117501763859764} +12/28/2021 07:14:24 - INFO - codeparrot_training - Step 40531: {'lr': 4.324979395249176e-05, 'samples': 20752384, 'steps': 40531, 'batch_loss/train': 0.6467249714769423} +12/28/2021 07:14:36 - INFO - codeparrot_training - Step 40532: {'lr': 4.3240932318239225e-05, 'samples': 20752896, 'steps': 40532, 'batch_loss/train': 0.7109428740222938} +12/28/2021 07:14:47 - INFO - codeparrot_training - Step 40533: {'lr': 4.323207150598268e-05, 'samples': 20753408, 'steps': 40533, 'batch_loss/train': 0.7867603176273406} +12/28/2021 07:14:57 - INFO - codeparrot_training - Step 40534: {'lr': 4.322321151575739e-05, 'samples': 20753920, 'steps': 40534, 'batch_loss/train': 0.745884703937918} +12/28/2021 07:15:10 - INFO - codeparrot_training - Step 40535: {'lr': 4.3214352347598435e-05, 'samples': 20754432, 'steps': 40535, 'batch_loss/train': 0.7932768100872636} +12/28/2021 07:15:21 - INFO - codeparrot_training - Step 40536: {'lr': 4.320549400154125e-05, 'samples': 20754944, 'steps': 40536, 'batch_loss/train': 0.8357607750222087} +12/28/2021 07:15:32 - INFO - codeparrot_training - Step 40537: {'lr': 4.319663647762087e-05, 'samples': 20755456, 'steps': 40537, 'batch_loss/train': 0.760215878020972} +12/28/2021 07:15:42 - INFO - codeparrot_training - Step 40538: {'lr': 4.318777977587251e-05, 'samples': 20755968, 'steps': 40538, 'batch_loss/train': 0.5637442129664123} +12/28/2021 07:15:54 - INFO - codeparrot_training - Step 40539: {'lr': 4.3178923896331576e-05, 'samples': 20756480, 'steps': 40539, 'batch_loss/train': 0.7087879017926753} +12/28/2021 07:16:05 - INFO - codeparrot_training - Step 40540: {'lr': 4.31700688390331e-05, 'samples': 20756992, 'steps': 40540, 'batch_loss/train': 0.7462062027771026} +12/28/2021 07:16:16 - INFO - codeparrot_training - Step 40541: {'lr': 4.316121460401232e-05, 'samples': 20757504, 'steps': 40541, 'batch_loss/train': 0.7519100923091173} +12/28/2021 07:16:28 - INFO - codeparrot_training - Step 40542: {'lr': 4.3152361191304475e-05, 'samples': 20758016, 'steps': 40542, 'batch_loss/train': 0.8120131697505713} +12/28/2021 07:16:39 - INFO - codeparrot_training - Step 40543: {'lr': 4.314350860094471e-05, 'samples': 20758528, 'steps': 40543, 'batch_loss/train': 0.8884657807648182} +12/28/2021 07:16:49 - INFO - codeparrot_training - Step 40544: {'lr': 4.3134656832968275e-05, 'samples': 20759040, 'steps': 40544, 'batch_loss/train': 0.9888082183897495} +12/28/2021 07:17:02 - INFO - codeparrot_training - Step 40545: {'lr': 4.312580588741036e-05, 'samples': 20759552, 'steps': 40545, 'batch_loss/train': 0.6998034184798598} +12/28/2021 07:17:13 - INFO - codeparrot_training - Step 40546: {'lr': 4.311695576430605e-05, 'samples': 20760064, 'steps': 40546, 'batch_loss/train': 0.738491348689422} +12/28/2021 07:17:23 - INFO - codeparrot_training - Step 40547: {'lr': 4.310810646369062e-05, 'samples': 20760576, 'steps': 40547, 'batch_loss/train': 0.6962211760692298} +12/28/2021 07:17:35 - INFO - codeparrot_training - Step 40548: {'lr': 4.309925798559933e-05, 'samples': 20761088, 'steps': 40548, 'batch_loss/train': 0.747937835752964} +12/28/2021 07:17:46 - INFO - codeparrot_training - Step 40549: {'lr': 4.309041033006717e-05, 'samples': 20761600, 'steps': 40549, 'batch_loss/train': 0.7610799479298294} +12/28/2021 07:17:57 - INFO - codeparrot_training - Step 40550: {'lr': 4.308156349712944e-05, 'samples': 20762112, 'steps': 40550, 'batch_loss/train': 0.7626270987093449} +12/28/2021 07:18:07 - INFO - codeparrot_training - Step 40551: {'lr': 4.307271748682134e-05, 'samples': 20762624, 'steps': 40551, 'batch_loss/train': 0.701226320117712} +12/28/2021 07:18:20 - INFO - codeparrot_training - Step 40552: {'lr': 4.3063872299177935e-05, 'samples': 20763136, 'steps': 40552, 'batch_loss/train': 0.7340475304517895} +12/28/2021 07:18:30 - INFO - codeparrot_training - Step 40553: {'lr': 4.3055027934234434e-05, 'samples': 20763648, 'steps': 40553, 'batch_loss/train': 0.8568011526949704} +12/28/2021 07:18:41 - INFO - codeparrot_training - Step 40554: {'lr': 4.304618439202601e-05, 'samples': 20764160, 'steps': 40554, 'batch_loss/train': 0.6693047331064008} +12/28/2021 07:18:54 - INFO - codeparrot_training - Step 40555: {'lr': 4.30373416725878e-05, 'samples': 20764672, 'steps': 40555, 'batch_loss/train': 0.708349627442658} +12/28/2021 07:19:04 - INFO - codeparrot_training - Step 40556: {'lr': 4.302849977595499e-05, 'samples': 20765184, 'steps': 40556, 'batch_loss/train': 0.7876220964826643} +12/28/2021 07:19:15 - INFO - codeparrot_training - Step 40557: {'lr': 4.301965870216271e-05, 'samples': 20765696, 'steps': 40557, 'batch_loss/train': 0.7988054449670017} +12/28/2021 07:19:27 - INFO - codeparrot_training - Step 40558: {'lr': 4.3010818451246104e-05, 'samples': 20766208, 'steps': 40558, 'batch_loss/train': 0.7426678696647286} +12/28/2021 07:19:38 - INFO - codeparrot_training - Step 40559: {'lr': 4.300197902324032e-05, 'samples': 20766720, 'steps': 40559, 'batch_loss/train': 0.7354076281189919} +12/28/2021 07:19:48 - INFO - codeparrot_training - Step 40560: {'lr': 4.2993140418180535e-05, 'samples': 20767232, 'steps': 40560, 'batch_loss/train': 0.7616027854382992} +12/28/2021 07:19:59 - INFO - codeparrot_training - Step 40561: {'lr': 4.298430263610184e-05, 'samples': 20767744, 'steps': 40561, 'batch_loss/train': 0.6864342237822711} +12/28/2021 07:20:12 - INFO - codeparrot_training - Step 40562: {'lr': 4.297546567703947e-05, 'samples': 20768256, 'steps': 40562, 'batch_loss/train': 0.8359753869008273} +12/28/2021 07:20:22 - INFO - codeparrot_training - Step 40563: {'lr': 4.2966629541028426e-05, 'samples': 20768768, 'steps': 40563, 'batch_loss/train': 0.7039832898881286} +12/28/2021 07:20:33 - INFO - codeparrot_training - Step 40564: {'lr': 4.2957794228103805e-05, 'samples': 20769280, 'steps': 40564, 'batch_loss/train': 0.754576587351039} +12/28/2021 07:20:46 - INFO - codeparrot_training - Step 40565: {'lr': 4.294895973830093e-05, 'samples': 20769792, 'steps': 40565, 'batch_loss/train': 0.8623255821876228} +12/28/2021 07:20:56 - INFO - codeparrot_training - Step 40566: {'lr': 4.294012607165479e-05, 'samples': 20770304, 'steps': 40566, 'batch_loss/train': 0.7082144510932267} +12/28/2021 07:21:07 - INFO - codeparrot_training - Step 40567: {'lr': 4.2931293228200496e-05, 'samples': 20770816, 'steps': 40567, 'batch_loss/train': 0.8770223122555763} +12/28/2021 07:21:19 - INFO - codeparrot_training - Step 40568: {'lr': 4.292246120797319e-05, 'samples': 20771328, 'steps': 40568, 'batch_loss/train': 0.71395722636953} +12/28/2021 07:21:30 - INFO - codeparrot_training - Step 40569: {'lr': 4.291363001100801e-05, 'samples': 20771840, 'steps': 40569, 'batch_loss/train': 0.7798535823822021} +12/28/2021 07:21:40 - INFO - codeparrot_training - Step 40570: {'lr': 4.2904799637340034e-05, 'samples': 20772352, 'steps': 40570, 'batch_loss/train': 0.6846280230674893} +12/28/2021 07:21:52 - INFO - codeparrot_training - Step 40571: {'lr': 4.2895970087004434e-05, 'samples': 20772864, 'steps': 40571, 'batch_loss/train': 0.9248480987735093} +12/28/2021 07:22:03 - INFO - codeparrot_training - Step 40572: {'lr': 4.2887141360036155e-05, 'samples': 20773376, 'steps': 40572, 'batch_loss/train': 0.6503368981648237} +12/28/2021 07:22:14 - INFO - codeparrot_training - Step 40573: {'lr': 4.287831345647045e-05, 'samples': 20773888, 'steps': 40573, 'batch_loss/train': 0.8215255672112107} +12/28/2021 07:22:24 - INFO - codeparrot_training - Step 40574: {'lr': 4.286948637634241e-05, 'samples': 20774400, 'steps': 40574, 'batch_loss/train': 0.7089400246040896} +12/28/2021 07:22:37 - INFO - codeparrot_training - Step 40575: {'lr': 4.286066011968698e-05, 'samples': 20774912, 'steps': 40575, 'batch_loss/train': 0.772858239710331} +12/28/2021 07:22:48 - INFO - codeparrot_training - Step 40576: {'lr': 4.2851834686539416e-05, 'samples': 20775424, 'steps': 40576, 'batch_loss/train': 0.7619973488617688} +12/28/2021 07:22:58 - INFO - codeparrot_training - Step 40577: {'lr': 4.284301007693478e-05, 'samples': 20775936, 'steps': 40577, 'batch_loss/train': 0.7353330638725311} +12/28/2021 07:23:11 - INFO - codeparrot_training - Step 40578: {'lr': 4.283418629090807e-05, 'samples': 20776448, 'steps': 40578, 'batch_loss/train': 0.6452664269600064} +12/28/2021 07:23:21 - INFO - codeparrot_training - Step 40579: {'lr': 4.282536332849435e-05, 'samples': 20776960, 'steps': 40579, 'batch_loss/train': 0.7027977611869574} +12/28/2021 07:23:32 - INFO - codeparrot_training - Step 40580: {'lr': 4.2816541189728886e-05, 'samples': 20777472, 'steps': 40580, 'batch_loss/train': 0.9266190659254789} +12/28/2021 07:23:44 - INFO - codeparrot_training - Step 40581: {'lr': 4.280771987464657e-05, 'samples': 20777984, 'steps': 40581, 'batch_loss/train': 0.8158270679414272} +12/28/2021 07:23:55 - INFO - codeparrot_training - Step 40582: {'lr': 4.279889938328252e-05, 'samples': 20778496, 'steps': 40582, 'batch_loss/train': 0.746169954072684} +12/28/2021 07:24:05 - INFO - codeparrot_training - Step 40583: {'lr': 4.279007971567181e-05, 'samples': 20779008, 'steps': 40583, 'batch_loss/train': 0.8031284213066101} +12/28/2021 07:24:16 - INFO - codeparrot_training - Step 40584: {'lr': 4.2781260871849495e-05, 'samples': 20779520, 'steps': 40584, 'batch_loss/train': 0.7808414264582098} +12/28/2021 07:24:29 - INFO - codeparrot_training - Step 40585: {'lr': 4.277244285185064e-05, 'samples': 20780032, 'steps': 40585, 'batch_loss/train': 0.6923339702188969} +12/28/2021 07:24:39 - INFO - codeparrot_training - Step 40586: {'lr': 4.276362565571032e-05, 'samples': 20780544, 'steps': 40586, 'batch_loss/train': 0.655676388181746} +12/28/2021 07:24:50 - INFO - codeparrot_training - Step 40587: {'lr': 4.2754809283463564e-05, 'samples': 20781056, 'steps': 40587, 'batch_loss/train': 0.7263139076530933} +12/28/2021 07:25:02 - INFO - codeparrot_training - Step 40588: {'lr': 4.2745993735145514e-05, 'samples': 20781568, 'steps': 40588, 'batch_loss/train': 0.7278920598328114} +12/28/2021 07:25:13 - INFO - codeparrot_training - Step 40589: {'lr': 4.273717901079105e-05, 'samples': 20782080, 'steps': 40589, 'batch_loss/train': 0.822988745290786} +12/28/2021 07:25:23 - INFO - codeparrot_training - Step 40590: {'lr': 4.272836511043526e-05, 'samples': 20782592, 'steps': 40590, 'batch_loss/train': 0.7021273723803461} +12/28/2021 07:25:36 - INFO - codeparrot_training - Step 40591: {'lr': 4.271955203411334e-05, 'samples': 20783104, 'steps': 40591, 'batch_loss/train': 0.7372853665146977} +12/28/2021 07:25:47 - INFO - codeparrot_training - Step 40592: {'lr': 4.2710739781860156e-05, 'samples': 20783616, 'steps': 40592, 'batch_loss/train': 0.7757987300865352} +12/28/2021 07:25:57 - INFO - codeparrot_training - Step 40593: {'lr': 4.270192835371081e-05, 'samples': 20784128, 'steps': 40593, 'batch_loss/train': 0.662891672225669} +12/28/2021 07:26:08 - INFO - codeparrot_training - Step 40594: {'lr': 4.2693117749700336e-05, 'samples': 20784640, 'steps': 40594, 'batch_loss/train': 0.7100764852948487} +12/28/2021 07:26:20 - INFO - codeparrot_training - Step 40595: {'lr': 4.268430796986372e-05, 'samples': 20785152, 'steps': 40595, 'batch_loss/train': 0.7817408749833703} +12/28/2021 07:26:31 - INFO - codeparrot_training - Step 40596: {'lr': 4.2675499014236034e-05, 'samples': 20785664, 'steps': 40596, 'batch_loss/train': 0.766152742318809} +12/28/2021 07:26:41 - INFO - codeparrot_training - Step 40597: {'lr': 4.266669088285227e-05, 'samples': 20786176, 'steps': 40597, 'batch_loss/train': 0.7139109233394265} +12/28/2021 07:26:53 - INFO - codeparrot_training - Step 40598: {'lr': 4.265788357574746e-05, 'samples': 20786688, 'steps': 40598, 'batch_loss/train': 0.7292976537719369} +12/28/2021 07:27:04 - INFO - codeparrot_training - Step 40599: {'lr': 4.264907709295662e-05, 'samples': 20787200, 'steps': 40599, 'batch_loss/train': 0.7484232778660953} +12/28/2021 07:27:15 - INFO - codeparrot_training - Step 40600: {'lr': 4.2640271434514823e-05, 'samples': 20787712, 'steps': 40600, 'batch_loss/train': 0.8082332825288177} +12/28/2021 07:27:27 - INFO - codeparrot_training - Step 40601: {'lr': 4.263146660045686e-05, 'samples': 20788224, 'steps': 40601, 'batch_loss/train': 0.6835345765575767} +12/28/2021 07:27:38 - INFO - codeparrot_training - Step 40602: {'lr': 4.262266259081799e-05, 'samples': 20788736, 'steps': 40602, 'batch_loss/train': 0.8053344255313277} +12/28/2021 07:27:49 - INFO - codeparrot_training - Step 40603: {'lr': 4.2613859405633145e-05, 'samples': 20789248, 'steps': 40603, 'batch_loss/train': 0.7825241964310408} +12/28/2021 07:28:01 - INFO - codeparrot_training - Step 40604: {'lr': 4.2605057044937224e-05, 'samples': 20789760, 'steps': 40604, 'batch_loss/train': 0.6594318682327867} +12/28/2021 07:28:12 - INFO - codeparrot_training - Step 40605: {'lr': 4.259625550876525e-05, 'samples': 20790272, 'steps': 40605, 'batch_loss/train': 0.8575113471597433} +12/28/2021 07:28:22 - INFO - codeparrot_training - Step 40606: {'lr': 4.258745479715234e-05, 'samples': 20790784, 'steps': 40606, 'batch_loss/train': 0.7349455670919269} +12/28/2021 07:28:33 - INFO - codeparrot_training - Step 40607: {'lr': 4.257865491013335e-05, 'samples': 20791296, 'steps': 40607, 'batch_loss/train': 0.7179251709021628} +12/28/2021 07:28:45 - INFO - codeparrot_training - Step 40608: {'lr': 4.2569855847743314e-05, 'samples': 20791808, 'steps': 40608, 'batch_loss/train': 0.7144948802888393} +12/28/2021 07:28:56 - INFO - codeparrot_training - Step 40609: {'lr': 4.2561057610017215e-05, 'samples': 20792320, 'steps': 40609, 'batch_loss/train': 0.7244045790284872} +12/28/2021 07:29:06 - INFO - codeparrot_training - Step 40610: {'lr': 4.2552260196990014e-05, 'samples': 20792832, 'steps': 40610, 'batch_loss/train': 0.6355009878752753} +12/28/2021 07:29:19 - INFO - codeparrot_training - Step 40611: {'lr': 4.254346360869671e-05, 'samples': 20793344, 'steps': 40611, 'batch_loss/train': 0.7868872210383415} +12/28/2021 07:29:29 - INFO - codeparrot_training - Step 40612: {'lr': 4.2534667845172234e-05, 'samples': 20793856, 'steps': 40612, 'batch_loss/train': 0.7301114676520228} +12/28/2021 07:29:40 - INFO - codeparrot_training - Step 40613: {'lr': 4.25258729064516e-05, 'samples': 20794368, 'steps': 40613, 'batch_loss/train': 0.747345557436347} +12/28/2021 07:29:53 - INFO - codeparrot_training - Step 40614: {'lr': 4.2517078792569745e-05, 'samples': 20794880, 'steps': 40614, 'batch_loss/train': 0.7513360283337533} +12/28/2021 07:30:03 - INFO - codeparrot_training - Step 40615: {'lr': 4.250828550356173e-05, 'samples': 20795392, 'steps': 40615, 'batch_loss/train': 0.7004601592198014} +12/28/2021 07:30:14 - INFO - codeparrot_training - Step 40616: {'lr': 4.249949303946229e-05, 'samples': 20795904, 'steps': 40616, 'batch_loss/train': 0.7247890050057322} +12/28/2021 07:30:25 - INFO - codeparrot_training - Step 40617: {'lr': 4.2490701400306645e-05, 'samples': 20796416, 'steps': 40617, 'batch_loss/train': 0.774853796698153} +12/28/2021 07:30:37 - INFO - codeparrot_training - Step 40618: {'lr': 4.2481910586129545e-05, 'samples': 20796928, 'steps': 40618, 'batch_loss/train': 0.7173725226894021} +12/28/2021 07:30:47 - INFO - codeparrot_training - Step 40619: {'lr': 4.2473120596966005e-05, 'samples': 20797440, 'steps': 40619, 'batch_loss/train': 0.7012114259414375} +12/28/2021 07:30:58 - INFO - codeparrot_training - Step 40620: {'lr': 4.2464331432851e-05, 'samples': 20797952, 'steps': 40620, 'batch_loss/train': 0.705242620781064} +12/28/2021 07:31:11 - INFO - codeparrot_training - Step 40621: {'lr': 4.245554309381944e-05, 'samples': 20798464, 'steps': 40621, 'batch_loss/train': 0.7369975112378597} +12/28/2021 07:31:21 - INFO - codeparrot_training - Step 40622: {'lr': 4.2446755579906266e-05, 'samples': 20798976, 'steps': 40622, 'batch_loss/train': 0.751986900344491} +12/28/2021 07:31:32 - INFO - codeparrot_training - Step 40623: {'lr': 4.2437968891146425e-05, 'samples': 20799488, 'steps': 40623, 'batch_loss/train': 0.721095765940845} +12/28/2021 07:31:44 - INFO - codeparrot_training - Step 40624: {'lr': 4.242918302757487e-05, 'samples': 20800000, 'steps': 40624, 'batch_loss/train': 0.6530221067368984} +12/28/2021 07:31:55 - INFO - codeparrot_training - Step 40625: {'lr': 4.242039798922648e-05, 'samples': 20800512, 'steps': 40625, 'batch_loss/train': 0.7435789601877332} +12/28/2021 07:32:05 - INFO - codeparrot_training - Step 40626: {'lr': 4.2411613776136284e-05, 'samples': 20801024, 'steps': 40626, 'batch_loss/train': 0.6181810375419445} +12/28/2021 07:32:16 - INFO - codeparrot_training - Step 40627: {'lr': 4.2402830388339e-05, 'samples': 20801536, 'steps': 40627, 'batch_loss/train': 0.7402001670561731} +12/28/2021 07:32:28 - INFO - codeparrot_training - Step 40628: {'lr': 4.239404782586975e-05, 'samples': 20802048, 'steps': 40628, 'batch_loss/train': 0.6814148649573326} +12/28/2021 07:32:39 - INFO - codeparrot_training - Step 40629: {'lr': 4.2385266088763445e-05, 'samples': 20802560, 'steps': 40629, 'batch_loss/train': 0.8208508482202888} +12/28/2021 07:32:50 - INFO - codeparrot_training - Step 40630: {'lr': 4.237648517705484e-05, 'samples': 20803072, 'steps': 40630, 'batch_loss/train': 0.7534816788975149} +12/28/2021 07:33:03 - INFO - codeparrot_training - Step 40631: {'lr': 4.236770509077889e-05, 'samples': 20803584, 'steps': 40631, 'batch_loss/train': 0.6500470811733976} +12/28/2021 07:33:13 - INFO - codeparrot_training - Step 40632: {'lr': 4.235892582997067e-05, 'samples': 20804096, 'steps': 40632, 'batch_loss/train': 0.6783441814477555} +12/28/2021 07:33:24 - INFO - codeparrot_training - Step 40633: {'lr': 4.2350147394664886e-05, 'samples': 20804608, 'steps': 40633, 'batch_loss/train': 1.0305607672780752} +12/28/2021 07:33:36 - INFO - codeparrot_training - Step 40634: {'lr': 4.234136978489653e-05, 'samples': 20805120, 'steps': 40634, 'batch_loss/train': 0.7072052843868732} +12/28/2021 07:33:47 - INFO - codeparrot_training - Step 40635: {'lr': 4.233259300070047e-05, 'samples': 20805632, 'steps': 40635, 'batch_loss/train': 0.7453579287976027} +12/28/2021 07:33:57 - INFO - codeparrot_training - Step 40636: {'lr': 4.232381704211161e-05, 'samples': 20806144, 'steps': 40636, 'batch_loss/train': 0.7214720237534493} +12/28/2021 07:34:10 - INFO - codeparrot_training - Step 40637: {'lr': 4.231504190916482e-05, 'samples': 20806656, 'steps': 40637, 'batch_loss/train': 0.7345947017893195} +12/28/2021 07:34:20 - INFO - codeparrot_training - Step 40638: {'lr': 4.2306267601895014e-05, 'samples': 20807168, 'steps': 40638, 'batch_loss/train': 0.7500717549119145} +12/28/2021 07:34:31 - INFO - codeparrot_training - Step 40639: {'lr': 4.2297494120337066e-05, 'samples': 20807680, 'steps': 40639, 'batch_loss/train': 0.8484051432460546} +12/28/2021 07:34:42 - INFO - codeparrot_training - Step 40640: {'lr': 4.228872146452586e-05, 'samples': 20808192, 'steps': 40640, 'batch_loss/train': 0.7856563050299883} +12/28/2021 07:34:54 - INFO - codeparrot_training - Step 40641: {'lr': 4.227994963449633e-05, 'samples': 20808704, 'steps': 40641, 'batch_loss/train': 0.854753433726728} +12/28/2021 07:35:05 - INFO - codeparrot_training - Step 40642: {'lr': 4.227117863028318e-05, 'samples': 20809216, 'steps': 40642, 'batch_loss/train': 0.774443844333291} +12/28/2021 07:35:16 - INFO - codeparrot_training - Step 40643: {'lr': 4.226240845192147e-05, 'samples': 20809728, 'steps': 40643, 'batch_loss/train': 0.7853194219060242} +12/28/2021 07:35:28 - INFO - codeparrot_training - Step 40644: {'lr': 4.225363909944596e-05, 'samples': 20810240, 'steps': 40644, 'batch_loss/train': 0.640794061939232} +12/28/2021 07:35:39 - INFO - codeparrot_training - Step 40645: {'lr': 4.224487057289153e-05, 'samples': 20810752, 'steps': 40645, 'batch_loss/train': 0.6855551626067609} +12/28/2021 07:35:49 - INFO - codeparrot_training - Step 40646: {'lr': 4.223610287229304e-05, 'samples': 20811264, 'steps': 40646, 'batch_loss/train': 0.7159038202371448} +12/28/2021 07:36:02 - INFO - codeparrot_training - Step 40647: {'lr': 4.2227335997685384e-05, 'samples': 20811776, 'steps': 40647, 'batch_loss/train': 0.7006733543239534} +12/28/2021 07:36:13 - INFO - codeparrot_training - Step 40648: {'lr': 4.221856994910336e-05, 'samples': 20812288, 'steps': 40648, 'batch_loss/train': 0.701503056101501} +12/28/2021 07:36:23 - INFO - codeparrot_training - Step 40649: {'lr': 4.220980472658187e-05, 'samples': 20812800, 'steps': 40649, 'batch_loss/train': 0.7750974022783339} +12/28/2021 07:36:34 - INFO - codeparrot_training - Step 40650: {'lr': 4.22010403301557e-05, 'samples': 20813312, 'steps': 40650, 'batch_loss/train': 0.6398797721485607} +12/28/2021 07:36:46 - INFO - codeparrot_training - Step 40651: {'lr': 4.219227675985976e-05, 'samples': 20813824, 'steps': 40651, 'batch_loss/train': 0.7065785503946245} +12/28/2021 07:36:57 - INFO - codeparrot_training - Step 40652: {'lr': 4.2183514015728926e-05, 'samples': 20814336, 'steps': 40652, 'batch_loss/train': 0.7394550545141101} +12/28/2021 07:37:07 - INFO - codeparrot_training - Step 40653: {'lr': 4.217475209779784e-05, 'samples': 20814848, 'steps': 40653, 'batch_loss/train': 0.7357168922899291} +12/28/2021 07:37:19 - INFO - codeparrot_training - Step 40654: {'lr': 4.216599100610152e-05, 'samples': 20815360, 'steps': 40654, 'batch_loss/train': 0.7670901552774012} +12/28/2021 07:37:30 - INFO - codeparrot_training - Step 40655: {'lr': 4.215723074067482e-05, 'samples': 20815872, 'steps': 40655, 'batch_loss/train': 0.8073156904429197} +12/28/2021 07:37:41 - INFO - codeparrot_training - Step 40656: {'lr': 4.214847130155244e-05, 'samples': 20816384, 'steps': 40656, 'batch_loss/train': 0.7686747601255774} +12/28/2021 07:37:53 - INFO - codeparrot_training - Step 40657: {'lr': 4.2139712688769176e-05, 'samples': 20816896, 'steps': 40657, 'batch_loss/train': 0.700076679000631} +12/28/2021 07:38:04 - INFO - codeparrot_training - Step 40658: {'lr': 4.213095490236008e-05, 'samples': 20817408, 'steps': 40658, 'batch_loss/train': 0.7687384532764554} +12/28/2021 07:38:14 - INFO - codeparrot_training - Step 40659: {'lr': 4.212219794235972e-05, 'samples': 20817920, 'steps': 40659, 'batch_loss/train': 0.7548835035413504} +12/28/2021 07:38:25 - INFO - codeparrot_training - Step 40660: {'lr': 4.211344180880297e-05, 'samples': 20818432, 'steps': 40660, 'batch_loss/train': 0.69797428464517} +12/28/2021 07:38:38 - INFO - codeparrot_training - Step 40661: {'lr': 4.2104686501724806e-05, 'samples': 20818944, 'steps': 40661, 'batch_loss/train': 0.7092579645104706} +12/28/2021 07:38:48 - INFO - codeparrot_training - Step 40662: {'lr': 4.209593202115983e-05, 'samples': 20819456, 'steps': 40662, 'batch_loss/train': 0.7524372008629143} +12/28/2021 07:38:59 - INFO - codeparrot_training - Step 40663: {'lr': 4.2087178367142933e-05, 'samples': 20819968, 'steps': 40663, 'batch_loss/train': 0.9000622471794486} +12/28/2021 07:39:11 - INFO - codeparrot_training - Step 40664: {'lr': 4.207842553970892e-05, 'samples': 20820480, 'steps': 40664, 'batch_loss/train': 0.7292219758965075} +12/28/2021 07:39:22 - INFO - codeparrot_training - Step 40665: {'lr': 4.2069673538892574e-05, 'samples': 20820992, 'steps': 40665, 'batch_loss/train': 0.9145163646899164} +12/28/2021 07:39:33 - INFO - codeparrot_training - Step 40666: {'lr': 4.206092236472869e-05, 'samples': 20821504, 'steps': 40666, 'batch_loss/train': 0.757245680782944} +12/28/2021 07:39:45 - INFO - codeparrot_training - Step 40667: {'lr': 4.2052172017252144e-05, 'samples': 20822016, 'steps': 40667, 'batch_loss/train': 0.783816454000771} +12/28/2021 07:39:55 - INFO - codeparrot_training - Step 40668: {'lr': 4.204342249649748e-05, 'samples': 20822528, 'steps': 40668, 'batch_loss/train': 0.7671822467818856} +12/28/2021 07:40:06 - INFO - codeparrot_training - Step 40669: {'lr': 4.2034673802499755e-05, 'samples': 20823040, 'steps': 40669, 'batch_loss/train': 0.7763790171593428} +12/28/2021 07:40:18 - INFO - codeparrot_training - Step 40670: {'lr': 4.202592593529367e-05, 'samples': 20823552, 'steps': 40670, 'batch_loss/train': 0.7258293356280774} +12/28/2021 07:40:29 - INFO - codeparrot_training - Step 40671: {'lr': 4.2017178894913885e-05, 'samples': 20824064, 'steps': 40671, 'batch_loss/train': 0.8097266294062138} +12/28/2021 07:40:40 - INFO - codeparrot_training - Step 40672: {'lr': 4.200843268139537e-05, 'samples': 20824576, 'steps': 40672, 'batch_loss/train': 0.6864114096388221} +12/28/2021 07:40:50 - INFO - codeparrot_training - Step 40673: {'lr': 4.199968729477271e-05, 'samples': 20825088, 'steps': 40673, 'batch_loss/train': 0.7614730102941394} +12/28/2021 07:41:02 - INFO - codeparrot_training - Step 40674: {'lr': 4.199094273508078e-05, 'samples': 20825600, 'steps': 40674, 'batch_loss/train': 0.7720507844351232} +12/28/2021 07:41:13 - INFO - codeparrot_training - Step 40675: {'lr': 4.198219900235431e-05, 'samples': 20826112, 'steps': 40675, 'batch_loss/train': 0.7784947361797094} +12/28/2021 07:41:24 - INFO - codeparrot_training - Step 40676: {'lr': 4.197345609662809e-05, 'samples': 20826624, 'steps': 40676, 'batch_loss/train': 0.79681012686342} +12/28/2021 07:41:36 - INFO - codeparrot_training - Step 40677: {'lr': 4.196471401793683e-05, 'samples': 20827136, 'steps': 40677, 'batch_loss/train': 0.6189190764562227} +12/28/2021 07:41:47 - INFO - codeparrot_training - Step 40678: {'lr': 4.195597276631541e-05, 'samples': 20827648, 'steps': 40678, 'batch_loss/train': 0.833181472029537} +12/28/2021 07:41:58 - INFO - codeparrot_training - Step 40679: {'lr': 4.194723234179837e-05, 'samples': 20828160, 'steps': 40679, 'batch_loss/train': 0.8128392724320292} +12/28/2021 07:42:10 - INFO - codeparrot_training - Step 40680: {'lr': 4.19384927444206e-05, 'samples': 20828672, 'steps': 40680, 'batch_loss/train': 0.6114092651987448} +12/28/2021 07:42:20 - INFO - codeparrot_training - Step 40681: {'lr': 4.1929753974216914e-05, 'samples': 20829184, 'steps': 40681, 'batch_loss/train': 0.8030234957113862} +12/28/2021 07:42:31 - INFO - codeparrot_training - Step 40682: {'lr': 4.192101603122189e-05, 'samples': 20829696, 'steps': 40682, 'batch_loss/train': 0.8277191873639822} +12/28/2021 07:42:43 - INFO - codeparrot_training - Step 40683: {'lr': 4.1912278915470284e-05, 'samples': 20830208, 'steps': 40683, 'batch_loss/train': 0.655609019100666} +12/28/2021 07:42:54 - INFO - codeparrot_training - Step 40684: {'lr': 4.1903542626997e-05, 'samples': 20830720, 'steps': 40684, 'batch_loss/train': 0.7165168887004256} +12/28/2021 07:43:04 - INFO - codeparrot_training - Step 40685: {'lr': 4.189480716583657e-05, 'samples': 20831232, 'steps': 40685, 'batch_loss/train': 0.7616796847432852} +12/28/2021 07:43:15 - INFO - codeparrot_training - Step 40686: {'lr': 4.188607253202378e-05, 'samples': 20831744, 'steps': 40686, 'batch_loss/train': 0.6666535176336765} +12/28/2021 07:43:28 - INFO - codeparrot_training - Step 40687: {'lr': 4.187733872559352e-05, 'samples': 20832256, 'steps': 40687, 'batch_loss/train': 0.7600087840110064} +12/28/2021 07:43:38 - INFO - codeparrot_training - Step 40688: {'lr': 4.18686057465803e-05, 'samples': 20832768, 'steps': 40688, 'batch_loss/train': 0.6358608029549941} +12/28/2021 07:43:49 - INFO - codeparrot_training - Step 40689: {'lr': 4.1859873595018916e-05, 'samples': 20833280, 'steps': 40689, 'batch_loss/train': 0.8583040460944176} +12/28/2021 07:44:01 - INFO - codeparrot_training - Step 40690: {'lr': 4.185114227094408e-05, 'samples': 20833792, 'steps': 40690, 'batch_loss/train': 0.7759388415142894} +12/28/2021 07:44:12 - INFO - codeparrot_training - Step 40691: {'lr': 4.1842411774390495e-05, 'samples': 20834304, 'steps': 40691, 'batch_loss/train': 0.8128209235146642} +12/28/2021 07:44:22 - INFO - codeparrot_training - Step 40692: {'lr': 4.18336821053929e-05, 'samples': 20834816, 'steps': 40692, 'batch_loss/train': 0.7875097296200693} +12/28/2021 07:44:34 - INFO - codeparrot_training - Step 40693: {'lr': 4.1824953263986056e-05, 'samples': 20835328, 'steps': 40693, 'batch_loss/train': 0.7186686978675425} +12/28/2021 07:44:45 - INFO - codeparrot_training - Step 40694: {'lr': 4.1816225250204455e-05, 'samples': 20835840, 'steps': 40694, 'batch_loss/train': 0.7415815033018589} +12/28/2021 07:44:56 - INFO - codeparrot_training - Step 40695: {'lr': 4.1807498064082986e-05, 'samples': 20836352, 'steps': 40695, 'batch_loss/train': 0.6528902454301715} +12/28/2021 07:45:08 - INFO - codeparrot_training - Step 40696: {'lr': 4.1798771705656377e-05, 'samples': 20836864, 'steps': 40696, 'batch_loss/train': 0.7602603398263454} +12/28/2021 07:45:19 - INFO - codeparrot_training - Step 40697: {'lr': 4.179004617495913e-05, 'samples': 20837376, 'steps': 40697, 'batch_loss/train': 0.7167619881220162} +12/28/2021 07:45:30 - INFO - codeparrot_training - Step 40698: {'lr': 4.178132147202612e-05, 'samples': 20837888, 'steps': 40698, 'batch_loss/train': 0.7573718414641917} +12/28/2021 07:45:40 - INFO - codeparrot_training - Step 40699: {'lr': 4.177259759689192e-05, 'samples': 20838400, 'steps': 40699, 'batch_loss/train': 0.664089894387871} +12/28/2021 07:45:52 - INFO - codeparrot_training - Step 40700: {'lr': 4.176387454959127e-05, 'samples': 20838912, 'steps': 40700, 'batch_loss/train': 0.8720166247803718} +12/28/2021 07:46:03 - INFO - codeparrot_training - Step 40701: {'lr': 4.175515233015878e-05, 'samples': 20839424, 'steps': 40701, 'batch_loss/train': 0.8457121839746833} +12/28/2021 07:46:14 - INFO - codeparrot_training - Step 40702: {'lr': 4.1746430938629206e-05, 'samples': 20839936, 'steps': 40702, 'batch_loss/train': 0.877267955802381} +12/28/2021 07:46:26 - INFO - codeparrot_training - Step 40703: {'lr': 4.173771037503718e-05, 'samples': 20840448, 'steps': 40703, 'batch_loss/train': 1.4148209234699607} +12/28/2021 07:46:37 - INFO - codeparrot_training - Step 40704: {'lr': 4.1728990639417405e-05, 'samples': 20840960, 'steps': 40704, 'batch_loss/train': 0.798246799968183} +12/28/2021 07:46:47 - INFO - codeparrot_training - Step 40705: {'lr': 4.1720271731804484e-05, 'samples': 20841472, 'steps': 40705, 'batch_loss/train': 0.8080144841223955} +12/28/2021 07:47:00 - INFO - codeparrot_training - Step 40706: {'lr': 4.1711553652233144e-05, 'samples': 20841984, 'steps': 40706, 'batch_loss/train': 0.642435138579458} +12/28/2021 07:47:11 - INFO - codeparrot_training - Step 40707: {'lr': 4.170283640073808e-05, 'samples': 20842496, 'steps': 40707, 'batch_loss/train': 0.7317743799649179} +12/28/2021 07:47:22 - INFO - codeparrot_training - Step 40708: {'lr': 4.1694119977353753e-05, 'samples': 20843008, 'steps': 40708, 'batch_loss/train': 0.7794406842440367} +12/28/2021 07:47:32 - INFO - codeparrot_training - Step 40709: {'lr': 4.168540438211502e-05, 'samples': 20843520, 'steps': 40709, 'batch_loss/train': 0.7453263686038554} +12/28/2021 07:47:44 - INFO - codeparrot_training - Step 40710: {'lr': 4.1676689615056525e-05, 'samples': 20844032, 'steps': 40710, 'batch_loss/train': 0.5665198883507401} +12/28/2021 07:47:55 - INFO - codeparrot_training - Step 40711: {'lr': 4.1667975676212775e-05, 'samples': 20844544, 'steps': 40711, 'batch_loss/train': 0.7288318555802107} +12/28/2021 07:48:06 - INFO - codeparrot_training - Step 40712: {'lr': 4.1659262565618447e-05, 'samples': 20845056, 'steps': 40712, 'batch_loss/train': 0.65050786617212} +12/28/2021 07:48:18 - INFO - codeparrot_training - Step 40713: {'lr': 4.165055028330836e-05, 'samples': 20845568, 'steps': 40713, 'batch_loss/train': 0.65991983236745} +12/28/2021 07:48:28 - INFO - codeparrot_training - Step 40714: {'lr': 4.164183882931694e-05, 'samples': 20846080, 'steps': 40714, 'batch_loss/train': 0.7103259917348623} +12/28/2021 07:48:39 - INFO - codeparrot_training - Step 40715: {'lr': 4.1633128203678874e-05, 'samples': 20846592, 'steps': 40715, 'batch_loss/train': 0.6915511819534004} +12/28/2021 07:48:52 - INFO - codeparrot_training - Step 40716: {'lr': 4.1624418406428834e-05, 'samples': 20847104, 'steps': 40716, 'batch_loss/train': 0.733995626680553} +12/28/2021 07:49:02 - INFO - codeparrot_training - Step 40717: {'lr': 4.161570943760143e-05, 'samples': 20847616, 'steps': 40717, 'batch_loss/train': 0.7224213387817144} +12/28/2021 07:49:13 - INFO - codeparrot_training - Step 40718: {'lr': 4.1607001297231274e-05, 'samples': 20848128, 'steps': 40718, 'batch_loss/train': 0.7033664523623884} +12/28/2021 07:49:24 - INFO - codeparrot_training - Step 40719: {'lr': 4.159829398535306e-05, 'samples': 20848640, 'steps': 40719, 'batch_loss/train': 0.6937895249575377} +12/28/2021 07:49:36 - INFO - codeparrot_training - Step 40720: {'lr': 4.158958750200123e-05, 'samples': 20849152, 'steps': 40720, 'batch_loss/train': 0.7203337200917304} +12/28/2021 07:49:47 - INFO - codeparrot_training - Step 40721: {'lr': 4.158088184721054e-05, 'samples': 20849664, 'steps': 40721, 'batch_loss/train': 0.7367396047338843} +12/28/2021 07:49:57 - INFO - codeparrot_training - Step 40722: {'lr': 4.157217702101565e-05, 'samples': 20850176, 'steps': 40722, 'batch_loss/train': 0.7417818706016988} +12/28/2021 07:50:09 - INFO - codeparrot_training - Step 40723: {'lr': 4.1563473023450956e-05, 'samples': 20850688, 'steps': 40723, 'batch_loss/train': 0.6719220378436148} +12/28/2021 07:50:20 - INFO - codeparrot_training - Step 40724: {'lr': 4.155476985455131e-05, 'samples': 20851200, 'steps': 40724, 'batch_loss/train': 0.7026813069824129} +12/28/2021 07:50:31 - INFO - codeparrot_training - Step 40725: {'lr': 4.1546067514351164e-05, 'samples': 20851712, 'steps': 40725, 'batch_loss/train': 0.8174453256651759} +12/28/2021 07:50:43 - INFO - codeparrot_training - Step 40726: {'lr': 4.15373660028851e-05, 'samples': 20852224, 'steps': 40726, 'batch_loss/train': 0.6363631887361407} +12/28/2021 07:50:54 - INFO - codeparrot_training - Step 40727: {'lr': 4.1528665320187784e-05, 'samples': 20852736, 'steps': 40727, 'batch_loss/train': 0.6799150351434946} +12/28/2021 07:51:04 - INFO - codeparrot_training - Step 40728: {'lr': 4.151996546629377e-05, 'samples': 20853248, 'steps': 40728, 'batch_loss/train': 0.7785804076120257} +12/28/2021 07:51:17 - INFO - codeparrot_training - Step 40729: {'lr': 4.151126644123765e-05, 'samples': 20853760, 'steps': 40729, 'batch_loss/train': 0.601430131791858} +12/28/2021 07:51:27 - INFO - codeparrot_training - Step 40730: {'lr': 4.150256824505402e-05, 'samples': 20854272, 'steps': 40730, 'batch_loss/train': 0.7245340086519718} +12/28/2021 07:51:38 - INFO - codeparrot_training - Step 40731: {'lr': 4.149387087777745e-05, 'samples': 20854784, 'steps': 40731, 'batch_loss/train': 0.7662334060296416} +12/28/2021 07:51:49 - INFO - codeparrot_training - Step 40732: {'lr': 4.1485174339442514e-05, 'samples': 20855296, 'steps': 40732, 'batch_loss/train': 0.7629100979538634} +12/28/2021 07:52:01 - INFO - codeparrot_training - Step 40733: {'lr': 4.147647863008386e-05, 'samples': 20855808, 'steps': 40733, 'batch_loss/train': 0.6488250037655234} +12/28/2021 07:52:12 - INFO - codeparrot_training - Step 40734: {'lr': 4.146778374973587e-05, 'samples': 20856320, 'steps': 40734, 'batch_loss/train': 0.7172049188520759} +12/28/2021 07:52:23 - INFO - codeparrot_training - Step 40735: {'lr': 4.145908969843329e-05, 'samples': 20856832, 'steps': 40735, 'batch_loss/train': 0.7835246324539185} +12/28/2021 07:52:35 - INFO - codeparrot_training - Step 40736: {'lr': 4.1450396476210686e-05, 'samples': 20857344, 'steps': 40736, 'batch_loss/train': 0.7479403633624315} +12/28/2021 07:52:45 - INFO - codeparrot_training - Step 40737: {'lr': 4.1441704083102495e-05, 'samples': 20857856, 'steps': 40737, 'batch_loss/train': 0.5921849808655679} +12/28/2021 07:52:56 - INFO - codeparrot_training - Step 40738: {'lr': 4.14330125191433e-05, 'samples': 20858368, 'steps': 40738, 'batch_loss/train': 0.6953566945157945} +12/28/2021 07:53:08 - INFO - codeparrot_training - Step 40739: {'lr': 4.142432178436781e-05, 'samples': 20858880, 'steps': 40739, 'batch_loss/train': 0.7310928259976208} +12/28/2021 07:53:19 - INFO - codeparrot_training - Step 40740: {'lr': 4.141563187881039e-05, 'samples': 20859392, 'steps': 40740, 'batch_loss/train': 0.7639391941484064} +12/28/2021 07:53:29 - INFO - codeparrot_training - Step 40741: {'lr': 4.140694280250568e-05, 'samples': 20859904, 'steps': 40741, 'batch_loss/train': 0.6032217959873378} +12/28/2021 07:53:40 - INFO - codeparrot_training - Step 40742: {'lr': 4.139825455548818e-05, 'samples': 20860416, 'steps': 40742, 'batch_loss/train': 0.7010692185722291} +12/28/2021 07:53:52 - INFO - codeparrot_training - Step 40743: {'lr': 4.1389567137792464e-05, 'samples': 20860928, 'steps': 40743, 'batch_loss/train': 0.7107723108492792} +12/28/2021 07:54:03 - INFO - codeparrot_training - Step 40744: {'lr': 4.138088054945305e-05, 'samples': 20861440, 'steps': 40744, 'batch_loss/train': 0.717473323456943} +12/28/2021 07:54:13 - INFO - codeparrot_training - Step 40745: {'lr': 4.1372194790504496e-05, 'samples': 20861952, 'steps': 40745, 'batch_loss/train': 0.8085134685970843} +12/28/2021 07:54:26 - INFO - codeparrot_training - Step 40746: {'lr': 4.136350986098133e-05, 'samples': 20862464, 'steps': 40746, 'batch_loss/train': 0.6964029748924077} +12/28/2021 07:54:37 - INFO - codeparrot_training - Step 40747: {'lr': 4.135482576091806e-05, 'samples': 20862976, 'steps': 40747, 'batch_loss/train': 0.7161815314320847} +12/28/2021 07:54:47 - INFO - codeparrot_training - Step 40748: {'lr': 4.1346142490349266e-05, 'samples': 20863488, 'steps': 40748, 'batch_loss/train': 0.7587327612563968} +12/28/2021 07:55:00 - INFO - codeparrot_training - Step 40749: {'lr': 4.133746004930933e-05, 'samples': 20864000, 'steps': 40749, 'batch_loss/train': 0.7076088883914053} +12/28/2021 07:55:10 - INFO - codeparrot_training - Step 40750: {'lr': 4.132877843783292e-05, 'samples': 20864512, 'steps': 40750, 'batch_loss/train': 0.6965926848351955} +12/28/2021 07:55:21 - INFO - codeparrot_training - Step 40751: {'lr': 4.132009765595454e-05, 'samples': 20865024, 'steps': 40751, 'batch_loss/train': 0.7552621948998421} +12/28/2021 07:55:31 - INFO - codeparrot_training - Step 40752: {'lr': 4.131141770370861e-05, 'samples': 20865536, 'steps': 40752, 'batch_loss/train': 0.7737587029114366} +12/28/2021 07:55:44 - INFO - codeparrot_training - Step 40753: {'lr': 4.130273858112968e-05, 'samples': 20866048, 'steps': 40753, 'batch_loss/train': 0.7317665731534362} +12/28/2021 07:55:54 - INFO - codeparrot_training - Step 40754: {'lr': 4.129406028825228e-05, 'samples': 20866560, 'steps': 40754, 'batch_loss/train': 0.7342617083340883} +12/28/2021 07:56:05 - INFO - codeparrot_training - Step 40755: {'lr': 4.128538282511088e-05, 'samples': 20867072, 'steps': 40755, 'batch_loss/train': 0.8336591525003314} +12/28/2021 07:56:18 - INFO - codeparrot_training - Step 40756: {'lr': 4.127670619173998e-05, 'samples': 20867584, 'steps': 40756, 'batch_loss/train': 0.8310557792428881} +12/28/2021 07:56:28 - INFO - codeparrot_training - Step 40757: {'lr': 4.1268030388174086e-05, 'samples': 20868096, 'steps': 40757, 'batch_loss/train': 0.6094690192257985} +12/28/2021 07:56:39 - INFO - codeparrot_training - Step 40758: {'lr': 4.1259355414447695e-05, 'samples': 20868608, 'steps': 40758, 'batch_loss/train': 0.8312271465547383} +12/28/2021 07:56:52 - INFO - codeparrot_training - Step 40759: {'lr': 4.125068127059528e-05, 'samples': 20869120, 'steps': 40759, 'batch_loss/train': 0.6718096631811932} +12/28/2021 07:57:02 - INFO - codeparrot_training - Step 40760: {'lr': 4.124200795665134e-05, 'samples': 20869632, 'steps': 40760, 'batch_loss/train': 0.7605562536045909} +12/28/2021 07:57:13 - INFO - codeparrot_training - Step 40761: {'lr': 4.1233335472650374e-05, 'samples': 20870144, 'steps': 40761, 'batch_loss/train': 0.8100473936647177} +12/28/2021 07:57:23 - INFO - codeparrot_training - Step 40762: {'lr': 4.122466381862688e-05, 'samples': 20870656, 'steps': 40762, 'batch_loss/train': 0.7434199675917625} +12/28/2021 07:57:36 - INFO - codeparrot_training - Step 40763: {'lr': 4.121599299461523e-05, 'samples': 20871168, 'steps': 40763, 'batch_loss/train': 0.7912529641762376} +12/28/2021 07:57:47 - INFO - codeparrot_training - Step 40764: {'lr': 4.1207323000649915e-05, 'samples': 20871680, 'steps': 40764, 'batch_loss/train': 0.7944597192108631} +12/28/2021 07:57:58 - INFO - codeparrot_training - Step 40765: {'lr': 4.1198653836765546e-05, 'samples': 20872192, 'steps': 40765, 'batch_loss/train': 0.6240514607925434} +12/28/2021 07:58:10 - INFO - codeparrot_training - Step 40766: {'lr': 4.118998550299644e-05, 'samples': 20872704, 'steps': 40766, 'batch_loss/train': 0.7475660988129675} +12/28/2021 07:58:21 - INFO - codeparrot_training - Step 40767: {'lr': 4.1181317999377106e-05, 'samples': 20873216, 'steps': 40767, 'batch_loss/train': 0.8647178076207638} +12/28/2021 07:58:31 - INFO - codeparrot_training - Step 40768: {'lr': 4.1172651325942004e-05, 'samples': 20873728, 'steps': 40768, 'batch_loss/train': 0.589450455037877} +12/28/2021 07:58:42 - INFO - codeparrot_training - Step 40769: {'lr': 4.116398548272557e-05, 'samples': 20874240, 'steps': 40769, 'batch_loss/train': 0.7925018453970551} +12/28/2021 07:58:54 - INFO - codeparrot_training - Step 40770: {'lr': 4.115532046976231e-05, 'samples': 20874752, 'steps': 40770, 'batch_loss/train': 0.8813148494809866} +12/28/2021 07:59:05 - INFO - codeparrot_training - Step 40771: {'lr': 4.114665628708661e-05, 'samples': 20875264, 'steps': 40771, 'batch_loss/train': 0.7590142348781228} +12/28/2021 07:59:15 - INFO - codeparrot_training - Step 40772: {'lr': 4.113799293473297e-05, 'samples': 20875776, 'steps': 40772, 'batch_loss/train': 0.7334465449675918} +12/28/2021 07:59:28 - INFO - codeparrot_training - Step 40773: {'lr': 4.112933041273581e-05, 'samples': 20876288, 'steps': 40773, 'batch_loss/train': 0.7317959188949317} +12/28/2021 07:59:39 - INFO - codeparrot_training - Step 40774: {'lr': 4.11206687211296e-05, 'samples': 20876800, 'steps': 40774, 'batch_loss/train': 0.7937497952952981} +12/28/2021 07:59:50 - INFO - codeparrot_training - Step 40775: {'lr': 4.111200785994865e-05, 'samples': 20877312, 'steps': 40775, 'batch_loss/train': 0.6856520976871252} +12/28/2021 08:00:02 - INFO - codeparrot_training - Step 40776: {'lr': 4.110334782922751e-05, 'samples': 20877824, 'steps': 40776, 'batch_loss/train': 0.6960872863419354} +12/28/2021 08:00:12 - INFO - codeparrot_training - Step 40777: {'lr': 4.1094688629000675e-05, 'samples': 20878336, 'steps': 40777, 'batch_loss/train': 0.8210043804720044} +12/28/2021 08:00:23 - INFO - codeparrot_training - Step 40778: {'lr': 4.108603025930238e-05, 'samples': 20878848, 'steps': 40778, 'batch_loss/train': 0.7882100150454789} +12/28/2021 08:00:35 - INFO - codeparrot_training - Step 40779: {'lr': 4.1077372720167176e-05, 'samples': 20879360, 'steps': 40779, 'batch_loss/train': 0.7245460618287325} +12/28/2021 08:00:46 - INFO - codeparrot_training - Step 40780: {'lr': 4.1068716011629457e-05, 'samples': 20879872, 'steps': 40780, 'batch_loss/train': 0.638135762361344} +12/28/2021 08:00:56 - INFO - codeparrot_training - Step 40781: {'lr': 4.1060060133723626e-05, 'samples': 20880384, 'steps': 40781, 'batch_loss/train': 0.5251462738378905} +12/28/2021 08:01:07 - INFO - codeparrot_training - Step 40782: {'lr': 4.105140508648408e-05, 'samples': 20880896, 'steps': 40782, 'batch_loss/train': 0.6975575862452388} +12/28/2021 08:01:19 - INFO - codeparrot_training - Step 40783: {'lr': 4.104275086994527e-05, 'samples': 20881408, 'steps': 40783, 'batch_loss/train': 0.6967811306240037} +12/28/2021 08:01:30 - INFO - codeparrot_training - Step 40784: {'lr': 4.103409748414161e-05, 'samples': 20881920, 'steps': 40784, 'batch_loss/train': 0.8066884269937873} +12/28/2021 08:01:40 - INFO - codeparrot_training - Step 40785: {'lr': 4.102544492910743e-05, 'samples': 20882432, 'steps': 40785, 'batch_loss/train': 0.7094352636486292} +12/28/2021 08:01:53 - INFO - codeparrot_training - Step 40786: {'lr': 4.1016793204877196e-05, 'samples': 20882944, 'steps': 40786, 'batch_loss/train': 0.7244310521055013} +12/28/2021 08:02:04 - INFO - codeparrot_training - Step 40787: {'lr': 4.100814231148528e-05, 'samples': 20883456, 'steps': 40787, 'batch_loss/train': 0.6767775388434529} +12/28/2021 08:02:14 - INFO - codeparrot_training - Step 40788: {'lr': 4.0999492248966155e-05, 'samples': 20883968, 'steps': 40788, 'batch_loss/train': 0.7895207339897752} +12/28/2021 08:02:26 - INFO - codeparrot_training - Step 40789: {'lr': 4.0990843017354063e-05, 'samples': 20884480, 'steps': 40789, 'batch_loss/train': 0.7867800313979387} +12/28/2021 08:02:37 - INFO - codeparrot_training - Step 40790: {'lr': 4.098219461668337e-05, 'samples': 20884992, 'steps': 40790, 'batch_loss/train': 0.7835348092485219} +12/28/2021 08:02:48 - INFO - codeparrot_training - Step 40791: {'lr': 4.097354704698872e-05, 'samples': 20885504, 'steps': 40791, 'batch_loss/train': 0.8889917978085577} +12/28/2021 08:03:01 - INFO - codeparrot_training - Step 40792: {'lr': 4.0964900308304216e-05, 'samples': 20886016, 'steps': 40792, 'batch_loss/train': 0.6146574900485575} +12/28/2021 08:03:11 - INFO - codeparrot_training - Step 40793: {'lr': 4.095625440066436e-05, 'samples': 20886528, 'steps': 40793, 'batch_loss/train': 0.7280031447298825} +12/28/2021 08:03:22 - INFO - codeparrot_training - Step 40794: {'lr': 4.0947609324103515e-05, 'samples': 20887040, 'steps': 40794, 'batch_loss/train': 0.7498950269073248} +12/28/2021 08:03:33 - INFO - codeparrot_training - Step 40795: {'lr': 4.093896507865602e-05, 'samples': 20887552, 'steps': 40795, 'batch_loss/train': 0.6845525964163244} +12/28/2021 08:03:45 - INFO - codeparrot_training - Step 40796: {'lr': 4.0930321664356287e-05, 'samples': 20888064, 'steps': 40796, 'batch_loss/train': 0.8108875080943108} +12/28/2021 08:03:56 - INFO - codeparrot_training - Step 40797: {'lr': 4.0921679081238646e-05, 'samples': 20888576, 'steps': 40797, 'batch_loss/train': 0.6928751524537802} +12/28/2021 08:04:06 - INFO - codeparrot_training - Step 40798: {'lr': 4.091303732933746e-05, 'samples': 20889088, 'steps': 40798, 'batch_loss/train': 0.7400817507877946} +12/28/2021 08:04:19 - INFO - codeparrot_training - Step 40799: {'lr': 4.0904396408687065e-05, 'samples': 20889600, 'steps': 40799, 'batch_loss/train': 1.0234768162481487} +12/28/2021 08:04:29 - INFO - codeparrot_training - Step 40800: {'lr': 4.0895756319321936e-05, 'samples': 20890112, 'steps': 40800, 'batch_loss/train': 0.9282355844043195} +12/28/2021 08:04:40 - INFO - codeparrot_training - Step 40801: {'lr': 4.088711706127618e-05, 'samples': 20890624, 'steps': 40801, 'batch_loss/train': 0.7572405622922815} +12/28/2021 08:04:51 - INFO - codeparrot_training - Step 40802: {'lr': 4.0878478634584356e-05, 'samples': 20891136, 'steps': 40802, 'batch_loss/train': 0.7469926429912448} +12/28/2021 08:05:03 - INFO - codeparrot_training - Step 40803: {'lr': 4.086984103928079e-05, 'samples': 20891648, 'steps': 40803, 'batch_loss/train': 0.5036926487227902} +12/28/2021 08:05:14 - INFO - codeparrot_training - Step 40804: {'lr': 4.0861204275399744e-05, 'samples': 20892160, 'steps': 40804, 'batch_loss/train': 0.6855258527211845} +12/28/2021 08:05:25 - INFO - codeparrot_training - Step 40805: {'lr': 4.085256834297554e-05, 'samples': 20892672, 'steps': 40805, 'batch_loss/train': 0.7988513829186559} +12/28/2021 08:05:37 - INFO - codeparrot_training - Step 40806: {'lr': 4.084393324204258e-05, 'samples': 20893184, 'steps': 40806, 'batch_loss/train': 0.7118301806040108} +12/28/2021 08:05:47 - INFO - codeparrot_training - Step 40807: {'lr': 4.083529897263516e-05, 'samples': 20893696, 'steps': 40807, 'batch_loss/train': 0.7569132214412093} +12/28/2021 08:05:58 - INFO - codeparrot_training - Step 40808: {'lr': 4.082666553478762e-05, 'samples': 20894208, 'steps': 40808, 'batch_loss/train': 0.7219583834521472} +12/28/2021 08:06:10 - INFO - codeparrot_training - Step 40809: {'lr': 4.081803292853428e-05, 'samples': 20894720, 'steps': 40809, 'batch_loss/train': 0.6599214193411171} +12/28/2021 08:06:21 - INFO - codeparrot_training - Step 40810: {'lr': 4.0809401153909436e-05, 'samples': 20895232, 'steps': 40810, 'batch_loss/train': 0.6745909973978996} +12/28/2021 08:06:31 - INFO - codeparrot_training - Step 40811: {'lr': 4.080077021094744e-05, 'samples': 20895744, 'steps': 40811, 'batch_loss/train': 0.706435113446787} +12/28/2021 08:06:46 - INFO - codeparrot_training - Step 40812: {'lr': 4.07921400996826e-05, 'samples': 20896256, 'steps': 40812, 'batch_loss/train': 0.7253389987163246} +12/28/2021 08:06:56 - INFO - codeparrot_training - Step 40813: {'lr': 4.078351082014919e-05, 'samples': 20896768, 'steps': 40813, 'batch_loss/train': 0.7810253719799221} +12/28/2021 08:07:07 - INFO - codeparrot_training - Step 40814: {'lr': 4.0774882372381636e-05, 'samples': 20897280, 'steps': 40814, 'batch_loss/train': 0.7343464647419751} +12/28/2021 08:07:18 - INFO - codeparrot_training - Step 40815: {'lr': 4.076625475641407e-05, 'samples': 20897792, 'steps': 40815, 'batch_loss/train': 0.7067703192587942} +12/28/2021 08:07:30 - INFO - codeparrot_training - Step 40816: {'lr': 4.07576279722808e-05, 'samples': 20898304, 'steps': 40816, 'batch_loss/train': 0.7177039822563529} +12/28/2021 08:07:40 - INFO - codeparrot_training - Step 40817: {'lr': 4.0749002020016355e-05, 'samples': 20898816, 'steps': 40817, 'batch_loss/train': 0.6973445336334407} +12/28/2021 08:07:51 - INFO - codeparrot_training - Step 40818: {'lr': 4.074037689965476e-05, 'samples': 20899328, 'steps': 40818, 'batch_loss/train': 0.7521849544718862} +12/28/2021 08:08:05 - INFO - codeparrot_training - Step 40819: {'lr': 4.073175261123038e-05, 'samples': 20899840, 'steps': 40819, 'batch_loss/train': 0.6470858384855092} +12/28/2021 08:08:16 - INFO - codeparrot_training - Step 40820: {'lr': 4.072312915477766e-05, 'samples': 20900352, 'steps': 40820, 'batch_loss/train': 0.6911444240249693} +12/28/2021 08:08:26 - INFO - codeparrot_training - Step 40821: {'lr': 4.0714506530330684e-05, 'samples': 20900864, 'steps': 40821, 'batch_loss/train': 0.6977413343265653} +12/28/2021 08:08:39 - INFO - codeparrot_training - Step 40822: {'lr': 4.07058847379238e-05, 'samples': 20901376, 'steps': 40822, 'batch_loss/train': 0.692427780944854} +12/28/2021 08:08:49 - INFO - codeparrot_training - Step 40823: {'lr': 4.069726377759131e-05, 'samples': 20901888, 'steps': 40823, 'batch_loss/train': 0.7235128059983253} +12/28/2021 08:09:00 - INFO - codeparrot_training - Step 40824: {'lr': 4.0688643649367475e-05, 'samples': 20902400, 'steps': 40824, 'batch_loss/train': 0.6572346081957221} +12/28/2021 08:09:11 - INFO - codeparrot_training - Step 40825: {'lr': 4.068002435328655e-05, 'samples': 20902912, 'steps': 40825, 'batch_loss/train': 0.544927520386409} +12/28/2021 08:09:23 - INFO - codeparrot_training - Step 40826: {'lr': 4.0671405889382865e-05, 'samples': 20903424, 'steps': 40826, 'batch_loss/train': 0.5916246909182519} +12/28/2021 08:09:33 - INFO - codeparrot_training - Step 40827: {'lr': 4.0662788257690516e-05, 'samples': 20903936, 'steps': 40827, 'batch_loss/train': 0.7087174896150827} +12/28/2021 08:09:44 - INFO - codeparrot_training - Step 40828: {'lr': 4.065417145824393e-05, 'samples': 20904448, 'steps': 40828, 'batch_loss/train': 0.658261013100855} +12/28/2021 08:09:58 - INFO - codeparrot_training - Step 40829: {'lr': 4.06455554910774e-05, 'samples': 20904960, 'steps': 40829, 'batch_loss/train': 0.6932759964838624} +12/28/2021 08:10:09 - INFO - codeparrot_training - Step 40830: {'lr': 4.0636940356225005e-05, 'samples': 20905472, 'steps': 40830, 'batch_loss/train': 0.7265910124406219} +12/28/2021 08:10:19 - INFO - codeparrot_training - Step 40831: {'lr': 4.062832605372102e-05, 'samples': 20905984, 'steps': 40831, 'batch_loss/train': 0.6284644283587113} +12/28/2021 08:10:32 - INFO - codeparrot_training - Step 40832: {'lr': 4.061971258359989e-05, 'samples': 20906496, 'steps': 40832, 'batch_loss/train': 0.8130172928795218} +12/28/2021 08:10:42 - INFO - codeparrot_training - Step 40833: {'lr': 4.061109994589568e-05, 'samples': 20907008, 'steps': 40833, 'batch_loss/train': 0.9580638236948289} +12/28/2021 08:10:53 - INFO - codeparrot_training - Step 40834: {'lr': 4.0602488140642655e-05, 'samples': 20907520, 'steps': 40834, 'batch_loss/train': 0.7190508742351085} +12/28/2021 08:11:03 - INFO - codeparrot_training - Step 40835: {'lr': 4.059387716787505e-05, 'samples': 20908032, 'steps': 40835, 'batch_loss/train': 0.6590245272964239} +12/28/2021 08:11:16 - INFO - codeparrot_training - Step 40836: {'lr': 4.058526702762716e-05, 'samples': 20908544, 'steps': 40836, 'batch_loss/train': 0.7385020204819739} +12/28/2021 08:11:26 - INFO - codeparrot_training - Step 40837: {'lr': 4.0576657719933156e-05, 'samples': 20909056, 'steps': 40837, 'batch_loss/train': 0.6754777510650456} +12/28/2021 08:11:37 - INFO - codeparrot_training - Step 40838: {'lr': 4.056804924482729e-05, 'samples': 20909568, 'steps': 40838, 'batch_loss/train': 0.6502916989848018} +12/28/2021 08:11:51 - INFO - codeparrot_training - Step 40839: {'lr': 4.055944160234376e-05, 'samples': 20910080, 'steps': 40839, 'batch_loss/train': 0.6659611454233527} +12/28/2021 08:12:01 - INFO - codeparrot_training - Step 40840: {'lr': 4.055083479251684e-05, 'samples': 20910592, 'steps': 40840, 'batch_loss/train': 0.6685709408484399} +12/28/2021 08:12:12 - INFO - codeparrot_training - Step 40841: {'lr': 4.054222881538075e-05, 'samples': 20911104, 'steps': 40841, 'batch_loss/train': 0.6577684483199846} +12/28/2021 08:12:24 - INFO - codeparrot_training - Step 40842: {'lr': 4.053362367096958e-05, 'samples': 20911616, 'steps': 40842, 'batch_loss/train': 0.6542776407732163} +12/28/2021 08:12:35 - INFO - codeparrot_training - Step 40843: {'lr': 4.052501935931774e-05, 'samples': 20912128, 'steps': 40843, 'batch_loss/train': 0.6373980494390707} +12/28/2021 08:12:46 - INFO - codeparrot_training - Step 40844: {'lr': 4.0516415880459264e-05, 'samples': 20912640, 'steps': 40844, 'batch_loss/train': 0.7946234801784158} +12/28/2021 08:12:58 - INFO - codeparrot_training - Step 40845: {'lr': 4.050781323442834e-05, 'samples': 20913152, 'steps': 40845, 'batch_loss/train': 0.5628054953704122} +12/28/2021 08:13:08 - INFO - codeparrot_training - Step 40846: {'lr': 4.04992114212594e-05, 'samples': 20913664, 'steps': 40846, 'batch_loss/train': 0.692930793389678} +12/28/2021 08:13:19 - INFO - codeparrot_training - Step 40847: {'lr': 4.049061044098642e-05, 'samples': 20914176, 'steps': 40847, 'batch_loss/train': 0.6830710208741948} +12/28/2021 08:13:30 - INFO - codeparrot_training - Step 40848: {'lr': 4.048201029364365e-05, 'samples': 20914688, 'steps': 40848, 'batch_loss/train': 0.7237851349636912} +12/28/2021 08:13:42 - INFO - codeparrot_training - Step 40849: {'lr': 4.0473410979265326e-05, 'samples': 20915200, 'steps': 40849, 'batch_loss/train': 0.7243833700194955} +12/28/2021 08:13:52 - INFO - codeparrot_training - Step 40850: {'lr': 4.0464812497885586e-05, 'samples': 20915712, 'steps': 40850, 'batch_loss/train': 0.7025236571207643} +12/28/2021 08:14:03 - INFO - codeparrot_training - Step 40851: {'lr': 4.045621484953865e-05, 'samples': 20916224, 'steps': 40851, 'batch_loss/train': 0.6987596522085369} +12/28/2021 08:14:17 - INFO - codeparrot_training - Step 40852: {'lr': 4.044761803425873e-05, 'samples': 20916736, 'steps': 40852, 'batch_loss/train': 0.6870682072476484} +12/28/2021 08:14:28 - INFO - codeparrot_training - Step 40853: {'lr': 4.043902205207983e-05, 'samples': 20917248, 'steps': 40853, 'batch_loss/train': 0.7131095523945987} +12/28/2021 08:14:38 - INFO - codeparrot_training - Step 40854: {'lr': 4.0430426903036336e-05, 'samples': 20917760, 'steps': 40854, 'batch_loss/train': 0.6667875002603978} +12/28/2021 08:14:51 - INFO - codeparrot_training - Step 40855: {'lr': 4.0421832587162366e-05, 'samples': 20918272, 'steps': 40855, 'batch_loss/train': 0.5813282279996201} +12/28/2021 08:15:01 - INFO - codeparrot_training - Step 40856: {'lr': 4.0413239104491964e-05, 'samples': 20918784, 'steps': 40856, 'batch_loss/train': 0.7102663051337004} +12/28/2021 08:15:12 - INFO - codeparrot_training - Step 40857: {'lr': 4.040464645505943e-05, 'samples': 20919296, 'steps': 40857, 'batch_loss/train': 0.7749960874207318} +12/28/2021 08:15:26 - INFO - codeparrot_training - Step 40858: {'lr': 4.039605463889892e-05, 'samples': 20919808, 'steps': 40858, 'batch_loss/train': 0.7783101070672274} +12/28/2021 08:15:37 - INFO - codeparrot_training - Step 40859: {'lr': 4.038746365604454e-05, 'samples': 20920320, 'steps': 40859, 'batch_loss/train': 0.723256993573159} +12/28/2021 08:15:47 - INFO - codeparrot_training - Step 40860: {'lr': 4.037887350653041e-05, 'samples': 20920832, 'steps': 40860, 'batch_loss/train': 0.6927997210877948} +12/28/2021 08:15:58 - INFO - codeparrot_training - Step 40861: {'lr': 4.0370284190390765e-05, 'samples': 20921344, 'steps': 40861, 'batch_loss/train': 0.6586523680016398} +12/28/2021 08:16:10 - INFO - codeparrot_training - Step 40862: {'lr': 4.036169570765971e-05, 'samples': 20921856, 'steps': 40862, 'batch_loss/train': 0.7635441245511174} +12/28/2021 08:16:21 - INFO - codeparrot_training - Step 40863: {'lr': 4.035310805837139e-05, 'samples': 20922368, 'steps': 40863, 'batch_loss/train': 0.784608312882483} +12/28/2021 08:16:31 - INFO - codeparrot_training - Step 40864: {'lr': 4.0344521242559937e-05, 'samples': 20922880, 'steps': 40864, 'batch_loss/train': 0.5385210756212473} +12/28/2021 08:16:43 - INFO - codeparrot_training - Step 40865: {'lr': 4.033593526025952e-05, 'samples': 20923392, 'steps': 40865, 'batch_loss/train': 0.6137958148028702} +12/28/2021 08:16:54 - INFO - codeparrot_training - Step 40866: {'lr': 4.0327350111504264e-05, 'samples': 20923904, 'steps': 40866, 'batch_loss/train': 0.7334831384941936} +12/28/2021 08:17:05 - INFO - codeparrot_training - Step 40867: {'lr': 4.031876579632837e-05, 'samples': 20924416, 'steps': 40867, 'batch_loss/train': 0.6553557189181447} +12/28/2021 08:17:19 - INFO - codeparrot_training - Step 40868: {'lr': 4.031018231476574e-05, 'samples': 20924928, 'steps': 40868, 'batch_loss/train': 0.7519469824619591} +12/28/2021 08:17:29 - INFO - codeparrot_training - Step 40869: {'lr': 4.030159966685079e-05, 'samples': 20925440, 'steps': 40869, 'batch_loss/train': 0.4944920669659041} +12/28/2021 08:17:40 - INFO - codeparrot_training - Step 40870: {'lr': 4.029301785261744e-05, 'samples': 20925952, 'steps': 40870, 'batch_loss/train': 0.7247950863093138} +12/28/2021 08:17:52 - INFO - codeparrot_training - Step 40871: {'lr': 4.028443687209979e-05, 'samples': 20926464, 'steps': 40871, 'batch_loss/train': 0.5891144923516549} +12/28/2021 08:18:03 - INFO - codeparrot_training - Step 40872: {'lr': 4.027585672533215e-05, 'samples': 20926976, 'steps': 40872, 'batch_loss/train': 0.6969266110099852} +12/28/2021 08:18:14 - INFO - codeparrot_training - Step 40873: {'lr': 4.026727741234848e-05, 'samples': 20927488, 'steps': 40873, 'batch_loss/train': 0.6443629893474281} +12/28/2021 08:18:24 - INFO - codeparrot_training - Step 40874: {'lr': 4.02586989331829e-05, 'samples': 20928000, 'steps': 40874, 'batch_loss/train': 0.7954091932624578} +12/28/2021 08:18:37 - INFO - codeparrot_training - Step 40875: {'lr': 4.025012128786956e-05, 'samples': 20928512, 'steps': 40875, 'batch_loss/train': 0.5030603063642047} +12/28/2021 08:18:47 - INFO - codeparrot_training - Step 40876: {'lr': 4.024154447644251e-05, 'samples': 20929024, 'steps': 40876, 'batch_loss/train': 0.7290251604281366} +12/28/2021 08:18:58 - INFO - codeparrot_training - Step 40877: {'lr': 4.02329684989359e-05, 'samples': 20929536, 'steps': 40877, 'batch_loss/train': 0.5803073047718499} +12/28/2021 08:19:10 - INFO - codeparrot_training - Step 40878: {'lr': 4.022439335538386e-05, 'samples': 20930048, 'steps': 40878, 'batch_loss/train': 0.6194904213771224} +12/28/2021 08:19:21 - INFO - codeparrot_training - Step 40879: {'lr': 4.021581904582031e-05, 'samples': 20930560, 'steps': 40879, 'batch_loss/train': 0.8420431513804942} +12/28/2021 08:19:31 - INFO - codeparrot_training - Step 40880: {'lr': 4.0207245570279494e-05, 'samples': 20931072, 'steps': 40880, 'batch_loss/train': 0.6297401059418917} +12/28/2021 08:19:42 - INFO - codeparrot_training - Step 40881: {'lr': 4.0198672928795535e-05, 'samples': 20931584, 'steps': 40881, 'batch_loss/train': 0.49351426685461774} +12/28/2021 08:19:56 - INFO - codeparrot_training - Step 40882: {'lr': 4.0190101121402315e-05, 'samples': 20932096, 'steps': 40882, 'batch_loss/train': 0.6560376089764759} +12/28/2021 08:20:07 - INFO - codeparrot_training - Step 40883: {'lr': 4.0181530148134086e-05, 'samples': 20932608, 'steps': 40883, 'batch_loss/train': 0.45016351505182683} +12/28/2021 08:20:17 - INFO - codeparrot_training - Step 40884: {'lr': 4.01729600090249e-05, 'samples': 20933120, 'steps': 40884, 'batch_loss/train': 0.6178651647642255} +12/28/2021 08:20:29 - INFO - codeparrot_training - Step 40885: {'lr': 4.0164390704108765e-05, 'samples': 20933632, 'steps': 40885, 'batch_loss/train': 0.6337673990055919} +12/28/2021 08:20:40 - INFO - codeparrot_training - Step 40886: {'lr': 4.0155822233419735e-05, 'samples': 20934144, 'steps': 40886, 'batch_loss/train': 0.6947794547304511} +12/28/2021 08:20:51 - INFO - codeparrot_training - Step 40887: {'lr': 4.014725459699201e-05, 'samples': 20934656, 'steps': 40887, 'batch_loss/train': 0.6439341919030994} +12/28/2021 08:21:05 - INFO - codeparrot_training - Step 40888: {'lr': 4.013868779485952e-05, 'samples': 20935168, 'steps': 40888, 'batch_loss/train': 0.5420798073173501} +12/28/2021 08:21:15 - INFO - codeparrot_training - Step 40889: {'lr': 4.013012182705636e-05, 'samples': 20935680, 'steps': 40889, 'batch_loss/train': 0.6226628129370511} +12/28/2021 08:21:26 - INFO - codeparrot_training - Step 40890: {'lr': 4.012155669361661e-05, 'samples': 20936192, 'steps': 40890, 'batch_loss/train': 0.5665991939604282} +12/28/2021 08:21:37 - INFO - codeparrot_training - Step 40891: {'lr': 4.011299239457428e-05, 'samples': 20936704, 'steps': 40891, 'batch_loss/train': 0.9215042535215616} +12/28/2021 08:21:49 - INFO - codeparrot_training - Step 40892: {'lr': 4.010442892996347e-05, 'samples': 20937216, 'steps': 40892, 'batch_loss/train': 0.7530100792646408} +12/28/2021 08:21:59 - INFO - codeparrot_training - Step 40893: {'lr': 4.009586629981818e-05, 'samples': 20937728, 'steps': 40893, 'batch_loss/train': 0.6466807861579582} +12/28/2021 08:22:10 - INFO - codeparrot_training - Step 40894: {'lr': 4.0087304504172444e-05, 'samples': 20938240, 'steps': 40894, 'batch_loss/train': 0.40634606551611796} +12/28/2021 08:22:22 - INFO - codeparrot_training - Step 40895: {'lr': 4.007874354306043e-05, 'samples': 20938752, 'steps': 40895, 'batch_loss/train': 0.736417468637228} +12/28/2021 08:22:33 - INFO - codeparrot_training - Step 40896: {'lr': 4.007018341651597e-05, 'samples': 20939264, 'steps': 40896, 'batch_loss/train': 0.6071084657451138} +12/28/2021 08:22:44 - INFO - codeparrot_training - Step 40897: {'lr': 4.006162412457318e-05, 'samples': 20939776, 'steps': 40897, 'batch_loss/train': 0.6828027958981693} +12/28/2021 08:22:57 - INFO - codeparrot_training - Step 40898: {'lr': 4.0053065667266185e-05, 'samples': 20940288, 'steps': 40898, 'batch_loss/train': 0.6857202150858939} +12/28/2021 08:23:08 - INFO - codeparrot_training - Step 40899: {'lr': 4.0044508044628866e-05, 'samples': 20940800, 'steps': 40899, 'batch_loss/train': 0.6799521250650287} +12/28/2021 08:23:19 - INFO - codeparrot_training - Step 40900: {'lr': 4.003595125669532e-05, 'samples': 20941312, 'steps': 40900, 'batch_loss/train': 0.6959968125447631} +12/28/2021 08:23:31 - INFO - codeparrot_training - Step 40901: {'lr': 4.002739530349955e-05, 'samples': 20941824, 'steps': 40901, 'batch_loss/train': 0.6625967902364209} +12/28/2021 08:23:42 - INFO - codeparrot_training - Step 40902: {'lr': 4.0018840185075594e-05, 'samples': 20942336, 'steps': 40902, 'batch_loss/train': 0.7493959255516529} +12/28/2021 08:23:52 - INFO - codeparrot_training - Step 40903: {'lr': 4.0010285901457414e-05, 'samples': 20942848, 'steps': 40903, 'batch_loss/train': 0.7326390558155254} +12/28/2021 08:24:03 - INFO - codeparrot_training - Step 40904: {'lr': 4.000173245267913e-05, 'samples': 20943360, 'steps': 40904, 'batch_loss/train': 0.6672530574724078} +12/28/2021 08:24:17 - INFO - codeparrot_training - Step 40905: {'lr': 3.9993179838774516e-05, 'samples': 20943872, 'steps': 40905, 'batch_loss/train': 0.6121920440928079} +12/28/2021 08:24:27 - INFO - codeparrot_training - Step 40906: {'lr': 3.998462805977782e-05, 'samples': 20944384, 'steps': 40906, 'batch_loss/train': 0.6867705835029483} +12/28/2021 08:24:38 - INFO - codeparrot_training - Step 40907: {'lr': 3.9976077115722985e-05, 'samples': 20944896, 'steps': 40907, 'batch_loss/train': 0.6684385766275227} +12/28/2021 08:24:50 - INFO - codeparrot_training - Step 40908: {'lr': 3.996752700664383e-05, 'samples': 20945408, 'steps': 40908, 'batch_loss/train': 0.676907014567405} +12/28/2021 08:25:01 - INFO - codeparrot_training - Step 40909: {'lr': 3.995897773257456e-05, 'samples': 20945920, 'steps': 40909, 'batch_loss/train': 0.7138728415593505} +12/28/2021 08:25:11 - INFO - codeparrot_training - Step 40910: {'lr': 3.995042929354917e-05, 'samples': 20946432, 'steps': 40910, 'batch_loss/train': 0.6427814780618064} +12/28/2021 08:25:23 - INFO - codeparrot_training - Step 40911: {'lr': 3.994188168960147e-05, 'samples': 20946944, 'steps': 40911, 'batch_loss/train': 0.7020695749670267} +12/28/2021 08:25:34 - INFO - codeparrot_training - Step 40912: {'lr': 3.993333492076548e-05, 'samples': 20947456, 'steps': 40912, 'batch_loss/train': 0.6114307290990837} +12/28/2021 08:25:45 - INFO - codeparrot_training - Step 40913: {'lr': 3.992478898707535e-05, 'samples': 20947968, 'steps': 40913, 'batch_loss/train': 0.8100155834108591} +12/28/2021 08:25:57 - INFO - codeparrot_training - Step 40914: {'lr': 3.991624388856488e-05, 'samples': 20948480, 'steps': 40914, 'batch_loss/train': 0.7353471578098834} +12/28/2021 08:26:07 - INFO - codeparrot_training - Step 40915: {'lr': 3.990769962526811e-05, 'samples': 20948992, 'steps': 40915, 'batch_loss/train': 0.6167314944323152} +12/28/2021 08:26:18 - INFO - codeparrot_training - Step 40916: {'lr': 3.989915619721898e-05, 'samples': 20949504, 'steps': 40916, 'batch_loss/train': 0.5814788838033564} +12/28/2021 08:26:29 - INFO - codeparrot_training - Step 40917: {'lr': 3.989061360445148e-05, 'samples': 20950016, 'steps': 40917, 'batch_loss/train': 0.8362226318567991} +12/28/2021 08:26:41 - INFO - codeparrot_training - Step 40918: {'lr': 3.988207184699957e-05, 'samples': 20950528, 'steps': 40918, 'batch_loss/train': 0.6033623195253313} +12/28/2021 08:26:51 - INFO - codeparrot_training - Step 40919: {'lr': 3.987353092489721e-05, 'samples': 20951040, 'steps': 40919, 'batch_loss/train': 0.763713008724153} +12/28/2021 08:27:02 - INFO - codeparrot_training - Step 40920: {'lr': 3.986499083817832e-05, 'samples': 20951552, 'steps': 40920, 'batch_loss/train': 0.7149619038682431} +12/28/2021 08:27:16 - INFO - codeparrot_training - Step 40921: {'lr': 3.985645158687692e-05, 'samples': 20952064, 'steps': 40921, 'batch_loss/train': 0.746123637072742} +12/28/2021 08:27:27 - INFO - codeparrot_training - Step 40922: {'lr': 3.9847913171026954e-05, 'samples': 20952576, 'steps': 40922, 'batch_loss/train': 0.7337620388716459} +12/28/2021 08:27:37 - INFO - codeparrot_training - Step 40923: {'lr': 3.983937559066225e-05, 'samples': 20953088, 'steps': 40923, 'batch_loss/train': 0.5832779845513869} +12/28/2021 08:27:49 - INFO - codeparrot_training - Step 40924: {'lr': 3.983083884581692e-05, 'samples': 20953600, 'steps': 40924, 'batch_loss/train': 0.6045855877455324} +12/28/2021 08:28:00 - INFO - codeparrot_training - Step 40925: {'lr': 3.982230293652478e-05, 'samples': 20954112, 'steps': 40925, 'batch_loss/train': 0.7231644135899842} +12/28/2021 08:28:10 - INFO - codeparrot_training - Step 40926: {'lr': 3.981376786281979e-05, 'samples': 20954624, 'steps': 40926, 'batch_loss/train': 0.6827997509390116} +12/28/2021 08:28:21 - INFO - codeparrot_training - Step 40927: {'lr': 3.98052336247359e-05, 'samples': 20955136, 'steps': 40927, 'batch_loss/train': 0.6375197973102331} +12/28/2021 08:28:35 - INFO - codeparrot_training - Step 40928: {'lr': 3.9796700222307054e-05, 'samples': 20955648, 'steps': 40928, 'batch_loss/train': 0.6622905712283682} +12/28/2021 08:28:46 - INFO - codeparrot_training - Step 40929: {'lr': 3.9788167655567115e-05, 'samples': 20956160, 'steps': 40929, 'batch_loss/train': 0.7080025486648083} +12/28/2021 08:28:56 - INFO - codeparrot_training - Step 40930: {'lr': 3.9779635924550086e-05, 'samples': 20956672, 'steps': 40930, 'batch_loss/train': 0.6548257363028824} +12/28/2021 08:29:09 - INFO - codeparrot_training - Step 40931: {'lr': 3.977110502928982e-05, 'samples': 20957184, 'steps': 40931, 'batch_loss/train': 0.7895181756466627} +12/28/2021 08:29:19 - INFO - codeparrot_training - Step 40932: {'lr': 3.97625749698203e-05, 'samples': 20957696, 'steps': 40932, 'batch_loss/train': 0.7153754732571542} +12/28/2021 08:29:30 - INFO - codeparrot_training - Step 40933: {'lr': 3.9754045746175415e-05, 'samples': 20958208, 'steps': 40933, 'batch_loss/train': 0.6724345730617642} +12/28/2021 08:29:42 - INFO - codeparrot_training - Step 40934: {'lr': 3.9745517358388965e-05, 'samples': 20958720, 'steps': 40934, 'batch_loss/train': 0.6520634123007767} +12/28/2021 08:29:53 - INFO - codeparrot_training - Step 40935: {'lr': 3.973698980649501e-05, 'samples': 20959232, 'steps': 40935, 'batch_loss/train': 0.758246289100498} +12/28/2021 08:30:03 - INFO - codeparrot_training - Step 40936: {'lr': 3.972846309052744e-05, 'samples': 20959744, 'steps': 40936, 'batch_loss/train': 0.6070809378288686} +12/28/2021 08:30:14 - INFO - codeparrot_training - Step 40937: {'lr': 3.971993721052006e-05, 'samples': 20960256, 'steps': 40937, 'batch_loss/train': 0.7967420276254416} +12/28/2021 08:30:28 - INFO - codeparrot_training - Step 40938: {'lr': 3.971141216650673e-05, 'samples': 20960768, 'steps': 40938, 'batch_loss/train': 0.780502162873745} +12/28/2021 08:30:38 - INFO - codeparrot_training - Step 40939: {'lr': 3.970288795852156e-05, 'samples': 20961280, 'steps': 40939, 'batch_loss/train': 0.6946358415298164} +12/28/2021 08:30:49 - INFO - codeparrot_training - Step 40940: {'lr': 3.9694364586598226e-05, 'samples': 20961792, 'steps': 40940, 'batch_loss/train': 0.7588870311155915} +12/28/2021 08:31:01 - INFO - codeparrot_training - Step 40941: {'lr': 3.968584205077072e-05, 'samples': 20962304, 'steps': 40941, 'batch_loss/train': 0.731440001167357} +12/28/2021 08:31:12 - INFO - codeparrot_training - Step 40942: {'lr': 3.967732035107285e-05, 'samples': 20962816, 'steps': 40942, 'batch_loss/train': 0.7171827387064695} +12/28/2021 08:31:22 - INFO - codeparrot_training - Step 40943: {'lr': 3.966879948753857e-05, 'samples': 20963328, 'steps': 40943, 'batch_loss/train': 0.620220291428268} +12/28/2021 08:31:36 - INFO - codeparrot_training - Step 40944: {'lr': 3.9660279460201734e-05, 'samples': 20963840, 'steps': 40944, 'batch_loss/train': 0.6758493844245095} +12/28/2021 08:31:47 - INFO - codeparrot_training - Step 40945: {'lr': 3.96517602690962e-05, 'samples': 20964352, 'steps': 40945, 'batch_loss/train': 0.5959604777744971} +12/28/2021 08:31:57 - INFO - codeparrot_training - Step 40946: {'lr': 3.964324191425583e-05, 'samples': 20964864, 'steps': 40946, 'batch_loss/train': 0.7218564269132912} +12/28/2021 08:32:09 - INFO - codeparrot_training - Step 40947: {'lr': 3.96347243957145e-05, 'samples': 20965376, 'steps': 40947, 'batch_loss/train': 0.6980614820495248} +12/28/2021 08:32:20 - INFO - codeparrot_training - Step 40948: {'lr': 3.962620771350614e-05, 'samples': 20965888, 'steps': 40948, 'batch_loss/train': 0.715427081566304} +12/28/2021 08:32:31 - INFO - codeparrot_training - Step 40949: {'lr': 3.961769186766442e-05, 'samples': 20966400, 'steps': 40949, 'batch_loss/train': 0.7260456448420882} +12/28/2021 08:32:41 - INFO - codeparrot_training - Step 40950: {'lr': 3.960917685822346e-05, 'samples': 20966912, 'steps': 40950, 'batch_loss/train': 0.8835179635789245} +12/28/2021 08:32:54 - INFO - codeparrot_training - Step 40951: {'lr': 3.9600662685216896e-05, 'samples': 20967424, 'steps': 40951, 'batch_loss/train': 0.7027821369701996} +12/28/2021 08:33:04 - INFO - codeparrot_training - Step 40952: {'lr': 3.9592149348678664e-05, 'samples': 20967936, 'steps': 40952, 'batch_loss/train': 0.6872052401304245} +12/28/2021 08:33:15 - INFO - codeparrot_training - Step 40953: {'lr': 3.958363684864258e-05, 'samples': 20968448, 'steps': 40953, 'batch_loss/train': 0.6647005905397236} +12/28/2021 08:33:29 - INFO - codeparrot_training - Step 40954: {'lr': 3.957512518514253e-05, 'samples': 20968960, 'steps': 40954, 'batch_loss/train': 0.9686220148578286} +12/28/2021 08:33:39 - INFO - codeparrot_training - Step 40955: {'lr': 3.956661435821229e-05, 'samples': 20969472, 'steps': 40955, 'batch_loss/train': 0.6960217608720995} +12/28/2021 08:33:50 - INFO - codeparrot_training - Step 40956: {'lr': 3.955810436788576e-05, 'samples': 20969984, 'steps': 40956, 'batch_loss/train': 0.6703286992851645} +12/28/2021 08:34:02 - INFO - codeparrot_training - Step 40957: {'lr': 3.954959521419677e-05, 'samples': 20970496, 'steps': 40957, 'batch_loss/train': 0.7374784145504236} +12/28/2021 08:34:13 - INFO - codeparrot_training - Step 40958: {'lr': 3.954108689717908e-05, 'samples': 20971008, 'steps': 40958, 'batch_loss/train': 0.6826874143444002} +12/28/2021 08:34:24 - INFO - codeparrot_training - Step 40959: {'lr': 3.9532579416866666e-05, 'samples': 20971520, 'steps': 40959, 'batch_loss/train': 0.6913118017837405} +12/28/2021 08:34:36 - INFO - codeparrot_training - Step 40960: {'lr': 3.952407277329309e-05, 'samples': 20972032, 'steps': 40960, 'batch_loss/train': 0.7193396403454244} +12/28/2021 08:34:46 - INFO - codeparrot_training - Step 40961: {'lr': 3.9515566966492415e-05, 'samples': 20972544, 'steps': 40961, 'batch_loss/train': 0.6763024493120611} +12/28/2021 08:34:57 - INFO - codeparrot_training - Step 40962: {'lr': 3.950706199649842e-05, 'samples': 20973056, 'steps': 40962, 'batch_loss/train': 0.7865863875485957} +12/28/2021 08:35:08 - INFO - codeparrot_training - Step 40963: {'lr': 3.94985578633448e-05, 'samples': 20973568, 'steps': 40963, 'batch_loss/train': 0.7268219129182398} +12/28/2021 08:35:20 - INFO - codeparrot_training - Step 40964: {'lr': 3.9490054567065394e-05, 'samples': 20974080, 'steps': 40964, 'batch_loss/train': 0.7221620129421353} +12/28/2021 08:35:31 - INFO - codeparrot_training - Step 40965: {'lr': 3.9481552107694134e-05, 'samples': 20974592, 'steps': 40965, 'batch_loss/train': 0.7883222829550505} +12/28/2021 08:35:41 - INFO - codeparrot_training - Step 40966: {'lr': 3.94730504852647e-05, 'samples': 20975104, 'steps': 40966, 'batch_loss/train': 0.7662522667087615} +12/28/2021 08:35:55 - INFO - codeparrot_training - Step 40967: {'lr': 3.9464549699810864e-05, 'samples': 20975616, 'steps': 40967, 'batch_loss/train': 0.7708728867582977} +12/28/2021 08:36:06 - INFO - codeparrot_training - Step 40968: {'lr': 3.9456049751366604e-05, 'samples': 20976128, 'steps': 40968, 'batch_loss/train': 0.6755052814260125} +12/28/2021 08:36:16 - INFO - codeparrot_training - Step 40969: {'lr': 3.9447550639965526e-05, 'samples': 20976640, 'steps': 40969, 'batch_loss/train': 0.713741097599268} +12/28/2021 08:36:28 - INFO - codeparrot_training - Step 40970: {'lr': 3.9439052365641465e-05, 'samples': 20977152, 'steps': 40970, 'batch_loss/train': 0.7319603650830686} +12/28/2021 08:36:39 - INFO - codeparrot_training - Step 40971: {'lr': 3.9430554928428255e-05, 'samples': 20977664, 'steps': 40971, 'batch_loss/train': 0.5827410406200215} +12/28/2021 08:36:50 - INFO - codeparrot_training - Step 40972: {'lr': 3.942205832835963e-05, 'samples': 20978176, 'steps': 40972, 'batch_loss/train': 0.7315609101206064} +12/28/2021 08:37:00 - INFO - codeparrot_training - Step 40973: {'lr': 3.9413562565469405e-05, 'samples': 20978688, 'steps': 40973, 'batch_loss/train': 0.6737823858857155} +12/28/2021 08:37:14 - INFO - codeparrot_training - Step 40974: {'lr': 3.9405067639791405e-05, 'samples': 20979200, 'steps': 40974, 'batch_loss/train': 0.8535825340077281} +12/28/2021 08:37:25 - INFO - codeparrot_training - Step 40975: {'lr': 3.93965735513592e-05, 'samples': 20979712, 'steps': 40975, 'batch_loss/train': 0.6766426134854555} +12/28/2021 08:37:36 - INFO - codeparrot_training - Step 40976: {'lr': 3.938808030020677e-05, 'samples': 20980224, 'steps': 40976, 'batch_loss/train': 0.672106456477195} +12/28/2021 08:37:48 - INFO - codeparrot_training - Step 40977: {'lr': 3.9379587886367866e-05, 'samples': 20980736, 'steps': 40977, 'batch_loss/train': 0.5782705805613659} +12/28/2021 08:37:58 - INFO - codeparrot_training - Step 40978: {'lr': 3.9371096309876117e-05, 'samples': 20981248, 'steps': 40978, 'batch_loss/train': 0.707532165106386} +12/28/2021 08:38:09 - INFO - codeparrot_training - Step 40979: {'lr': 3.9362605570765374e-05, 'samples': 20981760, 'steps': 40979, 'batch_loss/train': 0.5782677548704669} +12/28/2021 08:38:21 - INFO - codeparrot_training - Step 40980: {'lr': 3.9354115669069405e-05, 'samples': 20982272, 'steps': 40980, 'batch_loss/train': 0.6688160467892885} +12/28/2021 08:38:32 - INFO - codeparrot_training - Step 40981: {'lr': 3.9345626604821914e-05, 'samples': 20982784, 'steps': 40981, 'batch_loss/train': 0.6850603406783193} +12/28/2021 08:38:43 - INFO - codeparrot_training - Step 40982: {'lr': 3.9337138378056666e-05, 'samples': 20983296, 'steps': 40982, 'batch_loss/train': 0.623952922411263} +12/28/2021 08:38:57 - INFO - codeparrot_training - Step 40983: {'lr': 3.932865098880739e-05, 'samples': 20983808, 'steps': 40983, 'batch_loss/train': 0.7035118527710438} +12/28/2021 08:39:07 - INFO - codeparrot_training - Step 40984: {'lr': 3.9320164437107895e-05, 'samples': 20984320, 'steps': 40984, 'batch_loss/train': 0.6277270601131022} +12/28/2021 08:39:18 - INFO - codeparrot_training - Step 40985: {'lr': 3.931167872299191e-05, 'samples': 20984832, 'steps': 40985, 'batch_loss/train': 0.5782120067160577} +12/28/2021 08:39:29 - INFO - codeparrot_training - Step 40986: {'lr': 3.930319384649303e-05, 'samples': 20985344, 'steps': 40986, 'batch_loss/train': 0.6824689696077257} +12/28/2021 08:39:41 - INFO - codeparrot_training - Step 40987: {'lr': 3.929470980764513e-05, 'samples': 20985856, 'steps': 40987, 'batch_loss/train': 0.59990727272816} +12/28/2021 08:39:51 - INFO - codeparrot_training - Step 40988: {'lr': 3.928622660648198e-05, 'samples': 20986368, 'steps': 40988, 'batch_loss/train': 0.5975989430153277} +12/28/2021 08:40:02 - INFO - codeparrot_training - Step 40989: {'lr': 3.927774424303718e-05, 'samples': 20986880, 'steps': 40989, 'batch_loss/train': 0.670177798718214} +12/28/2021 08:40:16 - INFO - codeparrot_training - Step 40990: {'lr': 3.9269262717344434e-05, 'samples': 20987392, 'steps': 40990, 'batch_loss/train': 0.7229069033637643} +12/28/2021 08:40:27 - INFO - codeparrot_training - Step 40991: {'lr': 3.926078202943767e-05, 'samples': 20987904, 'steps': 40991, 'batch_loss/train': 0.6791283199563622} +12/28/2021 08:40:37 - INFO - codeparrot_training - Step 40992: {'lr': 3.9252302179350386e-05, 'samples': 20988416, 'steps': 40992, 'batch_loss/train': 0.6826344570145011} +12/28/2021 08:40:49 - INFO - codeparrot_training - Step 40993: {'lr': 3.92438231671163e-05, 'samples': 20988928, 'steps': 40993, 'batch_loss/train': 0.5391172296658624} +12/28/2021 08:41:00 - INFO - codeparrot_training - Step 40994: {'lr': 3.9235344992769344e-05, 'samples': 20989440, 'steps': 40994, 'batch_loss/train': 0.6604084530845284} +12/28/2021 08:41:11 - INFO - codeparrot_training - Step 40995: {'lr': 3.922686765634301e-05, 'samples': 20989952, 'steps': 40995, 'batch_loss/train': 0.6641875705681741} +12/28/2021 08:41:21 - INFO - codeparrot_training - Step 40996: {'lr': 3.9218391157871043e-05, 'samples': 20990464, 'steps': 40996, 'batch_loss/train': 0.8124537747353315} +12/28/2021 08:41:33 - INFO - codeparrot_training - Step 40997: {'lr': 3.920991549738717e-05, 'samples': 20990976, 'steps': 40997, 'batch_loss/train': 0.7049988496000879} +12/28/2021 08:41:44 - INFO - codeparrot_training - Step 40998: {'lr': 3.920144067492509e-05, 'samples': 20991488, 'steps': 40998, 'batch_loss/train': 0.47644823175505735} +12/28/2021 08:41:55 - INFO - codeparrot_training - Step 40999: {'lr': 3.91929666905185e-05, 'samples': 20992000, 'steps': 40999, 'batch_loss/train': 0.7375806807540357} +12/28/2021 08:42:07 - INFO - codeparrot_training - Step 41000: {'lr': 3.9184493544201116e-05, 'samples': 20992512, 'steps': 41000, 'batch_loss/train': 0.6156824808567762} +12/28/2021 08:42:17 - INFO - codeparrot_training - Step 41001: {'lr': 3.917602123600647e-05, 'samples': 20993024, 'steps': 41001, 'batch_loss/train': 0.740080120973289} +12/28/2021 08:42:28 - INFO - codeparrot_training - Step 41002: {'lr': 3.9167549765968426e-05, 'samples': 20993536, 'steps': 41002, 'batch_loss/train': 0.5476583614072297} +12/28/2021 08:42:42 - INFO - codeparrot_training - Step 41003: {'lr': 3.915907913412067e-05, 'samples': 20994048, 'steps': 41003, 'batch_loss/train': 0.7624564757570624} +12/28/2021 08:42:53 - INFO - codeparrot_training - Step 41004: {'lr': 3.915060934049666e-05, 'samples': 20994560, 'steps': 41004, 'batch_loss/train': 0.602488388074562} +12/28/2021 08:43:03 - INFO - codeparrot_training - Step 41005: {'lr': 3.914214038513034e-05, 'samples': 20995072, 'steps': 41005, 'batch_loss/train': 0.7623747661709785} +12/28/2021 08:43:15 - INFO - codeparrot_training - Step 41006: {'lr': 3.9133672268055195e-05, 'samples': 20995584, 'steps': 41006, 'batch_loss/train': 0.6086212736554444} +12/28/2021 08:43:26 - INFO - codeparrot_training - Step 41007: {'lr': 3.912520498930494e-05, 'samples': 20996096, 'steps': 41007, 'batch_loss/train': 0.6172106396406889} +12/28/2021 08:43:37 - INFO - codeparrot_training - Step 41008: {'lr': 3.911673854891326e-05, 'samples': 20996608, 'steps': 41008, 'batch_loss/train': 0.7264805305749178} +12/28/2021 08:43:47 - INFO - codeparrot_training - Step 41009: {'lr': 3.9108272946913795e-05, 'samples': 20997120, 'steps': 41009, 'batch_loss/train': 0.6538771805353463} +12/28/2021 08:44:00 - INFO - codeparrot_training - Step 41010: {'lr': 3.909980818334022e-05, 'samples': 20997632, 'steps': 41010, 'batch_loss/train': 0.6472063814289868} +12/28/2021 08:44:10 - INFO - codeparrot_training - Step 41011: {'lr': 3.909134425822616e-05, 'samples': 20998144, 'steps': 41011, 'batch_loss/train': 0.6916025201790035} +12/28/2021 08:44:21 - INFO - codeparrot_training - Step 41012: {'lr': 3.90828811716053e-05, 'samples': 20998656, 'steps': 41012, 'batch_loss/train': 0.5915128356427886} +12/28/2021 08:44:35 - INFO - codeparrot_training - Step 41013: {'lr': 3.907441892351124e-05, 'samples': 20999168, 'steps': 41013, 'batch_loss/train': 0.6229546125978231} +12/28/2021 08:44:45 - INFO - codeparrot_training - Step 41014: {'lr': 3.9065957513977706e-05, 'samples': 20999680, 'steps': 41014, 'batch_loss/train': 0.705613651778549} +12/28/2021 08:44:56 - INFO - codeparrot_training - Step 41015: {'lr': 3.905749694303823e-05, 'samples': 21000192, 'steps': 41015, 'batch_loss/train': 0.6722292292397469} +12/28/2021 08:45:08 - INFO - codeparrot_training - Step 41016: {'lr': 3.904903721072645e-05, 'samples': 21000704, 'steps': 41016, 'batch_loss/train': 0.7025203285738826} +12/28/2021 08:45:19 - INFO - codeparrot_training - Step 41017: {'lr': 3.904057831707614e-05, 'samples': 21001216, 'steps': 41017, 'batch_loss/train': 1.449188731610775} +12/28/2021 08:45:29 - INFO - codeparrot_training - Step 41018: {'lr': 3.90321202621208e-05, 'samples': 21001728, 'steps': 41018, 'batch_loss/train': 0.6801795838400722} +12/28/2021 08:45:40 - INFO - codeparrot_training - Step 41019: {'lr': 3.902366304589402e-05, 'samples': 21002240, 'steps': 41019, 'batch_loss/train': 0.6897533335722983} +12/28/2021 08:45:52 - INFO - codeparrot_training - Step 41020: {'lr': 3.9015206668429615e-05, 'samples': 21002752, 'steps': 41020, 'batch_loss/train': 0.6340732043026946} +12/28/2021 08:46:03 - INFO - codeparrot_training - Step 41021: {'lr': 3.9006751129761016e-05, 'samples': 21003264, 'steps': 41021, 'batch_loss/train': 0.7050162996165454} +12/28/2021 08:46:13 - INFO - codeparrot_training - Step 41022: {'lr': 3.899829642992192e-05, 'samples': 21003776, 'steps': 41022, 'batch_loss/train': 0.680200690869242} +12/28/2021 08:46:27 - INFO - codeparrot_training - Step 41023: {'lr': 3.898984256894592e-05, 'samples': 21004288, 'steps': 41023, 'batch_loss/train': 0.6289814094780013} +12/28/2021 08:46:38 - INFO - codeparrot_training - Step 41024: {'lr': 3.898138954686661e-05, 'samples': 21004800, 'steps': 41024, 'batch_loss/train': 0.853571762010688} +12/28/2021 08:46:49 - INFO - codeparrot_training - Step 41025: {'lr': 3.897293736371765e-05, 'samples': 21005312, 'steps': 41025, 'batch_loss/train': 0.6320315356715582} +12/28/2021 08:47:01 - INFO - codeparrot_training - Step 41026: {'lr': 3.896448601953267e-05, 'samples': 21005824, 'steps': 41026, 'batch_loss/train': 0.7679512137547135} +12/28/2021 08:47:12 - INFO - codeparrot_training - Step 41027: {'lr': 3.895603551434507e-05, 'samples': 21006336, 'steps': 41027, 'batch_loss/train': 0.4743196351919323} +12/28/2021 08:47:23 - INFO - codeparrot_training - Step 41028: {'lr': 3.8947585848188665e-05, 'samples': 21006848, 'steps': 41028, 'batch_loss/train': 0.7050107689574361} +12/28/2021 08:47:37 - INFO - codeparrot_training - Step 41029: {'lr': 3.8939137021097e-05, 'samples': 21007360, 'steps': 41029, 'batch_loss/train': 0.7028492148965597} +12/28/2021 08:47:47 - INFO - codeparrot_training - Step 41030: {'lr': 3.893068903310354e-05, 'samples': 21007872, 'steps': 41030, 'batch_loss/train': 0.7067852541804314} +12/28/2021 08:47:58 - INFO - codeparrot_training - Step 41031: {'lr': 3.8922241884242065e-05, 'samples': 21008384, 'steps': 41031, 'batch_loss/train': 0.6540909046307206} +12/28/2021 08:48:09 - INFO - codeparrot_training - Step 41032: {'lr': 3.8913795574546026e-05, 'samples': 21008896, 'steps': 41032, 'batch_loss/train': 0.7217647875659168} +12/28/2021 08:48:21 - INFO - codeparrot_training - Step 41033: {'lr': 3.8905350104049e-05, 'samples': 21009408, 'steps': 41033, 'batch_loss/train': 0.649707242147997} +12/28/2021 08:48:32 - INFO - codeparrot_training - Step 41034: {'lr': 3.8896905472784596e-05, 'samples': 21009920, 'steps': 41034, 'batch_loss/train': 0.678785455354955} +12/28/2021 08:48:42 - INFO - codeparrot_training - Step 41035: {'lr': 3.888846168078641e-05, 'samples': 21010432, 'steps': 41035, 'batch_loss/train': 1.0008740330813453} +12/28/2021 08:48:54 - INFO - codeparrot_training - Step 41036: {'lr': 3.888001872808797e-05, 'samples': 21010944, 'steps': 41036, 'batch_loss/train': 0.7714537847787142} +12/28/2021 08:49:05 - INFO - codeparrot_training - Step 41037: {'lr': 3.8871576614722876e-05, 'samples': 21011456, 'steps': 41037, 'batch_loss/train': 0.6367290369234979} +12/28/2021 08:49:16 - INFO - codeparrot_training - Step 41038: {'lr': 3.886313534072467e-05, 'samples': 21011968, 'steps': 41038, 'batch_loss/train': 0.5813463260419667} +12/28/2021 08:49:28 - INFO - codeparrot_training - Step 41039: {'lr': 3.8854694906126895e-05, 'samples': 21012480, 'steps': 41039, 'batch_loss/train': 0.7425906481221318} +12/28/2021 08:49:39 - INFO - codeparrot_training - Step 41040: {'lr': 3.8846255310963194e-05, 'samples': 21012992, 'steps': 41040, 'batch_loss/train': 0.6866338268155232} +12/28/2021 08:49:49 - INFO - codeparrot_training - Step 41041: {'lr': 3.8837816555266966e-05, 'samples': 21013504, 'steps': 41041, 'batch_loss/train': 0.680233810126083} +12/28/2021 08:50:00 - INFO - codeparrot_training - Step 41042: {'lr': 3.8829378639071876e-05, 'samples': 21014016, 'steps': 41042, 'batch_loss/train': 0.7226305669173598} +12/28/2021 08:50:14 - INFO - codeparrot_training - Step 41043: {'lr': 3.8820941562411505e-05, 'samples': 21014528, 'steps': 41043, 'batch_loss/train': 0.6473705652169883} +12/28/2021 08:50:25 - INFO - codeparrot_training - Step 41044: {'lr': 3.881250532531927e-05, 'samples': 21015040, 'steps': 41044, 'batch_loss/train': 0.7301937467418611} +12/28/2021 08:50:35 - INFO - codeparrot_training - Step 41045: {'lr': 3.880406992782873e-05, 'samples': 21015552, 'steps': 41045, 'batch_loss/train': 0.7040322795510292} +12/28/2021 08:50:47 - INFO - codeparrot_training - Step 41046: {'lr': 3.879563536997358e-05, 'samples': 21016064, 'steps': 41046, 'batch_loss/train': 0.5120556718320586} +12/28/2021 08:50:58 - INFO - codeparrot_training - Step 41047: {'lr': 3.8787201651787174e-05, 'samples': 21016576, 'steps': 41047, 'batch_loss/train': 0.6989949585986324} +12/28/2021 08:51:09 - INFO - codeparrot_training - Step 41048: {'lr': 3.877876877330308e-05, 'samples': 21017088, 'steps': 41048, 'batch_loss/train': 0.6161583552602679} +12/28/2021 08:51:21 - INFO - codeparrot_training - Step 41049: {'lr': 3.8770336734554875e-05, 'samples': 21017600, 'steps': 41049, 'batch_loss/train': 0.6277750316075981} +12/28/2021 08:51:31 - INFO - codeparrot_training - Step 41050: {'lr': 3.8761905535576045e-05, 'samples': 21018112, 'steps': 41050, 'batch_loss/train': 0.7274577813223004} +12/28/2021 08:51:42 - INFO - codeparrot_training - Step 41051: {'lr': 3.87534751764001e-05, 'samples': 21018624, 'steps': 41051, 'batch_loss/train': 0.8209987364243716} +12/28/2021 08:51:53 - INFO - codeparrot_training - Step 41052: {'lr': 3.874504565706066e-05, 'samples': 21019136, 'steps': 41052, 'batch_loss/train': 0.6843736106529832} +12/28/2021 08:52:06 - INFO - codeparrot_training - Step 41053: {'lr': 3.8736616977591036e-05, 'samples': 21019648, 'steps': 41053, 'batch_loss/train': 0.706131134647876} +12/28/2021 08:52:16 - INFO - codeparrot_training - Step 41054: {'lr': 3.8728189138024896e-05, 'samples': 21020160, 'steps': 41054, 'batch_loss/train': 0.7685583438724279} +12/28/2021 08:52:27 - INFO - codeparrot_training - Step 41055: {'lr': 3.871976213839576e-05, 'samples': 21020672, 'steps': 41055, 'batch_loss/train': 0.7273634066805243} +12/28/2021 08:52:39 - INFO - codeparrot_training - Step 41056: {'lr': 3.871133597873697e-05, 'samples': 21021184, 'steps': 41056, 'batch_loss/train': 0.703189882915467} +12/28/2021 08:52:50 - INFO - codeparrot_training - Step 41057: {'lr': 3.870291065908218e-05, 'samples': 21021696, 'steps': 41057, 'batch_loss/train': 0.7672564806416631} +12/28/2021 08:53:01 - INFO - codeparrot_training - Step 41058: {'lr': 3.869448617946489e-05, 'samples': 21022208, 'steps': 41058, 'batch_loss/train': 0.696374392719008} +12/28/2021 08:53:15 - INFO - codeparrot_training - Step 41059: {'lr': 3.8686062539918463e-05, 'samples': 21022720, 'steps': 41059, 'batch_loss/train': 0.4668176129925996} +12/28/2021 08:53:25 - INFO - codeparrot_training - Step 41060: {'lr': 3.86776397404765e-05, 'samples': 21023232, 'steps': 41060, 'batch_loss/train': 0.7293521272949874} +12/28/2021 08:53:36 - INFO - codeparrot_training - Step 41061: {'lr': 3.8669217781172414e-05, 'samples': 21023744, 'steps': 41061, 'batch_loss/train': 0.750057527795434} +12/28/2021 08:53:46 - INFO - codeparrot_training - Step 41062: {'lr': 3.8660796662039756e-05, 'samples': 21024256, 'steps': 41062, 'batch_loss/train': 0.6050268891267478} +12/28/2021 08:53:58 - INFO - codeparrot_training - Step 41063: {'lr': 3.865237638311195e-05, 'samples': 21024768, 'steps': 41063, 'batch_loss/train': 0.7253961493261158} +12/28/2021 08:54:09 - INFO - codeparrot_training - Step 41064: {'lr': 3.864395694442249e-05, 'samples': 21025280, 'steps': 41064, 'batch_loss/train': 0.7020030862186104} +12/28/2021 08:54:20 - INFO - codeparrot_training - Step 41065: {'lr': 3.863553834600486e-05, 'samples': 21025792, 'steps': 41065, 'batch_loss/train': 0.6573014911264181} +12/28/2021 08:54:32 - INFO - codeparrot_training - Step 41066: {'lr': 3.862712058789253e-05, 'samples': 21026304, 'steps': 41066, 'batch_loss/train': 0.744113015010953} +12/28/2021 08:54:42 - INFO - codeparrot_training - Step 41067: {'lr': 3.861870367011894e-05, 'samples': 21026816, 'steps': 41067, 'batch_loss/train': 0.7544735479168594} +12/28/2021 08:54:53 - INFO - codeparrot_training - Step 41068: {'lr': 3.861028759271759e-05, 'samples': 21027328, 'steps': 41068, 'batch_loss/train': 0.7412649355828762} +12/28/2021 08:55:07 - INFO - codeparrot_training - Step 41069: {'lr': 3.860187235572196e-05, 'samples': 21027840, 'steps': 41069, 'batch_loss/train': 0.6725435764528811} +12/28/2021 08:55:18 - INFO - codeparrot_training - Step 41070: {'lr': 3.859345795916541e-05, 'samples': 21028352, 'steps': 41070, 'batch_loss/train': 1.2438027255702764} +12/28/2021 08:55:28 - INFO - codeparrot_training - Step 41071: {'lr': 3.858504440308139e-05, 'samples': 21028864, 'steps': 41071, 'batch_loss/train': 0.6785328704863787} +12/28/2021 08:55:39 - INFO - codeparrot_training - Step 41072: {'lr': 3.8576631687503525e-05, 'samples': 21029376, 'steps': 41072, 'batch_loss/train': 0.6027086321264505} +12/28/2021 08:55:51 - INFO - codeparrot_training - Step 41073: {'lr': 3.8568219812465085e-05, 'samples': 21029888, 'steps': 41073, 'batch_loss/train': 0.7301613597082905} +12/28/2021 08:56:02 - INFO - codeparrot_training - Step 41074: {'lr': 3.855980877799956e-05, 'samples': 21030400, 'steps': 41074, 'batch_loss/train': 0.716975349932909} +12/28/2021 08:56:12 - INFO - codeparrot_training - Step 41075: {'lr': 3.8551398584140415e-05, 'samples': 21030912, 'steps': 41075, 'batch_loss/train': 0.5715714764955919} +12/28/2021 08:56:25 - INFO - codeparrot_training - Step 41076: {'lr': 3.854298923092106e-05, 'samples': 21031424, 'steps': 41076, 'batch_loss/train': 0.8706163046881557} +12/28/2021 08:56:35 - INFO - codeparrot_training - Step 41077: {'lr': 3.853458071837493e-05, 'samples': 21031936, 'steps': 41077, 'batch_loss/train': 0.6095366193330847} +12/28/2021 08:56:46 - INFO - codeparrot_training - Step 41078: {'lr': 3.8526173046535465e-05, 'samples': 21032448, 'steps': 41078, 'batch_loss/train': 0.6574749066494405} +12/28/2021 08:56:58 - INFO - codeparrot_training - Step 41079: {'lr': 3.8517766215436094e-05, 'samples': 21032960, 'steps': 41079, 'batch_loss/train': 0.7476839395239949} +12/28/2021 08:57:09 - INFO - codeparrot_training - Step 41080: {'lr': 3.850936022511023e-05, 'samples': 21033472, 'steps': 41080, 'batch_loss/train': 0.7712726895697415} +12/28/2021 08:57:20 - INFO - codeparrot_training - Step 41081: {'lr': 3.850095507559134e-05, 'samples': 21033984, 'steps': 41081, 'batch_loss/train': 0.680847468553111} +12/28/2021 08:57:30 - INFO - codeparrot_training - Step 41082: {'lr': 3.8492550766912685e-05, 'samples': 21034496, 'steps': 41082, 'batch_loss/train': 0.674831434211228} +12/28/2021 08:57:44 - INFO - codeparrot_training - Step 41083: {'lr': 3.848414729910785e-05, 'samples': 21035008, 'steps': 41083, 'batch_loss/train': 0.5210017965873703} +12/28/2021 08:57:55 - INFO - codeparrot_training - Step 41084: {'lr': 3.847574467221024e-05, 'samples': 21035520, 'steps': 41084, 'batch_loss/train': 0.7455147528089583} +12/28/2021 08:58:05 - INFO - codeparrot_training - Step 41085: {'lr': 3.846734288625311e-05, 'samples': 21036032, 'steps': 41085, 'batch_loss/train': 0.720810635946691} +12/28/2021 08:58:18 - INFO - codeparrot_training - Step 41086: {'lr': 3.845894194127e-05, 'samples': 21036544, 'steps': 41086, 'batch_loss/train': 0.7308098529465497} +12/28/2021 08:58:28 - INFO - codeparrot_training - Step 41087: {'lr': 3.8450541837294236e-05, 'samples': 21037056, 'steps': 41087, 'batch_loss/train': 0.7425839272909798} +12/28/2021 08:58:39 - INFO - codeparrot_training - Step 41088: {'lr': 3.844214257435924e-05, 'samples': 21037568, 'steps': 41088, 'batch_loss/train': 0.763797145918943} +12/28/2021 08:58:53 - INFO - codeparrot_training - Step 41089: {'lr': 3.843374415249842e-05, 'samples': 21038080, 'steps': 41089, 'batch_loss/train': 0.6687535420060158} +12/28/2021 08:59:03 - INFO - codeparrot_training - Step 41090: {'lr': 3.8425346571745124e-05, 'samples': 21038592, 'steps': 41090, 'batch_loss/train': 0.6982157869497314} +12/28/2021 08:59:14 - INFO - codeparrot_training - Step 41091: {'lr': 3.8416949832132786e-05, 'samples': 21039104, 'steps': 41091, 'batch_loss/train': 0.7261992925778031} +12/28/2021 08:59:26 - INFO - codeparrot_training - Step 41092: {'lr': 3.8408553933694746e-05, 'samples': 21039616, 'steps': 41092, 'batch_loss/train': 0.6239045895636082} +12/28/2021 08:59:37 - INFO - codeparrot_training - Step 41093: {'lr': 3.840015887646442e-05, 'samples': 21040128, 'steps': 41093, 'batch_loss/train': 0.5993877062574029} +12/28/2021 08:59:47 - INFO - codeparrot_training - Step 41094: {'lr': 3.839176466047514e-05, 'samples': 21040640, 'steps': 41094, 'batch_loss/train': 0.7213416248559952} +12/28/2021 08:59:58 - INFO - codeparrot_training - Step 41095: {'lr': 3.8383371285760385e-05, 'samples': 21041152, 'steps': 41095, 'batch_loss/train': 0.7049128506332636} +12/28/2021 09:00:10 - INFO - codeparrot_training - Step 41096: {'lr': 3.837497875235338e-05, 'samples': 21041664, 'steps': 41096, 'batch_loss/train': 0.6818074071779847} +12/28/2021 09:00:21 - INFO - codeparrot_training - Step 41097: {'lr': 3.8366587060287506e-05, 'samples': 21042176, 'steps': 41097, 'batch_loss/train': 0.6981854625046253} +12/28/2021 09:00:31 - INFO - codeparrot_training - Step 41098: {'lr': 3.835819620959627e-05, 'samples': 21042688, 'steps': 41098, 'batch_loss/train': 0.7090179969673045} +12/28/2021 09:00:45 - INFO - codeparrot_training - Step 41099: {'lr': 3.834980620031286e-05, 'samples': 21043200, 'steps': 41099, 'batch_loss/train': 0.6884708777070045} +12/28/2021 09:00:56 - INFO - codeparrot_training - Step 41100: {'lr': 3.8341417032470735e-05, 'samples': 21043712, 'steps': 41100, 'batch_loss/train': 0.658650394063443} +12/28/2021 09:01:06 - INFO - codeparrot_training - Step 41101: {'lr': 3.8333028706103196e-05, 'samples': 21044224, 'steps': 41101, 'batch_loss/train': 0.7298250913154334} +12/28/2021 09:01:19 - INFO - codeparrot_training - Step 41102: {'lr': 3.8324641221243625e-05, 'samples': 21044736, 'steps': 41102, 'batch_loss/train': 0.6017200942442287} +12/28/2021 09:01:29 - INFO - codeparrot_training - Step 41103: {'lr': 3.831625457792537e-05, 'samples': 21045248, 'steps': 41103, 'batch_loss/train': 0.6957866689190269} +12/28/2021 09:01:40 - INFO - codeparrot_training - Step 41104: {'lr': 3.830786877618172e-05, 'samples': 21045760, 'steps': 41104, 'batch_loss/train': 0.6836799941956997} +12/28/2021 09:01:51 - INFO - codeparrot_training - Step 41105: {'lr': 3.8299483816046085e-05, 'samples': 21046272, 'steps': 41105, 'batch_loss/train': 0.8622136292979121} +12/28/2021 09:02:05 - INFO - codeparrot_training - Step 41106: {'lr': 3.8291099697551765e-05, 'samples': 21046784, 'steps': 41106, 'batch_loss/train': 0.6192258559167385} +12/28/2021 09:02:16 - INFO - codeparrot_training - Step 41107: {'lr': 3.8282716420732136e-05, 'samples': 21047296, 'steps': 41107, 'batch_loss/train': 0.7011196021921933} +12/28/2021 09:02:26 - INFO - codeparrot_training - Step 41108: {'lr': 3.8274333985620355e-05, 'samples': 21047808, 'steps': 41108, 'batch_loss/train': 0.7598989857360721} +12/28/2021 09:02:38 - INFO - codeparrot_training - Step 41109: {'lr': 3.826595239224998e-05, 'samples': 21048320, 'steps': 41109, 'batch_loss/train': 0.7253643311560154} +12/28/2021 09:02:49 - INFO - codeparrot_training - Step 41110: {'lr': 3.825757164065424e-05, 'samples': 21048832, 'steps': 41110, 'batch_loss/train': 0.691170132078696} +12/28/2021 09:02:59 - INFO - codeparrot_training - Step 41111: {'lr': 3.824919173086644e-05, 'samples': 21049344, 'steps': 41111, 'batch_loss/train': 0.7418858013115823} +12/28/2021 09:03:11 - INFO - codeparrot_training - Step 41112: {'lr': 3.8240812662919787e-05, 'samples': 21049856, 'steps': 41112, 'batch_loss/train': 0.7987308786250651} +12/28/2021 09:03:22 - INFO - codeparrot_training - Step 41113: {'lr': 3.823243443684785e-05, 'samples': 21050368, 'steps': 41113, 'batch_loss/train': 0.6861478579230607} +12/28/2021 09:03:33 - INFO - codeparrot_training - Step 41114: {'lr': 3.822405705268375e-05, 'samples': 21050880, 'steps': 41114, 'batch_loss/train': 0.6589106584433466} +12/28/2021 09:03:43 - INFO - codeparrot_training - Step 41115: {'lr': 3.8215680510460814e-05, 'samples': 21051392, 'steps': 41115, 'batch_loss/train': 0.7665241397917271} +12/28/2021 09:03:56 - INFO - codeparrot_training - Step 41116: {'lr': 3.820730481021237e-05, 'samples': 21051904, 'steps': 41116, 'batch_loss/train': 0.5355068790086079} +12/28/2021 09:04:06 - INFO - codeparrot_training - Step 41117: {'lr': 3.8198929951971714e-05, 'samples': 21052416, 'steps': 41117, 'batch_loss/train': 0.6686906209215522} +12/28/2021 09:04:17 - INFO - codeparrot_training - Step 41118: {'lr': 3.819055593577214e-05, 'samples': 21052928, 'steps': 41118, 'batch_loss/train': 0.8029351746663451} +12/28/2021 09:04:32 - INFO - codeparrot_training - Step 41119: {'lr': 3.818218276164692e-05, 'samples': 21053440, 'steps': 41119, 'batch_loss/train': 0.7177003078977577} +12/28/2021 09:04:43 - INFO - codeparrot_training - Step 41120: {'lr': 3.817381042962939e-05, 'samples': 21053952, 'steps': 41120, 'batch_loss/train': 0.45153408218175173} +12/28/2021 09:04:53 - INFO - codeparrot_training - Step 41121: {'lr': 3.8165438939752856e-05, 'samples': 21054464, 'steps': 41121, 'batch_loss/train': 0.6522258128970861} +12/28/2021 09:05:05 - INFO - codeparrot_training - Step 41122: {'lr': 3.815706829205048e-05, 'samples': 21054976, 'steps': 41122, 'batch_loss/train': 0.7492904337123036} +12/28/2021 09:05:16 - INFO - codeparrot_training - Step 41123: {'lr': 3.8148698486555556e-05, 'samples': 21055488, 'steps': 41123, 'batch_loss/train': 0.681745583191514} +12/28/2021 09:05:27 - INFO - codeparrot_training - Step 41124: {'lr': 3.814032952330154e-05, 'samples': 21056000, 'steps': 41124, 'batch_loss/train': 0.7450733166188002} +12/28/2021 09:05:39 - INFO - codeparrot_training - Step 41125: {'lr': 3.813196140232153e-05, 'samples': 21056512, 'steps': 41125, 'batch_loss/train': 0.6380019226344302} +12/28/2021 09:05:50 - INFO - codeparrot_training - Step 41126: {'lr': 3.8123594123648825e-05, 'samples': 21057024, 'steps': 41126, 'batch_loss/train': 0.6843234356492758} +12/28/2021 09:06:00 - INFO - codeparrot_training - Step 41127: {'lr': 3.811522768731673e-05, 'samples': 21057536, 'steps': 41127, 'batch_loss/train': 0.6556411189958453} +12/28/2021 09:06:11 - INFO - codeparrot_training - Step 41128: {'lr': 3.810686209335848e-05, 'samples': 21058048, 'steps': 41128, 'batch_loss/train': 0.6869215667247772} +12/28/2021 09:06:25 - INFO - codeparrot_training - Step 41129: {'lr': 3.809849734180731e-05, 'samples': 21058560, 'steps': 41129, 'batch_loss/train': 0.6826174389570951} +12/28/2021 09:06:35 - INFO - codeparrot_training - Step 41130: {'lr': 3.809013343269654e-05, 'samples': 21059072, 'steps': 41130, 'batch_loss/train': 0.6508313848462421} +12/28/2021 09:06:46 - INFO - codeparrot_training - Step 41131: {'lr': 3.8081770366059366e-05, 'samples': 21059584, 'steps': 41131, 'batch_loss/train': 0.7441626903600991} +12/28/2021 09:06:58 - INFO - codeparrot_training - Step 41132: {'lr': 3.807340814192905e-05, 'samples': 21060096, 'steps': 41132, 'batch_loss/train': 0.6546672964468598} +12/28/2021 09:07:09 - INFO - codeparrot_training - Step 41133: {'lr': 3.806504676033892e-05, 'samples': 21060608, 'steps': 41133, 'batch_loss/train': 0.5862844835792203} +12/28/2021 09:07:19 - INFO - codeparrot_training - Step 41134: {'lr': 3.8056686221322024e-05, 'samples': 21061120, 'steps': 41134, 'batch_loss/train': 0.7195357494056225} +12/28/2021 09:07:31 - INFO - codeparrot_training - Step 41135: {'lr': 3.804832652491175e-05, 'samples': 21061632, 'steps': 41135, 'batch_loss/train': 0.7045822162181139} +12/28/2021 09:07:42 - INFO - codeparrot_training - Step 41136: {'lr': 3.803996767114138e-05, 'samples': 21062144, 'steps': 41136, 'batch_loss/train': 0.7940586563199759} +12/28/2021 09:07:53 - INFO - codeparrot_training - Step 41137: {'lr': 3.8031609660044015e-05, 'samples': 21062656, 'steps': 41137, 'batch_loss/train': 0.6994975302368402} +12/28/2021 09:08:06 - INFO - codeparrot_training - Step 41138: {'lr': 3.8023252491652835e-05, 'samples': 21063168, 'steps': 41138, 'batch_loss/train': 0.621351567999227} +12/28/2021 09:08:17 - INFO - codeparrot_training - Step 41139: {'lr': 3.801489616600132e-05, 'samples': 21063680, 'steps': 41139, 'batch_loss/train': 0.6439565353794023} +12/28/2021 09:08:28 - INFO - codeparrot_training - Step 41140: {'lr': 3.800654068312248e-05, 'samples': 21064192, 'steps': 41140, 'batch_loss/train': 0.6247211089357734} +12/28/2021 09:08:38 - INFO - codeparrot_training - Step 41141: {'lr': 3.7998186043049555e-05, 'samples': 21064704, 'steps': 41141, 'batch_loss/train': 0.8009377708658576} +12/28/2021 09:08:50 - INFO - codeparrot_training - Step 41142: {'lr': 3.798983224581581e-05, 'samples': 21065216, 'steps': 41142, 'batch_loss/train': 0.7523943439591676} +12/28/2021 09:09:01 - INFO - codeparrot_training - Step 41143: {'lr': 3.798147929145446e-05, 'samples': 21065728, 'steps': 41143, 'batch_loss/train': 0.679139286512509} +12/28/2021 09:09:12 - INFO - codeparrot_training - Step 41144: {'lr': 3.797312717999865e-05, 'samples': 21066240, 'steps': 41144, 'batch_loss/train': 0.7310160775668919} +12/28/2021 09:09:26 - INFO - codeparrot_training - Step 41145: {'lr': 3.7964775911481676e-05, 'samples': 21066752, 'steps': 41145, 'batch_loss/train': 0.703602428548038} +12/28/2021 09:09:36 - INFO - codeparrot_training - Step 41146: {'lr': 3.7956425485936666e-05, 'samples': 21067264, 'steps': 41146, 'batch_loss/train': 0.6927729360759258} +12/28/2021 09:09:47 - INFO - codeparrot_training - Step 41147: {'lr': 3.7948075903396825e-05, 'samples': 21067776, 'steps': 41147, 'batch_loss/train': 0.6413777600391768} +12/28/2021 09:09:59 - INFO - codeparrot_training - Step 41148: {'lr': 3.793972716389546e-05, 'samples': 21068288, 'steps': 41148, 'batch_loss/train': 0.8138997852802277} +12/28/2021 09:10:10 - INFO - codeparrot_training - Step 41149: {'lr': 3.793137926746554e-05, 'samples': 21068800, 'steps': 41149, 'batch_loss/train': 0.7251161150634289} +12/28/2021 09:10:20 - INFO - codeparrot_training - Step 41150: {'lr': 3.792303221414051e-05, 'samples': 21069312, 'steps': 41150, 'batch_loss/train': 0.7367305760271847} +12/28/2021 09:10:32 - INFO - codeparrot_training - Step 41151: {'lr': 3.791468600395334e-05, 'samples': 21069824, 'steps': 41151, 'batch_loss/train': 0.8673788886517286} +12/28/2021 09:10:43 - INFO - codeparrot_training - Step 41152: {'lr': 3.790634063693726e-05, 'samples': 21070336, 'steps': 41152, 'batch_loss/train': 0.7560377530753613} +12/28/2021 09:10:53 - INFO - codeparrot_training - Step 41153: {'lr': 3.7897996113125585e-05, 'samples': 21070848, 'steps': 41153, 'batch_loss/train': 0.7278735740110278} +12/28/2021 09:11:07 - INFO - codeparrot_training - Step 41154: {'lr': 3.7889652432551353e-05, 'samples': 21071360, 'steps': 41154, 'batch_loss/train': 0.6035345494747162} +12/28/2021 09:11:18 - INFO - codeparrot_training - Step 41155: {'lr': 3.788130959524777e-05, 'samples': 21071872, 'steps': 41155, 'batch_loss/train': 0.7272494791541249} +12/28/2021 09:11:29 - INFO - codeparrot_training - Step 41156: {'lr': 3.7872967601248006e-05, 'samples': 21072384, 'steps': 41156, 'batch_loss/train': 0.7449235031381249} +12/28/2021 09:11:39 - INFO - codeparrot_training - Step 41157: {'lr': 3.786462645058522e-05, 'samples': 21072896, 'steps': 41157, 'batch_loss/train': 0.7142978506162763} +12/28/2021 09:11:51 - INFO - codeparrot_training - Step 41158: {'lr': 3.785628614329259e-05, 'samples': 21073408, 'steps': 41158, 'batch_loss/train': 0.6754511739127338} +12/28/2021 09:12:02 - INFO - codeparrot_training - Step 41159: {'lr': 3.7847946679403304e-05, 'samples': 21073920, 'steps': 41159, 'batch_loss/train': 0.630590573331574} +12/28/2021 09:12:13 - INFO - codeparrot_training - Step 41160: {'lr': 3.783960805895037e-05, 'samples': 21074432, 'steps': 41160, 'batch_loss/train': 0.5654899703804404} +12/28/2021 09:12:25 - INFO - codeparrot_training - Step 41161: {'lr': 3.783127028196712e-05, 'samples': 21074944, 'steps': 41161, 'batch_loss/train': 0.7304074340499938} +12/28/2021 09:12:35 - INFO - codeparrot_training - Step 41162: {'lr': 3.782293334848666e-05, 'samples': 21075456, 'steps': 41162, 'batch_loss/train': 0.7387046385556459} +12/28/2021 09:12:46 - INFO - codeparrot_training - Step 41163: {'lr': 3.7814597258542056e-05, 'samples': 21075968, 'steps': 41163, 'batch_loss/train': 0.7060185967711732} +12/28/2021 09:12:58 - INFO - codeparrot_training - Step 41164: {'lr': 3.780626201216644e-05, 'samples': 21076480, 'steps': 41164, 'batch_loss/train': 0.5864907624199986} +12/28/2021 09:13:09 - INFO - codeparrot_training - Step 41165: {'lr': 3.779792760939313e-05, 'samples': 21076992, 'steps': 41165, 'batch_loss/train': 0.6900190818123519} +12/28/2021 09:13:19 - INFO - codeparrot_training - Step 41166: {'lr': 3.778959405025506e-05, 'samples': 21077504, 'steps': 41166, 'batch_loss/train': 0.7194865567144006} +12/28/2021 09:13:30 - INFO - codeparrot_training - Step 41167: {'lr': 3.778126133478543e-05, 'samples': 21078016, 'steps': 41167, 'batch_loss/train': 0.6098726103082299} +12/28/2021 09:13:44 - INFO - codeparrot_training - Step 41168: {'lr': 3.7772929463017386e-05, 'samples': 21078528, 'steps': 41168, 'batch_loss/train': 0.7041658367961645} +12/28/2021 09:13:54 - INFO - codeparrot_training - Step 41169: {'lr': 3.7764598434984035e-05, 'samples': 21079040, 'steps': 41169, 'batch_loss/train': 0.7266049841418862} +12/28/2021 09:14:05 - INFO - codeparrot_training - Step 41170: {'lr': 3.775626825071851e-05, 'samples': 21079552, 'steps': 41170, 'batch_loss/train': 0.710936508141458} +12/28/2021 09:14:17 - INFO - codeparrot_training - Step 41171: {'lr': 3.774793891025394e-05, 'samples': 21080064, 'steps': 41171, 'batch_loss/train': 0.7397889774292707} +12/28/2021 09:14:28 - INFO - codeparrot_training - Step 41172: {'lr': 3.7739610413623394e-05, 'samples': 21080576, 'steps': 41172, 'batch_loss/train': 0.7973775891587138} +12/28/2021 09:14:39 - INFO - codeparrot_training - Step 41173: {'lr': 3.7731282760860016e-05, 'samples': 21081088, 'steps': 41173, 'batch_loss/train': 0.6204420882277191} +12/28/2021 09:14:52 - INFO - codeparrot_training - Step 41174: {'lr': 3.772295595199699e-05, 'samples': 21081600, 'steps': 41174, 'batch_loss/train': 0.6953377779573202} +12/28/2021 09:15:03 - INFO - codeparrot_training - Step 41175: {'lr': 3.771462998706718e-05, 'samples': 21082112, 'steps': 41175, 'batch_loss/train': 0.6483640470542014} +12/28/2021 09:15:14 - INFO - codeparrot_training - Step 41176: {'lr': 3.770630486610399e-05, 'samples': 21082624, 'steps': 41176, 'batch_loss/train': 0.7037874015804846} +12/28/2021 09:15:26 - INFO - codeparrot_training - Step 41177: {'lr': 3.769798058914031e-05, 'samples': 21083136, 'steps': 41177, 'batch_loss/train': 0.6502860287437215} +12/28/2021 09:15:36 - INFO - codeparrot_training - Step 41178: {'lr': 3.768965715620923e-05, 'samples': 21083648, 'steps': 41178, 'batch_loss/train': 0.7227981556206942} +12/28/2021 09:15:47 - INFO - codeparrot_training - Step 41179: {'lr': 3.7681334567344045e-05, 'samples': 21084160, 'steps': 41179, 'batch_loss/train': 0.7190803599078208} +12/28/2021 09:15:58 - INFO - codeparrot_training - Step 41180: {'lr': 3.7673012822577625e-05, 'samples': 21084672, 'steps': 41180, 'batch_loss/train': 0.7318063837010413} +12/28/2021 09:16:10 - INFO - codeparrot_training - Step 41181: {'lr': 3.766469192194313e-05, 'samples': 21085184, 'steps': 41181, 'batch_loss/train': 0.7545141819864511} +12/28/2021 09:16:20 - INFO - codeparrot_training - Step 41182: {'lr': 3.765637186547366e-05, 'samples': 21085696, 'steps': 41182, 'batch_loss/train': 0.6912437789142132} +12/28/2021 09:16:31 - INFO - codeparrot_training - Step 41183: {'lr': 3.764805265320228e-05, 'samples': 21086208, 'steps': 41183, 'batch_loss/train': 0.8110213037580252} +12/28/2021 09:16:45 - INFO - codeparrot_training - Step 41184: {'lr': 3.763973428516207e-05, 'samples': 21086720, 'steps': 41184, 'batch_loss/train': 0.717070622369647} +12/28/2021 09:16:56 - INFO - codeparrot_training - Step 41185: {'lr': 3.763141676138612e-05, 'samples': 21087232, 'steps': 41185, 'batch_loss/train': 0.6487305578775704} +12/28/2021 09:17:06 - INFO - codeparrot_training - Step 41186: {'lr': 3.762310008190739e-05, 'samples': 21087744, 'steps': 41186, 'batch_loss/train': 0.652310804463923} +12/28/2021 09:17:18 - INFO - codeparrot_training - Step 41187: {'lr': 3.761478424675907e-05, 'samples': 21088256, 'steps': 41187, 'batch_loss/train': 0.7759278607554734} +12/28/2021 09:17:29 - INFO - codeparrot_training - Step 41188: {'lr': 3.760646925597422e-05, 'samples': 21088768, 'steps': 41188, 'batch_loss/train': 0.7528243875131011} +12/28/2021 09:17:40 - INFO - codeparrot_training - Step 41189: {'lr': 3.759815510958573e-05, 'samples': 21089280, 'steps': 41189, 'batch_loss/train': 0.7572869649156928} +12/28/2021 09:17:54 - INFO - codeparrot_training - Step 41190: {'lr': 3.758984180762684e-05, 'samples': 21089792, 'steps': 41190, 'batch_loss/train': 0.662991936551407} +12/28/2021 09:18:04 - INFO - codeparrot_training - Step 41191: {'lr': 3.758152935013062e-05, 'samples': 21090304, 'steps': 41191, 'batch_loss/train': 0.5625101177429315} +12/28/2021 09:18:15 - INFO - codeparrot_training - Step 41192: {'lr': 3.7573217737129936e-05, 'samples': 21090816, 'steps': 41192, 'batch_loss/train': 0.6842352310195565} +12/28/2021 09:18:26 - INFO - codeparrot_training - Step 41193: {'lr': 3.7564906968657886e-05, 'samples': 21091328, 'steps': 41193, 'batch_loss/train': 0.6371550264302641} +12/28/2021 09:18:38 - INFO - codeparrot_training - Step 41194: {'lr': 3.755659704474768e-05, 'samples': 21091840, 'steps': 41194, 'batch_loss/train': 0.6440538596361876} +12/28/2021 09:18:49 - INFO - codeparrot_training - Step 41195: {'lr': 3.754828796543217e-05, 'samples': 21092352, 'steps': 41195, 'batch_loss/train': 0.6383040716173127} +12/28/2021 09:18:59 - INFO - codeparrot_training - Step 41196: {'lr': 3.7539979730744437e-05, 'samples': 21092864, 'steps': 41196, 'batch_loss/train': 0.716913802549243} +12/28/2021 09:19:12 - INFO - codeparrot_training - Step 41197: {'lr': 3.753167234071753e-05, 'samples': 21093376, 'steps': 41197, 'batch_loss/train': 1.013464032439515} +12/28/2021 09:19:22 - INFO - codeparrot_training - Step 41198: {'lr': 3.7523365795384454e-05, 'samples': 21093888, 'steps': 41198, 'batch_loss/train': 0.7556806474458426} +12/28/2021 09:19:33 - INFO - codeparrot_training - Step 41199: {'lr': 3.751506009477826e-05, 'samples': 21094400, 'steps': 41199, 'batch_loss/train': 0.8064182931557298} +12/28/2021 09:19:44 - INFO - codeparrot_training - Step 41200: {'lr': 3.750675523893202e-05, 'samples': 21094912, 'steps': 41200, 'batch_loss/train': 1.4612974864430726} +12/28/2021 09:19:56 - INFO - codeparrot_training - Step 41201: {'lr': 3.749845122787857e-05, 'samples': 21095424, 'steps': 41201, 'batch_loss/train': 0.7267713551409543} +12/28/2021 09:20:06 - INFO - codeparrot_training - Step 41202: {'lr': 3.74901480616511e-05, 'samples': 21095936, 'steps': 41202, 'batch_loss/train': 0.9038039385341108} +12/28/2021 09:20:17 - INFO - codeparrot_training - Step 41203: {'lr': 3.74818457402826e-05, 'samples': 21096448, 'steps': 41203, 'batch_loss/train': 0.6347234840504825} +12/28/2021 09:20:31 - INFO - codeparrot_training - Step 41204: {'lr': 3.7473544263805945e-05, 'samples': 21096960, 'steps': 41204, 'batch_loss/train': 0.650759092066437} +12/28/2021 09:20:42 - INFO - codeparrot_training - Step 41205: {'lr': 3.746524363225431e-05, 'samples': 21097472, 'steps': 41205, 'batch_loss/train': 0.651950042694807} +12/28/2021 09:20:52 - INFO - codeparrot_training - Step 41206: {'lr': 3.745694384566059e-05, 'samples': 21097984, 'steps': 41206, 'batch_loss/train': 0.4698366750671994} +12/28/2021 09:21:04 - INFO - codeparrot_training - Step 41207: {'lr': 3.744864490405781e-05, 'samples': 21098496, 'steps': 41207, 'batch_loss/train': 0.6655584929976612} +12/28/2021 09:21:15 - INFO - codeparrot_training - Step 41208: {'lr': 3.744034680747896e-05, 'samples': 21099008, 'steps': 41208, 'batch_loss/train': 0.5669842089992017} +12/28/2021 09:21:26 - INFO - codeparrot_training - Step 41209: {'lr': 3.7432049555957025e-05, 'samples': 21099520, 'steps': 41209, 'batch_loss/train': 0.739200841402635} +12/28/2021 09:21:38 - INFO - codeparrot_training - Step 41210: {'lr': 3.742375314952498e-05, 'samples': 21100032, 'steps': 41210, 'batch_loss/train': 0.5669695362448692} +12/28/2021 09:21:48 - INFO - codeparrot_training - Step 41211: {'lr': 3.741545758821591e-05, 'samples': 21100544, 'steps': 41211, 'batch_loss/train': 0.6718160333111882} +12/28/2021 09:21:59 - INFO - codeparrot_training - Step 41212: {'lr': 3.740716287206258e-05, 'samples': 21101056, 'steps': 41212, 'batch_loss/train': 0.7352861466351897} +12/28/2021 09:22:10 - INFO - codeparrot_training - Step 41213: {'lr': 3.739886900109818e-05, 'samples': 21101568, 'steps': 41213, 'batch_loss/train': 0.6703471783548594} +12/28/2021 09:22:24 - INFO - codeparrot_training - Step 41214: {'lr': 3.739057597535564e-05, 'samples': 21102080, 'steps': 41214, 'batch_loss/train': 0.521841102745384} +12/28/2021 09:22:35 - INFO - codeparrot_training - Step 41215: {'lr': 3.73822837948678e-05, 'samples': 21102592, 'steps': 41215, 'batch_loss/train': 0.6209904020070098} +12/28/2021 09:22:45 - INFO - codeparrot_training - Step 41216: {'lr': 3.737399245966774e-05, 'samples': 21103104, 'steps': 41216, 'batch_loss/train': 0.6937187118455768} +12/28/2021 09:22:58 - INFO - codeparrot_training - Step 41217: {'lr': 3.7365701969788475e-05, 'samples': 21103616, 'steps': 41217, 'batch_loss/train': 0.6511717806570232} +12/28/2021 09:23:08 - INFO - codeparrot_training - Step 41218: {'lr': 3.735741232526285e-05, 'samples': 21104128, 'steps': 41218, 'batch_loss/train': 0.6354565774672665} +12/28/2021 09:23:19 - INFO - codeparrot_training - Step 41219: {'lr': 3.7349123526123796e-05, 'samples': 21104640, 'steps': 41219, 'batch_loss/train': 0.7733172457665205} +12/28/2021 09:23:33 - INFO - codeparrot_training - Step 41220: {'lr': 3.734083557240442e-05, 'samples': 21105152, 'steps': 41220, 'batch_loss/train': 0.6237650560215116} +12/28/2021 09:23:44 - INFO - codeparrot_training - Step 41221: {'lr': 3.733254846413756e-05, 'samples': 21105664, 'steps': 41221, 'batch_loss/train': 1.0311354203149676} +12/28/2021 09:23:54 - INFO - codeparrot_training - Step 41222: {'lr': 3.732426220135618e-05, 'samples': 21106176, 'steps': 41222, 'batch_loss/train': 0.73996068444103} +12/28/2021 09:24:05 - INFO - codeparrot_training - Step 41223: {'lr': 3.731597678409324e-05, 'samples': 21106688, 'steps': 41223, 'batch_loss/train': 0.7228375893319026} +12/28/2021 09:24:17 - INFO - codeparrot_training - Step 41224: {'lr': 3.7307692212381646e-05, 'samples': 21107200, 'steps': 41224, 'batch_loss/train': 0.7059612213633955} +12/28/2021 09:24:28 - INFO - codeparrot_training - Step 41225: {'lr': 3.7299408486254374e-05, 'samples': 21107712, 'steps': 41225, 'batch_loss/train': 0.6042306388553698} +12/28/2021 09:24:38 - INFO - codeparrot_training - Step 41226: {'lr': 3.7291125605744324e-05, 'samples': 21108224, 'steps': 41226, 'batch_loss/train': 0.676368243759498} +12/28/2021 09:24:50 - INFO - codeparrot_training - Step 41227: {'lr': 3.7282843570884464e-05, 'samples': 21108736, 'steps': 41227, 'batch_loss/train': 0.7916870564222336} +12/28/2021 09:25:01 - INFO - codeparrot_training - Step 41228: {'lr': 3.727456238170768e-05, 'samples': 21109248, 'steps': 41228, 'batch_loss/train': 0.6438826145604253} +12/28/2021 09:25:12 - INFO - codeparrot_training - Step 41229: {'lr': 3.7266282038247004e-05, 'samples': 21109760, 'steps': 41229, 'batch_loss/train': 0.6591504639945924} +12/28/2021 09:25:26 - INFO - codeparrot_training - Step 41230: {'lr': 3.72580025405351e-05, 'samples': 21110272, 'steps': 41230, 'batch_loss/train': 0.6771864043548703} +12/28/2021 09:25:37 - INFO - codeparrot_training - Step 41231: {'lr': 3.72497238886052e-05, 'samples': 21110784, 'steps': 41231, 'batch_loss/train': 0.7245011096820235} +12/28/2021 09:25:47 - INFO - codeparrot_training - Step 41232: {'lr': 3.724144608249e-05, 'samples': 21111296, 'steps': 41232, 'batch_loss/train': 0.6692306912736967} +12/28/2021 09:25:59 - INFO - codeparrot_training - Step 41233: {'lr': 3.7233169122222494e-05, 'samples': 21111808, 'steps': 41233, 'batch_loss/train': 0.5219236745033413} +12/28/2021 09:26:10 - INFO - codeparrot_training - Step 41234: {'lr': 3.722489300783555e-05, 'samples': 21112320, 'steps': 41234, 'batch_loss/train': 0.6832046722993255} +12/28/2021 09:26:20 - INFO - codeparrot_training - Step 41235: {'lr': 3.721661773936208e-05, 'samples': 21112832, 'steps': 41235, 'batch_loss/train': 0.6712789782322943} +12/28/2021 09:26:31 - INFO - codeparrot_training - Step 41236: {'lr': 3.720834331683501e-05, 'samples': 21113344, 'steps': 41236, 'batch_loss/train': 0.6738295461982489} +12/28/2021 09:26:43 - INFO - codeparrot_training - Step 41237: {'lr': 3.7200069740287224e-05, 'samples': 21113856, 'steps': 41237, 'batch_loss/train': 0.7208307578694075} +12/28/2021 09:26:54 - INFO - codeparrot_training - Step 41238: {'lr': 3.7191797009751616e-05, 'samples': 21114368, 'steps': 41238, 'batch_loss/train': 0.6835049837827682} +12/28/2021 09:27:04 - INFO - codeparrot_training - Step 41239: {'lr': 3.718352512526105e-05, 'samples': 21114880, 'steps': 41239, 'batch_loss/train': 0.624258894007653} +12/28/2021 09:27:17 - INFO - codeparrot_training - Step 41240: {'lr': 3.7175254086848494e-05, 'samples': 21115392, 'steps': 41240, 'batch_loss/train': 0.6817541461496148} +12/28/2021 09:27:28 - INFO - codeparrot_training - Step 41241: {'lr': 3.7166983894546676e-05, 'samples': 21115904, 'steps': 41241, 'batch_loss/train': 0.7144450573250651} +12/28/2021 09:27:38 - INFO - codeparrot_training - Step 41242: {'lr': 3.71587145483886e-05, 'samples': 21116416, 'steps': 41242, 'batch_loss/train': 0.568187752738595} +12/28/2021 09:27:52 - INFO - codeparrot_training - Step 41243: {'lr': 3.715044604840717e-05, 'samples': 21116928, 'steps': 41243, 'batch_loss/train': 0.6803495909553021} +12/28/2021 09:28:03 - INFO - codeparrot_training - Step 41244: {'lr': 3.714217839463513e-05, 'samples': 21117440, 'steps': 41244, 'batch_loss/train': 0.7576418472453952} +12/28/2021 09:28:13 - INFO - codeparrot_training - Step 41245: {'lr': 3.713391158710539e-05, 'samples': 21117952, 'steps': 41245, 'batch_loss/train': 0.7295684572309256} +12/28/2021 09:28:26 - INFO - codeparrot_training - Step 41246: {'lr': 3.712564562585094e-05, 'samples': 21118464, 'steps': 41246, 'batch_loss/train': 0.716779128764756} +12/28/2021 09:28:36 - INFO - codeparrot_training - Step 41247: {'lr': 3.7117380510904494e-05, 'samples': 21118976, 'steps': 41247, 'batch_loss/train': 0.7459867694415152} +12/28/2021 09:28:47 - INFO - codeparrot_training - Step 41248: {'lr': 3.710911624229893e-05, 'samples': 21119488, 'steps': 41248, 'batch_loss/train': 0.567680909531191} +12/28/2021 09:28:57 - INFO - codeparrot_training - Step 41249: {'lr': 3.710085282006717e-05, 'samples': 21120000, 'steps': 41249, 'batch_loss/train': 0.7558305433485657} +12/28/2021 09:29:11 - INFO - codeparrot_training - Step 41250: {'lr': 3.709259024424202e-05, 'samples': 21120512, 'steps': 41250, 'batch_loss/train': 0.589160452131182} +12/28/2021 09:29:22 - INFO - codeparrot_training - Step 41251: {'lr': 3.708432851485635e-05, 'samples': 21121024, 'steps': 41251, 'batch_loss/train': 0.6431146282702684} +12/28/2021 09:29:33 - INFO - codeparrot_training - Step 41252: {'lr': 3.707606763194299e-05, 'samples': 21121536, 'steps': 41252, 'batch_loss/train': 0.6791288694366813} +12/28/2021 09:29:45 - INFO - codeparrot_training - Step 41253: {'lr': 3.706780759553477e-05, 'samples': 21122048, 'steps': 41253, 'batch_loss/train': 0.7589171454310417} +12/28/2021 09:29:55 - INFO - codeparrot_training - Step 41254: {'lr': 3.705954840566458e-05, 'samples': 21122560, 'steps': 41254, 'batch_loss/train': 0.5996603104285896} +12/28/2021 09:30:06 - INFO - codeparrot_training - Step 41255: {'lr': 3.705129006236527e-05, 'samples': 21123072, 'steps': 41255, 'batch_loss/train': 0.6779196033312473} +12/28/2021 09:30:18 - INFO - codeparrot_training - Step 41256: {'lr': 3.7043032565669485e-05, 'samples': 21123584, 'steps': 41256, 'batch_loss/train': 0.7678357316181064} +12/28/2021 09:30:29 - INFO - codeparrot_training - Step 41257: {'lr': 3.703477591561033e-05, 'samples': 21124096, 'steps': 41257, 'batch_loss/train': 0.768838569521904} +12/28/2021 09:30:39 - INFO - codeparrot_training - Step 41258: {'lr': 3.702652011222044e-05, 'samples': 21124608, 'steps': 41258, 'batch_loss/train': 0.6487887691473588} +12/28/2021 09:30:50 - INFO - codeparrot_training - Step 41259: {'lr': 3.7018265155532666e-05, 'samples': 21125120, 'steps': 41259, 'batch_loss/train': 0.6902922161389142} +12/28/2021 09:31:03 - INFO - codeparrot_training - Step 41260: {'lr': 3.701001104557988e-05, 'samples': 21125632, 'steps': 41260, 'batch_loss/train': 0.6824781731702387} +12/28/2021 09:31:13 - INFO - codeparrot_training - Step 41261: {'lr': 3.700175778239484e-05, 'samples': 21126144, 'steps': 41261, 'batch_loss/train': 0.7024354375316761} +12/28/2021 09:31:24 - INFO - codeparrot_training - Step 41262: {'lr': 3.69935053660104e-05, 'samples': 21126656, 'steps': 41262, 'batch_loss/train': 0.6880040680989623} +12/28/2021 09:31:38 - INFO - codeparrot_training - Step 41263: {'lr': 3.6985253796459366e-05, 'samples': 21127168, 'steps': 41263, 'batch_loss/train': 0.7558018085546792} +12/28/2021 09:31:49 - INFO - codeparrot_training - Step 41264: {'lr': 3.697700307377452e-05, 'samples': 21127680, 'steps': 41264, 'batch_loss/train': 0.7850516940234229} +12/28/2021 09:32:00 - INFO - codeparrot_training - Step 41265: {'lr': 3.6968753197988675e-05, 'samples': 21128192, 'steps': 41265, 'batch_loss/train': 0.7125332332216203} +12/28/2021 09:32:12 - INFO - codeparrot_training - Step 41266: {'lr': 3.696050416913468e-05, 'samples': 21128704, 'steps': 41266, 'batch_loss/train': 0.6845153691247106} +12/28/2021 09:32:22 - INFO - codeparrot_training - Step 41267: {'lr': 3.695225598724519e-05, 'samples': 21129216, 'steps': 41267, 'batch_loss/train': 0.7271852198755369} +12/28/2021 09:32:33 - INFO - codeparrot_training - Step 41268: {'lr': 3.6944008652353116e-05, 'samples': 21129728, 'steps': 41268, 'batch_loss/train': 0.7318122442811728} +12/28/2021 09:32:44 - INFO - codeparrot_training - Step 41269: {'lr': 3.6935762164491294e-05, 'samples': 21130240, 'steps': 41269, 'batch_loss/train': 0.6550665586255491} +12/28/2021 09:32:58 - INFO - codeparrot_training - Step 41270: {'lr': 3.692751652369236e-05, 'samples': 21130752, 'steps': 41270, 'batch_loss/train': 0.6113391304388642} +12/28/2021 09:33:08 - INFO - codeparrot_training - Step 41271: {'lr': 3.6919271729989126e-05, 'samples': 21131264, 'steps': 41271, 'batch_loss/train': 0.7887900425121188} +12/28/2021 09:33:19 - INFO - codeparrot_training - Step 41272: {'lr': 3.691102778341451e-05, 'samples': 21131776, 'steps': 41272, 'batch_loss/train': 0.7274414584971964} +12/28/2021 09:33:31 - INFO - codeparrot_training - Step 41273: {'lr': 3.6902784684001126e-05, 'samples': 21132288, 'steps': 41273, 'batch_loss/train': 0.6946681709523546} +12/28/2021 09:33:42 - INFO - codeparrot_training - Step 41274: {'lr': 3.6894542431781833e-05, 'samples': 21132800, 'steps': 41274, 'batch_loss/train': 1.3641751483082771} +12/28/2021 09:33:53 - INFO - codeparrot_training - Step 41275: {'lr': 3.688630102678936e-05, 'samples': 21133312, 'steps': 41275, 'batch_loss/train': 0.6354056850541383} +12/28/2021 09:34:03 - INFO - codeparrot_training - Step 41276: {'lr': 3.687806046905648e-05, 'samples': 21133824, 'steps': 41276, 'batch_loss/train': 0.7161515336483717} +12/28/2021 09:34:15 - INFO - codeparrot_training - Step 41277: {'lr': 3.6869820758615974e-05, 'samples': 21134336, 'steps': 41277, 'batch_loss/train': 0.6393119678832591} +12/28/2021 09:34:26 - INFO - codeparrot_training - Step 41278: {'lr': 3.6861581895500566e-05, 'samples': 21134848, 'steps': 41278, 'batch_loss/train': 0.7785124657675624} +12/28/2021 09:34:37 - INFO - codeparrot_training - Step 41279: {'lr': 3.685334387974304e-05, 'samples': 21135360, 'steps': 41279, 'batch_loss/train': 0.8031252510845661} +12/28/2021 09:34:51 - INFO - codeparrot_training - Step 41280: {'lr': 3.684510671137614e-05, 'samples': 21135872, 'steps': 41280, 'batch_loss/train': 0.6982943685725331} +12/28/2021 09:35:01 - INFO - codeparrot_training - Step 41281: {'lr': 3.683687039043268e-05, 'samples': 21136384, 'steps': 41281, 'batch_loss/train': 0.8892350541427732} +12/28/2021 09:35:12 - INFO - codeparrot_training - Step 41282: {'lr': 3.682863491694521e-05, 'samples': 21136896, 'steps': 41282, 'batch_loss/train': 0.6056649533566087} +12/28/2021 09:35:24 - INFO - codeparrot_training - Step 41283: {'lr': 3.682040029094666e-05, 'samples': 21137408, 'steps': 41283, 'batch_loss/train': 0.8371935049071908} +12/28/2021 09:35:35 - INFO - codeparrot_training - Step 41284: {'lr': 3.681216651246974e-05, 'samples': 21137920, 'steps': 41284, 'batch_loss/train': 0.7367120889248326} +12/28/2021 09:35:45 - INFO - codeparrot_training - Step 41285: {'lr': 3.68039335815471e-05, 'samples': 21138432, 'steps': 41285, 'batch_loss/train': 1.0235940162092447} +12/28/2021 09:35:56 - INFO - codeparrot_training - Step 41286: {'lr': 3.6795701498211546e-05, 'samples': 21138944, 'steps': 41286, 'batch_loss/train': 0.7064820118248463} +12/28/2021 09:36:08 - INFO - codeparrot_training - Step 41287: {'lr': 3.6787470262495765e-05, 'samples': 21139456, 'steps': 41287, 'batch_loss/train': 0.7086339453235269} +12/28/2021 09:36:19 - INFO - codeparrot_training - Step 41288: {'lr': 3.677923987443249e-05, 'samples': 21139968, 'steps': 41288, 'batch_loss/train': 0.47254529781639576} +12/28/2021 09:36:29 - INFO - codeparrot_training - Step 41289: {'lr': 3.6771010334054444e-05, 'samples': 21140480, 'steps': 41289, 'batch_loss/train': 0.7110097615513951} +12/28/2021 09:36:43 - INFO - codeparrot_training - Step 41290: {'lr': 3.676278164139435e-05, 'samples': 21140992, 'steps': 41290, 'batch_loss/train': 0.6431890819221735} +12/28/2021 09:36:54 - INFO - codeparrot_training - Step 41291: {'lr': 3.675455379648493e-05, 'samples': 21141504, 'steps': 41291, 'batch_loss/train': 0.7224012603983283} +12/28/2021 09:37:05 - INFO - codeparrot_training - Step 41292: {'lr': 3.674632679935897e-05, 'samples': 21142016, 'steps': 41292, 'batch_loss/train': 0.7445216625928879} +12/28/2021 09:37:17 - INFO - codeparrot_training - Step 41293: {'lr': 3.673810065004893e-05, 'samples': 21142528, 'steps': 41293, 'batch_loss/train': 0.630819242913276} +12/28/2021 09:37:27 - INFO - codeparrot_training - Step 41294: {'lr': 3.672987534858776e-05, 'samples': 21143040, 'steps': 41294, 'batch_loss/train': 0.7119573480449617} +12/28/2021 09:37:38 - INFO - codeparrot_training - Step 41295: {'lr': 3.6721650895008115e-05, 'samples': 21143552, 'steps': 41295, 'batch_loss/train': 0.593722794437781} +12/28/2021 09:37:52 - INFO - codeparrot_training - Step 41296: {'lr': 3.6713427289342627e-05, 'samples': 21144064, 'steps': 41296, 'batch_loss/train': 0.6063010385842063} +12/28/2021 09:38:03 - INFO - codeparrot_training - Step 41297: {'lr': 3.670520453162393e-05, 'samples': 21144576, 'steps': 41297, 'batch_loss/train': 0.6845707604661584} +12/28/2021 09:38:13 - INFO - codeparrot_training - Step 41298: {'lr': 3.669698262188495e-05, 'samples': 21145088, 'steps': 41298, 'batch_loss/train': 0.8195094205439091} +12/28/2021 09:38:24 - INFO - codeparrot_training - Step 41299: {'lr': 3.6688761560158155e-05, 'samples': 21145600, 'steps': 41299, 'batch_loss/train': 0.6552643924951553} +12/28/2021 09:38:36 - INFO - codeparrot_training - Step 41300: {'lr': 3.668054134647622e-05, 'samples': 21146112, 'steps': 41300, 'batch_loss/train': 0.6671909254509956} +12/28/2021 09:38:47 - INFO - codeparrot_training - Step 41301: {'lr': 3.6672321980872056e-05, 'samples': 21146624, 'steps': 41301, 'batch_loss/train': 0.6381581394234672} +12/28/2021 09:38:57 - INFO - codeparrot_training - Step 41302: {'lr': 3.666410346337812e-05, 'samples': 21147136, 'steps': 41302, 'batch_loss/train': 0.732788740308024} +12/28/2021 09:39:10 - INFO - codeparrot_training - Step 41303: {'lr': 3.665588579402715e-05, 'samples': 21147648, 'steps': 41303, 'batch_loss/train': 0.6960077900439501} +12/28/2021 09:39:20 - INFO - codeparrot_training - Step 41304: {'lr': 3.664766897285182e-05, 'samples': 21148160, 'steps': 41304, 'batch_loss/train': 0.46704469178803265} +12/28/2021 09:39:31 - INFO - codeparrot_training - Step 41305: {'lr': 3.663945299988483e-05, 'samples': 21148672, 'steps': 41305, 'batch_loss/train': 0.6666191723197699} +12/28/2021 09:39:43 - INFO - codeparrot_training - Step 41306: {'lr': 3.663123787515879e-05, 'samples': 21149184, 'steps': 41306, 'batch_loss/train': 0.7199815157800913} +12/28/2021 09:39:54 - INFO - codeparrot_training - Step 41307: {'lr': 3.662302359870642e-05, 'samples': 21149696, 'steps': 41307, 'batch_loss/train': 0.6530139878159389} +12/28/2021 09:40:04 - INFO - codeparrot_training - Step 41308: {'lr': 3.6614810170560256e-05, 'samples': 21150208, 'steps': 41308, 'batch_loss/train': 0.6634487572591752} +12/28/2021 09:40:15 - INFO - codeparrot_training - Step 41309: {'lr': 3.6606597590753074e-05, 'samples': 21150720, 'steps': 41309, 'batch_loss/train': 0.7754041599109769} +12/28/2021 09:40:29 - INFO - codeparrot_training - Step 41310: {'lr': 3.659838585931757e-05, 'samples': 21151232, 'steps': 41310, 'batch_loss/train': 0.6908998684957623} +12/28/2021 09:40:40 - INFO - codeparrot_training - Step 41311: {'lr': 3.659017497628622e-05, 'samples': 21151744, 'steps': 41311, 'batch_loss/train': 0.6985064735636115} +12/28/2021 09:40:50 - INFO - codeparrot_training - Step 41312: {'lr': 3.658196494169178e-05, 'samples': 21152256, 'steps': 41312, 'batch_loss/train': 0.7703279057750478} +12/28/2021 09:41:02 - INFO - codeparrot_training - Step 41313: {'lr': 3.657375575556684e-05, 'samples': 21152768, 'steps': 41313, 'batch_loss/train': 0.6361422254703939} +12/28/2021 09:41:13 - INFO - codeparrot_training - Step 41314: {'lr': 3.656554741794407e-05, 'samples': 21153280, 'steps': 41314, 'batch_loss/train': 0.7260195069247857} +12/28/2021 09:41:24 - INFO - codeparrot_training - Step 41315: {'lr': 3.6557339928856096e-05, 'samples': 21153792, 'steps': 41315, 'batch_loss/train': 0.7785343704745173} +12/28/2021 09:41:36 - INFO - codeparrot_training - Step 41316: {'lr': 3.654913328833556e-05, 'samples': 21154304, 'steps': 41316, 'batch_loss/train': 0.8272271379828453} +12/28/2021 09:41:46 - INFO - codeparrot_training - Step 41317: {'lr': 3.654092749641505e-05, 'samples': 21154816, 'steps': 41317, 'batch_loss/train': 0.6693582145962864} +12/28/2021 09:41:57 - INFO - codeparrot_training - Step 41318: {'lr': 3.653272255312723e-05, 'samples': 21155328, 'steps': 41318, 'batch_loss/train': 0.8454563235864043} +12/28/2021 09:42:11 - INFO - codeparrot_training - Step 41319: {'lr': 3.652451845850468e-05, 'samples': 21155840, 'steps': 41319, 'batch_loss/train': 0.7278905333951116} +12/28/2021 09:42:22 - INFO - codeparrot_training - Step 41320: {'lr': 3.651631521258006e-05, 'samples': 21156352, 'steps': 41320, 'batch_loss/train': 0.5625256375642493} +12/28/2021 09:42:32 - INFO - codeparrot_training - Step 41321: {'lr': 3.650811281538602e-05, 'samples': 21156864, 'steps': 41321, 'batch_loss/train': 0.7845224412158132} +12/28/2021 09:42:43 - INFO - codeparrot_training - Step 41322: {'lr': 3.649991126695504e-05, 'samples': 21157376, 'steps': 41322, 'batch_loss/train': 0.7891795947216451} +12/28/2021 09:42:55 - INFO - codeparrot_training - Step 41323: {'lr': 3.649171056731976e-05, 'samples': 21157888, 'steps': 41323, 'batch_loss/train': 0.7375099551863968} +12/28/2021 09:43:06 - INFO - codeparrot_training - Step 41324: {'lr': 3.64835107165129e-05, 'samples': 21158400, 'steps': 41324, 'batch_loss/train': 0.7036183471791446} +12/28/2021 09:43:16 - INFO - codeparrot_training - Step 41325: {'lr': 3.647531171456697e-05, 'samples': 21158912, 'steps': 41325, 'batch_loss/train': 0.42858819843968377} +12/28/2021 09:43:28 - INFO - codeparrot_training - Step 41326: {'lr': 3.6467113561514496e-05, 'samples': 21159424, 'steps': 41326, 'batch_loss/train': 0.5434925808513071} +12/28/2021 09:43:39 - INFO - codeparrot_training - Step 41327: {'lr': 3.6458916257388255e-05, 'samples': 21159936, 'steps': 41327, 'batch_loss/train': 0.8104254570789635} +12/28/2021 09:43:50 - INFO - codeparrot_training - Step 41328: {'lr': 3.64507198022207e-05, 'samples': 21160448, 'steps': 41328, 'batch_loss/train': 0.7742325821891427} +12/28/2021 09:44:04 - INFO - codeparrot_training - Step 41329: {'lr': 3.6442524196044435e-05, 'samples': 21160960, 'steps': 41329, 'batch_loss/train': 0.7351267314516008} +12/28/2021 09:44:14 - INFO - codeparrot_training - Step 41330: {'lr': 3.643432943889205e-05, 'samples': 21161472, 'steps': 41330, 'batch_loss/train': 0.814394747838378} +12/28/2021 09:44:25 - INFO - codeparrot_training - Step 41331: {'lr': 3.6426135530796153e-05, 'samples': 21161984, 'steps': 41331, 'batch_loss/train': 0.6849334319122136} +12/28/2021 09:44:36 - INFO - codeparrot_training - Step 41332: {'lr': 3.6417942471789285e-05, 'samples': 21162496, 'steps': 41332, 'batch_loss/train': 0.6789911522064358} +12/28/2021 09:44:48 - INFO - codeparrot_training - Step 41333: {'lr': 3.64097502619041e-05, 'samples': 21163008, 'steps': 41333, 'batch_loss/train': 0.6835669698193669} +12/28/2021 09:44:58 - INFO - codeparrot_training - Step 41334: {'lr': 3.640155890117297e-05, 'samples': 21163520, 'steps': 41334, 'batch_loss/train': 0.7481736754998565} +12/28/2021 09:45:09 - INFO - codeparrot_training - Step 41335: {'lr': 3.639336838962867e-05, 'samples': 21164032, 'steps': 41335, 'batch_loss/train': 0.8020946392789483} +12/28/2021 09:45:23 - INFO - codeparrot_training - Step 41336: {'lr': 3.638517872730371e-05, 'samples': 21164544, 'steps': 41336, 'batch_loss/train': 0.7952767820097506} +12/28/2021 09:45:34 - INFO - codeparrot_training - Step 41337: {'lr': 3.637698991423052e-05, 'samples': 21165056, 'steps': 41337, 'batch_loss/train': 0.7325661228969693} +12/28/2021 09:45:44 - INFO - codeparrot_training - Step 41338: {'lr': 3.636880195044187e-05, 'samples': 21165568, 'steps': 41338, 'batch_loss/train': 0.614356700622011} +12/28/2021 09:45:56 - INFO - codeparrot_training - Step 41339: {'lr': 3.6360614835970165e-05, 'samples': 21166080, 'steps': 41339, 'batch_loss/train': 0.659411586355418} +12/28/2021 09:46:07 - INFO - codeparrot_training - Step 41340: {'lr': 3.635242857084797e-05, 'samples': 21166592, 'steps': 41340, 'batch_loss/train': 0.6550866331672296} +12/28/2021 09:46:18 - INFO - codeparrot_training - Step 41341: {'lr': 3.634424315510784e-05, 'samples': 21167104, 'steps': 41341, 'batch_loss/train': 0.7542017516680062} +12/28/2021 09:46:30 - INFO - codeparrot_training - Step 41342: {'lr': 3.633605858878234e-05, 'samples': 21167616, 'steps': 41342, 'batch_loss/train': 0.7200029110535979} +12/28/2021 09:46:41 - INFO - codeparrot_training - Step 41343: {'lr': 3.632787487190398e-05, 'samples': 21168128, 'steps': 41343, 'batch_loss/train': 0.6976273600012064} +12/28/2021 09:46:51 - INFO - codeparrot_training - Step 41344: {'lr': 3.631969200450533e-05, 'samples': 21168640, 'steps': 41344, 'batch_loss/train': 0.823764665517956} +12/28/2021 09:47:02 - INFO - codeparrot_training - Step 41345: {'lr': 3.631150998661889e-05, 'samples': 21169152, 'steps': 41345, 'batch_loss/train': 0.6541530226822942} +12/28/2021 09:47:14 - INFO - codeparrot_training - Step 41346: {'lr': 3.6303328818277194e-05, 'samples': 21169664, 'steps': 41346, 'batch_loss/train': 0.7191128144040704} +12/28/2021 09:47:25 - INFO - codeparrot_training - Step 41347: {'lr': 3.629514849951285e-05, 'samples': 21170176, 'steps': 41347, 'batch_loss/train': 0.597061128122732} +12/28/2021 09:47:35 - INFO - codeparrot_training - Step 41348: {'lr': 3.628696903035822e-05, 'samples': 21170688, 'steps': 41348, 'batch_loss/train': 0.6845103583764285} +12/28/2021 09:47:49 - INFO - codeparrot_training - Step 41349: {'lr': 3.6278790410845865e-05, 'samples': 21171200, 'steps': 41349, 'batch_loss/train': 1.1107436628080904} +12/28/2021 09:48:00 - INFO - codeparrot_training - Step 41350: {'lr': 3.627061264100848e-05, 'samples': 21171712, 'steps': 41350, 'batch_loss/train': 1.0229457193054259} +12/28/2021 09:48:11 - INFO - codeparrot_training - Step 41351: {'lr': 3.6262435720878336e-05, 'samples': 21172224, 'steps': 41351, 'batch_loss/train': 0.7660776292905211} +12/28/2021 09:48:23 - INFO - codeparrot_training - Step 41352: {'lr': 3.625425965048801e-05, 'samples': 21172736, 'steps': 41352, 'batch_loss/train': 0.7684371163486503} +12/28/2021 09:48:34 - INFO - codeparrot_training - Step 41353: {'lr': 3.624608442987015e-05, 'samples': 21173248, 'steps': 41353, 'batch_loss/train': 0.7952549690380692} +12/28/2021 09:48:44 - INFO - codeparrot_training - Step 41354: {'lr': 3.6237910059057104e-05, 'samples': 21173760, 'steps': 41354, 'batch_loss/train': 0.6049908443819731} +12/28/2021 09:48:55 - INFO - codeparrot_training - Step 41355: {'lr': 3.62297365380814e-05, 'samples': 21174272, 'steps': 41355, 'batch_loss/train': 0.6976963421329856} +12/28/2021 09:49:09 - INFO - codeparrot_training - Step 41356: {'lr': 3.622156386697553e-05, 'samples': 21174784, 'steps': 41356, 'batch_loss/train': 0.758418507874012} +12/28/2021 09:49:19 - INFO - codeparrot_training - Step 41357: {'lr': 3.6213392045772034e-05, 'samples': 21175296, 'steps': 41357, 'batch_loss/train': 0.802768430672586} +12/28/2021 09:49:30 - INFO - codeparrot_training - Step 41358: {'lr': 3.620522107450336e-05, 'samples': 21175808, 'steps': 41358, 'batch_loss/train': 0.6922384053468704} +12/28/2021 09:49:42 - INFO - codeparrot_training - Step 41359: {'lr': 3.619705095320205e-05, 'samples': 21176320, 'steps': 41359, 'batch_loss/train': 0.7796165393665433} +12/28/2021 09:49:53 - INFO - codeparrot_training - Step 41360: {'lr': 3.6188881681900435e-05, 'samples': 21176832, 'steps': 41360, 'batch_loss/train': 0.776460996363312} +12/28/2021 09:50:03 - INFO - codeparrot_training - Step 41361: {'lr': 3.618071326063113e-05, 'samples': 21177344, 'steps': 41361, 'batch_loss/train': 0.7465294422581792} +12/28/2021 09:50:16 - INFO - codeparrot_training - Step 41362: {'lr': 3.6172545689426654e-05, 'samples': 21177856, 'steps': 41362, 'batch_loss/train': 0.7396129202097654} +12/28/2021 09:50:26 - INFO - codeparrot_training - Step 41363: {'lr': 3.6164378968319285e-05, 'samples': 21178368, 'steps': 41363, 'batch_loss/train': 0.6960986114572734} +12/28/2021 09:50:37 - INFO - codeparrot_training - Step 41364: {'lr': 3.615621309734163e-05, 'samples': 21178880, 'steps': 41364, 'batch_loss/train': 0.7891762480139732} +12/28/2021 09:50:51 - INFO - codeparrot_training - Step 41365: {'lr': 3.614804807652622e-05, 'samples': 21179392, 'steps': 41365, 'batch_loss/train': 0.7167719537392259} +12/28/2021 09:51:01 - INFO - codeparrot_training - Step 41366: {'lr': 3.613988390590534e-05, 'samples': 21179904, 'steps': 41366, 'batch_loss/train': 0.669820950308349} +12/28/2021 09:51:12 - INFO - codeparrot_training - Step 41367: {'lr': 3.613172058551154e-05, 'samples': 21180416, 'steps': 41367, 'batch_loss/train': 0.626499067991972} +12/28/2021 09:51:23 - INFO - codeparrot_training - Step 41368: {'lr': 3.6123558115377264e-05, 'samples': 21180928, 'steps': 41368, 'batch_loss/train': 0.661468249745667} +12/28/2021 09:51:35 - INFO - codeparrot_training - Step 41369: {'lr': 3.611539649553497e-05, 'samples': 21181440, 'steps': 41369, 'batch_loss/train': 0.7777959262020886} +12/28/2021 09:51:45 - INFO - codeparrot_training - Step 41370: {'lr': 3.6107235726017094e-05, 'samples': 21181952, 'steps': 41370, 'batch_loss/train': 0.7369706449098885} +12/28/2021 09:51:56 - INFO - codeparrot_training - Step 41371: {'lr': 3.6099075806856094e-05, 'samples': 21182464, 'steps': 41371, 'batch_loss/train': 0.7136802095919847} +12/28/2021 09:52:08 - INFO - codeparrot_training - Step 41372: {'lr': 3.60909167380844e-05, 'samples': 21182976, 'steps': 41372, 'batch_loss/train': 0.7265907065011561} +12/28/2021 09:52:18 - INFO - codeparrot_training - Step 41373: {'lr': 3.608275851973447e-05, 'samples': 21183488, 'steps': 41373, 'batch_loss/train': 0.7033501705154777} +12/28/2021 09:52:29 - INFO - codeparrot_training - Step 41374: {'lr': 3.607460115183869e-05, 'samples': 21184000, 'steps': 41374, 'batch_loss/train': 0.7423858223482966} +12/28/2021 09:52:43 - INFO - codeparrot_training - Step 41375: {'lr': 3.606644463442954e-05, 'samples': 21184512, 'steps': 41375, 'batch_loss/train': 0.726475334726274} +12/28/2021 09:52:54 - INFO - codeparrot_training - Step 41376: {'lr': 3.605828896753946e-05, 'samples': 21185024, 'steps': 41376, 'batch_loss/train': 0.8106808278243989} +12/28/2021 09:53:04 - INFO - codeparrot_training - Step 41377: {'lr': 3.605013415120081e-05, 'samples': 21185536, 'steps': 41377, 'batch_loss/train': 0.6872041271999478} +12/28/2021 09:53:15 - INFO - codeparrot_training - Step 41378: {'lr': 3.604198018544597e-05, 'samples': 21186048, 'steps': 41378, 'batch_loss/train': 0.7492382526397705} +12/28/2021 09:53:27 - INFO - codeparrot_training - Step 41379: {'lr': 3.603382707030756e-05, 'samples': 21186560, 'steps': 41379, 'batch_loss/train': 0.7471189559437335} +12/28/2021 09:53:38 - INFO - codeparrot_training - Step 41380: {'lr': 3.6025674805817807e-05, 'samples': 21187072, 'steps': 41380, 'batch_loss/train': 1.5515589825809002} +12/28/2021 09:53:48 - INFO - codeparrot_training - Step 41381: {'lr': 3.601752339200915e-05, 'samples': 21187584, 'steps': 41381, 'batch_loss/train': 0.7226838070782833} +12/28/2021 09:54:01 - INFO - codeparrot_training - Step 41382: {'lr': 3.6009372828914036e-05, 'samples': 21188096, 'steps': 41382, 'batch_loss/train': 0.8541686469689012} +12/28/2021 09:54:12 - INFO - codeparrot_training - Step 41383: {'lr': 3.6001223116564866e-05, 'samples': 21188608, 'steps': 41383, 'batch_loss/train': 0.7891027135774493} +12/28/2021 09:54:22 - INFO - codeparrot_training - Step 41384: {'lr': 3.599307425499404e-05, 'samples': 21189120, 'steps': 41384, 'batch_loss/train': 0.7292692735791206} +12/28/2021 09:54:36 - INFO - codeparrot_training - Step 41385: {'lr': 3.598492624423397e-05, 'samples': 21189632, 'steps': 41385, 'batch_loss/train': 0.7556986237177625} +12/28/2021 09:54:47 - INFO - codeparrot_training - Step 41386: {'lr': 3.597677908431693e-05, 'samples': 21190144, 'steps': 41386, 'batch_loss/train': 0.7222819686867297} +12/28/2021 09:54:57 - INFO - codeparrot_training - Step 41387: {'lr': 3.5968632775275454e-05, 'samples': 21190656, 'steps': 41387, 'batch_loss/train': 0.6699817516491748} +12/28/2021 09:55:08 - INFO - codeparrot_training - Step 41388: {'lr': 3.5960487317141936e-05, 'samples': 21191168, 'steps': 41388, 'batch_loss/train': 0.7519885208457708} +12/28/2021 09:55:20 - INFO - codeparrot_training - Step 41389: {'lr': 3.59523427099486e-05, 'samples': 21191680, 'steps': 41389, 'batch_loss/train': 0.5584596438857261} +12/28/2021 09:55:31 - INFO - codeparrot_training - Step 41390: {'lr': 3.594419895372797e-05, 'samples': 21192192, 'steps': 41390, 'batch_loss/train': 0.6991808153688908} +12/28/2021 09:55:41 - INFO - codeparrot_training - Step 41391: {'lr': 3.593605604851244e-05, 'samples': 21192704, 'steps': 41391, 'batch_loss/train': 0.6592591116204858} +12/28/2021 09:55:54 - INFO - codeparrot_training - Step 41392: {'lr': 3.592791399433429e-05, 'samples': 21193216, 'steps': 41392, 'batch_loss/train': 0.7549303029663861} +12/28/2021 09:56:04 - INFO - codeparrot_training - Step 41393: {'lr': 3.591977279122591e-05, 'samples': 21193728, 'steps': 41393, 'batch_loss/train': 0.6978828432038426} +12/28/2021 09:56:15 - INFO - codeparrot_training - Step 41394: {'lr': 3.591163243921966e-05, 'samples': 21194240, 'steps': 41394, 'batch_loss/train': 0.699662746861577} +12/28/2021 09:56:29 - INFO - codeparrot_training - Step 41395: {'lr': 3.5903492938347935e-05, 'samples': 21194752, 'steps': 41395, 'batch_loss/train': 0.6805013036355376} +12/28/2021 09:56:39 - INFO - codeparrot_training - Step 41396: {'lr': 3.58953542886431e-05, 'samples': 21195264, 'steps': 41396, 'batch_loss/train': 0.6624349676130805} +12/28/2021 09:56:50 - INFO - codeparrot_training - Step 41397: {'lr': 3.588721649013746e-05, 'samples': 21195776, 'steps': 41397, 'batch_loss/train': 0.6774794205557555} +12/28/2021 09:57:01 - INFO - codeparrot_training - Step 41398: {'lr': 3.587907954286343e-05, 'samples': 21196288, 'steps': 41398, 'batch_loss/train': 0.7410375126637518} +12/28/2021 09:57:13 - INFO - codeparrot_training - Step 41399: {'lr': 3.587094344685332e-05, 'samples': 21196800, 'steps': 41399, 'batch_loss/train': 0.7619422231800854} +12/28/2021 09:57:24 - INFO - codeparrot_training - Step 41400: {'lr': 3.5862808202139493e-05, 'samples': 21197312, 'steps': 41400, 'batch_loss/train': 0.5348813589371275} +12/28/2021 09:57:34 - INFO - codeparrot_training - Step 41401: {'lr': 3.585467380875426e-05, 'samples': 21197824, 'steps': 41401, 'batch_loss/train': 0.7095349375158548} +12/28/2021 09:57:46 - INFO - codeparrot_training - Step 41402: {'lr': 3.5846540266730064e-05, 'samples': 21198336, 'steps': 41402, 'batch_loss/train': 0.702797326259315} +12/28/2021 09:57:57 - INFO - codeparrot_training - Step 41403: {'lr': 3.583840757609913e-05, 'samples': 21198848, 'steps': 41403, 'batch_loss/train': 0.7073953752405941} +12/28/2021 09:58:08 - INFO - codeparrot_training - Step 41404: {'lr': 3.5830275736893734e-05, 'samples': 21199360, 'steps': 41404, 'batch_loss/train': 0.6398974065668881} +12/28/2021 09:58:21 - INFO - codeparrot_training - Step 41405: {'lr': 3.5822144749146416e-05, 'samples': 21199872, 'steps': 41405, 'batch_loss/train': 0.6663378071971238} +12/28/2021 09:58:32 - INFO - codeparrot_training - Step 41406: {'lr': 3.581401461288933e-05, 'samples': 21200384, 'steps': 41406, 'batch_loss/train': 0.721143067930825} +12/28/2021 09:58:43 - INFO - codeparrot_training - Step 41407: {'lr': 3.580588532815482e-05, 'samples': 21200896, 'steps': 41407, 'batch_loss/train': 0.6976943309418857} +12/28/2021 09:58:53 - INFO - codeparrot_training - Step 41408: {'lr': 3.5797756894975274e-05, 'samples': 21201408, 'steps': 41408, 'batch_loss/train': 0.7480922872200608} +12/28/2021 09:59:05 - INFO - codeparrot_training - Step 41409: {'lr': 3.578962931338292e-05, 'samples': 21201920, 'steps': 41409, 'batch_loss/train': 0.7598857771372423} +12/28/2021 09:59:16 - INFO - codeparrot_training - Step 41410: {'lr': 3.5781502583410146e-05, 'samples': 21202432, 'steps': 41410, 'batch_loss/train': 0.6712366412393749} +12/28/2021 09:59:27 - INFO - codeparrot_training - Step 41411: {'lr': 3.577337670508923e-05, 'samples': 21202944, 'steps': 41411, 'batch_loss/train': 0.7067398023791611} +12/28/2021 09:59:41 - INFO - codeparrot_training - Step 41412: {'lr': 3.5765251678452487e-05, 'samples': 21203456, 'steps': 41412, 'batch_loss/train': 0.7014013021253049} +12/28/2021 09:59:51 - INFO - codeparrot_training - Step 41413: {'lr': 3.575712750353222e-05, 'samples': 21203968, 'steps': 41413, 'batch_loss/train': 0.7436866108328104} +12/28/2021 10:00:02 - INFO - codeparrot_training - Step 41414: {'lr': 3.5749004180360755e-05, 'samples': 21204480, 'steps': 41414, 'batch_loss/train': 0.6309584584087133} +12/28/2021 10:00:14 - INFO - codeparrot_training - Step 41415: {'lr': 3.5740881708970246e-05, 'samples': 21204992, 'steps': 41415, 'batch_loss/train': 0.779998348094523} +12/28/2021 10:00:25 - INFO - codeparrot_training - Step 41416: {'lr': 3.5732760089393135e-05, 'samples': 21205504, 'steps': 41416, 'batch_loss/train': 0.7734233979135752} +12/28/2021 10:00:35 - INFO - codeparrot_training - Step 41417: {'lr': 3.572463932166173e-05, 'samples': 21206016, 'steps': 41417, 'batch_loss/train': 0.7196399802342057} +12/28/2021 10:00:47 - INFO - codeparrot_training - Step 41418: {'lr': 3.5716519405808194e-05, 'samples': 21206528, 'steps': 41418, 'batch_loss/train': 0.658553677611053} +12/28/2021 10:00:58 - INFO - codeparrot_training - Step 41419: {'lr': 3.5708400341864813e-05, 'samples': 21207040, 'steps': 41419, 'batch_loss/train': 1.5773012647405267} +12/28/2021 10:01:09 - INFO - codeparrot_training - Step 41420: {'lr': 3.5700282129864034e-05, 'samples': 21207552, 'steps': 41420, 'batch_loss/train': 0.7552374671213329} +12/28/2021 10:01:19 - INFO - codeparrot_training - Step 41421: {'lr': 3.569216476983794e-05, 'samples': 21208064, 'steps': 41421, 'batch_loss/train': 0.733895187266171} +12/28/2021 10:01:31 - INFO - codeparrot_training - Step 41422: {'lr': 3.5684048261818925e-05, 'samples': 21208576, 'steps': 41422, 'batch_loss/train': 0.6139865994337015} +12/28/2021 10:01:42 - INFO - codeparrot_training - Step 41423: {'lr': 3.567593260583918e-05, 'samples': 21209088, 'steps': 41423, 'batch_loss/train': 0.745440058875829} +12/28/2021 10:01:53 - INFO - codeparrot_training - Step 41424: {'lr': 3.566781780193099e-05, 'samples': 21209600, 'steps': 41424, 'batch_loss/train': 0.7986506698653102} +12/28/2021 10:02:07 - INFO - codeparrot_training - Step 41425: {'lr': 3.565970385012665e-05, 'samples': 21210112, 'steps': 41425, 'batch_loss/train': 0.675746824243106} +12/28/2021 10:02:17 - INFO - codeparrot_training - Step 41426: {'lr': 3.565159075045837e-05, 'samples': 21210624, 'steps': 41426, 'batch_loss/train': 0.803439955227077} +12/28/2021 10:02:28 - INFO - codeparrot_training - Step 41427: {'lr': 3.564347850295846e-05, 'samples': 21211136, 'steps': 41427, 'batch_loss/train': 0.6059773583256174} +12/28/2021 10:02:40 - INFO - codeparrot_training - Step 41428: {'lr': 3.5635367107659186e-05, 'samples': 21211648, 'steps': 41428, 'batch_loss/train': 0.6882062670774758} +12/28/2021 10:02:51 - INFO - codeparrot_training - Step 41429: {'lr': 3.56272565645927e-05, 'samples': 21212160, 'steps': 41429, 'batch_loss/train': 0.7103268513455987} +12/28/2021 10:03:01 - INFO - codeparrot_training - Step 41430: {'lr': 3.5619146873791225e-05, 'samples': 21212672, 'steps': 41430, 'batch_loss/train': 0.7175955334678292} +12/28/2021 10:03:12 - INFO - codeparrot_training - Step 41431: {'lr': 3.56110380352872e-05, 'samples': 21213184, 'steps': 41431, 'batch_loss/train': 0.7485152399167418} +12/28/2021 10:03:24 - INFO - codeparrot_training - Step 41432: {'lr': 3.560293004911269e-05, 'samples': 21213696, 'steps': 41432, 'batch_loss/train': 0.744437396991998} +12/28/2021 10:03:35 - INFO - codeparrot_training - Step 41433: {'lr': 3.5594822915299975e-05, 'samples': 21214208, 'steps': 41433, 'batch_loss/train': 0.7377573754638433} +12/28/2021 10:03:46 - INFO - codeparrot_training - Step 41434: {'lr': 3.558671663388127e-05, 'samples': 21214720, 'steps': 41434, 'batch_loss/train': 0.8680910468101501} +12/28/2021 10:03:59 - INFO - codeparrot_training - Step 41435: {'lr': 3.557861120488884e-05, 'samples': 21215232, 'steps': 41435, 'batch_loss/train': 0.5910369991324842} +12/28/2021 10:04:10 - INFO - codeparrot_training - Step 41436: {'lr': 3.557050662835487e-05, 'samples': 21215744, 'steps': 41436, 'batch_loss/train': 0.7173869274556637} +12/28/2021 10:04:21 - INFO - codeparrot_training - Step 41437: {'lr': 3.556240290431162e-05, 'samples': 21216256, 'steps': 41437, 'batch_loss/train': 0.7027274118736386} +12/28/2021 10:04:33 - INFO - codeparrot_training - Step 41438: {'lr': 3.555430003279128e-05, 'samples': 21216768, 'steps': 41438, 'batch_loss/train': 0.7303887428715825} +12/28/2021 10:04:44 - INFO - codeparrot_training - Step 41439: {'lr': 3.5546198013826054e-05, 'samples': 21217280, 'steps': 41439, 'batch_loss/train': 0.7853540927171707} +12/28/2021 10:04:54 - INFO - codeparrot_training - Step 41440: {'lr': 3.5538096847448244e-05, 'samples': 21217792, 'steps': 41440, 'batch_loss/train': 0.9499731245450675} +12/28/2021 10:05:05 - INFO - codeparrot_training - Step 41441: {'lr': 3.5529996533689876e-05, 'samples': 21218304, 'steps': 41441, 'batch_loss/train': 0.8124514520168304} +12/28/2021 10:05:19 - INFO - codeparrot_training - Step 41442: {'lr': 3.5521897072583325e-05, 'samples': 21218816, 'steps': 41442, 'batch_loss/train': 0.7255048165097833} +12/28/2021 10:05:29 - INFO - codeparrot_training - Step 41443: {'lr': 3.551379846416075e-05, 'samples': 21219328, 'steps': 41443, 'batch_loss/train': 0.7481932644732296} +12/28/2021 10:05:40 - INFO - codeparrot_training - Step 41444: {'lr': 3.55057007084543e-05, 'samples': 21219840, 'steps': 41444, 'batch_loss/train': 0.6710994336754084} +12/28/2021 10:05:52 - INFO - codeparrot_training - Step 41445: {'lr': 3.549760380549613e-05, 'samples': 21220352, 'steps': 41445, 'batch_loss/train': 0.6870848406106234} +12/28/2021 10:06:03 - INFO - codeparrot_training - Step 41446: {'lr': 3.548950775531859e-05, 'samples': 21220864, 'steps': 41446, 'batch_loss/train': 0.7304796847165562} +12/28/2021 10:06:13 - INFO - codeparrot_training - Step 41447: {'lr': 3.548141255795373e-05, 'samples': 21221376, 'steps': 41447, 'batch_loss/train': 0.6976257711648941} +12/28/2021 10:06:25 - INFO - codeparrot_training - Step 41448: {'lr': 3.547331821343378e-05, 'samples': 21221888, 'steps': 41448, 'batch_loss/train': 0.6722100544720888} +12/28/2021 10:06:36 - INFO - codeparrot_training - Step 41449: {'lr': 3.546522472179089e-05, 'samples': 21222400, 'steps': 41449, 'batch_loss/train': 0.7539834575727582} +12/28/2021 10:06:47 - INFO - codeparrot_training - Step 41450: {'lr': 3.545713208305728e-05, 'samples': 21222912, 'steps': 41450, 'batch_loss/train': 0.7034709630534053} +12/28/2021 10:06:57 - INFO - codeparrot_training - Step 41451: {'lr': 3.544904029726512e-05, 'samples': 21223424, 'steps': 41451, 'batch_loss/train': 0.7056423723697662} +12/28/2021 10:07:11 - INFO - codeparrot_training - Step 41452: {'lr': 3.5440949364446526e-05, 'samples': 21223936, 'steps': 41452, 'batch_loss/train': 0.6302275392226875} +12/28/2021 10:07:22 - INFO - codeparrot_training - Step 41453: {'lr': 3.5432859284633745e-05, 'samples': 21224448, 'steps': 41453, 'batch_loss/train': 0.8590461824787781} +12/28/2021 10:07:33 - INFO - codeparrot_training - Step 41454: {'lr': 3.542477005785888e-05, 'samples': 21224960, 'steps': 41454, 'batch_loss/train': 0.7278438843786716} +12/28/2021 10:07:45 - INFO - codeparrot_training - Step 41455: {'lr': 3.541668168415415e-05, 'samples': 21225472, 'steps': 41455, 'batch_loss/train': 0.7649635598063469} +12/28/2021 10:07:56 - INFO - codeparrot_training - Step 41456: {'lr': 3.5408594163551575e-05, 'samples': 21225984, 'steps': 41456, 'batch_loss/train': 0.6600856208242476} +12/28/2021 10:08:06 - INFO - codeparrot_training - Step 41457: {'lr': 3.540050749608351e-05, 'samples': 21226496, 'steps': 41457, 'batch_loss/train': 0.6556982174515724} +12/28/2021 10:08:18 - INFO - codeparrot_training - Step 41458: {'lr': 3.539242168178194e-05, 'samples': 21227008, 'steps': 41458, 'batch_loss/train': 0.6940722000435926} +12/28/2021 10:08:29 - INFO - codeparrot_training - Step 41459: {'lr': 3.538433672067906e-05, 'samples': 21227520, 'steps': 41459, 'batch_loss/train': 0.7404183517210186} +12/28/2021 10:08:39 - INFO - codeparrot_training - Step 41460: {'lr': 3.537625261280705e-05, 'samples': 21228032, 'steps': 41460, 'batch_loss/train': 0.5952767922135536} +12/28/2021 10:08:50 - INFO - codeparrot_training - Step 41461: {'lr': 3.5368169358198e-05, 'samples': 21228544, 'steps': 41461, 'batch_loss/train': 0.6943807132774964} +12/28/2021 10:09:02 - INFO - codeparrot_training - Step 41462: {'lr': 3.5360086956884056e-05, 'samples': 21229056, 'steps': 41462, 'batch_loss/train': 0.6756628663279116} +12/28/2021 10:09:13 - INFO - codeparrot_training - Step 41463: {'lr': 3.535200540889738e-05, 'samples': 21229568, 'steps': 41463, 'batch_loss/train': 0.7914465002249926} +12/28/2021 10:09:23 - INFO - codeparrot_training - Step 41464: {'lr': 3.5343924714270085e-05, 'samples': 21230080, 'steps': 41464, 'batch_loss/train': 0.696938862092793} +12/28/2021 10:09:39 - INFO - codeparrot_training - Step 41465: {'lr': 3.533584487303429e-05, 'samples': 21230592, 'steps': 41465, 'batch_loss/train': 0.6638363052916247} +12/28/2021 10:09:50 - INFO - codeparrot_training - Step 41466: {'lr': 3.5327765885222154e-05, 'samples': 21231104, 'steps': 41466, 'batch_loss/train': 0.6644636560231447} +12/28/2021 10:10:00 - INFO - codeparrot_training - Step 41467: {'lr': 3.531968775086569e-05, 'samples': 21231616, 'steps': 41467, 'batch_loss/train': 0.7095117443241179} +12/28/2021 10:10:13 - INFO - codeparrot_training - Step 41468: {'lr': 3.531161046999712e-05, 'samples': 21232128, 'steps': 41468, 'batch_loss/train': 0.6860526840901002} +12/28/2021 10:10:23 - INFO - codeparrot_training - Step 41469: {'lr': 3.5303534042648574e-05, 'samples': 21232640, 'steps': 41469, 'batch_loss/train': 0.93458566442132} +12/28/2021 10:10:34 - INFO - codeparrot_training - Step 41470: {'lr': 3.529545846885207e-05, 'samples': 21233152, 'steps': 41470, 'batch_loss/train': 0.6569227868458256} +12/28/2021 10:10:44 - INFO - codeparrot_training - Step 41471: {'lr': 3.5287383748639713e-05, 'samples': 21233664, 'steps': 41471, 'batch_loss/train': 0.6676495219580829} +12/28/2021 10:10:58 - INFO - codeparrot_training - Step 41472: {'lr': 3.5279309882043723e-05, 'samples': 21234176, 'steps': 41472, 'batch_loss/train': 0.7009779454674572} +12/28/2021 10:11:09 - INFO - codeparrot_training - Step 41473: {'lr': 3.527123686909608e-05, 'samples': 21234688, 'steps': 41473, 'batch_loss/train': 0.7674765530973673} +12/28/2021 10:11:20 - INFO - codeparrot_training - Step 41474: {'lr': 3.526316470982893e-05, 'samples': 21235200, 'steps': 41474, 'batch_loss/train': 0.6895979577675462} +12/28/2021 10:11:32 - INFO - codeparrot_training - Step 41475: {'lr': 3.525509340427435e-05, 'samples': 21235712, 'steps': 41475, 'batch_loss/train': 0.7154574510641396} +12/28/2021 10:11:42 - INFO - codeparrot_training - Step 41476: {'lr': 3.524702295246443e-05, 'samples': 21236224, 'steps': 41476, 'batch_loss/train': 0.5788409428205341} +12/28/2021 10:11:53 - INFO - codeparrot_training - Step 41477: {'lr': 3.523895335443125e-05, 'samples': 21236736, 'steps': 41477, 'batch_loss/train': 0.572933447547257} +12/28/2021 10:12:05 - INFO - codeparrot_training - Step 41478: {'lr': 3.523088461020693e-05, 'samples': 21237248, 'steps': 41478, 'batch_loss/train': 0.7700228816829622} +12/28/2021 10:12:16 - INFO - codeparrot_training - Step 41479: {'lr': 3.5222816719823494e-05, 'samples': 21237760, 'steps': 41479, 'batch_loss/train': 0.6618377935374156} +12/28/2021 10:12:26 - INFO - codeparrot_training - Step 41480: {'lr': 3.521474968331303e-05, 'samples': 21238272, 'steps': 41480, 'batch_loss/train': 0.651579761877656} +12/28/2021 10:12:40 - INFO - codeparrot_training - Step 41481: {'lr': 3.5206683500707714e-05, 'samples': 21238784, 'steps': 41481, 'batch_loss/train': 0.70101447426714} +12/28/2021 10:12:51 - INFO - codeparrot_training - Step 41482: {'lr': 3.51986181720394e-05, 'samples': 21239296, 'steps': 41482, 'batch_loss/train': 0.6989846425130963} +12/28/2021 10:13:01 - INFO - codeparrot_training - Step 41483: {'lr': 3.5190553697340365e-05, 'samples': 21239808, 'steps': 41483, 'batch_loss/train': 0.6929174528922886} +12/28/2021 10:13:12 - INFO - codeparrot_training - Step 41484: {'lr': 3.518249007664254e-05, 'samples': 21240320, 'steps': 41484, 'batch_loss/train': 0.7727010906673968} +12/28/2021 10:13:24 - INFO - codeparrot_training - Step 41485: {'lr': 3.517442730997797e-05, 'samples': 21240832, 'steps': 41485, 'batch_loss/train': 0.7140384912490845} +12/28/2021 10:13:35 - INFO - codeparrot_training - Step 41486: {'lr': 3.516636539737886e-05, 'samples': 21241344, 'steps': 41486, 'batch_loss/train': 0.7417801870033145} +12/28/2021 10:13:46 - INFO - codeparrot_training - Step 41487: {'lr': 3.515830433887712e-05, 'samples': 21241856, 'steps': 41487, 'batch_loss/train': 0.7747007198631763} +12/28/2021 10:13:58 - INFO - codeparrot_training - Step 41488: {'lr': 3.515024413450482e-05, 'samples': 21242368, 'steps': 41488, 'batch_loss/train': 0.6816759537905455} +12/28/2021 10:14:08 - INFO - codeparrot_training - Step 41489: {'lr': 3.514218478429404e-05, 'samples': 21242880, 'steps': 41489, 'batch_loss/train': 0.6840355889871716} +12/28/2021 10:14:19 - INFO - codeparrot_training - Step 41490: {'lr': 3.5134126288276805e-05, 'samples': 21243392, 'steps': 41490, 'batch_loss/train': 0.7382751135155559} +12/28/2021 10:14:31 - INFO - codeparrot_training - Step 41491: {'lr': 3.512606864648515e-05, 'samples': 21243904, 'steps': 41491, 'batch_loss/train': 0.6929496573284268} +12/28/2021 10:14:42 - INFO - codeparrot_training - Step 41492: {'lr': 3.511801185895119e-05, 'samples': 21244416, 'steps': 41492, 'batch_loss/train': 0.7450800538063049} +12/28/2021 10:14:52 - INFO - codeparrot_training - Step 41493: {'lr': 3.510995592570676e-05, 'samples': 21244928, 'steps': 41493, 'batch_loss/train': 0.5736684853618499} +12/28/2021 10:15:03 - INFO - codeparrot_training - Step 41494: {'lr': 3.510190084678405e-05, 'samples': 21245440, 'steps': 41494, 'batch_loss/train': 0.8723988849669695} +12/28/2021 10:15:17 - INFO - codeparrot_training - Step 41495: {'lr': 3.50938466222151e-05, 'samples': 21245952, 'steps': 41495, 'batch_loss/train': 0.6615924679208547} +12/28/2021 10:15:27 - INFO - codeparrot_training - Step 41496: {'lr': 3.508579325203182e-05, 'samples': 21246464, 'steps': 41496, 'batch_loss/train': 0.7264257939532399} +12/28/2021 10:15:38 - INFO - codeparrot_training - Step 41497: {'lr': 3.507774073626621e-05, 'samples': 21246976, 'steps': 41497, 'batch_loss/train': 0.6520982615766115} +12/28/2021 10:15:50 - INFO - codeparrot_training - Step 41498: {'lr': 3.506968907495045e-05, 'samples': 21247488, 'steps': 41498, 'batch_loss/train': 0.7089025129098445} +12/28/2021 10:16:01 - INFO - codeparrot_training - Step 41499: {'lr': 3.506163826811642e-05, 'samples': 21248000, 'steps': 41499, 'batch_loss/train': 0.8701697699725628} +12/28/2021 10:16:11 - INFO - codeparrot_training - Step 41500: {'lr': 3.505358831579611e-05, 'samples': 21248512, 'steps': 41500, 'batch_loss/train': 0.7204147554002702} +12/28/2021 10:16:26 - INFO - codeparrot_training - Step 41501: {'lr': 3.5045539218021675e-05, 'samples': 21249024, 'steps': 41501, 'batch_loss/train': 0.74898443184793} +12/28/2021 10:16:36 - INFO - codeparrot_training - Step 41502: {'lr': 3.503749097482495e-05, 'samples': 21249536, 'steps': 41502, 'batch_loss/train': 0.5226102356100455} +12/28/2021 10:16:47 - INFO - codeparrot_training - Step 41503: {'lr': 3.502944358623797e-05, 'samples': 21250048, 'steps': 41503, 'batch_loss/train': 0.5909537689294666} +12/28/2021 10:16:59 - INFO - codeparrot_training - Step 41504: {'lr': 3.5021397052292794e-05, 'samples': 21250560, 'steps': 41504, 'batch_loss/train': 0.7644563224166632} +12/28/2021 10:17:10 - INFO - codeparrot_training - Step 41505: {'lr': 3.501335137302136e-05, 'samples': 21251072, 'steps': 41505, 'batch_loss/train': 0.773739037103951} +12/28/2021 10:17:20 - INFO - codeparrot_training - Step 41506: {'lr': 3.5005306548455654e-05, 'samples': 21251584, 'steps': 41506, 'batch_loss/train': 0.7556928126141429} +12/28/2021 10:17:31 - INFO - codeparrot_training - Step 41507: {'lr': 3.499726257862773e-05, 'samples': 21252096, 'steps': 41507, 'batch_loss/train': 0.7263468806631863} +12/28/2021 10:17:43 - INFO - codeparrot_training - Step 41508: {'lr': 3.4989219463569395e-05, 'samples': 21252608, 'steps': 41508, 'batch_loss/train': 0.6638321182690561} +12/28/2021 10:17:54 - INFO - codeparrot_training - Step 41509: {'lr': 3.498117720331282e-05, 'samples': 21253120, 'steps': 41509, 'batch_loss/train': 0.7685074498876929} +12/28/2021 10:18:04 - INFO - codeparrot_training - Step 41510: {'lr': 3.497313579788991e-05, 'samples': 21253632, 'steps': 41510, 'batch_loss/train': 0.7978712692856789} +12/28/2021 10:18:18 - INFO - codeparrot_training - Step 41511: {'lr': 3.496509524733252e-05, 'samples': 21254144, 'steps': 41511, 'batch_loss/train': 0.7532082623802125} +12/28/2021 10:18:29 - INFO - codeparrot_training - Step 41512: {'lr': 3.4957055551672865e-05, 'samples': 21254656, 'steps': 41512, 'batch_loss/train': 0.6150559587404132} +12/28/2021 10:18:40 - INFO - codeparrot_training - Step 41513: {'lr': 3.494901671094267e-05, 'samples': 21255168, 'steps': 41513, 'batch_loss/train': 0.7512076604180038} +12/28/2021 10:18:52 - INFO - codeparrot_training - Step 41514: {'lr': 3.4940978725173986e-05, 'samples': 21255680, 'steps': 41514, 'batch_loss/train': 0.7143118041567504} +12/28/2021 10:19:03 - INFO - codeparrot_training - Step 41515: {'lr': 3.4932941594398775e-05, 'samples': 21256192, 'steps': 41515, 'batch_loss/train': 0.7866953844204545} +12/28/2021 10:19:13 - INFO - codeparrot_training - Step 41516: {'lr': 3.492490531864897e-05, 'samples': 21256704, 'steps': 41516, 'batch_loss/train': 0.7360972058959305} +12/28/2021 10:19:24 - INFO - codeparrot_training - Step 41517: {'lr': 3.491686989795656e-05, 'samples': 21257216, 'steps': 41517, 'batch_loss/train': 0.6515723783522844} +12/28/2021 10:19:38 - INFO - codeparrot_training - Step 41518: {'lr': 3.4908835332353505e-05, 'samples': 21257728, 'steps': 41518, 'batch_loss/train': 0.6575706470757723} +12/28/2021 10:19:48 - INFO - codeparrot_training - Step 41519: {'lr': 3.490080162187159e-05, 'samples': 21258240, 'steps': 41519, 'batch_loss/train': 0.7216002019122243} +12/28/2021 10:19:59 - INFO - codeparrot_training - Step 41520: {'lr': 3.48927687665429e-05, 'samples': 21258752, 'steps': 41520, 'batch_loss/train': 0.6853739202488214} +12/28/2021 10:20:11 - INFO - codeparrot_training - Step 41521: {'lr': 3.488473676639944e-05, 'samples': 21259264, 'steps': 41521, 'batch_loss/train': 0.7325316313654184} +12/28/2021 10:20:22 - INFO - codeparrot_training - Step 41522: {'lr': 3.487670562147291e-05, 'samples': 21259776, 'steps': 41522, 'batch_loss/train': 0.8086121436208487} +12/28/2021 10:20:32 - INFO - codeparrot_training - Step 41523: {'lr': 3.486867533179544e-05, 'samples': 21260288, 'steps': 41523, 'batch_loss/train': 0.7236459372797981} +12/28/2021 10:20:44 - INFO - codeparrot_training - Step 41524: {'lr': 3.486064589739893e-05, 'samples': 21260800, 'steps': 41524, 'batch_loss/train': 0.7053675716742873} +12/28/2021 10:20:55 - INFO - codeparrot_training - Step 41525: {'lr': 3.485261731831521e-05, 'samples': 21261312, 'steps': 41525, 'batch_loss/train': 0.8576425425708294} +12/28/2021 10:21:06 - INFO - codeparrot_training - Step 41526: {'lr': 3.484458959457618e-05, 'samples': 21261824, 'steps': 41526, 'batch_loss/train': 0.7228547120466828} +12/28/2021 10:21:16 - INFO - codeparrot_training - Step 41527: {'lr': 3.483656272621394e-05, 'samples': 21262336, 'steps': 41527, 'batch_loss/train': 0.6522757486090995} +12/28/2021 10:21:29 - INFO - codeparrot_training - Step 41528: {'lr': 3.482853671326022e-05, 'samples': 21262848, 'steps': 41528, 'batch_loss/train': 0.7025906909257174} +12/28/2021 10:21:39 - INFO - codeparrot_training - Step 41529: {'lr': 3.4820511555747004e-05, 'samples': 21263360, 'steps': 41529, 'batch_loss/train': 0.7774187680915929} +12/28/2021 10:21:50 - INFO - codeparrot_training - Step 41530: {'lr': 3.481248725370617e-05, 'samples': 21263872, 'steps': 41530, 'batch_loss/train': 0.7454806575551629} +12/28/2021 10:22:04 - INFO - codeparrot_training - Step 41531: {'lr': 3.4804463807169646e-05, 'samples': 21264384, 'steps': 41531, 'batch_loss/train': 0.7557276580482721} +12/28/2021 10:22:15 - INFO - codeparrot_training - Step 41532: {'lr': 3.479644121616932e-05, 'samples': 21264896, 'steps': 41532, 'batch_loss/train': 0.5602715175191406} +12/28/2021 10:22:26 - INFO - codeparrot_training - Step 41533: {'lr': 3.478841948073716e-05, 'samples': 21265408, 'steps': 41533, 'batch_loss/train': 0.653933810943272} +12/28/2021 10:22:38 - INFO - codeparrot_training - Step 41534: {'lr': 3.478039860090484e-05, 'samples': 21265920, 'steps': 41534, 'batch_loss/train': 0.7098502945154905} +12/28/2021 10:22:49 - INFO - codeparrot_training - Step 41535: {'lr': 3.4772378576704455e-05, 'samples': 21266432, 'steps': 41535, 'batch_loss/train': 0.7172465873882174} +12/28/2021 10:23:00 - INFO - codeparrot_training - Step 41536: {'lr': 3.476435940816788e-05, 'samples': 21266944, 'steps': 41536, 'batch_loss/train': 0.68351168371737} +12/28/2021 10:23:10 - INFO - codeparrot_training - Step 41537: {'lr': 3.475634109532685e-05, 'samples': 21267456, 'steps': 41537, 'batch_loss/train': 0.8156015304848552} +12/28/2021 10:23:22 - INFO - codeparrot_training - Step 41538: {'lr': 3.474832363821345e-05, 'samples': 21267968, 'steps': 41538, 'batch_loss/train': 0.6881545819342136} +12/28/2021 10:23:33 - INFO - codeparrot_training - Step 41539: {'lr': 3.4740307036859346e-05, 'samples': 21268480, 'steps': 41539, 'batch_loss/train': 0.6484912112355232} +12/28/2021 10:23:44 - INFO - codeparrot_training - Step 41540: {'lr': 3.473229129129654e-05, 'samples': 21268992, 'steps': 41540, 'batch_loss/train': 0.7659668801352382} +12/28/2021 10:23:58 - INFO - codeparrot_training - Step 41541: {'lr': 3.472427640155684e-05, 'samples': 21269504, 'steps': 41541, 'batch_loss/train': 0.7014773855917156} +12/28/2021 10:24:08 - INFO - codeparrot_training - Step 41542: {'lr': 3.471626236767214e-05, 'samples': 21270016, 'steps': 41542, 'batch_loss/train': 0.6956529356539249} +12/28/2021 10:24:19 - INFO - codeparrot_training - Step 41543: {'lr': 3.47082491896743e-05, 'samples': 21270528, 'steps': 41543, 'batch_loss/train': 0.7236835472285748} +12/28/2021 10:24:31 - INFO - codeparrot_training - Step 41544: {'lr': 3.4700236867595155e-05, 'samples': 21271040, 'steps': 41544, 'batch_loss/train': 0.7500525210052729} +12/28/2021 10:24:42 - INFO - codeparrot_training - Step 41545: {'lr': 3.4692225401466574e-05, 'samples': 21271552, 'steps': 41545, 'batch_loss/train': 0.7652287995442748} +12/28/2021 10:24:52 - INFO - codeparrot_training - Step 41546: {'lr': 3.468421479132042e-05, 'samples': 21272064, 'steps': 41546, 'batch_loss/train': 0.5832478639204055} +12/28/2021 10:25:03 - INFO - codeparrot_training - Step 41547: {'lr': 3.4676205037188574e-05, 'samples': 21272576, 'steps': 41547, 'batch_loss/train': 0.7136660672258586} +12/28/2021 10:25:15 - INFO - codeparrot_training - Step 41548: {'lr': 3.466819613910274e-05, 'samples': 21273088, 'steps': 41548, 'batch_loss/train': 0.7653752830810845} +12/28/2021 10:25:26 - INFO - codeparrot_training - Step 41549: {'lr': 3.4660188097094897e-05, 'samples': 21273600, 'steps': 41549, 'batch_loss/train': 0.6360893670062069} +12/28/2021 10:25:36 - INFO - codeparrot_training - Step 41550: {'lr': 3.465218091119687e-05, 'samples': 21274112, 'steps': 41550, 'batch_loss/train': 0.7892587996320799} +12/28/2021 10:25:51 - INFO - codeparrot_training - Step 41551: {'lr': 3.4644174581440425e-05, 'samples': 21274624, 'steps': 41551, 'batch_loss/train': 0.7434839960187674} +12/28/2021 10:26:01 - INFO - codeparrot_training - Step 41552: {'lr': 3.463616910785738e-05, 'samples': 21275136, 'steps': 41552, 'batch_loss/train': 0.7535849995911121} +12/28/2021 10:26:12 - INFO - codeparrot_training - Step 41553: {'lr': 3.46281644904797e-05, 'samples': 21275648, 'steps': 41553, 'batch_loss/train': 0.748363628052175} +12/28/2021 10:26:24 - INFO - codeparrot_training - Step 41554: {'lr': 3.4620160729339076e-05, 'samples': 21276160, 'steps': 41554, 'batch_loss/train': 0.6835882076993585} +12/28/2021 10:26:35 - INFO - codeparrot_training - Step 41555: {'lr': 3.461215782446736e-05, 'samples': 21276672, 'steps': 41555, 'batch_loss/train': 0.7157702259719372} +12/28/2021 10:26:45 - INFO - codeparrot_training - Step 41556: {'lr': 3.4604155775896376e-05, 'samples': 21277184, 'steps': 41556, 'batch_loss/train': 0.7075220651458949} +12/28/2021 10:26:59 - INFO - codeparrot_training - Step 41557: {'lr': 3.4596154583657946e-05, 'samples': 21277696, 'steps': 41557, 'batch_loss/train': 0.7707954701036215} +12/28/2021 10:27:10 - INFO - codeparrot_training - Step 41558: {'lr': 3.458815424778386e-05, 'samples': 21278208, 'steps': 41558, 'batch_loss/train': 0.7908078380860388} +12/28/2021 10:27:21 - INFO - codeparrot_training - Step 41559: {'lr': 3.458015476830595e-05, 'samples': 21278720, 'steps': 41559, 'batch_loss/train': 0.7036960162222385} +12/28/2021 10:27:31 - INFO - codeparrot_training - Step 41560: {'lr': 3.457215614525599e-05, 'samples': 21279232, 'steps': 41560, 'batch_loss/train': 0.6793039643671364} +12/28/2021 10:27:43 - INFO - codeparrot_training - Step 41561: {'lr': 3.45641583786658e-05, 'samples': 21279744, 'steps': 41561, 'batch_loss/train': 0.7100477069616318} +12/28/2021 10:27:54 - INFO - codeparrot_training - Step 41562: {'lr': 3.455616146856724e-05, 'samples': 21280256, 'steps': 41562, 'batch_loss/train': 0.6856347140856087} +12/28/2021 10:28:04 - INFO - codeparrot_training - Step 41563: {'lr': 3.454816541499192e-05, 'samples': 21280768, 'steps': 41563, 'batch_loss/train': 0.7223341539502144} +12/28/2021 10:28:17 - INFO - codeparrot_training - Step 41564: {'lr': 3.454017021797184e-05, 'samples': 21281280, 'steps': 41564, 'batch_loss/train': 0.7427229294553399} +12/28/2021 10:28:27 - INFO - codeparrot_training - Step 41565: {'lr': 3.453217587753865e-05, 'samples': 21281792, 'steps': 41565, 'batch_loss/train': 0.7203288269229233} +12/28/2021 10:28:38 - INFO - codeparrot_training - Step 41566: {'lr': 3.452418239372415e-05, 'samples': 21282304, 'steps': 41566, 'batch_loss/train': 0.7192516019567847} +12/28/2021 10:28:50 - INFO - codeparrot_training - Step 41567: {'lr': 3.451618976656018e-05, 'samples': 21282816, 'steps': 41567, 'batch_loss/train': 0.620019753521774} +12/28/2021 10:29:01 - INFO - codeparrot_training - Step 41568: {'lr': 3.450819799607843e-05, 'samples': 21283328, 'steps': 41568, 'batch_loss/train': 0.7212742543779314} +12/28/2021 10:29:11 - INFO - codeparrot_training - Step 41569: {'lr': 3.450020708231075e-05, 'samples': 21283840, 'steps': 41569, 'batch_loss/train': 0.6399565101601183} +12/28/2021 10:29:22 - INFO - codeparrot_training - Step 41570: {'lr': 3.449221702528885e-05, 'samples': 21284352, 'steps': 41570, 'batch_loss/train': 0.581018467550166} +12/28/2021 10:29:36 - INFO - codeparrot_training - Step 41571: {'lr': 3.448422782504454e-05, 'samples': 21284864, 'steps': 41571, 'batch_loss/train': 0.6070524178503547} +12/28/2021 10:29:47 - INFO - codeparrot_training - Step 41572: {'lr': 3.447623948160955e-05, 'samples': 21285376, 'steps': 41572, 'batch_loss/train': 0.7066384591162205} +12/28/2021 10:29:57 - INFO - codeparrot_training - Step 41573: {'lr': 3.4468251995015745e-05, 'samples': 21285888, 'steps': 41573, 'batch_loss/train': 0.7659238697960973} +12/28/2021 10:30:10 - INFO - codeparrot_training - Step 41574: {'lr': 3.4460265365294644e-05, 'samples': 21286400, 'steps': 41574, 'batch_loss/train': 0.7132376489462331} +12/28/2021 10:30:20 - INFO - codeparrot_training - Step 41575: {'lr': 3.4452279592478206e-05, 'samples': 21286912, 'steps': 41575, 'batch_loss/train': 0.7365926904603839} +12/28/2021 10:30:31 - INFO - codeparrot_training - Step 41576: {'lr': 3.4444294676598184e-05, 'samples': 21287424, 'steps': 41576, 'batch_loss/train': 0.7713529723696411} +12/28/2021 10:30:43 - INFO - codeparrot_training - Step 41577: {'lr': 3.443631061768618e-05, 'samples': 21287936, 'steps': 41577, 'batch_loss/train': 0.6968986308202147} +12/28/2021 10:30:54 - INFO - codeparrot_training - Step 41578: {'lr': 3.442832741577395e-05, 'samples': 21288448, 'steps': 41578, 'batch_loss/train': 0.643276626477018} +12/28/2021 10:31:04 - INFO - codeparrot_training - Step 41579: {'lr': 3.442034507089342e-05, 'samples': 21288960, 'steps': 41579, 'batch_loss/train': 0.6064038836630061} +12/28/2021 10:31:15 - INFO - codeparrot_training - Step 41580: {'lr': 3.4412363583076144e-05, 'samples': 21289472, 'steps': 41580, 'batch_loss/train': 0.7782243653200567} +12/28/2021 10:31:29 - INFO - codeparrot_training - Step 41581: {'lr': 3.44043829523539e-05, 'samples': 21289984, 'steps': 41581, 'batch_loss/train': 0.741127533139661} +12/28/2021 10:31:39 - INFO - codeparrot_training - Step 41582: {'lr': 3.4396403178758446e-05, 'samples': 21290496, 'steps': 41582, 'batch_loss/train': 0.751310970634222} +12/28/2021 10:31:50 - INFO - codeparrot_training - Step 41583: {'lr': 3.438842426232147e-05, 'samples': 21291008, 'steps': 41583, 'batch_loss/train': 0.7762007284909487} +12/28/2021 10:32:02 - INFO - codeparrot_training - Step 41584: {'lr': 3.4380446203074705e-05, 'samples': 21291520, 'steps': 41584, 'batch_loss/train': 0.7568511599674821} +12/28/2021 10:32:13 - INFO - codeparrot_training - Step 41585: {'lr': 3.437246900104987e-05, 'samples': 21292032, 'steps': 41585, 'batch_loss/train': 0.6449960693717003} +12/28/2021 10:32:23 - INFO - codeparrot_training - Step 41586: {'lr': 3.4364492656278665e-05, 'samples': 21292544, 'steps': 41586, 'batch_loss/train': 0.6619370910339057} +12/28/2021 10:32:37 - INFO - codeparrot_training - Step 41587: {'lr': 3.4356517168792845e-05, 'samples': 21293056, 'steps': 41587, 'batch_loss/train': 0.6511564991087653} +12/28/2021 10:32:48 - INFO - codeparrot_training - Step 41588: {'lr': 3.434854253862413e-05, 'samples': 21293568, 'steps': 41588, 'batch_loss/train': 0.6896169431274757} +12/28/2021 10:32:59 - INFO - codeparrot_training - Step 41589: {'lr': 3.4340568765804074e-05, 'samples': 21294080, 'steps': 41589, 'batch_loss/train': 0.5788967196131125} +12/28/2021 10:33:11 - INFO - codeparrot_training - Step 41590: {'lr': 3.433259585036455e-05, 'samples': 21294592, 'steps': 41590, 'batch_loss/train': 0.7532752561382949} +12/28/2021 10:33:21 - INFO - codeparrot_training - Step 41591: {'lr': 3.432462379233722e-05, 'samples': 21295104, 'steps': 41591, 'batch_loss/train': 0.5728822260862216} +12/28/2021 10:33:32 - INFO - codeparrot_training - Step 41592: {'lr': 3.4316652591753736e-05, 'samples': 21295616, 'steps': 41592, 'batch_loss/train': 0.7287245257757604} +12/28/2021 10:33:43 - INFO - codeparrot_training - Step 41593: {'lr': 3.430868224864578e-05, 'samples': 21296128, 'steps': 41593, 'batch_loss/train': 0.6515525048598647} +12/28/2021 10:33:55 - INFO - codeparrot_training - Step 41594: {'lr': 3.430071276304506e-05, 'samples': 21296640, 'steps': 41594, 'batch_loss/train': 0.674888267647475} +12/28/2021 10:34:05 - INFO - codeparrot_training - Step 41595: {'lr': 3.4292744134983264e-05, 'samples': 21297152, 'steps': 41595, 'batch_loss/train': 0.7235937230288982} +12/28/2021 10:34:16 - INFO - codeparrot_training - Step 41596: {'lr': 3.428477636449206e-05, 'samples': 21297664, 'steps': 41596, 'batch_loss/train': 0.7189266262575984} +12/28/2021 10:34:30 - INFO - codeparrot_training - Step 41597: {'lr': 3.427680945160314e-05, 'samples': 21298176, 'steps': 41597, 'batch_loss/train': 0.7129168603569269} +12/28/2021 10:34:41 - INFO - codeparrot_training - Step 41598: {'lr': 3.4268843396348174e-05, 'samples': 21298688, 'steps': 41598, 'batch_loss/train': 0.6198351392522454} +12/28/2021 10:34:51 - INFO - codeparrot_training - Step 41599: {'lr': 3.426087819875884e-05, 'samples': 21299200, 'steps': 41599, 'batch_loss/train': 0.8147013857960701} +12/28/2021 10:35:03 - INFO - codeparrot_training - Step 41600: {'lr': 3.425291385886678e-05, 'samples': 21299712, 'steps': 41600, 'batch_loss/train': 0.7514394298195839} +12/28/2021 10:35:14 - INFO - codeparrot_training - Step 41601: {'lr': 3.424495037670369e-05, 'samples': 21300224, 'steps': 41601, 'batch_loss/train': 0.8837468679994345} +12/28/2021 10:35:25 - INFO - codeparrot_training - Step 41602: {'lr': 3.4236987752301245e-05, 'samples': 21300736, 'steps': 41602, 'batch_loss/train': 0.6994913902599365} +12/28/2021 10:35:35 - INFO - codeparrot_training - Step 41603: {'lr': 3.422902598569102e-05, 'samples': 21301248, 'steps': 41603, 'batch_loss/train': 0.722947375557851} +12/28/2021 10:35:48 - INFO - codeparrot_training - Step 41604: {'lr': 3.422106507690467e-05, 'samples': 21301760, 'steps': 41604, 'batch_loss/train': 0.5024168840027414} +12/28/2021 10:35:59 - INFO - codeparrot_training - Step 41605: {'lr': 3.4213105025973994e-05, 'samples': 21302272, 'steps': 41605, 'batch_loss/train': 0.509930731728673} +12/28/2021 10:36:09 - INFO - codeparrot_training - Step 41606: {'lr': 3.420514583293047e-05, 'samples': 21302784, 'steps': 41606, 'batch_loss/train': 0.6758315288461745} +12/28/2021 10:36:23 - INFO - codeparrot_training - Step 41607: {'lr': 3.419718749780581e-05, 'samples': 21303296, 'steps': 41607, 'batch_loss/train': 0.4229970572050661} +12/28/2021 10:36:34 - INFO - codeparrot_training - Step 41608: {'lr': 3.418923002063165e-05, 'samples': 21303808, 'steps': 41608, 'batch_loss/train': 0.6634138058288954} +12/28/2021 10:36:45 - INFO - codeparrot_training - Step 41609: {'lr': 3.4181273401439614e-05, 'samples': 21304320, 'steps': 41609, 'batch_loss/train': 0.7650661491788924} +12/28/2021 10:36:55 - INFO - codeparrot_training - Step 41610: {'lr': 3.417331764026138e-05, 'samples': 21304832, 'steps': 41610, 'batch_loss/train': 0.7743208413012326} +12/28/2021 10:37:08 - INFO - codeparrot_training - Step 41611: {'lr': 3.4165362737128504e-05, 'samples': 21305344, 'steps': 41611, 'batch_loss/train': 0.6867004218511283} +12/28/2021 10:37:18 - INFO - codeparrot_training - Step 41612: {'lr': 3.4157408692072675e-05, 'samples': 21305856, 'steps': 41612, 'batch_loss/train': 0.73965975176543} +12/28/2021 10:37:29 - INFO - codeparrot_training - Step 41613: {'lr': 3.414945550512547e-05, 'samples': 21306368, 'steps': 41613, 'batch_loss/train': 0.7649435694329441} +12/28/2021 10:37:41 - INFO - codeparrot_training - Step 41614: {'lr': 3.414150317631859e-05, 'samples': 21306880, 'steps': 41614, 'batch_loss/train': 0.45833559962920845} +12/28/2021 10:37:52 - INFO - codeparrot_training - Step 41615: {'lr': 3.41335517056835e-05, 'samples': 21307392, 'steps': 41615, 'batch_loss/train': 0.7668561283499002} +12/28/2021 10:38:02 - INFO - codeparrot_training - Step 41616: {'lr': 3.412560109325191e-05, 'samples': 21307904, 'steps': 41616, 'batch_loss/train': 0.7088095592334867} +12/28/2021 10:38:13 - INFO - codeparrot_training - Step 41617: {'lr': 3.4117651339055504e-05, 'samples': 21308416, 'steps': 41617, 'batch_loss/train': 0.6881768628954887} +12/28/2021 10:38:27 - INFO - codeparrot_training - Step 41618: {'lr': 3.410970244312575e-05, 'samples': 21308928, 'steps': 41618, 'batch_loss/train': 0.7674330184236169} +12/28/2021 10:38:37 - INFO - codeparrot_training - Step 41619: {'lr': 3.410175440549429e-05, 'samples': 21309440, 'steps': 41619, 'batch_loss/train': 0.6819914559600875} +12/28/2021 10:38:48 - INFO - codeparrot_training - Step 41620: {'lr': 3.4093807226192737e-05, 'samples': 21309952, 'steps': 41620, 'batch_loss/train': 0.6294901357032359} +12/28/2021 10:39:00 - INFO - codeparrot_training - Step 41621: {'lr': 3.40858609052527e-05, 'samples': 21310464, 'steps': 41621, 'batch_loss/train': 0.6438775411807001} +12/28/2021 10:39:11 - INFO - codeparrot_training - Step 41622: {'lr': 3.407791544270575e-05, 'samples': 21310976, 'steps': 41622, 'batch_loss/train': 0.7001776807010174} +12/28/2021 10:39:21 - INFO - codeparrot_training - Step 41623: {'lr': 3.406997083858346e-05, 'samples': 21311488, 'steps': 41623, 'batch_loss/train': 0.6350125847384334} +12/28/2021 10:39:34 - INFO - codeparrot_training - Step 41624: {'lr': 3.406202709291745e-05, 'samples': 21312000, 'steps': 41624, 'batch_loss/train': 0.6917784647084773} +12/28/2021 10:39:45 - INFO - codeparrot_training - Step 41625: {'lr': 3.4054084205739274e-05, 'samples': 21312512, 'steps': 41625, 'batch_loss/train': 0.7685033748857677} +12/28/2021 10:39:55 - INFO - codeparrot_training - Step 41626: {'lr': 3.404614217708052e-05, 'samples': 21313024, 'steps': 41626, 'batch_loss/train': 0.6922330986708403} +12/28/2021 10:40:06 - INFO - codeparrot_training - Step 41627: {'lr': 3.4038201006972773e-05, 'samples': 21313536, 'steps': 41627, 'batch_loss/train': 0.45875386882107705} +12/28/2021 10:40:20 - INFO - codeparrot_training - Step 41628: {'lr': 3.403026069544765e-05, 'samples': 21314048, 'steps': 41628, 'batch_loss/train': 0.9093511867686175} +12/28/2021 10:40:31 - INFO - codeparrot_training - Step 41629: {'lr': 3.4022321242536594e-05, 'samples': 21314560, 'steps': 41629, 'batch_loss/train': 0.6939028832130134} +12/28/2021 10:40:42 - INFO - codeparrot_training - Step 41630: {'lr': 3.40143826482712e-05, 'samples': 21315072, 'steps': 41630, 'batch_loss/train': 0.7750792887527496} +12/28/2021 10:40:55 - INFO - codeparrot_training - Step 41631: {'lr': 3.400644491268318e-05, 'samples': 21315584, 'steps': 41631, 'batch_loss/train': 0.7607819233089685} +12/28/2021 10:41:05 - INFO - codeparrot_training - Step 41632: {'lr': 3.39985080358039e-05, 'samples': 21316096, 'steps': 41632, 'batch_loss/train': 0.6085472325794399} +12/28/2021 10:41:16 - INFO - codeparrot_training - Step 41633: {'lr': 3.399057201766495e-05, 'samples': 21316608, 'steps': 41633, 'batch_loss/train': 0.7401102874428034} +12/28/2021 10:41:27 - INFO - codeparrot_training - Step 41634: {'lr': 3.398263685829805e-05, 'samples': 21317120, 'steps': 41634, 'batch_loss/train': 0.7141675469465554} +12/28/2021 10:41:39 - INFO - codeparrot_training - Step 41635: {'lr': 3.397470255773455e-05, 'samples': 21317632, 'steps': 41635, 'batch_loss/train': 0.7164322668686509} +12/28/2021 10:41:49 - INFO - codeparrot_training - Step 41636: {'lr': 3.396676911600607e-05, 'samples': 21318144, 'steps': 41636, 'batch_loss/train': 0.6967063075862825} +12/28/2021 10:42:00 - INFO - codeparrot_training - Step 41637: {'lr': 3.3958836533144136e-05, 'samples': 21318656, 'steps': 41637, 'batch_loss/train': 0.6747384397312999} +12/28/2021 10:42:14 - INFO - codeparrot_training - Step 41638: {'lr': 3.395090480918031e-05, 'samples': 21319168, 'steps': 41638, 'batch_loss/train': 0.7131575820967555} +12/28/2021 10:42:25 - INFO - codeparrot_training - Step 41639: {'lr': 3.394297394414611e-05, 'samples': 21319680, 'steps': 41639, 'batch_loss/train': 0.6681948634795845} +12/28/2021 10:42:35 - INFO - codeparrot_training - Step 41640: {'lr': 3.3935043938073116e-05, 'samples': 21320192, 'steps': 41640, 'batch_loss/train': 0.7001492194831371} +12/28/2021 10:42:46 - INFO - codeparrot_training - Step 41641: {'lr': 3.392711479099267e-05, 'samples': 21320704, 'steps': 41641, 'batch_loss/train': 0.796325109899044} +12/28/2021 10:42:58 - INFO - codeparrot_training - Step 41642: {'lr': 3.391918650293652e-05, 'samples': 21321216, 'steps': 41642, 'batch_loss/train': 0.73693577805534} +12/28/2021 10:43:09 - INFO - codeparrot_training - Step 41643: {'lr': 3.391125907393613e-05, 'samples': 21321728, 'steps': 41643, 'batch_loss/train': 0.6866414558608085} +12/28/2021 10:43:19 - INFO - codeparrot_training - Step 41644: {'lr': 3.390333250402294e-05, 'samples': 21322240, 'steps': 41644, 'batch_loss/train': 0.7059020238230005} +12/28/2021 10:43:32 - INFO - codeparrot_training - Step 41645: {'lr': 3.3895406793228504e-05, 'samples': 21322752, 'steps': 41645, 'batch_loss/train': 0.7368628005497158} +12/28/2021 10:43:42 - INFO - codeparrot_training - Step 41646: {'lr': 3.3887481941584306e-05, 'samples': 21323264, 'steps': 41646, 'batch_loss/train': 0.720416127354838} +12/28/2021 10:43:53 - INFO - codeparrot_training - Step 41647: {'lr': 3.38795579491219e-05, 'samples': 21323776, 'steps': 41647, 'batch_loss/train': 0.6946071879938245} +12/28/2021 10:44:06 - INFO - codeparrot_training - Step 41648: {'lr': 3.387163481587277e-05, 'samples': 21324288, 'steps': 41648, 'batch_loss/train': 0.7125500324182212} +12/28/2021 10:44:17 - INFO - codeparrot_training - Step 41649: {'lr': 3.38637125418684e-05, 'samples': 21324800, 'steps': 41649, 'batch_loss/train': 0.6579812015406787} +12/28/2021 10:44:27 - INFO - codeparrot_training - Step 41650: {'lr': 3.385579112714032e-05, 'samples': 21325312, 'steps': 41650, 'batch_loss/train': 0.7548663383349776} +12/28/2021 10:44:38 - INFO - codeparrot_training - Step 41651: {'lr': 3.384787057171998e-05, 'samples': 21325824, 'steps': 41651, 'batch_loss/train': 0.6430167959770188} +12/28/2021 10:44:50 - INFO - codeparrot_training - Step 41652: {'lr': 3.38399508756389e-05, 'samples': 21326336, 'steps': 41652, 'batch_loss/train': 0.7938863169401884} +12/28/2021 10:45:01 - INFO - codeparrot_training - Step 41653: {'lr': 3.383203203892857e-05, 'samples': 21326848, 'steps': 41653, 'batch_loss/train': 0.7112267164047807} +12/28/2021 10:45:11 - INFO - codeparrot_training - Step 41654: {'lr': 3.38241140616205e-05, 'samples': 21327360, 'steps': 41654, 'batch_loss/train': 0.6519762079697102} +12/28/2021 10:45:23 - INFO - codeparrot_training - Step 41655: {'lr': 3.381619694374608e-05, 'samples': 21327872, 'steps': 41655, 'batch_loss/train': 0.7146370515692979} +12/28/2021 10:45:34 - INFO - codeparrot_training - Step 41656: {'lr': 3.3808280685336755e-05, 'samples': 21328384, 'steps': 41656, 'batch_loss/train': 0.671924838796258} +12/28/2021 10:45:45 - INFO - codeparrot_training - Step 41657: {'lr': 3.380036528642422e-05, 'samples': 21328896, 'steps': 41657, 'batch_loss/train': 0.7005132320336998} +12/28/2021 10:45:59 - INFO - codeparrot_training - Step 41658: {'lr': 3.379245074703971e-05, 'samples': 21329408, 'steps': 41658, 'batch_loss/train': 0.6520265061408281} +12/28/2021 10:46:09 - INFO - codeparrot_training - Step 41659: {'lr': 3.378453706721474e-05, 'samples': 21329920, 'steps': 41659, 'batch_loss/train': 0.7740650940686464} +12/28/2021 10:46:20 - INFO - codeparrot_training - Step 41660: {'lr': 3.377662424698091e-05, 'samples': 21330432, 'steps': 41660, 'batch_loss/train': 0.6402227770304307} +12/28/2021 10:46:30 - INFO - codeparrot_training - Step 41661: {'lr': 3.376871228636955e-05, 'samples': 21330944, 'steps': 41661, 'batch_loss/train': 0.7183920503593981} +12/28/2021 10:46:43 - INFO - codeparrot_training - Step 41662: {'lr': 3.376080118541214e-05, 'samples': 21331456, 'steps': 41662, 'batch_loss/train': 0.794546527788043} +12/28/2021 10:46:53 - INFO - codeparrot_training - Step 41663: {'lr': 3.375289094414011e-05, 'samples': 21331968, 'steps': 41663, 'batch_loss/train': 0.6699543287977576} +12/28/2021 10:47:04 - INFO - codeparrot_training - Step 41664: {'lr': 3.3744981562584956e-05, 'samples': 21332480, 'steps': 41664, 'batch_loss/train': 0.6818393818102777} +12/28/2021 10:47:18 - INFO - codeparrot_training - Step 41665: {'lr': 3.373707304077808e-05, 'samples': 21332992, 'steps': 41665, 'batch_loss/train': 0.792970722541213} +12/28/2021 10:47:28 - INFO - codeparrot_training - Step 41666: {'lr': 3.3729165378751006e-05, 'samples': 21333504, 'steps': 41666, 'batch_loss/train': 0.763794269412756} +12/28/2021 10:47:39 - INFO - codeparrot_training - Step 41667: {'lr': 3.372125857653502e-05, 'samples': 21334016, 'steps': 41667, 'batch_loss/train': 0.750882713124156} +12/28/2021 10:47:51 - INFO - codeparrot_training - Step 41668: {'lr': 3.37133526341617e-05, 'samples': 21334528, 'steps': 41668, 'batch_loss/train': 0.6594730257056653} +12/28/2021 10:48:02 - INFO - codeparrot_training - Step 41669: {'lr': 3.370544755166247e-05, 'samples': 21335040, 'steps': 41669, 'batch_loss/train': 0.8183435313403606} +12/28/2021 10:48:13 - INFO - codeparrot_training - Step 41670: {'lr': 3.369754332906858e-05, 'samples': 21335552, 'steps': 41670, 'batch_loss/train': 0.8662413582205772} +12/28/2021 10:48:23 - INFO - codeparrot_training - Step 41671: {'lr': 3.368963996641167e-05, 'samples': 21336064, 'steps': 41671, 'batch_loss/train': 0.7357504880055785} +12/28/2021 10:48:35 - INFO - codeparrot_training - Step 41672: {'lr': 3.36817374637231e-05, 'samples': 21336576, 'steps': 41672, 'batch_loss/train': 0.6448475755751133} +12/28/2021 10:48:46 - INFO - codeparrot_training - Step 41673: {'lr': 3.3673835821034225e-05, 'samples': 21337088, 'steps': 41673, 'batch_loss/train': 0.769943174906075} +12/28/2021 10:48:57 - INFO - codeparrot_training - Step 41674: {'lr': 3.36659350383765e-05, 'samples': 21337600, 'steps': 41674, 'batch_loss/train': 0.5877407299121842} +12/28/2021 10:49:09 - INFO - codeparrot_training - Step 41675: {'lr': 3.365803511578133e-05, 'samples': 21338112, 'steps': 41675, 'batch_loss/train': 0.8583541161497124} +12/28/2021 10:49:19 - INFO - codeparrot_training - Step 41676: {'lr': 3.365013605328013e-05, 'samples': 21338624, 'steps': 41676, 'batch_loss/train': 0.7722990745678544} +12/28/2021 10:49:30 - INFO - codeparrot_training - Step 41677: {'lr': 3.364223785090428e-05, 'samples': 21339136, 'steps': 41677, 'batch_loss/train': 0.7364085272420198} +12/28/2021 10:49:44 - INFO - codeparrot_training - Step 41678: {'lr': 3.3634340508685217e-05, 'samples': 21339648, 'steps': 41678, 'batch_loss/train': 0.7439810652285814} +12/28/2021 10:49:55 - INFO - codeparrot_training - Step 41679: {'lr': 3.3626444026654325e-05, 'samples': 21340160, 'steps': 41679, 'batch_loss/train': 0.6129724632482976} +12/28/2021 10:50:05 - INFO - codeparrot_training - Step 41680: {'lr': 3.361854840484296e-05, 'samples': 21340672, 'steps': 41680, 'batch_loss/train': 0.5335897258482873} +12/28/2021 10:50:16 - INFO - codeparrot_training - Step 41681: {'lr': 3.361065364328264e-05, 'samples': 21341184, 'steps': 41681, 'batch_loss/train': 0.6454820563085377} +12/28/2021 10:50:28 - INFO - codeparrot_training - Step 41682: {'lr': 3.3602759742004525e-05, 'samples': 21341696, 'steps': 41682, 'batch_loss/train': 0.6419020507310051} +12/28/2021 10:50:39 - INFO - codeparrot_training - Step 41683: {'lr': 3.3594866701040225e-05, 'samples': 21342208, 'steps': 41683, 'batch_loss/train': 0.7363594932248816} +12/28/2021 10:50:49 - INFO - codeparrot_training - Step 41684: {'lr': 3.3586974520421005e-05, 'samples': 21342720, 'steps': 41684, 'batch_loss/train': 0.8245597584173083} +12/28/2021 10:51:01 - INFO - codeparrot_training - Step 41685: {'lr': 3.357908320017819e-05, 'samples': 21343232, 'steps': 41685, 'batch_loss/train': 0.7865561195649207} +12/28/2021 10:51:12 - INFO - codeparrot_training - Step 41686: {'lr': 3.357119274034331e-05, 'samples': 21343744, 'steps': 41686, 'batch_loss/train': 0.7336862650699914} +12/28/2021 10:51:22 - INFO - codeparrot_training - Step 41687: {'lr': 3.35633031409476e-05, 'samples': 21344256, 'steps': 41687, 'batch_loss/train': 0.7392761157825589} +12/28/2021 10:51:36 - INFO - codeparrot_training - Step 41688: {'lr': 3.35554144020225e-05, 'samples': 21344768, 'steps': 41688, 'batch_loss/train': 1.6893998505547643} +12/28/2021 10:51:47 - INFO - codeparrot_training - Step 41689: {'lr': 3.354752652359933e-05, 'samples': 21345280, 'steps': 41689, 'batch_loss/train': 0.8034251306671649} +12/28/2021 10:51:58 - INFO - codeparrot_training - Step 41690: {'lr': 3.3539639505709454e-05, 'samples': 21345792, 'steps': 41690, 'batch_loss/train': 0.6900888648815453} +12/28/2021 10:52:10 - INFO - codeparrot_training - Step 41691: {'lr': 3.3531753348384266e-05, 'samples': 21346304, 'steps': 41691, 'batch_loss/train': 0.7426247769035399} +12/28/2021 10:52:21 - INFO - codeparrot_training - Step 41692: {'lr': 3.352386805165511e-05, 'samples': 21346816, 'steps': 41692, 'batch_loss/train': 0.6217413037084043} +12/28/2021 10:52:31 - INFO - codeparrot_training - Step 41693: {'lr': 3.3515983615553245e-05, 'samples': 21347328, 'steps': 41693, 'batch_loss/train': 0.7075135419145226} +12/28/2021 10:52:42 - INFO - codeparrot_training - Step 41694: {'lr': 3.3508100040110126e-05, 'samples': 21347840, 'steps': 41694, 'batch_loss/train': 0.6957081460859627} +12/28/2021 10:52:56 - INFO - codeparrot_training - Step 41695: {'lr': 3.35002173253571e-05, 'samples': 21348352, 'steps': 41695, 'batch_loss/train': 0.7463557254523039} +12/28/2021 10:53:06 - INFO - codeparrot_training - Step 41696: {'lr': 3.349233547132535e-05, 'samples': 21348864, 'steps': 41696, 'batch_loss/train': 0.6860312717035413} +12/28/2021 10:53:17 - INFO - codeparrot_training - Step 41697: {'lr': 3.3484454478046403e-05, 'samples': 21349376, 'steps': 41697, 'batch_loss/train': 0.7428756440058351} +12/28/2021 10:53:29 - INFO - codeparrot_training - Step 41698: {'lr': 3.347657434555154e-05, 'samples': 21349888, 'steps': 41698, 'batch_loss/train': 0.7760354625061154} +12/28/2021 10:53:40 - INFO - codeparrot_training - Step 41699: {'lr': 3.346869507387204e-05, 'samples': 21350400, 'steps': 41699, 'batch_loss/train': 0.7693417686969042} +12/28/2021 10:53:50 - INFO - codeparrot_training - Step 41700: {'lr': 3.346081666303924e-05, 'samples': 21350912, 'steps': 41700, 'batch_loss/train': 0.7970671951770782} +12/28/2021 10:54:03 - INFO - codeparrot_training - Step 41701: {'lr': 3.345293911308445e-05, 'samples': 21351424, 'steps': 41701, 'batch_loss/train': 0.7514374516904354} +12/28/2021 10:54:13 - INFO - codeparrot_training - Step 41702: {'lr': 3.3445062424039024e-05, 'samples': 21351936, 'steps': 41702, 'batch_loss/train': 0.6127544129267335} +12/28/2021 10:54:24 - INFO - codeparrot_training - Step 41703: {'lr': 3.343718659593425e-05, 'samples': 21352448, 'steps': 41703, 'batch_loss/train': 0.7899465197697282} +12/28/2021 10:54:34 - INFO - codeparrot_training - Step 41704: {'lr': 3.342931162880147e-05, 'samples': 21352960, 'steps': 41704, 'batch_loss/train': 0.7654546359553933} +12/28/2021 10:54:48 - INFO - codeparrot_training - Step 41705: {'lr': 3.342143752267196e-05, 'samples': 21353472, 'steps': 41705, 'batch_loss/train': 0.7978151910938323} +12/28/2021 10:54:59 - INFO - codeparrot_training - Step 41706: {'lr': 3.341356427757705e-05, 'samples': 21353984, 'steps': 41706, 'batch_loss/train': 0.6954420374240726} +12/28/2021 10:55:10 - INFO - codeparrot_training - Step 41707: {'lr': 3.3405691893548e-05, 'samples': 21354496, 'steps': 41707, 'batch_loss/train': 1.0825394177809358} +12/28/2021 10:55:22 - INFO - codeparrot_training - Step 41708: {'lr': 3.339782037061617e-05, 'samples': 21355008, 'steps': 41708, 'batch_loss/train': 0.7299121045507491} +12/28/2021 10:55:33 - INFO - codeparrot_training - Step 41709: {'lr': 3.338994970881284e-05, 'samples': 21355520, 'steps': 41709, 'batch_loss/train': 0.6514819809235632} +12/28/2021 10:55:43 - INFO - codeparrot_training - Step 41710: {'lr': 3.3382079908169274e-05, 'samples': 21356032, 'steps': 41710, 'batch_loss/train': 0.9919869601726532} +12/28/2021 10:55:55 - INFO - codeparrot_training - Step 41711: {'lr': 3.337421096871668e-05, 'samples': 21356544, 'steps': 41711, 'batch_loss/train': 0.7106104120612144} +12/28/2021 10:56:06 - INFO - codeparrot_training - Step 41712: {'lr': 3.336634289048654e-05, 'samples': 21357056, 'steps': 41712, 'batch_loss/train': 0.6218902618857101} +12/28/2021 10:56:17 - INFO - codeparrot_training - Step 41713: {'lr': 3.335847567350997e-05, 'samples': 21357568, 'steps': 41713, 'batch_loss/train': 0.6905035865493119} +12/28/2021 10:56:31 - INFO - codeparrot_training - Step 41714: {'lr': 3.3350609317818305e-05, 'samples': 21358080, 'steps': 41714, 'batch_loss/train': 0.6852475134655833} +12/28/2021 10:56:42 - INFO - codeparrot_training - Step 41715: {'lr': 3.3342743823442794e-05, 'samples': 21358592, 'steps': 41715, 'batch_loss/train': 0.7687827246263623} +12/28/2021 10:56:53 - INFO - codeparrot_training - Step 41716: {'lr': 3.333487919041475e-05, 'samples': 21359104, 'steps': 41716, 'batch_loss/train': 0.8107953332364559} +12/28/2021 10:57:03 - INFO - codeparrot_training - Step 41717: {'lr': 3.332701541876543e-05, 'samples': 21359616, 'steps': 41717, 'batch_loss/train': 0.7546178954653442} +12/28/2021 10:57:16 - INFO - codeparrot_training - Step 41718: {'lr': 3.33191525085261e-05, 'samples': 21360128, 'steps': 41718, 'batch_loss/train': 0.7719386991811916} +12/28/2021 10:57:26 - INFO - codeparrot_training - Step 41719: {'lr': 3.331129045972792e-05, 'samples': 21360640, 'steps': 41719, 'batch_loss/train': 0.7577532255090773} +12/28/2021 10:57:37 - INFO - codeparrot_training - Step 41720: {'lr': 3.330342927240226e-05, 'samples': 21361152, 'steps': 41720, 'batch_loss/train': 0.726936852093786} +12/28/2021 10:57:50 - INFO - codeparrot_training - Step 41721: {'lr': 3.3295568946580426e-05, 'samples': 21361664, 'steps': 41721, 'batch_loss/train': 0.6539331017993391} +12/28/2021 10:58:01 - INFO - codeparrot_training - Step 41722: {'lr': 3.328770948229348e-05, 'samples': 21362176, 'steps': 41722, 'batch_loss/train': 0.6498598931357265} +12/28/2021 10:58:12 - INFO - codeparrot_training - Step 41723: {'lr': 3.3279850879572805e-05, 'samples': 21362688, 'steps': 41723, 'batch_loss/train': 0.6758797662332654} +12/28/2021 10:58:22 - INFO - codeparrot_training - Step 41724: {'lr': 3.3271993138449666e-05, 'samples': 21363200, 'steps': 41724, 'batch_loss/train': 0.5392313187476248} +12/28/2021 10:58:34 - INFO - codeparrot_training - Step 41725: {'lr': 3.326413625895522e-05, 'samples': 21363712, 'steps': 41725, 'batch_loss/train': 0.7907941835001111} +12/28/2021 10:58:45 - INFO - codeparrot_training - Step 41726: {'lr': 3.325628024112065e-05, 'samples': 21364224, 'steps': 41726, 'batch_loss/train': 0.5178714976354968} +12/28/2021 10:58:56 - INFO - codeparrot_training - Step 41727: {'lr': 3.3248425084977415e-05, 'samples': 21364736, 'steps': 41727, 'batch_loss/train': 0.623984138481319} +12/28/2021 10:59:08 - INFO - codeparrot_training - Step 41728: {'lr': 3.324057079055651e-05, 'samples': 21365248, 'steps': 41728, 'batch_loss/train': 0.5820913141360506} +12/28/2021 10:59:18 - INFO - codeparrot_training - Step 41729: {'lr': 3.3232717357889267e-05, 'samples': 21365760, 'steps': 41729, 'batch_loss/train': 0.7913073264062405} +12/28/2021 10:59:29 - INFO - codeparrot_training - Step 41730: {'lr': 3.3224864787006896e-05, 'samples': 21366272, 'steps': 41730, 'batch_loss/train': 0.7016260190866888} +12/28/2021 10:59:43 - INFO - codeparrot_training - Step 41731: {'lr': 3.321701307794059e-05, 'samples': 21366784, 'steps': 41731, 'batch_loss/train': 0.8657043948769569} +12/28/2021 10:59:54 - INFO - codeparrot_training - Step 41732: {'lr': 3.320916223072159e-05, 'samples': 21367296, 'steps': 41732, 'batch_loss/train': 0.6497496757656336} +12/28/2021 11:00:04 - INFO - codeparrot_training - Step 41733: {'lr': 3.3201312245381145e-05, 'samples': 21367808, 'steps': 41733, 'batch_loss/train': 0.720042422413826} +12/28/2021 11:00:15 - INFO - codeparrot_training - Step 41734: {'lr': 3.3193463121950375e-05, 'samples': 21368320, 'steps': 41734, 'batch_loss/train': 0.7176904110237956} +12/28/2021 11:00:27 - INFO - codeparrot_training - Step 41735: {'lr': 3.3185614860460596e-05, 'samples': 21368832, 'steps': 41735, 'batch_loss/train': 0.8056777352467179} +12/28/2021 11:00:37 - INFO - codeparrot_training - Step 41736: {'lr': 3.317776746094292e-05, 'samples': 21369344, 'steps': 41736, 'batch_loss/train': 0.6893313275068067} +12/28/2021 11:00:48 - INFO - codeparrot_training - Step 41737: {'lr': 3.316992092342849e-05, 'samples': 21369856, 'steps': 41737, 'batch_loss/train': 0.79159040376544} +12/28/2021 11:01:00 - INFO - codeparrot_training - Step 41738: {'lr': 3.3162075247948706e-05, 'samples': 21370368, 'steps': 41738, 'batch_loss/train': 0.6718965144827962} +12/28/2021 11:01:11 - INFO - codeparrot_training - Step 41739: {'lr': 3.315423043453455e-05, 'samples': 21370880, 'steps': 41739, 'batch_loss/train': 0.6776474744547158} +12/28/2021 11:01:21 - INFO - codeparrot_training - Step 41740: {'lr': 3.314638648321733e-05, 'samples': 21371392, 'steps': 41740, 'batch_loss/train': 0.7013206053525209} +12/28/2021 11:01:35 - INFO - codeparrot_training - Step 41741: {'lr': 3.313854339402819e-05, 'samples': 21371904, 'steps': 41741, 'batch_loss/train': 0.6175560620613396} +12/28/2021 11:01:46 - INFO - codeparrot_training - Step 41742: {'lr': 3.313070116699832e-05, 'samples': 21372416, 'steps': 41742, 'batch_loss/train': 0.7521521728485823} +12/28/2021 11:01:57 - INFO - codeparrot_training - Step 41743: {'lr': 3.312285980215887e-05, 'samples': 21372928, 'steps': 41743, 'batch_loss/train': 0.6875565480440855} +12/28/2021 11:02:09 - INFO - codeparrot_training - Step 41744: {'lr': 3.3115019299541055e-05, 'samples': 21373440, 'steps': 41744, 'batch_loss/train': 0.710025099106133} +12/28/2021 11:02:20 - INFO - codeparrot_training - Step 41745: {'lr': 3.310717965917603e-05, 'samples': 21373952, 'steps': 41745, 'batch_loss/train': 0.7733574183657765} +12/28/2021 11:02:30 - INFO - codeparrot_training - Step 41746: {'lr': 3.309934088109498e-05, 'samples': 21374464, 'steps': 41746, 'batch_loss/train': 0.7444104989990592} +12/28/2021 11:02:41 - INFO - codeparrot_training - Step 41747: {'lr': 3.309150296532909e-05, 'samples': 21374976, 'steps': 41747, 'batch_loss/train': 0.5237641632556915} +12/28/2021 11:02:53 - INFO - codeparrot_training - Step 41748: {'lr': 3.30836659119094e-05, 'samples': 21375488, 'steps': 41748, 'batch_loss/train': 0.7068630787543952} +12/28/2021 11:03:03 - INFO - codeparrot_training - Step 41749: {'lr': 3.307582972086717e-05, 'samples': 21376000, 'steps': 41749, 'batch_loss/train': 0.6989161800593138} +12/28/2021 11:03:14 - INFO - codeparrot_training - Step 41750: {'lr': 3.3067994392233595e-05, 'samples': 21376512, 'steps': 41750, 'batch_loss/train': 0.7546912566758692} +12/28/2021 11:03:28 - INFO - codeparrot_training - Step 41751: {'lr': 3.306015992603972e-05, 'samples': 21377024, 'steps': 41751, 'batch_loss/train': 0.6401650165207684} +12/28/2021 11:03:39 - INFO - codeparrot_training - Step 41752: {'lr': 3.305232632231667e-05, 'samples': 21377536, 'steps': 41752, 'batch_loss/train': 0.6591723151504993} +12/28/2021 11:03:49 - INFO - codeparrot_training - Step 41753: {'lr': 3.3044493581095765e-05, 'samples': 21378048, 'steps': 41753, 'batch_loss/train': 0.6327168283751234} +12/28/2021 11:04:01 - INFO - codeparrot_training - Step 41754: {'lr': 3.303666170240799e-05, 'samples': 21378560, 'steps': 41754, 'batch_loss/train': 0.655041717516724} +12/28/2021 11:04:12 - INFO - codeparrot_training - Step 41755: {'lr': 3.302883068628454e-05, 'samples': 21379072, 'steps': 41755, 'batch_loss/train': 0.8106603743508458} +12/28/2021 11:04:23 - INFO - codeparrot_training - Step 41756: {'lr': 3.30210005327565e-05, 'samples': 21379584, 'steps': 41756, 'batch_loss/train': 0.7723469990305603} +12/28/2021 11:04:33 - INFO - codeparrot_training - Step 41757: {'lr': 3.3013171241855044e-05, 'samples': 21380096, 'steps': 41757, 'batch_loss/train': 0.6199663656298071} +12/28/2021 11:04:45 - INFO - codeparrot_training - Step 41758: {'lr': 3.3005342813611286e-05, 'samples': 21380608, 'steps': 41758, 'batch_loss/train': 0.7488572428701445} +12/28/2021 11:04:56 - INFO - codeparrot_training - Step 41759: {'lr': 3.299751524805636e-05, 'samples': 21381120, 'steps': 41759, 'batch_loss/train': 0.6439883357379586} +12/28/2021 11:05:06 - INFO - codeparrot_training - Step 41760: {'lr': 3.298968854522139e-05, 'samples': 21381632, 'steps': 41760, 'batch_loss/train': 0.7059955094009638} +12/28/2021 11:05:21 - INFO - codeparrot_training - Step 41761: {'lr': 3.298186270513745e-05, 'samples': 21382144, 'steps': 41761, 'batch_loss/train': 0.7361024022102356} +12/28/2021 11:05:31 - INFO - codeparrot_training - Step 41762: {'lr': 3.297403772783575e-05, 'samples': 21382656, 'steps': 41762, 'batch_loss/train': 0.6956549333408475} +12/28/2021 11:05:42 - INFO - codeparrot_training - Step 41763: {'lr': 3.296621361334723e-05, 'samples': 21383168, 'steps': 41763, 'batch_loss/train': 0.5921090564224869} +12/28/2021 11:05:54 - INFO - codeparrot_training - Step 41764: {'lr': 3.295839036170317e-05, 'samples': 21383680, 'steps': 41764, 'batch_loss/train': 0.5587882734835148} +12/28/2021 11:06:05 - INFO - codeparrot_training - Step 41765: {'lr': 3.295056797293458e-05, 'samples': 21384192, 'steps': 41765, 'batch_loss/train': 0.6765157540794462} +12/28/2021 11:06:15 - INFO - codeparrot_training - Step 41766: {'lr': 3.2942746447072566e-05, 'samples': 21384704, 'steps': 41766, 'batch_loss/train': 0.7851464599370956} +12/28/2021 11:06:26 - INFO - codeparrot_training - Step 41767: {'lr': 3.293492578414822e-05, 'samples': 21385216, 'steps': 41767, 'batch_loss/train': 0.720063341781497} +12/28/2021 11:06:38 - INFO - codeparrot_training - Step 41768: {'lr': 3.2927105984192686e-05, 'samples': 21385728, 'steps': 41768, 'batch_loss/train': 0.7260022085392848} +12/28/2021 11:06:49 - INFO - codeparrot_training - Step 41769: {'lr': 3.291928704723699e-05, 'samples': 21386240, 'steps': 41769, 'batch_loss/train': 0.6827942421659827} +12/28/2021 11:06:59 - INFO - codeparrot_training - Step 41770: {'lr': 3.2911468973312235e-05, 'samples': 21386752, 'steps': 41770, 'batch_loss/train': 0.6572760839480907} +12/28/2021 11:07:13 - INFO - codeparrot_training - Step 41771: {'lr': 3.2903651762449525e-05, 'samples': 21387264, 'steps': 41771, 'batch_loss/train': 0.6535160690546036} +12/28/2021 11:07:24 - INFO - codeparrot_training - Step 41772: {'lr': 3.2895835414679914e-05, 'samples': 21387776, 'steps': 41772, 'batch_loss/train': 0.8635183153674006} +12/28/2021 11:07:34 - INFO - codeparrot_training - Step 41773: {'lr': 3.288801993003454e-05, 'samples': 21388288, 'steps': 41773, 'batch_loss/train': 0.7804685281589627} +12/28/2021 11:07:47 - INFO - codeparrot_training - Step 41774: {'lr': 3.28802053085443e-05, 'samples': 21388800, 'steps': 41774, 'batch_loss/train': 0.7670624041929841} +12/28/2021 11:07:57 - INFO - codeparrot_training - Step 41775: {'lr': 3.2872391550240466e-05, 'samples': 21389312, 'steps': 41775, 'batch_loss/train': 0.7559237948153168} +12/28/2021 11:08:08 - INFO - codeparrot_training - Step 41776: {'lr': 3.286457865515405e-05, 'samples': 21389824, 'steps': 41776, 'batch_loss/train': 0.6848525386303663} +12/28/2021 11:08:19 - INFO - codeparrot_training - Step 41777: {'lr': 3.285676662331605e-05, 'samples': 21390336, 'steps': 41777, 'batch_loss/train': 0.6635421039536595} +12/28/2021 11:08:33 - INFO - codeparrot_training - Step 41778: {'lr': 3.284895545475747e-05, 'samples': 21390848, 'steps': 41778, 'batch_loss/train': 0.7309759529307485} +12/28/2021 11:08:43 - INFO - codeparrot_training - Step 41779: {'lr': 3.2841145149509585e-05, 'samples': 21391360, 'steps': 41779, 'batch_loss/train': 0.7447129711508751} +12/28/2021 11:08:54 - INFO - codeparrot_training - Step 41780: {'lr': 3.2833335707603245e-05, 'samples': 21391872, 'steps': 41780, 'batch_loss/train': 0.5116026933828834} +12/28/2021 11:09:06 - INFO - codeparrot_training - Step 41781: {'lr': 3.282552712906955e-05, 'samples': 21392384, 'steps': 41781, 'batch_loss/train': 0.6821883958764374} +12/28/2021 11:09:17 - INFO - codeparrot_training - Step 41782: {'lr': 3.28177194139396e-05, 'samples': 21392896, 'steps': 41782, 'batch_loss/train': 0.688403346342966} +12/28/2021 11:09:27 - INFO - codeparrot_training - Step 41783: {'lr': 3.280991256224436e-05, 'samples': 21393408, 'steps': 41783, 'batch_loss/train': 0.6858189541380852} +12/28/2021 11:09:39 - INFO - codeparrot_training - Step 41784: {'lr': 3.280210657401489e-05, 'samples': 21393920, 'steps': 41784, 'batch_loss/train': 0.6862544011091813} +12/28/2021 11:09:50 - INFO - codeparrot_training - Step 41785: {'lr': 3.279430144928225e-05, 'samples': 21394432, 'steps': 41785, 'batch_loss/train': 0.7350833611562848} +12/28/2021 11:10:01 - INFO - codeparrot_training - Step 41786: {'lr': 3.2786497188077466e-05, 'samples': 21394944, 'steps': 41786, 'batch_loss/train': 0.709492432884872} +12/28/2021 11:10:13 - INFO - codeparrot_training - Step 41787: {'lr': 3.277869379043152e-05, 'samples': 21395456, 'steps': 41787, 'batch_loss/train': 0.8430783078074455} +12/28/2021 11:10:23 - INFO - codeparrot_training - Step 41788: {'lr': 3.277089125637556e-05, 'samples': 21395968, 'steps': 41788, 'batch_loss/train': 0.6663887738250196} +12/28/2021 11:10:34 - INFO - codeparrot_training - Step 41789: {'lr': 3.2763089585940376e-05, 'samples': 21396480, 'steps': 41789, 'batch_loss/train': 0.8156125424429774} +12/28/2021 11:10:45 - INFO - codeparrot_training - Step 41790: {'lr': 3.275528877915726e-05, 'samples': 21396992, 'steps': 41790, 'batch_loss/train': 0.6789877903647721} +12/28/2021 11:10:59 - INFO - codeparrot_training - Step 41791: {'lr': 3.274748883605699e-05, 'samples': 21397504, 'steps': 41791, 'batch_loss/train': 0.6887303427793086} +12/28/2021 11:11:09 - INFO - codeparrot_training - Step 41792: {'lr': 3.273968975667072e-05, 'samples': 21398016, 'steps': 41792, 'batch_loss/train': 0.846010793466121} +12/28/2021 11:11:20 - INFO - codeparrot_training - Step 41793: {'lr': 3.273189154102937e-05, 'samples': 21398528, 'steps': 41793, 'batch_loss/train': 0.6988995354622602} +12/28/2021 11:11:32 - INFO - codeparrot_training - Step 41794: {'lr': 3.2724094189164e-05, 'samples': 21399040, 'steps': 41794, 'batch_loss/train': 0.7448417304549366} +12/28/2021 11:11:43 - INFO - codeparrot_training - Step 41795: {'lr': 3.27162977011056e-05, 'samples': 21399552, 'steps': 41795, 'batch_loss/train': 0.7791194226592779} +12/28/2021 11:11:54 - INFO - codeparrot_training - Step 41796: {'lr': 3.270850207688514e-05, 'samples': 21400064, 'steps': 41796, 'batch_loss/train': 0.6591093372553587} +12/28/2021 11:12:06 - INFO - codeparrot_training - Step 41797: {'lr': 3.2700707316533654e-05, 'samples': 21400576, 'steps': 41797, 'batch_loss/train': 0.8866951148957014} +12/28/2021 11:12:16 - INFO - codeparrot_training - Step 41798: {'lr': 3.269291342008207e-05, 'samples': 21401088, 'steps': 41798, 'batch_loss/train': 0.6653291108086705} +12/28/2021 11:12:27 - INFO - codeparrot_training - Step 41799: {'lr': 3.2685120387561495e-05, 'samples': 21401600, 'steps': 41799, 'batch_loss/train': 0.6315810601226985} +12/28/2021 11:12:38 - INFO - codeparrot_training - Step 41800: {'lr': 3.267732821900271e-05, 'samples': 21402112, 'steps': 41800, 'batch_loss/train': 1.065648791845888} +12/28/2021 11:12:52 - INFO - codeparrot_training - Step 41801: {'lr': 3.266953691443686e-05, 'samples': 21402624, 'steps': 41801, 'batch_loss/train': 0.917663287371397} +12/28/2021 11:13:02 - INFO - codeparrot_training - Step 41802: {'lr': 3.2661746473894915e-05, 'samples': 21403136, 'steps': 41802, 'batch_loss/train': 0.8683682456612587} +12/28/2021 11:13:13 - INFO - codeparrot_training - Step 41803: {'lr': 3.2653956897407774e-05, 'samples': 21403648, 'steps': 41803, 'batch_loss/train': 0.6885051636490971} +12/28/2021 11:13:25 - INFO - codeparrot_training - Step 41804: {'lr': 3.264616818500637e-05, 'samples': 21404160, 'steps': 41804, 'batch_loss/train': 0.7439057808369398} +12/28/2021 11:13:36 - INFO - codeparrot_training - Step 41805: {'lr': 3.263838033672184e-05, 'samples': 21404672, 'steps': 41805, 'batch_loss/train': 0.8016996365040541} +12/28/2021 11:13:46 - INFO - codeparrot_training - Step 41806: {'lr': 3.263059335258497e-05, 'samples': 21405184, 'steps': 41806, 'batch_loss/train': 0.7001268188469112} +12/28/2021 11:14:00 - INFO - codeparrot_training - Step 41807: {'lr': 3.262280723262673e-05, 'samples': 21405696, 'steps': 41807, 'batch_loss/train': 0.6539054298773408} +12/28/2021 11:14:11 - INFO - codeparrot_training - Step 41808: {'lr': 3.261502197687827e-05, 'samples': 21406208, 'steps': 41808, 'batch_loss/train': 0.7449217475950718} +12/28/2021 11:14:22 - INFO - codeparrot_training - Step 41809: {'lr': 3.260723758537032e-05, 'samples': 21406720, 'steps': 41809, 'batch_loss/train': 0.8194195628166199} +12/28/2021 11:14:34 - INFO - codeparrot_training - Step 41810: {'lr': 3.259945405813392e-05, 'samples': 21407232, 'steps': 41810, 'batch_loss/train': 0.7091462928801775} +12/28/2021 11:14:44 - INFO - codeparrot_training - Step 41811: {'lr': 3.2591671395199975e-05, 'samples': 21407744, 'steps': 41811, 'batch_loss/train': 0.7157964073121548} +12/28/2021 11:14:55 - INFO - codeparrot_training - Step 41812: {'lr': 3.258388959659947e-05, 'samples': 21408256, 'steps': 41812, 'batch_loss/train': 0.7150787242571823} +12/28/2021 11:15:05 - INFO - codeparrot_training - Step 41813: {'lr': 3.257610866236335e-05, 'samples': 21408768, 'steps': 41813, 'batch_loss/train': 0.7850616322830319} +12/28/2021 11:15:18 - INFO - codeparrot_training - Step 41814: {'lr': 3.256832859252257e-05, 'samples': 21409280, 'steps': 41814, 'batch_loss/train': 0.6858817194588482} +12/28/2021 11:15:28 - INFO - codeparrot_training - Step 41815: {'lr': 3.2560549387107896e-05, 'samples': 21409792, 'steps': 41815, 'batch_loss/train': 0.7166521803010255} +12/28/2021 11:15:39 - INFO - codeparrot_training - Step 41816: {'lr': 3.2552771046150426e-05, 'samples': 21410304, 'steps': 41816, 'batch_loss/train': 0.7486347649246454} +12/28/2021 11:15:53 - INFO - codeparrot_training - Step 41817: {'lr': 3.2544993569681104e-05, 'samples': 21410816, 'steps': 41817, 'batch_loss/train': 0.6676380494609475} +12/28/2021 11:16:03 - INFO - codeparrot_training - Step 41818: {'lr': 3.2537216957730654e-05, 'samples': 21411328, 'steps': 41818, 'batch_loss/train': 0.7744477177038789} +12/28/2021 11:16:14 - INFO - codeparrot_training - Step 41819: {'lr': 3.252944121033025e-05, 'samples': 21411840, 'steps': 41819, 'batch_loss/train': 0.676997919101268} +12/28/2021 11:16:26 - INFO - codeparrot_training - Step 41820: {'lr': 3.2521666327510606e-05, 'samples': 21412352, 'steps': 41820, 'batch_loss/train': 0.6637225586455315} +12/28/2021 11:16:37 - INFO - codeparrot_training - Step 41821: {'lr': 3.251389230930271e-05, 'samples': 21412864, 'steps': 41821, 'batch_loss/train': 0.7255173800513148} +12/28/2021 11:16:48 - INFO - codeparrot_training - Step 41822: {'lr': 3.250611915573745e-05, 'samples': 21413376, 'steps': 41822, 'batch_loss/train': 0.6782847158610821} +12/28/2021 11:16:58 - INFO - codeparrot_training - Step 41823: {'lr': 3.249834686684572e-05, 'samples': 21413888, 'steps': 41823, 'batch_loss/train': 0.9891242564772256} +12/28/2021 11:17:11 - INFO - codeparrot_training - Step 41824: {'lr': 3.249057544265846e-05, 'samples': 21414400, 'steps': 41824, 'batch_loss/train': 0.5498278585146181} +12/28/2021 11:17:21 - INFO - codeparrot_training - Step 41825: {'lr': 3.24828048832066e-05, 'samples': 21414912, 'steps': 41825, 'batch_loss/train': 0.6544965747743845} +12/28/2021 11:17:32 - INFO - codeparrot_training - Step 41826: {'lr': 3.247503518852085e-05, 'samples': 21415424, 'steps': 41826, 'batch_loss/train': 0.5842925147153437} +12/28/2021 11:17:44 - INFO - codeparrot_training - Step 41827: {'lr': 3.2467266358632287e-05, 'samples': 21415936, 'steps': 41827, 'batch_loss/train': 0.7163987052626908} +12/28/2021 11:17:55 - INFO - codeparrot_training - Step 41828: {'lr': 3.245949839357182e-05, 'samples': 21416448, 'steps': 41828, 'batch_loss/train': 0.6820010272786021} +12/28/2021 11:18:05 - INFO - codeparrot_training - Step 41829: {'lr': 3.2451731293370144e-05, 'samples': 21416960, 'steps': 41829, 'batch_loss/train': 0.6493507409468293} +12/28/2021 11:18:19 - INFO - codeparrot_training - Step 41830: {'lr': 3.2443965058058214e-05, 'samples': 21417472, 'steps': 41830, 'batch_loss/train': 0.6576981833204627} +12/28/2021 11:18:30 - INFO - codeparrot_training - Step 41831: {'lr': 3.2436199687667036e-05, 'samples': 21417984, 'steps': 41831, 'batch_loss/train': 0.907519212923944} +12/28/2021 11:18:41 - INFO - codeparrot_training - Step 41832: {'lr': 3.2428435182227326e-05, 'samples': 21418496, 'steps': 41832, 'batch_loss/train': 0.8451500781811774} +12/28/2021 11:18:51 - INFO - codeparrot_training - Step 41833: {'lr': 3.2420671541769944e-05, 'samples': 21419008, 'steps': 41833, 'batch_loss/train': 0.610056625213474} +12/28/2021 11:19:03 - INFO - codeparrot_training - Step 41834: {'lr': 3.241290876632594e-05, 'samples': 21419520, 'steps': 41834, 'batch_loss/train': 0.4907033514173236} +12/28/2021 11:19:14 - INFO - codeparrot_training - Step 41835: {'lr': 3.240514685592599e-05, 'samples': 21420032, 'steps': 41835, 'batch_loss/train': 0.6298485505394638} +12/28/2021 11:19:25 - INFO - codeparrot_training - Step 41836: {'lr': 3.239738581060103e-05, 'samples': 21420544, 'steps': 41836, 'batch_loss/train': 0.6606026692315936} +12/28/2021 11:19:37 - INFO - codeparrot_training - Step 41837: {'lr': 3.238962563038189e-05, 'samples': 21421056, 'steps': 41837, 'batch_loss/train': 0.6803353005088866} +12/28/2021 11:19:47 - INFO - codeparrot_training - Step 41838: {'lr': 3.238186631529943e-05, 'samples': 21421568, 'steps': 41838, 'batch_loss/train': 1.4896519221365452} +12/28/2021 11:19:58 - INFO - codeparrot_training - Step 41839: {'lr': 3.23741078653845e-05, 'samples': 21422080, 'steps': 41839, 'batch_loss/train': 0.5709012541919947} +12/28/2021 11:20:12 - INFO - codeparrot_training - Step 41840: {'lr': 3.2366350280668005e-05, 'samples': 21422592, 'steps': 41840, 'batch_loss/train': 0.723574265895877} +12/28/2021 11:20:23 - INFO - codeparrot_training - Step 41841: {'lr': 3.235859356118062e-05, 'samples': 21423104, 'steps': 41841, 'batch_loss/train': 0.5943883988074958} +12/28/2021 11:20:33 - INFO - codeparrot_training - Step 41842: {'lr': 3.2350837706953325e-05, 'samples': 21423616, 'steps': 41842, 'batch_loss/train': 0.8199374433606863} +12/28/2021 11:20:44 - INFO - codeparrot_training - Step 41843: {'lr': 3.234308271801698e-05, 'samples': 21424128, 'steps': 41843, 'batch_loss/train': 0.6109979720786214} +12/28/2021 11:20:56 - INFO - codeparrot_training - Step 41844: {'lr': 3.2335328594402245e-05, 'samples': 21424640, 'steps': 41844, 'batch_loss/train': 0.7386167556978762} +12/28/2021 11:21:07 - INFO - codeparrot_training - Step 41845: {'lr': 3.232757533614017e-05, 'samples': 21425152, 'steps': 41845, 'batch_loss/train': 0.6577914687804878} +12/28/2021 11:21:17 - INFO - codeparrot_training - Step 41846: {'lr': 3.2319822943261415e-05, 'samples': 21425664, 'steps': 41846, 'batch_loss/train': 0.7020851233974099} +12/28/2021 11:21:31 - INFO - codeparrot_training - Step 41847: {'lr': 3.231207141579684e-05, 'samples': 21426176, 'steps': 41847, 'batch_loss/train': 0.6649303804151714} +12/28/2021 11:21:42 - INFO - codeparrot_training - Step 41848: {'lr': 3.2304320753777286e-05, 'samples': 21426688, 'steps': 41848, 'batch_loss/train': 0.7378305410966277} +12/28/2021 11:21:52 - INFO - codeparrot_training - Step 41849: {'lr': 3.229657095723354e-05, 'samples': 21427200, 'steps': 41849, 'batch_loss/train': 0.644118650816381} +12/28/2021 11:22:05 - INFO - codeparrot_training - Step 41850: {'lr': 3.228882202619643e-05, 'samples': 21427712, 'steps': 41850, 'batch_loss/train': 0.7200832781381905} +12/28/2021 11:22:15 - INFO - codeparrot_training - Step 41851: {'lr': 3.2281073960696764e-05, 'samples': 21428224, 'steps': 41851, 'batch_loss/train': 0.8013863041996956} +12/28/2021 11:22:26 - INFO - codeparrot_training - Step 41852: {'lr': 3.227332676076536e-05, 'samples': 21428736, 'steps': 41852, 'batch_loss/train': 0.7368810009211302} +12/28/2021 11:22:38 - INFO - codeparrot_training - Step 41853: {'lr': 3.226558042643296e-05, 'samples': 21429248, 'steps': 41853, 'batch_loss/train': 0.7976217579562217} +12/28/2021 11:22:49 - INFO - codeparrot_training - Step 41854: {'lr': 3.2257834957730464e-05, 'samples': 21429760, 'steps': 41854, 'batch_loss/train': 0.7557666962966323} +12/28/2021 11:22:59 - INFO - codeparrot_training - Step 41855: {'lr': 3.225009035468851e-05, 'samples': 21430272, 'steps': 41855, 'batch_loss/train': 0.7692720210179687} +12/28/2021 11:23:10 - INFO - codeparrot_training - Step 41856: {'lr': 3.224234661733802e-05, 'samples': 21430784, 'steps': 41856, 'batch_loss/train': 0.5566669620748144} +12/28/2021 11:23:24 - INFO - codeparrot_training - Step 41857: {'lr': 3.22346037457098e-05, 'samples': 21431296, 'steps': 41857, 'batch_loss/train': 0.9297602036967874} +12/28/2021 11:23:34 - INFO - codeparrot_training - Step 41858: {'lr': 3.22268617398345e-05, 'samples': 21431808, 'steps': 41858, 'batch_loss/train': 0.7125035915523767} +12/28/2021 11:23:45 - INFO - codeparrot_training - Step 41859: {'lr': 3.221912059974291e-05, 'samples': 21432320, 'steps': 41859, 'batch_loss/train': 0.7037900503491983} +12/28/2021 11:23:57 - INFO - codeparrot_training - Step 41860: {'lr': 3.2211380325466e-05, 'samples': 21432832, 'steps': 41860, 'batch_loss/train': 0.8468417413532734} +12/28/2021 11:24:08 - INFO - codeparrot_training - Step 41861: {'lr': 3.2203640917034346e-05, 'samples': 21433344, 'steps': 41861, 'batch_loss/train': 0.6989048479590565} +12/28/2021 11:24:18 - INFO - codeparrot_training - Step 41862: {'lr': 3.219590237447878e-05, 'samples': 21433856, 'steps': 41862, 'batch_loss/train': 0.7399593316949904} +12/28/2021 11:24:30 - INFO - codeparrot_training - Step 41863: {'lr': 3.218816469783004e-05, 'samples': 21434368, 'steps': 41863, 'batch_loss/train': 0.7189590008929372} +12/28/2021 11:24:41 - INFO - codeparrot_training - Step 41864: {'lr': 3.2180427887118946e-05, 'samples': 21434880, 'steps': 41864, 'batch_loss/train': 0.723692902829498} +12/28/2021 11:24:51 - INFO - codeparrot_training - Step 41865: {'lr': 3.217269194237621e-05, 'samples': 21435392, 'steps': 41865, 'batch_loss/train': 0.7216054555028677} +12/28/2021 11:25:06 - INFO - codeparrot_training - Step 41866: {'lr': 3.216495686363266e-05, 'samples': 21435904, 'steps': 41866, 'batch_loss/train': 0.6948236444150098} +12/28/2021 11:25:16 - INFO - codeparrot_training - Step 41867: {'lr': 3.2157222650918885e-05, 'samples': 21436416, 'steps': 41867, 'batch_loss/train': 0.816343781829346} +12/28/2021 11:25:27 - INFO - codeparrot_training - Step 41868: {'lr': 3.2149489304265786e-05, 'samples': 21436928, 'steps': 41868, 'batch_loss/train': 0.786481544142589} +12/28/2021 11:25:37 - INFO - codeparrot_training - Step 41869: {'lr': 3.2141756823704126e-05, 'samples': 21437440, 'steps': 41869, 'batch_loss/train': 0.6548048508702777} +12/28/2021 11:25:50 - INFO - codeparrot_training - Step 41870: {'lr': 3.213402520926448e-05, 'samples': 21437952, 'steps': 41870, 'batch_loss/train': 0.5440617083513644} +12/28/2021 11:26:01 - INFO - codeparrot_training - Step 41871: {'lr': 3.2126294460977754e-05, 'samples': 21438464, 'steps': 41871, 'batch_loss/train': 0.7231053402647376} +12/28/2021 11:26:11 - INFO - codeparrot_training - Step 41872: {'lr': 3.2118564578874604e-05, 'samples': 21438976, 'steps': 41872, 'batch_loss/train': 0.7190522474702448} +12/28/2021 11:26:23 - INFO - codeparrot_training - Step 41873: {'lr': 3.211083556298575e-05, 'samples': 21439488, 'steps': 41873, 'batch_loss/train': 0.4663205978577025} +12/28/2021 11:26:34 - INFO - codeparrot_training - Step 41874: {'lr': 3.210310741334196e-05, 'samples': 21440000, 'steps': 41874, 'batch_loss/train': 0.7644684142433107} +12/28/2021 11:26:44 - INFO - codeparrot_training - Step 41875: {'lr': 3.209538012997393e-05, 'samples': 21440512, 'steps': 41875, 'batch_loss/train': 0.73669399227947} +12/28/2021 11:26:58 - INFO - codeparrot_training - Step 41876: {'lr': 3.208765371291239e-05, 'samples': 21441024, 'steps': 41876, 'batch_loss/train': 0.6896932595409453} +12/28/2021 11:27:08 - INFO - codeparrot_training - Step 41877: {'lr': 3.207992816218805e-05, 'samples': 21441536, 'steps': 41877, 'batch_loss/train': 0.7024845089763403} +12/28/2021 11:27:19 - INFO - codeparrot_training - Step 41878: {'lr': 3.207220347783163e-05, 'samples': 21442048, 'steps': 41878, 'batch_loss/train': 0.6696505798026919} +12/28/2021 11:27:30 - INFO - codeparrot_training - Step 41879: {'lr': 3.2064479659873855e-05, 'samples': 21442560, 'steps': 41879, 'batch_loss/train': 0.7037370875477791} +12/28/2021 11:27:42 - INFO - codeparrot_training - Step 41880: {'lr': 3.2056756708345486e-05, 'samples': 21443072, 'steps': 41880, 'batch_loss/train': 0.6928228511242196} +12/28/2021 11:27:52 - INFO - codeparrot_training - Step 41881: {'lr': 3.204903462327702e-05, 'samples': 21443584, 'steps': 41881, 'batch_loss/train': 0.7113428660668433} +12/28/2021 11:28:03 - INFO - codeparrot_training - Step 41882: {'lr': 3.2041313404699376e-05, 'samples': 21444096, 'steps': 41882, 'batch_loss/train': 0.6884794444777071} +12/28/2021 11:28:17 - INFO - codeparrot_training - Step 41883: {'lr': 3.2033593052643225e-05, 'samples': 21444608, 'steps': 41883, 'batch_loss/train': 0.7550574047490954} +12/28/2021 11:28:28 - INFO - codeparrot_training - Step 41884: {'lr': 3.202587356713915e-05, 'samples': 21445120, 'steps': 41884, 'batch_loss/train': 0.6788158467970788} +12/28/2021 11:28:38 - INFO - codeparrot_training - Step 41885: {'lr': 3.201815494821783e-05, 'samples': 21445632, 'steps': 41885, 'batch_loss/train': 0.6465384536422789} +12/28/2021 11:28:50 - INFO - codeparrot_training - Step 41886: {'lr': 3.201043719591015e-05, 'samples': 21446144, 'steps': 41886, 'batch_loss/train': 0.6882705690222792} +12/28/2021 11:29:01 - INFO - codeparrot_training - Step 41887: {'lr': 3.200272031024659e-05, 'samples': 21446656, 'steps': 41887, 'batch_loss/train': 0.6339459273440298} +12/28/2021 11:29:12 - INFO - codeparrot_training - Step 41888: {'lr': 3.19950042912579e-05, 'samples': 21447168, 'steps': 41888, 'batch_loss/train': 0.7151960977353156} +12/28/2021 11:29:24 - INFO - codeparrot_training - Step 41889: {'lr': 3.198728913897478e-05, 'samples': 21447680, 'steps': 41889, 'batch_loss/train': 0.6735725114122033} +12/28/2021 11:29:35 - INFO - codeparrot_training - Step 41890: {'lr': 3.1979574853427867e-05, 'samples': 21448192, 'steps': 41890, 'batch_loss/train': 0.6691383798606694} +12/28/2021 11:29:45 - INFO - codeparrot_training - Step 41891: {'lr': 3.197186143464784e-05, 'samples': 21448704, 'steps': 41891, 'batch_loss/train': 0.6249335269676521} +12/28/2021 11:29:56 - INFO - codeparrot_training - Step 41892: {'lr': 3.196414888266536e-05, 'samples': 21449216, 'steps': 41892, 'batch_loss/train': 0.7190344082191586} +12/28/2021 11:30:10 - INFO - codeparrot_training - Step 41893: {'lr': 3.1956437197511094e-05, 'samples': 21449728, 'steps': 41893, 'batch_loss/train': 0.6600276732351631} +12/28/2021 11:30:20 - INFO - codeparrot_training - Step 41894: {'lr': 3.194872637921573e-05, 'samples': 21450240, 'steps': 41894, 'batch_loss/train': 0.6057414966635406} +12/28/2021 11:30:31 - INFO - codeparrot_training - Step 41895: {'lr': 3.194101642780994e-05, 'samples': 21450752, 'steps': 41895, 'batch_loss/train': 0.6281923049828038} +12/28/2021 11:30:43 - INFO - codeparrot_training - Step 41896: {'lr': 3.1933307343324226e-05, 'samples': 21451264, 'steps': 41896, 'batch_loss/train': 0.7608408750966191} +12/28/2021 11:30:54 - INFO - codeparrot_training - Step 41897: {'lr': 3.1925599125789375e-05, 'samples': 21451776, 'steps': 41897, 'batch_loss/train': 0.5782144937547855} +12/28/2021 11:31:04 - INFO - codeparrot_training - Step 41898: {'lr': 3.191789177523607e-05, 'samples': 21452288, 'steps': 41898, 'batch_loss/train': 0.8418185827322304} +12/28/2021 11:31:17 - INFO - codeparrot_training - Step 41899: {'lr': 3.191018529169484e-05, 'samples': 21452800, 'steps': 41899, 'batch_loss/train': 0.7384760184213519} +12/28/2021 11:31:27 - INFO - codeparrot_training - Step 41900: {'lr': 3.190247967519635e-05, 'samples': 21453312, 'steps': 41900, 'batch_loss/train': 0.6830251840874553} +12/28/2021 11:31:38 - INFO - codeparrot_training - Step 41901: {'lr': 3.1894774925771254e-05, 'samples': 21453824, 'steps': 41901, 'batch_loss/train': 0.6634230203926563} +12/28/2021 11:31:48 - INFO - codeparrot_training - Step 41902: {'lr': 3.188707104345018e-05, 'samples': 21454336, 'steps': 41902, 'batch_loss/train': 0.8104381589218974} +12/28/2021 11:32:01 - INFO - codeparrot_training - Step 41903: {'lr': 3.187936802826377e-05, 'samples': 21454848, 'steps': 41903, 'batch_loss/train': 0.7628686968237162} +12/28/2021 11:32:11 - INFO - codeparrot_training - Step 41904: {'lr': 3.187166588024262e-05, 'samples': 21455360, 'steps': 41904, 'batch_loss/train': 0.631461763754487} +12/28/2021 11:32:22 - INFO - codeparrot_training - Step 41905: {'lr': 3.186396459941737e-05, 'samples': 21455872, 'steps': 41905, 'batch_loss/train': 0.6603207486041356} +12/28/2021 11:32:36 - INFO - codeparrot_training - Step 41906: {'lr': 3.185626418581861e-05, 'samples': 21456384, 'steps': 41906, 'batch_loss/train': 0.7356271436437964} +12/28/2021 11:32:46 - INFO - codeparrot_training - Step 41907: {'lr': 3.1848564639477e-05, 'samples': 21456896, 'steps': 41907, 'batch_loss/train': 0.6869251280440949} +12/28/2021 11:32:57 - INFO - codeparrot_training - Step 41908: {'lr': 3.1840865960423127e-05, 'samples': 21457408, 'steps': 41908, 'batch_loss/train': 0.7061462076380849} +12/28/2021 11:33:09 - INFO - codeparrot_training - Step 41909: {'lr': 3.183316814868764e-05, 'samples': 21457920, 'steps': 41909, 'batch_loss/train': 0.6570779655594379} +12/28/2021 11:33:20 - INFO - codeparrot_training - Step 41910: {'lr': 3.182547120430104e-05, 'samples': 21458432, 'steps': 41910, 'batch_loss/train': 0.6296967691741884} +12/28/2021 11:33:31 - INFO - codeparrot_training - Step 41911: {'lr': 3.1817775127293945e-05, 'samples': 21458944, 'steps': 41911, 'batch_loss/train': 0.6701431244728155} +12/28/2021 11:33:43 - INFO - codeparrot_training - Step 41912: {'lr': 3.18100799176971e-05, 'samples': 21459456, 'steps': 41912, 'batch_loss/train': 0.7575281513854861} +12/28/2021 11:33:53 - INFO - codeparrot_training - Step 41913: {'lr': 3.1802385575540935e-05, 'samples': 21459968, 'steps': 41913, 'batch_loss/train': 0.6937234337674454} +12/28/2021 11:34:04 - INFO - codeparrot_training - Step 41914: {'lr': 3.179469210085611e-05, 'samples': 21460480, 'steps': 41914, 'batch_loss/train': 0.6747793322429061} +12/28/2021 11:34:14 - INFO - codeparrot_training - Step 41915: {'lr': 3.1786999493673166e-05, 'samples': 21460992, 'steps': 41915, 'batch_loss/train': 0.7667432120069861} +12/28/2021 11:34:28 - INFO - codeparrot_training - Step 41916: {'lr': 3.177930775402274e-05, 'samples': 21461504, 'steps': 41916, 'batch_loss/train': 0.6959433824522421} +12/28/2021 11:34:39 - INFO - codeparrot_training - Step 41917: {'lr': 3.177161688193539e-05, 'samples': 21462016, 'steps': 41917, 'batch_loss/train': 0.572084509767592} +12/28/2021 11:34:50 - INFO - codeparrot_training - Step 41918: {'lr': 3.17639268774417e-05, 'samples': 21462528, 'steps': 41918, 'batch_loss/train': 0.708789570897352} +12/28/2021 11:35:02 - INFO - codeparrot_training - Step 41919: {'lr': 3.175623774057221e-05, 'samples': 21463040, 'steps': 41919, 'batch_loss/train': 0.5679454689379781} +12/28/2021 11:35:13 - INFO - codeparrot_training - Step 41920: {'lr': 3.174854947135752e-05, 'samples': 21463552, 'steps': 41920, 'batch_loss/train': 0.7347481594479177} +12/28/2021 11:35:23 - INFO - codeparrot_training - Step 41921: {'lr': 3.174086206982826e-05, 'samples': 21464064, 'steps': 41921, 'batch_loss/train': 0.673481794539839} +12/28/2021 11:35:37 - INFO - codeparrot_training - Step 41922: {'lr': 3.173317553601482e-05, 'samples': 21464576, 'steps': 41922, 'batch_loss/train': 0.7084976712940261} +12/28/2021 11:35:48 - INFO - codeparrot_training - Step 41923: {'lr': 3.172548986994789e-05, 'samples': 21465088, 'steps': 41923, 'batch_loss/train': 0.748061683960259} +12/28/2021 11:35:58 - INFO - codeparrot_training - Step 41924: {'lr': 3.1717805071658056e-05, 'samples': 21465600, 'steps': 41924, 'batch_loss/train': 0.6865296382457018} +12/28/2021 11:36:09 - INFO - codeparrot_training - Step 41925: {'lr': 3.1710121141175744e-05, 'samples': 21466112, 'steps': 41925, 'batch_loss/train': 0.6633579162880778} +12/28/2021 11:36:21 - INFO - codeparrot_training - Step 41926: {'lr': 3.170243807853157e-05, 'samples': 21466624, 'steps': 41926, 'batch_loss/train': 0.7109962801914662} +12/28/2021 11:36:32 - INFO - codeparrot_training - Step 41927: {'lr': 3.1694755883756085e-05, 'samples': 21467136, 'steps': 41927, 'batch_loss/train': 0.7638377868570387} +12/28/2021 11:36:42 - INFO - codeparrot_training - Step 41928: {'lr': 3.168707455687983e-05, 'samples': 21467648, 'steps': 41928, 'batch_loss/train': 0.6808460103347898} +12/28/2021 11:36:54 - INFO - codeparrot_training - Step 41929: {'lr': 3.167939409793333e-05, 'samples': 21468160, 'steps': 41929, 'batch_loss/train': 0.6150993839837611} +12/28/2021 11:37:05 - INFO - codeparrot_training - Step 41930: {'lr': 3.1671714506947114e-05, 'samples': 21468672, 'steps': 41930, 'batch_loss/train': 0.7134444513358176} +12/28/2021 11:37:16 - INFO - codeparrot_training - Step 41931: {'lr': 3.1664035783951716e-05, 'samples': 21469184, 'steps': 41931, 'batch_loss/train': 0.65095388982445} +12/28/2021 11:37:28 - INFO - codeparrot_training - Step 41932: {'lr': 3.16563579289777e-05, 'samples': 21469696, 'steps': 41932, 'batch_loss/train': 0.5824168764520437} +12/28/2021 11:37:39 - INFO - codeparrot_training - Step 41933: {'lr': 3.164868094205553e-05, 'samples': 21470208, 'steps': 41933, 'batch_loss/train': 0.5716842953115702} +12/28/2021 11:37:49 - INFO - codeparrot_training - Step 41934: {'lr': 3.164100482321577e-05, 'samples': 21470720, 'steps': 41934, 'batch_loss/train': 0.7452558809891343} +12/28/2021 11:38:00 - INFO - codeparrot_training - Step 41935: {'lr': 3.163332957248899e-05, 'samples': 21471232, 'steps': 41935, 'batch_loss/train': 0.7260061141569167} +12/28/2021 11:38:14 - INFO - codeparrot_training - Step 41936: {'lr': 3.1625655189905595e-05, 'samples': 21471744, 'steps': 41936, 'batch_loss/train': 0.7202402930706739} +12/28/2021 11:38:25 - INFO - codeparrot_training - Step 41937: {'lr': 3.1617981675496066e-05, 'samples': 21472256, 'steps': 41937, 'batch_loss/train': 0.5068489476107061} +12/28/2021 11:38:35 - INFO - codeparrot_training - Step 41938: {'lr': 3.16103090292911e-05, 'samples': 21472768, 'steps': 41938, 'batch_loss/train': 0.6265196242020465} +12/28/2021 11:38:47 - INFO - codeparrot_training - Step 41939: {'lr': 3.160263725132101e-05, 'samples': 21473280, 'steps': 41939, 'batch_loss/train': 0.6123976334929466} +12/28/2021 11:38:58 - INFO - codeparrot_training - Step 41940: {'lr': 3.15949663416164e-05, 'samples': 21473792, 'steps': 41940, 'batch_loss/train': 0.6973670720472} +12/28/2021 11:39:09 - INFO - codeparrot_training - Step 41941: {'lr': 3.1587296300207705e-05, 'samples': 21474304, 'steps': 41941, 'batch_loss/train': 0.7710319720208645} +12/28/2021 11:39:21 - INFO - codeparrot_training - Step 41942: {'lr': 3.1579627127125475e-05, 'samples': 21474816, 'steps': 41942, 'batch_loss/train': 0.7491324730217457} +12/28/2021 11:39:31 - INFO - codeparrot_training - Step 41943: {'lr': 3.1571958822400186e-05, 'samples': 21475328, 'steps': 41943, 'batch_loss/train': 0.7999070044606924} +12/28/2021 11:39:42 - INFO - codeparrot_training - Step 41944: {'lr': 3.156429138606229e-05, 'samples': 21475840, 'steps': 41944, 'batch_loss/train': 0.7638589683920145} +12/28/2021 11:39:56 - INFO - codeparrot_training - Step 41945: {'lr': 3.155662481814231e-05, 'samples': 21476352, 'steps': 41945, 'batch_loss/train': 0.6933551477268338} +12/28/2021 11:40:07 - INFO - codeparrot_training - Step 41946: {'lr': 3.15489591186707e-05, 'samples': 21476864, 'steps': 41946, 'batch_loss/train': 0.6060746019938961} +12/28/2021 11:40:17 - INFO - codeparrot_training - Step 41947: {'lr': 3.154129428767799e-05, 'samples': 21477376, 'steps': 41947, 'batch_loss/train': 1.4385629380121827} +12/28/2021 11:40:28 - INFO - codeparrot_training - Step 41948: {'lr': 3.1533630325194517e-05, 'samples': 21477888, 'steps': 41948, 'batch_loss/train': 0.7108439006842673} +12/28/2021 11:40:40 - INFO - codeparrot_training - Step 41949: {'lr': 3.152596723125087e-05, 'samples': 21478400, 'steps': 41949, 'batch_loss/train': 0.6059795008040965} +12/28/2021 11:40:51 - INFO - codeparrot_training - Step 41950: {'lr': 3.151830500587755e-05, 'samples': 21478912, 'steps': 41950, 'batch_loss/train': 0.736034143017605} +12/28/2021 11:41:01 - INFO - codeparrot_training - Step 41951: {'lr': 3.1510643649104916e-05, 'samples': 21479424, 'steps': 41951, 'batch_loss/train': 0.6479436529043596} +12/28/2021 11:41:13 - INFO - codeparrot_training - Step 41952: {'lr': 3.150298316096339e-05, 'samples': 21479936, 'steps': 41952, 'batch_loss/train': 0.7009750502184033} +12/28/2021 11:41:24 - INFO - codeparrot_training - Step 41953: {'lr': 3.1495323541483626e-05, 'samples': 21480448, 'steps': 41953, 'batch_loss/train': 0.6771215936169028} +12/28/2021 11:41:35 - INFO - codeparrot_training - Step 41954: {'lr': 3.1487664790695866e-05, 'samples': 21480960, 'steps': 41954, 'batch_loss/train': 0.7063091518357396} +12/28/2021 11:41:49 - INFO - codeparrot_training - Step 41955: {'lr': 3.1480006908630674e-05, 'samples': 21481472, 'steps': 41955, 'batch_loss/train': 0.6372980964370072} +12/28/2021 11:41:59 - INFO - codeparrot_training - Step 41956: {'lr': 3.147234989531847e-05, 'samples': 21481984, 'steps': 41956, 'batch_loss/train': 0.6808179087820463} +12/28/2021 11:42:10 - INFO - codeparrot_training - Step 41957: {'lr': 3.146469375078967e-05, 'samples': 21482496, 'steps': 41957, 'batch_loss/train': 0.7443834603764117} +12/28/2021 11:42:20 - INFO - codeparrot_training - Step 41958: {'lr': 3.145703847507475e-05, 'samples': 21483008, 'steps': 41958, 'batch_loss/train': 0.7069458061596379} +12/28/2021 11:42:33 - INFO - codeparrot_training - Step 41959: {'lr': 3.144938406820411e-05, 'samples': 21483520, 'steps': 41959, 'batch_loss/train': 0.8005030578933656} +12/28/2021 11:42:43 - INFO - codeparrot_training - Step 41960: {'lr': 3.144173053020821e-05, 'samples': 21484032, 'steps': 41960, 'batch_loss/train': 0.7060241284198128} +12/28/2021 11:42:54 - INFO - codeparrot_training - Step 41961: {'lr': 3.143407786111749e-05, 'samples': 21484544, 'steps': 41961, 'batch_loss/train': 0.5888929240172729} +12/28/2021 11:43:08 - INFO - codeparrot_training - Step 41962: {'lr': 3.142642606096233e-05, 'samples': 21485056, 'steps': 41962, 'batch_loss/train': 0.6117163323797286} +12/28/2021 11:43:18 - INFO - codeparrot_training - Step 41963: {'lr': 3.1418775129773094e-05, 'samples': 21485568, 'steps': 41963, 'batch_loss/train': 0.7302329316735268} +12/28/2021 11:43:29 - INFO - codeparrot_training - Step 41964: {'lr': 3.14111250675804e-05, 'samples': 21486080, 'steps': 41964, 'batch_loss/train': 0.7085748764220625} +12/28/2021 11:43:41 - INFO - codeparrot_training - Step 41965: {'lr': 3.140347587441447e-05, 'samples': 21486592, 'steps': 41965, 'batch_loss/train': 0.6931532183662057} +12/28/2021 11:43:52 - INFO - codeparrot_training - Step 41966: {'lr': 3.1395827550305734e-05, 'samples': 21487104, 'steps': 41966, 'batch_loss/train': 0.7059168880805373} +12/28/2021 11:44:02 - INFO - codeparrot_training - Step 41967: {'lr': 3.138818009528474e-05, 'samples': 21487616, 'steps': 41967, 'batch_loss/train': 0.647093212697655} +12/28/2021 11:44:13 - INFO - codeparrot_training - Step 41968: {'lr': 3.1380533509381775e-05, 'samples': 21488128, 'steps': 41968, 'batch_loss/train': 0.7356294032651931} +12/28/2021 11:44:25 - INFO - codeparrot_training - Step 41969: {'lr': 3.137288779262723e-05, 'samples': 21488640, 'steps': 41969, 'batch_loss/train': 0.9048241511918604} +12/28/2021 11:44:36 - INFO - codeparrot_training - Step 41970: {'lr': 3.136524294505155e-05, 'samples': 21489152, 'steps': 41970, 'batch_loss/train': 0.7928490396589041} +12/28/2021 11:44:46 - INFO - codeparrot_training - Step 41971: {'lr': 3.135759896668511e-05, 'samples': 21489664, 'steps': 41971, 'batch_loss/train': 0.7542711752466857} +12/28/2021 11:44:59 - INFO - codeparrot_training - Step 41972: {'lr': 3.134995585755829e-05, 'samples': 21490176, 'steps': 41972, 'batch_loss/train': 0.6628543925471604} +12/28/2021 11:45:09 - INFO - codeparrot_training - Step 41973: {'lr': 3.134231361770154e-05, 'samples': 21490688, 'steps': 41973, 'batch_loss/train': 0.517011390067637} +12/28/2021 11:45:20 - INFO - codeparrot_training - Step 41974: {'lr': 3.133467224714509e-05, 'samples': 21491200, 'steps': 41974, 'batch_loss/train': 0.7040979412849993} +12/28/2021 11:45:34 - INFO - codeparrot_training - Step 41975: {'lr': 3.132703174591947e-05, 'samples': 21491712, 'steps': 41975, 'batch_loss/train': 0.7132402188144624} +12/28/2021 11:45:44 - INFO - codeparrot_training - Step 41976: {'lr': 3.131939211405505e-05, 'samples': 21492224, 'steps': 41976, 'batch_loss/train': 0.7220703447237611} +12/28/2021 11:45:55 - INFO - codeparrot_training - Step 41977: {'lr': 3.1311753351582125e-05, 'samples': 21492736, 'steps': 41977, 'batch_loss/train': 0.7358604930341244} +12/28/2021 11:46:06 - INFO - codeparrot_training - Step 41978: {'lr': 3.130411545853101e-05, 'samples': 21493248, 'steps': 41978, 'batch_loss/train': 0.8005333133041859} +12/28/2021 11:46:18 - INFO - codeparrot_training - Step 41979: {'lr': 3.1296478434932276e-05, 'samples': 21493760, 'steps': 41979, 'batch_loss/train': 0.7180085110594518} +12/28/2021 11:46:29 - INFO - codeparrot_training - Step 41980: {'lr': 3.1288842280816134e-05, 'samples': 21494272, 'steps': 41980, 'batch_loss/train': 0.7774800481274724} +12/28/2021 11:46:39 - INFO - codeparrot_training - Step 41981: {'lr': 3.128120699621295e-05, 'samples': 21494784, 'steps': 41981, 'batch_loss/train': 0.7100552460178733} +12/28/2021 11:46:51 - INFO - codeparrot_training - Step 41982: {'lr': 3.1273572581153095e-05, 'samples': 21495296, 'steps': 41982, 'batch_loss/train': 0.7351865163072944} +12/28/2021 11:47:02 - INFO - codeparrot_training - Step 41983: {'lr': 3.126593903566693e-05, 'samples': 21495808, 'steps': 41983, 'batch_loss/train': 0.8579235086217523} +12/28/2021 11:47:13 - INFO - codeparrot_training - Step 41984: {'lr': 3.12583063597848e-05, 'samples': 21496320, 'steps': 41984, 'batch_loss/train': 0.722505803219974} +12/28/2021 11:47:26 - INFO - codeparrot_training - Step 41985: {'lr': 3.125067455353706e-05, 'samples': 21496832, 'steps': 41985, 'batch_loss/train': 0.6968956450000405} +12/28/2021 11:47:37 - INFO - codeparrot_training - Step 41986: {'lr': 3.124304361695404e-05, 'samples': 21497344, 'steps': 41986, 'batch_loss/train': 0.6322755496948957} +12/28/2021 11:47:48 - INFO - codeparrot_training - Step 41987: {'lr': 3.1235413550066094e-05, 'samples': 21497856, 'steps': 41987, 'batch_loss/train': 0.761603572871536} +12/28/2021 11:48:00 - INFO - codeparrot_training - Step 41988: {'lr': 3.1227784352903574e-05, 'samples': 21498368, 'steps': 41988, 'batch_loss/train': 0.8213880104012787} +12/28/2021 11:48:10 - INFO - codeparrot_training - Step 41989: {'lr': 3.122015602549669e-05, 'samples': 21498880, 'steps': 41989, 'batch_loss/train': 0.8131972691044211} +12/28/2021 11:48:21 - INFO - codeparrot_training - Step 41990: {'lr': 3.121252856787593e-05, 'samples': 21499392, 'steps': 41990, 'batch_loss/train': 0.6774098873138428} +12/28/2021 11:48:32 - INFO - codeparrot_training - Step 41991: {'lr': 3.120490198007153e-05, 'samples': 21499904, 'steps': 41991, 'batch_loss/train': 0.601451316382736} +12/28/2021 11:48:46 - INFO - codeparrot_training - Step 41992: {'lr': 3.1197276262113744e-05, 'samples': 21500416, 'steps': 41992, 'batch_loss/train': 0.7170384284108877} +12/28/2021 11:48:56 - INFO - codeparrot_training - Step 41993: {'lr': 3.1189651414033096e-05, 'samples': 21500928, 'steps': 41993, 'batch_loss/train': 0.6324339853599668} +12/28/2021 11:49:07 - INFO - codeparrot_training - Step 41994: {'lr': 3.118202743585971e-05, 'samples': 21501440, 'steps': 41994, 'batch_loss/train': 0.6593000832945108} +12/28/2021 11:49:19 - INFO - codeparrot_training - Step 41995: {'lr': 3.117440432762397e-05, 'samples': 21501952, 'steps': 41995, 'batch_loss/train': 0.776714924024418} +12/28/2021 11:49:30 - INFO - codeparrot_training - Step 41996: {'lr': 3.116678208935617e-05, 'samples': 21502464, 'steps': 41996, 'batch_loss/train': 0.6424117792048492} +12/28/2021 11:49:40 - INFO - codeparrot_training - Step 41997: {'lr': 3.11591607210866e-05, 'samples': 21502976, 'steps': 41997, 'batch_loss/train': 0.6986369757214561} +12/28/2021 11:49:52 - INFO - codeparrot_training - Step 41998: {'lr': 3.115154022284561e-05, 'samples': 21503488, 'steps': 41998, 'batch_loss/train': 0.6843653712421656} +12/28/2021 11:50:03 - INFO - codeparrot_training - Step 41999: {'lr': 3.114392059466348e-05, 'samples': 21504000, 'steps': 41999, 'batch_loss/train': 0.8061250858008862} +12/28/2021 11:50:14 - INFO - codeparrot_training - Step 42000: {'lr': 3.11363018365704e-05, 'samples': 21504512, 'steps': 42000, 'batch_loss/train': 0.7708917809650302} +12/28/2021 11:50:28 - INFO - codeparrot_training - Step 42001: {'lr': 3.112868394859678e-05, 'samples': 21505024, 'steps': 42001, 'batch_loss/train': 0.7321282718330622} +12/28/2021 11:50:38 - INFO - codeparrot_training - Step 42002: {'lr': 3.112106693077293e-05, 'samples': 21505536, 'steps': 42002, 'batch_loss/train': 0.5780193648533896} +12/28/2021 11:50:49 - INFO - codeparrot_training - Step 42003: {'lr': 3.1113450783128997e-05, 'samples': 21506048, 'steps': 42003, 'batch_loss/train': 0.741110757458955} +12/28/2021 11:51:00 - INFO - codeparrot_training - Step 42004: {'lr': 3.1105835505695345e-05, 'samples': 21506560, 'steps': 42004, 'batch_loss/train': 0.698977845342597} +12/28/2021 11:51:12 - INFO - codeparrot_training - Step 42005: {'lr': 3.10982210985023e-05, 'samples': 21507072, 'steps': 42005, 'batch_loss/train': 0.7190391522890422} +12/28/2021 11:51:23 - INFO - codeparrot_training - Step 42006: {'lr': 3.1090607561580055e-05, 'samples': 21507584, 'steps': 42006, 'batch_loss/train': 0.7425698721781373} +12/28/2021 11:51:33 - INFO - codeparrot_training - Step 42007: {'lr': 3.108299489495886e-05, 'samples': 21508096, 'steps': 42007, 'batch_loss/train': 0.6459686039015651} +12/28/2021 11:51:46 - INFO - codeparrot_training - Step 42008: {'lr': 3.107538309866903e-05, 'samples': 21508608, 'steps': 42008, 'batch_loss/train': 0.784155959263444} +12/28/2021 11:51:56 - INFO - codeparrot_training - Step 42009: {'lr': 3.1067772172740814e-05, 'samples': 21509120, 'steps': 42009, 'batch_loss/train': 0.7182777556590736} +12/28/2021 11:52:07 - INFO - codeparrot_training - Step 42010: {'lr': 3.106016211720447e-05, 'samples': 21509632, 'steps': 42010, 'batch_loss/train': 0.512531709682662} +12/28/2021 11:52:19 - INFO - codeparrot_training - Step 42011: {'lr': 3.105255293209025e-05, 'samples': 21510144, 'steps': 42011, 'batch_loss/train': 0.5873741476680152} +12/28/2021 11:52:30 - INFO - codeparrot_training - Step 42012: {'lr': 3.10449446174284e-05, 'samples': 21510656, 'steps': 42012, 'batch_loss/train': 0.7110297083854675} +12/28/2021 11:52:41 - INFO - codeparrot_training - Step 42013: {'lr': 3.1037337173249195e-05, 'samples': 21511168, 'steps': 42013, 'batch_loss/train': 0.7532150116749108} +12/28/2021 11:52:51 - INFO - codeparrot_training - Step 42014: {'lr': 3.102973059958292e-05, 'samples': 21511680, 'steps': 42014, 'batch_loss/train': 0.7167070779833011} +12/28/2021 11:53:05 - INFO - codeparrot_training - Step 42015: {'lr': 3.102212489645964e-05, 'samples': 21512192, 'steps': 42015, 'batch_loss/train': 0.6527030444703996} +12/28/2021 11:53:16 - INFO - codeparrot_training - Step 42016: {'lr': 3.1014520063909816e-05, 'samples': 21512704, 'steps': 42016, 'batch_loss/train': 0.6344720079214312} +12/28/2021 11:53:26 - INFO - codeparrot_training - Step 42017: {'lr': 3.100691610196349e-05, 'samples': 21513216, 'steps': 42017, 'batch_loss/train': 0.72479756642133} +12/28/2021 11:53:39 - INFO - codeparrot_training - Step 42018: {'lr': 3.0999313010650954e-05, 'samples': 21513728, 'steps': 42018, 'batch_loss/train': 0.6387896756641567} +12/28/2021 11:53:49 - INFO - codeparrot_training - Step 42019: {'lr': 3.099171079000257e-05, 'samples': 21514240, 'steps': 42019, 'batch_loss/train': 0.6105557167902589} +12/28/2021 11:54:00 - INFO - codeparrot_training - Step 42020: {'lr': 3.09841094400484e-05, 'samples': 21514752, 'steps': 42020, 'batch_loss/train': 0.8423809031955898} +12/28/2021 11:54:14 - INFO - codeparrot_training - Step 42021: {'lr': 3.097650896081869e-05, 'samples': 21515264, 'steps': 42021, 'batch_loss/train': 0.6107899884227663} +12/28/2021 11:54:24 - INFO - codeparrot_training - Step 42022: {'lr': 3.09689093523437e-05, 'samples': 21515776, 'steps': 42022, 'batch_loss/train': 0.6927351891063154} +12/28/2021 11:54:35 - INFO - codeparrot_training - Step 42023: {'lr': 3.09613106146536e-05, 'samples': 21516288, 'steps': 42023, 'batch_loss/train': 0.6829368694452569} +12/28/2021 11:54:46 - INFO - codeparrot_training - Step 42024: {'lr': 3.095371274777864e-05, 'samples': 21516800, 'steps': 42024, 'batch_loss/train': 0.790919145103544} +12/28/2021 11:54:58 - INFO - codeparrot_training - Step 42025: {'lr': 3.094611575174905e-05, 'samples': 21517312, 'steps': 42025, 'batch_loss/train': 0.7018769095302559} +12/28/2021 11:55:08 - INFO - codeparrot_training - Step 42026: {'lr': 3.0938519626594914e-05, 'samples': 21517824, 'steps': 42026, 'batch_loss/train': 0.7547580128884874} +12/28/2021 11:55:19 - INFO - codeparrot_training - Step 42027: {'lr': 3.093092437234654e-05, 'samples': 21518336, 'steps': 42027, 'batch_loss/train': 0.739997498691082} +12/28/2021 11:55:31 - INFO - codeparrot_training - Step 42028: {'lr': 3.092332998903416e-05, 'samples': 21518848, 'steps': 42028, 'batch_loss/train': 0.6105195025447756} +12/28/2021 11:55:41 - INFO - codeparrot_training - Step 42029: {'lr': 3.091573647668777e-05, 'samples': 21519360, 'steps': 42029, 'batch_loss/train': 0.7585651030531153} +12/28/2021 11:55:52 - INFO - codeparrot_training - Step 42030: {'lr': 3.090814383533777e-05, 'samples': 21519872, 'steps': 42030, 'batch_loss/train': 0.6644924620632082} +12/28/2021 11:56:06 - INFO - codeparrot_training - Step 42031: {'lr': 3.09005520650143e-05, 'samples': 21520384, 'steps': 42031, 'batch_loss/train': 0.6540573881939054} +12/28/2021 11:56:17 - INFO - codeparrot_training - Step 42032: {'lr': 3.089296116574744e-05, 'samples': 21520896, 'steps': 42032, 'batch_loss/train': 0.693518653512001} +12/28/2021 11:56:27 - INFO - codeparrot_training - Step 42033: {'lr': 3.0885371137567395e-05, 'samples': 21521408, 'steps': 42033, 'batch_loss/train': 0.6540943121653982} +12/28/2021 11:56:39 - INFO - codeparrot_training - Step 42034: {'lr': 3.08777819805045e-05, 'samples': 21521920, 'steps': 42034, 'batch_loss/train': 0.688216601498425} +12/28/2021 11:56:50 - INFO - codeparrot_training - Step 42035: {'lr': 3.087019369458874e-05, 'samples': 21522432, 'steps': 42035, 'batch_loss/train': 1.379055651370436} +12/28/2021 11:57:01 - INFO - codeparrot_training - Step 42036: {'lr': 3.086260627985035e-05, 'samples': 21522944, 'steps': 42036, 'batch_loss/train': 0.706163567956537} +12/28/2021 11:57:11 - INFO - codeparrot_training - Step 42037: {'lr': 3.0855019736319514e-05, 'samples': 21523456, 'steps': 42037, 'batch_loss/train': 0.7754089708905667} +12/28/2021 11:57:26 - INFO - codeparrot_training - Step 42038: {'lr': 3.0847434064026345e-05, 'samples': 21523968, 'steps': 42038, 'batch_loss/train': 0.7534445887431502} +12/28/2021 11:57:36 - INFO - codeparrot_training - Step 42039: {'lr': 3.083984926300104e-05, 'samples': 21524480, 'steps': 42039, 'batch_loss/train': 0.9123668447136879} +12/28/2021 11:57:47 - INFO - codeparrot_training - Step 42040: {'lr': 3.083226533327374e-05, 'samples': 21524992, 'steps': 42040, 'batch_loss/train': 0.793608233332634} +12/28/2021 11:57:59 - INFO - codeparrot_training - Step 42041: {'lr': 3.082468227487461e-05, 'samples': 21525504, 'steps': 42041, 'batch_loss/train': 0.7895969464443624} +12/28/2021 11:58:10 - INFO - codeparrot_training - Step 42042: {'lr': 3.081710008783378e-05, 'samples': 21526016, 'steps': 42042, 'batch_loss/train': 0.7555259857326746} +12/28/2021 11:58:20 - INFO - codeparrot_training - Step 42043: {'lr': 3.0809518772181465e-05, 'samples': 21526528, 'steps': 42043, 'batch_loss/train': 0.6924257939681411} +12/28/2021 11:58:32 - INFO - codeparrot_training - Step 42044: {'lr': 3.0801938327947614e-05, 'samples': 21527040, 'steps': 42044, 'batch_loss/train': 0.7171974214725196} +12/28/2021 11:58:43 - INFO - codeparrot_training - Step 42045: {'lr': 3.079435875516259e-05, 'samples': 21527552, 'steps': 42045, 'batch_loss/train': 0.6837652521207929} +12/28/2021 11:58:54 - INFO - codeparrot_training - Step 42046: {'lr': 3.078678005385641e-05, 'samples': 21528064, 'steps': 42046, 'batch_loss/train': 0.8141253711655736} +12/28/2021 11:59:04 - INFO - codeparrot_training - Step 42047: {'lr': 3.077920222405919e-05, 'samples': 21528576, 'steps': 42047, 'batch_loss/train': 0.6154182725585997} +12/28/2021 11:59:17 - INFO - codeparrot_training - Step 42048: {'lr': 3.0771625265801104e-05, 'samples': 21529088, 'steps': 42048, 'batch_loss/train': 0.6398020589258522} +12/28/2021 11:59:28 - INFO - codeparrot_training - Step 42049: {'lr': 3.076404917911224e-05, 'samples': 21529600, 'steps': 42049, 'batch_loss/train': 0.5196841177530587} +12/28/2021 11:59:38 - INFO - codeparrot_training - Step 42050: {'lr': 3.075647396402273e-05, 'samples': 21530112, 'steps': 42050, 'batch_loss/train': 0.7255936443107203} +12/28/2021 11:59:52 - INFO - codeparrot_training - Step 42051: {'lr': 3.074889962056276e-05, 'samples': 21530624, 'steps': 42051, 'batch_loss/train': 1.1108382549136877} +12/28/2021 12:00:03 - INFO - codeparrot_training - Step 42052: {'lr': 3.07413261487623e-05, 'samples': 21531136, 'steps': 42052, 'batch_loss/train': 0.5875003343971912} +12/28/2021 12:00:14 - INFO - codeparrot_training - Step 42053: {'lr': 3.073375354865157e-05, 'samples': 21531648, 'steps': 42053, 'batch_loss/train': 0.6944120228290558} +12/28/2021 12:00:24 - INFO - codeparrot_training - Step 42054: {'lr': 3.072618182026069e-05, 'samples': 21532160, 'steps': 42054, 'batch_loss/train': 0.7634709157282487} +12/28/2021 12:00:36 - INFO - codeparrot_training - Step 42055: {'lr': 3.071861096361961e-05, 'samples': 21532672, 'steps': 42055, 'batch_loss/train': 0.7248158110305667} +12/28/2021 12:00:47 - INFO - codeparrot_training - Step 42056: {'lr': 3.07110409787586e-05, 'samples': 21533184, 'steps': 42056, 'batch_loss/train': 0.8226844053715467} +12/28/2021 12:00:57 - INFO - codeparrot_training - Step 42057: {'lr': 3.0703471865707736e-05, 'samples': 21533696, 'steps': 42057, 'batch_loss/train': 0.7680627973750234} +12/28/2021 12:01:10 - INFO - codeparrot_training - Step 42058: {'lr': 3.069590362449701e-05, 'samples': 21534208, 'steps': 42058, 'batch_loss/train': 0.6653251838870347} +12/28/2021 12:01:21 - INFO - codeparrot_training - Step 42059: {'lr': 3.068833625515652e-05, 'samples': 21534720, 'steps': 42059, 'batch_loss/train': 0.6926947627216578} +12/28/2021 12:01:31 - INFO - codeparrot_training - Step 42060: {'lr': 3.0680769757716496e-05, 'samples': 21535232, 'steps': 42060, 'batch_loss/train': 0.7038185372948647} +12/28/2021 12:01:45 - INFO - codeparrot_training - Step 42061: {'lr': 3.067320413220687e-05, 'samples': 21535744, 'steps': 42061, 'batch_loss/train': 2.1345515940338373} +12/28/2021 12:01:56 - INFO - codeparrot_training - Step 42062: {'lr': 3.0665639378657765e-05, 'samples': 21536256, 'steps': 42062, 'batch_loss/train': 0.6705858651548624} +12/28/2021 12:02:06 - INFO - codeparrot_training - Step 42063: {'lr': 3.0658075497099285e-05, 'samples': 21536768, 'steps': 42063, 'batch_loss/train': 0.6215886248683091} +12/28/2021 12:02:17 - INFO - codeparrot_training - Step 42064: {'lr': 3.0650512487561455e-05, 'samples': 21537280, 'steps': 42064, 'batch_loss/train': 0.7310331175103784} +12/28/2021 12:02:29 - INFO - codeparrot_training - Step 42065: {'lr': 3.064295035007436e-05, 'samples': 21537792, 'steps': 42065, 'batch_loss/train': 0.6947784638032317} +12/28/2021 12:02:40 - INFO - codeparrot_training - Step 42066: {'lr': 3.063538908466806e-05, 'samples': 21538304, 'steps': 42066, 'batch_loss/train': 0.7118810098618269} +12/28/2021 12:02:50 - INFO - codeparrot_training - Step 42067: {'lr': 3.062782869137265e-05, 'samples': 21538816, 'steps': 42067, 'batch_loss/train': 0.8627311802702025} +12/28/2021 12:03:04 - INFO - codeparrot_training - Step 42068: {'lr': 3.062026917021815e-05, 'samples': 21539328, 'steps': 42068, 'batch_loss/train': 0.6164000611752272} +12/28/2021 12:03:15 - INFO - codeparrot_training - Step 42069: {'lr': 3.061271052123468e-05, 'samples': 21539840, 'steps': 42069, 'batch_loss/train': 0.6761223068460822} +12/28/2021 12:03:26 - INFO - codeparrot_training - Step 42070: {'lr': 3.060515274445213e-05, 'samples': 21540352, 'steps': 42070, 'batch_loss/train': 0.7063394272699952} +12/28/2021 12:03:38 - INFO - codeparrot_training - Step 42071: {'lr': 3.059759583990077e-05, 'samples': 21540864, 'steps': 42071, 'batch_loss/train': 0.7571243201382458} +12/28/2021 12:03:48 - INFO - codeparrot_training - Step 42072: {'lr': 3.059003980761044e-05, 'samples': 21541376, 'steps': 42072, 'batch_loss/train': 0.7511971171479672} +12/28/2021 12:03:59 - INFO - codeparrot_training - Step 42073: {'lr': 3.0582484647611295e-05, 'samples': 21541888, 'steps': 42073, 'batch_loss/train': 0.6595828840509057} +12/28/2021 12:04:09 - INFO - codeparrot_training - Step 42074: {'lr': 3.0574930359933335e-05, 'samples': 21542400, 'steps': 42074, 'batch_loss/train': 0.724582630675286} +12/28/2021 12:04:22 - INFO - codeparrot_training - Step 42075: {'lr': 3.05673769446066e-05, 'samples': 21542912, 'steps': 42075, 'batch_loss/train': 0.7946827560663223} +12/28/2021 12:04:32 - INFO - codeparrot_training - Step 42076: {'lr': 3.055982440166111e-05, 'samples': 21543424, 'steps': 42076, 'batch_loss/train': 0.7488500422332436} +12/28/2021 12:04:43 - INFO - codeparrot_training - Step 42077: {'lr': 3.05522727311269e-05, 'samples': 21543936, 'steps': 42077, 'batch_loss/train': 0.7448560423217714} +12/28/2021 12:04:57 - INFO - codeparrot_training - Step 42078: {'lr': 3.054472193303401e-05, 'samples': 21544448, 'steps': 42078, 'batch_loss/train': 0.6459129170980304} +12/28/2021 12:05:08 - INFO - codeparrot_training - Step 42079: {'lr': 3.053717200741243e-05, 'samples': 21544960, 'steps': 42079, 'batch_loss/train': 0.7531528302351944} +12/28/2021 12:05:18 - INFO - codeparrot_training - Step 42080: {'lr': 3.0529622954292235e-05, 'samples': 21545472, 'steps': 42080, 'batch_loss/train': 0.6834086782473605} +12/28/2021 12:05:30 - INFO - codeparrot_training - Step 42081: {'lr': 3.0522074773703305e-05, 'samples': 21545984, 'steps': 42081, 'batch_loss/train': 0.7857113976497203} +12/28/2021 12:05:41 - INFO - codeparrot_training - Step 42082: {'lr': 3.051452746567576e-05, 'samples': 21546496, 'steps': 42082, 'batch_loss/train': 0.7366734575480223} +12/28/2021 12:05:52 - INFO - codeparrot_training - Step 42083: {'lr': 3.050698103023966e-05, 'samples': 21547008, 'steps': 42083, 'batch_loss/train': 0.8117014011368155} +12/28/2021 12:06:02 - INFO - codeparrot_training - Step 42084: {'lr': 3.049943546742487e-05, 'samples': 21547520, 'steps': 42084, 'batch_loss/train': 0.5207059012027457} +12/28/2021 12:06:14 - INFO - codeparrot_training - Step 42085: {'lr': 3.049189077726136e-05, 'samples': 21548032, 'steps': 42085, 'batch_loss/train': 0.7008366654627025} +12/28/2021 12:06:25 - INFO - codeparrot_training - Step 42086: {'lr': 3.0484346959779363e-05, 'samples': 21548544, 'steps': 42086, 'batch_loss/train': 0.5326908781134989} +12/28/2021 12:06:36 - INFO - codeparrot_training - Step 42087: {'lr': 3.047680401500863e-05, 'samples': 21549056, 'steps': 42087, 'batch_loss/train': 0.8244755691848695} +12/28/2021 12:06:50 - INFO - codeparrot_training - Step 42088: {'lr': 3.046926194297925e-05, 'samples': 21549568, 'steps': 42088, 'batch_loss/train': 0.69879838405177} +12/28/2021 12:07:00 - INFO - codeparrot_training - Step 42089: {'lr': 3.0461720743721167e-05, 'samples': 21550080, 'steps': 42089, 'batch_loss/train': 0.6579243410378695} +12/28/2021 12:07:11 - INFO - codeparrot_training - Step 42090: {'lr': 3.0454180417264414e-05, 'samples': 21550592, 'steps': 42090, 'batch_loss/train': 0.6512710587121546} +12/28/2021 12:07:23 - INFO - codeparrot_training - Step 42091: {'lr': 3.0446640963638945e-05, 'samples': 21551104, 'steps': 42091, 'batch_loss/train': 0.6819271154236048} +12/28/2021 12:07:34 - INFO - codeparrot_training - Step 42092: {'lr': 3.043910238287473e-05, 'samples': 21551616, 'steps': 42092, 'batch_loss/train': 0.7350648883730173} +12/28/2021 12:07:44 - INFO - codeparrot_training - Step 42093: {'lr': 3.0431564675001714e-05, 'samples': 21552128, 'steps': 42093, 'batch_loss/train': 0.8062497922219336} +12/28/2021 12:07:56 - INFO - codeparrot_training - Step 42094: {'lr': 3.0424027840049912e-05, 'samples': 21552640, 'steps': 42094, 'batch_loss/train': 0.6002153540030122} +12/28/2021 12:08:07 - INFO - codeparrot_training - Step 42095: {'lr': 3.041649187804932e-05, 'samples': 21553152, 'steps': 42095, 'batch_loss/train': 0.6915466976352036} +12/28/2021 12:08:18 - INFO - codeparrot_training - Step 42096: {'lr': 3.0408956789029747e-05, 'samples': 21553664, 'steps': 42096, 'batch_loss/train': 0.7344412822276354} +12/28/2021 12:08:28 - INFO - codeparrot_training - Step 42097: {'lr': 3.0401422573021336e-05, 'samples': 21554176, 'steps': 42097, 'batch_loss/train': 0.7833897513337433} +12/28/2021 12:08:40 - INFO - codeparrot_training - Step 42098: {'lr': 3.03938892300539e-05, 'samples': 21554688, 'steps': 42098, 'batch_loss/train': 0.6258653451805003} +12/28/2021 12:08:51 - INFO - codeparrot_training - Step 42099: {'lr': 3.038635676015744e-05, 'samples': 21555200, 'steps': 42099, 'batch_loss/train': 0.6674538506194949} +12/28/2021 12:09:01 - INFO - codeparrot_training - Step 42100: {'lr': 3.0378825163361935e-05, 'samples': 21555712, 'steps': 42100, 'batch_loss/train': 0.749171975068748} +12/28/2021 12:09:15 - INFO - codeparrot_training - Step 42101: {'lr': 3.0371294439697244e-05, 'samples': 21556224, 'steps': 42101, 'batch_loss/train': 0.6646880283951759} +12/28/2021 12:09:26 - INFO - codeparrot_training - Step 42102: {'lr': 3.0363764589193405e-05, 'samples': 21556736, 'steps': 42102, 'batch_loss/train': 0.7270884420722723} +12/28/2021 12:09:37 - INFO - codeparrot_training - Step 42103: {'lr': 3.035623561188028e-05, 'samples': 21557248, 'steps': 42103, 'batch_loss/train': 0.7219607373699546} +12/28/2021 12:09:49 - INFO - codeparrot_training - Step 42104: {'lr': 3.0348707507787814e-05, 'samples': 21557760, 'steps': 42104, 'batch_loss/train': 0.6080461301025935} +12/28/2021 12:10:00 - INFO - codeparrot_training - Step 42105: {'lr': 3.034118027694599e-05, 'samples': 21558272, 'steps': 42105, 'batch_loss/train': 0.7412554593756795} +12/28/2021 12:10:10 - INFO - codeparrot_training - Step 42106: {'lr': 3.0333653919384725e-05, 'samples': 21558784, 'steps': 42106, 'batch_loss/train': 1.4796033897437155} +12/28/2021 12:10:24 - INFO - codeparrot_training - Step 42107: {'lr': 3.03261284351338e-05, 'samples': 21559296, 'steps': 42107, 'batch_loss/train': 0.7152477006311528} +12/28/2021 12:10:35 - INFO - codeparrot_training - Step 42108: {'lr': 3.03186038242233e-05, 'samples': 21559808, 'steps': 42108, 'batch_loss/train': 0.691626354586333} +12/28/2021 12:10:46 - INFO - codeparrot_training - Step 42109: {'lr': 3.0311080086683123e-05, 'samples': 21560320, 'steps': 42109, 'batch_loss/train': 0.691294183023274} +12/28/2021 12:10:56 - INFO - codeparrot_training - Step 42110: {'lr': 3.0303557222543106e-05, 'samples': 21560832, 'steps': 42110, 'batch_loss/train': 0.8683350309729576} +12/28/2021 12:11:08 - INFO - codeparrot_training - Step 42111: {'lr': 3.0296035231833136e-05, 'samples': 21561344, 'steps': 42111, 'batch_loss/train': 0.6282326197251678} +12/28/2021 12:11:19 - INFO - codeparrot_training - Step 42112: {'lr': 3.0288514114583278e-05, 'samples': 21561856, 'steps': 42112, 'batch_loss/train': 0.6335135181434453} +12/28/2021 12:11:30 - INFO - codeparrot_training - Step 42113: {'lr': 3.0280993870823254e-05, 'samples': 21562368, 'steps': 42113, 'batch_loss/train': 0.7095033898949623} +12/28/2021 12:11:42 - INFO - codeparrot_training - Step 42114: {'lr': 3.0273474500583014e-05, 'samples': 21562880, 'steps': 42114, 'batch_loss/train': 0.6124396752566099} +12/28/2021 12:11:52 - INFO - codeparrot_training - Step 42115: {'lr': 3.0265956003892565e-05, 'samples': 21563392, 'steps': 42115, 'batch_loss/train': 0.6973539692698978} +12/28/2021 12:12:03 - INFO - codeparrot_training - Step 42116: {'lr': 3.0258438380781654e-05, 'samples': 21563904, 'steps': 42116, 'batch_loss/train': 0.654293421539478} +12/28/2021 12:12:17 - INFO - codeparrot_training - Step 42117: {'lr': 3.025092163128021e-05, 'samples': 21564416, 'steps': 42117, 'batch_loss/train': 1.4786142490338534} +12/28/2021 12:12:28 - INFO - codeparrot_training - Step 42118: {'lr': 3.0243405755418148e-05, 'samples': 21564928, 'steps': 42118, 'batch_loss/train': 0.6683694655075669} +12/28/2021 12:12:38 - INFO - codeparrot_training - Step 42119: {'lr': 3.0235890753225304e-05, 'samples': 21565440, 'steps': 42119, 'batch_loss/train': 0.77516122860834} +12/28/2021 12:12:50 - INFO - codeparrot_training - Step 42120: {'lr': 3.02283766247316e-05, 'samples': 21565952, 'steps': 42120, 'batch_loss/train': 0.7363458829931915} +12/28/2021 12:13:01 - INFO - codeparrot_training - Step 42121: {'lr': 3.0220863369966935e-05, 'samples': 21566464, 'steps': 42121, 'batch_loss/train': 0.7823777176672593} +12/28/2021 12:13:12 - INFO - codeparrot_training - Step 42122: {'lr': 3.0213350988961023e-05, 'samples': 21566976, 'steps': 42122, 'batch_loss/train': 0.6684254971332848} +12/28/2021 12:13:22 - INFO - codeparrot_training - Step 42123: {'lr': 3.020583948174391e-05, 'samples': 21567488, 'steps': 42123, 'batch_loss/train': 0.6411684962222353} +12/28/2021 12:13:36 - INFO - codeparrot_training - Step 42124: {'lr': 3.019832884834542e-05, 'samples': 21568000, 'steps': 42124, 'batch_loss/train': 0.7312909867614508} +12/28/2021 12:13:47 - INFO - codeparrot_training - Step 42125: {'lr': 3.0190819088795312e-05, 'samples': 21568512, 'steps': 42125, 'batch_loss/train': 0.5900555057451129} +12/28/2021 12:13:57 - INFO - codeparrot_training - Step 42126: {'lr': 3.0183310203123536e-05, 'samples': 21569024, 'steps': 42126, 'batch_loss/train': 0.6085826618364081} +12/28/2021 12:14:09 - INFO - codeparrot_training - Step 42127: {'lr': 3.017580219135993e-05, 'samples': 21569536, 'steps': 42127, 'batch_loss/train': 0.6998118385672569} +12/28/2021 12:14:20 - INFO - codeparrot_training - Step 42128: {'lr': 3.0168295053534302e-05, 'samples': 21570048, 'steps': 42128, 'batch_loss/train': 0.6909197620116174} +12/28/2021 12:14:31 - INFO - codeparrot_training - Step 42129: {'lr': 3.016078878967654e-05, 'samples': 21570560, 'steps': 42129, 'batch_loss/train': 0.7695561479777098} +12/28/2021 12:14:43 - INFO - codeparrot_training - Step 42130: {'lr': 3.015328339981649e-05, 'samples': 21571072, 'steps': 42130, 'batch_loss/train': 0.7045266055501997} +12/28/2021 12:14:53 - INFO - codeparrot_training - Step 42131: {'lr': 3.0145778883983955e-05, 'samples': 21571584, 'steps': 42131, 'batch_loss/train': 0.7733764164149761} +12/28/2021 12:15:04 - INFO - codeparrot_training - Step 42132: {'lr': 3.013827524220886e-05, 'samples': 21572096, 'steps': 42132, 'batch_loss/train': 0.5953639664803632} +12/28/2021 12:15:14 - INFO - codeparrot_training - Step 42133: {'lr': 3.013077247452084e-05, 'samples': 21572608, 'steps': 42133, 'batch_loss/train': 0.6881025582551956} +12/28/2021 12:15:27 - INFO - codeparrot_training - Step 42134: {'lr': 3.0123270580949908e-05, 'samples': 21573120, 'steps': 42134, 'batch_loss/train': 0.7073607838246971} +12/28/2021 12:15:37 - INFO - codeparrot_training - Step 42135: {'lr': 3.0115769561525868e-05, 'samples': 21573632, 'steps': 42135, 'batch_loss/train': 0.6874534138478339} +12/28/2021 12:15:48 - INFO - codeparrot_training - Step 42136: {'lr': 3.010826941627848e-05, 'samples': 21574144, 'steps': 42136, 'batch_loss/train': 0.7636595047079027} +12/28/2021 12:16:02 - INFO - codeparrot_training - Step 42137: {'lr': 3.0100770145237517e-05, 'samples': 21574656, 'steps': 42137, 'batch_loss/train': 0.6888761762529612} +12/28/2021 12:16:12 - INFO - codeparrot_training - Step 42138: {'lr': 3.0093271748432958e-05, 'samples': 21575168, 'steps': 42138, 'batch_loss/train': 0.7991378819569945} +12/28/2021 12:16:23 - INFO - codeparrot_training - Step 42139: {'lr': 3.008577422589448e-05, 'samples': 21575680, 'steps': 42139, 'batch_loss/train': 0.7568331426009536} +12/28/2021 12:16:35 - INFO - codeparrot_training - Step 42140: {'lr': 3.0078277577651886e-05, 'samples': 21576192, 'steps': 42140, 'batch_loss/train': 0.6934818116715178} +12/28/2021 12:16:46 - INFO - codeparrot_training - Step 42141: {'lr': 3.0070781803735098e-05, 'samples': 21576704, 'steps': 42141, 'batch_loss/train': 0.7036594203673303} +12/28/2021 12:16:56 - INFO - codeparrot_training - Step 42142: {'lr': 3.0063286904173816e-05, 'samples': 21577216, 'steps': 42142, 'batch_loss/train': 0.7661845618858933} +12/28/2021 12:17:08 - INFO - codeparrot_training - Step 42143: {'lr': 3.005579287899782e-05, 'samples': 21577728, 'steps': 42143, 'batch_loss/train': 0.7554760351777077} +12/28/2021 12:17:19 - INFO - codeparrot_training - Step 42144: {'lr': 3.004829972823697e-05, 'samples': 21578240, 'steps': 42144, 'batch_loss/train': 0.7247506519779563} +12/28/2021 12:17:30 - INFO - codeparrot_training - Step 42145: {'lr': 3.004080745192103e-05, 'samples': 21578752, 'steps': 42145, 'batch_loss/train': 0.7267705649137497} +12/28/2021 12:17:40 - INFO - codeparrot_training - Step 42146: {'lr': 3.003331605007978e-05, 'samples': 21579264, 'steps': 42146, 'batch_loss/train': 0.7597776683978736} +12/28/2021 12:17:54 - INFO - codeparrot_training - Step 42147: {'lr': 3.0025825522743078e-05, 'samples': 21579776, 'steps': 42147, 'batch_loss/train': 0.6121386461891234} +12/28/2021 12:18:05 - INFO - codeparrot_training - Step 42148: {'lr': 3.0018335869940516e-05, 'samples': 21580288, 'steps': 42148, 'batch_loss/train': 0.6505176217760891} +12/28/2021 12:18:16 - INFO - codeparrot_training - Step 42149: {'lr': 3.0010847091702043e-05, 'samples': 21580800, 'steps': 42149, 'batch_loss/train': 0.7831177567131817} +12/28/2021 12:18:28 - INFO - codeparrot_training - Step 42150: {'lr': 3.000335918805741e-05, 'samples': 21581312, 'steps': 42150, 'batch_loss/train': 0.6882234737277031} +12/28/2021 12:18:39 - INFO - codeparrot_training - Step 42151: {'lr': 2.9995872159036293e-05, 'samples': 21581824, 'steps': 42151, 'batch_loss/train': 0.7843004977330565} +12/28/2021 12:18:49 - INFO - codeparrot_training - Step 42152: {'lr': 2.998838600466858e-05, 'samples': 21582336, 'steps': 42152, 'batch_loss/train': 0.7135842200368643} +12/28/2021 12:19:03 - INFO - codeparrot_training - Step 42153: {'lr': 2.998090072498394e-05, 'samples': 21582848, 'steps': 42153, 'batch_loss/train': 0.5048888550081756} +12/28/2021 12:19:14 - INFO - codeparrot_training - Step 42154: {'lr': 2.9973416320012158e-05, 'samples': 21583360, 'steps': 42154, 'batch_loss/train': 0.634512519929558} +12/28/2021 12:19:24 - INFO - codeparrot_training - Step 42155: {'lr': 2.996593278978299e-05, 'samples': 21583872, 'steps': 42155, 'batch_loss/train': 0.703049160016235} +12/28/2021 12:19:35 - INFO - codeparrot_training - Step 42156: {'lr': 2.9958450134326186e-05, 'samples': 21584384, 'steps': 42156, 'batch_loss/train': 0.9369698744267225} +12/28/2021 12:19:47 - INFO - codeparrot_training - Step 42157: {'lr': 2.995096835367153e-05, 'samples': 21584896, 'steps': 42157, 'batch_loss/train': 0.7018617594148964} +12/28/2021 12:19:58 - INFO - codeparrot_training - Step 42158: {'lr': 2.994348744784872e-05, 'samples': 21585408, 'steps': 42158, 'batch_loss/train': 0.6975940396077931} +12/28/2021 12:20:08 - INFO - codeparrot_training - Step 42159: {'lr': 2.993600741688751e-05, 'samples': 21585920, 'steps': 42159, 'batch_loss/train': 0.6666292985901237} +12/28/2021 12:20:21 - INFO - codeparrot_training - Step 42160: {'lr': 2.992852826081763e-05, 'samples': 21586432, 'steps': 42160, 'batch_loss/train': 0.684116063406691} +12/28/2021 12:20:31 - INFO - codeparrot_training - Step 42161: {'lr': 2.992104997966888e-05, 'samples': 21586944, 'steps': 42161, 'batch_loss/train': 0.8132944041863084} +12/28/2021 12:20:42 - INFO - codeparrot_training - Step 42162: {'lr': 2.991357257347091e-05, 'samples': 21587456, 'steps': 42162, 'batch_loss/train': 0.5066813897865359} +12/28/2021 12:20:56 - INFO - codeparrot_training - Step 42163: {'lr': 2.990609604225339e-05, 'samples': 21587968, 'steps': 42163, 'batch_loss/train': 0.718078050063923} +12/28/2021 12:21:07 - INFO - codeparrot_training - Step 42164: {'lr': 2.9898620386046267e-05, 'samples': 21588480, 'steps': 42164, 'batch_loss/train': 0.659207166172564} +12/28/2021 12:21:17 - INFO - codeparrot_training - Step 42165: {'lr': 2.9891145604879044e-05, 'samples': 21588992, 'steps': 42165, 'batch_loss/train': 0.8299900447018445} +12/28/2021 12:21:30 - INFO - codeparrot_training - Step 42166: {'lr': 2.9883671698781477e-05, 'samples': 21589504, 'steps': 42166, 'batch_loss/train': 0.6351138101890683} +12/28/2021 12:21:40 - INFO - codeparrot_training - Step 42167: {'lr': 2.9876198667783406e-05, 'samples': 21590016, 'steps': 42167, 'batch_loss/train': 0.8021942172199488} +12/28/2021 12:21:51 - INFO - codeparrot_training - Step 42168: {'lr': 2.9868726511914413e-05, 'samples': 21590528, 'steps': 42168, 'batch_loss/train': 0.6049230121425353} +12/28/2021 12:22:02 - INFO - codeparrot_training - Step 42169: {'lr': 2.9861255231204255e-05, 'samples': 21591040, 'steps': 42169, 'batch_loss/train': 0.8390861367806792} +12/28/2021 12:22:16 - INFO - codeparrot_training - Step 42170: {'lr': 2.98537848256826e-05, 'samples': 21591552, 'steps': 42170, 'batch_loss/train': 0.8751378050073981} +12/28/2021 12:22:26 - INFO - codeparrot_training - Step 42171: {'lr': 2.9846315295379207e-05, 'samples': 21592064, 'steps': 42171, 'batch_loss/train': 0.7955530695617199} +12/28/2021 12:22:37 - INFO - codeparrot_training - Step 42172: {'lr': 2.9838846640323713e-05, 'samples': 21592576, 'steps': 42172, 'batch_loss/train': 0.6332639558240771} +12/28/2021 12:22:49 - INFO - codeparrot_training - Step 42173: {'lr': 2.9831378860545878e-05, 'samples': 21593088, 'steps': 42173, 'batch_loss/train': 0.6965546421706676} +12/28/2021 12:23:00 - INFO - codeparrot_training - Step 42174: {'lr': 2.9823911956075257e-05, 'samples': 21593600, 'steps': 42174, 'batch_loss/train': 0.8042451580986381} +12/28/2021 12:23:10 - INFO - codeparrot_training - Step 42175: {'lr': 2.9816445926941688e-05, 'samples': 21594112, 'steps': 42175, 'batch_loss/train': 0.7495956313796341} +12/28/2021 12:23:22 - INFO - codeparrot_training - Step 42176: {'lr': 2.9808980773174843e-05, 'samples': 21594624, 'steps': 42176, 'batch_loss/train': 0.6118736212374642} +12/28/2021 12:23:33 - INFO - codeparrot_training - Step 42177: {'lr': 2.9801516494804226e-05, 'samples': 21595136, 'steps': 42177, 'batch_loss/train': 0.6428714960638899} +12/28/2021 12:23:44 - INFO - codeparrot_training - Step 42178: {'lr': 2.979405309185976e-05, 'samples': 21595648, 'steps': 42178, 'batch_loss/train': 0.6636135373264551} +12/28/2021 12:23:54 - INFO - codeparrot_training - Step 42179: {'lr': 2.978659056437094e-05, 'samples': 21596160, 'steps': 42179, 'batch_loss/train': 0.6194137404672801} +12/28/2021 12:24:08 - INFO - codeparrot_training - Step 42180: {'lr': 2.9779128912367476e-05, 'samples': 21596672, 'steps': 42180, 'batch_loss/train': 0.7078955871984363} +12/28/2021 12:24:19 - INFO - codeparrot_training - Step 42181: {'lr': 2.977166813587903e-05, 'samples': 21597184, 'steps': 42181, 'batch_loss/train': 0.7864296128973365} +12/28/2021 12:24:30 - INFO - codeparrot_training - Step 42182: {'lr': 2.976420823493531e-05, 'samples': 21597696, 'steps': 42182, 'batch_loss/train': 0.7133378681028262} +12/28/2021 12:24:42 - INFO - codeparrot_training - Step 42183: {'lr': 2.975674920956592e-05, 'samples': 21598208, 'steps': 42183, 'batch_loss/train': 0.6796425171196461} +12/28/2021 12:24:52 - INFO - codeparrot_training - Step 42184: {'lr': 2.9749291059800538e-05, 'samples': 21598720, 'steps': 42184, 'batch_loss/train': 0.7062205385882407} +12/28/2021 12:25:03 - INFO - codeparrot_training - Step 42185: {'lr': 2.9741833785668802e-05, 'samples': 21599232, 'steps': 42185, 'batch_loss/train': 0.7928042045095935} +12/28/2021 12:25:17 - INFO - codeparrot_training - Step 42186: {'lr': 2.9734377387200388e-05, 'samples': 21599744, 'steps': 42186, 'batch_loss/train': 0.6273771775886416} +12/28/2021 12:25:28 - INFO - codeparrot_training - Step 42187: {'lr': 2.9726921864424965e-05, 'samples': 21600256, 'steps': 42187, 'batch_loss/train': 0.654251849045977} +12/28/2021 12:25:38 - INFO - codeparrot_training - Step 42188: {'lr': 2.971946721737201e-05, 'samples': 21600768, 'steps': 42188, 'batch_loss/train': 0.6549879722297192} +12/28/2021 12:25:49 - INFO - codeparrot_training - Step 42189: {'lr': 2.9712013446071357e-05, 'samples': 21601280, 'steps': 42189, 'batch_loss/train': 0.6694192594150081} +12/28/2021 12:26:02 - INFO - codeparrot_training - Step 42190: {'lr': 2.9704560550552595e-05, 'samples': 21601792, 'steps': 42190, 'batch_loss/train': 0.6725510710384697} +12/28/2021 12:26:12 - INFO - codeparrot_training - Step 42191: {'lr': 2.9697108530845258e-05, 'samples': 21602304, 'steps': 42191, 'batch_loss/train': 0.5585309430025518} +12/28/2021 12:26:23 - INFO - codeparrot_training - Step 42192: {'lr': 2.9689657386979013e-05, 'samples': 21602816, 'steps': 42192, 'batch_loss/train': 0.7536585554480553} +12/28/2021 12:26:35 - INFO - codeparrot_training - Step 42193: {'lr': 2.968220711898359e-05, 'samples': 21603328, 'steps': 42193, 'batch_loss/train': 0.745409619063139} +12/28/2021 12:26:46 - INFO - codeparrot_training - Step 42194: {'lr': 2.9674757726888464e-05, 'samples': 21603840, 'steps': 42194, 'batch_loss/train': 0.6629207949154079} +12/28/2021 12:26:56 - INFO - codeparrot_training - Step 42195: {'lr': 2.9667309210723302e-05, 'samples': 21604352, 'steps': 42195, 'batch_loss/train': 0.6391050911042839} +12/28/2021 12:27:09 - INFO - codeparrot_training - Step 42196: {'lr': 2.9659861570517755e-05, 'samples': 21604864, 'steps': 42196, 'batch_loss/train': 0.7687671722378582} +12/28/2021 12:27:19 - INFO - codeparrot_training - Step 42197: {'lr': 2.9652414806301375e-05, 'samples': 21605376, 'steps': 42197, 'batch_loss/train': 0.7744259265018627} +12/28/2021 12:27:30 - INFO - codeparrot_training - Step 42198: {'lr': 2.964496891810381e-05, 'samples': 21605888, 'steps': 42198, 'batch_loss/train': 0.46852971526095644} +12/28/2021 12:27:41 - INFO - codeparrot_training - Step 42199: {'lr': 2.96375239059547e-05, 'samples': 21606400, 'steps': 42199, 'batch_loss/train': 0.7532796813175082} +12/28/2021 12:27:55 - INFO - codeparrot_training - Step 42200: {'lr': 2.9630079769883467e-05, 'samples': 21606912, 'steps': 42200, 'batch_loss/train': 0.8125883750617504} +12/28/2021 12:28:05 - INFO - codeparrot_training - Step 42201: {'lr': 2.9622636509919894e-05, 'samples': 21607424, 'steps': 42201, 'batch_loss/train': 0.7840385643066838} +12/28/2021 12:28:16 - INFO - codeparrot_training - Step 42202: {'lr': 2.9615194126093564e-05, 'samples': 21607936, 'steps': 42202, 'batch_loss/train': 0.7789378203451633} +12/28/2021 12:28:28 - INFO - codeparrot_training - Step 42203: {'lr': 2.9607752618433903e-05, 'samples': 21608448, 'steps': 42203, 'batch_loss/train': 0.7518935110419989} +12/28/2021 12:28:38 - INFO - codeparrot_training - Step 42204: {'lr': 2.9600311986970636e-05, 'samples': 21608960, 'steps': 42204, 'batch_loss/train': 0.7298577628098428} +12/28/2021 12:28:49 - INFO - codeparrot_training - Step 42205: {'lr': 2.9592872231733377e-05, 'samples': 21609472, 'steps': 42205, 'batch_loss/train': 0.7854502843692899} +12/28/2021 12:29:01 - INFO - codeparrot_training - Step 42206: {'lr': 2.9585433352751574e-05, 'samples': 21609984, 'steps': 42206, 'batch_loss/train': 0.7101578409783542} +12/28/2021 12:29:12 - INFO - codeparrot_training - Step 42207: {'lr': 2.957799535005487e-05, 'samples': 21610496, 'steps': 42207, 'batch_loss/train': 0.6686785374768078} +12/28/2021 12:29:23 - INFO - codeparrot_training - Step 42208: {'lr': 2.957055822367283e-05, 'samples': 21611008, 'steps': 42208, 'batch_loss/train': 0.8044898597872816} +12/28/2021 12:29:33 - INFO - codeparrot_training - Step 42209: {'lr': 2.9563121973635004e-05, 'samples': 21611520, 'steps': 42209, 'batch_loss/train': 0.7330343786161393} +12/28/2021 12:29:47 - INFO - codeparrot_training - Step 42210: {'lr': 2.9555686599970988e-05, 'samples': 21612032, 'steps': 42210, 'batch_loss/train': 0.7057996625080705} +12/28/2021 12:29:58 - INFO - codeparrot_training - Step 42211: {'lr': 2.954825210271031e-05, 'samples': 21612544, 'steps': 42211, 'batch_loss/train': 0.7957690567709506} +12/28/2021 12:30:09 - INFO - codeparrot_training - Step 42212: {'lr': 2.954081848188256e-05, 'samples': 21613056, 'steps': 42212, 'batch_loss/train': 0.7737600607797503} +12/28/2021 12:30:21 - INFO - codeparrot_training - Step 42213: {'lr': 2.953338573751724e-05, 'samples': 21613568, 'steps': 42213, 'batch_loss/train': 0.9931144171860069} +12/28/2021 12:30:31 - INFO - codeparrot_training - Step 42214: {'lr': 2.9525953869643963e-05, 'samples': 21614080, 'steps': 42214, 'batch_loss/train': 0.7288631456904113} +12/28/2021 12:30:42 - INFO - codeparrot_training - Step 42215: {'lr': 2.9518522878292236e-05, 'samples': 21614592, 'steps': 42215, 'batch_loss/train': 0.4439655306050554} +12/28/2021 12:30:53 - INFO - codeparrot_training - Step 42216: {'lr': 2.9511092763491678e-05, 'samples': 21615104, 'steps': 42216, 'batch_loss/train': 0.6016144117456861} +12/28/2021 12:31:05 - INFO - codeparrot_training - Step 42217: {'lr': 2.9503663525271703e-05, 'samples': 21615616, 'steps': 42217, 'batch_loss/train': 0.7176394648849964} +12/28/2021 12:31:15 - INFO - codeparrot_training - Step 42218: {'lr': 2.9496235163661846e-05, 'samples': 21616128, 'steps': 42218, 'batch_loss/train': 0.7244764701463282} +12/28/2021 12:31:26 - INFO - codeparrot_training - Step 42219: {'lr': 2.9488807678691804e-05, 'samples': 21616640, 'steps': 42219, 'batch_loss/train': 0.7428138582035899} +12/28/2021 12:31:40 - INFO - codeparrot_training - Step 42220: {'lr': 2.9481381070390945e-05, 'samples': 21617152, 'steps': 42220, 'batch_loss/train': 0.6860672472976148} +12/28/2021 12:31:51 - INFO - codeparrot_training - Step 42221: {'lr': 2.9473955338788855e-05, 'samples': 21617664, 'steps': 42221, 'batch_loss/train': 0.7027787016704679} +12/28/2021 12:32:01 - INFO - codeparrot_training - Step 42222: {'lr': 2.9466530483915065e-05, 'samples': 21618176, 'steps': 42222, 'batch_loss/train': 0.783570204366697} +12/28/2021 12:32:14 - INFO - codeparrot_training - Step 42223: {'lr': 2.9459106505799084e-05, 'samples': 21618688, 'steps': 42223, 'batch_loss/train': 0.7671627346426249} +12/28/2021 12:32:24 - INFO - codeparrot_training - Step 42224: {'lr': 2.945168340447038e-05, 'samples': 21619200, 'steps': 42224, 'batch_loss/train': 0.8382214065641165} +12/28/2021 12:32:35 - INFO - codeparrot_training - Step 42225: {'lr': 2.944426117995855e-05, 'samples': 21619712, 'steps': 42225, 'batch_loss/train': 0.7399371941573918} +12/28/2021 12:32:45 - INFO - codeparrot_training - Step 42226: {'lr': 2.943683983229303e-05, 'samples': 21620224, 'steps': 42226, 'batch_loss/train': 0.7469301717355847} +12/28/2021 12:32:59 - INFO - codeparrot_training - Step 42227: {'lr': 2.9429419361503367e-05, 'samples': 21620736, 'steps': 42227, 'batch_loss/train': 0.6211214582435787} +12/28/2021 12:33:09 - INFO - codeparrot_training - Step 42228: {'lr': 2.9421999767619114e-05, 'samples': 21621248, 'steps': 42228, 'batch_loss/train': 0.6563584677642211} +12/28/2021 12:33:20 - INFO - codeparrot_training - Step 42229: {'lr': 2.9414581050669575e-05, 'samples': 21621760, 'steps': 42229, 'batch_loss/train': 0.7685855990275741} +12/28/2021 12:33:32 - INFO - codeparrot_training - Step 42230: {'lr': 2.940716321068443e-05, 'samples': 21622272, 'steps': 42230, 'batch_loss/train': 0.841603527776897} +12/28/2021 12:33:43 - INFO - codeparrot_training - Step 42231: {'lr': 2.9399746247693178e-05, 'samples': 21622784, 'steps': 42231, 'batch_loss/train': 0.7836440070532262} +12/28/2021 12:33:53 - INFO - codeparrot_training - Step 42232: {'lr': 2.9392330161725185e-05, 'samples': 21623296, 'steps': 42232, 'batch_loss/train': 0.8019457617774606} +12/28/2021 12:34:05 - INFO - codeparrot_training - Step 42233: {'lr': 2.9384914952809956e-05, 'samples': 21623808, 'steps': 42233, 'batch_loss/train': 0.7873909631744027} +12/28/2021 12:34:16 - INFO - codeparrot_training - Step 42234: {'lr': 2.937750062097705e-05, 'samples': 21624320, 'steps': 42234, 'batch_loss/train': 0.7320135226473212} +12/28/2021 12:34:27 - INFO - codeparrot_training - Step 42235: {'lr': 2.937008716625586e-05, 'samples': 21624832, 'steps': 42235, 'batch_loss/train': 0.8020673533901572} +12/28/2021 12:34:37 - INFO - codeparrot_training - Step 42236: {'lr': 2.936267458867592e-05, 'samples': 21625344, 'steps': 42236, 'batch_loss/train': 0.7151320148259401} +12/28/2021 12:34:49 - INFO - codeparrot_training - Step 42237: {'lr': 2.9355262888266676e-05, 'samples': 21625856, 'steps': 42237, 'batch_loss/train': 0.7748393341898918} +12/28/2021 12:35:00 - INFO - codeparrot_training - Step 42238: {'lr': 2.9347852065057578e-05, 'samples': 21626368, 'steps': 42238, 'batch_loss/train': 0.6456509889103472} +12/28/2021 12:35:10 - INFO - codeparrot_training - Step 42239: {'lr': 2.9340442119078104e-05, 'samples': 21626880, 'steps': 42239, 'batch_loss/train': 0.7118884891970083} +12/28/2021 12:35:24 - INFO - codeparrot_training - Step 42240: {'lr': 2.9333033050357727e-05, 'samples': 21627392, 'steps': 42240, 'batch_loss/train': 0.829746900126338} +12/28/2021 12:35:35 - INFO - codeparrot_training - Step 42241: {'lr': 2.9325624858925898e-05, 'samples': 21627904, 'steps': 42241, 'batch_loss/train': 0.7923225089907646} +12/28/2021 12:35:46 - INFO - codeparrot_training - Step 42242: {'lr': 2.9318217544812093e-05, 'samples': 21628416, 'steps': 42242, 'batch_loss/train': 0.7777623670990579} +12/28/2021 12:35:58 - INFO - codeparrot_training - Step 42243: {'lr': 2.9310811108045704e-05, 'samples': 21628928, 'steps': 42243, 'batch_loss/train': 0.7767519121989608} +12/28/2021 12:36:08 - INFO - codeparrot_training - Step 42244: {'lr': 2.9303405548656127e-05, 'samples': 21629440, 'steps': 42244, 'batch_loss/train': 0.7167349075898528} +12/28/2021 12:36:19 - INFO - codeparrot_training - Step 42245: {'lr': 2.9296000866672974e-05, 'samples': 21629952, 'steps': 42245, 'batch_loss/train': 0.7408862551674247} +12/28/2021 12:36:30 - INFO - codeparrot_training - Step 42246: {'lr': 2.9288597062125555e-05, 'samples': 21630464, 'steps': 42246, 'batch_loss/train': 0.6097309354227036} +12/28/2021 12:36:42 - INFO - codeparrot_training - Step 42247: {'lr': 2.928119413504332e-05, 'samples': 21630976, 'steps': 42247, 'batch_loss/train': 0.6969942566938698} +12/28/2021 12:36:52 - INFO - codeparrot_training - Step 42248: {'lr': 2.9273792085455747e-05, 'samples': 21631488, 'steps': 42248, 'batch_loss/train': 0.7886928683146834} +12/28/2021 12:37:03 - INFO - codeparrot_training - Step 42249: {'lr': 2.92663909133922e-05, 'samples': 21632000, 'steps': 42249, 'batch_loss/train': 0.5006786467856728} +12/28/2021 12:37:17 - INFO - codeparrot_training - Step 42250: {'lr': 2.9258990618882155e-05, 'samples': 21632512, 'steps': 42250, 'batch_loss/train': 0.7652470285538584} +12/28/2021 12:37:28 - INFO - codeparrot_training - Step 42251: {'lr': 2.9251591201955e-05, 'samples': 21633024, 'steps': 42251, 'batch_loss/train': 0.720968805369921} +12/28/2021 12:37:38 - INFO - codeparrot_training - Step 42252: {'lr': 2.9244192662640195e-05, 'samples': 21633536, 'steps': 42252, 'batch_loss/train': 0.7684696661308408} +12/28/2021 12:37:50 - INFO - codeparrot_training - Step 42253: {'lr': 2.92367950009671e-05, 'samples': 21634048, 'steps': 42253, 'batch_loss/train': 0.8513139290735126} +12/28/2021 12:38:01 - INFO - codeparrot_training - Step 42254: {'lr': 2.9229398216965216e-05, 'samples': 21634560, 'steps': 42254, 'batch_loss/train': 0.7550620804540813} +12/28/2021 12:38:12 - INFO - codeparrot_training - Step 42255: {'lr': 2.922200231066377e-05, 'samples': 21635072, 'steps': 42255, 'batch_loss/train': 0.6950674299150705} +12/28/2021 12:38:26 - INFO - codeparrot_training - Step 42256: {'lr': 2.9214607282092355e-05, 'samples': 21635584, 'steps': 42256, 'batch_loss/train': 0.735985572449863} +12/28/2021 12:38:37 - INFO - codeparrot_training - Step 42257: {'lr': 2.920721313128033e-05, 'samples': 21636096, 'steps': 42257, 'batch_loss/train': 0.8636943413875997} +12/28/2021 12:38:48 - INFO - codeparrot_training - Step 42258: {'lr': 2.919981985825698e-05, 'samples': 21636608, 'steps': 42258, 'batch_loss/train': 0.823806734289974} +12/28/2021 12:38:58 - INFO - codeparrot_training - Step 42259: {'lr': 2.9192427463051756e-05, 'samples': 21637120, 'steps': 42259, 'batch_loss/train': 0.6528918037656695} +12/28/2021 12:39:10 - INFO - codeparrot_training - Step 42260: {'lr': 2.918503594569416e-05, 'samples': 21637632, 'steps': 42260, 'batch_loss/train': 0.8201401247642934} +12/28/2021 12:39:21 - INFO - codeparrot_training - Step 42261: {'lr': 2.9177645306213445e-05, 'samples': 21638144, 'steps': 42261, 'batch_loss/train': 0.5743400926003233} +12/28/2021 12:39:32 - INFO - codeparrot_training - Step 42262: {'lr': 2.9170255544639035e-05, 'samples': 21638656, 'steps': 42262, 'batch_loss/train': 0.7183611209038645} +12/28/2021 12:39:44 - INFO - codeparrot_training - Step 42263: {'lr': 2.9162866661000292e-05, 'samples': 21639168, 'steps': 42263, 'batch_loss/train': 0.6926548999035731} +12/28/2021 12:39:54 - INFO - codeparrot_training - Step 42264: {'lr': 2.915547865532661e-05, 'samples': 21639680, 'steps': 42264, 'batch_loss/train': 0.859656632412225} +12/28/2021 12:40:05 - INFO - codeparrot_training - Step 42265: {'lr': 2.914809152764736e-05, 'samples': 21640192, 'steps': 42265, 'batch_loss/train': 0.7498458800837398} +12/28/2021 12:40:16 - INFO - codeparrot_training - Step 42266: {'lr': 2.9140705277991898e-05, 'samples': 21640704, 'steps': 42266, 'batch_loss/train': 0.678523620357737} +12/28/2021 12:40:30 - INFO - codeparrot_training - Step 42267: {'lr': 2.913331990638962e-05, 'samples': 21641216, 'steps': 42267, 'batch_loss/train': 0.758819404989481} +12/28/2021 12:40:41 - INFO - codeparrot_training - Step 42268: {'lr': 2.9125935412869897e-05, 'samples': 21641728, 'steps': 42268, 'batch_loss/train': 0.7045714718988165} +12/28/2021 12:40:51 - INFO - codeparrot_training - Step 42269: {'lr': 2.9118551797462006e-05, 'samples': 21642240, 'steps': 42269, 'batch_loss/train': 0.8049524137750268} +12/28/2021 12:41:03 - INFO - codeparrot_training - Step 42270: {'lr': 2.9111169060195313e-05, 'samples': 21642752, 'steps': 42270, 'batch_loss/train': 0.8181156329810619} +12/28/2021 12:41:14 - INFO - codeparrot_training - Step 42271: {'lr': 2.9103787201099298e-05, 'samples': 21643264, 'steps': 42271, 'batch_loss/train': 0.7370026559219696} +12/28/2021 12:41:25 - INFO - codeparrot_training - Step 42272: {'lr': 2.909640622020318e-05, 'samples': 21643776, 'steps': 42272, 'batch_loss/train': 0.7093264944851398} +12/28/2021 12:41:37 - INFO - codeparrot_training - Step 42273: {'lr': 2.9089026117536277e-05, 'samples': 21644288, 'steps': 42273, 'batch_loss/train': 0.663976184790954} +12/28/2021 12:41:47 - INFO - codeparrot_training - Step 42274: {'lr': 2.9081646893128116e-05, 'samples': 21644800, 'steps': 42274, 'batch_loss/train': 0.5662739214021713} +12/28/2021 12:41:58 - INFO - codeparrot_training - Step 42275: {'lr': 2.9074268547007843e-05, 'samples': 21645312, 'steps': 42275, 'batch_loss/train': 0.6916382594499737} +12/28/2021 12:42:09 - INFO - codeparrot_training - Step 42276: {'lr': 2.9066891079204848e-05, 'samples': 21645824, 'steps': 42276, 'batch_loss/train': 0.653624651953578} +12/28/2021 12:42:21 - INFO - codeparrot_training - Step 42277: {'lr': 2.90595144897485e-05, 'samples': 21646336, 'steps': 42277, 'batch_loss/train': 0.7398798577487469} +12/28/2021 12:42:31 - INFO - codeparrot_training - Step 42278: {'lr': 2.905213877866808e-05, 'samples': 21646848, 'steps': 42278, 'batch_loss/train': 0.7680253321304917} +12/28/2021 12:42:42 - INFO - codeparrot_training - Step 42279: {'lr': 2.9044763945992923e-05, 'samples': 21647360, 'steps': 42279, 'batch_loss/train': 0.7332058344036341} +12/28/2021 12:42:56 - INFO - codeparrot_training - Step 42280: {'lr': 2.9037389991752427e-05, 'samples': 21647872, 'steps': 42280, 'batch_loss/train': 0.7736911168321967} +12/28/2021 12:43:07 - INFO - codeparrot_training - Step 42281: {'lr': 2.9030016915975732e-05, 'samples': 21648384, 'steps': 42281, 'batch_loss/train': 0.6190242040902376} +12/28/2021 12:43:17 - INFO - codeparrot_training - Step 42282: {'lr': 2.9022644718692286e-05, 'samples': 21648896, 'steps': 42282, 'batch_loss/train': 0.6967418608255684} +12/28/2021 12:43:29 - INFO - codeparrot_training - Step 42283: {'lr': 2.901527339993143e-05, 'samples': 21649408, 'steps': 42283, 'batch_loss/train': 0.7617475586012006} +12/28/2021 12:43:40 - INFO - codeparrot_training - Step 42284: {'lr': 2.900790295972236e-05, 'samples': 21649920, 'steps': 42284, 'batch_loss/train': 0.6112208368722349} +12/28/2021 12:43:51 - INFO - codeparrot_training - Step 42285: {'lr': 2.900053339809436e-05, 'samples': 21650432, 'steps': 42285, 'batch_loss/train': 0.7681059199385345} +12/28/2021 12:44:01 - INFO - codeparrot_training - Step 42286: {'lr': 2.8993164715076903e-05, 'samples': 21650944, 'steps': 42286, 'batch_loss/train': 0.7152078277431428} +12/28/2021 12:44:13 - INFO - codeparrot_training - Step 42287: {'lr': 2.8985796910699137e-05, 'samples': 21651456, 'steps': 42287, 'batch_loss/train': 0.767314987257123} +12/28/2021 12:44:24 - INFO - codeparrot_training - Step 42288: {'lr': 2.897842998499037e-05, 'samples': 21651968, 'steps': 42288, 'batch_loss/train': 0.6886538886465132} +12/28/2021 12:44:35 - INFO - codeparrot_training - Step 42289: {'lr': 2.8971063937979935e-05, 'samples': 21652480, 'steps': 42289, 'batch_loss/train': 0.6784062855876982} +12/28/2021 12:44:49 - INFO - codeparrot_training - Step 42290: {'lr': 2.8963698769697067e-05, 'samples': 21652992, 'steps': 42290, 'batch_loss/train': 0.7756171708460897} +12/28/2021 12:44:59 - INFO - codeparrot_training - Step 42291: {'lr': 2.895633448017107e-05, 'samples': 21653504, 'steps': 42291, 'batch_loss/train': 0.8256103820167482} +12/28/2021 12:45:10 - INFO - codeparrot_training - Step 42292: {'lr': 2.8948971069431258e-05, 'samples': 21654016, 'steps': 42292, 'batch_loss/train': 0.7458431990817189} +12/28/2021 12:45:22 - INFO - codeparrot_training - Step 42293: {'lr': 2.8941608537506826e-05, 'samples': 21654528, 'steps': 42293, 'batch_loss/train': 0.9589458610862494} +12/28/2021 12:45:33 - INFO - codeparrot_training - Step 42294: {'lr': 2.893424688442711e-05, 'samples': 21655040, 'steps': 42294, 'batch_loss/train': 0.7186049008741975} +12/28/2021 12:45:44 - INFO - codeparrot_training - Step 42295: {'lr': 2.892688611022143e-05, 'samples': 21655552, 'steps': 42295, 'batch_loss/train': 0.7460182150825858} +12/28/2021 12:45:54 - INFO - codeparrot_training - Step 42296: {'lr': 2.8919526214918863e-05, 'samples': 21656064, 'steps': 42296, 'batch_loss/train': 0.6751743792556226} +12/28/2021 12:46:08 - INFO - codeparrot_training - Step 42297: {'lr': 2.891216719854886e-05, 'samples': 21656576, 'steps': 42297, 'batch_loss/train': 0.6893375692889094} +12/28/2021 12:46:19 - INFO - codeparrot_training - Step 42298: {'lr': 2.890480906114057e-05, 'samples': 21657088, 'steps': 42298, 'batch_loss/train': 0.4270992277888581} +12/28/2021 12:46:30 - INFO - codeparrot_training - Step 42299: {'lr': 2.889745180272324e-05, 'samples': 21657600, 'steps': 42299, 'batch_loss/train': 0.6663712980225682} +12/28/2021 12:46:42 - INFO - codeparrot_training - Step 42300: {'lr': 2.889009542332624e-05, 'samples': 21658112, 'steps': 42300, 'batch_loss/train': 0.7545642433688045} +12/28/2021 12:46:53 - INFO - codeparrot_training - Step 42301: {'lr': 2.8882739922978684e-05, 'samples': 21658624, 'steps': 42301, 'batch_loss/train': 0.5387545877601951} +12/28/2021 12:47:03 - INFO - codeparrot_training - Step 42302: {'lr': 2.8875385301709856e-05, 'samples': 21659136, 'steps': 42302, 'batch_loss/train': 0.8410858797142282} +12/28/2021 12:47:15 - INFO - codeparrot_training - Step 42303: {'lr': 2.8868031559549006e-05, 'samples': 21659648, 'steps': 42303, 'batch_loss/train': 0.6931823733029887} +12/28/2021 12:47:26 - INFO - codeparrot_training - Step 42304: {'lr': 2.8860678696525394e-05, 'samples': 21660160, 'steps': 42304, 'batch_loss/train': 0.766474857868161} +12/28/2021 12:47:37 - INFO - codeparrot_training - Step 42305: {'lr': 2.8853326712668187e-05, 'samples': 21660672, 'steps': 42305, 'batch_loss/train': 0.5912792640738189} +12/28/2021 12:47:47 - INFO - codeparrot_training - Step 42306: {'lr': 2.8845975608006724e-05, 'samples': 21661184, 'steps': 42306, 'batch_loss/train': 0.710568618029356} +12/28/2021 12:48:01 - INFO - codeparrot_training - Step 42307: {'lr': 2.8838625382570037e-05, 'samples': 21661696, 'steps': 42307, 'batch_loss/train': 0.6665071714669466} +12/28/2021 12:48:12 - INFO - codeparrot_training - Step 42308: {'lr': 2.883127603638755e-05, 'samples': 21662208, 'steps': 42308, 'batch_loss/train': 0.6954288566485047} +12/28/2021 12:48:22 - INFO - codeparrot_training - Step 42309: {'lr': 2.8823927569488427e-05, 'samples': 21662720, 'steps': 42309, 'batch_loss/train': 0.8270247676409781} +12/28/2021 12:48:35 - INFO - codeparrot_training - Step 42310: {'lr': 2.8816579981901735e-05, 'samples': 21663232, 'steps': 42310, 'batch_loss/train': 0.6892850985750556} +12/28/2021 12:48:45 - INFO - codeparrot_training - Step 42311: {'lr': 2.8809233273656893e-05, 'samples': 21663744, 'steps': 42311, 'batch_loss/train': 0.7414511712267995} +12/28/2021 12:48:56 - INFO - codeparrot_training - Step 42312: {'lr': 2.8801887444783042e-05, 'samples': 21664256, 'steps': 42312, 'batch_loss/train': 0.7278420180082321} +12/28/2021 12:49:08 - INFO - codeparrot_training - Step 42313: {'lr': 2.8794542495309355e-05, 'samples': 21664768, 'steps': 42313, 'batch_loss/train': 0.7646220824681222} +12/28/2021 12:49:19 - INFO - codeparrot_training - Step 42314: {'lr': 2.8787198425265003e-05, 'samples': 21665280, 'steps': 42314, 'batch_loss/train': 0.8552349610254169} +12/28/2021 12:49:29 - INFO - codeparrot_training - Step 42315: {'lr': 2.8779855234679242e-05, 'samples': 21665792, 'steps': 42315, 'batch_loss/train': 0.809620937332511} +12/28/2021 12:49:40 - INFO - codeparrot_training - Step 42316: {'lr': 2.877251292358124e-05, 'samples': 21666304, 'steps': 42316, 'batch_loss/train': 0.6803987188031897} +12/28/2021 12:49:52 - INFO - codeparrot_training - Step 42317: {'lr': 2.8765171492000196e-05, 'samples': 21666816, 'steps': 42317, 'batch_loss/train': 0.780994608416222} +12/28/2021 12:50:03 - INFO - codeparrot_training - Step 42318: {'lr': 2.8757830939965312e-05, 'samples': 21667328, 'steps': 42318, 'batch_loss/train': 0.7810532352887094} +12/28/2021 12:50:13 - INFO - codeparrot_training - Step 42319: {'lr': 2.8750491267505728e-05, 'samples': 21667840, 'steps': 42319, 'batch_loss/train': 0.7484686383977532} +12/28/2021 12:50:27 - INFO - codeparrot_training - Step 42320: {'lr': 2.8743152474650675e-05, 'samples': 21668352, 'steps': 42320, 'batch_loss/train': 0.7705353759229183} +12/28/2021 12:50:38 - INFO - codeparrot_training - Step 42321: {'lr': 2.873581456142932e-05, 'samples': 21668864, 'steps': 42321, 'batch_loss/train': 0.6768548139370978} +12/28/2021 12:50:49 - INFO - codeparrot_training - Step 42322: {'lr': 2.872847752787075e-05, 'samples': 21669376, 'steps': 42322, 'batch_loss/train': 0.6639784760773182} +12/28/2021 12:51:01 - INFO - codeparrot_training - Step 42323: {'lr': 2.8721141374004305e-05, 'samples': 21669888, 'steps': 42323, 'batch_loss/train': 0.8925459096208215} +12/28/2021 12:51:11 - INFO - codeparrot_training - Step 42324: {'lr': 2.871380609985899e-05, 'samples': 21670400, 'steps': 42324, 'batch_loss/train': 0.8417088421992958} +12/28/2021 12:51:22 - INFO - codeparrot_training - Step 42325: {'lr': 2.8706471705463978e-05, 'samples': 21670912, 'steps': 42325, 'batch_loss/train': 0.7276354295900092} +12/28/2021 12:51:33 - INFO - codeparrot_training - Step 42326: {'lr': 2.8699138190848573e-05, 'samples': 21671424, 'steps': 42326, 'batch_loss/train': 0.7304872907698154} +12/28/2021 12:51:46 - INFO - codeparrot_training - Step 42327: {'lr': 2.8691805556041784e-05, 'samples': 21671936, 'steps': 42327, 'batch_loss/train': 0.8168535716831684} +12/28/2021 12:51:57 - INFO - codeparrot_training - Step 42328: {'lr': 2.8684473801072806e-05, 'samples': 21672448, 'steps': 42328, 'batch_loss/train': 0.835046922788024} +12/28/2021 12:52:08 - INFO - codeparrot_training - Step 42329: {'lr': 2.8677142925970816e-05, 'samples': 21672960, 'steps': 42329, 'batch_loss/train': 0.7438598000444472} +12/28/2021 12:52:20 - INFO - codeparrot_training - Step 42330: {'lr': 2.8669812930764926e-05, 'samples': 21673472, 'steps': 42330, 'batch_loss/train': 0.7167154978960752} +12/28/2021 12:52:31 - INFO - codeparrot_training - Step 42331: {'lr': 2.866248381548431e-05, 'samples': 21673984, 'steps': 42331, 'batch_loss/train': 0.6886926963925362} +12/28/2021 12:52:41 - INFO - codeparrot_training - Step 42332: {'lr': 2.8655155580158104e-05, 'samples': 21674496, 'steps': 42332, 'batch_loss/train': 0.7649396606720984} +12/28/2021 12:52:53 - INFO - codeparrot_training - Step 42333: {'lr': 2.8647828224815348e-05, 'samples': 21675008, 'steps': 42333, 'batch_loss/train': 0.7832752401009202} +12/28/2021 12:53:04 - INFO - codeparrot_training - Step 42334: {'lr': 2.8640501749485266e-05, 'samples': 21675520, 'steps': 42334, 'batch_loss/train': 0.8102231919765472} +12/28/2021 12:53:15 - INFO - codeparrot_training - Step 42335: {'lr': 2.8633176154197056e-05, 'samples': 21676032, 'steps': 42335, 'batch_loss/train': 0.7004030626267195} +12/28/2021 12:53:29 - INFO - codeparrot_training - Step 42336: {'lr': 2.862585143897961e-05, 'samples': 21676544, 'steps': 42336, 'batch_loss/train': 0.72235266584903} +12/28/2021 12:53:40 - INFO - codeparrot_training - Step 42337: {'lr': 2.861852760386227e-05, 'samples': 21677056, 'steps': 42337, 'batch_loss/train': 0.5092599630297627} +12/28/2021 12:53:50 - INFO - codeparrot_training - Step 42338: {'lr': 2.861120464887412e-05, 'samples': 21677568, 'steps': 42338, 'batch_loss/train': 0.7925181584432721} +12/28/2021 12:54:01 - INFO - codeparrot_training - Step 42339: {'lr': 2.8603882574044172e-05, 'samples': 21678080, 'steps': 42339, 'batch_loss/train': 0.6492670958396047} +12/28/2021 12:54:13 - INFO - codeparrot_training - Step 42340: {'lr': 2.859656137940153e-05, 'samples': 21678592, 'steps': 42340, 'batch_loss/train': 0.6375485979951918} +12/28/2021 12:54:24 - INFO - codeparrot_training - Step 42341: {'lr': 2.8589241064975487e-05, 'samples': 21679104, 'steps': 42341, 'batch_loss/train': 0.7179537634365261} +12/28/2021 12:54:34 - INFO - codeparrot_training - Step 42342: {'lr': 2.8581921630794955e-05, 'samples': 21679616, 'steps': 42342, 'batch_loss/train': 0.7689396254718304} +12/28/2021 12:54:47 - INFO - codeparrot_training - Step 42343: {'lr': 2.857460307688911e-05, 'samples': 21680128, 'steps': 42343, 'batch_loss/train': 0.7285531295929104} +12/28/2021 12:54:57 - INFO - codeparrot_training - Step 42344: {'lr': 2.8567285403287012e-05, 'samples': 21680640, 'steps': 42344, 'batch_loss/train': 0.6978135717799887} +12/28/2021 12:55:08 - INFO - codeparrot_training - Step 42345: {'lr': 2.8559968610017804e-05, 'samples': 21681152, 'steps': 42345, 'batch_loss/train': 0.7289592530578375} +12/28/2021 12:55:22 - INFO - codeparrot_training - Step 42346: {'lr': 2.855265269711055e-05, 'samples': 21681664, 'steps': 42346, 'batch_loss/train': 0.7074131374247372} +12/28/2021 12:55:32 - INFO - codeparrot_training - Step 42347: {'lr': 2.8545337664594302e-05, 'samples': 21682176, 'steps': 42347, 'batch_loss/train': 0.7371039697900414} +12/28/2021 12:55:43 - INFO - codeparrot_training - Step 42348: {'lr': 2.8538023512498206e-05, 'samples': 21682688, 'steps': 42348, 'batch_loss/train': 0.6706536035053432} +12/28/2021 12:55:54 - INFO - codeparrot_training - Step 42349: {'lr': 2.8530710240851272e-05, 'samples': 21683200, 'steps': 42349, 'batch_loss/train': 0.6941067562438548} +12/28/2021 12:56:06 - INFO - codeparrot_training - Step 42350: {'lr': 2.852339784968269e-05, 'samples': 21683712, 'steps': 42350, 'batch_loss/train': 0.7266929121688008} +12/28/2021 12:56:16 - INFO - codeparrot_training - Step 42351: {'lr': 2.8516086339021337e-05, 'samples': 21684224, 'steps': 42351, 'batch_loss/train': 0.675871997140348} +12/28/2021 12:56:27 - INFO - codeparrot_training - Step 42352: {'lr': 2.8508775708896512e-05, 'samples': 21684736, 'steps': 42352, 'batch_loss/train': 0.5378399635665119} +12/28/2021 12:56:39 - INFO - codeparrot_training - Step 42353: {'lr': 2.8501465959337115e-05, 'samples': 21685248, 'steps': 42353, 'batch_loss/train': 0.7528833746910095} +12/28/2021 12:56:50 - INFO - codeparrot_training - Step 42354: {'lr': 2.849415709037223e-05, 'samples': 21685760, 'steps': 42354, 'batch_loss/train': 0.7102760479319841} +12/28/2021 12:57:00 - INFO - codeparrot_training - Step 42355: {'lr': 2.848684910203095e-05, 'samples': 21686272, 'steps': 42355, 'batch_loss/train': 0.7122997860424221} +12/28/2021 12:57:14 - INFO - codeparrot_training - Step 42356: {'lr': 2.8479541994342327e-05, 'samples': 21686784, 'steps': 42356, 'batch_loss/train': 0.7619574889540672} +12/28/2021 12:57:25 - INFO - codeparrot_training - Step 42357: {'lr': 2.84722357673354e-05, 'samples': 21687296, 'steps': 42357, 'batch_loss/train': 0.7911930745467544} +12/28/2021 12:57:36 - INFO - codeparrot_training - Step 42358: {'lr': 2.846493042103926e-05, 'samples': 21687808, 'steps': 42358, 'batch_loss/train': 0.7637014172505587} +12/28/2021 12:57:46 - INFO - codeparrot_training - Step 42359: {'lr': 2.845762595548282e-05, 'samples': 21688320, 'steps': 42359, 'batch_loss/train': 0.6899459373671561} +12/28/2021 12:57:58 - INFO - codeparrot_training - Step 42360: {'lr': 2.8450322370695253e-05, 'samples': 21688832, 'steps': 42360, 'batch_loss/train': 0.8208324424922466} +12/28/2021 12:58:09 - INFO - codeparrot_training - Step 42361: {'lr': 2.8443019666705593e-05, 'samples': 21689344, 'steps': 42361, 'batch_loss/train': 0.8501810766756535} +12/28/2021 12:58:19 - INFO - codeparrot_training - Step 42362: {'lr': 2.8435717843542737e-05, 'samples': 21689856, 'steps': 42362, 'batch_loss/train': 0.7396309762261808} +12/28/2021 12:58:32 - INFO - codeparrot_training - Step 42363: {'lr': 2.8428416901235847e-05, 'samples': 21690368, 'steps': 42363, 'batch_loss/train': 0.7589835114777088} +12/28/2021 12:58:42 - INFO - codeparrot_training - Step 42364: {'lr': 2.8421116839813965e-05, 'samples': 21690880, 'steps': 42364, 'batch_loss/train': 0.7703363941982388} +12/28/2021 12:58:53 - INFO - codeparrot_training - Step 42365: {'lr': 2.8413817659306006e-05, 'samples': 21691392, 'steps': 42365, 'batch_loss/train': 0.5972655159421265} +12/28/2021 12:59:07 - INFO - codeparrot_training - Step 42366: {'lr': 2.8406519359741007e-05, 'samples': 21691904, 'steps': 42366, 'batch_loss/train': 0.6891534868627787} +12/28/2021 12:59:18 - INFO - codeparrot_training - Step 42367: {'lr': 2.839922194114808e-05, 'samples': 21692416, 'steps': 42367, 'batch_loss/train': 0.7646116614341736} +12/28/2021 12:59:28 - INFO - codeparrot_training - Step 42368: {'lr': 2.8391925403556146e-05, 'samples': 21692928, 'steps': 42368, 'batch_loss/train': 0.7076531518250704} +12/28/2021 12:59:39 - INFO - codeparrot_training - Step 42369: {'lr': 2.8384629746994238e-05, 'samples': 21693440, 'steps': 42369, 'batch_loss/train': 0.7225329885259271} +12/28/2021 12:59:51 - INFO - codeparrot_training - Step 42370: {'lr': 2.837733497149139e-05, 'samples': 21693952, 'steps': 42370, 'batch_loss/train': 0.7104433537460864} +12/28/2021 13:00:02 - INFO - codeparrot_training - Step 42371: {'lr': 2.8370041077076552e-05, 'samples': 21694464, 'steps': 42371, 'batch_loss/train': 0.8534915177151561} +12/28/2021 13:00:12 - INFO - codeparrot_training - Step 42372: {'lr': 2.8362748063778725e-05, 'samples': 21694976, 'steps': 42372, 'batch_loss/train': 0.7405082164332271} +12/28/2021 13:00:26 - INFO - codeparrot_training - Step 42373: {'lr': 2.835545593162697e-05, 'samples': 21695488, 'steps': 42373, 'batch_loss/train': 0.6276261461898685} +12/28/2021 13:00:37 - INFO - codeparrot_training - Step 42374: {'lr': 2.8348164680650214e-05, 'samples': 21696000, 'steps': 42374, 'batch_loss/train': 0.7731631007045507} +12/28/2021 13:00:48 - INFO - codeparrot_training - Step 42375: {'lr': 2.834087431087745e-05, 'samples': 21696512, 'steps': 42375, 'batch_loss/train': 0.7195246191695333} +12/28/2021 13:01:00 - INFO - codeparrot_training - Step 42376: {'lr': 2.833358482233772e-05, 'samples': 21697024, 'steps': 42376, 'batch_loss/train': 0.585670827422291} +12/28/2021 13:01:11 - INFO - codeparrot_training - Step 42377: {'lr': 2.832629621505989e-05, 'samples': 21697536, 'steps': 42377, 'batch_loss/train': 0.7552249734289944} +12/28/2021 13:01:21 - INFO - codeparrot_training - Step 42378: {'lr': 2.8319008489073066e-05, 'samples': 21698048, 'steps': 42378, 'batch_loss/train': 0.7299008566769771} +12/28/2021 13:01:32 - INFO - codeparrot_training - Step 42379: {'lr': 2.8311721644406152e-05, 'samples': 21698560, 'steps': 42379, 'batch_loss/train': 0.8192129712551832} +12/28/2021 13:01:44 - INFO - codeparrot_training - Step 42380: {'lr': 2.8304435681088092e-05, 'samples': 21699072, 'steps': 42380, 'batch_loss/train': 0.7628131718374789} +12/28/2021 13:01:55 - INFO - codeparrot_training - Step 42381: {'lr': 2.829715059914792e-05, 'samples': 21699584, 'steps': 42381, 'batch_loss/train': 0.6816727081313729} +12/28/2021 13:02:05 - INFO - codeparrot_training - Step 42382: {'lr': 2.8289866398614527e-05, 'samples': 21700096, 'steps': 42382, 'batch_loss/train': 0.7450349563732743} +12/28/2021 13:02:19 - INFO - codeparrot_training - Step 42383: {'lr': 2.828258307951695e-05, 'samples': 21700608, 'steps': 42383, 'batch_loss/train': 0.8646040551830083} +12/28/2021 13:02:30 - INFO - codeparrot_training - Step 42384: {'lr': 2.8275300641884078e-05, 'samples': 21701120, 'steps': 42384, 'batch_loss/train': 0.6610297767911106} +12/28/2021 13:02:41 - INFO - codeparrot_training - Step 42385: {'lr': 2.826801908574489e-05, 'samples': 21701632, 'steps': 42385, 'batch_loss/train': 0.709633210208267} +12/28/2021 13:02:53 - INFO - codeparrot_training - Step 42386: {'lr': 2.8260738411128362e-05, 'samples': 21702144, 'steps': 42386, 'batch_loss/train': 0.6274541588500142} +12/28/2021 13:03:03 - INFO - codeparrot_training - Step 42387: {'lr': 2.8253458618063443e-05, 'samples': 21702656, 'steps': 42387, 'batch_loss/train': 0.7447728966362774} +12/28/2021 13:03:14 - INFO - codeparrot_training - Step 42388: {'lr': 2.8246179706578943e-05, 'samples': 21703168, 'steps': 42388, 'batch_loss/train': 0.7508887471631169} +12/28/2021 13:03:25 - INFO - codeparrot_training - Step 42389: {'lr': 2.8238901676703954e-05, 'samples': 21703680, 'steps': 42389, 'batch_loss/train': 0.8400836391374469} +12/28/2021 13:03:37 - INFO - codeparrot_training - Step 42390: {'lr': 2.823162452846739e-05, 'samples': 21704192, 'steps': 42390, 'batch_loss/train': 0.7547259824350476} +12/28/2021 13:03:47 - INFO - codeparrot_training - Step 42391: {'lr': 2.8224348261898127e-05, 'samples': 21704704, 'steps': 42391, 'batch_loss/train': 0.742630158085376} +12/28/2021 13:03:58 - INFO - codeparrot_training - Step 42392: {'lr': 2.8217072877025074e-05, 'samples': 21705216, 'steps': 42392, 'batch_loss/train': 0.6913641928695142} +12/28/2021 13:04:10 - INFO - codeparrot_training - Step 42393: {'lr': 2.820979837387727e-05, 'samples': 21705728, 'steps': 42393, 'batch_loss/train': 0.5766795915551484} +12/28/2021 13:04:21 - INFO - codeparrot_training - Step 42394: {'lr': 2.8202524752483527e-05, 'samples': 21706240, 'steps': 42394, 'batch_loss/train': 0.654614003142342} +12/28/2021 13:04:31 - INFO - codeparrot_training - Step 42395: {'lr': 2.819525201287279e-05, 'samples': 21706752, 'steps': 42395, 'batch_loss/train': 0.7305817473679781} +12/28/2021 13:04:45 - INFO - codeparrot_training - Step 42396: {'lr': 2.8187980155073983e-05, 'samples': 21707264, 'steps': 42396, 'batch_loss/train': 0.7539581563323736} +12/28/2021 13:04:56 - INFO - codeparrot_training - Step 42397: {'lr': 2.8180709179116027e-05, 'samples': 21707776, 'steps': 42397, 'batch_loss/train': 0.9351032112026587} +12/28/2021 13:05:07 - INFO - codeparrot_training - Step 42398: {'lr': 2.8173439085027814e-05, 'samples': 21708288, 'steps': 42398, 'batch_loss/train': 0.7034155614674091} +12/28/2021 13:05:17 - INFO - codeparrot_training - Step 42399: {'lr': 2.8166169872838236e-05, 'samples': 21708800, 'steps': 42399, 'batch_loss/train': 0.7246999095659703} +12/28/2021 13:05:30 - INFO - codeparrot_training - Step 42400: {'lr': 2.8158901542576192e-05, 'samples': 21709312, 'steps': 42400, 'batch_loss/train': 0.7337188143283129} +12/28/2021 13:05:40 - INFO - codeparrot_training - Step 42401: {'lr': 2.815163409427063e-05, 'samples': 21709824, 'steps': 42401, 'batch_loss/train': 0.7198491645976901} +12/28/2021 13:05:51 - INFO - codeparrot_training - Step 42402: {'lr': 2.814436752795041e-05, 'samples': 21710336, 'steps': 42402, 'batch_loss/train': 0.7903961362317204} +12/28/2021 13:06:05 - INFO - codeparrot_training - Step 42403: {'lr': 2.8137101843644346e-05, 'samples': 21710848, 'steps': 42403, 'batch_loss/train': 0.7523646233603358} +12/28/2021 13:06:15 - INFO - codeparrot_training - Step 42404: {'lr': 2.81298370413815e-05, 'samples': 21711360, 'steps': 42404, 'batch_loss/train': 0.7632668842561543} +12/28/2021 13:06:26 - INFO - codeparrot_training - Step 42405: {'lr': 2.812257312119057e-05, 'samples': 21711872, 'steps': 42405, 'batch_loss/train': 0.8658626191318035} +12/28/2021 13:06:38 - INFO - codeparrot_training - Step 42406: {'lr': 2.811531008310053e-05, 'samples': 21712384, 'steps': 42406, 'batch_loss/train': 0.6941713951528072} +12/28/2021 13:06:49 - INFO - codeparrot_training - Step 42407: {'lr': 2.8108047927140222e-05, 'samples': 21712896, 'steps': 42407, 'batch_loss/train': 0.6396227600052953} +12/28/2021 13:06:59 - INFO - codeparrot_training - Step 42408: {'lr': 2.810078665333854e-05, 'samples': 21713408, 'steps': 42408, 'batch_loss/train': 0.7414492616662756} +12/28/2021 13:07:10 - INFO - codeparrot_training - Step 42409: {'lr': 2.8093526261724344e-05, 'samples': 21713920, 'steps': 42409, 'batch_loss/train': 0.7695392481982708} +12/28/2021 13:07:22 - INFO - codeparrot_training - Step 42410: {'lr': 2.8086266752326505e-05, 'samples': 21714432, 'steps': 42410, 'batch_loss/train': 0.7083775659557432} +12/28/2021 13:07:33 - INFO - codeparrot_training - Step 42411: {'lr': 2.8079008125173887e-05, 'samples': 21714944, 'steps': 42411, 'batch_loss/train': 0.7399166310206056} +12/28/2021 13:07:44 - INFO - codeparrot_training - Step 42412: {'lr': 2.8071750380295304e-05, 'samples': 21715456, 'steps': 42412, 'batch_loss/train': 0.5194373440463096} +12/28/2021 13:07:58 - INFO - codeparrot_training - Step 42413: {'lr': 2.8064493517719724e-05, 'samples': 21715968, 'steps': 42413, 'batch_loss/train': 0.7017886964604259} +12/28/2021 13:08:08 - INFO - codeparrot_training - Step 42414: {'lr': 2.80572375374758e-05, 'samples': 21716480, 'steps': 42414, 'batch_loss/train': 0.7110953312367201} +12/28/2021 13:08:19 - INFO - codeparrot_training - Step 42415: {'lr': 2.8049982439592557e-05, 'samples': 21716992, 'steps': 42415, 'batch_loss/train': 0.6748038266086951} +12/28/2021 13:08:31 - INFO - codeparrot_training - Step 42416: {'lr': 2.804272822409884e-05, 'samples': 21717504, 'steps': 42416, 'batch_loss/train': 0.7488306472077966} +12/28/2021 13:08:42 - INFO - codeparrot_training - Step 42417: {'lr': 2.803547489102337e-05, 'samples': 21718016, 'steps': 42417, 'batch_loss/train': 0.701291523873806} +12/28/2021 13:08:52 - INFO - codeparrot_training - Step 42418: {'lr': 2.802822244039499e-05, 'samples': 21718528, 'steps': 42418, 'batch_loss/train': 0.8002557219006121} +12/28/2021 13:09:04 - INFO - codeparrot_training - Step 42419: {'lr': 2.8020970872242673e-05, 'samples': 21719040, 'steps': 42419, 'batch_loss/train': 0.6582194688962772} +12/28/2021 13:09:15 - INFO - codeparrot_training - Step 42420: {'lr': 2.801372018659512e-05, 'samples': 21719552, 'steps': 42420, 'batch_loss/train': 0.6918988130055368} +12/28/2021 13:09:25 - INFO - codeparrot_training - Step 42421: {'lr': 2.800647038348114e-05, 'samples': 21720064, 'steps': 42421, 'batch_loss/train': 0.7432077070698142} +12/28/2021 13:09:36 - INFO - codeparrot_training - Step 42422: {'lr': 2.7999221462929742e-05, 'samples': 21720576, 'steps': 42422, 'batch_loss/train': 0.7557685617357492} +12/28/2021 13:09:48 - INFO - codeparrot_training - Step 42423: {'lr': 2.7991973424969537e-05, 'samples': 21721088, 'steps': 42423, 'batch_loss/train': 0.5966391160618514} +12/28/2021 13:09:59 - INFO - codeparrot_training - Step 42424: {'lr': 2.798472626962942e-05, 'samples': 21721600, 'steps': 42424, 'batch_loss/train': 0.7843004586175084} +12/28/2021 13:10:10 - INFO - codeparrot_training - Step 42425: {'lr': 2.79774799969382e-05, 'samples': 21722112, 'steps': 42425, 'batch_loss/train': 0.7444934826344252} +12/28/2021 13:10:23 - INFO - codeparrot_training - Step 42426: {'lr': 2.797023460692469e-05, 'samples': 21722624, 'steps': 42426, 'batch_loss/train': 0.7060801875777543} +12/28/2021 13:10:34 - INFO - codeparrot_training - Step 42427: {'lr': 2.7962990099617697e-05, 'samples': 21723136, 'steps': 42427, 'batch_loss/train': 0.7796091232448816} +12/28/2021 13:10:44 - INFO - codeparrot_training - Step 42428: {'lr': 2.795574647504606e-05, 'samples': 21723648, 'steps': 42428, 'batch_loss/train': 0.6826468661893159} +12/28/2021 13:10:56 - INFO - codeparrot_training - Step 42429: {'lr': 2.7948503733238428e-05, 'samples': 21724160, 'steps': 42429, 'batch_loss/train': 0.6979129938408732} +12/28/2021 13:11:07 - INFO - codeparrot_training - Step 42430: {'lr': 2.7941261874223772e-05, 'samples': 21724672, 'steps': 42430, 'batch_loss/train': 0.6976593071594834} +12/28/2021 13:11:18 - INFO - codeparrot_training - Step 42431: {'lr': 2.7934020898030848e-05, 'samples': 21725184, 'steps': 42431, 'batch_loss/train': 0.6076366789638996} +12/28/2021 13:11:28 - INFO - codeparrot_training - Step 42432: {'lr': 2.7926780804688358e-05, 'samples': 21725696, 'steps': 42432, 'batch_loss/train': 0.7778000961989164} +12/28/2021 13:11:42 - INFO - codeparrot_training - Step 42433: {'lr': 2.791954159422516e-05, 'samples': 21726208, 'steps': 42433, 'batch_loss/train': 0.5401940706651658} +12/28/2021 13:11:53 - INFO - codeparrot_training - Step 42434: {'lr': 2.7912303266669992e-05, 'samples': 21726720, 'steps': 42434, 'batch_loss/train': 0.7710363729856908} +12/28/2021 13:12:04 - INFO - codeparrot_training - Step 42435: {'lr': 2.7905065822051627e-05, 'samples': 21727232, 'steps': 42435, 'batch_loss/train': 0.7484196410514414} +12/28/2021 13:12:16 - INFO - codeparrot_training - Step 42436: {'lr': 2.7897829260398882e-05, 'samples': 21727744, 'steps': 42436, 'batch_loss/train': 0.7881839587935247} +12/28/2021 13:12:26 - INFO - codeparrot_training - Step 42437: {'lr': 2.7890593581740508e-05, 'samples': 21728256, 'steps': 42437, 'batch_loss/train': 0.8240091092884541} +12/28/2021 13:12:37 - INFO - codeparrot_training - Step 42438: {'lr': 2.788335878610526e-05, 'samples': 21728768, 'steps': 42438, 'batch_loss/train': 0.6379252881743014} +12/28/2021 13:12:49 - INFO - codeparrot_training - Step 42439: {'lr': 2.7876124873521925e-05, 'samples': 21729280, 'steps': 42439, 'batch_loss/train': 1.3883983560372144} +12/28/2021 13:13:00 - INFO - codeparrot_training - Step 42440: {'lr': 2.786889184401925e-05, 'samples': 21729792, 'steps': 42440, 'batch_loss/train': 0.7381153553724289} +12/28/2021 13:13:11 - INFO - codeparrot_training - Step 42441: {'lr': 2.7861659697625967e-05, 'samples': 21730304, 'steps': 42441, 'batch_loss/train': 0.6488305772654712} +12/28/2021 13:13:21 - INFO - codeparrot_training - Step 42442: {'lr': 2.7854428434370915e-05, 'samples': 21730816, 'steps': 42442, 'batch_loss/train': 0.7901869025081396} +12/28/2021 13:13:35 - INFO - codeparrot_training - Step 42443: {'lr': 2.7847198054282707e-05, 'samples': 21731328, 'steps': 42443, 'batch_loss/train': 0.8341597723774612} +12/28/2021 13:13:46 - INFO - codeparrot_training - Step 42444: {'lr': 2.7839968557390126e-05, 'samples': 21731840, 'steps': 42444, 'batch_loss/train': 0.769114226102829} +12/28/2021 13:13:56 - INFO - codeparrot_training - Step 42445: {'lr': 2.783273994372204e-05, 'samples': 21732352, 'steps': 42445, 'batch_loss/train': 1.0118652950040996} +12/28/2021 13:14:08 - INFO - codeparrot_training - Step 42446: {'lr': 2.7825512213307036e-05, 'samples': 21732864, 'steps': 42446, 'batch_loss/train': 0.795604023616761} +12/28/2021 13:14:19 - INFO - codeparrot_training - Step 42447: {'lr': 2.7818285366173868e-05, 'samples': 21733376, 'steps': 42447, 'batch_loss/train': 0.722171665285714} +12/28/2021 13:14:30 - INFO - codeparrot_training - Step 42448: {'lr': 2.78110594023514e-05, 'samples': 21733888, 'steps': 42448, 'batch_loss/train': 0.7989903502166271} +12/28/2021 13:14:42 - INFO - codeparrot_training - Step 42449: {'lr': 2.78038343218682e-05, 'samples': 21734400, 'steps': 42449, 'batch_loss/train': 0.811983204446733} +12/28/2021 13:14:52 - INFO - codeparrot_training - Step 42450: {'lr': 2.779661012475307e-05, 'samples': 21734912, 'steps': 42450, 'batch_loss/train': 0.7251607356593013} +12/28/2021 13:15:03 - INFO - codeparrot_training - Step 42451: {'lr': 2.778938681103471e-05, 'samples': 21735424, 'steps': 42451, 'batch_loss/train': 0.6520466923248023} +12/28/2021 13:15:13 - INFO - codeparrot_training - Step 42452: {'lr': 2.7782164380741853e-05, 'samples': 21735936, 'steps': 42452, 'batch_loss/train': 0.625341618899256} +12/28/2021 13:15:27 - INFO - codeparrot_training - Step 42453: {'lr': 2.777494283390322e-05, 'samples': 21736448, 'steps': 42453, 'batch_loss/train': 0.6816876232624054} +12/28/2021 13:15:38 - INFO - codeparrot_training - Step 42454: {'lr': 2.7767722170547514e-05, 'samples': 21736960, 'steps': 42454, 'batch_loss/train': 0.7284287353977561} +12/28/2021 13:15:49 - INFO - codeparrot_training - Step 42455: {'lr': 2.7760502390703378e-05, 'samples': 21737472, 'steps': 42455, 'batch_loss/train': 0.6828666548244655} +12/28/2021 13:16:01 - INFO - codeparrot_training - Step 42456: {'lr': 2.7753283494399593e-05, 'samples': 21737984, 'steps': 42456, 'batch_loss/train': 0.7208312912844121} +12/28/2021 13:16:11 - INFO - codeparrot_training - Step 42457: {'lr': 2.774606548166489e-05, 'samples': 21738496, 'steps': 42457, 'batch_loss/train': 0.7134509230963886} +12/28/2021 13:16:22 - INFO - codeparrot_training - Step 42458: {'lr': 2.7738848352527822e-05, 'samples': 21739008, 'steps': 42458, 'batch_loss/train': 0.7382389204576612} +12/28/2021 13:16:34 - INFO - codeparrot_training - Step 42459: {'lr': 2.773163210701726e-05, 'samples': 21739520, 'steps': 42459, 'batch_loss/train': 0.7395894005894661} +12/28/2021 13:16:45 - INFO - codeparrot_training - Step 42460: {'lr': 2.772441674516174e-05, 'samples': 21740032, 'steps': 42460, 'batch_loss/train': 0.7557442770339549} +12/28/2021 13:16:56 - INFO - codeparrot_training - Step 42461: {'lr': 2.7717202266990038e-05, 'samples': 21740544, 'steps': 42461, 'batch_loss/train': 0.7884063720703125} +12/28/2021 13:17:06 - INFO - codeparrot_training - Step 42462: {'lr': 2.7709988672530802e-05, 'samples': 21741056, 'steps': 42462, 'batch_loss/train': 0.761058063886594} +12/28/2021 13:17:19 - INFO - codeparrot_training - Step 42463: {'lr': 2.7702775961812698e-05, 'samples': 21741568, 'steps': 42463, 'batch_loss/train': 0.9054101714864373} +12/28/2021 13:17:29 - INFO - codeparrot_training - Step 42464: {'lr': 2.769556413486443e-05, 'samples': 21742080, 'steps': 42464, 'batch_loss/train': 0.772924785502255} +12/28/2021 13:17:40 - INFO - codeparrot_training - Step 42465: {'lr': 2.768835319171467e-05, 'samples': 21742592, 'steps': 42465, 'batch_loss/train': 0.44834332005120814} +12/28/2021 13:17:54 - INFO - codeparrot_training - Step 42466: {'lr': 2.7681143132392062e-05, 'samples': 21743104, 'steps': 42466, 'batch_loss/train': 0.9216730322223157} +12/28/2021 13:18:05 - INFO - codeparrot_training - Step 42467: {'lr': 2.76739339569253e-05, 'samples': 21743616, 'steps': 42467, 'batch_loss/train': 0.6955186887644231} +12/28/2021 13:18:15 - INFO - codeparrot_training - Step 42468: {'lr': 2.766672566534309e-05, 'samples': 21744128, 'steps': 42468, 'batch_loss/train': 0.7795752054080367} +12/28/2021 13:18:27 - INFO - codeparrot_training - Step 42469: {'lr': 2.765951825767396e-05, 'samples': 21744640, 'steps': 42469, 'batch_loss/train': 0.6523179637733847} +12/28/2021 13:18:38 - INFO - codeparrot_training - Step 42470: {'lr': 2.7652311733946584e-05, 'samples': 21745152, 'steps': 42470, 'batch_loss/train': 0.821408633608371} +12/28/2021 13:18:49 - INFO - codeparrot_training - Step 42471: {'lr': 2.7645106094189742e-05, 'samples': 21745664, 'steps': 42471, 'batch_loss/train': 0.6106278292136267} +12/28/2021 13:18:59 - INFO - codeparrot_training - Step 42472: {'lr': 2.763790133843197e-05, 'samples': 21746176, 'steps': 42472, 'batch_loss/train': 0.7613845709711313} +12/28/2021 13:19:14 - INFO - codeparrot_training - Step 42473: {'lr': 2.763069746670191e-05, 'samples': 21746688, 'steps': 42473, 'batch_loss/train': 0.6807342073880136} +12/28/2021 13:19:25 - INFO - codeparrot_training - Step 42474: {'lr': 2.7623494479028317e-05, 'samples': 21747200, 'steps': 42474, 'batch_loss/train': 0.6475966391153634} +12/28/2021 13:19:35 - INFO - codeparrot_training - Step 42475: {'lr': 2.7616292375439693e-05, 'samples': 21747712, 'steps': 42475, 'batch_loss/train': 0.8158623361960053} +12/28/2021 13:19:48 - INFO - codeparrot_training - Step 42476: {'lr': 2.7609091155964717e-05, 'samples': 21748224, 'steps': 42476, 'batch_loss/train': 1.9549901261925697} +12/28/2021 13:19:58 - INFO - codeparrot_training - Step 42477: {'lr': 2.7601890820632025e-05, 'samples': 21748736, 'steps': 42477, 'batch_loss/train': 0.7157039120793343} +12/28/2021 13:20:09 - INFO - codeparrot_training - Step 42478: {'lr': 2.7594691369470236e-05, 'samples': 21749248, 'steps': 42478, 'batch_loss/train': 0.7185192593606189} +12/28/2021 13:20:19 - INFO - codeparrot_training - Step 42479: {'lr': 2.7587492802507997e-05, 'samples': 21749760, 'steps': 42479, 'batch_loss/train': 0.7785386312752962} +12/28/2021 13:20:31 - INFO - codeparrot_training - Step 42480: {'lr': 2.7580295119773946e-05, 'samples': 21750272, 'steps': 42480, 'batch_loss/train': 0.8081485666334629} +12/28/2021 13:20:42 - INFO - codeparrot_training - Step 42481: {'lr': 2.7573098321296537e-05, 'samples': 21750784, 'steps': 42481, 'batch_loss/train': 0.7080979589372873} +12/28/2021 13:20:53 - INFO - codeparrot_training - Step 42482: {'lr': 2.7565902407104575e-05, 'samples': 21751296, 'steps': 42482, 'batch_loss/train': 0.7943774070590734} +12/28/2021 13:21:07 - INFO - codeparrot_training - Step 42483: {'lr': 2.7558707377226655e-05, 'samples': 21751808, 'steps': 42483, 'batch_loss/train': 0.6869534244760871} +12/28/2021 13:21:18 - INFO - codeparrot_training - Step 42484: {'lr': 2.7551513231691224e-05, 'samples': 21752320, 'steps': 42484, 'batch_loss/train': 0.4808771114330739} +12/28/2021 13:21:28 - INFO - codeparrot_training - Step 42485: {'lr': 2.754431997052706e-05, 'samples': 21752832, 'steps': 42485, 'batch_loss/train': 0.7974307769909501} +12/28/2021 13:21:40 - INFO - codeparrot_training - Step 42486: {'lr': 2.7537127593762644e-05, 'samples': 21753344, 'steps': 42486, 'batch_loss/train': 0.758601248729974} +12/28/2021 13:21:51 - INFO - codeparrot_training - Step 42487: {'lr': 2.7529936101426623e-05, 'samples': 21753856, 'steps': 42487, 'batch_loss/train': 0.7834160490892828} +12/28/2021 13:22:02 - INFO - codeparrot_training - Step 42488: {'lr': 2.752274549354755e-05, 'samples': 21754368, 'steps': 42488, 'batch_loss/train': 0.7649109102785587} +12/28/2021 13:22:12 - INFO - codeparrot_training - Step 42489: {'lr': 2.7515555770154077e-05, 'samples': 21754880, 'steps': 42489, 'batch_loss/train': 0.6474554045125842} +12/28/2021 13:22:26 - INFO - codeparrot_training - Step 42490: {'lr': 2.7508366931274703e-05, 'samples': 21755392, 'steps': 42490, 'batch_loss/train': 0.7728499453514814} +12/28/2021 13:22:37 - INFO - codeparrot_training - Step 42491: {'lr': 2.7501178976938074e-05, 'samples': 21755904, 'steps': 42491, 'batch_loss/train': 0.7670481922104955} +12/28/2021 13:22:47 - INFO - codeparrot_training - Step 42492: {'lr': 2.749399190717275e-05, 'samples': 21756416, 'steps': 42492, 'batch_loss/train': 0.8402541382238269} +12/28/2021 13:23:00 - INFO - codeparrot_training - Step 42493: {'lr': 2.7486805722007292e-05, 'samples': 21756928, 'steps': 42493, 'batch_loss/train': 0.7798980223014951} +12/28/2021 13:23:10 - INFO - codeparrot_training - Step 42494: {'lr': 2.7479620421470315e-05, 'samples': 21757440, 'steps': 42494, 'batch_loss/train': 0.6654002303257585} +12/28/2021 13:23:21 - INFO - codeparrot_training - Step 42495: {'lr': 2.7472436005590268e-05, 'samples': 21757952, 'steps': 42495, 'batch_loss/train': 0.7354262848384678} +12/28/2021 13:23:33 - INFO - codeparrot_training - Step 42496: {'lr': 2.7465252474395823e-05, 'samples': 21758464, 'steps': 42496, 'batch_loss/train': 0.7384862648323178} +12/28/2021 13:23:44 - INFO - codeparrot_training - Step 42497: {'lr': 2.7458069827915572e-05, 'samples': 21758976, 'steps': 42497, 'batch_loss/train': 0.8084387555718422} +12/28/2021 13:23:54 - INFO - codeparrot_training - Step 42498: {'lr': 2.7450888066177958e-05, 'samples': 21759488, 'steps': 42498, 'batch_loss/train': 0.6762193748727441} +12/28/2021 13:24:05 - INFO - codeparrot_training - Step 42499: {'lr': 2.744370718921152e-05, 'samples': 21760000, 'steps': 42499, 'batch_loss/train': 0.7796046268194914} +12/28/2021 13:24:05 - INFO - codeparrot_training - Evaluating and saving model checkpoint +12/28/2021 13:27:27 - INFO - codeparrot_training - Step 42500: {'loss/eval': 0.7435383200645447, 'perplexity': 2.1033647060394287}