diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -4325,3 +4325,1009 @@ Use FP16 precision: False 02/24/2022 06:44:24 - INFO - codeparrot_training - Step 3998: {'lr': 0.0004978654846795748, 'samples': 2047488, 'steps': 3998, 'loss/train': 2.3059802055358887} 02/24/2022 06:44:29 - INFO - codeparrot_training - Step 3999: {'lr': 0.0004978633505423976, 'samples': 2048000, 'steps': 3999, 'loss/train': 2.555680751800537} 02/24/2022 06:44:29 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 06:44:46 - WARNING - huggingface_hub.repository - Several commits (4) will be pushed upstream. +02/24/2022 06:44:46 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 06:45:19 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 292ce9a..f6d96e1 floral-grass-11 -> floral-grass-11 + +02/24/2022 06:45:25 - INFO - codeparrot_training - Step 4000: {'lr': 0.0004978612153434526, 'samples': 2048512, 'steps': 4000, 'loss/train': 1.766768217086792} +02/24/2022 06:45:29 - INFO - codeparrot_training - Step 4001: {'lr': 0.0004978590790827488, 'samples': 2049024, 'steps': 4001, 'loss/train': 2.1696391105651855} +02/24/2022 06:45:34 - INFO - codeparrot_training - Step 4002: {'lr': 0.0004978569417602955, 'samples': 2049536, 'steps': 4002, 'loss/train': 3.4861037731170654} +02/24/2022 06:45:38 - INFO - codeparrot_training - Step 4003: {'lr': 0.0004978548033761017, 'samples': 2050048, 'steps': 4003, 'loss/train': 2.9585325717926025} +02/24/2022 06:45:43 - INFO - codeparrot_training - Step 4004: {'lr': 0.0004978526639301766, 'samples': 2050560, 'steps': 4004, 'loss/train': 2.1167385578155518} +02/24/2022 06:45:47 - INFO - codeparrot_training - Step 4005: {'lr': 0.0004978505234225294, 'samples': 2051072, 'steps': 4005, 'loss/train': 2.0740511417388916} +02/24/2022 06:45:53 - INFO - codeparrot_training - Step 4006: {'lr': 0.0004978483818531693, 'samples': 2051584, 'steps': 4006, 'loss/train': 2.959043025970459} +02/24/2022 06:45:56 - INFO - codeparrot_training - Step 4007: {'lr': 0.0004978462392221054, 'samples': 2052096, 'steps': 4007, 'loss/train': 2.3407022953033447} +02/24/2022 06:46:02 - INFO - codeparrot_training - Step 4008: {'lr': 0.0004978440955293468, 'samples': 2052608, 'steps': 4008, 'loss/train': 2.577820062637329} +02/24/2022 06:46:06 - INFO - codeparrot_training - Step 4009: {'lr': 0.000497841950774903, 'samples': 2053120, 'steps': 4009, 'loss/train': 2.4661107063293457} +02/24/2022 06:46:11 - INFO - codeparrot_training - Step 4010: {'lr': 0.0004978398049587828, 'samples': 2053632, 'steps': 4010, 'loss/train': 1.9514811038970947} +02/24/2022 06:46:15 - INFO - codeparrot_training - Step 4011: {'lr': 0.0004978376580809957, 'samples': 2054144, 'steps': 4011, 'loss/train': 2.8353562355041504} +02/24/2022 06:46:20 - INFO - codeparrot_training - Step 4012: {'lr': 0.0004978355101415507, 'samples': 2054656, 'steps': 4012, 'loss/train': 3.286431312561035} +02/24/2022 06:46:23 - INFO - codeparrot_training - Step 4013: {'lr': 0.0004978333611404571, 'samples': 2055168, 'steps': 4013, 'loss/train': 1.1409614086151123} +02/24/2022 06:46:29 - INFO - codeparrot_training - Step 4014: {'lr': 0.0004978312110777241, 'samples': 2055680, 'steps': 4014, 'loss/train': 0.5710865259170532} +02/24/2022 06:46:32 - INFO - codeparrot_training - Step 4015: {'lr': 0.0004978290599533609, 'samples': 2056192, 'steps': 4015, 'loss/train': 2.760190010070801} +02/24/2022 06:46:39 - INFO - codeparrot_training - Step 4016: {'lr': 0.0004978269077673766, 'samples': 2056704, 'steps': 4016, 'loss/train': 2.6205389499664307} +02/24/2022 06:46:42 - INFO - codeparrot_training - Step 4017: {'lr': 0.0004978247545197806, 'samples': 2057216, 'steps': 4017, 'loss/train': 1.9146541357040405} +02/24/2022 06:46:48 - INFO - codeparrot_training - Step 4018: {'lr': 0.0004978226002105821, 'samples': 2057728, 'steps': 4018, 'loss/train': 2.279057264328003} +02/24/2022 06:46:51 - INFO - codeparrot_training - Step 4019: {'lr': 0.0004978204448397902, 'samples': 2058240, 'steps': 4019, 'loss/train': 2.3974881172180176} +02/24/2022 06:46:57 - INFO - codeparrot_training - Step 4020: {'lr': 0.0004978182884074142, 'samples': 2058752, 'steps': 4020, 'loss/train': 3.1385862827301025} +02/24/2022 06:47:00 - INFO - codeparrot_training - Step 4021: {'lr': 0.0004978161309134633, 'samples': 2059264, 'steps': 4021, 'loss/train': 2.3737213611602783} +02/24/2022 06:47:06 - INFO - codeparrot_training - Step 4022: {'lr': 0.0004978139723579469, 'samples': 2059776, 'steps': 4022, 'loss/train': 4.163173198699951} +02/24/2022 06:47:09 - INFO - codeparrot_training - Step 4023: {'lr': 0.0004978118127408741, 'samples': 2060288, 'steps': 4023, 'loss/train': 1.2128463983535767} +02/24/2022 06:47:15 - INFO - codeparrot_training - Step 4024: {'lr': 0.0004978096520622541, 'samples': 2060800, 'steps': 4024, 'loss/train': 1.8585034608840942} +02/24/2022 06:47:18 - INFO - codeparrot_training - Step 4025: {'lr': 0.0004978074903220964, 'samples': 2061312, 'steps': 4025, 'loss/train': 2.923572063446045} +02/24/2022 06:47:24 - INFO - codeparrot_training - Step 4026: {'lr': 0.0004978053275204099, 'samples': 2061824, 'steps': 4026, 'loss/train': 1.7987560033798218} +02/24/2022 06:47:28 - INFO - codeparrot_training - Step 4027: {'lr': 0.0004978031636572042, 'samples': 2062336, 'steps': 4027, 'loss/train': 2.458836078643799} +02/24/2022 06:47:33 - INFO - codeparrot_training - Step 4028: {'lr': 0.0004978009987324884, 'samples': 2062848, 'steps': 4028, 'loss/train': 3.3272807598114014} +02/24/2022 06:47:37 - INFO - codeparrot_training - Step 4029: {'lr': 0.0004977988327462718, 'samples': 2063360, 'steps': 4029, 'loss/train': 2.1616814136505127} +02/24/2022 06:47:42 - INFO - codeparrot_training - Step 4030: {'lr': 0.0004977966656985637, 'samples': 2063872, 'steps': 4030, 'loss/train': 2.478296995162964} +02/24/2022 06:47:46 - INFO - codeparrot_training - Step 4031: {'lr': 0.0004977944975893733, 'samples': 2064384, 'steps': 4031, 'loss/train': 1.5593154430389404} +02/24/2022 06:47:51 - INFO - codeparrot_training - Step 4032: {'lr': 0.00049779232841871, 'samples': 2064896, 'steps': 4032, 'loss/train': 2.3311731815338135} +02/24/2022 06:47:55 - INFO - codeparrot_training - Step 4033: {'lr': 0.0004977901581865831, 'samples': 2065408, 'steps': 4033, 'loss/train': 2.844867467880249} +02/24/2022 06:48:00 - INFO - codeparrot_training - Step 4034: {'lr': 0.0004977879868930018, 'samples': 2065920, 'steps': 4034, 'loss/train': 2.878805637359619} +02/24/2022 06:48:04 - INFO - codeparrot_training - Step 4035: {'lr': 0.0004977858145379754, 'samples': 2066432, 'steps': 4035, 'loss/train': 2.0401692390441895} +02/24/2022 06:48:09 - INFO - codeparrot_training - Step 4036: {'lr': 0.0004977836411215133, 'samples': 2066944, 'steps': 4036, 'loss/train': 2.3467581272125244} +02/24/2022 06:48:13 - INFO - codeparrot_training - Step 4037: {'lr': 0.0004977814666436248, 'samples': 2067456, 'steps': 4037, 'loss/train': 1.9562848806381226} +02/24/2022 06:48:18 - INFO - codeparrot_training - Step 4038: {'lr': 0.0004977792911043191, 'samples': 2067968, 'steps': 4038, 'loss/train': 3.3585565090179443} +02/24/2022 06:48:21 - INFO - codeparrot_training - Step 4039: {'lr': 0.0004977771145036056, 'samples': 2068480, 'steps': 4039, 'loss/train': 2.3639299869537354} +02/24/2022 06:48:27 - INFO - codeparrot_training - Step 4040: {'lr': 0.0004977749368414937, 'samples': 2068992, 'steps': 4040, 'loss/train': 2.498365640640259} +02/24/2022 06:48:30 - INFO - codeparrot_training - Step 4041: {'lr': 0.0004977727581179926, 'samples': 2069504, 'steps': 4041, 'loss/train': 4.177948474884033} +02/24/2022 06:48:37 - INFO - codeparrot_training - Step 4042: {'lr': 0.0004977705783331117, 'samples': 2070016, 'steps': 4042, 'loss/train': 3.2485718727111816} +02/24/2022 06:48:40 - INFO - codeparrot_training - Step 4043: {'lr': 0.0004977683974868603, 'samples': 2070528, 'steps': 4043, 'loss/train': 3.354999303817749} +02/24/2022 06:48:45 - INFO - codeparrot_training - Step 4044: {'lr': 0.0004977662155792478, 'samples': 2071040, 'steps': 4044, 'loss/train': 3.3551292419433594} +02/24/2022 06:48:49 - INFO - codeparrot_training - Step 4045: {'lr': 0.0004977640326102834, 'samples': 2071552, 'steps': 4045, 'loss/train': 2.6851553916931152} +02/24/2022 06:48:54 - INFO - codeparrot_training - Step 4046: {'lr': 0.0004977618485799767, 'samples': 2072064, 'steps': 4046, 'loss/train': 3.9697463512420654} +02/24/2022 06:48:58 - INFO - codeparrot_training - Step 4047: {'lr': 0.0004977596634883368, 'samples': 2072576, 'steps': 4047, 'loss/train': 3.1436359882354736} +02/24/2022 06:49:03 - INFO - codeparrot_training - Step 4048: {'lr': 0.0004977574773353732, 'samples': 2073088, 'steps': 4048, 'loss/train': 3.32637357711792} +02/24/2022 06:49:07 - INFO - codeparrot_training - Step 4049: {'lr': 0.0004977552901210952, 'samples': 2073600, 'steps': 4049, 'loss/train': 1.3028780221939087} +02/24/2022 06:49:12 - INFO - codeparrot_training - Step 4050: {'lr': 0.0004977531018455124, 'samples': 2074112, 'steps': 4050, 'loss/train': 2.07358455657959} +02/24/2022 06:49:16 - INFO - codeparrot_training - Step 4051: {'lr': 0.0004977509125086338, 'samples': 2074624, 'steps': 4051, 'loss/train': 3.3087549209594727} +02/24/2022 06:49:22 - INFO - codeparrot_training - Step 4052: {'lr': 0.000497748722110469, 'samples': 2075136, 'steps': 4052, 'loss/train': 3.6010215282440186} +02/24/2022 06:49:26 - INFO - codeparrot_training - Step 4053: {'lr': 0.0004977465306510273, 'samples': 2075648, 'steps': 4053, 'loss/train': 3.058358907699585} +02/24/2022 06:49:31 - INFO - codeparrot_training - Step 4054: {'lr': 0.0004977443381303182, 'samples': 2076160, 'steps': 4054, 'loss/train': 2.0042192935943604} +02/24/2022 06:49:34 - INFO - codeparrot_training - Step 4055: {'lr': 0.000497742144548351, 'samples': 2076672, 'steps': 4055, 'loss/train': 2.343111276626587} +02/24/2022 06:49:40 - INFO - codeparrot_training - Step 4056: {'lr': 0.0004977399499051351, 'samples': 2077184, 'steps': 4056, 'loss/train': 2.196147918701172} +02/24/2022 06:49:43 - INFO - codeparrot_training - Step 4057: {'lr': 0.0004977377542006799, 'samples': 2077696, 'steps': 4057, 'loss/train': 1.5491563081741333} +02/24/2022 06:49:49 - INFO - codeparrot_training - Step 4058: {'lr': 0.0004977355574349949, 'samples': 2078208, 'steps': 4058, 'loss/train': 2.3003621101379395} +02/24/2022 06:49:53 - INFO - codeparrot_training - Step 4059: {'lr': 0.0004977333596080894, 'samples': 2078720, 'steps': 4059, 'loss/train': 1.8863720893859863} +02/24/2022 06:49:58 - INFO - codeparrot_training - Step 4060: {'lr': 0.0004977311607199729, 'samples': 2079232, 'steps': 4060, 'loss/train': 3.0794272422790527} +02/24/2022 06:50:01 - INFO - codeparrot_training - Step 4061: {'lr': 0.0004977289607706547, 'samples': 2079744, 'steps': 4061, 'loss/train': 3.6320080757141113} +02/24/2022 06:50:09 - INFO - codeparrot_training - Step 4062: {'lr': 0.0004977267597601443, 'samples': 2080256, 'steps': 4062, 'loss/train': 2.693032741546631} +02/24/2022 06:50:12 - INFO - codeparrot_training - Step 4063: {'lr': 0.0004977245576884511, 'samples': 2080768, 'steps': 4063, 'loss/train': 1.1405690908432007} +02/24/2022 06:50:17 - INFO - codeparrot_training - Step 4064: {'lr': 0.0004977223545555847, 'samples': 2081280, 'steps': 4064, 'loss/train': 2.3361263275146484} +02/24/2022 06:50:21 - INFO - codeparrot_training - Step 4065: {'lr': 0.0004977201503615543, 'samples': 2081792, 'steps': 4065, 'loss/train': 3.531859874725342} +02/24/2022 06:50:26 - INFO - codeparrot_training - Step 4066: {'lr': 0.0004977179451063694, 'samples': 2082304, 'steps': 4066, 'loss/train': 2.254673480987549} +02/24/2022 06:50:30 - INFO - codeparrot_training - Step 4067: {'lr': 0.0004977157387900395, 'samples': 2082816, 'steps': 4067, 'loss/train': 2.790304660797119} +02/24/2022 06:50:35 - INFO - codeparrot_training - Step 4068: {'lr': 0.0004977135314125741, 'samples': 2083328, 'steps': 4068, 'loss/train': 1.7385773658752441} +02/24/2022 06:50:39 - INFO - codeparrot_training - Step 4069: {'lr': 0.0004977113229739825, 'samples': 2083840, 'steps': 4069, 'loss/train': 2.9903883934020996} +02/24/2022 06:50:45 - INFO - codeparrot_training - Step 4070: {'lr': 0.0004977091134742743, 'samples': 2084352, 'steps': 4070, 'loss/train': 2.2825770378112793} +02/24/2022 06:50:48 - INFO - codeparrot_training - Step 4071: {'lr': 0.0004977069029134588, 'samples': 2084864, 'steps': 4071, 'loss/train': 1.849504828453064} +02/24/2022 06:50:54 - INFO - codeparrot_training - Step 4072: {'lr': 0.0004977046912915458, 'samples': 2085376, 'steps': 4072, 'loss/train': 2.6642861366271973} +02/24/2022 06:50:57 - INFO - codeparrot_training - Step 4073: {'lr': 0.0004977024786085444, 'samples': 2085888, 'steps': 4073, 'loss/train': 3.099130630493164} +02/24/2022 06:51:03 - INFO - codeparrot_training - Step 4074: {'lr': 0.0004977002648644642, 'samples': 2086400, 'steps': 4074, 'loss/train': 2.609877347946167} +02/24/2022 06:51:06 - INFO - codeparrot_training - Step 4075: {'lr': 0.0004976980500593149, 'samples': 2086912, 'steps': 4075, 'loss/train': 2.702996253967285} +02/24/2022 06:51:12 - INFO - codeparrot_training - Step 4076: {'lr': 0.0004976958341931057, 'samples': 2087424, 'steps': 4076, 'loss/train': 3.115689754486084} +02/24/2022 06:51:15 - INFO - codeparrot_training - Step 4077: {'lr': 0.0004976936172658462, 'samples': 2087936, 'steps': 4077, 'loss/train': 0.3984140455722809} +02/24/2022 06:51:21 - INFO - codeparrot_training - Step 4078: {'lr': 0.0004976913992775459, 'samples': 2088448, 'steps': 4078, 'loss/train': 2.721973419189453} +02/24/2022 06:51:25 - INFO - codeparrot_training - Step 4079: {'lr': 0.0004976891802282143, 'samples': 2088960, 'steps': 4079, 'loss/train': 3.176347494125366} +02/24/2022 06:51:30 - INFO - codeparrot_training - Step 4080: {'lr': 0.0004976869601178609, 'samples': 2089472, 'steps': 4080, 'loss/train': 3.010443687438965} +02/24/2022 06:51:34 - INFO - codeparrot_training - Step 4081: {'lr': 0.0004976847389464952, 'samples': 2089984, 'steps': 4081, 'loss/train': 2.6185731887817383} +02/24/2022 06:51:39 - INFO - codeparrot_training - Step 4082: {'lr': 0.0004976825167141268, 'samples': 2090496, 'steps': 4082, 'loss/train': 2.216217279434204} +02/24/2022 06:51:43 - INFO - codeparrot_training - Step 4083: {'lr': 0.000497680293420765, 'samples': 2091008, 'steps': 4083, 'loss/train': 1.74923574924469} +02/24/2022 06:51:48 - INFO - codeparrot_training - Step 4084: {'lr': 0.0004976780690664196, 'samples': 2091520, 'steps': 4084, 'loss/train': 2.7566394805908203} +02/24/2022 06:51:52 - INFO - codeparrot_training - Step 4085: {'lr': 0.0004976758436511, 'samples': 2092032, 'steps': 4085, 'loss/train': 0.7061077356338501} +02/24/2022 06:51:57 - INFO - codeparrot_training - Step 4086: {'lr': 0.0004976736171748156, 'samples': 2092544, 'steps': 4086, 'loss/train': 2.711764335632324} +02/24/2022 06:52:01 - INFO - codeparrot_training - Step 4087: {'lr': 0.0004976713896375762, 'samples': 2093056, 'steps': 4087, 'loss/train': 3.8529140949249268} +02/24/2022 06:52:07 - INFO - codeparrot_training - Step 4088: {'lr': 0.0004976691610393911, 'samples': 2093568, 'steps': 4088, 'loss/train': 4.321038246154785} +02/24/2022 06:52:11 - INFO - codeparrot_training - Step 4089: {'lr': 0.0004976669313802701, 'samples': 2094080, 'steps': 4089, 'loss/train': 2.645718812942505} +02/24/2022 06:52:16 - INFO - codeparrot_training - Step 4090: {'lr': 0.0004976647006602225, 'samples': 2094592, 'steps': 4090, 'loss/train': 3.2641632556915283} +02/24/2022 06:52:20 - INFO - codeparrot_training - Step 4091: {'lr': 0.0004976624688792581, 'samples': 2095104, 'steps': 4091, 'loss/train': 3.3384528160095215} +02/24/2022 06:52:25 - INFO - codeparrot_training - Step 4092: {'lr': 0.0004976602360373861, 'samples': 2095616, 'steps': 4092, 'loss/train': 3.3506109714508057} +02/24/2022 06:52:29 - INFO - codeparrot_training - Step 4093: {'lr': 0.0004976580021346164, 'samples': 2096128, 'steps': 4093, 'loss/train': 2.810047149658203} +02/24/2022 06:52:34 - INFO - codeparrot_training - Step 4094: {'lr': 0.0004976557671709585, 'samples': 2096640, 'steps': 4094, 'loss/train': 0.20184071362018585} +02/24/2022 06:52:40 - INFO - codeparrot_training - Step 4095: {'lr': 0.0004976535311464219, 'samples': 2097152, 'steps': 4095, 'loss/train': 3.3932225704193115} +02/24/2022 06:52:43 - INFO - codeparrot_training - Step 4096: {'lr': 0.0004976512940610162, 'samples': 2097664, 'steps': 4096, 'loss/train': 2.8875722885131836} +02/24/2022 06:52:50 - INFO - codeparrot_training - Step 4097: {'lr': 0.0004976490559147511, 'samples': 2098176, 'steps': 4097, 'loss/train': 1.573515772819519} +02/24/2022 06:52:53 - INFO - codeparrot_training - Step 4098: {'lr': 0.0004976468167076359, 'samples': 2098688, 'steps': 4098, 'loss/train': 1.7893885374069214} +02/24/2022 06:52:59 - INFO - codeparrot_training - Step 4099: {'lr': 0.0004976445764396805, 'samples': 2099200, 'steps': 4099, 'loss/train': 3.0921425819396973} +02/24/2022 06:53:02 - INFO - codeparrot_training - Step 4100: {'lr': 0.0004976423351108943, 'samples': 2099712, 'steps': 4100, 'loss/train': 3.3553755283355713} +02/24/2022 06:53:08 - INFO - codeparrot_training - Step 4101: {'lr': 0.0004976400927212871, 'samples': 2100224, 'steps': 4101, 'loss/train': 2.4786128997802734} +02/24/2022 06:53:11 - INFO - codeparrot_training - Step 4102: {'lr': 0.0004976378492708681, 'samples': 2100736, 'steps': 4102, 'loss/train': 3.5474250316619873} +02/24/2022 06:53:17 - INFO - codeparrot_training - Step 4103: {'lr': 0.0004976356047596475, 'samples': 2101248, 'steps': 4103, 'loss/train': 3.0893149375915527} +02/24/2022 06:53:20 - INFO - codeparrot_training - Step 4104: {'lr': 0.0004976333591876344, 'samples': 2101760, 'steps': 4104, 'loss/train': 1.6089959144592285} +02/24/2022 06:53:26 - INFO - codeparrot_training - Step 4105: {'lr': 0.0004976311125548387, 'samples': 2102272, 'steps': 4105, 'loss/train': 1.4295350313186646} +02/24/2022 06:53:29 - INFO - codeparrot_training - Step 4106: {'lr': 0.00049762886486127, 'samples': 2102784, 'steps': 4106, 'loss/train': 2.4978926181793213} +02/24/2022 06:53:35 - INFO - codeparrot_training - Step 4107: {'lr': 0.0004976266161069379, 'samples': 2103296, 'steps': 4107, 'loss/train': 2.4716033935546875} +02/24/2022 06:53:39 - INFO - codeparrot_training - Step 4108: {'lr': 0.0004976243662918518, 'samples': 2103808, 'steps': 4108, 'loss/train': 2.882061719894409} +02/24/2022 06:53:44 - INFO - codeparrot_training - Step 4109: {'lr': 0.0004976221154160217, 'samples': 2104320, 'steps': 4109, 'loss/train': 3.3347325325012207} +02/24/2022 06:53:48 - INFO - codeparrot_training - Step 4110: {'lr': 0.0004976198634794571, 'samples': 2104832, 'steps': 4110, 'loss/train': 2.1972203254699707} +02/24/2022 06:53:53 - INFO - codeparrot_training - Step 4111: {'lr': 0.0004976176104821675, 'samples': 2105344, 'steps': 4111, 'loss/train': 3.1521363258361816} +02/24/2022 06:53:57 - INFO - codeparrot_training - Step 4112: {'lr': 0.0004976153564241628, 'samples': 2105856, 'steps': 4112, 'loss/train': 2.865093231201172} +02/24/2022 06:54:02 - INFO - codeparrot_training - Step 4113: {'lr': 0.0004976131013054526, 'samples': 2106368, 'steps': 4113, 'loss/train': 2.075465202331543} +02/24/2022 06:54:06 - INFO - codeparrot_training - Step 4114: {'lr': 0.0004976108451260464, 'samples': 2106880, 'steps': 4114, 'loss/train': 1.5532543659210205} +02/24/2022 06:54:11 - INFO - codeparrot_training - Step 4115: {'lr': 0.000497608587885954, 'samples': 2107392, 'steps': 4115, 'loss/train': 3.418349027633667} +02/24/2022 06:54:15 - INFO - codeparrot_training - Step 4116: {'lr': 0.0004976063295851849, 'samples': 2107904, 'steps': 4116, 'loss/train': 0.24530726671218872} +02/24/2022 06:54:20 - INFO - codeparrot_training - Step 4117: {'lr': 0.000497604070223749, 'samples': 2108416, 'steps': 4117, 'loss/train': 3.3822782039642334} +02/24/2022 06:54:24 - INFO - codeparrot_training - Step 4118: {'lr': 0.0004976018098016559, 'samples': 2108928, 'steps': 4118, 'loss/train': 3.072525978088379} +02/24/2022 06:54:29 - INFO - codeparrot_training - Step 4119: {'lr': 0.0004975995483189153, 'samples': 2109440, 'steps': 4119, 'loss/train': 3.4508750438690186} +02/24/2022 06:54:33 - INFO - codeparrot_training - Step 4120: {'lr': 0.0004975972857755368, 'samples': 2109952, 'steps': 4120, 'loss/train': 3.0615954399108887} +02/24/2022 06:54:38 - INFO - codeparrot_training - Step 4121: {'lr': 0.0004975950221715302, 'samples': 2110464, 'steps': 4121, 'loss/train': 1.7298758029937744} +02/24/2022 06:54:42 - INFO - codeparrot_training - Step 4122: {'lr': 0.0004975927575069051, 'samples': 2110976, 'steps': 4122, 'loss/train': 2.7990801334381104} +02/24/2022 06:54:47 - INFO - codeparrot_training - Step 4123: {'lr': 0.0004975904917816713, 'samples': 2111488, 'steps': 4123, 'loss/train': 3.235246181488037} +02/24/2022 06:54:51 - INFO - codeparrot_training - Step 4124: {'lr': 0.0004975882249958385, 'samples': 2112000, 'steps': 4124, 'loss/train': 3.029106616973877} +02/24/2022 06:54:57 - INFO - codeparrot_training - Step 4125: {'lr': 0.0004975859571494162, 'samples': 2112512, 'steps': 4125, 'loss/train': 3.2794747352600098} +02/24/2022 06:55:01 - INFO - codeparrot_training - Step 4126: {'lr': 0.0004975836882424143, 'samples': 2113024, 'steps': 4126, 'loss/train': 3.6696321964263916} +02/24/2022 06:55:06 - INFO - codeparrot_training - Step 4127: {'lr': 0.0004975814182748426, 'samples': 2113536, 'steps': 4127, 'loss/train': 2.5619986057281494} +02/24/2022 06:55:10 - INFO - codeparrot_training - Step 4128: {'lr': 0.0004975791472467108, 'samples': 2114048, 'steps': 4128, 'loss/train': 3.685267686843872} +02/24/2022 06:55:15 - INFO - codeparrot_training - Step 4129: {'lr': 0.0004975768751580283, 'samples': 2114560, 'steps': 4129, 'loss/train': 1.5730431079864502} +02/24/2022 06:55:19 - INFO - codeparrot_training - Step 4130: {'lr': 0.0004975746020088052, 'samples': 2115072, 'steps': 4130, 'loss/train': 2.3806216716766357} +02/24/2022 06:55:24 - INFO - codeparrot_training - Step 4131: {'lr': 0.0004975723277990512, 'samples': 2115584, 'steps': 4131, 'loss/train': 2.7863998413085938} +02/24/2022 06:55:28 - INFO - codeparrot_training - Step 4132: {'lr': 0.0004975700525287758, 'samples': 2116096, 'steps': 4132, 'loss/train': 2.579864025115967} +02/24/2022 06:55:33 - INFO - codeparrot_training - Step 4133: {'lr': 0.0004975677761979891, 'samples': 2116608, 'steps': 4133, 'loss/train': 1.7505260705947876} +02/24/2022 06:55:37 - INFO - codeparrot_training - Step 4134: {'lr': 0.0004975654988067005, 'samples': 2117120, 'steps': 4134, 'loss/train': 3.4967141151428223} +02/24/2022 06:55:43 - INFO - codeparrot_training - Step 4135: {'lr': 0.00049756322035492, 'samples': 2117632, 'steps': 4135, 'loss/train': 2.701418161392212} +02/24/2022 06:55:47 - INFO - codeparrot_training - Step 4136: {'lr': 0.0004975609408426572, 'samples': 2118144, 'steps': 4136, 'loss/train': 2.120211601257324} +02/24/2022 06:55:52 - INFO - codeparrot_training - Step 4137: {'lr': 0.000497558660269922, 'samples': 2118656, 'steps': 4137, 'loss/train': 2.9140028953552246} +02/24/2022 06:55:56 - INFO - codeparrot_training - Step 4138: {'lr': 0.0004975563786367241, 'samples': 2119168, 'steps': 4138, 'loss/train': 2.5102291107177734} +02/24/2022 06:56:01 - INFO - codeparrot_training - Step 4139: {'lr': 0.0004975540959430732, 'samples': 2119680, 'steps': 4139, 'loss/train': 2.784776449203491} +02/24/2022 06:56:05 - INFO - codeparrot_training - Step 4140: {'lr': 0.0004975518121889793, 'samples': 2120192, 'steps': 4140, 'loss/train': 3.3934874534606934} +02/24/2022 06:56:10 - INFO - codeparrot_training - Step 4141: {'lr': 0.000497549527374452, 'samples': 2120704, 'steps': 4141, 'loss/train': 3.3145015239715576} +02/24/2022 06:56:14 - INFO - codeparrot_training - Step 4142: {'lr': 0.000497547241499501, 'samples': 2121216, 'steps': 4142, 'loss/train': 3.1953165531158447} +02/24/2022 06:56:19 - INFO - codeparrot_training - Step 4143: {'lr': 0.0004975449545641364, 'samples': 2121728, 'steps': 4143, 'loss/train': 2.2466766834259033} +02/24/2022 06:56:23 - INFO - codeparrot_training - Step 4144: {'lr': 0.0004975426665683678, 'samples': 2122240, 'steps': 4144, 'loss/train': 1.393843173980713} +02/24/2022 06:56:29 - INFO - codeparrot_training - Step 4145: {'lr': 0.000497540377512205, 'samples': 2122752, 'steps': 4145, 'loss/train': 2.6824259757995605} +02/24/2022 06:56:33 - INFO - codeparrot_training - Step 4146: {'lr': 0.0004975380873956577, 'samples': 2123264, 'steps': 4146, 'loss/train': 2.5177626609802246} +02/24/2022 06:56:38 - INFO - codeparrot_training - Step 4147: {'lr': 0.0004975357962187359, 'samples': 2123776, 'steps': 4147, 'loss/train': 2.309971570968628} +02/24/2022 06:56:42 - INFO - codeparrot_training - Step 4148: {'lr': 0.0004975335039814493, 'samples': 2124288, 'steps': 4148, 'loss/train': 0.4787849485874176} +02/24/2022 06:56:47 - INFO - codeparrot_training - Step 4149: {'lr': 0.0004975312106838079, 'samples': 2124800, 'steps': 4149, 'loss/train': 1.504604458808899} +02/24/2022 06:56:51 - INFO - codeparrot_training - Step 4150: {'lr': 0.0004975289163258214, 'samples': 2125312, 'steps': 4150, 'loss/train': 2.834517478942871} +02/24/2022 06:56:56 - INFO - codeparrot_training - Step 4151: {'lr': 0.0004975266209074995, 'samples': 2125824, 'steps': 4151, 'loss/train': 3.719064474105835} +02/24/2022 06:57:00 - INFO - codeparrot_training - Step 4152: {'lr': 0.0004975243244288522, 'samples': 2126336, 'steps': 4152, 'loss/train': 1.7685208320617676} +02/24/2022 06:57:05 - INFO - codeparrot_training - Step 4153: {'lr': 0.0004975220268898893, 'samples': 2126848, 'steps': 4153, 'loss/train': 0.5588983297348022} +02/24/2022 06:57:09 - INFO - codeparrot_training - Step 4154: {'lr': 0.0004975197282906207, 'samples': 2127360, 'steps': 4154, 'loss/train': 2.361112356185913} +02/24/2022 06:57:15 - INFO - codeparrot_training - Step 4155: {'lr': 0.0004975174286310562, 'samples': 2127872, 'steps': 4155, 'loss/train': 2.388824224472046} +02/24/2022 06:57:18 - INFO - codeparrot_training - Step 4156: {'lr': 0.0004975151279112054, 'samples': 2128384, 'steps': 4156, 'loss/train': 1.9848403930664062} +02/24/2022 06:57:24 - INFO - codeparrot_training - Step 4157: {'lr': 0.0004975128261310787, 'samples': 2128896, 'steps': 4157, 'loss/train': 2.1548051834106445} +02/24/2022 06:57:27 - INFO - codeparrot_training - Step 4158: {'lr': 0.0004975105232906854, 'samples': 2129408, 'steps': 4158, 'loss/train': 1.8559770584106445} +02/24/2022 06:57:33 - INFO - codeparrot_training - Step 4159: {'lr': 0.0004975082193900357, 'samples': 2129920, 'steps': 4159, 'loss/train': 3.228878974914551} +02/24/2022 06:57:36 - INFO - codeparrot_training - Step 4160: {'lr': 0.0004975059144291394, 'samples': 2130432, 'steps': 4160, 'loss/train': 2.7836742401123047} +02/24/2022 06:57:42 - INFO - codeparrot_training - Step 4161: {'lr': 0.0004975036084080063, 'samples': 2130944, 'steps': 4161, 'loss/train': 3.261890411376953} +02/24/2022 06:57:45 - INFO - codeparrot_training - Step 4162: {'lr': 0.0004975013013266464, 'samples': 2131456, 'steps': 4162, 'loss/train': 5.519766807556152} +02/24/2022 06:57:51 - INFO - codeparrot_training - Step 4163: {'lr': 0.0004974989931850695, 'samples': 2131968, 'steps': 4163, 'loss/train': 2.5995800495147705} +02/24/2022 06:57:54 - INFO - codeparrot_training - Step 4164: {'lr': 0.0004974966839832855, 'samples': 2132480, 'steps': 4164, 'loss/train': 1.9648898839950562} +02/24/2022 06:58:00 - INFO - codeparrot_training - Step 4165: {'lr': 0.0004974943737213042, 'samples': 2132992, 'steps': 4165, 'loss/train': 3.284193992614746} +02/24/2022 06:58:03 - INFO - codeparrot_training - Step 4166: {'lr': 0.0004974920623991356, 'samples': 2133504, 'steps': 4166, 'loss/train': 3.09946346282959} +02/24/2022 06:58:09 - INFO - codeparrot_training - Step 4167: {'lr': 0.0004974897500167898, 'samples': 2134016, 'steps': 4167, 'loss/train': 1.719670057296753} +02/24/2022 06:58:15 - INFO - codeparrot_training - Step 4168: {'lr': 0.0004974874365742763, 'samples': 2134528, 'steps': 4168, 'loss/train': 3.4264321327209473} +02/24/2022 06:58:19 - INFO - codeparrot_training - Step 4169: {'lr': 0.0004974851220716053, 'samples': 2135040, 'steps': 4169, 'loss/train': 3.7530100345611572} +02/24/2022 06:58:22 - INFO - codeparrot_training - Step 4170: {'lr': 0.0004974828065087867, 'samples': 2135552, 'steps': 4170, 'loss/train': 4.17598295211792} +02/24/2022 06:58:28 - INFO - codeparrot_training - Step 4171: {'lr': 0.0004974804898858302, 'samples': 2136064, 'steps': 4171, 'loss/train': 1.6517932415008545} +02/24/2022 06:58:31 - INFO - codeparrot_training - Step 4172: {'lr': 0.0004974781722027459, 'samples': 2136576, 'steps': 4172, 'loss/train': 2.7677624225616455} +02/24/2022 06:58:36 - INFO - codeparrot_training - Step 4173: {'lr': 0.0004974758534595436, 'samples': 2137088, 'steps': 4173, 'loss/train': 1.6782039403915405} +02/24/2022 06:58:42 - INFO - codeparrot_training - Step 4174: {'lr': 0.0004974735336562335, 'samples': 2137600, 'steps': 4174, 'loss/train': 2.7820169925689697} +02/24/2022 06:58:46 - INFO - codeparrot_training - Step 4175: {'lr': 0.0004974712127928252, 'samples': 2138112, 'steps': 4175, 'loss/train': 1.788802981376648} +02/24/2022 06:58:51 - INFO - codeparrot_training - Step 4176: {'lr': 0.000497468890869329, 'samples': 2138624, 'steps': 4176, 'loss/train': 3.1842432022094727} +02/24/2022 06:58:55 - INFO - codeparrot_training - Step 4177: {'lr': 0.0004974665678857545, 'samples': 2139136, 'steps': 4177, 'loss/train': 2.655348777770996} +02/24/2022 06:59:00 - INFO - codeparrot_training - Step 4178: {'lr': 0.0004974642438421118, 'samples': 2139648, 'steps': 4178, 'loss/train': 3.539243698120117} +02/24/2022 06:59:04 - INFO - codeparrot_training - Step 4179: {'lr': 0.0004974619187384109, 'samples': 2140160, 'steps': 4179, 'loss/train': 3.358081817626953} +02/24/2022 06:59:10 - INFO - codeparrot_training - Step 4180: {'lr': 0.0004974595925746618, 'samples': 2140672, 'steps': 4180, 'loss/train': 1.3600236177444458} +02/24/2022 06:59:13 - INFO - codeparrot_training - Step 4181: {'lr': 0.0004974572653508742, 'samples': 2141184, 'steps': 4181, 'loss/train': 1.4963819980621338} +02/24/2022 06:59:19 - INFO - codeparrot_training - Step 4182: {'lr': 0.0004974549370670584, 'samples': 2141696, 'steps': 4182, 'loss/train': 1.3458919525146484} +02/24/2022 06:59:22 - INFO - codeparrot_training - Step 4183: {'lr': 0.0004974526077232242, 'samples': 2142208, 'steps': 4183, 'loss/train': 3.2300713062286377} +02/24/2022 06:59:28 - INFO - codeparrot_training - Step 4184: {'lr': 0.0004974502773193815, 'samples': 2142720, 'steps': 4184, 'loss/train': 3.0200698375701904} +02/24/2022 06:59:31 - INFO - codeparrot_training - Step 4185: {'lr': 0.0004974479458555405, 'samples': 2143232, 'steps': 4185, 'loss/train': 1.4956889152526855} +02/24/2022 06:59:37 - INFO - codeparrot_training - Step 4186: {'lr': 0.000497445613331711, 'samples': 2143744, 'steps': 4186, 'loss/train': 2.869990825653076} +02/24/2022 06:59:40 - INFO - codeparrot_training - Step 4187: {'lr': 0.0004974432797479032, 'samples': 2144256, 'steps': 4187, 'loss/train': 1.9260053634643555} +02/24/2022 06:59:46 - INFO - codeparrot_training - Step 4188: {'lr': 0.0004974409451041268, 'samples': 2144768, 'steps': 4188, 'loss/train': 2.113548994064331} +02/24/2022 06:59:49 - INFO - codeparrot_training - Step 4189: {'lr': 0.0004974386094003921, 'samples': 2145280, 'steps': 4189, 'loss/train': 3.0446863174438477} +02/24/2022 06:59:56 - INFO - codeparrot_training - Step 4190: {'lr': 0.0004974362726367089, 'samples': 2145792, 'steps': 4190, 'loss/train': 1.829388976097107} +02/24/2022 07:00:00 - INFO - codeparrot_training - Step 4191: {'lr': 0.0004974339348130873, 'samples': 2146304, 'steps': 4191, 'loss/train': 3.40861177444458} +02/24/2022 07:00:03 - INFO - codeparrot_training - Step 4192: {'lr': 0.0004974315959295373, 'samples': 2146816, 'steps': 4192, 'loss/train': 2.2164175510406494} +02/24/2022 07:00:09 - INFO - codeparrot_training - Step 4193: {'lr': 0.0004974292559860688, 'samples': 2147328, 'steps': 4193, 'loss/train': 2.3503730297088623} +02/24/2022 07:00:12 - INFO - codeparrot_training - Step 4194: {'lr': 0.0004974269149826921, 'samples': 2147840, 'steps': 4194, 'loss/train': 3.8397929668426514} +02/24/2022 07:00:18 - INFO - codeparrot_training - Step 4195: {'lr': 0.0004974245729194169, 'samples': 2148352, 'steps': 4195, 'loss/train': 2.7743873596191406} +02/24/2022 07:00:21 - INFO - codeparrot_training - Step 4196: {'lr': 0.0004974222297962535, 'samples': 2148864, 'steps': 4196, 'loss/train': 0.25342845916748047} +02/24/2022 07:00:27 - INFO - codeparrot_training - Step 4197: {'lr': 0.0004974198856132118, 'samples': 2149376, 'steps': 4197, 'loss/train': 0.7477357983589172} +02/24/2022 07:00:30 - INFO - codeparrot_training - Step 4198: {'lr': 0.0004974175403703019, 'samples': 2149888, 'steps': 4198, 'loss/train': 3.1035780906677246} +02/24/2022 07:00:36 - INFO - codeparrot_training - Step 4199: {'lr': 0.0004974151940675338, 'samples': 2150400, 'steps': 4199, 'loss/train': 3.3665931224823} +02/24/2022 07:00:39 - INFO - codeparrot_training - Step 4200: {'lr': 0.0004974128467049176, 'samples': 2150912, 'steps': 4200, 'loss/train': 2.379422664642334} +02/24/2022 07:00:46 - INFO - codeparrot_training - Step 4201: {'lr': 0.0004974104982824632, 'samples': 2151424, 'steps': 4201, 'loss/train': 3.107706069946289} +02/24/2022 07:00:50 - INFO - codeparrot_training - Step 4202: {'lr': 0.0004974081488001809, 'samples': 2151936, 'steps': 4202, 'loss/train': 2.9332213401794434} +02/24/2022 07:00:55 - INFO - codeparrot_training - Step 4203: {'lr': 0.0004974057982580806, 'samples': 2152448, 'steps': 4203, 'loss/train': 3.1667959690093994} +02/24/2022 07:00:59 - INFO - codeparrot_training - Step 4204: {'lr': 0.0004974034466561725, 'samples': 2152960, 'steps': 4204, 'loss/train': 2.3205790519714355} +02/24/2022 07:01:04 - INFO - codeparrot_training - Step 4205: {'lr': 0.0004974010939944667, 'samples': 2153472, 'steps': 4205, 'loss/train': 2.7863240242004395} +02/24/2022 07:01:08 - INFO - codeparrot_training - Step 4206: {'lr': 0.0004973987402729729, 'samples': 2153984, 'steps': 4206, 'loss/train': 3.1548309326171875} +02/24/2022 07:01:13 - INFO - codeparrot_training - Step 4207: {'lr': 0.0004973963854917016, 'samples': 2154496, 'steps': 4207, 'loss/train': 2.5072789192199707} +02/24/2022 07:01:17 - INFO - codeparrot_training - Step 4208: {'lr': 0.0004973940296506627, 'samples': 2155008, 'steps': 4208, 'loss/train': 3.0419318675994873} +02/24/2022 07:01:22 - INFO - codeparrot_training - Step 4209: {'lr': 0.0004973916727498664, 'samples': 2155520, 'steps': 4209, 'loss/train': 2.2985165119171143} +02/24/2022 07:01:26 - INFO - codeparrot_training - Step 4210: {'lr': 0.0004973893147893227, 'samples': 2156032, 'steps': 4210, 'loss/train': 2.629042148590088} +02/24/2022 07:01:31 - INFO - codeparrot_training - Step 4211: {'lr': 0.0004973869557690417, 'samples': 2156544, 'steps': 4211, 'loss/train': 2.9459030628204346} +02/24/2022 07:01:35 - INFO - codeparrot_training - Step 4212: {'lr': 0.0004973845956890336, 'samples': 2157056, 'steps': 4212, 'loss/train': 1.8773908615112305} +02/24/2022 07:01:41 - INFO - codeparrot_training - Step 4213: {'lr': 0.0004973822345493084, 'samples': 2157568, 'steps': 4213, 'loss/train': 2.438155174255371} +02/24/2022 07:01:44 - INFO - codeparrot_training - Step 4214: {'lr': 0.0004973798723498762, 'samples': 2158080, 'steps': 4214, 'loss/train': 3.4575247764587402} +02/24/2022 07:01:50 - INFO - codeparrot_training - Step 4215: {'lr': 0.0004973775090907473, 'samples': 2158592, 'steps': 4215, 'loss/train': 2.744008779525757} +02/24/2022 07:01:53 - INFO - codeparrot_training - Step 4216: {'lr': 0.0004973751447719316, 'samples': 2159104, 'steps': 4216, 'loss/train': 2.998913526535034} +02/24/2022 07:01:59 - INFO - codeparrot_training - Step 4217: {'lr': 0.0004973727793934394, 'samples': 2159616, 'steps': 4217, 'loss/train': 3.5606839656829834} +02/24/2022 07:02:03 - INFO - codeparrot_training - Step 4218: {'lr': 0.0004973704129552808, 'samples': 2160128, 'steps': 4218, 'loss/train': 2.9075167179107666} +02/24/2022 07:02:08 - INFO - codeparrot_training - Step 4219: {'lr': 0.0004973680454574657, 'samples': 2160640, 'steps': 4219, 'loss/train': 2.903878688812256} +02/24/2022 07:02:12 - INFO - codeparrot_training - Step 4220: {'lr': 0.0004973656769000046, 'samples': 2161152, 'steps': 4220, 'loss/train': 3.636286973953247} +02/24/2022 07:02:17 - INFO - codeparrot_training - Step 4221: {'lr': 0.0004973633072829075, 'samples': 2161664, 'steps': 4221, 'loss/train': 2.547485828399658} +02/24/2022 07:02:21 - INFO - codeparrot_training - Step 4222: {'lr': 0.0004973609366061845, 'samples': 2162176, 'steps': 4222, 'loss/train': 1.3098418712615967} +02/24/2022 07:02:26 - INFO - codeparrot_training - Step 4223: {'lr': 0.0004973585648698457, 'samples': 2162688, 'steps': 4223, 'loss/train': 2.7077715396881104} +02/24/2022 07:02:30 - INFO - codeparrot_training - Step 4224: {'lr': 0.0004973561920739015, 'samples': 2163200, 'steps': 4224, 'loss/train': 1.7139753103256226} +02/24/2022 07:02:35 - INFO - codeparrot_training - Step 4225: {'lr': 0.0004973538182183618, 'samples': 2163712, 'steps': 4225, 'loss/train': 3.5012283325195312} +02/24/2022 07:02:39 - INFO - codeparrot_training - Step 4226: {'lr': 0.000497351443303237, 'samples': 2164224, 'steps': 4226, 'loss/train': 1.6470822095870972} +02/24/2022 07:02:45 - INFO - codeparrot_training - Step 4227: {'lr': 0.0004973490673285372, 'samples': 2164736, 'steps': 4227, 'loss/train': 1.578709602355957} +02/24/2022 07:02:48 - INFO - codeparrot_training - Step 4228: {'lr': 0.0004973466902942723, 'samples': 2165248, 'steps': 4228, 'loss/train': 2.0621304512023926} +02/24/2022 07:02:54 - INFO - codeparrot_training - Step 4229: {'lr': 0.0004973443122004529, 'samples': 2165760, 'steps': 4229, 'loss/train': 2.8437130451202393} +02/24/2022 07:02:57 - INFO - codeparrot_training - Step 4230: {'lr': 0.0004973419330470891, 'samples': 2166272, 'steps': 4230, 'loss/train': 3.1119749546051025} +02/24/2022 07:03:03 - INFO - codeparrot_training - Step 4231: {'lr': 0.0004973395528341908, 'samples': 2166784, 'steps': 4231, 'loss/train': 2.8233418464660645} +02/24/2022 07:03:06 - INFO - codeparrot_training - Step 4232: {'lr': 0.0004973371715617685, 'samples': 2167296, 'steps': 4232, 'loss/train': 3.0032131671905518} +02/24/2022 07:03:12 - INFO - codeparrot_training - Step 4233: {'lr': 0.0004973347892298322, 'samples': 2167808, 'steps': 4233, 'loss/train': 2.891402006149292} +02/24/2022 07:03:15 - INFO - codeparrot_training - Step 4234: {'lr': 0.0004973324058383924, 'samples': 2168320, 'steps': 4234, 'loss/train': 0.458845853805542} +02/24/2022 07:03:21 - INFO - codeparrot_training - Step 4235: {'lr': 0.0004973300213874589, 'samples': 2168832, 'steps': 4235, 'loss/train': 1.8262721300125122} +02/24/2022 07:03:24 - INFO - codeparrot_training - Step 4236: {'lr': 0.0004973276358770422, 'samples': 2169344, 'steps': 4236, 'loss/train': 2.673147201538086} +02/24/2022 07:03:30 - INFO - codeparrot_training - Step 4237: {'lr': 0.0004973252493071525, 'samples': 2169856, 'steps': 4237, 'loss/train': 2.2957446575164795} +02/24/2022 07:03:34 - INFO - codeparrot_training - Step 4238: {'lr': 0.0004973228616777999, 'samples': 2170368, 'steps': 4238, 'loss/train': 2.6661338806152344} +02/24/2022 07:03:40 - INFO - codeparrot_training - Step 4239: {'lr': 0.0004973204729889946, 'samples': 2170880, 'steps': 4239, 'loss/train': 2.6410841941833496} +02/24/2022 07:03:43 - INFO - codeparrot_training - Step 4240: {'lr': 0.0004973180832407472, 'samples': 2171392, 'steps': 4240, 'loss/train': 3.154447317123413} +02/24/2022 07:03:48 - INFO - codeparrot_training - Step 4241: {'lr': 0.0004973156924330674, 'samples': 2171904, 'steps': 4241, 'loss/train': 3.064802646636963} +02/24/2022 07:03:52 - INFO - codeparrot_training - Step 4242: {'lr': 0.0004973133005659658, 'samples': 2172416, 'steps': 4242, 'loss/train': 3.259582996368408} +02/24/2022 07:03:58 - INFO - codeparrot_training - Step 4243: {'lr': 0.0004973109076394526, 'samples': 2172928, 'steps': 4243, 'loss/train': 2.3444747924804688} +02/24/2022 07:04:01 - INFO - codeparrot_training - Step 4244: {'lr': 0.0004973085136535379, 'samples': 2173440, 'steps': 4244, 'loss/train': 2.6797614097595215} +02/24/2022 07:04:07 - INFO - codeparrot_training - Step 4245: {'lr': 0.000497306118608232, 'samples': 2173952, 'steps': 4245, 'loss/train': 1.1895850896835327} +02/24/2022 07:04:10 - INFO - codeparrot_training - Step 4246: {'lr': 0.0004973037225035454, 'samples': 2174464, 'steps': 4246, 'loss/train': 3.039938449859619} +02/24/2022 07:04:16 - INFO - codeparrot_training - Step 4247: {'lr': 0.0004973013253394881, 'samples': 2174976, 'steps': 4247, 'loss/train': 1.6467804908752441} +02/24/2022 07:04:19 - INFO - codeparrot_training - Step 4248: {'lr': 0.0004972989271160705, 'samples': 2175488, 'steps': 4248, 'loss/train': 3.7007312774658203} +02/24/2022 07:04:25 - INFO - codeparrot_training - Step 4249: {'lr': 0.0004972965278333028, 'samples': 2176000, 'steps': 4249, 'loss/train': 3.049842119216919} +02/24/2022 07:04:30 - INFO - codeparrot_training - Step 4250: {'lr': 0.0004972941274911952, 'samples': 2176512, 'steps': 4250, 'loss/train': 3.7673535346984863} +02/24/2022 07:04:34 - INFO - codeparrot_training - Step 4251: {'lr': 0.0004972917260897583, 'samples': 2177024, 'steps': 4251, 'loss/train': 2.8709936141967773} +02/24/2022 07:04:40 - INFO - codeparrot_training - Step 4252: {'lr': 0.0004972893236290019, 'samples': 2177536, 'steps': 4252, 'loss/train': 3.36670184135437} +02/24/2022 07:04:43 - INFO - codeparrot_training - Step 4253: {'lr': 0.0004972869201089367, 'samples': 2178048, 'steps': 4253, 'loss/train': 3.524763584136963} +02/24/2022 07:04:49 - INFO - codeparrot_training - Step 4254: {'lr': 0.0004972845155295729, 'samples': 2178560, 'steps': 4254, 'loss/train': 2.793093204498291} +02/24/2022 07:04:52 - INFO - codeparrot_training - Step 4255: {'lr': 0.0004972821098909207, 'samples': 2179072, 'steps': 4255, 'loss/train': 1.677321195602417} +02/24/2022 07:04:58 - INFO - codeparrot_training - Step 4256: {'lr': 0.0004972797031929904, 'samples': 2179584, 'steps': 4256, 'loss/train': 1.6498457193374634} +02/24/2022 07:05:01 - INFO - codeparrot_training - Step 4257: {'lr': 0.0004972772954357924, 'samples': 2180096, 'steps': 4257, 'loss/train': 2.9303619861602783} +02/24/2022 07:05:07 - INFO - codeparrot_training - Step 4258: {'lr': 0.0004972748866193371, 'samples': 2180608, 'steps': 4258, 'loss/train': 3.9064760208129883} +02/24/2022 07:05:10 - INFO - codeparrot_training - Step 4259: {'lr': 0.0004972724767436346, 'samples': 2181120, 'steps': 4259, 'loss/train': 2.702552556991577} +02/24/2022 07:05:16 - INFO - codeparrot_training - Step 4260: {'lr': 0.0004972700658086954, 'samples': 2181632, 'steps': 4260, 'loss/train': 2.3609423637390137} +02/24/2022 07:05:19 - INFO - codeparrot_training - Step 4261: {'lr': 0.0004972676538145298, 'samples': 2182144, 'steps': 4261, 'loss/train': 4.089694499969482} +02/24/2022 07:05:25 - INFO - codeparrot_training - Step 4262: {'lr': 0.0004972652407611479, 'samples': 2182656, 'steps': 4262, 'loss/train': 3.2853028774261475} +02/24/2022 07:05:29 - INFO - codeparrot_training - Step 4263: {'lr': 0.0004972628266485604, 'samples': 2183168, 'steps': 4263, 'loss/train': 0.9743463397026062} +02/24/2022 07:05:34 - INFO - codeparrot_training - Step 4264: {'lr': 0.0004972604114767774, 'samples': 2183680, 'steps': 4264, 'loss/train': 3.7340307235717773} +02/24/2022 07:05:38 - INFO - codeparrot_training - Step 4265: {'lr': 0.0004972579952458092, 'samples': 2184192, 'steps': 4265, 'loss/train': 2.7161076068878174} +02/24/2022 07:05:43 - INFO - codeparrot_training - Step 4266: {'lr': 0.0004972555779556664, 'samples': 2184704, 'steps': 4266, 'loss/train': 2.780529260635376} +02/24/2022 07:05:47 - INFO - codeparrot_training - Step 4267: {'lr': 0.0004972531596063592, 'samples': 2185216, 'steps': 4267, 'loss/train': 2.639686346054077} +02/24/2022 07:05:52 - INFO - codeparrot_training - Step 4268: {'lr': 0.000497250740197898, 'samples': 2185728, 'steps': 4268, 'loss/train': 1.4798073768615723} +02/24/2022 07:05:56 - INFO - codeparrot_training - Step 4269: {'lr': 0.0004972483197302931, 'samples': 2186240, 'steps': 4269, 'loss/train': 1.8299784660339355} +02/24/2022 07:06:01 - INFO - codeparrot_training - Step 4270: {'lr': 0.0004972458982035548, 'samples': 2186752, 'steps': 4270, 'loss/train': 2.852864980697632} +02/24/2022 07:06:05 - INFO - codeparrot_training - Step 4271: {'lr': 0.0004972434756176937, 'samples': 2187264, 'steps': 4271, 'loss/train': 8.189410209655762} +02/24/2022 07:06:11 - INFO - codeparrot_training - Step 4272: {'lr': 0.0004972410519727201, 'samples': 2187776, 'steps': 4272, 'loss/train': 2.3508660793304443} +02/24/2022 07:06:14 - INFO - codeparrot_training - Step 4273: {'lr': 0.0004972386272686443, 'samples': 2188288, 'steps': 4273, 'loss/train': 1.5492292642593384} +02/24/2022 07:06:20 - INFO - codeparrot_training - Step 4274: {'lr': 0.0004972362015054767, 'samples': 2188800, 'steps': 4274, 'loss/train': 3.1584479808807373} +02/24/2022 07:06:23 - INFO - codeparrot_training - Step 4275: {'lr': 0.0004972337746832278, 'samples': 2189312, 'steps': 4275, 'loss/train': 2.052060842514038} +02/24/2022 07:06:29 - INFO - codeparrot_training - Step 4276: {'lr': 0.0004972313468019077, 'samples': 2189824, 'steps': 4276, 'loss/train': 2.50105881690979} +02/24/2022 07:06:32 - INFO - codeparrot_training - Step 4277: {'lr': 0.0004972289178615273, 'samples': 2190336, 'steps': 4277, 'loss/train': 1.3975589275360107} +02/24/2022 07:06:38 - INFO - codeparrot_training - Step 4278: {'lr': 0.0004972264878620965, 'samples': 2190848, 'steps': 4278, 'loss/train': 2.5786895751953125} +02/24/2022 07:06:41 - INFO - codeparrot_training - Step 4279: {'lr': 0.000497224056803626, 'samples': 2191360, 'steps': 4279, 'loss/train': 2.410680055618286} +02/24/2022 07:06:47 - INFO - codeparrot_training - Step 4280: {'lr': 0.0004972216246861262, 'samples': 2191872, 'steps': 4280, 'loss/train': 2.154649019241333} +02/24/2022 07:06:50 - INFO - codeparrot_training - Step 4281: {'lr': 0.0004972191915096074, 'samples': 2192384, 'steps': 4281, 'loss/train': 2.531623363494873} +02/24/2022 07:06:56 - INFO - codeparrot_training - Step 4282: {'lr': 0.0004972167572740801, 'samples': 2192896, 'steps': 4282, 'loss/train': 2.173480749130249} +02/24/2022 07:07:00 - INFO - codeparrot_training - Step 4283: {'lr': 0.0004972143219795547, 'samples': 2193408, 'steps': 4283, 'loss/train': 0.3406789302825928} +02/24/2022 07:07:06 - INFO - codeparrot_training - Step 4284: {'lr': 0.0004972118856260416, 'samples': 2193920, 'steps': 4284, 'loss/train': 2.5170040130615234} +02/24/2022 07:07:09 - INFO - codeparrot_training - Step 4285: {'lr': 0.0004972094482135514, 'samples': 2194432, 'steps': 4285, 'loss/train': 3.0732195377349854} +02/24/2022 07:07:15 - INFO - codeparrot_training - Step 4286: {'lr': 0.0004972070097420943, 'samples': 2194944, 'steps': 4286, 'loss/train': 2.393036365509033} +02/24/2022 07:07:18 - INFO - codeparrot_training - Step 4287: {'lr': 0.0004972045702116809, 'samples': 2195456, 'steps': 4287, 'loss/train': 2.8728830814361572} +02/24/2022 07:07:24 - INFO - codeparrot_training - Step 4288: {'lr': 0.0004972021296223217, 'samples': 2195968, 'steps': 4288, 'loss/train': 2.898014545440674} +02/24/2022 07:07:27 - INFO - codeparrot_training - Step 4289: {'lr': 0.0004971996879740271, 'samples': 2196480, 'steps': 4289, 'loss/train': 2.6281983852386475} +02/24/2022 07:07:33 - INFO - codeparrot_training - Step 4290: {'lr': 0.0004971972452668074, 'samples': 2196992, 'steps': 4290, 'loss/train': 2.692924737930298} +02/24/2022 07:07:36 - INFO - codeparrot_training - Step 4291: {'lr': 0.0004971948015006732, 'samples': 2197504, 'steps': 4291, 'loss/train': 1.9726898670196533} +02/24/2022 07:07:42 - INFO - codeparrot_training - Step 4292: {'lr': 0.000497192356675635, 'samples': 2198016, 'steps': 4292, 'loss/train': 2.1184329986572266} +02/24/2022 07:07:45 - INFO - codeparrot_training - Step 4293: {'lr': 0.0004971899107917033, 'samples': 2198528, 'steps': 4293, 'loss/train': 1.4840930700302124} +02/24/2022 07:07:51 - INFO - codeparrot_training - Step 4294: {'lr': 0.0004971874638488884, 'samples': 2199040, 'steps': 4294, 'loss/train': 2.085710287094116} +02/24/2022 07:07:54 - INFO - codeparrot_training - Step 4295: {'lr': 0.000497185015847201, 'samples': 2199552, 'steps': 4295, 'loss/train': 2.7963337898254395} +02/24/2022 07:08:00 - INFO - codeparrot_training - Step 4296: {'lr': 0.0004971825667866515, 'samples': 2200064, 'steps': 4296, 'loss/train': 3.636133909225464} +02/24/2022 07:08:03 - INFO - codeparrot_training - Step 4297: {'lr': 0.0004971801166672502, 'samples': 2200576, 'steps': 4297, 'loss/train': 2.0215768814086914} +02/24/2022 07:08:09 - INFO - codeparrot_training - Step 4298: {'lr': 0.0004971776654890079, 'samples': 2201088, 'steps': 4298, 'loss/train': 2.9988853931427} +02/24/2022 07:08:13 - INFO - codeparrot_training - Step 4299: {'lr': 0.000497175213251935, 'samples': 2201600, 'steps': 4299, 'loss/train': 1.9716269969940186} +02/24/2022 07:08:18 - INFO - codeparrot_training - Step 4300: {'lr': 0.0004971727599560418, 'samples': 2202112, 'steps': 4300, 'loss/train': 1.5621925592422485} +02/24/2022 07:08:22 - INFO - codeparrot_training - Step 4301: {'lr': 0.0004971703056013392, 'samples': 2202624, 'steps': 4301, 'loss/train': 3.330235004425049} +02/24/2022 07:08:27 - INFO - codeparrot_training - Step 4302: {'lr': 0.0004971678501878374, 'samples': 2203136, 'steps': 4302, 'loss/train': 4.034710884094238} +02/24/2022 07:08:31 - INFO - codeparrot_training - Step 4303: {'lr': 0.000497165393715547, 'samples': 2203648, 'steps': 4303, 'loss/train': 1.7335206270217896} +02/24/2022 07:08:36 - INFO - codeparrot_training - Step 4304: {'lr': 0.0004971629361844785, 'samples': 2204160, 'steps': 4304, 'loss/train': 1.8462622165679932} +02/24/2022 07:08:40 - INFO - codeparrot_training - Step 4305: {'lr': 0.0004971604775946425, 'samples': 2204672, 'steps': 4305, 'loss/train': 3.579932928085327} +02/24/2022 07:08:45 - INFO - codeparrot_training - Step 4306: {'lr': 0.0004971580179460495, 'samples': 2205184, 'steps': 4306, 'loss/train': 1.6447175741195679} +02/24/2022 07:08:49 - INFO - codeparrot_training - Step 4307: {'lr': 0.0004971555572387101, 'samples': 2205696, 'steps': 4307, 'loss/train': 2.691662311553955} +02/24/2022 07:08:55 - INFO - codeparrot_training - Step 4308: {'lr': 0.0004971530954726346, 'samples': 2206208, 'steps': 4308, 'loss/train': 3.363874673843384} +02/24/2022 07:08:58 - INFO - codeparrot_training - Step 4309: {'lr': 0.0004971506326478339, 'samples': 2206720, 'steps': 4309, 'loss/train': 3.470662832260132} +02/24/2022 07:09:04 - INFO - codeparrot_training - Step 4310: {'lr': 0.0004971481687643184, 'samples': 2207232, 'steps': 4310, 'loss/train': 2.9335923194885254} +02/24/2022 07:09:07 - INFO - codeparrot_training - Step 4311: {'lr': 0.0004971457038220984, 'samples': 2207744, 'steps': 4311, 'loss/train': 2.650141477584839} +02/24/2022 07:09:13 - INFO - codeparrot_training - Step 4312: {'lr': 0.0004971432378211849, 'samples': 2208256, 'steps': 4312, 'loss/train': 3.1428043842315674} +02/24/2022 07:09:16 - INFO - codeparrot_training - Step 4313: {'lr': 0.0004971407707615881, 'samples': 2208768, 'steps': 4313, 'loss/train': 2.7456884384155273} +02/24/2022 07:09:24 - INFO - codeparrot_training - Step 4314: {'lr': 0.0004971383026433189, 'samples': 2209280, 'steps': 4314, 'loss/train': 2.857170343399048} +02/24/2022 07:09:28 - INFO - codeparrot_training - Step 4315: {'lr': 0.0004971358334663875, 'samples': 2209792, 'steps': 4315, 'loss/train': 1.715367317199707} +02/24/2022 07:09:33 - INFO - codeparrot_training - Step 4316: {'lr': 0.0004971333632308047, 'samples': 2210304, 'steps': 4316, 'loss/train': 3.0234556198120117} +02/24/2022 07:09:36 - INFO - codeparrot_training - Step 4317: {'lr': 0.000497130891936581, 'samples': 2210816, 'steps': 4317, 'loss/train': 3.300001859664917} +02/24/2022 07:09:42 - INFO - codeparrot_training - Step 4318: {'lr': 0.0004971284195837271, 'samples': 2211328, 'steps': 4318, 'loss/train': 2.5985090732574463} +02/24/2022 07:09:46 - INFO - codeparrot_training - Step 4319: {'lr': 0.0004971259461722536, 'samples': 2211840, 'steps': 4319, 'loss/train': 2.2258150577545166} +02/24/2022 07:09:51 - INFO - codeparrot_training - Step 4320: {'lr': 0.0004971234717021708, 'samples': 2212352, 'steps': 4320, 'loss/train': 1.5778388977050781} +02/24/2022 07:09:54 - INFO - codeparrot_training - Step 4321: {'lr': 0.0004971209961734897, 'samples': 2212864, 'steps': 4321, 'loss/train': 1.0925599336624146} +02/24/2022 07:10:00 - INFO - codeparrot_training - Step 4322: {'lr': 0.0004971185195862207, 'samples': 2213376, 'steps': 4322, 'loss/train': 2.530054807662964} +02/24/2022 07:10:03 - INFO - codeparrot_training - Step 4323: {'lr': 0.0004971160419403744, 'samples': 2213888, 'steps': 4323, 'loss/train': 0.9472009539604187} +02/24/2022 07:10:11 - INFO - codeparrot_training - Step 4324: {'lr': 0.0004971135632359614, 'samples': 2214400, 'steps': 4324, 'loss/train': 2.86696720123291} +02/24/2022 07:10:14 - INFO - codeparrot_training - Step 4325: {'lr': 0.0004971110834729925, 'samples': 2214912, 'steps': 4325, 'loss/train': 2.8923516273498535} +02/24/2022 07:10:20 - INFO - codeparrot_training - Step 4326: {'lr': 0.0004971086026514781, 'samples': 2215424, 'steps': 4326, 'loss/train': 2.862438201904297} +02/24/2022 07:10:23 - INFO - codeparrot_training - Step 4327: {'lr': 0.0004971061207714289, 'samples': 2215936, 'steps': 4327, 'loss/train': 2.9758591651916504} +02/24/2022 07:10:29 - INFO - codeparrot_training - Step 4328: {'lr': 0.0004971036378328556, 'samples': 2216448, 'steps': 4328, 'loss/train': 1.4875649213790894} +02/24/2022 07:10:32 - INFO - codeparrot_training - Step 4329: {'lr': 0.0004971011538357687, 'samples': 2216960, 'steps': 4329, 'loss/train': 2.493306875228882} +02/24/2022 07:10:38 - INFO - codeparrot_training - Step 4330: {'lr': 0.000497098668780179, 'samples': 2217472, 'steps': 4330, 'loss/train': 3.131075143814087} +02/24/2022 07:10:41 - INFO - codeparrot_training - Step 4331: {'lr': 0.000497096182666097, 'samples': 2217984, 'steps': 4331, 'loss/train': 3.5481016635894775} +02/24/2022 07:10:47 - INFO - codeparrot_training - Step 4332: {'lr': 0.0004970936954935334, 'samples': 2218496, 'steps': 4332, 'loss/train': 2.7496330738067627} +02/24/2022 07:10:50 - INFO - codeparrot_training - Step 4333: {'lr': 0.0004970912072624989, 'samples': 2219008, 'steps': 4333, 'loss/train': 2.2501373291015625} +02/24/2022 07:10:57 - INFO - codeparrot_training - Step 4334: {'lr': 0.0004970887179730041, 'samples': 2219520, 'steps': 4334, 'loss/train': 2.827500820159912} +02/24/2022 07:11:01 - INFO - codeparrot_training - Step 4335: {'lr': 0.0004970862276250599, 'samples': 2220032, 'steps': 4335, 'loss/train': 3.508098602294922} +02/24/2022 07:11:06 - INFO - codeparrot_training - Step 4336: {'lr': 0.0004970837362186766, 'samples': 2220544, 'steps': 4336, 'loss/train': 1.368098258972168} +02/24/2022 07:11:10 - INFO - codeparrot_training - Step 4337: {'lr': 0.0004970812437538649, 'samples': 2221056, 'steps': 4337, 'loss/train': 3.964770555496216} +02/24/2022 07:11:15 - INFO - codeparrot_training - Step 4338: {'lr': 0.0004970787502306357, 'samples': 2221568, 'steps': 4338, 'loss/train': 1.5995824337005615} +02/24/2022 07:11:19 - INFO - codeparrot_training - Step 4339: {'lr': 0.0004970762556489996, 'samples': 2222080, 'steps': 4339, 'loss/train': 2.1084766387939453} +02/24/2022 07:11:24 - INFO - codeparrot_training - Step 4340: {'lr': 0.0004970737600089673, 'samples': 2222592, 'steps': 4340, 'loss/train': 2.1236257553100586} +02/24/2022 07:11:27 - INFO - codeparrot_training - Step 4341: {'lr': 0.0004970712633105496, 'samples': 2223104, 'steps': 4341, 'loss/train': 3.0250909328460693} +02/24/2022 07:11:33 - INFO - codeparrot_training - Step 4342: {'lr': 0.0004970687655537568, 'samples': 2223616, 'steps': 4342, 'loss/train': 3.0731847286224365} +02/24/2022 07:11:37 - INFO - codeparrot_training - Step 4343: {'lr': 0.0004970662667386, 'samples': 2224128, 'steps': 4343, 'loss/train': 1.9997105598449707} +02/24/2022 07:11:44 - INFO - codeparrot_training - Step 4344: {'lr': 0.0004970637668650898, 'samples': 2224640, 'steps': 4344, 'loss/train': 0.218427374958992} +02/24/2022 07:11:47 - INFO - codeparrot_training - Step 4345: {'lr': 0.0004970612659332368, 'samples': 2225152, 'steps': 4345, 'loss/train': 2.3627676963806152} +02/24/2022 07:11:53 - INFO - codeparrot_training - Step 4346: {'lr': 0.0004970587639430518, 'samples': 2225664, 'steps': 4346, 'loss/train': 2.492006301879883} +02/24/2022 07:11:56 - INFO - codeparrot_training - Step 4347: {'lr': 0.0004970562608945455, 'samples': 2226176, 'steps': 4347, 'loss/train': 2.6539816856384277} +02/24/2022 07:12:02 - INFO - codeparrot_training - Step 4348: {'lr': 0.0004970537567877286, 'samples': 2226688, 'steps': 4348, 'loss/train': 3.71990966796875} +02/24/2022 07:12:05 - INFO - codeparrot_training - Step 4349: {'lr': 0.000497051251622612, 'samples': 2227200, 'steps': 4349, 'loss/train': 0.6602520942687988} +02/24/2022 07:12:11 - INFO - codeparrot_training - Step 4350: {'lr': 0.0004970487453992062, 'samples': 2227712, 'steps': 4350, 'loss/train': 2.111689329147339} +02/24/2022 07:12:16 - INFO - codeparrot_training - Step 4351: {'lr': 0.000497046238117522, 'samples': 2228224, 'steps': 4351, 'loss/train': 1.2905681133270264} +02/24/2022 07:12:20 - INFO - codeparrot_training - Step 4352: {'lr': 0.0004970437297775702, 'samples': 2228736, 'steps': 4352, 'loss/train': 3.3549752235412598} +02/24/2022 07:12:27 - INFO - codeparrot_training - Step 4353: {'lr': 0.0004970412203793614, 'samples': 2229248, 'steps': 4353, 'loss/train': 1.9374116659164429} +02/24/2022 07:12:31 - INFO - codeparrot_training - Step 4354: {'lr': 0.0004970387099229066, 'samples': 2229760, 'steps': 4354, 'loss/train': 3.28519606590271} +02/24/2022 07:12:36 - INFO - codeparrot_training - Step 4355: {'lr': 0.0004970361984082163, 'samples': 2230272, 'steps': 4355, 'loss/train': 2.5305819511413574} +02/24/2022 07:12:39 - INFO - codeparrot_training - Step 4356: {'lr': 0.0004970336858353014, 'samples': 2230784, 'steps': 4356, 'loss/train': 2.907233953475952} +02/24/2022 07:12:45 - INFO - codeparrot_training - Step 4357: {'lr': 0.0004970311722041727, 'samples': 2231296, 'steps': 4357, 'loss/train': 2.892895460128784} +02/24/2022 07:12:48 - INFO - codeparrot_training - Step 4358: {'lr': 0.0004970286575148408, 'samples': 2231808, 'steps': 4358, 'loss/train': 2.534895181655884} +02/24/2022 07:12:54 - INFO - codeparrot_training - Step 4359: {'lr': 0.0004970261417673165, 'samples': 2232320, 'steps': 4359, 'loss/train': 2.9031383991241455} +02/24/2022 07:12:57 - INFO - codeparrot_training - Step 4360: {'lr': 0.0004970236249616109, 'samples': 2232832, 'steps': 4360, 'loss/train': 2.6167120933532715} +02/24/2022 07:13:03 - INFO - codeparrot_training - Step 4361: {'lr': 0.0004970211070977344, 'samples': 2233344, 'steps': 4361, 'loss/train': 1.2957284450531006} +02/24/2022 07:13:06 - INFO - codeparrot_training - Step 4362: {'lr': 0.0004970185881756979, 'samples': 2233856, 'steps': 4362, 'loss/train': 2.167532205581665} +02/24/2022 07:13:12 - INFO - codeparrot_training - Step 4363: {'lr': 0.0004970160681955121, 'samples': 2234368, 'steps': 4363, 'loss/train': 3.269674062728882} +02/24/2022 07:13:15 - INFO - codeparrot_training - Step 4364: {'lr': 0.0004970135471571881, 'samples': 2234880, 'steps': 4364, 'loss/train': 1.3479008674621582} +02/24/2022 07:13:21 - INFO - codeparrot_training - Step 4365: {'lr': 0.0004970110250607364, 'samples': 2235392, 'steps': 4365, 'loss/train': 2.534153461456299} +02/24/2022 07:13:24 - INFO - codeparrot_training - Step 4366: {'lr': 0.000497008501906168, 'samples': 2235904, 'steps': 4366, 'loss/train': 3.1038362979888916} +02/24/2022 07:13:30 - INFO - codeparrot_training - Step 4367: {'lr': 0.0004970059776934935, 'samples': 2236416, 'steps': 4367, 'loss/train': 3.128077983856201} +02/24/2022 07:13:33 - INFO - codeparrot_training - Step 4368: {'lr': 0.0004970034524227238, 'samples': 2236928, 'steps': 4368, 'loss/train': 2.852567434310913} +02/24/2022 07:13:41 - INFO - codeparrot_training - Step 4369: {'lr': 0.0004970009260938698, 'samples': 2237440, 'steps': 4369, 'loss/train': 3.0271027088165283} +02/24/2022 07:13:44 - INFO - codeparrot_training - Step 4370: {'lr': 0.0004969983987069423, 'samples': 2237952, 'steps': 4370, 'loss/train': 2.3852086067199707} +02/24/2022 07:13:50 - INFO - codeparrot_training - Step 4371: {'lr': 0.000496995870261952, 'samples': 2238464, 'steps': 4371, 'loss/train': 3.10168719291687} +02/24/2022 07:13:53 - INFO - codeparrot_training - Step 4372: {'lr': 0.0004969933407589098, 'samples': 2238976, 'steps': 4372, 'loss/train': 2.5915274620056152} +02/24/2022 07:13:59 - INFO - codeparrot_training - Step 4373: {'lr': 0.0004969908101978267, 'samples': 2239488, 'steps': 4373, 'loss/train': 1.5831700563430786} +02/24/2022 07:14:02 - INFO - codeparrot_training - Step 4374: {'lr': 0.0004969882785787133, 'samples': 2240000, 'steps': 4374, 'loss/train': 2.789936065673828} +02/24/2022 07:14:08 - INFO - codeparrot_training - Step 4375: {'lr': 0.0004969857459015807, 'samples': 2240512, 'steps': 4375, 'loss/train': 1.6095125675201416} +02/24/2022 07:14:11 - INFO - codeparrot_training - Step 4376: {'lr': 0.0004969832121664394, 'samples': 2241024, 'steps': 4376, 'loss/train': 2.221705675125122} +02/24/2022 07:14:17 - INFO - codeparrot_training - Step 4377: {'lr': 0.0004969806773733004, 'samples': 2241536, 'steps': 4377, 'loss/train': 1.3647468090057373} +02/24/2022 07:14:20 - INFO - codeparrot_training - Step 4378: {'lr': 0.0004969781415221748, 'samples': 2242048, 'steps': 4378, 'loss/train': 2.6426949501037598} +02/24/2022 07:14:28 - INFO - codeparrot_training - Step 4379: {'lr': 0.0004969756046130731, 'samples': 2242560, 'steps': 4379, 'loss/train': 1.2999351024627686} +02/24/2022 07:14:31 - INFO - codeparrot_training - Step 4380: {'lr': 0.0004969730666460065, 'samples': 2243072, 'steps': 4380, 'loss/train': 3.769508123397827} +02/24/2022 07:14:37 - INFO - codeparrot_training - Step 4381: {'lr': 0.0004969705276209856, 'samples': 2243584, 'steps': 4381, 'loss/train': 3.0358543395996094} +02/24/2022 07:14:40 - INFO - codeparrot_training - Step 4382: {'lr': 0.0004969679875380214, 'samples': 2244096, 'steps': 4382, 'loss/train': 3.4092957973480225} +02/24/2022 07:14:46 - INFO - codeparrot_training - Step 4383: {'lr': 0.0004969654463971247, 'samples': 2244608, 'steps': 4383, 'loss/train': 2.6508755683898926} +02/24/2022 07:14:49 - INFO - codeparrot_training - Step 4384: {'lr': 0.0004969629041983065, 'samples': 2245120, 'steps': 4384, 'loss/train': 2.873786211013794} +02/24/2022 07:14:55 - INFO - codeparrot_training - Step 4385: {'lr': 0.0004969603609415777, 'samples': 2245632, 'steps': 4385, 'loss/train': 2.103040933609009} +02/24/2022 07:14:58 - INFO - codeparrot_training - Step 4386: {'lr': 0.000496957816626949, 'samples': 2246144, 'steps': 4386, 'loss/train': 3.001793146133423} +02/24/2022 07:15:04 - INFO - codeparrot_training - Step 4387: {'lr': 0.0004969552712544316, 'samples': 2246656, 'steps': 4387, 'loss/train': 2.502504825592041} +02/24/2022 07:15:07 - INFO - codeparrot_training - Step 4388: {'lr': 0.0004969527248240361, 'samples': 2247168, 'steps': 4388, 'loss/train': 2.7314093112945557} +02/24/2022 07:15:14 - INFO - codeparrot_training - Step 4389: {'lr': 0.0004969501773357736, 'samples': 2247680, 'steps': 4389, 'loss/train': 0.27108290791511536} +02/24/2022 07:15:18 - INFO - codeparrot_training - Step 4390: {'lr': 0.000496947628789655, 'samples': 2248192, 'steps': 4390, 'loss/train': 2.2951478958129883} +02/24/2022 07:15:23 - INFO - codeparrot_training - Step 4391: {'lr': 0.000496945079185691, 'samples': 2248704, 'steps': 4391, 'loss/train': 3.159607410430908} +02/24/2022 07:15:27 - INFO - codeparrot_training - Step 4392: {'lr': 0.0004969425285238928, 'samples': 2249216, 'steps': 4392, 'loss/train': 8.981085777282715} +02/24/2022 07:15:32 - INFO - codeparrot_training - Step 4393: {'lr': 0.0004969399768042713, 'samples': 2249728, 'steps': 4393, 'loss/train': 2.296780824661255} +02/24/2022 07:15:36 - INFO - codeparrot_training - Step 4394: {'lr': 0.0004969374240268373, 'samples': 2250240, 'steps': 4394, 'loss/train': 2.9942376613616943} +02/24/2022 07:15:41 - INFO - codeparrot_training - Step 4395: {'lr': 0.0004969348701916018, 'samples': 2250752, 'steps': 4395, 'loss/train': 3.0727157592773438} +02/24/2022 07:15:45 - INFO - codeparrot_training - Step 4396: {'lr': 0.0004969323152985756, 'samples': 2251264, 'steps': 4396, 'loss/train': 1.7974709272384644} +02/24/2022 07:15:50 - INFO - codeparrot_training - Step 4397: {'lr': 0.0004969297593477699, 'samples': 2251776, 'steps': 4397, 'loss/train': 3.8683183193206787} +02/24/2022 07:15:54 - INFO - codeparrot_training - Step 4398: {'lr': 0.0004969272023391955, 'samples': 2252288, 'steps': 4398, 'loss/train': 3.408257484436035} +02/24/2022 07:15:59 - INFO - codeparrot_training - Step 4399: {'lr': 0.0004969246442728633, 'samples': 2252800, 'steps': 4399, 'loss/train': 2.9289355278015137} +02/24/2022 07:16:03 - INFO - codeparrot_training - Step 4400: {'lr': 0.0004969220851487844, 'samples': 2253312, 'steps': 4400, 'loss/train': 2.622563123703003} +02/24/2022 07:16:10 - INFO - codeparrot_training - Step 4401: {'lr': 0.0004969195249669697, 'samples': 2253824, 'steps': 4401, 'loss/train': 3.680387496948242} +02/24/2022 07:16:13 - INFO - codeparrot_training - Step 4402: {'lr': 0.0004969169637274301, 'samples': 2254336, 'steps': 4402, 'loss/train': 1.7651646137237549} +02/24/2022 07:16:19 - INFO - codeparrot_training - Step 4403: {'lr': 0.0004969144014301767, 'samples': 2254848, 'steps': 4403, 'loss/train': 3.1497962474823} +02/24/2022 07:16:22 - INFO - codeparrot_training - Step 4404: {'lr': 0.0004969118380752205, 'samples': 2255360, 'steps': 4404, 'loss/train': 1.8213385343551636} +02/24/2022 07:16:28 - INFO - codeparrot_training - Step 4405: {'lr': 0.0004969092736625722, 'samples': 2255872, 'steps': 4405, 'loss/train': 1.1485289335250854} +02/24/2022 07:16:31 - INFO - codeparrot_training - Step 4406: {'lr': 0.000496906708192243, 'samples': 2256384, 'steps': 4406, 'loss/train': 2.1710715293884277} +02/24/2022 07:16:37 - INFO - codeparrot_training - Step 4407: {'lr': 0.000496904141664244, 'samples': 2256896, 'steps': 4407, 'loss/train': 4.6458210945129395} +02/24/2022 07:16:40 - INFO - codeparrot_training - Step 4408: {'lr': 0.0004969015740785859, 'samples': 2257408, 'steps': 4408, 'loss/train': 2.5125210285186768} +02/24/2022 07:16:46 - INFO - codeparrot_training - Step 4409: {'lr': 0.00049689900543528, 'samples': 2257920, 'steps': 4409, 'loss/train': 2.992716073989868} +02/24/2022 07:16:49 - INFO - codeparrot_training - Step 4410: {'lr': 0.0004968964357343371, 'samples': 2258432, 'steps': 4410, 'loss/train': 2.815979480743408} +02/24/2022 07:16:55 - INFO - codeparrot_training - Step 4411: {'lr': 0.0004968938649757682, 'samples': 2258944, 'steps': 4411, 'loss/train': 2.444709062576294} +02/24/2022 07:16:58 - INFO - codeparrot_training - Step 4412: {'lr': 0.0004968912931595845, 'samples': 2259456, 'steps': 4412, 'loss/train': 2.953953504562378} +02/24/2022 07:17:04 - INFO - codeparrot_training - Step 4413: {'lr': 0.0004968887202857968, 'samples': 2259968, 'steps': 4413, 'loss/train': 2.799375057220459} +02/24/2022 07:17:07 - INFO - codeparrot_training - Step 4414: {'lr': 0.0004968861463544163, 'samples': 2260480, 'steps': 4414, 'loss/train': 1.8723090887069702} +02/24/2022 07:17:15 - INFO - codeparrot_training - Step 4415: {'lr': 0.0004968835713654538, 'samples': 2260992, 'steps': 4415, 'loss/train': 2.4066226482391357} +02/24/2022 07:17:18 - INFO - codeparrot_training - Step 4416: {'lr': 0.0004968809953189206, 'samples': 2261504, 'steps': 4416, 'loss/train': 1.7056611776351929} +02/24/2022 07:17:24 - INFO - codeparrot_training - Step 4417: {'lr': 0.0004968784182148276, 'samples': 2262016, 'steps': 4417, 'loss/train': 2.899319648742676} +02/24/2022 07:17:27 - INFO - codeparrot_training - Step 4418: {'lr': 0.0004968758400531859, 'samples': 2262528, 'steps': 4418, 'loss/train': 1.9671186208724976} +02/24/2022 07:17:32 - INFO - codeparrot_training - Step 4419: {'lr': 0.0004968732608340064, 'samples': 2263040, 'steps': 4419, 'loss/train': 2.4352123737335205} +02/24/2022 07:17:36 - INFO - codeparrot_training - Step 4420: {'lr': 0.0004968706805573002, 'samples': 2263552, 'steps': 4420, 'loss/train': 3.3830208778381348} +02/24/2022 07:17:42 - INFO - codeparrot_training - Step 4421: {'lr': 0.0004968680992230785, 'samples': 2264064, 'steps': 4421, 'loss/train': 2.5741186141967773} +02/24/2022 07:17:45 - INFO - codeparrot_training - Step 4422: {'lr': 0.0004968655168313522, 'samples': 2264576, 'steps': 4422, 'loss/train': 2.9256064891815186} +02/24/2022 07:17:51 - INFO - codeparrot_training - Step 4423: {'lr': 0.0004968629333821324, 'samples': 2265088, 'steps': 4423, 'loss/train': 2.8239471912384033} +02/24/2022 07:17:54 - INFO - codeparrot_training - Step 4424: {'lr': 0.0004968603488754302, 'samples': 2265600, 'steps': 4424, 'loss/train': 3.7768757343292236} +02/24/2022 07:18:01 - INFO - codeparrot_training - Step 4425: {'lr': 0.0004968577633112566, 'samples': 2266112, 'steps': 4425, 'loss/train': 2.7473161220550537} +02/24/2022 07:18:05 - INFO - codeparrot_training - Step 4426: {'lr': 0.0004968551766896228, 'samples': 2266624, 'steps': 4426, 'loss/train': 3.1793689727783203} +02/24/2022 07:18:10 - INFO - codeparrot_training - Step 4427: {'lr': 0.0004968525890105399, 'samples': 2267136, 'steps': 4427, 'loss/train': 2.7369556427001953} +02/24/2022 07:18:16 - INFO - codeparrot_training - Step 4428: {'lr': 0.0004968500002740187, 'samples': 2267648, 'steps': 4428, 'loss/train': 3.915787696838379} +02/24/2022 07:18:19 - INFO - codeparrot_training - Step 4429: {'lr': 0.0004968474104800706, 'samples': 2268160, 'steps': 4429, 'loss/train': 2.2864506244659424} +02/24/2022 07:18:25 - INFO - codeparrot_training - Step 4430: {'lr': 0.0004968448196287066, 'samples': 2268672, 'steps': 4430, 'loss/train': 2.4751694202423096} +02/24/2022 07:18:28 - INFO - codeparrot_training - Step 4431: {'lr': 0.0004968422277199377, 'samples': 2269184, 'steps': 4431, 'loss/train': 0.6428107023239136} +02/24/2022 07:18:34 - INFO - codeparrot_training - Step 4432: {'lr': 0.000496839634753775, 'samples': 2269696, 'steps': 4432, 'loss/train': 3.907973051071167} +02/24/2022 07:18:37 - INFO - codeparrot_training - Step 4433: {'lr': 0.0004968370407302299, 'samples': 2270208, 'steps': 4433, 'loss/train': 2.3889451026916504} +02/24/2022 07:18:45 - INFO - codeparrot_training - Step 4434: {'lr': 0.0004968344456493132, 'samples': 2270720, 'steps': 4434, 'loss/train': 8.024490356445312} +02/24/2022 07:18:48 - INFO - codeparrot_training - Step 4435: {'lr': 0.000496831849511036, 'samples': 2271232, 'steps': 4435, 'loss/train': 3.2024617195129395} +02/24/2022 07:18:54 - INFO - codeparrot_training - Step 4436: {'lr': 0.0004968292523154096, 'samples': 2271744, 'steps': 4436, 'loss/train': 3.395526885986328} +02/24/2022 07:18:57 - INFO - codeparrot_training - Step 4437: {'lr': 0.0004968266540624452, 'samples': 2272256, 'steps': 4437, 'loss/train': 3.5550262928009033} +02/24/2022 07:19:01 - INFO - codeparrot_training - Step 4438: {'lr': 0.0004968240547521536, 'samples': 2272768, 'steps': 4438, 'loss/train': 4.299802780151367} +02/24/2022 07:19:06 - INFO - codeparrot_training - Step 4439: {'lr': 0.0004968214543845463, 'samples': 2273280, 'steps': 4439, 'loss/train': 2.9327893257141113} +02/24/2022 07:19:10 - INFO - codeparrot_training - Step 4440: {'lr': 0.0004968188529596341, 'samples': 2273792, 'steps': 4440, 'loss/train': 2.998858690261841} +02/24/2022 07:19:15 - INFO - codeparrot_training - Step 4441: {'lr': 0.0004968162504774284, 'samples': 2274304, 'steps': 4441, 'loss/train': 4.053661346435547} +02/24/2022 07:19:19 - INFO - codeparrot_training - Step 4442: {'lr': 0.0004968136469379403, 'samples': 2274816, 'steps': 4442, 'loss/train': 3.4228515625} +02/24/2022 07:19:24 - INFO - codeparrot_training - Step 4443: {'lr': 0.0004968110423411808, 'samples': 2275328, 'steps': 4443, 'loss/train': 2.4554667472839355} +02/24/2022 07:19:28 - INFO - codeparrot_training - Step 4444: {'lr': 0.0004968084366871612, 'samples': 2275840, 'steps': 4444, 'loss/train': 2.481663942337036} +02/24/2022 07:19:34 - INFO - codeparrot_training - Step 4445: {'lr': 0.0004968058299758926, 'samples': 2276352, 'steps': 4445, 'loss/train': 2.463561773300171} +02/24/2022 07:19:37 - INFO - codeparrot_training - Step 4446: {'lr': 0.0004968032222073863, 'samples': 2276864, 'steps': 4446, 'loss/train': 2.4199366569519043} +02/24/2022 07:19:44 - INFO - codeparrot_training - Step 4447: {'lr': 0.0004968006133816532, 'samples': 2277376, 'steps': 4447, 'loss/train': 2.901564359664917} +02/24/2022 07:19:48 - INFO - codeparrot_training - Step 4448: {'lr': 0.0004967980034987048, 'samples': 2277888, 'steps': 4448, 'loss/train': 2.1362946033477783} +02/24/2022 07:19:54 - INFO - codeparrot_training - Step 4449: {'lr': 0.0004967953925585521, 'samples': 2278400, 'steps': 4449, 'loss/train': 3.142230987548828} +02/24/2022 07:19:57 - INFO - codeparrot_training - Step 4450: {'lr': 0.0004967927805612063, 'samples': 2278912, 'steps': 4450, 'loss/train': 3.0722169876098633} +02/24/2022 07:20:03 - INFO - codeparrot_training - Step 4451: {'lr': 0.0004967901675066784, 'samples': 2279424, 'steps': 4451, 'loss/train': 3.438680410385132} +02/24/2022 07:20:06 - INFO - codeparrot_training - Step 4452: {'lr': 0.0004967875533949801, 'samples': 2279936, 'steps': 4452, 'loss/train': 1.9792863130569458} +02/24/2022 07:20:12 - INFO - codeparrot_training - Step 4453: {'lr': 0.000496784938226122, 'samples': 2280448, 'steps': 4453, 'loss/train': 1.7001590728759766} +02/24/2022 07:20:15 - INFO - codeparrot_training - Step 4454: {'lr': 0.0004967823220001158, 'samples': 2280960, 'steps': 4454, 'loss/train': 1.9118536710739136} +02/24/2022 07:20:20 - INFO - codeparrot_training - Step 4455: {'lr': 0.0004967797047169724, 'samples': 2281472, 'steps': 4455, 'loss/train': 0.26020562648773193} +02/24/2022 07:20:24 - INFO - codeparrot_training - Step 4456: {'lr': 0.0004967770863767031, 'samples': 2281984, 'steps': 4456, 'loss/train': 2.5394480228424072} +02/24/2022 07:20:29 - INFO - codeparrot_training - Step 4457: {'lr': 0.0004967744669793192, 'samples': 2282496, 'steps': 4457, 'loss/train': 3.2624049186706543} +02/24/2022 07:20:33 - INFO - codeparrot_training - Step 4458: {'lr': 0.0004967718465248317, 'samples': 2283008, 'steps': 4458, 'loss/train': 2.592275857925415} +02/24/2022 07:20:38 - INFO - codeparrot_training - Step 4459: {'lr': 0.000496769225013252, 'samples': 2283520, 'steps': 4459, 'loss/train': 2.834155797958374} +02/24/2022 07:20:42 - INFO - codeparrot_training - Step 4460: {'lr': 0.0004967666024445913, 'samples': 2284032, 'steps': 4460, 'loss/train': 3.2586326599121094} +02/24/2022 07:20:47 - INFO - codeparrot_training - Step 4461: {'lr': 0.000496763978818861, 'samples': 2284544, 'steps': 4461, 'loss/train': 2.9925897121429443} +02/24/2022 07:20:51 - INFO - codeparrot_training - Step 4462: {'lr': 0.000496761354136072, 'samples': 2285056, 'steps': 4462, 'loss/train': 2.9752209186553955} +02/24/2022 07:20:58 - INFO - codeparrot_training - Step 4463: {'lr': 0.0004967587283962358, 'samples': 2285568, 'steps': 4463, 'loss/train': 2.2733354568481445} +02/24/2022 07:21:02 - INFO - codeparrot_training - Step 4464: {'lr': 0.0004967561015993635, 'samples': 2286080, 'steps': 4464, 'loss/train': 3.6289825439453125} +02/24/2022 07:21:07 - INFO - codeparrot_training - Step 4465: {'lr': 0.0004967534737454665, 'samples': 2286592, 'steps': 4465, 'loss/train': 2.0765228271484375} +02/24/2022 07:21:11 - INFO - codeparrot_training - Step 4466: {'lr': 0.000496750844834556, 'samples': 2287104, 'steps': 4466, 'loss/train': 3.049776554107666} +02/24/2022 07:21:16 - INFO - codeparrot_training - Step 4467: {'lr': 0.000496748214866643, 'samples': 2287616, 'steps': 4467, 'loss/train': 0.45040270686149597} +02/24/2022 07:21:20 - INFO - codeparrot_training - Step 4468: {'lr': 0.0004967455838417392, 'samples': 2288128, 'steps': 4468, 'loss/train': 2.2237250804901123} +02/24/2022 07:21:25 - INFO - codeparrot_training - Step 4469: {'lr': 0.0004967429517598556, 'samples': 2288640, 'steps': 4469, 'loss/train': 2.066601037979126} +02/24/2022 07:21:29 - INFO - codeparrot_training - Step 4470: {'lr': 0.0004967403186210036, 'samples': 2289152, 'steps': 4470, 'loss/train': 3.095932960510254} +02/24/2022 07:21:34 - INFO - codeparrot_training - Step 4471: {'lr': 0.0004967376844251944, 'samples': 2289664, 'steps': 4471, 'loss/train': 5.598101615905762} +02/24/2022 07:21:38 - INFO - codeparrot_training - Step 4472: {'lr': 0.0004967350491724392, 'samples': 2290176, 'steps': 4472, 'loss/train': 2.4305169582366943} +02/24/2022 07:21:45 - INFO - codeparrot_training - Step 4473: {'lr': 0.0004967324128627495, 'samples': 2290688, 'steps': 4473, 'loss/train': 3.0877225399017334} +02/24/2022 07:21:48 - INFO - codeparrot_training - Step 4474: {'lr': 0.0004967297754961365, 'samples': 2291200, 'steps': 4474, 'loss/train': 1.8233672380447388} +02/24/2022 07:21:54 - INFO - codeparrot_training - Step 4475: {'lr': 0.0004967271370726115, 'samples': 2291712, 'steps': 4475, 'loss/train': 2.399275302886963} +02/24/2022 07:21:57 - INFO - codeparrot_training - Step 4476: {'lr': 0.0004967244975921857, 'samples': 2292224, 'steps': 4476, 'loss/train': 3.2833547592163086} +02/24/2022 07:22:03 - INFO - codeparrot_training - Step 4477: {'lr': 0.0004967218570548706, 'samples': 2292736, 'steps': 4477, 'loss/train': 2.1094300746917725} +02/24/2022 07:22:06 - INFO - codeparrot_training - Step 4478: {'lr': 0.0004967192154606774, 'samples': 2293248, 'steps': 4478, 'loss/train': 2.0430593490600586} +02/24/2022 07:22:12 - INFO - codeparrot_training - Step 4479: {'lr': 0.0004967165728096172, 'samples': 2293760, 'steps': 4479, 'loss/train': 3.1830222606658936} +02/24/2022 07:22:15 - INFO - codeparrot_training - Step 4480: {'lr': 0.0004967139291017018, 'samples': 2294272, 'steps': 4480, 'loss/train': 1.9834043979644775} +02/24/2022 07:22:21 - INFO - codeparrot_training - Step 4481: {'lr': 0.0004967112843369423, 'samples': 2294784, 'steps': 4481, 'loss/train': 3.3287105560302734} +02/24/2022 07:22:24 - INFO - codeparrot_training - Step 4482: {'lr': 0.0004967086385153499, 'samples': 2295296, 'steps': 4482, 'loss/train': 2.6500513553619385} +02/24/2022 07:22:30 - INFO - codeparrot_training - Step 4483: {'lr': 0.0004967059916369359, 'samples': 2295808, 'steps': 4483, 'loss/train': 3.129227876663208} +02/24/2022 07:22:34 - INFO - codeparrot_training - Step 4484: {'lr': 0.000496703343701712, 'samples': 2296320, 'steps': 4484, 'loss/train': 2.439460277557373} +02/24/2022 07:22:39 - INFO - codeparrot_training - Step 4485: {'lr': 0.0004967006947096892, 'samples': 2296832, 'steps': 4485, 'loss/train': 2.4997289180755615} +02/24/2022 07:22:43 - INFO - codeparrot_training - Step 4486: {'lr': 0.0004966980446608789, 'samples': 2297344, 'steps': 4486, 'loss/train': 3.0099687576293945} +02/24/2022 07:22:48 - INFO - codeparrot_training - Step 4487: {'lr': 0.0004966953935552925, 'samples': 2297856, 'steps': 4487, 'loss/train': 2.946878433227539} +02/24/2022 07:22:52 - INFO - codeparrot_training - Step 4488: {'lr': 0.0004966927413929415, 'samples': 2298368, 'steps': 4488, 'loss/train': 2.7749521732330322} +02/24/2022 07:22:57 - INFO - codeparrot_training - Step 4489: {'lr': 0.0004966900881738371, 'samples': 2298880, 'steps': 4489, 'loss/train': 2.306493043899536} +02/24/2022 07:23:01 - INFO - codeparrot_training - Step 4490: {'lr': 0.0004966874338979907, 'samples': 2299392, 'steps': 4490, 'loss/train': 3.477994441986084} +02/24/2022 07:23:06 - INFO - codeparrot_training - Step 4491: {'lr': 0.0004966847785654136, 'samples': 2299904, 'steps': 4491, 'loss/train': 3.2932941913604736} +02/24/2022 07:23:10 - INFO - codeparrot_training - Step 4492: {'lr': 0.0004966821221761173, 'samples': 2300416, 'steps': 4492, 'loss/train': 2.67145037651062} +02/24/2022 07:23:16 - INFO - codeparrot_training - Step 4493: {'lr': 0.0004966794647301131, 'samples': 2300928, 'steps': 4493, 'loss/train': 2.603548765182495} +02/24/2022 07:23:20 - INFO - codeparrot_training - Step 4494: {'lr': 0.0004966768062274125, 'samples': 2301440, 'steps': 4494, 'loss/train': 7.907590866088867} +02/24/2022 07:23:25 - INFO - codeparrot_training - Step 4495: {'lr': 0.0004966741466680266, 'samples': 2301952, 'steps': 4495, 'loss/train': 2.716430902481079} +02/24/2022 07:23:28 - INFO - codeparrot_training - Step 4496: {'lr': 0.000496671486051967, 'samples': 2302464, 'steps': 4496, 'loss/train': 3.171616315841675} +02/24/2022 07:23:34 - INFO - codeparrot_training - Step 4497: {'lr': 0.0004966688243792452, 'samples': 2302976, 'steps': 4497, 'loss/train': 2.0503103733062744} +02/24/2022 07:23:37 - INFO - codeparrot_training - Step 4498: {'lr': 0.0004966661616498724, 'samples': 2303488, 'steps': 4498, 'loss/train': 2.387420654296875} +02/24/2022 07:23:43 - INFO - codeparrot_training - Step 4499: {'lr': 0.0004966634978638601, 'samples': 2304000, 'steps': 4499, 'loss/train': 2.9485855102539062} +02/24/2022 07:23:46 - INFO - codeparrot_training - Step 4500: {'lr': 0.0004966608330212198, 'samples': 2304512, 'steps': 4500, 'loss/train': 2.670931816101074} +02/24/2022 07:23:52 - INFO - codeparrot_training - Step 4501: {'lr': 0.0004966581671219627, 'samples': 2305024, 'steps': 4501, 'loss/train': 3.4441380500793457} +02/24/2022 07:23:55 - INFO - codeparrot_training - Step 4502: {'lr': 0.0004966555001661004, 'samples': 2305536, 'steps': 4502, 'loss/train': 2.946645975112915} +02/24/2022 07:24:02 - INFO - codeparrot_training - Step 4503: {'lr': 0.0004966528321536442, 'samples': 2306048, 'steps': 4503, 'loss/train': 1.6140245199203491} +02/24/2022 07:24:05 - INFO - codeparrot_training - Step 4504: {'lr': 0.0004966501630846057, 'samples': 2306560, 'steps': 4504, 'loss/train': 7.584186553955078} +02/24/2022 07:24:10 - INFO - codeparrot_training - Step 4505: {'lr': 0.000496647492958996, 'samples': 2307072, 'steps': 4505, 'loss/train': 2.899822950363159} +02/24/2022 07:24:16 - INFO - codeparrot_training - Step 4506: {'lr': 0.000496644821776827, 'samples': 2307584, 'steps': 4506, 'loss/train': 1.99951171875} +02/24/2022 07:24:19 - INFO - codeparrot_training - Step 4507: {'lr': 0.0004966421495381098, 'samples': 2308096, 'steps': 4507, 'loss/train': 2.502471923828125} +02/24/2022 07:24:25 - INFO - codeparrot_training - Step 4508: {'lr': 0.0004966394762428559, 'samples': 2308608, 'steps': 4508, 'loss/train': 1.67020845413208} +02/24/2022 07:24:29 - INFO - codeparrot_training - Step 4509: {'lr': 0.0004966368018910768, 'samples': 2309120, 'steps': 4509, 'loss/train': 3.101428747177124} +02/24/2022 07:24:34 - INFO - codeparrot_training - Step 4510: {'lr': 0.000496634126482784, 'samples': 2309632, 'steps': 4510, 'loss/train': 1.0595159530639648} +02/24/2022 07:24:38 - INFO - codeparrot_training - Step 4511: {'lr': 0.000496631450017989, 'samples': 2310144, 'steps': 4511, 'loss/train': 0.08816852420568466} +02/24/2022 07:24:43 - INFO - codeparrot_training - Step 4512: {'lr': 0.0004966287724967032, 'samples': 2310656, 'steps': 4512, 'loss/train': 1.6504418849945068} +02/24/2022 07:24:47 - INFO - codeparrot_training - Step 4513: {'lr': 0.0004966260939189379, 'samples': 2311168, 'steps': 4513, 'loss/train': 1.6694890260696411} +02/24/2022 07:24:52 - INFO - codeparrot_training - Step 4514: {'lr': 0.0004966234142847048, 'samples': 2311680, 'steps': 4514, 'loss/train': 2.1192007064819336} +02/24/2022 07:24:56 - INFO - codeparrot_training - Step 4515: {'lr': 0.0004966207335940153, 'samples': 2312192, 'steps': 4515, 'loss/train': 3.1064274311065674} +02/24/2022 07:25:01 - INFO - codeparrot_training - Step 4516: {'lr': 0.0004966180518468808, 'samples': 2312704, 'steps': 4516, 'loss/train': 2.8479433059692383} +02/24/2022 07:25:05 - INFO - codeparrot_training - Step 4517: {'lr': 0.000496615369043313, 'samples': 2313216, 'steps': 4517, 'loss/train': 2.3023681640625} +02/24/2022 07:25:11 - INFO - codeparrot_training - Step 4518: {'lr': 0.0004966126851833233, 'samples': 2313728, 'steps': 4518, 'loss/train': 3.0059397220611572} +02/24/2022 07:25:14 - INFO - codeparrot_training - Step 4519: {'lr': 0.0004966100002669231, 'samples': 2314240, 'steps': 4519, 'loss/train': 1.8635908365249634} +02/24/2022 07:25:20 - INFO - codeparrot_training - Step 4520: {'lr': 0.0004966073142941239, 'samples': 2314752, 'steps': 4520, 'loss/train': 3.673736095428467} +02/24/2022 07:25:23 - INFO - codeparrot_training - Step 4521: {'lr': 0.0004966046272649372, 'samples': 2315264, 'steps': 4521, 'loss/train': 2.7314493656158447} +02/24/2022 07:25:29 - INFO - codeparrot_training - Step 4522: {'lr': 0.0004966019391793748, 'samples': 2315776, 'steps': 4522, 'loss/train': 1.319679856300354} +02/24/2022 07:25:32 - INFO - codeparrot_training - Step 4523: {'lr': 0.0004965992500374479, 'samples': 2316288, 'steps': 4523, 'loss/train': 3.444967031478882} +02/24/2022 07:25:38 - INFO - codeparrot_training - Step 4524: {'lr': 0.0004965965598391682, 'samples': 2316800, 'steps': 4524, 'loss/train': 3.1193411350250244} +02/24/2022 07:25:41 - INFO - codeparrot_training - Step 4525: {'lr': 0.000496593868584547, 'samples': 2317312, 'steps': 4525, 'loss/train': 2.2373745441436768} +02/24/2022 07:25:47 - INFO - codeparrot_training - Step 4526: {'lr': 0.0004965911762735961, 'samples': 2317824, 'steps': 4526, 'loss/train': 3.0222339630126953} +02/24/2022 07:25:51 - INFO - codeparrot_training - Step 4527: {'lr': 0.0004965884829063268, 'samples': 2318336, 'steps': 4527, 'loss/train': 2.2678277492523193} +02/24/2022 07:25:57 - INFO - codeparrot_training - Step 4528: {'lr': 0.0004965857884827508, 'samples': 2318848, 'steps': 4528, 'loss/train': 1.9348386526107788} +02/24/2022 07:26:00 - INFO - codeparrot_training - Step 4529: {'lr': 0.0004965830930028795, 'samples': 2319360, 'steps': 4529, 'loss/train': 3.5817720890045166} +02/24/2022 07:26:06 - INFO - codeparrot_training - Step 4530: {'lr': 0.0004965803964667246, 'samples': 2319872, 'steps': 4530, 'loss/train': 2.1504907608032227} +02/24/2022 07:26:09 - INFO - codeparrot_training - Step 4531: {'lr': 0.0004965776988742976, 'samples': 2320384, 'steps': 4531, 'loss/train': 2.881620168685913} +02/24/2022 07:26:15 - INFO - codeparrot_training - Step 4532: {'lr': 0.00049657500022561, 'samples': 2320896, 'steps': 4532, 'loss/train': 2.845905303955078} +02/24/2022 07:26:18 - INFO - codeparrot_training - Step 4533: {'lr': 0.0004965723005206734, 'samples': 2321408, 'steps': 4533, 'loss/train': 3.8281946182250977} +02/24/2022 07:26:24 - INFO - codeparrot_training - Step 4534: {'lr': 0.0004965695997594993, 'samples': 2321920, 'steps': 4534, 'loss/train': 3.05188250541687} +02/24/2022 07:26:27 - INFO - codeparrot_training - Step 4535: {'lr': 0.0004965668979420994, 'samples': 2322432, 'steps': 4535, 'loss/train': 1.9769772291183472} +02/24/2022 07:26:33 - INFO - codeparrot_training - Step 4536: {'lr': 0.0004965641950684852, 'samples': 2322944, 'steps': 4536, 'loss/train': 2.2932844161987305} +02/24/2022 07:26:36 - INFO - codeparrot_training - Step 4537: {'lr': 0.0004965614911386683, 'samples': 2323456, 'steps': 4537, 'loss/train': 2.8208165168762207} +02/24/2022 07:26:42 - INFO - codeparrot_training - Step 4538: {'lr': 0.0004965587861526602, 'samples': 2323968, 'steps': 4538, 'loss/train': 2.6197359561920166} +02/24/2022 07:26:45 - INFO - codeparrot_training - Step 4539: {'lr': 0.0004965560801104726, 'samples': 2324480, 'steps': 4539, 'loss/train': 2.682002067565918} +02/24/2022 07:26:51 - INFO - codeparrot_training - Step 4540: {'lr': 0.000496553373012117, 'samples': 2324992, 'steps': 4540, 'loss/train': 2.820742130279541} +02/24/2022 07:26:54 - INFO - codeparrot_training - Step 4541: {'lr': 0.0004965506648576052, 'samples': 2325504, 'steps': 4541, 'loss/train': 0.8161208629608154} +02/24/2022 07:27:00 - INFO - codeparrot_training - Step 4542: {'lr': 0.0004965479556469485, 'samples': 2326016, 'steps': 4542, 'loss/train': 2.9415500164031982} +02/24/2022 07:27:03 - INFO - codeparrot_training - Step 4543: {'lr': 0.0004965452453801586, 'samples': 2326528, 'steps': 4543, 'loss/train': 2.601675271987915} +02/24/2022 07:27:09 - INFO - codeparrot_training - Step 4544: {'lr': 0.0004965425340572472, 'samples': 2327040, 'steps': 4544, 'loss/train': 2.377925157546997} +02/24/2022 07:27:12 - INFO - codeparrot_training - Step 4545: {'lr': 0.0004965398216782258, 'samples': 2327552, 'steps': 4545, 'loss/train': 2.7842869758605957} +02/24/2022 07:27:18 - INFO - codeparrot_training - Step 4546: {'lr': 0.0004965371082431062, 'samples': 2328064, 'steps': 4546, 'loss/train': 3.3167741298675537} +02/24/2022 07:27:21 - INFO - codeparrot_training - Step 4547: {'lr': 0.0004965343937519, 'samples': 2328576, 'steps': 4547, 'loss/train': 2.836545705795288} +02/24/2022 07:27:28 - INFO - codeparrot_training - Step 4548: {'lr': 0.0004965316782046186, 'samples': 2329088, 'steps': 4548, 'loss/train': 2.41920804977417} +02/24/2022 07:27:31 - INFO - codeparrot_training - Step 4549: {'lr': 0.0004965289616012739, 'samples': 2329600, 'steps': 4549, 'loss/train': 2.9270546436309814} +02/24/2022 07:27:37 - INFO - codeparrot_training - Step 4550: {'lr': 0.0004965262439418772, 'samples': 2330112, 'steps': 4550, 'loss/train': 1.860337495803833} +02/24/2022 07:27:40 - INFO - codeparrot_training - Step 4551: {'lr': 0.0004965235252264405, 'samples': 2330624, 'steps': 4551, 'loss/train': 2.1710102558135986} +02/24/2022 07:27:46 - INFO - codeparrot_training - Step 4552: {'lr': 0.0004965208054549753, 'samples': 2331136, 'steps': 4552, 'loss/train': 3.1194827556610107} +02/24/2022 07:27:49 - INFO - codeparrot_training - Step 4553: {'lr': 0.0004965180846274931, 'samples': 2331648, 'steps': 4553, 'loss/train': 2.0802981853485107} +02/24/2022 07:27:55 - INFO - codeparrot_training - Step 4554: {'lr': 0.0004965153627440058, 'samples': 2332160, 'steps': 4554, 'loss/train': 1.9774519205093384} +02/24/2022 07:27:59 - INFO - codeparrot_training - Step 4555: {'lr': 0.000496512639804525, 'samples': 2332672, 'steps': 4555, 'loss/train': 8.99859619140625} +02/24/2022 07:28:02 - INFO - codeparrot_training - Step 4556: {'lr': 0.0004965099158090624, 'samples': 2333184, 'steps': 4556, 'loss/train': 1.8740646839141846} +02/24/2022 07:28:07 - INFO - codeparrot_training - Step 4557: {'lr': 0.0004965071907576294, 'samples': 2333696, 'steps': 4557, 'loss/train': 3.6951892375946045} +02/24/2022 07:28:13 - INFO - codeparrot_training - Step 4558: {'lr': 0.000496504464650238, 'samples': 2334208, 'steps': 4558, 'loss/train': 2.408580780029297} +02/24/2022 07:28:16 - INFO - codeparrot_training - Step 4559: {'lr': 0.0004965017374868997, 'samples': 2334720, 'steps': 4559, 'loss/train': 2.156168222427368} +02/24/2022 07:28:22 - INFO - codeparrot_training - Step 4560: {'lr': 0.0004964990092676262, 'samples': 2335232, 'steps': 4560, 'loss/train': 2.1013872623443604} +02/24/2022 07:28:25 - INFO - codeparrot_training - Step 4561: {'lr': 0.0004964962799924293, 'samples': 2335744, 'steps': 4561, 'loss/train': 3.039947748184204} +02/24/2022 07:28:31 - INFO - codeparrot_training - Step 4562: {'lr': 0.0004964935496613206, 'samples': 2336256, 'steps': 4562, 'loss/train': 2.92911696434021} +02/24/2022 07:28:34 - INFO - codeparrot_training - Step 4563: {'lr': 0.0004964908182743117, 'samples': 2336768, 'steps': 4563, 'loss/train': 2.880499839782715} +02/24/2022 07:28:40 - INFO - codeparrot_training - Step 4564: {'lr': 0.0004964880858314146, 'samples': 2337280, 'steps': 4564, 'loss/train': 1.8258914947509766} +02/24/2022 07:28:44 - INFO - codeparrot_training - Step 4565: {'lr': 0.0004964853523326406, 'samples': 2337792, 'steps': 4565, 'loss/train': 2.7664577960968018} +02/24/2022 07:28:50 - INFO - codeparrot_training - Step 4566: {'lr': 0.0004964826177780017, 'samples': 2338304, 'steps': 4566, 'loss/train': 1.474591612815857} +02/24/2022 07:28:53 - INFO - codeparrot_training - Step 4567: {'lr': 0.0004964798821675096, 'samples': 2338816, 'steps': 4567, 'loss/train': 2.8581604957580566} +02/24/2022 07:28:58 - INFO - codeparrot_training - Step 4568: {'lr': 0.0004964771455011758, 'samples': 2339328, 'steps': 4568, 'loss/train': 2.927184581756592} +02/24/2022 07:29:02 - INFO - codeparrot_training - Step 4569: {'lr': 0.0004964744077790123, 'samples': 2339840, 'steps': 4569, 'loss/train': 1.7048866748809814} +02/24/2022 07:29:08 - INFO - codeparrot_training - Step 4570: {'lr': 0.0004964716690010306, 'samples': 2340352, 'steps': 4570, 'loss/train': 2.103135108947754} +02/24/2022 07:29:11 - INFO - codeparrot_training - Step 4571: {'lr': 0.0004964689291672427, 'samples': 2340864, 'steps': 4571, 'loss/train': 2.819431781768799} +02/24/2022 07:29:16 - INFO - codeparrot_training - Step 4572: {'lr': 0.00049646618827766, 'samples': 2341376, 'steps': 4572, 'loss/train': 2.6605138778686523} +02/24/2022 07:29:20 - INFO - codeparrot_training - Step 4573: {'lr': 0.0004964634463322945, 'samples': 2341888, 'steps': 4573, 'loss/train': 1.7473955154418945} +02/24/2022 07:29:25 - INFO - codeparrot_training - Step 4574: {'lr': 0.0004964607033311579, 'samples': 2342400, 'steps': 4574, 'loss/train': 3.4722418785095215} +02/24/2022 07:29:29 - INFO - codeparrot_training - Step 4575: {'lr': 0.0004964579592742618, 'samples': 2342912, 'steps': 4575, 'loss/train': 3.3197989463806152} +02/24/2022 07:29:35 - INFO - codeparrot_training - Step 4576: {'lr': 0.000496455214161618, 'samples': 2343424, 'steps': 4576, 'loss/train': 1.1324106454849243} +02/24/2022 07:29:39 - INFO - codeparrot_training - Step 4577: {'lr': 0.0004964524679932385, 'samples': 2343936, 'steps': 4577, 'loss/train': 2.899233102798462} +02/24/2022 07:29:44 - INFO - codeparrot_training - Step 4578: {'lr': 0.0004964497207691349, 'samples': 2344448, 'steps': 4578, 'loss/train': 2.8270816802978516} +02/24/2022 07:29:48 - INFO - codeparrot_training - Step 4579: {'lr': 0.0004964469724893188, 'samples': 2344960, 'steps': 4579, 'loss/train': 2.535975217819214} +02/24/2022 07:29:53 - INFO - codeparrot_training - Step 4580: {'lr': 0.0004964442231538023, 'samples': 2345472, 'steps': 4580, 'loss/train': 2.6434524059295654} +02/24/2022 07:29:57 - INFO - codeparrot_training - Step 4581: {'lr': 0.0004964414727625968, 'samples': 2345984, 'steps': 4581, 'loss/train': 2.275078535079956} +02/24/2022 07:30:02 - INFO - codeparrot_training - Step 4582: {'lr': 0.0004964387213157143, 'samples': 2346496, 'steps': 4582, 'loss/train': 2.6823956966400146} +02/24/2022 07:30:06 - INFO - codeparrot_training - Step 4583: {'lr': 0.0004964359688131667, 'samples': 2347008, 'steps': 4583, 'loss/train': 1.7113174200057983} +02/24/2022 07:30:11 - INFO - codeparrot_training - Step 4584: {'lr': 0.0004964332152549657, 'samples': 2347520, 'steps': 4584, 'loss/train': 2.531528949737549} +02/24/2022 07:30:15 - INFO - codeparrot_training - Step 4585: {'lr': 0.0004964304606411229, 'samples': 2348032, 'steps': 4585, 'loss/train': 2.952152967453003} +02/24/2022 07:30:21 - INFO - codeparrot_training - Step 4586: {'lr': 0.0004964277049716503, 'samples': 2348544, 'steps': 4586, 'loss/train': 2.2497806549072266} +02/24/2022 07:30:24 - INFO - codeparrot_training - Step 4587: {'lr': 0.0004964249482465597, 'samples': 2349056, 'steps': 4587, 'loss/train': 3.607583522796631} +02/24/2022 07:30:30 - INFO - codeparrot_training - Step 4588: {'lr': 0.0004964221904658629, 'samples': 2349568, 'steps': 4588, 'loss/train': 3.379906177520752} +02/24/2022 07:30:33 - INFO - codeparrot_training - Step 4589: {'lr': 0.0004964194316295716, 'samples': 2350080, 'steps': 4589, 'loss/train': 2.5294156074523926} +02/24/2022 07:30:39 - INFO - codeparrot_training - Step 4590: {'lr': 0.0004964166717376978, 'samples': 2350592, 'steps': 4590, 'loss/train': 1.7820571660995483} +02/24/2022 07:30:42 - INFO - codeparrot_training - Step 4591: {'lr': 0.0004964139107902531, 'samples': 2351104, 'steps': 4591, 'loss/train': 3.2617321014404297} +02/24/2022 07:30:48 - INFO - codeparrot_training - Step 4592: {'lr': 0.0004964111487872495, 'samples': 2351616, 'steps': 4592, 'loss/train': 2.0228817462921143} +02/24/2022 07:30:51 - INFO - codeparrot_training - Step 4593: {'lr': 0.0004964083857286988, 'samples': 2352128, 'steps': 4593, 'loss/train': 1.648618221282959} +02/24/2022 07:30:56 - INFO - codeparrot_training - Step 4594: {'lr': 0.0004964056216146129, 'samples': 2352640, 'steps': 4594, 'loss/train': 3.0626327991485596} +02/24/2022 07:31:00 - INFO - codeparrot_training - Step 4595: {'lr': 0.0004964028564450034, 'samples': 2353152, 'steps': 4595, 'loss/train': 2.530766248703003} +02/24/2022 07:31:06 - INFO - codeparrot_training - Step 4596: {'lr': 0.0004964000902198824, 'samples': 2353664, 'steps': 4596, 'loss/train': 1.1859678030014038} +02/24/2022 07:31:09 - INFO - codeparrot_training - Step 4597: {'lr': 0.0004963973229392617, 'samples': 2354176, 'steps': 4597, 'loss/train': 2.338047504425049} +02/24/2022 07:31:14 - INFO - codeparrot_training - Step 4598: {'lr': 0.0004963945546031529, 'samples': 2354688, 'steps': 4598, 'loss/train': 3.690566301345825} +02/24/2022 07:31:18 - INFO - codeparrot_training - Step 4599: {'lr': 0.0004963917852115683, 'samples': 2355200, 'steps': 4599, 'loss/train': 2.787959337234497} +02/24/2022 07:31:24 - INFO - codeparrot_training - Step 4600: {'lr': 0.0004963890147645194, 'samples': 2355712, 'steps': 4600, 'loss/train': 0.9729641079902649} +02/24/2022 07:31:27 - INFO - codeparrot_training - Step 4601: {'lr': 0.0004963862432620183, 'samples': 2356224, 'steps': 4601, 'loss/train': 1.9808101654052734} +02/24/2022 07:31:33 - INFO - codeparrot_training - Step 4602: {'lr': 0.0004963834707040767, 'samples': 2356736, 'steps': 4602, 'loss/train': 2.3603668212890625} +02/24/2022 07:31:36 - INFO - codeparrot_training - Step 4603: {'lr': 0.0004963806970907066, 'samples': 2357248, 'steps': 4603, 'loss/train': 2.557182788848877} +02/24/2022 07:31:42 - INFO - codeparrot_training - Step 4604: {'lr': 0.0004963779224219197, 'samples': 2357760, 'steps': 4604, 'loss/train': 8.10139274597168} +02/24/2022 07:31:45 - INFO - codeparrot_training - Step 4605: {'lr': 0.0004963751466977281, 'samples': 2358272, 'steps': 4605, 'loss/train': 2.8341751098632812} +02/24/2022 07:31:51 - INFO - codeparrot_training - Step 4606: {'lr': 0.0004963723699181437, 'samples': 2358784, 'steps': 4606, 'loss/train': 4.430930137634277} +02/24/2022 07:31:54 - INFO - codeparrot_training - Step 4607: {'lr': 0.0004963695920831781, 'samples': 2359296, 'steps': 4607, 'loss/train': 2.1612918376922607} +02/24/2022 07:32:00 - INFO - codeparrot_training - Step 4608: {'lr': 0.0004963668131928436, 'samples': 2359808, 'steps': 4608, 'loss/train': 2.2669763565063477} +02/24/2022 07:32:03 - INFO - codeparrot_training - Step 4609: {'lr': 0.0004963640332471518, 'samples': 2360320, 'steps': 4609, 'loss/train': 2.771120548248291} +02/24/2022 07:32:09 - INFO - codeparrot_training - Step 4610: {'lr': 0.0004963612522461147, 'samples': 2360832, 'steps': 4610, 'loss/train': 3.9943385124206543} +02/24/2022 07:32:13 - INFO - codeparrot_training - Step 4611: {'lr': 0.0004963584701897443, 'samples': 2361344, 'steps': 4611, 'loss/train': 2.767150402069092} +02/24/2022 07:32:19 - INFO - codeparrot_training - Step 4612: {'lr': 0.0004963556870780523, 'samples': 2361856, 'steps': 4612, 'loss/train': 2.933321475982666} +02/24/2022 07:32:22 - INFO - codeparrot_training - Step 4613: {'lr': 0.0004963529029110509, 'samples': 2362368, 'steps': 4613, 'loss/train': 3.2616207599639893} +02/24/2022 07:32:28 - INFO - codeparrot_training - Step 4614: {'lr': 0.0004963501176887519, 'samples': 2362880, 'steps': 4614, 'loss/train': 3.9431509971618652} +02/24/2022 07:32:31 - INFO - codeparrot_training - Step 4615: {'lr': 0.000496347331411167, 'samples': 2363392, 'steps': 4615, 'loss/train': 1.420654535293579} +02/24/2022 07:32:37 - INFO - codeparrot_training - Step 4616: {'lr': 0.0004963445440783086, 'samples': 2363904, 'steps': 4616, 'loss/train': 3.3908069133758545} +02/24/2022 07:32:40 - INFO - codeparrot_training - Step 4617: {'lr': 0.0004963417556901882, 'samples': 2364416, 'steps': 4617, 'loss/train': 2.4702248573303223} +02/24/2022 07:32:46 - INFO - codeparrot_training - Step 4618: {'lr': 0.0004963389662468182, 'samples': 2364928, 'steps': 4618, 'loss/train': 2.7041637897491455} +02/24/2022 07:32:49 - INFO - codeparrot_training - Step 4619: {'lr': 0.0004963361757482101, 'samples': 2365440, 'steps': 4619, 'loss/train': 1.2446062564849854} +02/24/2022 07:32:55 - INFO - codeparrot_training - Step 4620: {'lr': 0.000496333384194376, 'samples': 2365952, 'steps': 4620, 'loss/train': 3.6648197174072266} +02/24/2022 07:32:58 - INFO - codeparrot_training - Step 4621: {'lr': 0.000496330591585328, 'samples': 2366464, 'steps': 4621, 'loss/train': 2.9328489303588867} +02/24/2022 07:33:05 - INFO - codeparrot_training - Step 4622: {'lr': 0.0004963277979210779, 'samples': 2366976, 'steps': 4622, 'loss/train': 2.2918753623962402} +02/24/2022 07:33:08 - INFO - codeparrot_training - Step 4623: {'lr': 0.0004963250032016379, 'samples': 2367488, 'steps': 4623, 'loss/train': 2.744584083557129} +02/24/2022 07:33:14 - INFO - codeparrot_training - Step 4624: {'lr': 0.0004963222074270197, 'samples': 2368000, 'steps': 4624, 'loss/train': 1.826094627380371} +02/24/2022 07:33:19 - INFO - codeparrot_training - Step 4625: {'lr': 0.0004963194105972353, 'samples': 2368512, 'steps': 4625, 'loss/train': 2.5796639919281006} +02/24/2022 07:33:22 - INFO - codeparrot_training - Step 4626: {'lr': 0.0004963166127122969, 'samples': 2369024, 'steps': 4626, 'loss/train': 2.089270830154419} +02/24/2022 07:33:28 - INFO - codeparrot_training - Step 4627: {'lr': 0.0004963138137722161, 'samples': 2369536, 'steps': 4627, 'loss/train': 2.129268169403076} +02/24/2022 07:33:31 - INFO - codeparrot_training - Step 4628: {'lr': 0.0004963110137770054, 'samples': 2370048, 'steps': 4628, 'loss/train': 2.4956653118133545} +02/24/2022 07:33:37 - INFO - codeparrot_training - Step 4629: {'lr': 0.0004963082127266764, 'samples': 2370560, 'steps': 4629, 'loss/train': 1.6712114810943604} +02/24/2022 07:33:40 - INFO - codeparrot_training - Step 4630: {'lr': 0.0004963054106212414, 'samples': 2371072, 'steps': 4630, 'loss/train': 3.452554225921631} +02/24/2022 07:33:47 - INFO - codeparrot_training - Step 4631: {'lr': 0.000496302607460712, 'samples': 2371584, 'steps': 4631, 'loss/train': 3.349071502685547} +02/24/2022 07:33:51 - INFO - codeparrot_training - Step 4632: {'lr': 0.0004962998032451005, 'samples': 2372096, 'steps': 4632, 'loss/train': 1.8830987215042114} +02/24/2022 07:33:56 - INFO - codeparrot_training - Step 4633: {'lr': 0.0004962969979744189, 'samples': 2372608, 'steps': 4633, 'loss/train': 1.2372952699661255} +02/24/2022 07:34:00 - INFO - codeparrot_training - Step 4634: {'lr': 0.0004962941916486791, 'samples': 2373120, 'steps': 4634, 'loss/train': 1.7662454843521118} +02/24/2022 07:34:05 - INFO - codeparrot_training - Step 4635: {'lr': 0.0004962913842678934, 'samples': 2373632, 'steps': 4635, 'loss/train': 2.36586332321167} +02/24/2022 07:34:09 - INFO - codeparrot_training - Step 4636: {'lr': 0.0004962885758320734, 'samples': 2374144, 'steps': 4636, 'loss/train': 6.441001892089844} +02/24/2022 07:34:14 - INFO - codeparrot_training - Step 4637: {'lr': 0.0004962857663412314, 'samples': 2374656, 'steps': 4637, 'loss/train': 3.0188519954681396} +02/24/2022 07:34:17 - INFO - codeparrot_training - Step 4638: {'lr': 0.0004962829557953794, 'samples': 2375168, 'steps': 4638, 'loss/train': 2.7057178020477295} +02/24/2022 07:34:23 - INFO - codeparrot_training - Step 4639: {'lr': 0.0004962801441945293, 'samples': 2375680, 'steps': 4639, 'loss/train': 2.828700065612793} +02/24/2022 07:34:27 - INFO - codeparrot_training - Step 4640: {'lr': 0.0004962773315386935, 'samples': 2376192, 'steps': 4640, 'loss/train': 2.5407299995422363} +02/24/2022 07:34:33 - INFO - codeparrot_training - Step 4641: {'lr': 0.0004962745178278837, 'samples': 2376704, 'steps': 4641, 'loss/train': 0.6332015991210938} +02/24/2022 07:34:36 - INFO - codeparrot_training - Step 4642: {'lr': 0.000496271703062112, 'samples': 2377216, 'steps': 4642, 'loss/train': 3.1000168323516846} +02/24/2022 07:34:42 - INFO - codeparrot_training - Step 4643: {'lr': 0.0004962688872413906, 'samples': 2377728, 'steps': 4643, 'loss/train': 2.8571813106536865} +02/24/2022 07:34:45 - INFO - codeparrot_training - Step 4644: {'lr': 0.0004962660703657315, 'samples': 2378240, 'steps': 4644, 'loss/train': 2.7817742824554443} +02/24/2022 07:34:51 - INFO - codeparrot_training - Step 4645: {'lr': 0.0004962632524351467, 'samples': 2378752, 'steps': 4645, 'loss/train': 2.790558099746704} +02/24/2022 07:34:54 - INFO - codeparrot_training - Step 4646: {'lr': 0.0004962604334496483, 'samples': 2379264, 'steps': 4646, 'loss/train': 2.701981782913208} +02/24/2022 07:35:00 - INFO - codeparrot_training - Step 4647: {'lr': 0.0004962576134092485, 'samples': 2379776, 'steps': 4647, 'loss/train': 3.595661163330078} +02/24/2022 07:35:03 - INFO - codeparrot_training - Step 4648: {'lr': 0.0004962547923139592, 'samples': 2380288, 'steps': 4648, 'loss/train': 2.838186025619507} +02/24/2022 07:35:09 - INFO - codeparrot_training - Step 4649: {'lr': 0.0004962519701637926, 'samples': 2380800, 'steps': 4649, 'loss/train': 3.0065674781799316} +02/24/2022 07:35:12 - INFO - codeparrot_training - Step 4650: {'lr': 0.0004962491469587607, 'samples': 2381312, 'steps': 4650, 'loss/train': 2.068279981613159} +02/24/2022 07:35:18 - INFO - codeparrot_training - Step 4651: {'lr': 0.0004962463226988758, 'samples': 2381824, 'steps': 4651, 'loss/train': 1.4434808492660522} +02/24/2022 07:35:21 - INFO - codeparrot_training - Step 4652: {'lr': 0.0004962434973841497, 'samples': 2382336, 'steps': 4652, 'loss/train': 2.6536037921905518} +02/24/2022 07:35:27 - INFO - codeparrot_training - Step 4653: {'lr': 0.0004962406710145946, 'samples': 2382848, 'steps': 4653, 'loss/train': 1.1333259344100952} +02/24/2022 07:35:30 - INFO - codeparrot_training - Step 4654: {'lr': 0.0004962378435902228, 'samples': 2383360, 'steps': 4654, 'loss/train': 2.8344967365264893} +02/24/2022 07:35:36 - INFO - codeparrot_training - Step 4655: {'lr': 0.0004962350151110461, 'samples': 2383872, 'steps': 4655, 'loss/train': 2.6559736728668213} +02/24/2022 07:35:40 - INFO - codeparrot_training - Step 4656: {'lr': 0.0004962321855770769, 'samples': 2384384, 'steps': 4656, 'loss/train': 3.159492254257202} +02/24/2022 07:35:46 - INFO - codeparrot_training - Step 4657: {'lr': 0.0004962293549883273, 'samples': 2384896, 'steps': 4657, 'loss/train': 2.220210552215576} +02/24/2022 07:35:49 - INFO - codeparrot_training - Step 4658: {'lr': 0.0004962265233448092, 'samples': 2385408, 'steps': 4658, 'loss/train': 1.3266245126724243} +02/24/2022 07:35:55 - INFO - codeparrot_training - Step 4659: {'lr': 0.0004962236906465349, 'samples': 2385920, 'steps': 4659, 'loss/train': 1.2635562419891357} +02/24/2022 07:35:58 - INFO - codeparrot_training - Step 4660: {'lr': 0.0004962208568935164, 'samples': 2386432, 'steps': 4660, 'loss/train': 1.07157564163208} +02/24/2022 07:36:04 - INFO - codeparrot_training - Step 4661: {'lr': 0.000496218022085766, 'samples': 2386944, 'steps': 4661, 'loss/train': 2.566544532775879} +02/24/2022 07:36:07 - INFO - codeparrot_training - Step 4662: {'lr': 0.0004962151862232958, 'samples': 2387456, 'steps': 4662, 'loss/train': 3.157766103744507} +02/24/2022 07:36:13 - INFO - codeparrot_training - Step 4663: {'lr': 0.000496212349306118, 'samples': 2387968, 'steps': 4663, 'loss/train': 1.444319725036621} +02/24/2022 07:36:16 - INFO - codeparrot_training - Step 4664: {'lr': 0.0004962095113342445, 'samples': 2388480, 'steps': 4664, 'loss/train': 3.160513401031494} +02/24/2022 07:36:22 - INFO - codeparrot_training - Step 4665: {'lr': 0.0004962066723076878, 'samples': 2388992, 'steps': 4665, 'loss/train': 3.587603807449341} +02/24/2022 07:36:25 - INFO - codeparrot_training - Step 4666: {'lr': 0.0004962038322264598, 'samples': 2389504, 'steps': 4666, 'loss/train': 2.4390740394592285} +02/24/2022 07:36:31 - INFO - codeparrot_training - Step 4667: {'lr': 0.0004962009910905728, 'samples': 2390016, 'steps': 4667, 'loss/train': 2.259981632232666} +02/24/2022 07:36:35 - INFO - codeparrot_training - Step 4668: {'lr': 0.0004961981489000389, 'samples': 2390528, 'steps': 4668, 'loss/train': 1.3510923385620117} +02/24/2022 07:36:40 - INFO - codeparrot_training - Step 4669: {'lr': 0.0004961953056548703, 'samples': 2391040, 'steps': 4669, 'loss/train': 2.9432783126831055} +02/24/2022 07:36:44 - INFO - codeparrot_training - Step 4670: {'lr': 0.0004961924613550793, 'samples': 2391552, 'steps': 4670, 'loss/train': 3.6587672233581543} +02/24/2022 07:36:49 - INFO - codeparrot_training - Step 4671: {'lr': 0.0004961896160006778, 'samples': 2392064, 'steps': 4671, 'loss/train': 2.511075973510742} +02/24/2022 07:36:53 - INFO - codeparrot_training - Step 4672: {'lr': 0.0004961867695916782, 'samples': 2392576, 'steps': 4672, 'loss/train': 1.8166834115982056} +02/24/2022 07:36:58 - INFO - codeparrot_training - Step 4673: {'lr': 0.0004961839221280927, 'samples': 2393088, 'steps': 4673, 'loss/train': 0.8847571611404419} +02/24/2022 07:37:02 - INFO - codeparrot_training - Step 4674: {'lr': 0.0004961810736099334, 'samples': 2393600, 'steps': 4674, 'loss/train': 0.10410743951797485} +02/24/2022 07:37:07 - INFO - codeparrot_training - Step 4675: {'lr': 0.0004961782240372126, 'samples': 2394112, 'steps': 4675, 'loss/train': 3.4463889598846436} +02/24/2022 07:37:11 - INFO - codeparrot_training - Step 4676: {'lr': 0.0004961753734099425, 'samples': 2394624, 'steps': 4676, 'loss/train': 1.7875908613204956} +02/24/2022 07:37:17 - INFO - codeparrot_training - Step 4677: {'lr': 0.0004961725217281352, 'samples': 2395136, 'steps': 4677, 'loss/train': 3.275717258453369} +02/24/2022 07:37:20 - INFO - codeparrot_training - Step 4678: {'lr': 0.0004961696689918029, 'samples': 2395648, 'steps': 4678, 'loss/train': 1.3053271770477295} +02/24/2022 07:37:26 - INFO - codeparrot_training - Step 4679: {'lr': 0.0004961668152009581, 'samples': 2396160, 'steps': 4679, 'loss/train': 1.3668562173843384} +02/24/2022 07:37:29 - INFO - codeparrot_training - Step 4680: {'lr': 0.0004961639603556127, 'samples': 2396672, 'steps': 4680, 'loss/train': 1.2597614526748657} +02/24/2022 07:37:35 - INFO - codeparrot_training - Step 4681: {'lr': 0.0004961611044557792, 'samples': 2397184, 'steps': 4681, 'loss/train': 4.308879852294922} +02/24/2022 07:37:38 - INFO - codeparrot_training - Step 4682: {'lr': 0.0004961582475014695, 'samples': 2397696, 'steps': 4682, 'loss/train': 1.345685362815857} +02/24/2022 07:37:44 - INFO - codeparrot_training - Step 4683: {'lr': 0.0004961553894926961, 'samples': 2398208, 'steps': 4683, 'loss/train': 2.627019166946411} +02/24/2022 07:37:47 - INFO - codeparrot_training - Step 4684: {'lr': 0.0004961525304294712, 'samples': 2398720, 'steps': 4684, 'loss/train': 3.0345568656921387} +02/24/2022 07:37:53 - INFO - codeparrot_training - Step 4685: {'lr': 0.000496149670311807, 'samples': 2399232, 'steps': 4685, 'loss/train': 1.9586538076400757} +02/24/2022 07:37:56 - INFO - codeparrot_training - Step 4686: {'lr': 0.0004961468091397158, 'samples': 2399744, 'steps': 4686, 'loss/train': 2.8094279766082764} +02/24/2022 07:38:02 - INFO - codeparrot_training - Step 4687: {'lr': 0.0004961439469132098, 'samples': 2400256, 'steps': 4687, 'loss/train': 1.7700942754745483} +02/24/2022 07:38:05 - INFO - codeparrot_training - Step 4688: {'lr': 0.0004961410836323014, 'samples': 2400768, 'steps': 4688, 'loss/train': 2.9086923599243164} +02/24/2022 07:38:11 - INFO - codeparrot_training - Step 4689: {'lr': 0.0004961382192970027, 'samples': 2401280, 'steps': 4689, 'loss/train': 3.96055006980896} +02/24/2022 07:38:15 - INFO - codeparrot_training - Step 4690: {'lr': 0.0004961353539073258, 'samples': 2401792, 'steps': 4690, 'loss/train': 2.3432586193084717} +02/24/2022 07:38:20 - INFO - codeparrot_training - Step 4691: {'lr': 0.0004961324874632835, 'samples': 2402304, 'steps': 4691, 'loss/train': 1.7943015098571777} +02/24/2022 07:38:24 - INFO - codeparrot_training - Step 4692: {'lr': 0.0004961296199648877, 'samples': 2402816, 'steps': 4692, 'loss/train': 2.798440456390381} +02/24/2022 07:38:29 - INFO - codeparrot_training - Step 4693: {'lr': 0.0004961267514121507, 'samples': 2403328, 'steps': 4693, 'loss/train': 0.7034985423088074} +02/24/2022 07:38:33 - INFO - codeparrot_training - Step 4694: {'lr': 0.0004961238818050849, 'samples': 2403840, 'steps': 4694, 'loss/train': 1.9754117727279663} +02/24/2022 07:38:38 - INFO - codeparrot_training - Step 4695: {'lr': 0.0004961210111437026, 'samples': 2404352, 'steps': 4695, 'loss/train': 1.690004587173462} +02/24/2022 07:38:42 - INFO - codeparrot_training - Step 4696: {'lr': 0.0004961181394280159, 'samples': 2404864, 'steps': 4696, 'loss/train': 0.06594810634851456} +02/24/2022 07:38:47 - INFO - codeparrot_training - Step 4697: {'lr': 0.0004961152666580373, 'samples': 2405376, 'steps': 4697, 'loss/train': 1.377718210220337} +02/24/2022 07:38:51 - INFO - codeparrot_training - Step 4698: {'lr': 0.0004961123928337791, 'samples': 2405888, 'steps': 4698, 'loss/train': 2.882286787033081} +02/24/2022 07:38:56 - INFO - codeparrot_training - Step 4699: {'lr': 0.0004961095179552535, 'samples': 2406400, 'steps': 4699, 'loss/train': 2.091817855834961} +02/24/2022 07:39:00 - INFO - codeparrot_training - Step 4700: {'lr': 0.0004961066420224729, 'samples': 2406912, 'steps': 4700, 'loss/train': 2.072143316268921} +02/24/2022 07:39:05 - INFO - codeparrot_training - Step 4701: {'lr': 0.0004961037650354496, 'samples': 2407424, 'steps': 4701, 'loss/train': 2.9821889400482178} +02/24/2022 07:39:08 - INFO - codeparrot_training - Step 4702: {'lr': 0.0004961008869941959, 'samples': 2407936, 'steps': 4702, 'loss/train': 2.035592555999756} +02/24/2022 07:39:15 - INFO - codeparrot_training - Step 4703: {'lr': 0.0004960980078987241, 'samples': 2408448, 'steps': 4703, 'loss/train': 9.838709831237793} +02/24/2022 07:39:18 - INFO - codeparrot_training - Step 4704: {'lr': 0.0004960951277490467, 'samples': 2408960, 'steps': 4704, 'loss/train': 2.8876585960388184} +02/24/2022 07:39:24 - INFO - codeparrot_training - Step 4705: {'lr': 0.0004960922465451758, 'samples': 2409472, 'steps': 4705, 'loss/train': 2.791590929031372} +02/24/2022 07:39:27 - INFO - codeparrot_training - Step 4706: {'lr': 0.0004960893642871239, 'samples': 2409984, 'steps': 4706, 'loss/train': 3.412191867828369} +02/24/2022 07:39:33 - INFO - codeparrot_training - Step 4707: {'lr': 0.0004960864809749034, 'samples': 2410496, 'steps': 4707, 'loss/train': 0.14033402502536774} +02/24/2022 07:39:36 - INFO - codeparrot_training - Step 4708: {'lr': 0.0004960835966085264, 'samples': 2411008, 'steps': 4708, 'loss/train': 3.120861053466797} +02/24/2022 07:39:42 - INFO - codeparrot_training - Step 4709: {'lr': 0.0004960807111880055, 'samples': 2411520, 'steps': 4709, 'loss/train': 1.9810534715652466} +02/24/2022 07:39:45 - INFO - codeparrot_training - Step 4710: {'lr': 0.000496077824713353, 'samples': 2412032, 'steps': 4710, 'loss/train': 2.2970259189605713} +02/24/2022 07:39:51 - INFO - codeparrot_training - Step 4711: {'lr': 0.0004960749371845812, 'samples': 2412544, 'steps': 4711, 'loss/train': 2.2321465015411377} +02/24/2022 07:39:54 - INFO - codeparrot_training - Step 4712: {'lr': 0.0004960720486017025, 'samples': 2413056, 'steps': 4712, 'loss/train': 3.103388786315918} +02/24/2022 07:40:00 - INFO - codeparrot_training - Step 4713: {'lr': 0.0004960691589647292, 'samples': 2413568, 'steps': 4713, 'loss/train': 2.979495048522949} +02/24/2022 07:40:04 - INFO - codeparrot_training - Step 4714: {'lr': 0.0004960662682736739, 'samples': 2414080, 'steps': 4714, 'loss/train': 3.1824088096618652} +02/24/2022 07:40:09 - INFO - codeparrot_training - Step 4715: {'lr': 0.0004960633765285487, 'samples': 2414592, 'steps': 4715, 'loss/train': 2.706354856491089} +02/24/2022 07:40:13 - INFO - codeparrot_training - Step 4716: {'lr': 0.0004960604837293663, 'samples': 2415104, 'steps': 4716, 'loss/train': 2.2891921997070312} +02/24/2022 07:40:18 - INFO - codeparrot_training - Step 4717: {'lr': 0.0004960575898761388, 'samples': 2415616, 'steps': 4717, 'loss/train': 4.0091447830200195} +02/24/2022 07:40:24 - INFO - codeparrot_training - Step 4718: {'lr': 0.0004960546949688788, 'samples': 2416128, 'steps': 4718, 'loss/train': 2.0257010459899902} +02/24/2022 07:40:27 - INFO - codeparrot_training - Step 4719: {'lr': 0.0004960517990075985, 'samples': 2416640, 'steps': 4719, 'loss/train': 2.9779868125915527} +02/24/2022 07:40:33 - INFO - codeparrot_training - Step 4720: {'lr': 0.0004960489019923105, 'samples': 2417152, 'steps': 4720, 'loss/train': 1.8966171741485596} +02/24/2022 07:40:36 - INFO - codeparrot_training - Step 4721: {'lr': 0.0004960460039230271, 'samples': 2417664, 'steps': 4721, 'loss/train': 0.7914944291114807} +02/24/2022 07:40:43 - INFO - codeparrot_training - Step 4722: {'lr': 0.0004960431047997608, 'samples': 2418176, 'steps': 4722, 'loss/train': 2.8662352561950684} +02/24/2022 07:40:46 - INFO - codeparrot_training - Step 4723: {'lr': 0.0004960402046225239, 'samples': 2418688, 'steps': 4723, 'loss/train': 2.5579631328582764} +02/24/2022 07:40:50 - INFO - codeparrot_training - Step 4724: {'lr': 0.0004960373033913289, 'samples': 2419200, 'steps': 4724, 'loss/train': 1.533189058303833} +02/24/2022 07:40:55 - INFO - codeparrot_training - Step 4725: {'lr': 0.0004960344011061882, 'samples': 2419712, 'steps': 4725, 'loss/train': 2.4321110248565674} +02/24/2022 07:41:01 - INFO - codeparrot_training - Step 4726: {'lr': 0.0004960314977671144, 'samples': 2420224, 'steps': 4726, 'loss/train': 2.3497602939605713} +02/24/2022 07:41:04 - INFO - codeparrot_training - Step 4727: {'lr': 0.0004960285933741196, 'samples': 2420736, 'steps': 4727, 'loss/train': 2.9004337787628174} +02/24/2022 07:41:10 - INFO - codeparrot_training - Step 4728: {'lr': 0.0004960256879272166, 'samples': 2421248, 'steps': 4728, 'loss/train': 3.0766336917877197} +02/24/2022 07:41:13 - INFO - codeparrot_training - Step 4729: {'lr': 0.0004960227814264175, 'samples': 2421760, 'steps': 4729, 'loss/train': 2.4607222080230713} +02/24/2022 07:41:19 - INFO - codeparrot_training - Step 4730: {'lr': 0.0004960198738717351, 'samples': 2422272, 'steps': 4730, 'loss/train': 2.8965721130371094} +02/24/2022 07:41:22 - INFO - codeparrot_training - Step 4731: {'lr': 0.0004960169652631815, 'samples': 2422784, 'steps': 4731, 'loss/train': 2.2890076637268066} +02/24/2022 07:41:28 - INFO - codeparrot_training - Step 4732: {'lr': 0.0004960140556007695, 'samples': 2423296, 'steps': 4732, 'loss/train': 1.5802545547485352} +02/24/2022 07:41:31 - INFO - codeparrot_training - Step 4733: {'lr': 0.0004960111448845114, 'samples': 2423808, 'steps': 4733, 'loss/train': 2.5724680423736572} +02/24/2022 07:41:37 - INFO - codeparrot_training - Step 4734: {'lr': 0.0004960082331144195, 'samples': 2424320, 'steps': 4734, 'loss/train': 2.1319644451141357} +02/24/2022 07:41:41 - INFO - codeparrot_training - Step 4735: {'lr': 0.0004960053202905066, 'samples': 2424832, 'steps': 4735, 'loss/train': 2.4284427165985107} +02/24/2022 07:41:46 - INFO - codeparrot_training - Step 4736: {'lr': 0.0004960024064127849, 'samples': 2425344, 'steps': 4736, 'loss/train': 2.9356603622436523} +02/24/2022 07:41:50 - INFO - codeparrot_training - Step 4737: {'lr': 0.0004959994914812671, 'samples': 2425856, 'steps': 4737, 'loss/train': 2.7840700149536133} +02/24/2022 07:41:55 - INFO - codeparrot_training - Step 4738: {'lr': 0.0004959965754959656, 'samples': 2426368, 'steps': 4738, 'loss/train': 2.49600887298584} +02/24/2022 07:41:59 - INFO - codeparrot_training - Step 4739: {'lr': 0.0004959936584568928, 'samples': 2426880, 'steps': 4739, 'loss/train': 2.3466830253601074} +02/24/2022 07:42:04 - INFO - codeparrot_training - Step 4740: {'lr': 0.0004959907403640614, 'samples': 2427392, 'steps': 4740, 'loss/train': 2.574820041656494} +02/24/2022 07:42:07 - INFO - codeparrot_training - Step 4741: {'lr': 0.0004959878212174837, 'samples': 2427904, 'steps': 4741, 'loss/train': 1.8421263694763184} +02/24/2022 07:42:13 - INFO - codeparrot_training - Step 4742: {'lr': 0.0004959849010171723, 'samples': 2428416, 'steps': 4742, 'loss/train': 1.4805868864059448} +02/24/2022 07:42:16 - INFO - codeparrot_training - Step 4743: {'lr': 0.0004959819797631397, 'samples': 2428928, 'steps': 4743, 'loss/train': 2.4147841930389404} +02/24/2022 07:42:22 - INFO - codeparrot_training - Step 4744: {'lr': 0.0004959790574553984, 'samples': 2429440, 'steps': 4744, 'loss/train': 3.531266689300537} +02/24/2022 07:42:26 - INFO - codeparrot_training - Step 4745: {'lr': 0.000495976134093961, 'samples': 2429952, 'steps': 4745, 'loss/train': 9.151768684387207} +02/24/2022 07:42:31 - INFO - codeparrot_training - Step 4746: {'lr': 0.0004959732096788398, 'samples': 2430464, 'steps': 4746, 'loss/train': 0.6532483696937561} +02/24/2022 07:42:35 - INFO - codeparrot_training - Step 4747: {'lr': 0.0004959702842100475, 'samples': 2430976, 'steps': 4747, 'loss/train': 2.721266984939575} +02/24/2022 07:42:41 - INFO - codeparrot_training - Step 4748: {'lr': 0.0004959673576875967, 'samples': 2431488, 'steps': 4748, 'loss/train': 3.1963508129119873} +02/24/2022 07:42:44 - INFO - codeparrot_training - Step 4749: {'lr': 0.0004959644301114998, 'samples': 2432000, 'steps': 4749, 'loss/train': 2.7351653575897217} +02/24/2022 07:42:50 - INFO - codeparrot_training - Step 4750: {'lr': 0.0004959615014817694, 'samples': 2432512, 'steps': 4750, 'loss/train': 3.450247049331665} +02/24/2022 07:42:53 - INFO - codeparrot_training - Step 4751: {'lr': 0.000495958571798418, 'samples': 2433024, 'steps': 4751, 'loss/train': 3.1717827320098877} +02/24/2022 07:42:57 - INFO - codeparrot_training - Step 4752: {'lr': 0.0004959556410614582, 'samples': 2433536, 'steps': 4752, 'loss/train': 3.368734836578369} +02/24/2022 07:43:02 - INFO - codeparrot_training - Step 4753: {'lr': 0.0004959527092709026, 'samples': 2434048, 'steps': 4753, 'loss/train': 2.6499545574188232} +02/24/2022 07:43:06 - INFO - codeparrot_training - Step 4754: {'lr': 0.0004959497764267636, 'samples': 2434560, 'steps': 4754, 'loss/train': 2.5016541481018066} +02/24/2022 07:43:12 - INFO - codeparrot_training - Step 4755: {'lr': 0.0004959468425290537, 'samples': 2435072, 'steps': 4755, 'loss/train': 2.6367087364196777} +02/24/2022 07:43:15 - INFO - codeparrot_training - Step 4756: {'lr': 0.0004959439075777858, 'samples': 2435584, 'steps': 4756, 'loss/train': 1.3696736097335815} +02/24/2022 07:43:20 - INFO - codeparrot_training - Step 4757: {'lr': 0.0004959409715729723, 'samples': 2436096, 'steps': 4757, 'loss/train': 3.7594516277313232} +02/24/2022 07:43:24 - INFO - codeparrot_training - Step 4758: {'lr': 0.0004959380345146258, 'samples': 2436608, 'steps': 4758, 'loss/train': 2.6778650283813477} +02/24/2022 07:43:29 - INFO - codeparrot_training - Step 4759: {'lr': 0.0004959350964027588, 'samples': 2437120, 'steps': 4759, 'loss/train': 2.489854335784912} +02/24/2022 07:43:33 - INFO - codeparrot_training - Step 4760: {'lr': 0.000495932157237384, 'samples': 2437632, 'steps': 4760, 'loss/train': 1.8308134078979492} +02/24/2022 07:43:39 - INFO - codeparrot_training - Step 4761: {'lr': 0.0004959292170185139, 'samples': 2438144, 'steps': 4761, 'loss/train': 2.295203685760498} +02/24/2022 07:43:42 - INFO - codeparrot_training - Step 4762: {'lr': 0.0004959262757461611, 'samples': 2438656, 'steps': 4762, 'loss/train': 1.9961360692977905} +02/24/2022 07:43:48 - INFO - codeparrot_training - Step 4763: {'lr': 0.0004959233334203382, 'samples': 2439168, 'steps': 4763, 'loss/train': 2.186973810195923} +02/24/2022 07:43:51 - INFO - codeparrot_training - Step 4764: {'lr': 0.0004959203900410579, 'samples': 2439680, 'steps': 4764, 'loss/train': 1.625566840171814} +02/24/2022 07:43:57 - INFO - codeparrot_training - Step 4765: {'lr': 0.0004959174456083327, 'samples': 2440192, 'steps': 4765, 'loss/train': 2.9025657176971436} +02/24/2022 07:44:00 - INFO - codeparrot_training - Step 4766: {'lr': 0.0004959145001221752, 'samples': 2440704, 'steps': 4766, 'loss/train': 2.9964284896850586} +02/24/2022 07:44:06 - INFO - codeparrot_training - Step 4767: {'lr': 0.0004959115535825982, 'samples': 2441216, 'steps': 4767, 'loss/train': 3.570746660232544} +02/24/2022 07:44:09 - INFO - codeparrot_training - Step 4768: {'lr': 0.000495908605989614, 'samples': 2441728, 'steps': 4768, 'loss/train': 3.0606961250305176} +02/24/2022 07:44:15 - INFO - codeparrot_training - Step 4769: {'lr': 0.0004959056573432357, 'samples': 2442240, 'steps': 4769, 'loss/train': 3.297722816467285} +02/24/2022 07:44:18 - INFO - codeparrot_training - Step 4770: {'lr': 0.0004959027076434754, 'samples': 2442752, 'steps': 4770, 'loss/train': 2.21748423576355} +02/24/2022 07:44:24 - INFO - codeparrot_training - Step 4771: {'lr': 0.000495899756890346, 'samples': 2443264, 'steps': 4771, 'loss/train': 2.501883029937744} +02/24/2022 07:44:28 - INFO - codeparrot_training - Step 4772: {'lr': 0.0004958968050838603, 'samples': 2443776, 'steps': 4772, 'loss/train': 9.372735023498535} +02/24/2022 07:44:33 - INFO - codeparrot_training - Step 4773: {'lr': 0.0004958938522240306, 'samples': 2444288, 'steps': 4773, 'loss/train': 3.2129766941070557} +02/24/2022 07:44:37 - INFO - codeparrot_training - Step 4774: {'lr': 0.0004958908983108697, 'samples': 2444800, 'steps': 4774, 'loss/train': 2.83373761177063} +02/24/2022 07:44:42 - INFO - codeparrot_training - Step 4775: {'lr': 0.0004958879433443903, 'samples': 2445312, 'steps': 4775, 'loss/train': 1.9512957334518433} +02/24/2022 07:44:48 - INFO - codeparrot_training - Step 4776: {'lr': 0.0004958849873246051, 'samples': 2445824, 'steps': 4776, 'loss/train': 3.2849318981170654} +02/24/2022 07:44:51 - INFO - codeparrot_training - Step 4777: {'lr': 0.0004958820302515268, 'samples': 2446336, 'steps': 4777, 'loss/train': 3.2047841548919678} +02/24/2022 07:44:57 - INFO - codeparrot_training - Step 4778: {'lr': 0.0004958790721251678, 'samples': 2446848, 'steps': 4778, 'loss/train': 3.4885566234588623} +02/24/2022 07:45:00 - INFO - codeparrot_training - Step 4779: {'lr': 0.000495876112945541, 'samples': 2447360, 'steps': 4779, 'loss/train': 2.5625088214874268} +02/24/2022 07:45:06 - INFO - codeparrot_training - Step 4780: {'lr': 0.0004958731527126589, 'samples': 2447872, 'steps': 4780, 'loss/train': 1.7746145725250244} +02/24/2022 07:45:10 - INFO - codeparrot_training - Step 4781: {'lr': 0.0004958701914265344, 'samples': 2448384, 'steps': 4781, 'loss/train': 2.9764153957366943} +02/24/2022 07:45:15 - INFO - codeparrot_training - Step 4782: {'lr': 0.0004958672290871799, 'samples': 2448896, 'steps': 4782, 'loss/train': 1.6381715536117554} +02/24/2022 07:45:19 - INFO - codeparrot_training - Step 4783: {'lr': 0.0004958642656946084, 'samples': 2449408, 'steps': 4783, 'loss/train': 2.3984715938568115} +02/24/2022 07:45:24 - INFO - codeparrot_training - Step 4784: {'lr': 0.0004958613012488324, 'samples': 2449920, 'steps': 4784, 'loss/train': 3.122859239578247} +02/24/2022 07:45:28 - INFO - codeparrot_training - Step 4785: {'lr': 0.0004958583357498647, 'samples': 2450432, 'steps': 4785, 'loss/train': 3.215186834335327} +02/24/2022 07:45:33 - INFO - codeparrot_training - Step 4786: {'lr': 0.000495855369197718, 'samples': 2450944, 'steps': 4786, 'loss/train': 2.622568130493164} +02/24/2022 07:45:37 - INFO - codeparrot_training - Step 4787: {'lr': 0.0004958524015924048, 'samples': 2451456, 'steps': 4787, 'loss/train': 2.390336751937866} +02/24/2022 07:45:42 - INFO - codeparrot_training - Step 4788: {'lr': 0.0004958494329339382, 'samples': 2451968, 'steps': 4788, 'loss/train': 2.356473445892334} +02/24/2022 07:45:46 - INFO - codeparrot_training - Step 4789: {'lr': 0.0004958464632223306, 'samples': 2452480, 'steps': 4789, 'loss/train': 2.064406394958496} +02/24/2022 07:45:51 - INFO - codeparrot_training - Step 4790: {'lr': 0.0004958434924575947, 'samples': 2452992, 'steps': 4790, 'loss/train': 2.5083656311035156} +02/24/2022 07:45:55 - INFO - codeparrot_training - Step 4791: {'lr': 0.0004958405206397434, 'samples': 2453504, 'steps': 4791, 'loss/train': 2.026231050491333} +02/24/2022 07:46:00 - INFO - codeparrot_training - Step 4792: {'lr': 0.0004958375477687896, 'samples': 2454016, 'steps': 4792, 'loss/train': 1.7246315479278564} +02/24/2022 07:46:04 - INFO - codeparrot_training - Step 4793: {'lr': 0.0004958345738447456, 'samples': 2454528, 'steps': 4793, 'loss/train': 1.6558524370193481} +02/24/2022 07:46:09 - INFO - codeparrot_training - Step 4794: {'lr': 0.0004958315988676244, 'samples': 2455040, 'steps': 4794, 'loss/train': 3.594749689102173} +02/24/2022 07:46:13 - INFO - codeparrot_training - Step 4795: {'lr': 0.0004958286228374387, 'samples': 2455552, 'steps': 4795, 'loss/train': 2.831083059310913} +02/24/2022 07:46:19 - INFO - codeparrot_training - Step 4796: {'lr': 0.0004958256457542011, 'samples': 2456064, 'steps': 4796, 'loss/train': 1.8108325004577637} +02/24/2022 07:46:23 - INFO - codeparrot_training - Step 4797: {'lr': 0.0004958226676179246, 'samples': 2456576, 'steps': 4797, 'loss/train': 3.0596425533294678} +02/24/2022 07:46:28 - INFO - codeparrot_training - Step 4798: {'lr': 0.0004958196884286218, 'samples': 2457088, 'steps': 4798, 'loss/train': 2.8492672443389893} +02/24/2022 07:46:32 - INFO - codeparrot_training - Step 4799: {'lr': 0.0004958167081863057, 'samples': 2457600, 'steps': 4799, 'loss/train': 2.4905967712402344} +02/24/2022 07:46:37 - INFO - codeparrot_training - Step 4800: {'lr': 0.0004958137268909887, 'samples': 2458112, 'steps': 4800, 'loss/train': 1.6208795309066772} +02/24/2022 07:46:41 - INFO - codeparrot_training - Step 4801: {'lr': 0.0004958107445426838, 'samples': 2458624, 'steps': 4801, 'loss/train': 2.433300256729126} +02/24/2022 07:46:46 - INFO - codeparrot_training - Step 4802: {'lr': 0.0004958077611414037, 'samples': 2459136, 'steps': 4802, 'loss/train': 2.9399757385253906} +02/24/2022 07:46:50 - INFO - codeparrot_training - Step 4803: {'lr': 0.0004958047766871612, 'samples': 2459648, 'steps': 4803, 'loss/train': 2.5095489025115967} +02/24/2022 07:46:55 - INFO - codeparrot_training - Step 4804: {'lr': 0.000495801791179969, 'samples': 2460160, 'steps': 4804, 'loss/train': 2.0438201427459717} +02/24/2022 07:46:59 - INFO - codeparrot_training - Step 4805: {'lr': 0.0004957988046198401, 'samples': 2460672, 'steps': 4805, 'loss/train': 2.9106061458587646} +02/24/2022 07:47:05 - INFO - codeparrot_training - Step 4806: {'lr': 0.0004957958170067872, 'samples': 2461184, 'steps': 4806, 'loss/train': 3.167832612991333} +02/24/2022 07:47:08 - INFO - codeparrot_training - Step 4807: {'lr': 0.000495792828340823, 'samples': 2461696, 'steps': 4807, 'loss/train': 3.0617074966430664} +02/24/2022 07:47:14 - INFO - codeparrot_training - Step 4808: {'lr': 0.0004957898386219603, 'samples': 2462208, 'steps': 4808, 'loss/train': 2.184767007827759} +02/24/2022 07:47:17 - INFO - codeparrot_training - Step 4809: {'lr': 0.0004957868478502121, 'samples': 2462720, 'steps': 4809, 'loss/train': 3.0218400955200195} +02/24/2022 07:47:23 - INFO - codeparrot_training - Step 4810: {'lr': 0.0004957838560255911, 'samples': 2463232, 'steps': 4810, 'loss/train': 3.329874277114868} +02/24/2022 07:47:26 - INFO - codeparrot_training - Step 4811: {'lr': 0.0004957808631481101, 'samples': 2463744, 'steps': 4811, 'loss/train': 4.094907283782959} +02/24/2022 07:47:32 - INFO - codeparrot_training - Step 4812: {'lr': 0.0004957778692177819, 'samples': 2464256, 'steps': 4812, 'loss/train': 1.6182080507278442} +02/24/2022 07:47:35 - INFO - codeparrot_training - Step 4813: {'lr': 0.0004957748742346193, 'samples': 2464768, 'steps': 4813, 'loss/train': 2.0050013065338135} +02/24/2022 07:47:41 - INFO - codeparrot_training - Step 4814: {'lr': 0.0004957718781986352, 'samples': 2465280, 'steps': 4814, 'loss/train': 3.025818109512329} +02/24/2022 07:47:44 - INFO - codeparrot_training - Step 4815: {'lr': 0.0004957688811098425, 'samples': 2465792, 'steps': 4815, 'loss/train': 3.4738645553588867} +02/24/2022 07:47:50 - INFO - codeparrot_training - Step 4816: {'lr': 0.0004957658829682539, 'samples': 2466304, 'steps': 4816, 'loss/train': 2.82922101020813} +02/24/2022 07:47:54 - INFO - codeparrot_training - Step 4817: {'lr': 0.0004957628837738823, 'samples': 2466816, 'steps': 4817, 'loss/train': 2.2350382804870605} +02/24/2022 07:47:59 - INFO - codeparrot_training - Step 4818: {'lr': 0.0004957598835267405, 'samples': 2467328, 'steps': 4818, 'loss/train': 1.9026799201965332} +02/24/2022 07:48:03 - INFO - codeparrot_training - Step 4819: {'lr': 0.0004957568822268415, 'samples': 2467840, 'steps': 4819, 'loss/train': 1.5194218158721924} +02/24/2022 07:48:08 - INFO - codeparrot_training - Step 4820: {'lr': 0.000495753879874198, 'samples': 2468352, 'steps': 4820, 'loss/train': 2.190692901611328} +02/24/2022 07:48:12 - INFO - codeparrot_training - Step 4821: {'lr': 0.0004957508764688227, 'samples': 2468864, 'steps': 4821, 'loss/train': 1.6686153411865234} +02/24/2022 07:48:17 - INFO - codeparrot_training - Step 4822: {'lr': 0.000495747872010729, 'samples': 2469376, 'steps': 4822, 'loss/train': 1.8834881782531738} +02/24/2022 07:48:21 - INFO - codeparrot_training - Step 4823: {'lr': 0.0004957448664999293, 'samples': 2469888, 'steps': 4823, 'loss/train': 2.2773869037628174} +02/24/2022 07:48:26 - INFO - codeparrot_training - Step 4824: {'lr': 0.0004957418599364367, 'samples': 2470400, 'steps': 4824, 'loss/train': 4.282899379730225} +02/24/2022 07:48:30 - INFO - codeparrot_training - Step 4825: {'lr': 0.000495738852320264, 'samples': 2470912, 'steps': 4825, 'loss/train': 2.3439242839813232} +02/24/2022 07:48:36 - INFO - codeparrot_training - Step 4826: {'lr': 0.000495735843651424, 'samples': 2471424, 'steps': 4826, 'loss/train': 2.6990227699279785} +02/24/2022 07:48:39 - INFO - codeparrot_training - Step 4827: {'lr': 0.0004957328339299297, 'samples': 2471936, 'steps': 4827, 'loss/train': 2.958448886871338} +02/24/2022 07:48:45 - INFO - codeparrot_training - Step 4828: {'lr': 0.0004957298231557939, 'samples': 2472448, 'steps': 4828, 'loss/train': 2.2011032104492188} +02/24/2022 07:48:48 - INFO - codeparrot_training - Step 4829: {'lr': 0.0004957268113290297, 'samples': 2472960, 'steps': 4829, 'loss/train': 2.080993175506592} +02/24/2022 07:48:54 - INFO - codeparrot_training - Step 4830: {'lr': 0.0004957237984496499, 'samples': 2473472, 'steps': 4830, 'loss/train': 2.498842239379883} +02/24/2022 07:48:59 - INFO - codeparrot_training - Step 4831: {'lr': 0.0004957207845176673, 'samples': 2473984, 'steps': 4831, 'loss/train': 1.2883267402648926} +02/24/2022 07:49:03 - INFO - codeparrot_training - Step 4832: {'lr': 0.0004957177695330948, 'samples': 2474496, 'steps': 4832, 'loss/train': 3.31538724899292} +02/24/2022 07:49:08 - INFO - codeparrot_training - Step 4833: {'lr': 0.0004957147534959455, 'samples': 2475008, 'steps': 4833, 'loss/train': 2.475583553314209} +02/24/2022 07:49:12 - INFO - codeparrot_training - Step 4834: {'lr': 0.0004957117364062321, 'samples': 2475520, 'steps': 4834, 'loss/train': 2.8139469623565674} +02/24/2022 07:49:17 - INFO - codeparrot_training - Step 4835: {'lr': 0.0004957087182639678, 'samples': 2476032, 'steps': 4835, 'loss/train': 1.918376088142395} +02/24/2022 07:49:21 - INFO - codeparrot_training - Step 4836: {'lr': 0.0004957056990691653, 'samples': 2476544, 'steps': 4836, 'loss/train': 2.501250982284546} +02/24/2022 07:49:26 - INFO - codeparrot_training - Step 4837: {'lr': 0.0004957026788218377, 'samples': 2477056, 'steps': 4837, 'loss/train': 3.309501886367798} +02/24/2022 07:49:30 - INFO - codeparrot_training - Step 4838: {'lr': 0.0004956996575219977, 'samples': 2477568, 'steps': 4838, 'loss/train': 1.1400737762451172} +02/24/2022 07:49:35 - INFO - codeparrot_training - Step 4839: {'lr': 0.0004956966351696584, 'samples': 2478080, 'steps': 4839, 'loss/train': 2.0966250896453857} +02/24/2022 07:49:39 - INFO - codeparrot_training - Step 4840: {'lr': 0.0004956936117648329, 'samples': 2478592, 'steps': 4840, 'loss/train': 1.8754032850265503} +02/24/2022 07:49:45 - INFO - codeparrot_training - Step 4841: {'lr': 0.0004956905873075338, 'samples': 2479104, 'steps': 4841, 'loss/train': 1.5526013374328613} +02/24/2022 07:49:48 - INFO - codeparrot_training - Step 4842: {'lr': 0.0004956875617977743, 'samples': 2479616, 'steps': 4842, 'loss/train': 2.265995979309082} +02/24/2022 07:49:54 - INFO - codeparrot_training - Step 4843: {'lr': 0.0004956845352355674, 'samples': 2480128, 'steps': 4843, 'loss/train': 2.154154062271118} +02/24/2022 07:49:57 - INFO - codeparrot_training - Step 4844: {'lr': 0.0004956815076209257, 'samples': 2480640, 'steps': 4844, 'loss/train': 2.8307952880859375} +02/24/2022 07:50:03 - INFO - codeparrot_training - Step 4845: {'lr': 0.0004956784789538626, 'samples': 2481152, 'steps': 4845, 'loss/train': 2.029360294342041} +02/24/2022 07:50:06 - INFO - codeparrot_training - Step 4846: {'lr': 0.000495675449234391, 'samples': 2481664, 'steps': 4846, 'loss/train': 2.8690202236175537} +02/24/2022 07:50:12 - INFO - codeparrot_training - Step 4847: {'lr': 0.0004956724184625237, 'samples': 2482176, 'steps': 4847, 'loss/train': 2.2508604526519775} +02/24/2022 07:50:15 - INFO - codeparrot_training - Step 4848: {'lr': 0.0004956693866382738, 'samples': 2482688, 'steps': 4848, 'loss/train': 1.745201826095581} +02/24/2022 07:50:21 - INFO - codeparrot_training - Step 4849: {'lr': 0.0004956663537616542, 'samples': 2483200, 'steps': 4849, 'loss/train': 2.035811185836792} +02/24/2022 07:50:24 - INFO - codeparrot_training - Step 4850: {'lr': 0.000495663319832678, 'samples': 2483712, 'steps': 4850, 'loss/train': 3.593346118927002} +02/24/2022 07:50:31 - INFO - codeparrot_training - Step 4851: {'lr': 0.0004956602848513581, 'samples': 2484224, 'steps': 4851, 'loss/train': 1.7882438898086548} +02/24/2022 07:50:34 - INFO - codeparrot_training - Step 4852: {'lr': 0.0004956572488177075, 'samples': 2484736, 'steps': 4852, 'loss/train': 2.32806658744812} +02/24/2022 07:50:40 - INFO - codeparrot_training - Step 4853: {'lr': 0.0004956542117317393, 'samples': 2485248, 'steps': 4853, 'loss/train': 2.117522954940796} +02/24/2022 07:50:43 - INFO - codeparrot_training - Step 4854: {'lr': 0.0004956511735934665, 'samples': 2485760, 'steps': 4854, 'loss/train': 2.0202419757843018} +02/24/2022 07:50:49 - INFO - codeparrot_training - Step 4855: {'lr': 0.000495648134402902, 'samples': 2486272, 'steps': 4855, 'loss/train': 2.7553694248199463} +02/24/2022 07:50:52 - INFO - codeparrot_training - Step 4856: {'lr': 0.0004956450941600589, 'samples': 2486784, 'steps': 4856, 'loss/train': 2.8186230659484863} +02/24/2022 07:50:58 - INFO - codeparrot_training - Step 4857: {'lr': 0.0004956420528649504, 'samples': 2487296, 'steps': 4857, 'loss/train': 2.7916064262390137} +02/24/2022 07:51:01 - INFO - codeparrot_training - Step 4858: {'lr': 0.0004956390105175892, 'samples': 2487808, 'steps': 4858, 'loss/train': 2.8463239669799805} +02/24/2022 07:51:07 - INFO - codeparrot_training - Step 4859: {'lr': 0.0004956359671179885, 'samples': 2488320, 'steps': 4859, 'loss/train': 3.3343734741210938} +02/24/2022 07:51:10 - INFO - codeparrot_training - Step 4860: {'lr': 0.0004956329226661612, 'samples': 2488832, 'steps': 4860, 'loss/train': 2.231766700744629} +02/24/2022 07:51:17 - INFO - codeparrot_training - Step 4861: {'lr': 0.0004956298771621206, 'samples': 2489344, 'steps': 4861, 'loss/train': 2.2583096027374268} +02/24/2022 07:51:20 - INFO - codeparrot_training - Step 4862: {'lr': 0.0004956268306058795, 'samples': 2489856, 'steps': 4862, 'loss/train': 3.411510944366455} +02/24/2022 07:51:26 - INFO - codeparrot_training - Step 4863: {'lr': 0.0004956237829974511, 'samples': 2490368, 'steps': 4863, 'loss/train': 1.1921054124832153} +02/24/2022 07:51:29 - INFO - codeparrot_training - Step 4864: {'lr': 0.0004956207343368485, 'samples': 2490880, 'steps': 4864, 'loss/train': 3.353442907333374} +02/24/2022 07:51:35 - INFO - codeparrot_training - Step 4865: {'lr': 0.0004956176846240845, 'samples': 2491392, 'steps': 4865, 'loss/train': 1.3059569597244263} +02/24/2022 07:51:38 - INFO - codeparrot_training - Step 4866: {'lr': 0.0004956146338591725, 'samples': 2491904, 'steps': 4866, 'loss/train': 2.7087414264678955} +02/24/2022 07:51:43 - INFO - codeparrot_training - Step 4867: {'lr': 0.0004956115820421253, 'samples': 2492416, 'steps': 4867, 'loss/train': 2.6254632472991943} +02/24/2022 07:51:47 - INFO - codeparrot_training - Step 4868: {'lr': 0.000495608529172956, 'samples': 2492928, 'steps': 4868, 'loss/train': 3.8123397827148438} +02/24/2022 07:51:52 - INFO - codeparrot_training - Step 4869: {'lr': 0.000495605475251678, 'samples': 2493440, 'steps': 4869, 'loss/train': 2.5492396354675293} +02/24/2022 07:51:56 - INFO - codeparrot_training - Step 4870: {'lr': 0.000495602420278304, 'samples': 2493952, 'steps': 4870, 'loss/train': 1.9818994998931885} +02/24/2022 07:52:02 - INFO - codeparrot_training - Step 4871: {'lr': 0.0004955993642528471, 'samples': 2494464, 'steps': 4871, 'loss/train': 2.781245231628418} +02/24/2022 07:52:05 - INFO - codeparrot_training - Step 4872: {'lr': 0.0004955963071753206, 'samples': 2494976, 'steps': 4872, 'loss/train': 2.337423801422119} +02/24/2022 07:52:11 - INFO - codeparrot_training - Step 4873: {'lr': 0.0004955932490457375, 'samples': 2495488, 'steps': 4873, 'loss/train': 2.89913010597229} +02/24/2022 07:52:14 - INFO - codeparrot_training - Step 4874: {'lr': 0.0004955901898641109, 'samples': 2496000, 'steps': 4874, 'loss/train': 3.992082357406616} +02/24/2022 07:52:20 - INFO - codeparrot_training - Step 4875: {'lr': 0.000495587129630454, 'samples': 2496512, 'steps': 4875, 'loss/train': 2.445347309112549} +02/24/2022 07:52:24 - INFO - codeparrot_training - Step 4876: {'lr': 0.0004955840683447797, 'samples': 2497024, 'steps': 4876, 'loss/train': 2.601616144180298} +02/24/2022 07:52:29 - INFO - codeparrot_training - Step 4877: {'lr': 0.0004955810060071012, 'samples': 2497536, 'steps': 4877, 'loss/train': 3.540217161178589} +02/24/2022 07:52:33 - INFO - codeparrot_training - Step 4878: {'lr': 0.0004955779426174318, 'samples': 2498048, 'steps': 4878, 'loss/train': 2.6730260848999023} +02/24/2022 07:52:38 - INFO - codeparrot_training - Step 4879: {'lr': 0.0004955748781757844, 'samples': 2498560, 'steps': 4879, 'loss/train': 3.049858808517456} +02/24/2022 07:52:42 - INFO - codeparrot_training - Step 4880: {'lr': 0.0004955718126821722, 'samples': 2499072, 'steps': 4880, 'loss/train': 2.644120216369629} +02/24/2022 07:52:49 - INFO - codeparrot_training - Step 4881: {'lr': 0.0004955687461366083, 'samples': 2499584, 'steps': 4881, 'loss/train': 2.384840250015259} +02/24/2022 07:52:52 - INFO - codeparrot_training - Step 4882: {'lr': 0.000495565678539106, 'samples': 2500096, 'steps': 4882, 'loss/train': 4.711916923522949} +02/24/2022 07:52:58 - INFO - codeparrot_training - Step 4883: {'lr': 0.0004955626098896782, 'samples': 2500608, 'steps': 4883, 'loss/train': 1.9414739608764648} +02/24/2022 07:53:01 - INFO - codeparrot_training - Step 4884: {'lr': 0.0004955595401883381, 'samples': 2501120, 'steps': 4884, 'loss/train': 2.6034696102142334} +02/24/2022 07:53:07 - INFO - codeparrot_training - Step 4885: {'lr': 0.0004955564694350989, 'samples': 2501632, 'steps': 4885, 'loss/train': 3.098513126373291} +02/24/2022 07:53:10 - INFO - codeparrot_training - Step 4886: {'lr': 0.0004955533976299739, 'samples': 2502144, 'steps': 4886, 'loss/train': 0.36670711636543274} +02/24/2022 07:53:16 - INFO - codeparrot_training - Step 4887: {'lr': 0.000495550324772976, 'samples': 2502656, 'steps': 4887, 'loss/train': 3.2154476642608643} +02/24/2022 07:53:19 - INFO - codeparrot_training - Step 4888: {'lr': 0.0004955472508641186, 'samples': 2503168, 'steps': 4888, 'loss/train': 3.24361515045166} +02/24/2022 07:53:25 - INFO - codeparrot_training - Step 4889: {'lr': 0.0004955441759034146, 'samples': 2503680, 'steps': 4889, 'loss/train': 0.37054336071014404} +02/24/2022 07:53:28 - INFO - codeparrot_training - Step 4890: {'lr': 0.0004955410998908774, 'samples': 2504192, 'steps': 4890, 'loss/train': 2.3825619220733643} +02/24/2022 07:53:34 - INFO - codeparrot_training - Step 4891: {'lr': 0.0004955380228265201, 'samples': 2504704, 'steps': 4891, 'loss/train': 0.7356753945350647} +02/24/2022 07:53:37 - INFO - codeparrot_training - Step 4892: {'lr': 0.0004955349447103559, 'samples': 2505216, 'steps': 4892, 'loss/train': 3.7161316871643066} +02/24/2022 07:53:44 - INFO - codeparrot_training - Step 4893: {'lr': 0.000495531865542398, 'samples': 2505728, 'steps': 4893, 'loss/train': 1.4669992923736572} +02/24/2022 07:53:47 - INFO - codeparrot_training - Step 4894: {'lr': 0.0004955287853226594, 'samples': 2506240, 'steps': 4894, 'loss/train': 2.5684609413146973} +02/24/2022 07:53:53 - INFO - codeparrot_training - Step 4895: {'lr': 0.0004955257040511534, 'samples': 2506752, 'steps': 4895, 'loss/train': 3.071998357772827} +02/24/2022 07:53:56 - INFO - codeparrot_training - Step 4896: {'lr': 0.0004955226217278934, 'samples': 2507264, 'steps': 4896, 'loss/train': 3.3817026615142822} +02/24/2022 07:54:01 - INFO - codeparrot_training - Step 4897: {'lr': 0.0004955195383528926, 'samples': 2507776, 'steps': 4897, 'loss/train': 2.566835403442383} +02/24/2022 07:54:05 - INFO - codeparrot_training - Step 4898: {'lr': 0.0004955164539261638, 'samples': 2508288, 'steps': 4898, 'loss/train': 3.8253231048583984} +02/24/2022 07:54:10 - INFO - codeparrot_training - Step 4899: {'lr': 0.0004955133684477205, 'samples': 2508800, 'steps': 4899, 'loss/train': 2.7843708992004395} +02/24/2022 07:54:14 - INFO - codeparrot_training - Step 4900: {'lr': 0.000495510281917576, 'samples': 2509312, 'steps': 4900, 'loss/train': 3.378871440887451} +02/24/2022 07:54:19 - INFO - codeparrot_training - Step 4901: {'lr': 0.0004955071943357433, 'samples': 2509824, 'steps': 4901, 'loss/train': 1.7449642419815063} +02/24/2022 07:54:23 - INFO - codeparrot_training - Step 4902: {'lr': 0.0004955041057022358, 'samples': 2510336, 'steps': 4902, 'loss/train': 3.5450022220611572} +02/24/2022 07:54:28 - INFO - codeparrot_training - Step 4903: {'lr': 0.0004955010160170667, 'samples': 2510848, 'steps': 4903, 'loss/train': 2.7884042263031006} +02/24/2022 07:54:32 - INFO - codeparrot_training - Step 4904: {'lr': 0.0004954979252802491, 'samples': 2511360, 'steps': 4904, 'loss/train': 1.781516194343567} +02/24/2022 07:54:37 - INFO - codeparrot_training - Step 4905: {'lr': 0.0004954948334917965, 'samples': 2511872, 'steps': 4905, 'loss/train': 2.7518675327301025} +02/24/2022 07:54:41 - INFO - codeparrot_training - Step 4906: {'lr': 0.0004954917406517218, 'samples': 2512384, 'steps': 4906, 'loss/train': 1.969922423362732} +02/24/2022 07:54:46 - INFO - codeparrot_training - Step 4907: {'lr': 0.0004954886467600386, 'samples': 2512896, 'steps': 4907, 'loss/train': 2.238471508026123} +02/24/2022 07:54:50 - INFO - codeparrot_training - Step 4908: {'lr': 0.0004954855518167599, 'samples': 2513408, 'steps': 4908, 'loss/train': 3.4221787452697754} +02/24/2022 07:54:57 - INFO - codeparrot_training - Step 4909: {'lr': 0.000495482455821899, 'samples': 2513920, 'steps': 4909, 'loss/train': 2.2988929748535156} +02/24/2022 07:55:00 - INFO - codeparrot_training - Step 4910: {'lr': 0.0004954793587754694, 'samples': 2514432, 'steps': 4910, 'loss/train': 2.313474416732788} +02/24/2022 07:55:06 - INFO - codeparrot_training - Step 4911: {'lr': 0.000495476260677484, 'samples': 2514944, 'steps': 4911, 'loss/train': 2.7729668617248535} +02/24/2022 07:55:09 - INFO - codeparrot_training - Step 4912: {'lr': 0.0004954731615279563, 'samples': 2515456, 'steps': 4912, 'loss/train': 3.1681737899780273} +02/24/2022 07:55:15 - INFO - codeparrot_training - Step 4913: {'lr': 0.0004954700613268995, 'samples': 2515968, 'steps': 4913, 'loss/train': 2.9729576110839844} +02/24/2022 07:55:18 - INFO - codeparrot_training - Step 4914: {'lr': 0.0004954669600743269, 'samples': 2516480, 'steps': 4914, 'loss/train': 1.7505345344543457} +02/24/2022 07:55:24 - INFO - codeparrot_training - Step 4915: {'lr': 0.0004954638577702519, 'samples': 2516992, 'steps': 4915, 'loss/train': 2.5069775581359863} +02/24/2022 07:55:27 - INFO - codeparrot_training - Step 4916: {'lr': 0.0004954607544146875, 'samples': 2517504, 'steps': 4916, 'loss/train': 2.729753017425537} +02/24/2022 07:55:34 - INFO - codeparrot_training - Step 4917: {'lr': 0.0004954576500076472, 'samples': 2518016, 'steps': 4917, 'loss/train': 2.9764349460601807} +02/24/2022 07:55:37 - INFO - codeparrot_training - Step 4918: {'lr': 0.0004954545445491444, 'samples': 2518528, 'steps': 4918, 'loss/train': 2.1443498134613037} +02/24/2022 07:55:43 - INFO - codeparrot_training - Step 4919: {'lr': 0.0004954514380391921, 'samples': 2519040, 'steps': 4919, 'loss/train': 1.6374441385269165} +02/24/2022 07:55:46 - INFO - codeparrot_training - Step 4920: {'lr': 0.0004954483304778039, 'samples': 2519552, 'steps': 4920, 'loss/train': 2.5993611812591553} +02/24/2022 07:55:52 - INFO - codeparrot_training - Step 4921: {'lr': 0.0004954452218649929, 'samples': 2520064, 'steps': 4921, 'loss/train': 1.3553427457809448} +02/24/2022 07:55:55 - INFO - codeparrot_training - Step 4922: {'lr': 0.0004954421122007727, 'samples': 2520576, 'steps': 4922, 'loss/train': 2.369081497192383} +02/24/2022 07:56:01 - INFO - codeparrot_training - Step 4923: {'lr': 0.0004954390014851563, 'samples': 2521088, 'steps': 4923, 'loss/train': 1.9360178709030151} +02/24/2022 07:56:04 - INFO - codeparrot_training - Step 4924: {'lr': 0.0004954358897181571, 'samples': 2521600, 'steps': 4924, 'loss/train': 3.213473081588745} +02/24/2022 07:56:10 - INFO - codeparrot_training - Step 4925: {'lr': 0.0004954327768997885, 'samples': 2522112, 'steps': 4925, 'loss/train': 1.8457989692687988} +02/24/2022 07:56:13 - INFO - codeparrot_training - Step 4926: {'lr': 0.0004954296630300638, 'samples': 2522624, 'steps': 4926, 'loss/train': 3.197357654571533} +02/24/2022 07:56:19 - INFO - codeparrot_training - Step 4927: {'lr': 0.0004954265481089965, 'samples': 2523136, 'steps': 4927, 'loss/train': 2.4606659412384033} +02/24/2022 07:56:22 - INFO - codeparrot_training - Step 4928: {'lr': 0.0004954234321365998, 'samples': 2523648, 'steps': 4928, 'loss/train': 2.2355072498321533} +02/24/2022 07:56:29 - INFO - codeparrot_training - Step 4929: {'lr': 0.0004954203151128868, 'samples': 2524160, 'steps': 4929, 'loss/train': 2.40077543258667} +02/24/2022 07:56:32 - INFO - codeparrot_training - Step 4930: {'lr': 0.0004954171970378713, 'samples': 2524672, 'steps': 4930, 'loss/train': 3.895455837249756} +02/24/2022 07:56:38 - INFO - codeparrot_training - Step 4931: {'lr': 0.0004954140779115664, 'samples': 2525184, 'steps': 4931, 'loss/train': 2.3514821529388428} +02/24/2022 07:56:43 - INFO - codeparrot_training - Step 4932: {'lr': 0.0004954109577339856, 'samples': 2525696, 'steps': 4932, 'loss/train': 2.851844310760498} +02/24/2022 07:56:47 - INFO - codeparrot_training - Step 4933: {'lr': 0.0004954078365051421, 'samples': 2526208, 'steps': 4933, 'loss/train': 1.7190126180648804} +02/24/2022 07:56:52 - INFO - codeparrot_training - Step 4934: {'lr': 0.0004954047142250494, 'samples': 2526720, 'steps': 4934, 'loss/train': 2.6115102767944336} +02/24/2022 07:56:56 - INFO - codeparrot_training - Step 4935: {'lr': 0.0004954015908937208, 'samples': 2527232, 'steps': 4935, 'loss/train': 3.137942314147949} +02/24/2022 07:57:01 - INFO - codeparrot_training - Step 4936: {'lr': 0.0004953984665111697, 'samples': 2527744, 'steps': 4936, 'loss/train': 1.0885988473892212} +02/24/2022 07:57:05 - INFO - codeparrot_training - Step 4937: {'lr': 0.0004953953410774095, 'samples': 2528256, 'steps': 4937, 'loss/train': 1.9508899450302124} +02/24/2022 07:57:10 - INFO - codeparrot_training - Step 4938: {'lr': 0.0004953922145924535, 'samples': 2528768, 'steps': 4938, 'loss/train': 2.9652576446533203} +02/24/2022 07:57:14 - INFO - codeparrot_training - Step 4939: {'lr': 0.0004953890870563153, 'samples': 2529280, 'steps': 4939, 'loss/train': 2.772034168243408} +02/24/2022 07:57:19 - INFO - codeparrot_training - Step 4940: {'lr': 0.0004953859584690081, 'samples': 2529792, 'steps': 4940, 'loss/train': 2.0161304473876953} +02/24/2022 07:57:23 - INFO - codeparrot_training - Step 4941: {'lr': 0.0004953828288305454, 'samples': 2530304, 'steps': 4941, 'loss/train': 1.348802089691162} +02/24/2022 07:57:29 - INFO - codeparrot_training - Step 4942: {'lr': 0.0004953796981409407, 'samples': 2530816, 'steps': 4942, 'loss/train': 3.405233383178711} +02/24/2022 07:57:32 - INFO - codeparrot_training - Step 4943: {'lr': 0.0004953765664002071, 'samples': 2531328, 'steps': 4943, 'loss/train': 2.201634407043457} +02/24/2022 07:57:38 - INFO - codeparrot_training - Step 4944: {'lr': 0.0004953734336083582, 'samples': 2531840, 'steps': 4944, 'loss/train': 3.315370798110962} +02/24/2022 07:57:41 - INFO - codeparrot_training - Step 4945: {'lr': 0.0004953702997654076, 'samples': 2532352, 'steps': 4945, 'loss/train': 2.597675085067749} +02/24/2022 07:57:47 - INFO - codeparrot_training - Step 4946: {'lr': 0.0004953671648713683, 'samples': 2532864, 'steps': 4946, 'loss/train': 2.51084566116333} +02/24/2022 07:57:50 - INFO - codeparrot_training - Step 4947: {'lr': 0.0004953640289262542, 'samples': 2533376, 'steps': 4947, 'loss/train': 3.502622127532959} +02/24/2022 07:57:56 - INFO - codeparrot_training - Step 4948: {'lr': 0.0004953608919300784, 'samples': 2533888, 'steps': 4948, 'loss/train': 3.09306263923645} +02/24/2022 07:57:59 - INFO - codeparrot_training - Step 4949: {'lr': 0.0004953577538828546, 'samples': 2534400, 'steps': 4949, 'loss/train': 2.943469285964966} +02/24/2022 07:58:05 - INFO - codeparrot_training - Step 4950: {'lr': 0.0004953546147845959, 'samples': 2534912, 'steps': 4950, 'loss/train': 2.031853675842285} +02/24/2022 07:58:08 - INFO - codeparrot_training - Step 4951: {'lr': 0.0004953514746353161, 'samples': 2535424, 'steps': 4951, 'loss/train': 1.1166609525680542} +02/24/2022 07:58:14 - INFO - codeparrot_training - Step 4952: {'lr': 0.0004953483334350283, 'samples': 2535936, 'steps': 4952, 'loss/train': 3.162663221359253} +02/24/2022 07:58:18 - INFO - codeparrot_training - Step 4953: {'lr': 0.0004953451911837463, 'samples': 2536448, 'steps': 4953, 'loss/train': 1.1098450422286987} +02/24/2022 07:58:23 - INFO - codeparrot_training - Step 4954: {'lr': 0.0004953420478814834, 'samples': 2536960, 'steps': 4954, 'loss/train': 2.0787851810455322} +02/24/2022 07:58:27 - INFO - codeparrot_training - Step 4955: {'lr': 0.000495338903528253, 'samples': 2537472, 'steps': 4955, 'loss/train': 1.8731580972671509} +02/24/2022 07:58:32 - INFO - codeparrot_training - Step 4956: {'lr': 0.0004953357581240686, 'samples': 2537984, 'steps': 4956, 'loss/train': 1.1025416851043701} +02/24/2022 07:58:36 - INFO - codeparrot_training - Step 4957: {'lr': 0.0004953326116689438, 'samples': 2538496, 'steps': 4957, 'loss/train': 2.0030360221862793} +02/24/2022 07:58:41 - INFO - codeparrot_training - Step 4958: {'lr': 0.000495329464162892, 'samples': 2539008, 'steps': 4958, 'loss/train': 3.1406311988830566} +02/24/2022 07:58:45 - INFO - codeparrot_training - Step 4959: {'lr': 0.0004953263156059266, 'samples': 2539520, 'steps': 4959, 'loss/train': 3.2624127864837646} +02/24/2022 07:58:50 - INFO - codeparrot_training - Step 4960: {'lr': 0.0004953231659980613, 'samples': 2540032, 'steps': 4960, 'loss/train': 3.0520901679992676} +02/24/2022 07:58:54 - INFO - codeparrot_training - Step 4961: {'lr': 0.0004953200153393094, 'samples': 2540544, 'steps': 4961, 'loss/train': 2.382667303085327} +02/24/2022 07:59:00 - INFO - codeparrot_training - Step 4962: {'lr': 0.0004953168636296845, 'samples': 2541056, 'steps': 4962, 'loss/train': 5.038918495178223} +02/24/2022 07:59:03 - INFO - codeparrot_training - Step 4963: {'lr': 0.0004953137108691999, 'samples': 2541568, 'steps': 4963, 'loss/train': 8.362198829650879} +02/24/2022 07:59:07 - INFO - codeparrot_training - Step 4964: {'lr': 0.0004953105570578693, 'samples': 2542080, 'steps': 4964, 'loss/train': 2.4258134365081787} +02/24/2022 07:59:13 - INFO - codeparrot_training - Step 4965: {'lr': 0.0004953074021957063, 'samples': 2542592, 'steps': 4965, 'loss/train': 3.187837600708008} +02/24/2022 07:59:16 - INFO - codeparrot_training - Step 4966: {'lr': 0.0004953042462827242, 'samples': 2543104, 'steps': 4966, 'loss/train': 1.609328031539917} +02/24/2022 07:59:22 - INFO - codeparrot_training - Step 4967: {'lr': 0.0004953010893189365, 'samples': 2543616, 'steps': 4967, 'loss/train': 2.3354032039642334} +02/24/2022 07:59:25 - INFO - codeparrot_training - Step 4968: {'lr': 0.000495297931304357, 'samples': 2544128, 'steps': 4968, 'loss/train': 1.3369238376617432} +02/24/2022 07:59:31 - INFO - codeparrot_training - Step 4969: {'lr': 0.000495294772238999, 'samples': 2544640, 'steps': 4969, 'loss/train': 2.266425132751465} +02/24/2022 07:59:34 - INFO - codeparrot_training - Step 4970: {'lr': 0.000495291612122876, 'samples': 2545152, 'steps': 4970, 'loss/train': 3.3470370769500732} +02/24/2022 07:59:40 - INFO - codeparrot_training - Step 4971: {'lr': 0.0004952884509560017, 'samples': 2545664, 'steps': 4971, 'loss/train': 3.1451051235198975} +02/24/2022 07:59:44 - INFO - codeparrot_training - Step 4972: {'lr': 0.0004952852887383895, 'samples': 2546176, 'steps': 4972, 'loss/train': 3.0164761543273926} +02/24/2022 07:59:49 - INFO - codeparrot_training - Step 4973: {'lr': 0.0004952821254700531, 'samples': 2546688, 'steps': 4973, 'loss/train': 2.5824825763702393} +02/24/2022 07:59:53 - INFO - codeparrot_training - Step 4974: {'lr': 0.0004952789611510059, 'samples': 2547200, 'steps': 4974, 'loss/train': 2.556976556777954} +02/24/2022 07:59:59 - INFO - codeparrot_training - Step 4975: {'lr': 0.0004952757957812615, 'samples': 2547712, 'steps': 4975, 'loss/train': 2.773397207260132} +02/24/2022 08:00:03 - INFO - codeparrot_training - Step 4976: {'lr': 0.0004952726293608335, 'samples': 2548224, 'steps': 4976, 'loss/train': 1.9987236261367798} +02/24/2022 08:00:08 - INFO - codeparrot_training - Step 4977: {'lr': 0.0004952694618897354, 'samples': 2548736, 'steps': 4977, 'loss/train': 1.3548074960708618} +02/24/2022 08:00:12 - INFO - codeparrot_training - Step 4978: {'lr': 0.0004952662933679809, 'samples': 2549248, 'steps': 4978, 'loss/train': 2.1706817150115967} +02/24/2022 08:00:17 - INFO - codeparrot_training - Step 4979: {'lr': 0.0004952631237955835, 'samples': 2549760, 'steps': 4979, 'loss/train': 2.3538026809692383} +02/24/2022 08:00:21 - INFO - codeparrot_training - Step 4980: {'lr': 0.0004952599531725567, 'samples': 2550272, 'steps': 4980, 'loss/train': 0.886324405670166} +02/24/2022 08:00:26 - INFO - codeparrot_training - Step 4981: {'lr': 0.0004952567814989141, 'samples': 2550784, 'steps': 4981, 'loss/train': 2.7025487422943115} +02/24/2022 08:00:30 - INFO - codeparrot_training - Step 4982: {'lr': 0.0004952536087746693, 'samples': 2551296, 'steps': 4982, 'loss/train': 2.810694694519043} +02/24/2022 08:00:35 - INFO - codeparrot_training - Step 4983: {'lr': 0.000495250434999836, 'samples': 2551808, 'steps': 4983, 'loss/train': 3.363544225692749} +02/24/2022 08:00:39 - INFO - codeparrot_training - Step 4984: {'lr': 0.0004952472601744277, 'samples': 2552320, 'steps': 4984, 'loss/train': 1.953047752380371} +02/24/2022 08:00:44 - INFO - codeparrot_training - Step 4985: {'lr': 0.000495244084298458, 'samples': 2552832, 'steps': 4985, 'loss/train': 2.4962847232818604} +02/24/2022 08:00:48 - INFO - codeparrot_training - Step 4986: {'lr': 0.0004952409073719405, 'samples': 2553344, 'steps': 4986, 'loss/train': 3.0354714393615723} +02/24/2022 08:00:53 - INFO - codeparrot_training - Step 4987: {'lr': 0.0004952377293948888, 'samples': 2553856, 'steps': 4987, 'loss/train': 2.5642433166503906} +02/24/2022 08:00:57 - INFO - codeparrot_training - Step 4988: {'lr': 0.0004952345503673166, 'samples': 2554368, 'steps': 4988, 'loss/train': 2.23897647857666} +02/24/2022 08:01:03 - INFO - codeparrot_training - Step 4989: {'lr': 0.0004952313702892375, 'samples': 2554880, 'steps': 4989, 'loss/train': 0.9788317680358887} +02/24/2022 08:01:06 - INFO - codeparrot_training - Step 4990: {'lr': 0.0004952281891606649, 'samples': 2555392, 'steps': 4990, 'loss/train': 1.6288464069366455} +02/24/2022 08:01:12 - INFO - codeparrot_training - Step 4991: {'lr': 0.0004952250069816127, 'samples': 2555904, 'steps': 4991, 'loss/train': 1.7622431516647339} +02/24/2022 08:01:15 - INFO - codeparrot_training - Step 4992: {'lr': 0.0004952218237520945, 'samples': 2556416, 'steps': 4992, 'loss/train': 0.8125408291816711} +02/24/2022 08:01:21 - INFO - codeparrot_training - Step 4993: {'lr': 0.0004952186394721239, 'samples': 2556928, 'steps': 4993, 'loss/train': 2.602688789367676} +02/24/2022 08:01:24 - INFO - codeparrot_training - Step 4994: {'lr': 0.0004952154541417144, 'samples': 2557440, 'steps': 4994, 'loss/train': 2.5203757286071777} +02/24/2022 08:01:30 - INFO - codeparrot_training - Step 4995: {'lr': 0.0004952122677608798, 'samples': 2557952, 'steps': 4995, 'loss/train': 2.7976837158203125} +02/24/2022 08:01:33 - INFO - codeparrot_training - Step 4996: {'lr': 0.0004952090803296337, 'samples': 2558464, 'steps': 4996, 'loss/train': 2.25091290473938} +02/24/2022 08:01:39 - INFO - codeparrot_training - Step 4997: {'lr': 0.0004952058918479899, 'samples': 2558976, 'steps': 4997, 'loss/train': 2.8913917541503906} +02/24/2022 08:01:42 - INFO - codeparrot_training - Step 4998: {'lr': 0.0004952027023159617, 'samples': 2559488, 'steps': 4998, 'loss/train': 3.246948719024658} +02/24/2022 08:01:48 - INFO - codeparrot_training - Step 4999: {'lr': 0.0004951995117335631, 'samples': 2560000, 'steps': 4999, 'loss/train': 2.412139415740967} +02/24/2022 08:01:48 - INFO - codeparrot_training - Evaluating and saving model checkpoint