diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -28470,3 +28470,1009 @@ Use FP16 precision: False 02/25/2022 13:51:32 - INFO - codeparrot_training - Step 27998: {'lr': 0.00021740089718167886, 'samples': 14335488, 'steps': 27998, 'loss/train': 1.9601777791976929} 02/25/2022 13:51:37 - INFO - codeparrot_training - Step 27999: {'lr': 0.0002173846744934761, 'samples': 14336000, 'steps': 27999, 'loss/train': 1.8428465127944946} 02/25/2022 13:51:37 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 13:51:55 - WARNING - huggingface_hub.repository - Several commits (28) will be pushed upstream. +02/25/2022 13:51:55 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 13:52:28 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 9d342aa..739c14e floral-grass-11 -> floral-grass-11 + +02/25/2022 13:52:33 - INFO - codeparrot_training - Step 28000: {'lr': 0.00021736845194498717, 'samples': 14336512, 'steps': 28000, 'loss/train': 1.673930048942566} +02/25/2022 13:52:38 - INFO - codeparrot_training - Step 28001: {'lr': 0.00021735222953628131, 'samples': 14337024, 'steps': 28001, 'loss/train': 1.4876612424850464} +02/25/2022 13:52:42 - INFO - codeparrot_training - Step 28002: {'lr': 0.0002173360072674281, 'samples': 14337536, 'steps': 28002, 'loss/train': 0.10827385634183884} +02/25/2022 13:52:47 - INFO - codeparrot_training - Step 28003: {'lr': 0.0002173197851384971, 'samples': 14338048, 'steps': 28003, 'loss/train': 2.374160051345825} +02/25/2022 13:52:51 - INFO - codeparrot_training - Step 28004: {'lr': 0.00021730356314955785, 'samples': 14338560, 'steps': 28004, 'loss/train': 1.1584421396255493} +02/25/2022 13:52:57 - INFO - codeparrot_training - Step 28005: {'lr': 0.00021728734130067968, 'samples': 14339072, 'steps': 28005, 'loss/train': 2.1272032260894775} +02/25/2022 13:53:00 - INFO - codeparrot_training - Step 28006: {'lr': 0.00021727111959193212, 'samples': 14339584, 'steps': 28006, 'loss/train': 1.7936766147613525} +02/25/2022 13:53:06 - INFO - codeparrot_training - Step 28007: {'lr': 0.0002172548980233847, 'samples': 14340096, 'steps': 28007, 'loss/train': 1.1587542295455933} +02/25/2022 13:53:09 - INFO - codeparrot_training - Step 28008: {'lr': 0.00021723867659510698, 'samples': 14340608, 'steps': 28008, 'loss/train': 1.640569806098938} +02/25/2022 13:53:15 - INFO - codeparrot_training - Step 28009: {'lr': 0.0002172224553071683, 'samples': 14341120, 'steps': 28009, 'loss/train': 1.0196444988250732} +02/25/2022 13:53:18 - INFO - codeparrot_training - Step 28010: {'lr': 0.0002172062341596382, 'samples': 14341632, 'steps': 28010, 'loss/train': 2.137467622756958} +02/25/2022 13:53:24 - INFO - codeparrot_training - Step 28011: {'lr': 0.0002171900131525862, 'samples': 14342144, 'steps': 28011, 'loss/train': 2.705371856689453} +02/25/2022 13:53:27 - INFO - codeparrot_training - Step 28012: {'lr': 0.00021717379228608171, 'samples': 14342656, 'steps': 28012, 'loss/train': 1.1543686389923096} +02/25/2022 13:53:33 - INFO - codeparrot_training - Step 28013: {'lr': 0.0002171575715601944, 'samples': 14343168, 'steps': 28013, 'loss/train': 1.592047929763794} +02/25/2022 13:53:36 - INFO - codeparrot_training - Step 28014: {'lr': 0.00021714135097499355, 'samples': 14343680, 'steps': 28014, 'loss/train': 2.2105391025543213} +02/25/2022 13:53:42 - INFO - codeparrot_training - Step 28015: {'lr': 0.0002171251305305487, 'samples': 14344192, 'steps': 28015, 'loss/train': 1.9797700643539429} +02/25/2022 13:53:45 - INFO - codeparrot_training - Step 28016: {'lr': 0.00021710891022692937, 'samples': 14344704, 'steps': 28016, 'loss/train': 1.2600021362304688} +02/25/2022 13:53:52 - INFO - codeparrot_training - Step 28017: {'lr': 0.00021709269006420508, 'samples': 14345216, 'steps': 28017, 'loss/train': 2.207559108734131} +02/25/2022 13:53:55 - INFO - codeparrot_training - Step 28018: {'lr': 0.00021707647004244517, 'samples': 14345728, 'steps': 28018, 'loss/train': 0.6188715100288391} +02/25/2022 13:54:00 - INFO - codeparrot_training - Step 28019: {'lr': 0.00021706025016171923, 'samples': 14346240, 'steps': 28019, 'loss/train': 1.5557671785354614} +02/25/2022 13:54:04 - INFO - codeparrot_training - Step 28020: {'lr': 0.00021704403042209676, 'samples': 14346752, 'steps': 28020, 'loss/train': 2.0231847763061523} +02/25/2022 13:54:09 - INFO - codeparrot_training - Step 28021: {'lr': 0.00021702781082364714, 'samples': 14347264, 'steps': 28021, 'loss/train': 1.9406682252883911} +02/25/2022 13:54:13 - INFO - codeparrot_training - Step 28022: {'lr': 0.00021701159136643997, 'samples': 14347776, 'steps': 28022, 'loss/train': 0.7393404841423035} +02/25/2022 13:54:18 - INFO - codeparrot_training - Step 28023: {'lr': 0.00021699537205054466, 'samples': 14348288, 'steps': 28023, 'loss/train': 1.646751046180725} +02/25/2022 13:54:22 - INFO - codeparrot_training - Step 28024: {'lr': 0.00021697915287603064, 'samples': 14348800, 'steps': 28024, 'loss/train': 1.502605676651001} +02/25/2022 13:54:27 - INFO - codeparrot_training - Step 28025: {'lr': 0.00021696293384296746, 'samples': 14349312, 'steps': 28025, 'loss/train': 1.6017626523971558} +02/25/2022 13:54:31 - INFO - codeparrot_training - Step 28026: {'lr': 0.00021694671495142462, 'samples': 14349824, 'steps': 28026, 'loss/train': 1.9990664720535278} +02/25/2022 13:54:37 - INFO - codeparrot_training - Step 28027: {'lr': 0.00021693049620147157, 'samples': 14350336, 'steps': 28027, 'loss/train': 1.449980616569519} +02/25/2022 13:54:40 - INFO - codeparrot_training - Step 28028: {'lr': 0.0002169142775931777, 'samples': 14350848, 'steps': 28028, 'loss/train': 2.0600173473358154} +02/25/2022 13:54:46 - INFO - codeparrot_training - Step 28029: {'lr': 0.00021689805912661258, 'samples': 14351360, 'steps': 28029, 'loss/train': 1.0579495429992676} +02/25/2022 13:54:49 - INFO - codeparrot_training - Step 28030: {'lr': 0.00021688184080184572, 'samples': 14351872, 'steps': 28030, 'loss/train': 1.7171986103057861} +02/25/2022 13:54:55 - INFO - codeparrot_training - Step 28031: {'lr': 0.00021686562261894653, 'samples': 14352384, 'steps': 28031, 'loss/train': 1.11579167842865} +02/25/2022 13:54:58 - INFO - codeparrot_training - Step 28032: {'lr': 0.00021684940457798442, 'samples': 14352896, 'steps': 28032, 'loss/train': 1.6769918203353882} +02/25/2022 13:55:04 - INFO - codeparrot_training - Step 28033: {'lr': 0.00021683318667902897, 'samples': 14353408, 'steps': 28033, 'loss/train': 2.0899550914764404} +02/25/2022 13:55:07 - INFO - codeparrot_training - Step 28034: {'lr': 0.0002168169689221496, 'samples': 14353920, 'steps': 28034, 'loss/train': 0.5463775396347046} +02/25/2022 13:55:13 - INFO - codeparrot_training - Step 28035: {'lr': 0.0002168007513074159, 'samples': 14354432, 'steps': 28035, 'loss/train': 1.1548123359680176} +02/25/2022 13:55:19 - INFO - codeparrot_training - Step 28036: {'lr': 0.0002167845338348971, 'samples': 14354944, 'steps': 28036, 'loss/train': 1.4615651369094849} +02/25/2022 13:55:22 - INFO - codeparrot_training - Step 28037: {'lr': 0.00021676831650466286, 'samples': 14355456, 'steps': 28037, 'loss/train': 0.10096308588981628} +02/25/2022 13:55:28 - INFO - codeparrot_training - Step 28038: {'lr': 0.00021675209931678257, 'samples': 14355968, 'steps': 28038, 'loss/train': 1.5023850202560425} +02/25/2022 13:55:31 - INFO - codeparrot_training - Step 28039: {'lr': 0.00021673588227132587, 'samples': 14356480, 'steps': 28039, 'loss/train': 1.06998610496521} +02/25/2022 13:55:37 - INFO - codeparrot_training - Step 28040: {'lr': 0.00021671966536836195, 'samples': 14356992, 'steps': 28040, 'loss/train': 1.199450969696045} +02/25/2022 13:55:41 - INFO - codeparrot_training - Step 28041: {'lr': 0.00021670344860796043, 'samples': 14357504, 'steps': 28041, 'loss/train': 1.4964430332183838} +02/25/2022 13:55:46 - INFO - codeparrot_training - Step 28042: {'lr': 0.00021668723199019075, 'samples': 14358016, 'steps': 28042, 'loss/train': 1.9237947463989258} +02/25/2022 13:55:50 - INFO - codeparrot_training - Step 28043: {'lr': 0.00021667101551512254, 'samples': 14358528, 'steps': 28043, 'loss/train': 1.155117392539978} +02/25/2022 13:55:55 - INFO - codeparrot_training - Step 28044: {'lr': 0.00021665479918282494, 'samples': 14359040, 'steps': 28044, 'loss/train': 1.9901700019836426} +02/25/2022 13:55:59 - INFO - codeparrot_training - Step 28045: {'lr': 0.00021663858299336765, 'samples': 14359552, 'steps': 28045, 'loss/train': 1.1686725616455078} +02/25/2022 13:56:04 - INFO - codeparrot_training - Step 28046: {'lr': 0.00021662236694682002, 'samples': 14360064, 'steps': 28046, 'loss/train': 1.9542475938796997} +02/25/2022 13:56:08 - INFO - codeparrot_training - Step 28047: {'lr': 0.00021660615104325163, 'samples': 14360576, 'steps': 28047, 'loss/train': 1.1862233877182007} +02/25/2022 13:56:13 - INFO - codeparrot_training - Step 28048: {'lr': 0.00021658993528273195, 'samples': 14361088, 'steps': 28048, 'loss/train': 1.810849905014038} +02/25/2022 13:56:17 - INFO - codeparrot_training - Step 28049: {'lr': 0.0002165737196653303, 'samples': 14361600, 'steps': 28049, 'loss/train': 2.0611181259155273} +02/25/2022 13:56:23 - INFO - codeparrot_training - Step 28050: {'lr': 0.00021655750419111622, 'samples': 14362112, 'steps': 28050, 'loss/train': 2.0399246215820312} +02/25/2022 13:56:26 - INFO - codeparrot_training - Step 28051: {'lr': 0.00021654128886015918, 'samples': 14362624, 'steps': 28051, 'loss/train': 2.108231782913208} +02/25/2022 13:56:32 - INFO - codeparrot_training - Step 28052: {'lr': 0.0002165250736725287, 'samples': 14363136, 'steps': 28052, 'loss/train': 2.020242214202881} +02/25/2022 13:56:36 - INFO - codeparrot_training - Step 28053: {'lr': 0.0002165088586282941, 'samples': 14363648, 'steps': 28053, 'loss/train': 0.4527716040611267} +02/25/2022 13:56:41 - INFO - codeparrot_training - Step 28054: {'lr': 0.0002164926437275249, 'samples': 14364160, 'steps': 28054, 'loss/train': 2.0973050594329834} +02/25/2022 13:56:45 - INFO - codeparrot_training - Step 28055: {'lr': 0.00021647642897029058, 'samples': 14364672, 'steps': 28055, 'loss/train': 1.2976244688034058} +02/25/2022 13:56:50 - INFO - codeparrot_training - Step 28056: {'lr': 0.00021646021435666073, 'samples': 14365184, 'steps': 28056, 'loss/train': 2.2881805896759033} +02/25/2022 13:56:54 - INFO - codeparrot_training - Step 28057: {'lr': 0.00021644399988670452, 'samples': 14365696, 'steps': 28057, 'loss/train': 1.6598111391067505} +02/25/2022 13:56:59 - INFO - codeparrot_training - Step 28058: {'lr': 0.0002164277855604916, 'samples': 14366208, 'steps': 28058, 'loss/train': 1.6773346662521362} +02/25/2022 13:57:03 - INFO - codeparrot_training - Step 28059: {'lr': 0.00021641157137809134, 'samples': 14366720, 'steps': 28059, 'loss/train': 1.7139997482299805} +02/25/2022 13:57:08 - INFO - codeparrot_training - Step 28060: {'lr': 0.00021639535733957333, 'samples': 14367232, 'steps': 28060, 'loss/train': 1.2584538459777832} +02/25/2022 13:57:12 - INFO - codeparrot_training - Step 28061: {'lr': 0.00021637914344500693, 'samples': 14367744, 'steps': 28061, 'loss/train': 1.7919621467590332} +02/25/2022 13:57:18 - INFO - codeparrot_training - Step 28062: {'lr': 0.00021636292969446152, 'samples': 14368256, 'steps': 28062, 'loss/train': 2.3028078079223633} +02/25/2022 13:57:21 - INFO - codeparrot_training - Step 28063: {'lr': 0.00021634671608800667, 'samples': 14368768, 'steps': 28063, 'loss/train': 1.750710368156433} +02/25/2022 13:57:27 - INFO - codeparrot_training - Step 28064: {'lr': 0.00021633050262571187, 'samples': 14369280, 'steps': 28064, 'loss/train': 2.099971294403076} +02/25/2022 13:57:30 - INFO - codeparrot_training - Step 28065: {'lr': 0.0002163142893076465, 'samples': 14369792, 'steps': 28065, 'loss/train': 1.5793737173080444} +02/25/2022 13:57:36 - INFO - codeparrot_training - Step 28066: {'lr': 0.00021629807613387993, 'samples': 14370304, 'steps': 28066, 'loss/train': 1.7580320835113525} +02/25/2022 13:57:39 - INFO - codeparrot_training - Step 28067: {'lr': 0.0002162818631044818, 'samples': 14370816, 'steps': 28067, 'loss/train': 2.207139015197754} +02/25/2022 13:57:45 - INFO - codeparrot_training - Step 28068: {'lr': 0.00021626565021952136, 'samples': 14371328, 'steps': 28068, 'loss/train': 2.1355602741241455} +02/25/2022 13:57:48 - INFO - codeparrot_training - Step 28069: {'lr': 0.0002162494374790682, 'samples': 14371840, 'steps': 28069, 'loss/train': 1.7528828382492065} +02/25/2022 13:57:54 - INFO - codeparrot_training - Step 28070: {'lr': 0.00021623322488319174, 'samples': 14372352, 'steps': 28070, 'loss/train': 0.9418584704399109} +02/25/2022 13:57:57 - INFO - codeparrot_training - Step 28071: {'lr': 0.0002162170124319615, 'samples': 14372864, 'steps': 28071, 'loss/train': 1.112986445426941} +02/25/2022 13:58:03 - INFO - codeparrot_training - Step 28072: {'lr': 0.00021620080012544674, 'samples': 14373376, 'steps': 28072, 'loss/train': 2.258427619934082} +02/25/2022 13:58:07 - INFO - codeparrot_training - Step 28073: {'lr': 0.000216184587963717, 'samples': 14373888, 'steps': 28073, 'loss/train': 1.416995882987976} +02/25/2022 13:58:12 - INFO - codeparrot_training - Step 28074: {'lr': 0.00021616837594684192, 'samples': 14374400, 'steps': 28074, 'loss/train': 1.9147346019744873} +02/25/2022 13:58:16 - INFO - codeparrot_training - Step 28075: {'lr': 0.00021615216407489064, 'samples': 14374912, 'steps': 28075, 'loss/train': 0.5222363471984863} +02/25/2022 13:58:21 - INFO - codeparrot_training - Step 28076: {'lr': 0.0002161359523479327, 'samples': 14375424, 'steps': 28076, 'loss/train': 1.954379916191101} +02/25/2022 13:58:25 - INFO - codeparrot_training - Step 28077: {'lr': 0.00021611974076603764, 'samples': 14375936, 'steps': 28077, 'loss/train': 1.7154027223587036} +02/25/2022 13:58:30 - INFO - codeparrot_training - Step 28078: {'lr': 0.00021610352932927495, 'samples': 14376448, 'steps': 28078, 'loss/train': 1.7833881378173828} +02/25/2022 13:58:34 - INFO - codeparrot_training - Step 28079: {'lr': 0.00021608731803771387, 'samples': 14376960, 'steps': 28079, 'loss/train': 0.8669403791427612} +02/25/2022 13:58:39 - INFO - codeparrot_training - Step 28080: {'lr': 0.00021607110689142393, 'samples': 14377472, 'steps': 28080, 'loss/train': 1.6418979167938232} +02/25/2022 13:58:43 - INFO - codeparrot_training - Step 28081: {'lr': 0.0002160548958904746, 'samples': 14377984, 'steps': 28081, 'loss/train': 0.0733647346496582} +02/25/2022 13:58:50 - INFO - codeparrot_training - Step 28082: {'lr': 0.00021603868503493535, 'samples': 14378496, 'steps': 28082, 'loss/train': 1.7847167253494263} +02/25/2022 13:58:53 - INFO - codeparrot_training - Step 28083: {'lr': 0.00021602247432487564, 'samples': 14379008, 'steps': 28083, 'loss/train': 1.5454208850860596} +02/25/2022 13:58:59 - INFO - codeparrot_training - Step 28084: {'lr': 0.00021600626376036482, 'samples': 14379520, 'steps': 28084, 'loss/train': 2.453158378601074} +02/25/2022 13:59:02 - INFO - codeparrot_training - Step 28085: {'lr': 0.00021599005334147232, 'samples': 14380032, 'steps': 28085, 'loss/train': 1.43528413772583} +02/25/2022 13:59:08 - INFO - codeparrot_training - Step 28086: {'lr': 0.00021597384306826765, 'samples': 14380544, 'steps': 28086, 'loss/train': 1.4690775871276855} +02/25/2022 13:59:11 - INFO - codeparrot_training - Step 28087: {'lr': 0.00021595763294082033, 'samples': 14381056, 'steps': 28087, 'loss/train': 1.5960325002670288} +02/25/2022 13:59:17 - INFO - codeparrot_training - Step 28088: {'lr': 0.0002159414229591996, 'samples': 14381568, 'steps': 28088, 'loss/train': 2.019350528717041} +02/25/2022 13:59:20 - INFO - codeparrot_training - Step 28089: {'lr': 0.000215925213123475, 'samples': 14382080, 'steps': 28089, 'loss/train': 1.195811152458191} +02/25/2022 13:59:26 - INFO - codeparrot_training - Step 28090: {'lr': 0.00021590900343371598, 'samples': 14382592, 'steps': 28090, 'loss/train': 1.4054591655731201} +02/25/2022 13:59:29 - INFO - codeparrot_training - Step 28091: {'lr': 0.00021589279388999205, 'samples': 14383104, 'steps': 28091, 'loss/train': 1.5456644296646118} +02/25/2022 13:59:35 - INFO - codeparrot_training - Step 28092: {'lr': 0.00021587658449237246, 'samples': 14383616, 'steps': 28092, 'loss/train': 1.7446430921554565} +02/25/2022 13:59:38 - INFO - codeparrot_training - Step 28093: {'lr': 0.00021586037524092677, 'samples': 14384128, 'steps': 28093, 'loss/train': 2.2373547554016113} +02/25/2022 13:59:44 - INFO - codeparrot_training - Step 28094: {'lr': 0.00021584416613572435, 'samples': 14384640, 'steps': 28094, 'loss/train': 1.3391170501708984} +02/25/2022 13:59:47 - INFO - codeparrot_training - Step 28095: {'lr': 0.0002158279571768347, 'samples': 14385152, 'steps': 28095, 'loss/train': 1.7505850791931152} +02/25/2022 13:59:53 - INFO - codeparrot_training - Step 28096: {'lr': 0.00021581174836432735, 'samples': 14385664, 'steps': 28096, 'loss/train': 1.226311445236206} +02/25/2022 13:59:56 - INFO - codeparrot_training - Step 28097: {'lr': 0.00021579553969827147, 'samples': 14386176, 'steps': 28097, 'loss/train': 1.9549120664596558} +02/25/2022 14:00:02 - INFO - codeparrot_training - Step 28098: {'lr': 0.00021577933117873668, 'samples': 14386688, 'steps': 28098, 'loss/train': 2.2918050289154053} +02/25/2022 14:00:08 - INFO - codeparrot_training - Step 28099: {'lr': 0.00021576312280579236, 'samples': 14387200, 'steps': 28099, 'loss/train': 2.2413296699523926} +02/25/2022 14:00:11 - INFO - codeparrot_training - Step 28100: {'lr': 0.00021574691457950805, 'samples': 14387712, 'steps': 28100, 'loss/train': 1.1642554998397827} +02/25/2022 14:00:17 - INFO - codeparrot_training - Step 28101: {'lr': 0.00021573070649995296, 'samples': 14388224, 'steps': 28101, 'loss/train': 1.2302623987197876} +02/25/2022 14:00:20 - INFO - codeparrot_training - Step 28102: {'lr': 0.00021571449856719666, 'samples': 14388736, 'steps': 28102, 'loss/train': 1.9054666757583618} +02/25/2022 14:00:26 - INFO - codeparrot_training - Step 28103: {'lr': 0.00021569829078130854, 'samples': 14389248, 'steps': 28103, 'loss/train': 1.915951132774353} +02/25/2022 14:00:29 - INFO - codeparrot_training - Step 28104: {'lr': 0.00021568208314235818, 'samples': 14389760, 'steps': 28104, 'loss/train': 1.8752994537353516} +02/25/2022 14:00:35 - INFO - codeparrot_training - Step 28105: {'lr': 0.00021566587565041476, 'samples': 14390272, 'steps': 28105, 'loss/train': 1.3552035093307495} +02/25/2022 14:00:38 - INFO - codeparrot_training - Step 28106: {'lr': 0.00021564966830554783, 'samples': 14390784, 'steps': 28106, 'loss/train': 1.3413447141647339} +02/25/2022 14:00:45 - INFO - codeparrot_training - Step 28107: {'lr': 0.00021563346110782685, 'samples': 14391296, 'steps': 28107, 'loss/train': 2.0431149005889893} +02/25/2022 14:00:48 - INFO - codeparrot_training - Step 28108: {'lr': 0.0002156172540573212, 'samples': 14391808, 'steps': 28108, 'loss/train': 0.9471141695976257} +02/25/2022 14:00:54 - INFO - codeparrot_training - Step 28109: {'lr': 0.00021560104715410034, 'samples': 14392320, 'steps': 28109, 'loss/train': 1.4455173015594482} +02/25/2022 14:00:57 - INFO - codeparrot_training - Step 28110: {'lr': 0.00021558484039823362, 'samples': 14392832, 'steps': 28110, 'loss/train': 1.6967476606369019} +02/25/2022 14:01:02 - INFO - codeparrot_training - Step 28111: {'lr': 0.00021556863378979055, 'samples': 14393344, 'steps': 28111, 'loss/train': 2.3741543292999268} +02/25/2022 14:01:06 - INFO - codeparrot_training - Step 28112: {'lr': 0.0002155524273288405, 'samples': 14393856, 'steps': 28112, 'loss/train': 2.576460599899292} +02/25/2022 14:01:11 - INFO - codeparrot_training - Step 28113: {'lr': 0.00021553622101545296, 'samples': 14394368, 'steps': 28113, 'loss/train': 1.945274829864502} +02/25/2022 14:01:15 - INFO - codeparrot_training - Step 28114: {'lr': 0.00021552001484969726, 'samples': 14394880, 'steps': 28114, 'loss/train': 1.6612763404846191} +02/25/2022 14:01:20 - INFO - codeparrot_training - Step 28115: {'lr': 0.00021550380883164286, 'samples': 14395392, 'steps': 28115, 'loss/train': 1.6042780876159668} +02/25/2022 14:01:24 - INFO - codeparrot_training - Step 28116: {'lr': 0.0002154876029613592, 'samples': 14395904, 'steps': 28116, 'loss/train': 0.7500272989273071} +02/25/2022 14:01:30 - INFO - codeparrot_training - Step 28117: {'lr': 0.00021547139723891565, 'samples': 14396416, 'steps': 28117, 'loss/train': 1.5988813638687134} +02/25/2022 14:01:33 - INFO - codeparrot_training - Step 28118: {'lr': 0.00021545519166438182, 'samples': 14396928, 'steps': 28118, 'loss/train': 1.2883379459381104} +02/25/2022 14:01:39 - INFO - codeparrot_training - Step 28119: {'lr': 0.00021543898623782684, 'samples': 14397440, 'steps': 28119, 'loss/train': 1.5287202596664429} +02/25/2022 14:01:42 - INFO - codeparrot_training - Step 28120: {'lr': 0.00021542278095932027, 'samples': 14397952, 'steps': 28120, 'loss/train': 1.446513056755066} +02/25/2022 14:01:48 - INFO - codeparrot_training - Step 28121: {'lr': 0.00021540657582893154, 'samples': 14398464, 'steps': 28121, 'loss/train': 2.0345067977905273} +02/25/2022 14:01:51 - INFO - codeparrot_training - Step 28122: {'lr': 0.00021539037084673015, 'samples': 14398976, 'steps': 28122, 'loss/train': 2.1813249588012695} +02/25/2022 14:01:57 - INFO - codeparrot_training - Step 28123: {'lr': 0.00021537416601278533, 'samples': 14399488, 'steps': 28123, 'loss/train': 0.6046134233474731} +02/25/2022 14:02:00 - INFO - codeparrot_training - Step 28124: {'lr': 0.00021535796132716658, 'samples': 14400000, 'steps': 28124, 'loss/train': 1.187265157699585} +02/25/2022 14:02:06 - INFO - codeparrot_training - Step 28125: {'lr': 0.00021534175678994335, 'samples': 14400512, 'steps': 28125, 'loss/train': 1.6002784967422485} +02/25/2022 14:02:09 - INFO - codeparrot_training - Step 28126: {'lr': 0.00021532555240118509, 'samples': 14401024, 'steps': 28126, 'loss/train': 1.6754015684127808} +02/25/2022 14:02:16 - INFO - codeparrot_training - Step 28127: {'lr': 0.00021530934816096106, 'samples': 14401536, 'steps': 28127, 'loss/train': 2.282870054244995} +02/25/2022 14:02:19 - INFO - codeparrot_training - Step 28128: {'lr': 0.00021529314406934077, 'samples': 14402048, 'steps': 28128, 'loss/train': 2.4172914028167725} +02/25/2022 14:02:25 - INFO - codeparrot_training - Step 28129: {'lr': 0.00021527694012639362, 'samples': 14402560, 'steps': 28129, 'loss/train': 1.4151328802108765} +02/25/2022 14:02:28 - INFO - codeparrot_training - Step 28130: {'lr': 0.00021526073633218907, 'samples': 14403072, 'steps': 28130, 'loss/train': 1.3612678050994873} +02/25/2022 14:02:34 - INFO - codeparrot_training - Step 28131: {'lr': 0.00021524453268679657, 'samples': 14403584, 'steps': 28131, 'loss/train': 2.0022971630096436} +02/25/2022 14:02:37 - INFO - codeparrot_training - Step 28132: {'lr': 0.00021522832919028533, 'samples': 14404096, 'steps': 28132, 'loss/train': 1.8268616199493408} +02/25/2022 14:02:43 - INFO - codeparrot_training - Step 28133: {'lr': 0.00021521212584272494, 'samples': 14404608, 'steps': 28133, 'loss/train': 1.985948085784912} +02/25/2022 14:02:46 - INFO - codeparrot_training - Step 28134: {'lr': 0.00021519592264418472, 'samples': 14405120, 'steps': 28134, 'loss/train': 0.5921363234519958} +02/25/2022 14:02:52 - INFO - codeparrot_training - Step 28135: {'lr': 0.00021517971959473422, 'samples': 14405632, 'steps': 28135, 'loss/train': 1.3431898355484009} +02/25/2022 14:02:55 - INFO - codeparrot_training - Step 28136: {'lr': 0.00021516351669444267, 'samples': 14406144, 'steps': 28136, 'loss/train': 1.3506590127944946} +02/25/2022 14:03:01 - INFO - codeparrot_training - Step 28137: {'lr': 0.00021514731394337952, 'samples': 14406656, 'steps': 28137, 'loss/train': 2.289846658706665} +02/25/2022 14:03:04 - INFO - codeparrot_training - Step 28138: {'lr': 0.00021513111134161423, 'samples': 14407168, 'steps': 28138, 'loss/train': 1.7058439254760742} +02/25/2022 14:03:10 - INFO - codeparrot_training - Step 28139: {'lr': 0.00021511490888921628, 'samples': 14407680, 'steps': 28139, 'loss/train': 1.9068092107772827} +02/25/2022 14:03:14 - INFO - codeparrot_training - Step 28140: {'lr': 0.00021509870658625489, 'samples': 14408192, 'steps': 28140, 'loss/train': 1.0219967365264893} +02/25/2022 14:03:19 - INFO - codeparrot_training - Step 28141: {'lr': 0.00021508250443279952, 'samples': 14408704, 'steps': 28141, 'loss/train': 2.337024450302124} +02/25/2022 14:03:23 - INFO - codeparrot_training - Step 28142: {'lr': 0.00021506630242891967, 'samples': 14409216, 'steps': 28142, 'loss/train': 2.6537768840789795} +02/25/2022 14:03:28 - INFO - codeparrot_training - Step 28143: {'lr': 0.00021505010057468467, 'samples': 14409728, 'steps': 28143, 'loss/train': 2.428022861480713} +02/25/2022 14:03:31 - INFO - codeparrot_training - Step 28144: {'lr': 0.00021503389887016404, 'samples': 14410240, 'steps': 28144, 'loss/train': 8.52154541015625} +02/25/2022 14:03:37 - INFO - codeparrot_training - Step 28145: {'lr': 0.00021501769731542694, 'samples': 14410752, 'steps': 28145, 'loss/train': 1.2380784749984741} +02/25/2022 14:03:41 - INFO - codeparrot_training - Step 28146: {'lr': 0.00021500149591054297, 'samples': 14411264, 'steps': 28146, 'loss/train': 1.3599052429199219} +02/25/2022 14:03:46 - INFO - codeparrot_training - Step 28147: {'lr': 0.0002149852946555815, 'samples': 14411776, 'steps': 28147, 'loss/train': 1.460645318031311} +02/25/2022 14:03:53 - INFO - codeparrot_training - Step 28148: {'lr': 0.00021496909355061194, 'samples': 14412288, 'steps': 28148, 'loss/train': 2.548835277557373} +02/25/2022 14:03:56 - INFO - codeparrot_training - Step 28149: {'lr': 0.0002149528925957036, 'samples': 14412800, 'steps': 28149, 'loss/train': 1.4063996076583862} +02/25/2022 14:04:02 - INFO - codeparrot_training - Step 28150: {'lr': 0.00021493669179092594, 'samples': 14413312, 'steps': 28150, 'loss/train': 1.708360195159912} +02/25/2022 14:04:05 - INFO - codeparrot_training - Step 28151: {'lr': 0.00021492049113634837, 'samples': 14413824, 'steps': 28151, 'loss/train': 2.007326126098633} +02/25/2022 14:04:11 - INFO - codeparrot_training - Step 28152: {'lr': 0.0002149042906320403, 'samples': 14414336, 'steps': 28152, 'loss/train': 2.248398780822754} +02/25/2022 14:04:14 - INFO - codeparrot_training - Step 28153: {'lr': 0.00021488809027807106, 'samples': 14414848, 'steps': 28153, 'loss/train': 0.7525196671485901} +02/25/2022 14:04:19 - INFO - codeparrot_training - Step 28154: {'lr': 0.00021487189007451016, 'samples': 14415360, 'steps': 28154, 'loss/train': 1.4810278415679932} +02/25/2022 14:04:23 - INFO - codeparrot_training - Step 28155: {'lr': 0.00021485569002142684, 'samples': 14415872, 'steps': 28155, 'loss/train': 1.97084379196167} +02/25/2022 14:04:28 - INFO - codeparrot_training - Step 28156: {'lr': 0.00021483949011889066, 'samples': 14416384, 'steps': 28156, 'loss/train': 1.7304683923721313} +02/25/2022 14:04:32 - INFO - codeparrot_training - Step 28157: {'lr': 0.0002148232903669709, 'samples': 14416896, 'steps': 28157, 'loss/train': 1.599805235862732} +02/25/2022 14:04:37 - INFO - codeparrot_training - Step 28158: {'lr': 0.00021480709076573704, 'samples': 14417408, 'steps': 28158, 'loss/train': 0.06547779589891434} +02/25/2022 14:04:41 - INFO - codeparrot_training - Step 28159: {'lr': 0.00021479089131525836, 'samples': 14417920, 'steps': 28159, 'loss/train': 2.0388407707214355} +02/25/2022 14:04:46 - INFO - codeparrot_training - Step 28160: {'lr': 0.00021477469201560434, 'samples': 14418432, 'steps': 28160, 'loss/train': 1.4564509391784668} +02/25/2022 14:04:50 - INFO - codeparrot_training - Step 28161: {'lr': 0.00021475849286684446, 'samples': 14418944, 'steps': 28161, 'loss/train': 1.5271575450897217} +02/25/2022 14:04:56 - INFO - codeparrot_training - Step 28162: {'lr': 0.00021474229386904786, 'samples': 14419456, 'steps': 28162, 'loss/train': 1.5319429636001587} +02/25/2022 14:05:00 - INFO - codeparrot_training - Step 28163: {'lr': 0.00021472609502228414, 'samples': 14419968, 'steps': 28163, 'loss/train': 1.6581001281738281} +02/25/2022 14:05:05 - INFO - codeparrot_training - Step 28164: {'lr': 0.0002147098963266226, 'samples': 14420480, 'steps': 28164, 'loss/train': 1.5576667785644531} +02/25/2022 14:05:08 - INFO - codeparrot_training - Step 28165: {'lr': 0.00021469369778213273, 'samples': 14420992, 'steps': 28165, 'loss/train': 1.6378532648086548} +02/25/2022 14:05:14 - INFO - codeparrot_training - Step 28166: {'lr': 0.00021467749938888377, 'samples': 14421504, 'steps': 28166, 'loss/train': 1.3086446523666382} +02/25/2022 14:05:17 - INFO - codeparrot_training - Step 28167: {'lr': 0.0002146613011469452, 'samples': 14422016, 'steps': 28167, 'loss/train': 1.9520518779754639} +02/25/2022 14:05:23 - INFO - codeparrot_training - Step 28168: {'lr': 0.00021464510305638638, 'samples': 14422528, 'steps': 28168, 'loss/train': 2.518522024154663} +02/25/2022 14:05:26 - INFO - codeparrot_training - Step 28169: {'lr': 0.0002146289051172767, 'samples': 14423040, 'steps': 28169, 'loss/train': 1.546021580696106} +02/25/2022 14:05:32 - INFO - codeparrot_training - Step 28170: {'lr': 0.0002146127073296857, 'samples': 14423552, 'steps': 28170, 'loss/train': 2.0718612670898438} +02/25/2022 14:05:35 - INFO - codeparrot_training - Step 28171: {'lr': 0.0002145965096936825, 'samples': 14424064, 'steps': 28171, 'loss/train': 1.227023959159851} +02/25/2022 14:05:41 - INFO - codeparrot_training - Step 28172: {'lr': 0.0002145803122093366, 'samples': 14424576, 'steps': 28172, 'loss/train': 1.546911597251892} +02/25/2022 14:05:44 - INFO - codeparrot_training - Step 28173: {'lr': 0.0002145641148767174, 'samples': 14425088, 'steps': 28173, 'loss/train': 1.7818413972854614} +02/25/2022 14:05:51 - INFO - codeparrot_training - Step 28174: {'lr': 0.0002145479176958944, 'samples': 14425600, 'steps': 28174, 'loss/train': 2.3829448223114014} +02/25/2022 14:05:54 - INFO - codeparrot_training - Step 28175: {'lr': 0.00021453172066693673, 'samples': 14426112, 'steps': 28175, 'loss/train': 1.5756821632385254} +02/25/2022 14:06:00 - INFO - codeparrot_training - Step 28176: {'lr': 0.0002145155237899139, 'samples': 14426624, 'steps': 28176, 'loss/train': 2.3233695030212402} +02/25/2022 14:06:03 - INFO - codeparrot_training - Step 28177: {'lr': 0.00021449932706489534, 'samples': 14427136, 'steps': 28177, 'loss/train': 1.4959889650344849} +02/25/2022 14:06:09 - INFO - codeparrot_training - Step 28178: {'lr': 0.00021448313049195037, 'samples': 14427648, 'steps': 28178, 'loss/train': 1.8766247034072876} +02/25/2022 14:06:12 - INFO - codeparrot_training - Step 28179: {'lr': 0.00021446693407114852, 'samples': 14428160, 'steps': 28179, 'loss/train': 1.7371975183486938} +02/25/2022 14:06:17 - INFO - codeparrot_training - Step 28180: {'lr': 0.00021445073780255888, 'samples': 14428672, 'steps': 28180, 'loss/train': 1.3295888900756836} +02/25/2022 14:06:21 - INFO - codeparrot_training - Step 28181: {'lr': 0.00021443454168625103, 'samples': 14429184, 'steps': 28181, 'loss/train': 1.6605846881866455} +02/25/2022 14:06:26 - INFO - codeparrot_training - Step 28182: {'lr': 0.0002144183457222943, 'samples': 14429696, 'steps': 28182, 'loss/train': 1.1189130544662476} +02/25/2022 14:06:30 - INFO - codeparrot_training - Step 28183: {'lr': 0.00021440214991075822, 'samples': 14430208, 'steps': 28183, 'loss/train': 1.8553526401519775} +02/25/2022 14:06:36 - INFO - codeparrot_training - Step 28184: {'lr': 0.00021438595425171187, 'samples': 14430720, 'steps': 28184, 'loss/train': 1.5230867862701416} +02/25/2022 14:06:39 - INFO - codeparrot_training - Step 28185: {'lr': 0.0002143697587452248, 'samples': 14431232, 'steps': 28185, 'loss/train': 2.4427521228790283} +02/25/2022 14:06:45 - INFO - codeparrot_training - Step 28186: {'lr': 0.00021435356339136638, 'samples': 14431744, 'steps': 28186, 'loss/train': 2.2740325927734375} +02/25/2022 14:06:48 - INFO - codeparrot_training - Step 28187: {'lr': 0.00021433736819020607, 'samples': 14432256, 'steps': 28187, 'loss/train': 2.3454790115356445} +02/25/2022 14:06:54 - INFO - codeparrot_training - Step 28188: {'lr': 0.00021432117314181304, 'samples': 14432768, 'steps': 28188, 'loss/train': 2.020791530609131} +02/25/2022 14:06:57 - INFO - codeparrot_training - Step 28189: {'lr': 0.00021430497824625677, 'samples': 14433280, 'steps': 28189, 'loss/train': 0.047374699264764786} +02/25/2022 14:07:03 - INFO - codeparrot_training - Step 28190: {'lr': 0.00021428878350360667, 'samples': 14433792, 'steps': 28190, 'loss/train': 1.9009246826171875} +02/25/2022 14:07:06 - INFO - codeparrot_training - Step 28191: {'lr': 0.0002142725889139321, 'samples': 14434304, 'steps': 28191, 'loss/train': 2.777763605117798} +02/25/2022 14:07:12 - INFO - codeparrot_training - Step 28192: {'lr': 0.00021425639447730243, 'samples': 14434816, 'steps': 28192, 'loss/train': 2.066321611404419} +02/25/2022 14:07:15 - INFO - codeparrot_training - Step 28193: {'lr': 0.00021424020019378699, 'samples': 14435328, 'steps': 28193, 'loss/train': 1.9836395978927612} +02/25/2022 14:07:22 - INFO - codeparrot_training - Step 28194: {'lr': 0.0002142240060634552, 'samples': 14435840, 'steps': 28194, 'loss/train': 1.4912992715835571} +02/25/2022 14:07:26 - INFO - codeparrot_training - Step 28195: {'lr': 0.00021420781208637634, 'samples': 14436352, 'steps': 28195, 'loss/train': 1.8374536037445068} +02/25/2022 14:07:31 - INFO - codeparrot_training - Step 28196: {'lr': 0.00021419161826261997, 'samples': 14436864, 'steps': 28196, 'loss/train': 1.4489614963531494} +02/25/2022 14:07:35 - INFO - codeparrot_training - Step 28197: {'lr': 0.0002141754245922552, 'samples': 14437376, 'steps': 28197, 'loss/train': 0.8596668839454651} +02/25/2022 14:07:40 - INFO - codeparrot_training - Step 28198: {'lr': 0.00021415923107535162, 'samples': 14437888, 'steps': 28198, 'loss/train': 1.210690975189209} +02/25/2022 14:07:44 - INFO - codeparrot_training - Step 28199: {'lr': 0.00021414303771197845, 'samples': 14438400, 'steps': 28199, 'loss/train': 1.900983214378357} +02/25/2022 14:07:49 - INFO - codeparrot_training - Step 28200: {'lr': 0.0002141268445022052, 'samples': 14438912, 'steps': 28200, 'loss/train': 1.0254895687103271} +02/25/2022 14:07:53 - INFO - codeparrot_training - Step 28201: {'lr': 0.00021411065144610113, 'samples': 14439424, 'steps': 28201, 'loss/train': 1.9845155477523804} +02/25/2022 14:07:59 - INFO - codeparrot_training - Step 28202: {'lr': 0.0002140944585437356, 'samples': 14439936, 'steps': 28202, 'loss/train': 1.625124454498291} +02/25/2022 14:08:02 - INFO - codeparrot_training - Step 28203: {'lr': 0.00021407826579517803, 'samples': 14440448, 'steps': 28203, 'loss/train': 1.8188352584838867} +02/25/2022 14:08:08 - INFO - codeparrot_training - Step 28204: {'lr': 0.00021406207320049772, 'samples': 14440960, 'steps': 28204, 'loss/train': 0.9709446430206299} +02/25/2022 14:08:11 - INFO - codeparrot_training - Step 28205: {'lr': 0.00021404588075976422, 'samples': 14441472, 'steps': 28205, 'loss/train': 1.3550825119018555} +02/25/2022 14:08:17 - INFO - codeparrot_training - Step 28206: {'lr': 0.0002140296884730466, 'samples': 14441984, 'steps': 28206, 'loss/train': 1.4155069589614868} +02/25/2022 14:08:20 - INFO - codeparrot_training - Step 28207: {'lr': 0.0002140134963404144, 'samples': 14442496, 'steps': 28207, 'loss/train': 1.8521008491516113} +02/25/2022 14:08:25 - INFO - codeparrot_training - Step 28208: {'lr': 0.00021399730436193694, 'samples': 14443008, 'steps': 28208, 'loss/train': 2.0159757137298584} +02/25/2022 14:08:29 - INFO - codeparrot_training - Step 28209: {'lr': 0.00021398111253768373, 'samples': 14443520, 'steps': 28209, 'loss/train': 0.8082032203674316} +02/25/2022 14:08:35 - INFO - codeparrot_training - Step 28210: {'lr': 0.00021396492086772387, 'samples': 14444032, 'steps': 28210, 'loss/train': 1.9070026874542236} +02/25/2022 14:08:39 - INFO - codeparrot_training - Step 28211: {'lr': 0.00021394872935212684, 'samples': 14444544, 'steps': 28211, 'loss/train': 1.7314732074737549} +02/25/2022 14:08:44 - INFO - codeparrot_training - Step 28212: {'lr': 0.000213932537990962, 'samples': 14445056, 'steps': 28212, 'loss/train': 1.6581107378005981} +02/25/2022 14:08:47 - INFO - codeparrot_training - Step 28213: {'lr': 0.00021391634678429887, 'samples': 14445568, 'steps': 28213, 'loss/train': 0.4519192576408386} +02/25/2022 14:08:53 - INFO - codeparrot_training - Step 28214: {'lr': 0.0002139001557322065, 'samples': 14446080, 'steps': 28214, 'loss/train': 1.3191540241241455} +02/25/2022 14:08:56 - INFO - codeparrot_training - Step 28215: {'lr': 0.0002138839648347544, 'samples': 14446592, 'steps': 28215, 'loss/train': 2.546160936355591} +02/25/2022 14:09:02 - INFO - codeparrot_training - Step 28216: {'lr': 0.00021386777409201194, 'samples': 14447104, 'steps': 28216, 'loss/train': 0.9705095887184143} +02/25/2022 14:09:05 - INFO - codeparrot_training - Step 28217: {'lr': 0.00021385158350404845, 'samples': 14447616, 'steps': 28217, 'loss/train': 1.3036950826644897} +02/25/2022 14:09:11 - INFO - codeparrot_training - Step 28218: {'lr': 0.00021383539307093344, 'samples': 14448128, 'steps': 28218, 'loss/train': 2.0621285438537598} +02/25/2022 14:09:17 - INFO - codeparrot_training - Step 28219: {'lr': 0.00021381920279273597, 'samples': 14448640, 'steps': 28219, 'loss/train': 1.7898664474487305} +02/25/2022 14:09:21 - INFO - codeparrot_training - Step 28220: {'lr': 0.00021380301266952557, 'samples': 14449152, 'steps': 28220, 'loss/train': 2.425165891647339} +02/25/2022 14:09:24 - INFO - codeparrot_training - Step 28221: {'lr': 0.00021378682270137156, 'samples': 14449664, 'steps': 28221, 'loss/train': 1.5211080312728882} +02/25/2022 14:09:30 - INFO - codeparrot_training - Step 28222: {'lr': 0.0002137706328883434, 'samples': 14450176, 'steps': 28222, 'loss/train': 1.5055103302001953} +02/25/2022 14:09:33 - INFO - codeparrot_training - Step 28223: {'lr': 0.0002137544432305103, 'samples': 14450688, 'steps': 28223, 'loss/train': 1.73633873462677} +02/25/2022 14:09:39 - INFO - codeparrot_training - Step 28224: {'lr': 0.0002137382537279416, 'samples': 14451200, 'steps': 28224, 'loss/train': 1.9062696695327759} +02/25/2022 14:09:42 - INFO - codeparrot_training - Step 28225: {'lr': 0.00021372206438070672, 'samples': 14451712, 'steps': 28225, 'loss/train': 1.1663005352020264} +02/25/2022 14:09:48 - INFO - codeparrot_training - Step 28226: {'lr': 0.000213705875188875, 'samples': 14452224, 'steps': 28226, 'loss/train': 2.040790319442749} +02/25/2022 14:09:51 - INFO - codeparrot_training - Step 28227: {'lr': 0.0002136896861525159, 'samples': 14452736, 'steps': 28227, 'loss/train': 1.808836579322815} +02/25/2022 14:09:57 - INFO - codeparrot_training - Step 28228: {'lr': 0.0002136734972716985, 'samples': 14453248, 'steps': 28228, 'loss/train': 1.0167431831359863} +02/25/2022 14:10:00 - INFO - codeparrot_training - Step 28229: {'lr': 0.00021365730854649236, 'samples': 14453760, 'steps': 28229, 'loss/train': 1.3448762893676758} +02/25/2022 14:10:07 - INFO - codeparrot_training - Step 28230: {'lr': 0.0002136411199769667, 'samples': 14454272, 'steps': 28230, 'loss/train': 1.4268722534179688} +02/25/2022 14:10:10 - INFO - codeparrot_training - Step 28231: {'lr': 0.00021362493156319113, 'samples': 14454784, 'steps': 28231, 'loss/train': 1.303395390510559} +02/25/2022 14:10:16 - INFO - codeparrot_training - Step 28232: {'lr': 0.00021360874330523467, 'samples': 14455296, 'steps': 28232, 'loss/train': 1.3476988077163696} +02/25/2022 14:10:19 - INFO - codeparrot_training - Step 28233: {'lr': 0.00021359255520316675, 'samples': 14455808, 'steps': 28233, 'loss/train': 1.2929127216339111} +02/25/2022 14:10:24 - INFO - codeparrot_training - Step 28234: {'lr': 0.00021357636725705687, 'samples': 14456320, 'steps': 28234, 'loss/train': 1.5819498300552368} +02/25/2022 14:10:30 - INFO - codeparrot_training - Step 28235: {'lr': 0.00021356017946697425, 'samples': 14456832, 'steps': 28235, 'loss/train': 1.2295715808868408} +02/25/2022 14:10:33 - INFO - codeparrot_training - Step 28236: {'lr': 0.0002135439918329882, 'samples': 14457344, 'steps': 28236, 'loss/train': 2.0276858806610107} +02/25/2022 14:10:39 - INFO - codeparrot_training - Step 28237: {'lr': 0.00021352780435516811, 'samples': 14457856, 'steps': 28237, 'loss/train': 0.05608693137764931} +02/25/2022 14:10:42 - INFO - codeparrot_training - Step 28238: {'lr': 0.00021351161703358335, 'samples': 14458368, 'steps': 28238, 'loss/train': 2.012699842453003} +02/25/2022 14:10:49 - INFO - codeparrot_training - Step 28239: {'lr': 0.0002134954298683032, 'samples': 14458880, 'steps': 28239, 'loss/train': 1.3082733154296875} +02/25/2022 14:10:53 - INFO - codeparrot_training - Step 28240: {'lr': 0.00021347924285939712, 'samples': 14459392, 'steps': 28240, 'loss/train': 1.3898792266845703} +02/25/2022 14:10:56 - INFO - codeparrot_training - Step 28241: {'lr': 0.00021346305600693433, 'samples': 14459904, 'steps': 28241, 'loss/train': 2.0165929794311523} +02/25/2022 14:11:02 - INFO - codeparrot_training - Step 28242: {'lr': 0.00021344686931098418, 'samples': 14460416, 'steps': 28242, 'loss/train': 1.545802354812622} +02/25/2022 14:11:07 - INFO - codeparrot_training - Step 28243: {'lr': 0.00021343068277161604, 'samples': 14460928, 'steps': 28243, 'loss/train': 2.3445956707000732} +02/25/2022 14:11:11 - INFO - codeparrot_training - Step 28244: {'lr': 0.00021341449638889926, 'samples': 14461440, 'steps': 28244, 'loss/train': 0.15547709167003632} +02/25/2022 14:11:16 - INFO - codeparrot_training - Step 28245: {'lr': 0.00021339831016290318, 'samples': 14461952, 'steps': 28245, 'loss/train': 3.4157631397247314} +02/25/2022 14:11:20 - INFO - codeparrot_training - Step 28246: {'lr': 0.00021338212409369707, 'samples': 14462464, 'steps': 28246, 'loss/train': 3.1715946197509766} +02/25/2022 14:11:25 - INFO - codeparrot_training - Step 28247: {'lr': 0.00021336593818135032, 'samples': 14462976, 'steps': 28247, 'loss/train': 2.5878636837005615} +02/25/2022 14:11:29 - INFO - codeparrot_training - Step 28248: {'lr': 0.00021334975242593234, 'samples': 14463488, 'steps': 28248, 'loss/train': 1.7279274463653564} +02/25/2022 14:11:32 - INFO - codeparrot_training - Step 28249: {'lr': 0.0002133335668275123, 'samples': 14464000, 'steps': 28249, 'loss/train': 1.9622348546981812} +02/25/2022 14:11:38 - INFO - codeparrot_training - Step 28250: {'lr': 0.00021331738138615958, 'samples': 14464512, 'steps': 28250, 'loss/train': 0.05712484195828438} +02/25/2022 14:11:41 - INFO - codeparrot_training - Step 28251: {'lr': 0.0002133011961019436, 'samples': 14465024, 'steps': 28251, 'loss/train': 1.6140528917312622} +02/25/2022 14:11:48 - INFO - codeparrot_training - Step 28252: {'lr': 0.0002132850109749336, 'samples': 14465536, 'steps': 28252, 'loss/train': 1.1634860038757324} +02/25/2022 14:11:53 - INFO - codeparrot_training - Step 28253: {'lr': 0.0002132688260051991, 'samples': 14466048, 'steps': 28253, 'loss/train': 2.5930566787719727} +02/25/2022 14:11:56 - INFO - codeparrot_training - Step 28254: {'lr': 0.00021325264119280914, 'samples': 14466560, 'steps': 28254, 'loss/train': 1.196531891822815} +02/25/2022 14:12:02 - INFO - codeparrot_training - Step 28255: {'lr': 0.00021323645653783323, 'samples': 14467072, 'steps': 28255, 'loss/train': 1.8630634546279907} +02/25/2022 14:12:05 - INFO - codeparrot_training - Step 28256: {'lr': 0.00021322027204034063, 'samples': 14467584, 'steps': 28256, 'loss/train': 2.4226880073547363} +02/25/2022 14:12:11 - INFO - codeparrot_training - Step 28257: {'lr': 0.00021320408770040083, 'samples': 14468096, 'steps': 28257, 'loss/train': 1.9604405164718628} +02/25/2022 14:12:14 - INFO - codeparrot_training - Step 28258: {'lr': 0.00021318790351808296, 'samples': 14468608, 'steps': 28258, 'loss/train': 1.3375805616378784} +02/25/2022 14:12:20 - INFO - codeparrot_training - Step 28259: {'lr': 0.00021317171949345636, 'samples': 14469120, 'steps': 28259, 'loss/train': 0.15957719087600708} +02/25/2022 14:12:23 - INFO - codeparrot_training - Step 28260: {'lr': 0.00021315553562659045, 'samples': 14469632, 'steps': 28260, 'loss/train': 1.3576756715774536} +02/25/2022 14:12:29 - INFO - codeparrot_training - Step 28261: {'lr': 0.00021313935191755466, 'samples': 14470144, 'steps': 28261, 'loss/train': 1.406958818435669} +02/25/2022 14:12:32 - INFO - codeparrot_training - Step 28262: {'lr': 0.00021312316836641804, 'samples': 14470656, 'steps': 28262, 'loss/train': 2.1285762786865234} +02/25/2022 14:12:38 - INFO - codeparrot_training - Step 28263: {'lr': 0.00021310698497325006, 'samples': 14471168, 'steps': 28263, 'loss/train': 1.821584701538086} +02/25/2022 14:12:41 - INFO - codeparrot_training - Step 28264: {'lr': 0.00021309080173812007, 'samples': 14471680, 'steps': 28264, 'loss/train': 1.6576480865478516} +02/25/2022 14:12:47 - INFO - codeparrot_training - Step 28265: {'lr': 0.00021307461866109734, 'samples': 14472192, 'steps': 28265, 'loss/train': 1.8209234476089478} +02/25/2022 14:12:51 - INFO - codeparrot_training - Step 28266: {'lr': 0.00021305843574225133, 'samples': 14472704, 'steps': 28266, 'loss/train': 0.9990397691726685} +02/25/2022 14:12:56 - INFO - codeparrot_training - Step 28267: {'lr': 0.00021304225298165113, 'samples': 14473216, 'steps': 28267, 'loss/train': 2.634413719177246} +02/25/2022 14:13:00 - INFO - codeparrot_training - Step 28268: {'lr': 0.0002130260703793662, 'samples': 14473728, 'steps': 28268, 'loss/train': 2.0726587772369385} +02/25/2022 14:13:05 - INFO - codeparrot_training - Step 28269: {'lr': 0.00021300988793546586, 'samples': 14474240, 'steps': 28269, 'loss/train': 0.06204100698232651} +02/25/2022 14:13:09 - INFO - codeparrot_training - Step 28270: {'lr': 0.00021299370565001948, 'samples': 14474752, 'steps': 28270, 'loss/train': 1.9901347160339355} +02/25/2022 14:13:14 - INFO - codeparrot_training - Step 28271: {'lr': 0.00021297752352309623, 'samples': 14475264, 'steps': 28271, 'loss/train': 2.7706313133239746} +02/25/2022 14:13:18 - INFO - codeparrot_training - Step 28272: {'lr': 0.00021296134155476552, 'samples': 14475776, 'steps': 28272, 'loss/train': 0.9279763698577881} +02/25/2022 14:13:23 - INFO - codeparrot_training - Step 28273: {'lr': 0.00021294515974509666, 'samples': 14476288, 'steps': 28273, 'loss/train': 2.64377760887146} +02/25/2022 14:13:26 - INFO - codeparrot_training - Step 28274: {'lr': 0.00021292897809415906, 'samples': 14476800, 'steps': 28274, 'loss/train': 2.3142173290252686} +02/25/2022 14:13:33 - INFO - codeparrot_training - Step 28275: {'lr': 0.00021291279660202187, 'samples': 14477312, 'steps': 28275, 'loss/train': 1.709380865097046} +02/25/2022 14:13:36 - INFO - codeparrot_training - Step 28276: {'lr': 0.00021289661526875445, 'samples': 14477824, 'steps': 28276, 'loss/train': 2.758554458618164} +02/25/2022 14:13:42 - INFO - codeparrot_training - Step 28277: {'lr': 0.00021288043409442616, 'samples': 14478336, 'steps': 28277, 'loss/train': 0.8211163878440857} +02/25/2022 14:13:45 - INFO - codeparrot_training - Step 28278: {'lr': 0.00021286425307910636, 'samples': 14478848, 'steps': 28278, 'loss/train': 1.3632938861846924} +02/25/2022 14:13:50 - INFO - codeparrot_training - Step 28279: {'lr': 0.00021284807222286432, 'samples': 14479360, 'steps': 28279, 'loss/train': 0.996660590171814} +02/25/2022 14:13:54 - INFO - codeparrot_training - Step 28280: {'lr': 0.00021283189152576927, 'samples': 14479872, 'steps': 28280, 'loss/train': 1.7499308586120605} +02/25/2022 14:13:59 - INFO - codeparrot_training - Step 28281: {'lr': 0.00021281571098789065, 'samples': 14480384, 'steps': 28281, 'loss/train': 0.7764137387275696} +02/25/2022 14:14:03 - INFO - codeparrot_training - Step 28282: {'lr': 0.00021279953060929766, 'samples': 14480896, 'steps': 28282, 'loss/train': 2.001917600631714} +02/25/2022 14:14:08 - INFO - codeparrot_training - Step 28283: {'lr': 0.0002127833503900597, 'samples': 14481408, 'steps': 28283, 'loss/train': 2.2516567707061768} +02/25/2022 14:14:12 - INFO - codeparrot_training - Step 28284: {'lr': 0.00021276717033024607, 'samples': 14481920, 'steps': 28284, 'loss/train': 2.323160409927368} +02/25/2022 14:14:19 - INFO - codeparrot_training - Step 28285: {'lr': 0.00021275099042992606, 'samples': 14482432, 'steps': 28285, 'loss/train': 1.4083870649337769} +02/25/2022 14:14:22 - INFO - codeparrot_training - Step 28286: {'lr': 0.0002127348106891689, 'samples': 14482944, 'steps': 28286, 'loss/train': 1.5999479293823242} +02/25/2022 14:14:27 - INFO - codeparrot_training - Step 28287: {'lr': 0.000212718631108044, 'samples': 14483456, 'steps': 28287, 'loss/train': 1.4723689556121826} +02/25/2022 14:14:33 - INFO - codeparrot_training - Step 28288: {'lr': 0.00021270245168662077, 'samples': 14483968, 'steps': 28288, 'loss/train': 2.1287407875061035} +02/25/2022 14:14:36 - INFO - codeparrot_training - Step 28289: {'lr': 0.0002126862724249683, 'samples': 14484480, 'steps': 28289, 'loss/train': 1.5229589939117432} +02/25/2022 14:14:42 - INFO - codeparrot_training - Step 28290: {'lr': 0.000212670093323156, 'samples': 14484992, 'steps': 28290, 'loss/train': 1.637123465538025} +02/25/2022 14:14:46 - INFO - codeparrot_training - Step 28291: {'lr': 0.00021265391438125313, 'samples': 14485504, 'steps': 28291, 'loss/train': 1.69315767288208} +02/25/2022 14:14:49 - INFO - codeparrot_training - Step 28292: {'lr': 0.00021263773559932915, 'samples': 14486016, 'steps': 28292, 'loss/train': 1.6702494621276855} +02/25/2022 14:14:54 - INFO - codeparrot_training - Step 28293: {'lr': 0.00021262155697745311, 'samples': 14486528, 'steps': 28293, 'loss/train': 1.4113856554031372} +02/25/2022 14:14:58 - INFO - codeparrot_training - Step 28294: {'lr': 0.00021260537851569447, 'samples': 14487040, 'steps': 28294, 'loss/train': 1.4689005613327026} +02/25/2022 14:15:03 - INFO - codeparrot_training - Step 28295: {'lr': 0.00021258920021412252, 'samples': 14487552, 'steps': 28295, 'loss/train': 0.07687009871006012} +02/25/2022 14:15:10 - INFO - codeparrot_training - Step 28296: {'lr': 0.0002125730220728067, 'samples': 14488064, 'steps': 28296, 'loss/train': 1.0091983079910278} +02/25/2022 14:15:13 - INFO - codeparrot_training - Step 28297: {'lr': 0.000212556844091816, 'samples': 14488576, 'steps': 28297, 'loss/train': 1.4685746431350708} +02/25/2022 14:15:19 - INFO - codeparrot_training - Step 28298: {'lr': 0.00021254066627121994, 'samples': 14489088, 'steps': 28298, 'loss/train': 1.0950254201889038} +02/25/2022 14:15:22 - INFO - codeparrot_training - Step 28299: {'lr': 0.00021252448861108772, 'samples': 14489600, 'steps': 28299, 'loss/train': 2.378859043121338} +02/25/2022 14:15:28 - INFO - codeparrot_training - Step 28300: {'lr': 0.0002125083111114887, 'samples': 14490112, 'steps': 28300, 'loss/train': 0.11248381435871124} +02/25/2022 14:15:31 - INFO - codeparrot_training - Step 28301: {'lr': 0.00021249213377249232, 'samples': 14490624, 'steps': 28301, 'loss/train': 0.8231486678123474} +02/25/2022 14:15:37 - INFO - codeparrot_training - Step 28302: {'lr': 0.0002124759565941676, 'samples': 14491136, 'steps': 28302, 'loss/train': 2.3650870323181152} +02/25/2022 14:15:40 - INFO - codeparrot_training - Step 28303: {'lr': 0.00021245977957658397, 'samples': 14491648, 'steps': 28303, 'loss/train': 1.709525227546692} +02/25/2022 14:15:46 - INFO - codeparrot_training - Step 28304: {'lr': 0.00021244360271981073, 'samples': 14492160, 'steps': 28304, 'loss/train': 1.2625675201416016} +02/25/2022 14:15:49 - INFO - codeparrot_training - Step 28305: {'lr': 0.00021242742602391727, 'samples': 14492672, 'steps': 28305, 'loss/train': 2.158215045928955} +02/25/2022 14:15:56 - INFO - codeparrot_training - Step 28306: {'lr': 0.00021241124948897266, 'samples': 14493184, 'steps': 28306, 'loss/train': 1.7731049060821533} +02/25/2022 14:15:59 - INFO - codeparrot_training - Step 28307: {'lr': 0.00021239507311504634, 'samples': 14493696, 'steps': 28307, 'loss/train': 2.3433847427368164} +02/25/2022 14:16:05 - INFO - codeparrot_training - Step 28308: {'lr': 0.00021237889690220758, 'samples': 14494208, 'steps': 28308, 'loss/train': 0.08356847614049911} +02/25/2022 14:16:08 - INFO - codeparrot_training - Step 28309: {'lr': 0.0002123627208505258, 'samples': 14494720, 'steps': 28309, 'loss/train': 2.075960159301758} +02/25/2022 14:16:14 - INFO - codeparrot_training - Step 28310: {'lr': 0.00021234654496007005, 'samples': 14495232, 'steps': 28310, 'loss/train': 1.9082318544387817} +02/25/2022 14:16:17 - INFO - codeparrot_training - Step 28311: {'lr': 0.00021233036923090976, 'samples': 14495744, 'steps': 28311, 'loss/train': 2.581702709197998} +02/25/2022 14:16:23 - INFO - codeparrot_training - Step 28312: {'lr': 0.00021231419366311418, 'samples': 14496256, 'steps': 28312, 'loss/train': 1.8835728168487549} +02/25/2022 14:16:26 - INFO - codeparrot_training - Step 28313: {'lr': 0.00021229801825675267, 'samples': 14496768, 'steps': 28313, 'loss/train': 1.6758482456207275} +02/25/2022 14:16:32 - INFO - codeparrot_training - Step 28314: {'lr': 0.00021228184301189454, 'samples': 14497280, 'steps': 28314, 'loss/train': 1.2920202016830444} +02/25/2022 14:16:35 - INFO - codeparrot_training - Step 28315: {'lr': 0.00021226566792860893, 'samples': 14497792, 'steps': 28315, 'loss/train': 0.6129186749458313} +02/25/2022 14:16:41 - INFO - codeparrot_training - Step 28316: {'lr': 0.00021224949300696522, 'samples': 14498304, 'steps': 28316, 'loss/train': 1.807091474533081} +02/25/2022 14:16:44 - INFO - codeparrot_training - Step 28317: {'lr': 0.0002122333182470327, 'samples': 14498816, 'steps': 28317, 'loss/train': 1.946730613708496} +02/25/2022 14:16:50 - INFO - codeparrot_training - Step 28318: {'lr': 0.00021221714364888072, 'samples': 14499328, 'steps': 28318, 'loss/train': 1.6418988704681396} +02/25/2022 14:16:53 - INFO - codeparrot_training - Step 28319: {'lr': 0.00021220096921257845, 'samples': 14499840, 'steps': 28319, 'loss/train': 2.0740702152252197} +02/25/2022 14:16:59 - INFO - codeparrot_training - Step 28320: {'lr': 0.0002121847949381952, 'samples': 14500352, 'steps': 28320, 'loss/train': 1.0231614112854004} +02/25/2022 14:17:02 - INFO - codeparrot_training - Step 28321: {'lr': 0.00021216862082580032, 'samples': 14500864, 'steps': 28321, 'loss/train': 1.1979848146438599} +02/25/2022 14:17:08 - INFO - codeparrot_training - Step 28322: {'lr': 0.00021215244687546308, 'samples': 14501376, 'steps': 28322, 'loss/train': 1.2794530391693115} +02/25/2022 14:17:12 - INFO - codeparrot_training - Step 28323: {'lr': 0.00021213627308725266, 'samples': 14501888, 'steps': 28323, 'loss/train': 1.1398054361343384} +02/25/2022 14:17:17 - INFO - codeparrot_training - Step 28324: {'lr': 0.00021212009946123845, 'samples': 14502400, 'steps': 28324, 'loss/train': 1.1762428283691406} +02/25/2022 14:17:21 - INFO - codeparrot_training - Step 28325: {'lr': 0.00021210392599748973, 'samples': 14502912, 'steps': 28325, 'loss/train': 1.7300468683242798} +02/25/2022 14:17:26 - INFO - codeparrot_training - Step 28326: {'lr': 0.00021208775269607576, 'samples': 14503424, 'steps': 28326, 'loss/train': 1.592652440071106} +02/25/2022 14:17:30 - INFO - codeparrot_training - Step 28327: {'lr': 0.0002120715795570658, 'samples': 14503936, 'steps': 28327, 'loss/train': 2.032846212387085} +02/25/2022 14:17:35 - INFO - codeparrot_training - Step 28328: {'lr': 0.00021205540658052912, 'samples': 14504448, 'steps': 28328, 'loss/train': 1.2168664932250977} +02/25/2022 14:17:39 - INFO - codeparrot_training - Step 28329: {'lr': 0.00021203923376653512, 'samples': 14504960, 'steps': 28329, 'loss/train': 1.053036093711853} +02/25/2022 14:17:44 - INFO - codeparrot_training - Step 28330: {'lr': 0.0002120230611151529, 'samples': 14505472, 'steps': 28330, 'loss/train': 2.599642276763916} +02/25/2022 14:17:48 - INFO - codeparrot_training - Step 28331: {'lr': 0.0002120068886264519, 'samples': 14505984, 'steps': 28331, 'loss/train': 2.286259889602661} +02/25/2022 14:17:54 - INFO - codeparrot_training - Step 28332: {'lr': 0.00021199071630050132, 'samples': 14506496, 'steps': 28332, 'loss/train': 1.5866584777832031} +02/25/2022 14:17:59 - INFO - codeparrot_training - Step 28333: {'lr': 0.0002119745441373704, 'samples': 14507008, 'steps': 28333, 'loss/train': 2.208592653274536} +02/25/2022 14:18:03 - INFO - codeparrot_training - Step 28334: {'lr': 0.0002119583721371284, 'samples': 14507520, 'steps': 28334, 'loss/train': 2.1119301319122314} +02/25/2022 14:18:06 - INFO - codeparrot_training - Step 28335: {'lr': 0.0002119422002998447, 'samples': 14508032, 'steps': 28335, 'loss/train': 2.3421809673309326} +02/25/2022 14:18:12 - INFO - codeparrot_training - Step 28336: {'lr': 0.00021192602862558864, 'samples': 14508544, 'steps': 28336, 'loss/train': 0.6300588250160217} +02/25/2022 14:18:17 - INFO - codeparrot_training - Step 28337: {'lr': 0.00021190985711442924, 'samples': 14509056, 'steps': 28337, 'loss/train': 1.2629461288452148} +02/25/2022 14:18:21 - INFO - codeparrot_training - Step 28338: {'lr': 0.00021189368576643599, 'samples': 14509568, 'steps': 28338, 'loss/train': 1.7022731304168701} +02/25/2022 14:18:26 - INFO - codeparrot_training - Step 28339: {'lr': 0.00021187751458167804, 'samples': 14510080, 'steps': 28339, 'loss/train': 1.6350210905075073} +02/25/2022 14:18:30 - INFO - codeparrot_training - Step 28340: {'lr': 0.0002118613435602248, 'samples': 14510592, 'steps': 28340, 'loss/train': 1.8421428203582764} +02/25/2022 14:18:36 - INFO - codeparrot_training - Step 28341: {'lr': 0.00021184517270214537, 'samples': 14511104, 'steps': 28341, 'loss/train': 2.856041431427002} +02/25/2022 14:18:39 - INFO - codeparrot_training - Step 28342: {'lr': 0.0002118290020075091, 'samples': 14511616, 'steps': 28342, 'loss/train': 1.5928322076797485} +02/25/2022 14:18:43 - INFO - codeparrot_training - Step 28343: {'lr': 0.00021181283147638527, 'samples': 14512128, 'steps': 28343, 'loss/train': 0.04897914454340935} +02/25/2022 14:18:48 - INFO - codeparrot_training - Step 28344: {'lr': 0.00021179666110884328, 'samples': 14512640, 'steps': 28344, 'loss/train': 2.2004902362823486} +02/25/2022 14:18:54 - INFO - codeparrot_training - Step 28345: {'lr': 0.00021178049090495211, 'samples': 14513152, 'steps': 28345, 'loss/train': 1.6638797521591187} +02/25/2022 14:18:57 - INFO - codeparrot_training - Step 28346: {'lr': 0.0002117643208647812, 'samples': 14513664, 'steps': 28346, 'loss/train': 0.49903514981269836} +02/25/2022 14:19:03 - INFO - codeparrot_training - Step 28347: {'lr': 0.00021174815098839978, 'samples': 14514176, 'steps': 28347, 'loss/train': 1.2643673419952393} +02/25/2022 14:19:06 - INFO - codeparrot_training - Step 28348: {'lr': 0.00021173198127587717, 'samples': 14514688, 'steps': 28348, 'loss/train': 1.9727510213851929} +02/25/2022 14:19:12 - INFO - codeparrot_training - Step 28349: {'lr': 0.00021171581172728268, 'samples': 14515200, 'steps': 28349, 'loss/train': 1.769645094871521} +02/25/2022 14:19:15 - INFO - codeparrot_training - Step 28350: {'lr': 0.0002116996423426854, 'samples': 14515712, 'steps': 28350, 'loss/train': 0.595661461353302} +02/25/2022 14:19:21 - INFO - codeparrot_training - Step 28351: {'lr': 0.00021168347312215468, 'samples': 14516224, 'steps': 28351, 'loss/train': 0.16420549154281616} +02/25/2022 14:19:25 - INFO - codeparrot_training - Step 28352: {'lr': 0.0002116673040657598, 'samples': 14516736, 'steps': 28352, 'loss/train': 1.470096230506897} +02/25/2022 14:19:30 - INFO - codeparrot_training - Step 28353: {'lr': 0.00021165113517357016, 'samples': 14517248, 'steps': 28353, 'loss/train': 1.6985714435577393} +02/25/2022 14:19:34 - INFO - codeparrot_training - Step 28354: {'lr': 0.00021163496644565472, 'samples': 14517760, 'steps': 28354, 'loss/train': 1.5188720226287842} +02/25/2022 14:19:39 - INFO - codeparrot_training - Step 28355: {'lr': 0.00021161879788208295, 'samples': 14518272, 'steps': 28355, 'loss/train': 1.8870947360992432} +02/25/2022 14:19:43 - INFO - codeparrot_training - Step 28356: {'lr': 0.000211602629482924, 'samples': 14518784, 'steps': 28356, 'loss/train': 0.9333053231239319} +02/25/2022 14:19:48 - INFO - codeparrot_training - Step 28357: {'lr': 0.00021158646124824735, 'samples': 14519296, 'steps': 28357, 'loss/train': 1.6203453540802002} +02/25/2022 14:19:52 - INFO - codeparrot_training - Step 28358: {'lr': 0.00021157029317812198, 'samples': 14519808, 'steps': 28358, 'loss/train': 2.8714053630828857} +02/25/2022 14:19:57 - INFO - codeparrot_training - Step 28359: {'lr': 0.00021155412527261726, 'samples': 14520320, 'steps': 28359, 'loss/train': 1.8954418897628784} +02/25/2022 14:20:01 - INFO - codeparrot_training - Step 28360: {'lr': 0.00021153795753180247, 'samples': 14520832, 'steps': 28360, 'loss/train': 0.6464634537696838} +02/25/2022 14:20:06 - INFO - codeparrot_training - Step 28361: {'lr': 0.00021152178995574686, 'samples': 14521344, 'steps': 28361, 'loss/train': 1.4953765869140625} +02/25/2022 14:20:10 - INFO - codeparrot_training - Step 28362: {'lr': 0.00021150562254451978, 'samples': 14521856, 'steps': 28362, 'loss/train': 1.9135023355484009} +02/25/2022 14:20:15 - INFO - codeparrot_training - Step 28363: {'lr': 0.0002114894552981903, 'samples': 14522368, 'steps': 28363, 'loss/train': 2.067519187927246} +02/25/2022 14:20:19 - INFO - codeparrot_training - Step 28364: {'lr': 0.00021147328821682776, 'samples': 14522880, 'steps': 28364, 'loss/train': 1.184251070022583} +02/25/2022 14:20:24 - INFO - codeparrot_training - Step 28365: {'lr': 0.00021145712130050145, 'samples': 14523392, 'steps': 28365, 'loss/train': 1.4790740013122559} +02/25/2022 14:20:28 - INFO - codeparrot_training - Step 28366: {'lr': 0.00021144095454928062, 'samples': 14523904, 'steps': 28366, 'loss/train': 1.4340696334838867} +02/25/2022 14:20:34 - INFO - codeparrot_training - Step 28367: {'lr': 0.00021142478796323444, 'samples': 14524416, 'steps': 28367, 'loss/train': 1.3393166065216064} +02/25/2022 14:20:37 - INFO - codeparrot_training - Step 28368: {'lr': 0.0002114086215424322, 'samples': 14524928, 'steps': 28368, 'loss/train': 1.8404030799865723} +02/25/2022 14:20:43 - INFO - codeparrot_training - Step 28369: {'lr': 0.00021139245528694324, 'samples': 14525440, 'steps': 28369, 'loss/train': 1.5250625610351562} +02/25/2022 14:20:46 - INFO - codeparrot_training - Step 28370: {'lr': 0.00021137628919683674, 'samples': 14525952, 'steps': 28370, 'loss/train': 1.276877760887146} +02/25/2022 14:20:52 - INFO - codeparrot_training - Step 28371: {'lr': 0.00021136012327218191, 'samples': 14526464, 'steps': 28371, 'loss/train': 1.39301335811615} +02/25/2022 14:20:55 - INFO - codeparrot_training - Step 28372: {'lr': 0.00021134395751304808, 'samples': 14526976, 'steps': 28372, 'loss/train': 1.5065416097640991} +02/25/2022 14:21:01 - INFO - codeparrot_training - Step 28373: {'lr': 0.0002113277919195044, 'samples': 14527488, 'steps': 28373, 'loss/train': 1.7488592863082886} +02/25/2022 14:21:04 - INFO - codeparrot_training - Step 28374: {'lr': 0.00021131162649162022, 'samples': 14528000, 'steps': 28374, 'loss/train': 0.4177989959716797} +02/25/2022 14:21:10 - INFO - codeparrot_training - Step 28375: {'lr': 0.00021129546122946477, 'samples': 14528512, 'steps': 28375, 'loss/train': 1.301292061805725} +02/25/2022 14:21:13 - INFO - codeparrot_training - Step 28376: {'lr': 0.00021127929613310725, 'samples': 14529024, 'steps': 28376, 'loss/train': 1.3771742582321167} +02/25/2022 14:21:19 - INFO - codeparrot_training - Step 28377: {'lr': 0.0002112631312026169, 'samples': 14529536, 'steps': 28377, 'loss/train': 1.7349754571914673} +02/25/2022 14:21:23 - INFO - codeparrot_training - Step 28378: {'lr': 0.00021124696643806302, 'samples': 14530048, 'steps': 28378, 'loss/train': 2.1916744709014893} +02/25/2022 14:21:28 - INFO - codeparrot_training - Step 28379: {'lr': 0.00021123080183951492, 'samples': 14530560, 'steps': 28379, 'loss/train': 1.386434555053711} +02/25/2022 14:21:32 - INFO - codeparrot_training - Step 28380: {'lr': 0.00021121463740704166, 'samples': 14531072, 'steps': 28380, 'loss/train': 1.5551437139511108} +02/25/2022 14:21:37 - INFO - codeparrot_training - Step 28381: {'lr': 0.00021119847314071254, 'samples': 14531584, 'steps': 28381, 'loss/train': 0.06534381210803986} +02/25/2022 14:21:41 - INFO - codeparrot_training - Step 28382: {'lr': 0.00021118230904059688, 'samples': 14532096, 'steps': 28382, 'loss/train': 1.5706872940063477} +02/25/2022 14:21:46 - INFO - codeparrot_training - Step 28383: {'lr': 0.00021116614510676397, 'samples': 14532608, 'steps': 28383, 'loss/train': 1.1185634136199951} +02/25/2022 14:21:50 - INFO - codeparrot_training - Step 28384: {'lr': 0.00021114998133928286, 'samples': 14533120, 'steps': 28384, 'loss/train': 1.580344796180725} +02/25/2022 14:21:55 - INFO - codeparrot_training - Step 28385: {'lr': 0.00021113381773822288, 'samples': 14533632, 'steps': 28385, 'loss/train': 1.5345349311828613} +02/25/2022 14:22:02 - INFO - codeparrot_training - Step 28386: {'lr': 0.0002111176543036533, 'samples': 14534144, 'steps': 28386, 'loss/train': 1.2951834201812744} +02/25/2022 14:22:05 - INFO - codeparrot_training - Step 28387: {'lr': 0.00021110149103564335, 'samples': 14534656, 'steps': 28387, 'loss/train': 2.4283063411712646} +02/25/2022 14:22:11 - INFO - codeparrot_training - Step 28388: {'lr': 0.00021108532793426236, 'samples': 14535168, 'steps': 28388, 'loss/train': 1.3708207607269287} +02/25/2022 14:22:14 - INFO - codeparrot_training - Step 28389: {'lr': 0.00021106916499957936, 'samples': 14535680, 'steps': 28389, 'loss/train': 2.2369604110717773} +02/25/2022 14:22:19 - INFO - codeparrot_training - Step 28390: {'lr': 0.0002110530022316637, 'samples': 14536192, 'steps': 28390, 'loss/train': 1.4980615377426147} +02/25/2022 14:22:23 - INFO - codeparrot_training - Step 28391: {'lr': 0.00021103683963058457, 'samples': 14536704, 'steps': 28391, 'loss/train': 1.6785829067230225} +02/25/2022 14:22:28 - INFO - codeparrot_training - Step 28392: {'lr': 0.0002110206771964114, 'samples': 14537216, 'steps': 28392, 'loss/train': 2.1099720001220703} +02/25/2022 14:22:32 - INFO - codeparrot_training - Step 28393: {'lr': 0.00021100451492921316, 'samples': 14537728, 'steps': 28393, 'loss/train': 1.256303310394287} +02/25/2022 14:22:38 - INFO - codeparrot_training - Step 28394: {'lr': 0.0002109883528290592, 'samples': 14538240, 'steps': 28394, 'loss/train': 8.790576934814453} +02/25/2022 14:22:41 - INFO - codeparrot_training - Step 28395: {'lr': 0.00021097219089601872, 'samples': 14538752, 'steps': 28395, 'loss/train': 1.1408854722976685} +02/25/2022 14:22:47 - INFO - codeparrot_training - Step 28396: {'lr': 0.00021095602913016097, 'samples': 14539264, 'steps': 28396, 'loss/train': 0.04697303846478462} +02/25/2022 14:22:50 - INFO - codeparrot_training - Step 28397: {'lr': 0.00021093986753155538, 'samples': 14539776, 'steps': 28397, 'loss/train': 1.5680654048919678} +02/25/2022 14:22:56 - INFO - codeparrot_training - Step 28398: {'lr': 0.00021092370610027082, 'samples': 14540288, 'steps': 28398, 'loss/train': 1.721678376197815} +02/25/2022 14:23:00 - INFO - codeparrot_training - Step 28399: {'lr': 0.0002109075448363767, 'samples': 14540800, 'steps': 28399, 'loss/train': 2.1614668369293213} +02/25/2022 14:23:05 - INFO - codeparrot_training - Step 28400: {'lr': 0.00021089138373994224, 'samples': 14541312, 'steps': 28400, 'loss/train': 1.2734172344207764} +02/25/2022 14:23:09 - INFO - codeparrot_training - Step 28401: {'lr': 0.0002108752228110368, 'samples': 14541824, 'steps': 28401, 'loss/train': 2.3260772228240967} +02/25/2022 14:23:14 - INFO - codeparrot_training - Step 28402: {'lr': 0.00021085906204972938, 'samples': 14542336, 'steps': 28402, 'loss/train': 2.5203962326049805} +02/25/2022 14:23:18 - INFO - codeparrot_training - Step 28403: {'lr': 0.0002108429014560893, 'samples': 14542848, 'steps': 28403, 'loss/train': 1.7350388765335083} +02/25/2022 14:23:23 - INFO - codeparrot_training - Step 28404: {'lr': 0.00021082674103018579, 'samples': 14543360, 'steps': 28404, 'loss/train': 1.0933187007904053} +02/25/2022 14:23:27 - INFO - codeparrot_training - Step 28405: {'lr': 0.0002108105807720882, 'samples': 14543872, 'steps': 28405, 'loss/train': 1.4100477695465088} +02/25/2022 14:23:32 - INFO - codeparrot_training - Step 28406: {'lr': 0.00021079442068186552, 'samples': 14544384, 'steps': 28406, 'loss/train': 2.218083620071411} +02/25/2022 14:23:36 - INFO - codeparrot_training - Step 28407: {'lr': 0.0002107782607595871, 'samples': 14544896, 'steps': 28407, 'loss/train': 2.2843759059906006} +02/25/2022 14:23:41 - INFO - codeparrot_training - Step 28408: {'lr': 0.00021076210100532216, 'samples': 14545408, 'steps': 28408, 'loss/train': 1.5812772512435913} +02/25/2022 14:23:45 - INFO - codeparrot_training - Step 28409: {'lr': 0.00021074594141913997, 'samples': 14545920, 'steps': 28409, 'loss/train': 2.3902242183685303} +02/25/2022 14:23:50 - INFO - codeparrot_training - Step 28410: {'lr': 0.0002107297820011097, 'samples': 14546432, 'steps': 28410, 'loss/train': 1.7851753234863281} +02/25/2022 14:23:54 - INFO - codeparrot_training - Step 28411: {'lr': 0.00021071362275130052, 'samples': 14546944, 'steps': 28411, 'loss/train': 1.677545428276062} +02/25/2022 14:24:00 - INFO - codeparrot_training - Step 28412: {'lr': 0.00021069746366978177, 'samples': 14547456, 'steps': 28412, 'loss/train': 0.8821040391921997} +02/25/2022 14:24:03 - INFO - codeparrot_training - Step 28413: {'lr': 0.00021068130475662255, 'samples': 14547968, 'steps': 28413, 'loss/train': 1.6094051599502563} +02/25/2022 14:24:07 - INFO - codeparrot_training - Step 28414: {'lr': 0.00021066514601189218, 'samples': 14548480, 'steps': 28414, 'loss/train': 2.2515792846679688} +02/25/2022 14:24:13 - INFO - codeparrot_training - Step 28415: {'lr': 0.00021064898743565976, 'samples': 14548992, 'steps': 28415, 'loss/train': 0.9202149510383606} +02/25/2022 14:24:16 - INFO - codeparrot_training - Step 28416: {'lr': 0.00021063282902799468, 'samples': 14549504, 'steps': 28416, 'loss/train': 3.0163931846618652} +02/25/2022 14:24:22 - INFO - codeparrot_training - Step 28417: {'lr': 0.000210616670788966, 'samples': 14550016, 'steps': 28417, 'loss/train': 0.9904517531394958} +02/25/2022 14:24:25 - INFO - codeparrot_training - Step 28418: {'lr': 0.00021060051271864304, 'samples': 14550528, 'steps': 28418, 'loss/train': 1.882892370223999} +02/25/2022 14:24:31 - INFO - codeparrot_training - Step 28419: {'lr': 0.00021058435481709496, 'samples': 14551040, 'steps': 28419, 'loss/train': 1.5398706197738647} +02/25/2022 14:24:34 - INFO - codeparrot_training - Step 28420: {'lr': 0.00021056819708439092, 'samples': 14551552, 'steps': 28420, 'loss/train': 1.811499834060669} +02/25/2022 14:24:40 - INFO - codeparrot_training - Step 28421: {'lr': 0.00021055203952060022, 'samples': 14552064, 'steps': 28421, 'loss/train': 1.8703547716140747} +02/25/2022 14:24:45 - INFO - codeparrot_training - Step 28422: {'lr': 0.0002105358821257921, 'samples': 14552576, 'steps': 28422, 'loss/train': 1.6546560525894165} +02/25/2022 14:24:49 - INFO - codeparrot_training - Step 28423: {'lr': 0.0002105197249000358, 'samples': 14553088, 'steps': 28423, 'loss/train': 2.8906209468841553} +02/25/2022 14:24:55 - INFO - codeparrot_training - Step 28424: {'lr': 0.00021050356784340033, 'samples': 14553600, 'steps': 28424, 'loss/train': 1.5693621635437012} +02/25/2022 14:24:58 - INFO - codeparrot_training - Step 28425: {'lr': 0.00021048741095595506, 'samples': 14554112, 'steps': 28425, 'loss/train': 1.0829867124557495} +02/25/2022 14:25:04 - INFO - codeparrot_training - Step 28426: {'lr': 0.00021047125423776918, 'samples': 14554624, 'steps': 28426, 'loss/train': 0.5926124453544617} +02/25/2022 14:25:07 - INFO - codeparrot_training - Step 28427: {'lr': 0.000210455097688912, 'samples': 14555136, 'steps': 28427, 'loss/train': 1.9430432319641113} +02/25/2022 14:25:13 - INFO - codeparrot_training - Step 28428: {'lr': 0.00021043894130945252, 'samples': 14555648, 'steps': 28428, 'loss/train': 2.2305712699890137} +02/25/2022 14:25:16 - INFO - codeparrot_training - Step 28429: {'lr': 0.00021042278509946004, 'samples': 14556160, 'steps': 28429, 'loss/train': 1.6274597644805908} +02/25/2022 14:25:22 - INFO - codeparrot_training - Step 28430: {'lr': 0.00021040662905900376, 'samples': 14556672, 'steps': 28430, 'loss/train': 2.3341946601867676} +02/25/2022 14:25:25 - INFO - codeparrot_training - Step 28431: {'lr': 0.00021039047318815307, 'samples': 14557184, 'steps': 28431, 'loss/train': 2.4299092292785645} +02/25/2022 14:25:31 - INFO - codeparrot_training - Step 28432: {'lr': 0.00021037431748697688, 'samples': 14557696, 'steps': 28432, 'loss/train': 2.862508773803711} +02/25/2022 14:25:34 - INFO - codeparrot_training - Step 28433: {'lr': 0.00021035816195554452, 'samples': 14558208, 'steps': 28433, 'loss/train': 1.7706307172775269} +02/25/2022 14:25:40 - INFO - codeparrot_training - Step 28434: {'lr': 0.00021034200659392522, 'samples': 14558720, 'steps': 28434, 'loss/train': 1.8307759761810303} +02/25/2022 14:25:44 - INFO - codeparrot_training - Step 28435: {'lr': 0.00021032585140218817, 'samples': 14559232, 'steps': 28435, 'loss/train': 0.9823546409606934} +02/25/2022 14:25:49 - INFO - codeparrot_training - Step 28436: {'lr': 0.0002103096963804027, 'samples': 14559744, 'steps': 28436, 'loss/train': 1.3510633707046509} +02/25/2022 14:25:53 - INFO - codeparrot_training - Step 28437: {'lr': 0.00021029354152863776, 'samples': 14560256, 'steps': 28437, 'loss/train': 2.3421902656555176} +02/25/2022 14:25:58 - INFO - codeparrot_training - Step 28438: {'lr': 0.00021027738684696267, 'samples': 14560768, 'steps': 28438, 'loss/train': 1.8667206764221191} +02/25/2022 14:26:02 - INFO - codeparrot_training - Step 28439: {'lr': 0.00021026123233544667, 'samples': 14561280, 'steps': 28439, 'loss/train': 0.923180103302002} +02/25/2022 14:26:07 - INFO - codeparrot_training - Step 28440: {'lr': 0.000210245077994159, 'samples': 14561792, 'steps': 28440, 'loss/train': 3.013241767883301} +02/25/2022 14:26:11 - INFO - codeparrot_training - Step 28441: {'lr': 0.00021022892382316873, 'samples': 14562304, 'steps': 28441, 'loss/train': 1.757417917251587} +02/25/2022 14:26:16 - INFO - codeparrot_training - Step 28442: {'lr': 0.00021021276982254508, 'samples': 14562816, 'steps': 28442, 'loss/train': 1.7152827978134155} +02/25/2022 14:26:20 - INFO - codeparrot_training - Step 28443: {'lr': 0.0002101966159923573, 'samples': 14563328, 'steps': 28443, 'loss/train': 2.326441526412964} +02/25/2022 14:26:25 - INFO - codeparrot_training - Step 28444: {'lr': 0.0002101804623326746, 'samples': 14563840, 'steps': 28444, 'loss/train': 1.1961010694503784} +02/25/2022 14:26:29 - INFO - codeparrot_training - Step 28445: {'lr': 0.00021016430884356627, 'samples': 14564352, 'steps': 28445, 'loss/train': 1.5915242433547974} +02/25/2022 14:26:36 - INFO - codeparrot_training - Step 28446: {'lr': 0.00021014815552510124, 'samples': 14564864, 'steps': 28446, 'loss/train': 1.7820900678634644} +02/25/2022 14:26:39 - INFO - codeparrot_training - Step 28447: {'lr': 0.0002101320023773489, 'samples': 14565376, 'steps': 28447, 'loss/train': 2.502531051635742} +02/25/2022 14:26:45 - INFO - codeparrot_training - Step 28448: {'lr': 0.00021011584940037838, 'samples': 14565888, 'steps': 28448, 'loss/train': 2.571626663208008} +02/25/2022 14:26:48 - INFO - codeparrot_training - Step 28449: {'lr': 0.00021009969659425902, 'samples': 14566400, 'steps': 28449, 'loss/train': 1.7852882146835327} +02/25/2022 14:26:53 - INFO - codeparrot_training - Step 28450: {'lr': 0.00021008354395905978, 'samples': 14566912, 'steps': 28450, 'loss/train': 1.5485172271728516} +02/25/2022 14:26:57 - INFO - codeparrot_training - Step 28451: {'lr': 0.00021006739149484995, 'samples': 14567424, 'steps': 28451, 'loss/train': 2.1825942993164062} +02/25/2022 14:27:03 - INFO - codeparrot_training - Step 28452: {'lr': 0.00021005123920169878, 'samples': 14567936, 'steps': 28452, 'loss/train': 2.541562795639038} +02/25/2022 14:27:06 - INFO - codeparrot_training - Step 28453: {'lr': 0.00021003508707967544, 'samples': 14568448, 'steps': 28453, 'loss/train': 3.115570068359375} +02/25/2022 14:27:11 - INFO - codeparrot_training - Step 28454: {'lr': 0.000210018935128849, 'samples': 14568960, 'steps': 28454, 'loss/train': 1.3251688480377197} +02/25/2022 14:27:15 - INFO - codeparrot_training - Step 28455: {'lr': 0.0002100027833492888, 'samples': 14569472, 'steps': 28455, 'loss/train': 1.4058822393417358} +02/25/2022 14:27:21 - INFO - codeparrot_training - Step 28456: {'lr': 0.000209986631741064, 'samples': 14569984, 'steps': 28456, 'loss/train': 1.6258450746536255} +02/25/2022 14:27:25 - INFO - codeparrot_training - Step 28457: {'lr': 0.00020997048030424372, 'samples': 14570496, 'steps': 28457, 'loss/train': 1.88962721824646} +02/25/2022 14:27:30 - INFO - codeparrot_training - Step 28458: {'lr': 0.00020995432903889725, 'samples': 14571008, 'steps': 28458, 'loss/train': 1.9786540269851685} +02/25/2022 14:27:34 - INFO - codeparrot_training - Step 28459: {'lr': 0.0002099381779450937, 'samples': 14571520, 'steps': 28459, 'loss/train': 1.2691636085510254} +02/25/2022 14:27:39 - INFO - codeparrot_training - Step 28460: {'lr': 0.00020992202702290225, 'samples': 14572032, 'steps': 28460, 'loss/train': 1.8930144309997559} +02/25/2022 14:27:45 - INFO - codeparrot_training - Step 28461: {'lr': 0.00020990587627239208, 'samples': 14572544, 'steps': 28461, 'loss/train': 1.8002294301986694} +02/25/2022 14:27:48 - INFO - codeparrot_training - Step 28462: {'lr': 0.00020988972569363246, 'samples': 14573056, 'steps': 28462, 'loss/train': 2.0443029403686523} +02/25/2022 14:27:52 - INFO - codeparrot_training - Step 28463: {'lr': 0.00020987357528669254, 'samples': 14573568, 'steps': 28463, 'loss/train': 0.8098900318145752} +02/25/2022 14:27:57 - INFO - codeparrot_training - Step 28464: {'lr': 0.00020985742505164143, 'samples': 14574080, 'steps': 28464, 'loss/train': 4.687154769897461} +02/25/2022 14:28:01 - INFO - codeparrot_training - Step 28465: {'lr': 0.00020984127498854834, 'samples': 14574592, 'steps': 28465, 'loss/train': 1.154121994972229} +02/25/2022 14:28:07 - INFO - codeparrot_training - Step 28466: {'lr': 0.0002098251250974826, 'samples': 14575104, 'steps': 28466, 'loss/train': 1.4480441808700562} +02/25/2022 14:28:13 - INFO - codeparrot_training - Step 28467: {'lr': 0.00020980897537851314, 'samples': 14575616, 'steps': 28467, 'loss/train': 0.6143372654914856} +02/25/2022 14:28:16 - INFO - codeparrot_training - Step 28468: {'lr': 0.00020979282583170932, 'samples': 14576128, 'steps': 28468, 'loss/train': 1.7119415998458862} +02/25/2022 14:28:22 - INFO - codeparrot_training - Step 28469: {'lr': 0.00020977667645714023, 'samples': 14576640, 'steps': 28469, 'loss/train': 1.733964443206787} +02/25/2022 14:28:25 - INFO - codeparrot_training - Step 28470: {'lr': 0.00020976052725487507, 'samples': 14577152, 'steps': 28470, 'loss/train': 1.044413447380066} +02/25/2022 14:28:31 - INFO - codeparrot_training - Step 28471: {'lr': 0.00020974437822498317, 'samples': 14577664, 'steps': 28471, 'loss/train': 2.8491978645324707} +02/25/2022 14:28:34 - INFO - codeparrot_training - Step 28472: {'lr': 0.00020972822936753344, 'samples': 14578176, 'steps': 28472, 'loss/train': 1.7251328229904175} +02/25/2022 14:28:40 - INFO - codeparrot_training - Step 28473: {'lr': 0.00020971208068259518, 'samples': 14578688, 'steps': 28473, 'loss/train': 0.9348810315132141} +02/25/2022 14:28:43 - INFO - codeparrot_training - Step 28474: {'lr': 0.0002096959321702376, 'samples': 14579200, 'steps': 28474, 'loss/train': 1.3053526878356934} +02/25/2022 14:28:50 - INFO - codeparrot_training - Step 28475: {'lr': 0.00020967978383052994, 'samples': 14579712, 'steps': 28475, 'loss/train': 1.5097630023956299} +02/25/2022 14:28:53 - INFO - codeparrot_training - Step 28476: {'lr': 0.0002096636356635412, 'samples': 14580224, 'steps': 28476, 'loss/train': 2.7082297801971436} +02/25/2022 14:28:59 - INFO - codeparrot_training - Step 28477: {'lr': 0.0002096474876693406, 'samples': 14580736, 'steps': 28477, 'loss/train': 0.1345982849597931} +02/25/2022 14:29:03 - INFO - codeparrot_training - Step 28478: {'lr': 0.00020963133984799737, 'samples': 14581248, 'steps': 28478, 'loss/train': 1.7590373754501343} +02/25/2022 14:29:08 - INFO - codeparrot_training - Step 28479: {'lr': 0.00020961519219958079, 'samples': 14581760, 'steps': 28479, 'loss/train': 2.068002939224243} +02/25/2022 14:29:12 - INFO - codeparrot_training - Step 28480: {'lr': 0.00020959904472415974, 'samples': 14582272, 'steps': 28480, 'loss/train': 1.5228018760681152} +02/25/2022 14:29:17 - INFO - codeparrot_training - Step 28481: {'lr': 0.0002095828974218036, 'samples': 14582784, 'steps': 28481, 'loss/train': 0.8721629977226257} +02/25/2022 14:29:21 - INFO - codeparrot_training - Step 28482: {'lr': 0.0002095667502925815, 'samples': 14583296, 'steps': 28482, 'loss/train': 0.08726678043603897} +02/25/2022 14:29:26 - INFO - codeparrot_training - Step 28483: {'lr': 0.00020955060333656257, 'samples': 14583808, 'steps': 28483, 'loss/train': 2.0495686531066895} +02/25/2022 14:29:30 - INFO - codeparrot_training - Step 28484: {'lr': 0.00020953445655381615, 'samples': 14584320, 'steps': 28484, 'loss/train': 2.4478495121002197} +02/25/2022 14:29:35 - INFO - codeparrot_training - Step 28485: {'lr': 0.00020951830994441113, 'samples': 14584832, 'steps': 28485, 'loss/train': 1.9732154607772827} +02/25/2022 14:29:39 - INFO - codeparrot_training - Step 28486: {'lr': 0.00020950216350841682, 'samples': 14585344, 'steps': 28486, 'loss/train': 0.6213319301605225} +02/25/2022 14:29:44 - INFO - codeparrot_training - Step 28487: {'lr': 0.00020948601724590237, 'samples': 14585856, 'steps': 28487, 'loss/train': 1.034710168838501} +02/25/2022 14:29:48 - INFO - codeparrot_training - Step 28488: {'lr': 0.0002094698711569371, 'samples': 14586368, 'steps': 28488, 'loss/train': 1.8198992013931274} +02/25/2022 14:29:53 - INFO - codeparrot_training - Step 28489: {'lr': 0.00020945372524158992, 'samples': 14586880, 'steps': 28489, 'loss/train': 0.9788869619369507} +02/25/2022 14:29:57 - INFO - codeparrot_training - Step 28490: {'lr': 0.0002094375794999301, 'samples': 14587392, 'steps': 28490, 'loss/train': 1.9800159931182861} +02/25/2022 14:30:02 - INFO - codeparrot_training - Step 28491: {'lr': 0.0002094214339320268, 'samples': 14587904, 'steps': 28491, 'loss/train': 1.5009678602218628} +02/25/2022 14:30:06 - INFO - codeparrot_training - Step 28492: {'lr': 0.00020940528853794928, 'samples': 14588416, 'steps': 28492, 'loss/train': 0.9870285391807556} +02/25/2022 14:30:12 - INFO - codeparrot_training - Step 28493: {'lr': 0.00020938914331776657, 'samples': 14588928, 'steps': 28493, 'loss/train': 0.7175915837287903} +02/25/2022 14:30:15 - INFO - codeparrot_training - Step 28494: {'lr': 0.00020937299827154782, 'samples': 14589440, 'steps': 28494, 'loss/train': 1.7921266555786133} +02/25/2022 14:30:21 - INFO - codeparrot_training - Step 28495: {'lr': 0.00020935685339936228, 'samples': 14589952, 'steps': 28495, 'loss/train': 1.127036690711975} +02/25/2022 14:30:24 - INFO - codeparrot_training - Step 28496: {'lr': 0.0002093407087012791, 'samples': 14590464, 'steps': 28496, 'loss/train': 0.12431463599205017} +02/25/2022 14:30:30 - INFO - codeparrot_training - Step 28497: {'lr': 0.0002093245641773674, 'samples': 14590976, 'steps': 28497, 'loss/train': 1.3516435623168945} +02/25/2022 14:30:33 - INFO - codeparrot_training - Step 28498: {'lr': 0.00020930841982769635, 'samples': 14591488, 'steps': 28498, 'loss/train': 1.6913585662841797} +02/25/2022 14:30:38 - INFO - codeparrot_training - Step 28499: {'lr': 0.00020929227565233513, 'samples': 14592000, 'steps': 28499, 'loss/train': 1.9581748247146606} +02/25/2022 14:30:42 - INFO - codeparrot_training - Step 28500: {'lr': 0.00020927613165135284, 'samples': 14592512, 'steps': 28500, 'loss/train': 2.6589584350585938} +02/25/2022 14:30:48 - INFO - codeparrot_training - Step 28501: {'lr': 0.00020925998782481868, 'samples': 14593024, 'steps': 28501, 'loss/train': 1.4079232215881348} +02/25/2022 14:30:51 - INFO - codeparrot_training - Step 28502: {'lr': 0.0002092438441728018, 'samples': 14593536, 'steps': 28502, 'loss/train': 1.5161478519439697} +02/25/2022 14:30:57 - INFO - codeparrot_training - Step 28503: {'lr': 0.00020922770069537136, 'samples': 14594048, 'steps': 28503, 'loss/train': 1.4210277795791626} +02/25/2022 14:31:01 - INFO - codeparrot_training - Step 28504: {'lr': 0.00020921155739259646, 'samples': 14594560, 'steps': 28504, 'loss/train': 3.5918595790863037} +02/25/2022 14:31:06 - INFO - codeparrot_training - Step 28505: {'lr': 0.0002091954142645463, 'samples': 14595072, 'steps': 28505, 'loss/train': 2.795027732849121} +02/25/2022 14:31:10 - INFO - codeparrot_training - Step 28506: {'lr': 0.00020917927131129015, 'samples': 14595584, 'steps': 28506, 'loss/train': 1.9583570957183838} +02/25/2022 14:31:15 - INFO - codeparrot_training - Step 28507: {'lr': 0.0002091631285328969, 'samples': 14596096, 'steps': 28507, 'loss/train': 2.0473642349243164} +02/25/2022 14:31:19 - INFO - codeparrot_training - Step 28508: {'lr': 0.00020914698592943586, 'samples': 14596608, 'steps': 28508, 'loss/train': 1.3696526288986206} +02/25/2022 14:31:24 - INFO - codeparrot_training - Step 28509: {'lr': 0.00020913084350097618, 'samples': 14597120, 'steps': 28509, 'loss/train': 2.081183433532715} +02/25/2022 14:31:27 - INFO - codeparrot_training - Step 28510: {'lr': 0.00020911470124758706, 'samples': 14597632, 'steps': 28510, 'loss/train': 1.8736523389816284} +02/25/2022 14:31:33 - INFO - codeparrot_training - Step 28511: {'lr': 0.00020909855916933747, 'samples': 14598144, 'steps': 28511, 'loss/train': 1.8217201232910156} +02/25/2022 14:31:40 - INFO - codeparrot_training - Step 28512: {'lr': 0.0002090824172662967, 'samples': 14598656, 'steps': 28512, 'loss/train': 1.6766507625579834} +02/25/2022 14:31:43 - INFO - codeparrot_training - Step 28513: {'lr': 0.00020906627553853382, 'samples': 14599168, 'steps': 28513, 'loss/train': 1.1767115592956543} +02/25/2022 14:31:49 - INFO - codeparrot_training - Step 28514: {'lr': 0.00020905013398611817, 'samples': 14599680, 'steps': 28514, 'loss/train': 2.170016288757324} +02/25/2022 14:31:52 - INFO - codeparrot_training - Step 28515: {'lr': 0.0002090339926091186, 'samples': 14600192, 'steps': 28515, 'loss/train': 2.7046866416931152} +02/25/2022 14:31:58 - INFO - codeparrot_training - Step 28516: {'lr': 0.0002090178514076044, 'samples': 14600704, 'steps': 28516, 'loss/train': 2.140939235687256} +02/25/2022 14:32:01 - INFO - codeparrot_training - Step 28517: {'lr': 0.00020900171038164473, 'samples': 14601216, 'steps': 28517, 'loss/train': 1.3000941276550293} +02/25/2022 14:32:07 - INFO - codeparrot_training - Step 28518: {'lr': 0.00020898556953130872, 'samples': 14601728, 'steps': 28518, 'loss/train': 1.7874553203582764} +02/25/2022 14:32:10 - INFO - codeparrot_training - Step 28519: {'lr': 0.00020896942885666558, 'samples': 14602240, 'steps': 28519, 'loss/train': 0.09542856365442276} +02/25/2022 14:32:16 - INFO - codeparrot_training - Step 28520: {'lr': 0.0002089532883577843, 'samples': 14602752, 'steps': 28520, 'loss/train': 1.963853120803833} +02/25/2022 14:32:19 - INFO - codeparrot_training - Step 28521: {'lr': 0.00020893714803473407, 'samples': 14603264, 'steps': 28521, 'loss/train': 2.3917295932769775} +02/25/2022 14:32:26 - INFO - codeparrot_training - Step 28522: {'lr': 0.00020892100788758407, 'samples': 14603776, 'steps': 28522, 'loss/train': 1.9010334014892578} +02/25/2022 14:32:29 - INFO - codeparrot_training - Step 28523: {'lr': 0.00020890486791640356, 'samples': 14604288, 'steps': 28523, 'loss/train': 2.143414258956909} +02/25/2022 14:32:35 - INFO - codeparrot_training - Step 28524: {'lr': 0.00020888872812126143, 'samples': 14604800, 'steps': 28524, 'loss/train': 2.0403006076812744} +02/25/2022 14:32:38 - INFO - codeparrot_training - Step 28525: {'lr': 0.00020887258850222693, 'samples': 14605312, 'steps': 28525, 'loss/train': 3.372239112854004} +02/25/2022 14:32:44 - INFO - codeparrot_training - Step 28526: {'lr': 0.0002088564490593692, 'samples': 14605824, 'steps': 28526, 'loss/train': 1.5008960962295532} +02/25/2022 14:32:48 - INFO - codeparrot_training - Step 28527: {'lr': 0.00020884030979275749, 'samples': 14606336, 'steps': 28527, 'loss/train': 2.028052568435669} +02/25/2022 14:32:53 - INFO - codeparrot_training - Step 28528: {'lr': 0.0002088241707024607, 'samples': 14606848, 'steps': 28528, 'loss/train': 1.7060691118240356} +02/25/2022 14:32:57 - INFO - codeparrot_training - Step 28529: {'lr': 0.0002088080317885481, 'samples': 14607360, 'steps': 28529, 'loss/train': 0.13562779128551483} +02/25/2022 14:33:02 - INFO - codeparrot_training - Step 28530: {'lr': 0.0002087918930510888, 'samples': 14607872, 'steps': 28530, 'loss/train': 2.797316312789917} +02/25/2022 14:33:06 - INFO - codeparrot_training - Step 28531: {'lr': 0.00020877575449015196, 'samples': 14608384, 'steps': 28531, 'loss/train': 2.104167938232422} +02/25/2022 14:33:11 - INFO - codeparrot_training - Step 28532: {'lr': 0.0002087596161058068, 'samples': 14608896, 'steps': 28532, 'loss/train': 2.1439998149871826} +02/25/2022 14:33:15 - INFO - codeparrot_training - Step 28533: {'lr': 0.00020874347789812223, 'samples': 14609408, 'steps': 28533, 'loss/train': 1.8332749605178833} +02/25/2022 14:33:21 - INFO - codeparrot_training - Step 28534: {'lr': 0.00020872733986716753, 'samples': 14609920, 'steps': 28534, 'loss/train': 0.17677833139896393} +02/25/2022 14:33:24 - INFO - codeparrot_training - Step 28535: {'lr': 0.00020871120201301175, 'samples': 14610432, 'steps': 28535, 'loss/train': 2.381044626235962} +02/25/2022 14:33:30 - INFO - codeparrot_training - Step 28536: {'lr': 0.00020869506433572423, 'samples': 14610944, 'steps': 28536, 'loss/train': 1.4228692054748535} +02/25/2022 14:33:33 - INFO - codeparrot_training - Step 28537: {'lr': 0.0002086789268353738, 'samples': 14611456, 'steps': 28537, 'loss/train': 1.1511629819869995} +02/25/2022 14:33:40 - INFO - codeparrot_training - Step 28538: {'lr': 0.00020866278951202976, 'samples': 14611968, 'steps': 28538, 'loss/train': 1.707629680633545} +02/25/2022 14:33:43 - INFO - codeparrot_training - Step 28539: {'lr': 0.00020864665236576123, 'samples': 14612480, 'steps': 28539, 'loss/train': 1.5443429946899414} +02/25/2022 14:33:48 - INFO - codeparrot_training - Step 28540: {'lr': 0.0002086305153966373, 'samples': 14612992, 'steps': 28540, 'loss/train': 1.6012040376663208} +02/25/2022 14:33:52 - INFO - codeparrot_training - Step 28541: {'lr': 0.00020861437860472706, 'samples': 14613504, 'steps': 28541, 'loss/train': 1.6184651851654053} +02/25/2022 14:33:57 - INFO - codeparrot_training - Step 28542: {'lr': 0.0002085982419900997, 'samples': 14614016, 'steps': 28542, 'loss/train': 2.0229878425598145} +02/25/2022 14:34:01 - INFO - codeparrot_training - Step 28543: {'lr': 0.00020858210555282436, 'samples': 14614528, 'steps': 28543, 'loss/train': 1.0092744827270508} +02/25/2022 14:34:06 - INFO - codeparrot_training - Step 28544: {'lr': 0.00020856596929297007, 'samples': 14615040, 'steps': 28544, 'loss/train': 1.4197784662246704} +02/25/2022 14:34:10 - INFO - codeparrot_training - Step 28545: {'lr': 0.00020854983321060607, 'samples': 14615552, 'steps': 28545, 'loss/train': 2.2018327713012695} +02/25/2022 14:34:15 - INFO - codeparrot_training - Step 28546: {'lr': 0.00020853369730580143, 'samples': 14616064, 'steps': 28546, 'loss/train': 2.091029644012451} +02/25/2022 14:34:19 - INFO - codeparrot_training - Step 28547: {'lr': 0.00020851756157862523, 'samples': 14616576, 'steps': 28547, 'loss/train': 1.0809040069580078} +02/25/2022 14:34:25 - INFO - codeparrot_training - Step 28548: {'lr': 0.00020850142602914657, 'samples': 14617088, 'steps': 28548, 'loss/train': 1.5546313524246216} +02/25/2022 14:34:28 - INFO - codeparrot_training - Step 28549: {'lr': 0.0002084852906574347, 'samples': 14617600, 'steps': 28549, 'loss/train': 1.2265230417251587} +02/25/2022 14:34:34 - INFO - codeparrot_training - Step 28550: {'lr': 0.0002084691554635587, 'samples': 14618112, 'steps': 28550, 'loss/train': 2.349365472793579} +02/25/2022 14:34:39 - INFO - codeparrot_training - Step 28551: {'lr': 0.00020845302044758757, 'samples': 14618624, 'steps': 28551, 'loss/train': 1.6875478029251099} +02/25/2022 14:34:43 - INFO - codeparrot_training - Step 28552: {'lr': 0.0002084368856095905, 'samples': 14619136, 'steps': 28552, 'loss/train': 1.7619774341583252} +02/25/2022 14:34:48 - INFO - codeparrot_training - Step 28553: {'lr': 0.0002084207509496366, 'samples': 14619648, 'steps': 28553, 'loss/train': 1.9773417711257935} +02/25/2022 14:34:52 - INFO - codeparrot_training - Step 28554: {'lr': 0.00020840461646779514, 'samples': 14620160, 'steps': 28554, 'loss/train': 1.5791746377944946} +02/25/2022 14:34:57 - INFO - codeparrot_training - Step 28555: {'lr': 0.00020838848216413498, 'samples': 14620672, 'steps': 28555, 'loss/train': 1.9619215726852417} +02/25/2022 14:35:01 - INFO - codeparrot_training - Step 28556: {'lr': 0.00020837234803872535, 'samples': 14621184, 'steps': 28556, 'loss/train': 2.4131245613098145} +02/25/2022 14:35:08 - INFO - codeparrot_training - Step 28557: {'lr': 0.00020835621409163535, 'samples': 14621696, 'steps': 28557, 'loss/train': 1.222489356994629} +02/25/2022 14:35:11 - INFO - codeparrot_training - Step 28558: {'lr': 0.00020834008032293423, 'samples': 14622208, 'steps': 28558, 'loss/train': 2.3088722229003906} +02/25/2022 14:35:17 - INFO - codeparrot_training - Step 28559: {'lr': 0.00020832394673269084, 'samples': 14622720, 'steps': 28559, 'loss/train': 1.247127652168274} +02/25/2022 14:35:20 - INFO - codeparrot_training - Step 28560: {'lr': 0.00020830781332097445, 'samples': 14623232, 'steps': 28560, 'loss/train': 2.018090009689331} +02/25/2022 14:35:26 - INFO - codeparrot_training - Step 28561: {'lr': 0.00020829168008785416, 'samples': 14623744, 'steps': 28561, 'loss/train': 1.283644199371338} +02/25/2022 14:35:29 - INFO - codeparrot_training - Step 28562: {'lr': 0.00020827554703339916, 'samples': 14624256, 'steps': 28562, 'loss/train': 0.7227175235748291} +02/25/2022 14:35:35 - INFO - codeparrot_training - Step 28563: {'lr': 0.00020825941415767834, 'samples': 14624768, 'steps': 28563, 'loss/train': 1.8993874788284302} +02/25/2022 14:35:38 - INFO - codeparrot_training - Step 28564: {'lr': 0.00020824328146076097, 'samples': 14625280, 'steps': 28564, 'loss/train': 0.7130821943283081} +02/25/2022 14:35:43 - INFO - codeparrot_training - Step 28565: {'lr': 0.0002082271489427161, 'samples': 14625792, 'steps': 28565, 'loss/train': 1.4089535474777222} +02/25/2022 14:35:47 - INFO - codeparrot_training - Step 28566: {'lr': 0.00020821101660361288, 'samples': 14626304, 'steps': 28566, 'loss/train': 2.0551435947418213} +02/25/2022 14:35:52 - INFO - codeparrot_training - Step 28567: {'lr': 0.0002081948844435205, 'samples': 14626816, 'steps': 28567, 'loss/train': 0.628305971622467} +02/25/2022 14:35:56 - INFO - codeparrot_training - Step 28568: {'lr': 0.00020817875246250783, 'samples': 14627328, 'steps': 28568, 'loss/train': 1.8296648263931274} +02/25/2022 14:36:02 - INFO - codeparrot_training - Step 28569: {'lr': 0.00020816262066064413, 'samples': 14627840, 'steps': 28569, 'loss/train': 2.1563613414764404} +02/25/2022 14:36:06 - INFO - codeparrot_training - Step 28570: {'lr': 0.0002081464890379985, 'samples': 14628352, 'steps': 28570, 'loss/train': 0.7723124027252197} +02/25/2022 14:36:12 - INFO - codeparrot_training - Step 28571: {'lr': 0.00020813035759464007, 'samples': 14628864, 'steps': 28571, 'loss/train': 1.6140035390853882} +02/25/2022 14:36:15 - INFO - codeparrot_training - Step 28572: {'lr': 0.00020811422633063782, 'samples': 14629376, 'steps': 28572, 'loss/train': 1.7647329568862915} +02/25/2022 14:36:21 - INFO - codeparrot_training - Step 28573: {'lr': 0.00020809809524606091, 'samples': 14629888, 'steps': 28573, 'loss/train': 1.649605631828308} +02/25/2022 14:36:24 - INFO - codeparrot_training - Step 28574: {'lr': 0.00020808196434097848, 'samples': 14630400, 'steps': 28574, 'loss/train': 1.9374405145645142} +02/25/2022 14:36:30 - INFO - codeparrot_training - Step 28575: {'lr': 0.00020806583361545965, 'samples': 14630912, 'steps': 28575, 'loss/train': 0.14211036264896393} +02/25/2022 14:36:33 - INFO - codeparrot_training - Step 28576: {'lr': 0.00020804970306957343, 'samples': 14631424, 'steps': 28576, 'loss/train': 2.1012015342712402} +02/25/2022 14:36:39 - INFO - codeparrot_training - Step 28577: {'lr': 0.00020803357270338896, 'samples': 14631936, 'steps': 28577, 'loss/train': 1.3976686000823975} +02/25/2022 14:36:42 - INFO - codeparrot_training - Step 28578: {'lr': 0.0002080174425169753, 'samples': 14632448, 'steps': 28578, 'loss/train': 2.407975673675537} +02/25/2022 14:36:49 - INFO - codeparrot_training - Step 28579: {'lr': 0.00020800131251040167, 'samples': 14632960, 'steps': 28579, 'loss/train': 1.723849892616272} +02/25/2022 14:36:52 - INFO - codeparrot_training - Step 28580: {'lr': 0.00020798518268373706, 'samples': 14633472, 'steps': 28580, 'loss/train': 1.3217285871505737} +02/25/2022 14:36:58 - INFO - codeparrot_training - Step 28581: {'lr': 0.00020796905303705054, 'samples': 14633984, 'steps': 28581, 'loss/train': 1.6374187469482422} +02/25/2022 14:37:01 - INFO - codeparrot_training - Step 28582: {'lr': 0.00020795292357041127, 'samples': 14634496, 'steps': 28582, 'loss/train': 1.4681124687194824} +02/25/2022 14:37:07 - INFO - codeparrot_training - Step 28583: {'lr': 0.00020793679428388835, 'samples': 14635008, 'steps': 28583, 'loss/train': 1.5293604135513306} +02/25/2022 14:37:10 - INFO - codeparrot_training - Step 28584: {'lr': 0.00020792066517755087, 'samples': 14635520, 'steps': 28584, 'loss/train': 2.2930104732513428} +02/25/2022 14:37:16 - INFO - codeparrot_training - Step 28585: {'lr': 0.0002079045362514678, 'samples': 14636032, 'steps': 28585, 'loss/train': 2.5324337482452393} +02/25/2022 14:37:19 - INFO - codeparrot_training - Step 28586: {'lr': 0.00020788840750570844, 'samples': 14636544, 'steps': 28586, 'loss/train': 0.18094457685947418} +02/25/2022 14:37:25 - INFO - codeparrot_training - Step 28587: {'lr': 0.0002078722789403417, 'samples': 14637056, 'steps': 28587, 'loss/train': 1.896743655204773} +02/25/2022 14:37:28 - INFO - codeparrot_training - Step 28588: {'lr': 0.00020785615055543677, 'samples': 14637568, 'steps': 28588, 'loss/train': 1.4646371603012085} +02/25/2022 14:37:34 - INFO - codeparrot_training - Step 28589: {'lr': 0.0002078400223510627, 'samples': 14638080, 'steps': 28589, 'loss/train': 1.76212477684021} +02/25/2022 14:37:37 - INFO - codeparrot_training - Step 28590: {'lr': 0.0002078238943272886, 'samples': 14638592, 'steps': 28590, 'loss/train': 0.7759493589401245} +02/25/2022 14:37:43 - INFO - codeparrot_training - Step 28591: {'lr': 0.00020780776648418348, 'samples': 14639104, 'steps': 28591, 'loss/train': 1.2317774295806885} +02/25/2022 14:37:46 - INFO - codeparrot_training - Step 28592: {'lr': 0.00020779163882181655, 'samples': 14639616, 'steps': 28592, 'loss/train': 1.692404866218567} +02/25/2022 14:37:52 - INFO - codeparrot_training - Step 28593: {'lr': 0.00020777551134025683, 'samples': 14640128, 'steps': 28593, 'loss/train': 2.312106132507324} +02/25/2022 14:37:55 - INFO - codeparrot_training - Step 28594: {'lr': 0.00020775938403957346, 'samples': 14640640, 'steps': 28594, 'loss/train': 2.040748119354248} +02/25/2022 14:38:02 - INFO - codeparrot_training - Step 28595: {'lr': 0.00020774325691983537, 'samples': 14641152, 'steps': 28595, 'loss/train': 1.5753356218338013} +02/25/2022 14:38:06 - INFO - codeparrot_training - Step 28596: {'lr': 0.00020772712998111178, 'samples': 14641664, 'steps': 28596, 'loss/train': 1.6934571266174316} +02/25/2022 14:38:11 - INFO - codeparrot_training - Step 28597: {'lr': 0.00020771100322347185, 'samples': 14642176, 'steps': 28597, 'loss/train': 1.2630354166030884} +02/25/2022 14:38:15 - INFO - codeparrot_training - Step 28598: {'lr': 0.00020769487664698444, 'samples': 14642688, 'steps': 28598, 'loss/train': 0.935442328453064} +02/25/2022 14:38:20 - INFO - codeparrot_training - Step 28599: {'lr': 0.00020767875025171876, 'samples': 14643200, 'steps': 28599, 'loss/train': 1.4629456996917725} +02/25/2022 14:38:24 - INFO - codeparrot_training - Step 28600: {'lr': 0.00020766262403774385, 'samples': 14643712, 'steps': 28600, 'loss/train': 1.1431686878204346} +02/25/2022 14:38:29 - INFO - codeparrot_training - Step 28601: {'lr': 0.00020764649800512883, 'samples': 14644224, 'steps': 28601, 'loss/train': 1.6016335487365723} +02/25/2022 14:38:33 - INFO - codeparrot_training - Step 28602: {'lr': 0.00020763037215394288, 'samples': 14644736, 'steps': 28602, 'loss/train': 0.3140012323856354} +02/25/2022 14:38:39 - INFO - codeparrot_training - Step 28603: {'lr': 0.00020761424648425484, 'samples': 14645248, 'steps': 28603, 'loss/train': 0.8882197737693787} +02/25/2022 14:38:42 - INFO - codeparrot_training - Step 28604: {'lr': 0.0002075981209961339, 'samples': 14645760, 'steps': 28604, 'loss/train': 1.863071084022522} +02/25/2022 14:38:48 - INFO - codeparrot_training - Step 28605: {'lr': 0.00020758199568964917, 'samples': 14646272, 'steps': 28605, 'loss/train': 1.4822843074798584} +02/25/2022 14:38:51 - INFO - codeparrot_training - Step 28606: {'lr': 0.00020756587056486978, 'samples': 14646784, 'steps': 28606, 'loss/train': 2.31294846534729} +02/25/2022 14:38:57 - INFO - codeparrot_training - Step 28607: {'lr': 0.00020754974562186467, 'samples': 14647296, 'steps': 28607, 'loss/train': 1.0011610984802246} +02/25/2022 14:39:00 - INFO - codeparrot_training - Step 28608: {'lr': 0.00020753362086070292, 'samples': 14647808, 'steps': 28608, 'loss/train': 1.8208271265029907} +02/25/2022 14:39:06 - INFO - codeparrot_training - Step 28609: {'lr': 0.00020751749628145368, 'samples': 14648320, 'steps': 28609, 'loss/train': 1.6482386589050293} +02/25/2022 14:39:09 - INFO - codeparrot_training - Step 28610: {'lr': 0.0002075013718841861, 'samples': 14648832, 'steps': 28610, 'loss/train': 1.8164997100830078} +02/25/2022 14:39:15 - INFO - codeparrot_training - Step 28611: {'lr': 0.000207485247668969, 'samples': 14649344, 'steps': 28611, 'loss/train': 0.45227307081222534} +02/25/2022 14:39:18 - INFO - codeparrot_training - Step 28612: {'lr': 0.00020746912363587164, 'samples': 14649856, 'steps': 28612, 'loss/train': 1.3443998098373413} +02/25/2022 14:39:24 - INFO - codeparrot_training - Step 28613: {'lr': 0.00020745299978496305, 'samples': 14650368, 'steps': 28613, 'loss/train': 2.8215768337249756} +02/25/2022 14:39:27 - INFO - codeparrot_training - Step 28614: {'lr': 0.00020743687611631228, 'samples': 14650880, 'steps': 28614, 'loss/train': 2.068390369415283} +02/25/2022 14:39:33 - INFO - codeparrot_training - Step 28615: {'lr': 0.00020742075262998854, 'samples': 14651392, 'steps': 28615, 'loss/train': 2.1479415893554688} +02/25/2022 14:39:37 - INFO - codeparrot_training - Step 28616: {'lr': 0.00020740462932606067, 'samples': 14651904, 'steps': 28616, 'loss/train': 2.1982851028442383} +02/25/2022 14:39:42 - INFO - codeparrot_training - Step 28617: {'lr': 0.00020738850620459782, 'samples': 14652416, 'steps': 28617, 'loss/train': 1.437268853187561} +02/25/2022 14:39:46 - INFO - codeparrot_training - Step 28618: {'lr': 0.00020737238326566909, 'samples': 14652928, 'steps': 28618, 'loss/train': 1.3859179019927979} +02/25/2022 14:39:51 - INFO - codeparrot_training - Step 28619: {'lr': 0.00020735626050934368, 'samples': 14653440, 'steps': 28619, 'loss/train': 1.6420785188674927} +02/25/2022 14:39:55 - INFO - codeparrot_training - Step 28620: {'lr': 0.00020734013793569035, 'samples': 14653952, 'steps': 28620, 'loss/train': 2.492062568664551} +02/25/2022 14:40:00 - INFO - codeparrot_training - Step 28621: {'lr': 0.00020732401554477835, 'samples': 14654464, 'steps': 28621, 'loss/train': 0.6783117055892944} +02/25/2022 14:40:04 - INFO - codeparrot_training - Step 28622: {'lr': 0.0002073078933366767, 'samples': 14654976, 'steps': 28622, 'loss/train': 1.1454886198043823} +02/25/2022 14:40:09 - INFO - codeparrot_training - Step 28623: {'lr': 0.0002072917713114546, 'samples': 14655488, 'steps': 28623, 'loss/train': 0.911496639251709} +02/25/2022 14:40:15 - INFO - codeparrot_training - Step 28624: {'lr': 0.00020727564946918087, 'samples': 14656000, 'steps': 28624, 'loss/train': 2.397329092025757} +02/25/2022 14:40:19 - INFO - codeparrot_training - Step 28625: {'lr': 0.00020725952780992467, 'samples': 14656512, 'steps': 28625, 'loss/train': 1.5379050970077515} +02/25/2022 14:40:24 - INFO - codeparrot_training - Step 28626: {'lr': 0.00020724340633375513, 'samples': 14657024, 'steps': 28626, 'loss/train': 1.7232321500778198} +02/25/2022 14:40:28 - INFO - codeparrot_training - Step 28627: {'lr': 0.0002072272850407413, 'samples': 14657536, 'steps': 28627, 'loss/train': 2.2017805576324463} +02/25/2022 14:40:33 - INFO - codeparrot_training - Step 28628: {'lr': 0.00020721116393095218, 'samples': 14658048, 'steps': 28628, 'loss/train': 1.135353922843933} +02/25/2022 14:40:37 - INFO - codeparrot_training - Step 28629: {'lr': 0.0002071950430044568, 'samples': 14658560, 'steps': 28629, 'loss/train': 1.7855346202850342} +02/25/2022 14:40:43 - INFO - codeparrot_training - Step 28630: {'lr': 0.00020717892226132433, 'samples': 14659072, 'steps': 28630, 'loss/train': 1.7313529253005981} +02/25/2022 14:40:46 - INFO - codeparrot_training - Step 28631: {'lr': 0.00020716280170162373, 'samples': 14659584, 'steps': 28631, 'loss/train': 1.4980180263519287} +02/25/2022 14:40:51 - INFO - codeparrot_training - Step 28632: {'lr': 0.00020714668132542407, 'samples': 14660096, 'steps': 28632, 'loss/train': 0.990715503692627} +02/25/2022 14:40:55 - INFO - codeparrot_training - Step 28633: {'lr': 0.00020713056113279442, 'samples': 14660608, 'steps': 28633, 'loss/train': 1.4652318954467773} +02/25/2022 14:41:01 - INFO - codeparrot_training - Step 28634: {'lr': 0.00020711444112380388, 'samples': 14661120, 'steps': 28634, 'loss/train': 1.8936306238174438} +02/25/2022 14:41:04 - INFO - codeparrot_training - Step 28635: {'lr': 0.0002070983212985214, 'samples': 14661632, 'steps': 28635, 'loss/train': 2.2042338848114014} +02/25/2022 14:41:10 - INFO - codeparrot_training - Step 28636: {'lr': 0.0002070822016570161, 'samples': 14662144, 'steps': 28636, 'loss/train': 1.4645395278930664} +02/25/2022 14:41:13 - INFO - codeparrot_training - Step 28637: {'lr': 0.00020706608219935707, 'samples': 14662656, 'steps': 28637, 'loss/train': 2.1972157955169678} +02/25/2022 14:41:19 - INFO - codeparrot_training - Step 28638: {'lr': 0.00020704996292561324, 'samples': 14663168, 'steps': 28638, 'loss/train': 1.8572359085083008} +02/25/2022 14:41:22 - INFO - codeparrot_training - Step 28639: {'lr': 0.00020703384383585375, 'samples': 14663680, 'steps': 28639, 'loss/train': 1.9268752336502075} +02/25/2022 14:41:28 - INFO - codeparrot_training - Step 28640: {'lr': 0.00020701772493014758, 'samples': 14664192, 'steps': 28640, 'loss/train': 2.246455669403076} +02/25/2022 14:41:32 - INFO - codeparrot_training - Step 28641: {'lr': 0.000207001606208564, 'samples': 14664704, 'steps': 28641, 'loss/train': 2.3508694171905518} +02/25/2022 14:41:35 - INFO - codeparrot_training - Step 28642: {'lr': 0.00020698548767117173, 'samples': 14665216, 'steps': 28642, 'loss/train': 2.2325327396392822} +02/25/2022 14:41:41 - INFO - codeparrot_training - Step 28643: {'lr': 0.00020696936931804, 'samples': 14665728, 'steps': 28643, 'loss/train': 1.6687617301940918} +02/25/2022 14:41:44 - INFO - codeparrot_training - Step 28644: {'lr': 0.0002069532511492378, 'samples': 14666240, 'steps': 28644, 'loss/train': 1.7231296300888062} +02/25/2022 14:41:50 - INFO - codeparrot_training - Step 28645: {'lr': 0.00020693713316483433, 'samples': 14666752, 'steps': 28645, 'loss/train': 1.0525126457214355} +02/25/2022 14:41:53 - INFO - codeparrot_training - Step 28646: {'lr': 0.00020692101536489842, 'samples': 14667264, 'steps': 28646, 'loss/train': 1.8687127828598022} +02/25/2022 14:41:59 - INFO - codeparrot_training - Step 28647: {'lr': 0.00020690489774949917, 'samples': 14667776, 'steps': 28647, 'loss/train': 1.6920956373214722} +02/25/2022 14:42:02 - INFO - codeparrot_training - Step 28648: {'lr': 0.00020688878031870566, 'samples': 14668288, 'steps': 28648, 'loss/train': 2.2957491874694824} +02/25/2022 14:42:08 - INFO - codeparrot_training - Step 28649: {'lr': 0.00020687266307258702, 'samples': 14668800, 'steps': 28649, 'loss/train': 1.5056724548339844} +02/25/2022 14:42:11 - INFO - codeparrot_training - Step 28650: {'lr': 0.00020685654601121213, 'samples': 14669312, 'steps': 28650, 'loss/train': 3.3470239639282227} +02/25/2022 14:42:18 - INFO - codeparrot_training - Step 28651: {'lr': 0.00020684042913465006, 'samples': 14669824, 'steps': 28651, 'loss/train': 2.2611420154571533} +02/25/2022 14:42:23 - INFO - codeparrot_training - Step 28652: {'lr': 0.0002068243124429699, 'samples': 14670336, 'steps': 28652, 'loss/train': 1.4850214719772339} +02/25/2022 14:42:27 - INFO - codeparrot_training - Step 28653: {'lr': 0.00020680819593624068, 'samples': 14670848, 'steps': 28653, 'loss/train': 0.2812162935733795} +02/25/2022 14:42:30 - INFO - codeparrot_training - Step 28654: {'lr': 0.00020679207961453154, 'samples': 14671360, 'steps': 28654, 'loss/train': 2.114703416824341} +02/25/2022 14:42:36 - INFO - codeparrot_training - Step 28655: {'lr': 0.0002067759634779113, 'samples': 14671872, 'steps': 28655, 'loss/train': 1.8977705240249634} +02/25/2022 14:42:39 - INFO - codeparrot_training - Step 28656: {'lr': 0.0002067598475264491, 'samples': 14672384, 'steps': 28656, 'loss/train': 3.3362317085266113} +02/25/2022 14:42:45 - INFO - codeparrot_training - Step 28657: {'lr': 0.00020674373176021398, 'samples': 14672896, 'steps': 28657, 'loss/train': 2.0299575328826904} +02/25/2022 14:42:50 - INFO - codeparrot_training - Step 28658: {'lr': 0.0002067276161792751, 'samples': 14673408, 'steps': 28658, 'loss/train': 1.2983375787734985} +02/25/2022 14:42:54 - INFO - codeparrot_training - Step 28659: {'lr': 0.00020671150078370126, 'samples': 14673920, 'steps': 28659, 'loss/train': 1.2460395097732544} +02/25/2022 14:43:00 - INFO - codeparrot_training - Step 28660: {'lr': 0.0002066953855735616, 'samples': 14674432, 'steps': 28660, 'loss/train': 1.99422287940979} +02/25/2022 14:43:03 - INFO - codeparrot_training - Step 28661: {'lr': 0.00020667927054892517, 'samples': 14674944, 'steps': 28661, 'loss/train': 1.4847508668899536} +02/25/2022 14:43:09 - INFO - codeparrot_training - Step 28662: {'lr': 0.000206663155709861, 'samples': 14675456, 'steps': 28662, 'loss/train': 2.0936968326568604} +02/25/2022 14:43:12 - INFO - codeparrot_training - Step 28663: {'lr': 0.00020664704105643819, 'samples': 14675968, 'steps': 28663, 'loss/train': 1.7086166143417358} +02/25/2022 14:43:18 - INFO - codeparrot_training - Step 28664: {'lr': 0.00020663092658872558, 'samples': 14676480, 'steps': 28664, 'loss/train': 1.5557129383087158} +02/25/2022 14:43:21 - INFO - codeparrot_training - Step 28665: {'lr': 0.00020661481230679232, 'samples': 14676992, 'steps': 28665, 'loss/train': 1.6385211944580078} +02/25/2022 14:43:27 - INFO - codeparrot_training - Step 28666: {'lr': 0.00020659869821070743, 'samples': 14677504, 'steps': 28666, 'loss/train': 2.0725016593933105} +02/25/2022 14:43:30 - INFO - codeparrot_training - Step 28667: {'lr': 0.00020658258430054005, 'samples': 14678016, 'steps': 28667, 'loss/train': 0.871519148349762} +02/25/2022 14:43:36 - INFO - codeparrot_training - Step 28668: {'lr': 0.00020656647057635895, 'samples': 14678528, 'steps': 28668, 'loss/train': 1.3905963897705078} +02/25/2022 14:43:39 - INFO - codeparrot_training - Step 28669: {'lr': 0.00020655035703823335, 'samples': 14679040, 'steps': 28669, 'loss/train': 2.8349263668060303} +02/25/2022 14:43:45 - INFO - codeparrot_training - Step 28670: {'lr': 0.00020653424368623224, 'samples': 14679552, 'steps': 28670, 'loss/train': 2.7927067279815674} +02/25/2022 14:43:49 - INFO - codeparrot_training - Step 28671: {'lr': 0.00020651813052042465, 'samples': 14680064, 'steps': 28671, 'loss/train': 2.0033702850341797} +02/25/2022 14:43:54 - INFO - codeparrot_training - Step 28672: {'lr': 0.0002065020175408795, 'samples': 14680576, 'steps': 28672, 'loss/train': 1.4825894832611084} +02/25/2022 14:43:58 - INFO - codeparrot_training - Step 28673: {'lr': 0.00020648590474766593, 'samples': 14681088, 'steps': 28673, 'loss/train': 1.778352975845337} +02/25/2022 14:44:03 - INFO - codeparrot_training - Step 28674: {'lr': 0.00020646979214085297, 'samples': 14681600, 'steps': 28674, 'loss/train': 1.3953208923339844} +02/25/2022 14:44:07 - INFO - codeparrot_training - Step 28675: {'lr': 0.00020645367972050953, 'samples': 14682112, 'steps': 28675, 'loss/train': 1.6745994091033936} +02/25/2022 14:44:12 - INFO - codeparrot_training - Step 28676: {'lr': 0.00020643756748670475, 'samples': 14682624, 'steps': 28676, 'loss/train': 2.226574420928955} +02/25/2022 14:44:16 - INFO - codeparrot_training - Step 28677: {'lr': 0.00020642145543950762, 'samples': 14683136, 'steps': 28677, 'loss/train': 2.3025829792022705} +02/25/2022 14:44:21 - INFO - codeparrot_training - Step 28678: {'lr': 0.00020640534357898707, 'samples': 14683648, 'steps': 28678, 'loss/train': 1.069924235343933} +02/25/2022 14:44:25 - INFO - codeparrot_training - Step 28679: {'lr': 0.0002063892319052122, 'samples': 14684160, 'steps': 28679, 'loss/train': 2.2852087020874023} +02/25/2022 14:44:30 - INFO - codeparrot_training - Step 28680: {'lr': 0.00020637312041825204, 'samples': 14684672, 'steps': 28680, 'loss/train': 1.590694785118103} +02/25/2022 14:44:34 - INFO - codeparrot_training - Step 28681: {'lr': 0.0002063570091181756, 'samples': 14685184, 'steps': 28681, 'loss/train': 1.619643211364746} +02/25/2022 14:44:39 - INFO - codeparrot_training - Step 28682: {'lr': 0.00020634089800505178, 'samples': 14685696, 'steps': 28682, 'loss/train': 1.892874836921692} +02/25/2022 14:44:43 - INFO - codeparrot_training - Step 28683: {'lr': 0.00020632478707894976, 'samples': 14686208, 'steps': 28683, 'loss/train': 0.04007269814610481} +02/25/2022 14:44:48 - INFO - codeparrot_training - Step 28684: {'lr': 0.00020630867633993855, 'samples': 14686720, 'steps': 28684, 'loss/train': 0.06422439217567444} +02/25/2022 14:44:52 - INFO - codeparrot_training - Step 28685: {'lr': 0.00020629256578808696, 'samples': 14687232, 'steps': 28685, 'loss/train': 2.165538787841797} +02/25/2022 14:44:58 - INFO - codeparrot_training - Step 28686: {'lr': 0.0002062764554234642, 'samples': 14687744, 'steps': 28686, 'loss/train': 2.598635196685791} +02/25/2022 14:45:02 - INFO - codeparrot_training - Step 28687: {'lr': 0.00020626034524613918, 'samples': 14688256, 'steps': 28687, 'loss/train': 2.199892044067383} +02/25/2022 14:45:07 - INFO - codeparrot_training - Step 28688: {'lr': 0.00020624423525618098, 'samples': 14688768, 'steps': 28688, 'loss/train': 1.1122218370437622} +02/25/2022 14:45:11 - INFO - codeparrot_training - Step 28689: {'lr': 0.00020622812545365864, 'samples': 14689280, 'steps': 28689, 'loss/train': 1.9154456853866577} +02/25/2022 14:45:16 - INFO - codeparrot_training - Step 28690: {'lr': 0.00020621201583864108, 'samples': 14689792, 'steps': 28690, 'loss/train': 2.39687180519104} +02/25/2022 14:45:20 - INFO - codeparrot_training - Step 28691: {'lr': 0.0002061959064111973, 'samples': 14690304, 'steps': 28691, 'loss/train': 1.426198959350586} +02/25/2022 14:45:25 - INFO - codeparrot_training - Step 28692: {'lr': 0.00020617979717139635, 'samples': 14690816, 'steps': 28692, 'loss/train': 0.7376397252082825} +02/25/2022 14:45:29 - INFO - codeparrot_training - Step 28693: {'lr': 0.00020616368811930734, 'samples': 14691328, 'steps': 28693, 'loss/train': 1.5889759063720703} +02/25/2022 14:45:34 - INFO - codeparrot_training - Step 28694: {'lr': 0.00020614757925499906, 'samples': 14691840, 'steps': 28694, 'loss/train': 0.9628810286521912} +02/25/2022 14:45:37 - INFO - codeparrot_training - Step 28695: {'lr': 0.0002061314705785406, 'samples': 14692352, 'steps': 28695, 'loss/train': 2.215167284011841} +02/25/2022 14:45:44 - INFO - codeparrot_training - Step 28696: {'lr': 0.00020611536209000104, 'samples': 14692864, 'steps': 28696, 'loss/train': 1.7047783136367798} +02/25/2022 14:45:47 - INFO - codeparrot_training - Step 28697: {'lr': 0.0002060992537894494, 'samples': 14693376, 'steps': 28697, 'loss/train': 1.2807097434997559} +02/25/2022 14:45:53 - INFO - codeparrot_training - Step 28698: {'lr': 0.00020608314567695452, 'samples': 14693888, 'steps': 28698, 'loss/train': 0.7985394597053528} +02/25/2022 14:45:56 - INFO - codeparrot_training - Step 28699: {'lr': 0.0002060670377525855, 'samples': 14694400, 'steps': 28699, 'loss/train': 0.030608918517827988} +02/25/2022 14:46:02 - INFO - codeparrot_training - Step 28700: {'lr': 0.00020605093001641137, 'samples': 14694912, 'steps': 28700, 'loss/train': 3.155197858810425} +02/25/2022 14:46:05 - INFO - codeparrot_training - Step 28701: {'lr': 0.0002060348224685011, 'samples': 14695424, 'steps': 28701, 'loss/train': 1.1235108375549316} +02/25/2022 14:46:11 - INFO - codeparrot_training - Step 28702: {'lr': 0.00020601871510892374, 'samples': 14695936, 'steps': 28702, 'loss/train': 1.8598308563232422} +02/25/2022 14:46:14 - INFO - codeparrot_training - Step 28703: {'lr': 0.00020600260793774815, 'samples': 14696448, 'steps': 28703, 'loss/train': 1.8477109670639038} +02/25/2022 14:46:20 - INFO - codeparrot_training - Step 28704: {'lr': 0.00020598650095504344, 'samples': 14696960, 'steps': 28704, 'loss/train': 1.6313574314117432} +02/25/2022 14:46:23 - INFO - codeparrot_training - Step 28705: {'lr': 0.0002059703941608786, 'samples': 14697472, 'steps': 28705, 'loss/train': 2.326218366622925} +02/25/2022 14:46:31 - INFO - codeparrot_training - Step 28706: {'lr': 0.0002059542875553227, 'samples': 14697984, 'steps': 28706, 'loss/train': 2.12508487701416} +02/25/2022 14:46:34 - INFO - codeparrot_training - Step 28707: {'lr': 0.00020593818113844454, 'samples': 14698496, 'steps': 28707, 'loss/train': 2.466625452041626} +02/25/2022 14:46:39 - INFO - codeparrot_training - Step 28708: {'lr': 0.0002059220749103132, 'samples': 14699008, 'steps': 28708, 'loss/train': 2.1992530822753906} +02/25/2022 14:46:43 - INFO - codeparrot_training - Step 28709: {'lr': 0.00020590596887099771, 'samples': 14699520, 'steps': 28709, 'loss/train': 4.6187028884887695} +02/25/2022 14:46:48 - INFO - codeparrot_training - Step 28710: {'lr': 0.00020588986302056713, 'samples': 14700032, 'steps': 28710, 'loss/train': 2.0518155097961426} +02/25/2022 14:46:54 - INFO - codeparrot_training - Step 28711: {'lr': 0.00020587375735909032, 'samples': 14700544, 'steps': 28711, 'loss/train': 1.4279311895370483} +02/25/2022 14:46:57 - INFO - codeparrot_training - Step 28712: {'lr': 0.00020585765188663627, 'samples': 14701056, 'steps': 28712, 'loss/train': 2.015519857406616} +02/25/2022 14:47:03 - INFO - codeparrot_training - Step 28713: {'lr': 0.00020584154660327407, 'samples': 14701568, 'steps': 28713, 'loss/train': 1.6761972904205322} +02/25/2022 14:47:06 - INFO - codeparrot_training - Step 28714: {'lr': 0.00020582544150907268, 'samples': 14702080, 'steps': 28714, 'loss/train': 1.7252320051193237} +02/25/2022 14:47:13 - INFO - codeparrot_training - Step 28715: {'lr': 0.00020580933660410106, 'samples': 14702592, 'steps': 28715, 'loss/train': 2.269017219543457} +02/25/2022 14:47:16 - INFO - codeparrot_training - Step 28716: {'lr': 0.00020579323188842818, 'samples': 14703104, 'steps': 28716, 'loss/train': 1.3190195560455322} +02/25/2022 14:47:21 - INFO - codeparrot_training - Step 28717: {'lr': 0.0002057771273621231, 'samples': 14703616, 'steps': 28717, 'loss/train': 1.3510839939117432} +02/25/2022 14:47:25 - INFO - codeparrot_training - Step 28718: {'lr': 0.00020576102302525474, 'samples': 14704128, 'steps': 28718, 'loss/train': 0.7739048600196838} +02/25/2022 14:47:30 - INFO - codeparrot_training - Step 28719: {'lr': 0.00020574491887789212, 'samples': 14704640, 'steps': 28719, 'loss/train': 2.20670223236084} +02/25/2022 14:47:34 - INFO - codeparrot_training - Step 28720: {'lr': 0.0002057288149201042, 'samples': 14705152, 'steps': 28720, 'loss/train': 2.3893651962280273} +02/25/2022 14:47:39 - INFO - codeparrot_training - Step 28721: {'lr': 0.00020571271115196, 'samples': 14705664, 'steps': 28721, 'loss/train': 1.216496467590332} +02/25/2022 14:47:43 - INFO - codeparrot_training - Step 28722: {'lr': 0.00020569660757352847, 'samples': 14706176, 'steps': 28722, 'loss/train': 1.737797498703003} +02/25/2022 14:47:48 - INFO - codeparrot_training - Step 28723: {'lr': 0.00020568050418487855, 'samples': 14706688, 'steps': 28723, 'loss/train': 1.9807382822036743} +02/25/2022 14:47:52 - INFO - codeparrot_training - Step 28724: {'lr': 0.00020566440098607943, 'samples': 14707200, 'steps': 28724, 'loss/train': 1.1350288391113281} +02/25/2022 14:47:57 - INFO - codeparrot_training - Step 28725: {'lr': 0.0002056482979771998, 'samples': 14707712, 'steps': 28725, 'loss/train': 1.7946361303329468} +02/25/2022 14:48:01 - INFO - codeparrot_training - Step 28726: {'lr': 0.0002056321951583088, 'samples': 14708224, 'steps': 28726, 'loss/train': 1.7340128421783447} +02/25/2022 14:48:06 - INFO - codeparrot_training - Step 28727: {'lr': 0.0002056160925294754, 'samples': 14708736, 'steps': 28727, 'loss/train': 1.778532862663269} +02/25/2022 14:48:10 - INFO - codeparrot_training - Step 28728: {'lr': 0.00020559999009076864, 'samples': 14709248, 'steps': 28728, 'loss/train': 0.8869004249572754} +02/25/2022 14:48:15 - INFO - codeparrot_training - Step 28729: {'lr': 0.00020558388784225733, 'samples': 14709760, 'steps': 28729, 'loss/train': 1.7055113315582275} +02/25/2022 14:48:19 - INFO - codeparrot_training - Step 28730: {'lr': 0.00020556778578401054, 'samples': 14710272, 'steps': 28730, 'loss/train': 1.5329267978668213} +02/25/2022 14:48:26 - INFO - codeparrot_training - Step 28731: {'lr': 0.00020555168391609726, 'samples': 14710784, 'steps': 28731, 'loss/train': 1.6859725713729858} +02/25/2022 14:48:29 - INFO - codeparrot_training - Step 28732: {'lr': 0.00020553558223858654, 'samples': 14711296, 'steps': 28732, 'loss/train': 0.2816525101661682} +02/25/2022 14:48:34 - INFO - codeparrot_training - Step 28733: {'lr': 0.00020551948075154714, 'samples': 14711808, 'steps': 28733, 'loss/train': 1.9571174383163452} +02/25/2022 14:48:38 - INFO - codeparrot_training - Step 28734: {'lr': 0.00020550337945504822, 'samples': 14712320, 'steps': 28734, 'loss/train': 1.4706687927246094} +02/25/2022 14:48:43 - INFO - codeparrot_training - Step 28735: {'lr': 0.00020548727834915866, 'samples': 14712832, 'steps': 28735, 'loss/train': 0.6561219692230225} +02/25/2022 14:48:47 - INFO - codeparrot_training - Step 28736: {'lr': 0.00020547117743394743, 'samples': 14713344, 'steps': 28736, 'loss/train': 2.74123477935791} +02/25/2022 14:48:52 - INFO - codeparrot_training - Step 28737: {'lr': 0.00020545507670948369, 'samples': 14713856, 'steps': 28737, 'loss/train': 1.3911833763122559} +02/25/2022 14:48:56 - INFO - codeparrot_training - Step 28738: {'lr': 0.00020543897617583614, 'samples': 14714368, 'steps': 28738, 'loss/train': 0.653398334980011} +02/25/2022 14:49:01 - INFO - codeparrot_training - Step 28739: {'lr': 0.00020542287583307387, 'samples': 14714880, 'steps': 28739, 'loss/train': 1.9609392881393433} +02/25/2022 14:49:05 - INFO - codeparrot_training - Step 28740: {'lr': 0.00020540677568126585, 'samples': 14715392, 'steps': 28740, 'loss/train': 1.7053741216659546} +02/25/2022 14:49:10 - INFO - codeparrot_training - Step 28741: {'lr': 0.00020539067572048113, 'samples': 14715904, 'steps': 28741, 'loss/train': 1.6404954195022583} +02/25/2022 14:49:14 - INFO - codeparrot_training - Step 28742: {'lr': 0.00020537457595078847, 'samples': 14716416, 'steps': 28742, 'loss/train': 0.9432055950164795} +02/25/2022 14:49:20 - INFO - codeparrot_training - Step 28743: {'lr': 0.00020535847637225698, 'samples': 14716928, 'steps': 28743, 'loss/train': 1.2730201482772827} +02/25/2022 14:49:24 - INFO - codeparrot_training - Step 28744: {'lr': 0.0002053423769849556, 'samples': 14717440, 'steps': 28744, 'loss/train': 1.872104287147522} +02/25/2022 14:49:29 - INFO - codeparrot_training - Step 28745: {'lr': 0.0002053262777889534, 'samples': 14717952, 'steps': 28745, 'loss/train': 0.7880858182907104} +02/25/2022 14:49:32 - INFO - codeparrot_training - Step 28746: {'lr': 0.00020531017878431916, 'samples': 14718464, 'steps': 28746, 'loss/train': 1.3543180227279663} +02/25/2022 14:49:38 - INFO - codeparrot_training - Step 28747: {'lr': 0.0002052940799711219, 'samples': 14718976, 'steps': 28747, 'loss/train': 2.4413537979125977} +02/25/2022 14:49:41 - INFO - codeparrot_training - Step 28748: {'lr': 0.0002052779813494306, 'samples': 14719488, 'steps': 28748, 'loss/train': 1.8142073154449463} +02/25/2022 14:49:47 - INFO - codeparrot_training - Step 28749: {'lr': 0.00020526188291931425, 'samples': 14720000, 'steps': 28749, 'loss/train': 1.770838975906372} +02/25/2022 14:49:50 - INFO - codeparrot_training - Step 28750: {'lr': 0.00020524578468084187, 'samples': 14720512, 'steps': 28750, 'loss/train': 1.8709532022476196} +02/25/2022 14:49:56 - INFO - codeparrot_training - Step 28751: {'lr': 0.00020522968663408227, 'samples': 14721024, 'steps': 28751, 'loss/train': 1.670910120010376} +02/25/2022 14:49:59 - INFO - codeparrot_training - Step 28752: {'lr': 0.00020521358877910443, 'samples': 14721536, 'steps': 28752, 'loss/train': 1.01792573928833} +02/25/2022 14:50:06 - INFO - codeparrot_training - Step 28753: {'lr': 0.00020519749111597735, 'samples': 14722048, 'steps': 28753, 'loss/train': 1.2357524633407593} +02/25/2022 14:50:10 - INFO - codeparrot_training - Step 28754: {'lr': 0.00020518139364477013, 'samples': 14722560, 'steps': 28754, 'loss/train': 1.5755587816238403} +02/25/2022 14:50:15 - INFO - codeparrot_training - Step 28755: {'lr': 0.00020516529636555148, 'samples': 14723072, 'steps': 28755, 'loss/train': 1.5529495477676392} +02/25/2022 14:50:18 - INFO - codeparrot_training - Step 28756: {'lr': 0.00020514919927839043, 'samples': 14723584, 'steps': 28756, 'loss/train': 1.8930851221084595} +02/25/2022 14:50:24 - INFO - codeparrot_training - Step 28757: {'lr': 0.00020513310238335605, 'samples': 14724096, 'steps': 28757, 'loss/train': 1.8603923320770264} +02/25/2022 14:50:30 - INFO - codeparrot_training - Step 28758: {'lr': 0.00020511700568051722, 'samples': 14724608, 'steps': 28758, 'loss/train': 1.7283425331115723} +02/25/2022 14:50:33 - INFO - codeparrot_training - Step 28759: {'lr': 0.0002051009091699428, 'samples': 14725120, 'steps': 28759, 'loss/train': 1.3955087661743164} +02/25/2022 14:50:39 - INFO - codeparrot_training - Step 28760: {'lr': 0.00020508481285170185, 'samples': 14725632, 'steps': 28760, 'loss/train': 0.8470161557197571} +02/25/2022 14:50:42 - INFO - codeparrot_training - Step 28761: {'lr': 0.00020506871672586335, 'samples': 14726144, 'steps': 28761, 'loss/train': 1.517698884010315} +02/25/2022 14:50:49 - INFO - codeparrot_training - Step 28762: {'lr': 0.00020505262079249616, 'samples': 14726656, 'steps': 28762, 'loss/train': 2.106020450592041} +02/25/2022 14:50:52 - INFO - codeparrot_training - Step 28763: {'lr': 0.00020503652505166932, 'samples': 14727168, 'steps': 28763, 'loss/train': 1.446267008781433} +02/25/2022 14:50:58 - INFO - codeparrot_training - Step 28764: {'lr': 0.00020502042950345172, 'samples': 14727680, 'steps': 28764, 'loss/train': 2.2972755432128906} +02/25/2022 14:51:01 - INFO - codeparrot_training - Step 28765: {'lr': 0.00020500433414791225, 'samples': 14728192, 'steps': 28765, 'loss/train': 0.03696531429886818} +02/25/2022 14:51:05 - INFO - codeparrot_training - Step 28766: {'lr': 0.00020498823898511994, 'samples': 14728704, 'steps': 28766, 'loss/train': 2.1747655868530273} +02/25/2022 14:51:11 - INFO - codeparrot_training - Step 28767: {'lr': 0.0002049721440151438, 'samples': 14729216, 'steps': 28767, 'loss/train': 1.4541162252426147} +02/25/2022 14:51:14 - INFO - codeparrot_training - Step 28768: {'lr': 0.00020495604923805266, 'samples': 14729728, 'steps': 28768, 'loss/train': 1.9620437622070312} +02/25/2022 14:51:20 - INFO - codeparrot_training - Step 28769: {'lr': 0.00020493995465391547, 'samples': 14730240, 'steps': 28769, 'loss/train': 0.8052703142166138} +02/25/2022 14:51:23 - INFO - codeparrot_training - Step 28770: {'lr': 0.00020492386026280118, 'samples': 14730752, 'steps': 28770, 'loss/train': 1.440199613571167} +02/25/2022 14:51:28 - INFO - codeparrot_training - Step 28771: {'lr': 0.0002049077660647788, 'samples': 14731264, 'steps': 28771, 'loss/train': 2.5613863468170166} +02/25/2022 14:51:32 - INFO - codeparrot_training - Step 28772: {'lr': 0.0002048916720599173, 'samples': 14731776, 'steps': 28772, 'loss/train': 1.9654618501663208} +02/25/2022 14:51:39 - INFO - codeparrot_training - Step 28773: {'lr': 0.00020487557824828544, 'samples': 14732288, 'steps': 28773, 'loss/train': 0.8163354396820068} +02/25/2022 14:51:42 - INFO - codeparrot_training - Step 28774: {'lr': 0.0002048594846299523, 'samples': 14732800, 'steps': 28774, 'loss/train': 0.8364977836608887} +02/25/2022 14:51:48 - INFO - codeparrot_training - Step 28775: {'lr': 0.0002048433912049868, 'samples': 14733312, 'steps': 28775, 'loss/train': 1.1681852340698242} +02/25/2022 14:51:51 - INFO - codeparrot_training - Step 28776: {'lr': 0.00020482729797345798, 'samples': 14733824, 'steps': 28776, 'loss/train': 2.1070070266723633} +02/25/2022 14:51:57 - INFO - codeparrot_training - Step 28777: {'lr': 0.00020481120493543454, 'samples': 14734336, 'steps': 28777, 'loss/train': 1.93741774559021} +02/25/2022 14:52:00 - INFO - codeparrot_training - Step 28778: {'lr': 0.00020479511209098555, 'samples': 14734848, 'steps': 28778, 'loss/train': 0.30311065912246704} +02/25/2022 14:52:06 - INFO - codeparrot_training - Step 28779: {'lr': 0.00020477901944017995, 'samples': 14735360, 'steps': 28779, 'loss/train': 1.9404871463775635} +02/25/2022 14:52:09 - INFO - codeparrot_training - Step 28780: {'lr': 0.0002047629269830868, 'samples': 14735872, 'steps': 28780, 'loss/train': 1.0799428224563599} +02/25/2022 14:52:15 - INFO - codeparrot_training - Step 28781: {'lr': 0.00020474683471977478, 'samples': 14736384, 'steps': 28781, 'loss/train': 1.9959062337875366} +02/25/2022 14:52:18 - INFO - codeparrot_training - Step 28782: {'lr': 0.00020473074265031296, 'samples': 14736896, 'steps': 28782, 'loss/train': 1.7825422286987305} +02/25/2022 14:52:24 - INFO - codeparrot_training - Step 28783: {'lr': 0.00020471465077477027, 'samples': 14737408, 'steps': 28783, 'loss/train': 2.2013068199157715} +02/25/2022 14:52:27 - INFO - codeparrot_training - Step 28784: {'lr': 0.00020469855909321564, 'samples': 14737920, 'steps': 28784, 'loss/train': 3.418954849243164} +02/25/2022 14:52:33 - INFO - codeparrot_training - Step 28785: {'lr': 0.00020468246760571807, 'samples': 14738432, 'steps': 28785, 'loss/train': 0.06748632341623306} +02/25/2022 14:52:36 - INFO - codeparrot_training - Step 28786: {'lr': 0.00020466637631234635, 'samples': 14738944, 'steps': 28786, 'loss/train': 1.76181960105896} +02/25/2022 14:52:42 - INFO - codeparrot_training - Step 28787: {'lr': 0.00020465028521316948, 'samples': 14739456, 'steps': 28787, 'loss/train': 1.4854650497436523} +02/25/2022 14:52:45 - INFO - codeparrot_training - Step 28788: {'lr': 0.00020463419430825637, 'samples': 14739968, 'steps': 28788, 'loss/train': 2.1689870357513428} +02/25/2022 14:52:52 - INFO - codeparrot_training - Step 28789: {'lr': 0.00020461810359767604, 'samples': 14740480, 'steps': 28789, 'loss/train': 1.7968506813049316} +02/25/2022 14:52:55 - INFO - codeparrot_training - Step 28790: {'lr': 0.00020460201308149727, 'samples': 14740992, 'steps': 28790, 'loss/train': 1.7370885610580444} +02/25/2022 14:53:00 - INFO - codeparrot_training - Step 28791: {'lr': 0.00020458592275978906, 'samples': 14741504, 'steps': 28791, 'loss/train': 2.430738925933838} +02/25/2022 14:53:04 - INFO - codeparrot_training - Step 28792: {'lr': 0.00020456983263262036, 'samples': 14742016, 'steps': 28792, 'loss/train': 1.5756193399429321} +02/25/2022 14:53:09 - INFO - codeparrot_training - Step 28793: {'lr': 0.00020455374270006018, 'samples': 14742528, 'steps': 28793, 'loss/train': 2.4550929069519043} +02/25/2022 14:53:13 - INFO - codeparrot_training - Step 28794: {'lr': 0.0002045376529621772, 'samples': 14743040, 'steps': 28794, 'loss/train': 1.1958998441696167} +02/25/2022 14:53:18 - INFO - codeparrot_training - Step 28795: {'lr': 0.00020452156341904047, 'samples': 14743552, 'steps': 28795, 'loss/train': 2.3857009410858154} +02/25/2022 14:53:24 - INFO - codeparrot_training - Step 28796: {'lr': 0.00020450547407071894, 'samples': 14744064, 'steps': 28796, 'loss/train': 1.6521241664886475} +02/25/2022 14:53:27 - INFO - codeparrot_training - Step 28797: {'lr': 0.00020448938491728153, 'samples': 14744576, 'steps': 28797, 'loss/train': 1.5824264287948608} +02/25/2022 14:53:34 - INFO - codeparrot_training - Step 28798: {'lr': 0.00020447329595879718, 'samples': 14745088, 'steps': 28798, 'loss/train': 1.3231697082519531} +02/25/2022 14:53:37 - INFO - codeparrot_training - Step 28799: {'lr': 0.00020445720719533474, 'samples': 14745600, 'steps': 28799, 'loss/train': 2.106687545776367} +02/25/2022 14:53:43 - INFO - codeparrot_training - Step 28800: {'lr': 0.00020444111862696313, 'samples': 14746112, 'steps': 28800, 'loss/train': 1.9485704898834229} +02/25/2022 14:53:46 - INFO - codeparrot_training - Step 28801: {'lr': 0.00020442503025375138, 'samples': 14746624, 'steps': 28801, 'loss/train': 1.8105411529541016} +02/25/2022 14:53:52 - INFO - codeparrot_training - Step 28802: {'lr': 0.0002044089420757683, 'samples': 14747136, 'steps': 28802, 'loss/train': 2.4100394248962402} +02/25/2022 14:53:55 - INFO - codeparrot_training - Step 28803: {'lr': 0.0002043928540930828, 'samples': 14747648, 'steps': 28803, 'loss/train': 0.9466906189918518} +02/25/2022 14:54:01 - INFO - codeparrot_training - Step 28804: {'lr': 0.00020437676630576386, 'samples': 14748160, 'steps': 28804, 'loss/train': 0.7663961052894592} +02/25/2022 14:54:04 - INFO - codeparrot_training - Step 28805: {'lr': 0.00020436067871388032, 'samples': 14748672, 'steps': 28805, 'loss/train': 1.7867168188095093} +02/25/2022 14:54:09 - INFO - codeparrot_training - Step 28806: {'lr': 0.00020434459131750122, 'samples': 14749184, 'steps': 28806, 'loss/train': 1.5550402402877808} +02/25/2022 14:54:13 - INFO - codeparrot_training - Step 28807: {'lr': 0.00020432850411669531, 'samples': 14749696, 'steps': 28807, 'loss/train': 3.034815788269043} +02/25/2022 14:54:19 - INFO - codeparrot_training - Step 28808: {'lr': 0.00020431241711153165, 'samples': 14750208, 'steps': 28808, 'loss/train': 0.093503437936306} +02/25/2022 14:54:23 - INFO - codeparrot_training - Step 28809: {'lr': 0.00020429633030207906, 'samples': 14750720, 'steps': 28809, 'loss/train': 0.9339956641197205} +02/25/2022 14:54:28 - INFO - codeparrot_training - Step 28810: {'lr': 0.00020428024368840644, 'samples': 14751232, 'steps': 28810, 'loss/train': 1.3292827606201172} +02/25/2022 14:54:32 - INFO - codeparrot_training - Step 28811: {'lr': 0.00020426415727058288, 'samples': 14751744, 'steps': 28811, 'loss/train': 2.028827667236328} +02/25/2022 14:54:37 - INFO - codeparrot_training - Step 28812: {'lr': 0.000204248071048677, 'samples': 14752256, 'steps': 28812, 'loss/train': 1.6217399835586548} +02/25/2022 14:54:41 - INFO - codeparrot_training - Step 28813: {'lr': 0.0002042319850227579, 'samples': 14752768, 'steps': 28813, 'loss/train': 0.9365517497062683} +02/25/2022 14:54:46 - INFO - codeparrot_training - Step 28814: {'lr': 0.0002042158991928944, 'samples': 14753280, 'steps': 28814, 'loss/train': 2.1713790893554688} +02/25/2022 14:54:50 - INFO - codeparrot_training - Step 28815: {'lr': 0.0002041998135591556, 'samples': 14753792, 'steps': 28815, 'loss/train': 1.6756418943405151} +02/25/2022 14:54:55 - INFO - codeparrot_training - Step 28816: {'lr': 0.00020418372812161013, 'samples': 14754304, 'steps': 28816, 'loss/train': 0.9748415350914001} +02/25/2022 14:54:59 - INFO - codeparrot_training - Step 28817: {'lr': 0.000204167642880327, 'samples': 14754816, 'steps': 28817, 'loss/train': 1.5152543783187866} +02/25/2022 14:55:05 - INFO - codeparrot_training - Step 28818: {'lr': 0.00020415155783537513, 'samples': 14755328, 'steps': 28818, 'loss/train': 1.4748494625091553} +02/25/2022 14:55:08 - INFO - codeparrot_training - Step 28819: {'lr': 0.00020413547298682346, 'samples': 14755840, 'steps': 28819, 'loss/train': 2.737989902496338} +02/25/2022 14:55:14 - INFO - codeparrot_training - Step 28820: {'lr': 0.00020411938833474097, 'samples': 14756352, 'steps': 28820, 'loss/train': 1.906535267829895} +02/25/2022 14:55:17 - INFO - codeparrot_training - Step 28821: {'lr': 0.00020410330387919632, 'samples': 14756864, 'steps': 28821, 'loss/train': 1.3603588342666626} +02/25/2022 14:55:23 - INFO - codeparrot_training - Step 28822: {'lr': 0.00020408721962025857, 'samples': 14757376, 'steps': 28822, 'loss/train': 1.3775073289871216} +02/25/2022 14:55:26 - INFO - codeparrot_training - Step 28823: {'lr': 0.00020407113555799655, 'samples': 14757888, 'steps': 28823, 'loss/train': 1.9446040391921997} +02/25/2022 14:55:32 - INFO - codeparrot_training - Step 28824: {'lr': 0.00020405505169247934, 'samples': 14758400, 'steps': 28824, 'loss/train': 1.7067701816558838} +02/25/2022 14:55:35 - INFO - codeparrot_training - Step 28825: {'lr': 0.00020403896802377559, 'samples': 14758912, 'steps': 28825, 'loss/train': 1.4373440742492676} +02/25/2022 14:55:41 - INFO - codeparrot_training - Step 28826: {'lr': 0.0002040228845519543, 'samples': 14759424, 'steps': 28826, 'loss/train': 2.052150249481201} +02/25/2022 14:55:44 - INFO - codeparrot_training - Step 28827: {'lr': 0.00020400680127708438, 'samples': 14759936, 'steps': 28827, 'loss/train': 2.708298921585083} +02/25/2022 14:55:50 - INFO - codeparrot_training - Step 28828: {'lr': 0.00020399071819923481, 'samples': 14760448, 'steps': 28828, 'loss/train': 1.6759121417999268} +02/25/2022 14:55:53 - INFO - codeparrot_training - Step 28829: {'lr': 0.0002039746353184743, 'samples': 14760960, 'steps': 28829, 'loss/train': 1.826778769493103} +02/25/2022 14:55:59 - INFO - codeparrot_training - Step 28830: {'lr': 0.00020395855263487185, 'samples': 14761472, 'steps': 28830, 'loss/train': 1.1359045505523682} +02/25/2022 14:56:02 - INFO - codeparrot_training - Step 28831: {'lr': 0.0002039424701484963, 'samples': 14761984, 'steps': 28831, 'loss/train': 1.5218032598495483} +02/25/2022 14:56:07 - INFO - codeparrot_training - Step 28832: {'lr': 0.00020392638785941665, 'samples': 14762496, 'steps': 28832, 'loss/train': 2.3321585655212402} +02/25/2022 14:56:14 - INFO - codeparrot_training - Step 28833: {'lr': 0.00020391030576770178, 'samples': 14763008, 'steps': 28833, 'loss/train': 1.690425157546997} +02/25/2022 14:56:17 - INFO - codeparrot_training - Step 28834: {'lr': 0.00020389422387342044, 'samples': 14763520, 'steps': 28834, 'loss/train': 2.3165602684020996} +02/25/2022 14:56:23 - INFO - codeparrot_training - Step 28835: {'lr': 0.00020387814217664158, 'samples': 14764032, 'steps': 28835, 'loss/train': 1.0011096000671387} +02/25/2022 14:56:26 - INFO - codeparrot_training - Step 28836: {'lr': 0.00020386206067743413, 'samples': 14764544, 'steps': 28836, 'loss/train': 2.806399345397949} +02/25/2022 14:56:32 - INFO - codeparrot_training - Step 28837: {'lr': 0.00020384597937586708, 'samples': 14765056, 'steps': 28837, 'loss/train': 1.8127254247665405} +02/25/2022 14:56:35 - INFO - codeparrot_training - Step 28838: {'lr': 0.00020382989827200907, 'samples': 14765568, 'steps': 28838, 'loss/train': 1.8910161256790161} +02/25/2022 14:56:41 - INFO - codeparrot_training - Step 28839: {'lr': 0.0002038138173659291, 'samples': 14766080, 'steps': 28839, 'loss/train': 1.3368737697601318} +02/25/2022 14:56:44 - INFO - codeparrot_training - Step 28840: {'lr': 0.0002037977366576961, 'samples': 14766592, 'steps': 28840, 'loss/train': 0.7702274322509766} +02/25/2022 14:56:50 - INFO - codeparrot_training - Step 28841: {'lr': 0.00020378165614737904, 'samples': 14767104, 'steps': 28841, 'loss/train': 0.7103793025016785} +02/25/2022 14:56:53 - INFO - codeparrot_training - Step 28842: {'lr': 0.00020376557583504657, 'samples': 14767616, 'steps': 28842, 'loss/train': 1.5509947538375854} +02/25/2022 14:56:59 - INFO - codeparrot_training - Step 28843: {'lr': 0.0002037494957207677, 'samples': 14768128, 'steps': 28843, 'loss/train': 2.027299404144287} +02/25/2022 14:57:03 - INFO - codeparrot_training - Step 28844: {'lr': 0.00020373341580461133, 'samples': 14768640, 'steps': 28844, 'loss/train': 1.8981330394744873} +02/25/2022 14:57:08 - INFO - codeparrot_training - Step 28845: {'lr': 0.00020371733608664627, 'samples': 14769152, 'steps': 28845, 'loss/train': 1.3378561735153198} +02/25/2022 14:57:12 - INFO - codeparrot_training - Step 28846: {'lr': 0.00020370125656694153, 'samples': 14769664, 'steps': 28846, 'loss/train': 0.5291807055473328} +02/25/2022 14:57:17 - INFO - codeparrot_training - Step 28847: {'lr': 0.00020368517724556583, 'samples': 14770176, 'steps': 28847, 'loss/train': 2.3157451152801514} +02/25/2022 14:57:21 - INFO - codeparrot_training - Step 28848: {'lr': 0.00020366909812258817, 'samples': 14770688, 'steps': 28848, 'loss/train': 1.742571234703064} +02/25/2022 14:57:26 - INFO - codeparrot_training - Step 28849: {'lr': 0.00020365301919807733, 'samples': 14771200, 'steps': 28849, 'loss/train': 1.5665849447250366} +02/25/2022 14:57:30 - INFO - codeparrot_training - Step 28850: {'lr': 0.00020363694047210228, 'samples': 14771712, 'steps': 28850, 'loss/train': 2.2817270755767822} +02/25/2022 14:57:35 - INFO - codeparrot_training - Step 28851: {'lr': 0.00020362086194473185, 'samples': 14772224, 'steps': 28851, 'loss/train': 1.2029011249542236} +02/25/2022 14:57:39 - INFO - codeparrot_training - Step 28852: {'lr': 0.00020360478361603493, 'samples': 14772736, 'steps': 28852, 'loss/train': 1.2510825395584106} +02/25/2022 14:57:45 - INFO - codeparrot_training - Step 28853: {'lr': 0.00020358870548608035, 'samples': 14773248, 'steps': 28853, 'loss/train': 2.434044361114502} +02/25/2022 14:57:49 - INFO - codeparrot_training - Step 28854: {'lr': 0.0002035726275549371, 'samples': 14773760, 'steps': 28854, 'loss/train': 2.3309528827667236} +02/25/2022 14:57:54 - INFO - codeparrot_training - Step 28855: {'lr': 0.0002035565498226739, 'samples': 14774272, 'steps': 28855, 'loss/train': 1.2794013023376465} +02/25/2022 14:57:57 - INFO - codeparrot_training - Step 28856: {'lr': 0.00020354047228935969, 'samples': 14774784, 'steps': 28856, 'loss/train': 1.783035397529602} +02/25/2022 14:58:03 - INFO - codeparrot_training - Step 28857: {'lr': 0.00020352439495506335, 'samples': 14775296, 'steps': 28857, 'loss/train': 1.3094043731689453} +02/25/2022 14:58:06 - INFO - codeparrot_training - Step 28858: {'lr': 0.00020350831781985372, 'samples': 14775808, 'steps': 28858, 'loss/train': 1.8641000986099243} +02/25/2022 14:58:12 - INFO - codeparrot_training - Step 28859: {'lr': 0.0002034922408837998, 'samples': 14776320, 'steps': 28859, 'loss/train': 1.677760362625122} +02/25/2022 14:58:15 - INFO - codeparrot_training - Step 28860: {'lr': 0.00020347616414697023, 'samples': 14776832, 'steps': 28860, 'loss/train': 1.5541274547576904} +02/25/2022 14:58:21 - INFO - codeparrot_training - Step 28861: {'lr': 0.00020346008760943404, 'samples': 14777344, 'steps': 28861, 'loss/train': 2.8858442306518555} +02/25/2022 14:58:24 - INFO - codeparrot_training - Step 28862: {'lr': 0.00020344401127126005, 'samples': 14777856, 'steps': 28862, 'loss/train': 1.2962510585784912} +02/25/2022 14:58:30 - INFO - codeparrot_training - Step 28863: {'lr': 0.00020342793513251724, 'samples': 14778368, 'steps': 28863, 'loss/train': 1.4813169240951538} +02/25/2022 14:58:33 - INFO - codeparrot_training - Step 28864: {'lr': 0.00020341185919327425, 'samples': 14778880, 'steps': 28864, 'loss/train': 1.480462908744812} +02/25/2022 14:58:40 - INFO - codeparrot_training - Step 28865: {'lr': 0.00020339578345360005, 'samples': 14779392, 'steps': 28865, 'loss/train': 1.7878892421722412} +02/25/2022 14:58:43 - INFO - codeparrot_training - Step 28866: {'lr': 0.00020337970791356352, 'samples': 14779904, 'steps': 28866, 'loss/train': 2.87617564201355} +02/25/2022 14:58:49 - INFO - codeparrot_training - Step 28867: {'lr': 0.00020336363257323354, 'samples': 14780416, 'steps': 28867, 'loss/train': 2.882838726043701} +02/25/2022 14:58:52 - INFO - codeparrot_training - Step 28868: {'lr': 0.00020334755743267903, 'samples': 14780928, 'steps': 28868, 'loss/train': 1.6327271461486816} +02/25/2022 14:58:58 - INFO - codeparrot_training - Step 28869: {'lr': 0.00020333148249196867, 'samples': 14781440, 'steps': 28869, 'loss/train': 1.1009503602981567} +02/25/2022 14:59:01 - INFO - codeparrot_training - Step 28870: {'lr': 0.00020331540775117142, 'samples': 14781952, 'steps': 28870, 'loss/train': 0.37435033917427063} +02/25/2022 14:59:07 - INFO - codeparrot_training - Step 28871: {'lr': 0.00020329933321035616, 'samples': 14782464, 'steps': 28871, 'loss/train': 1.4789108037948608} +02/25/2022 14:59:10 - INFO - codeparrot_training - Step 28872: {'lr': 0.00020328325886959182, 'samples': 14782976, 'steps': 28872, 'loss/train': 1.1462639570236206} +02/25/2022 14:59:16 - INFO - codeparrot_training - Step 28873: {'lr': 0.00020326718472894704, 'samples': 14783488, 'steps': 28873, 'loss/train': 1.7303941249847412} +02/25/2022 14:59:19 - INFO - codeparrot_training - Step 28874: {'lr': 0.00020325111078849082, 'samples': 14784000, 'steps': 28874, 'loss/train': 1.615073323249817} +02/25/2022 14:59:25 - INFO - codeparrot_training - Step 28875: {'lr': 0.000203235037048292, 'samples': 14784512, 'steps': 28875, 'loss/train': 2.763493299484253} +02/25/2022 14:59:29 - INFO - codeparrot_training - Step 28876: {'lr': 0.0002032189635084195, 'samples': 14785024, 'steps': 28876, 'loss/train': 1.6377590894699097} +02/25/2022 14:59:34 - INFO - codeparrot_training - Step 28877: {'lr': 0.00020320289016894207, 'samples': 14785536, 'steps': 28877, 'loss/train': 2.0550026893615723} +02/25/2022 14:59:38 - INFO - codeparrot_training - Step 28878: {'lr': 0.00020318681702992852, 'samples': 14786048, 'steps': 28878, 'loss/train': 1.940559983253479} +02/25/2022 14:59:43 - INFO - codeparrot_training - Step 28879: {'lr': 0.00020317074409144785, 'samples': 14786560, 'steps': 28879, 'loss/train': 2.354809522628784} +02/25/2022 14:59:49 - INFO - codeparrot_training - Step 28880: {'lr': 0.0002031546713535688, 'samples': 14787072, 'steps': 28880, 'loss/train': 1.8954113721847534} +02/25/2022 14:59:52 - INFO - codeparrot_training - Step 28881: {'lr': 0.00020313859881636038, 'samples': 14787584, 'steps': 28881, 'loss/train': 2.0843141078948975} +02/25/2022 14:59:58 - INFO - codeparrot_training - Step 28882: {'lr': 0.0002031225264798912, 'samples': 14788096, 'steps': 28882, 'loss/train': 2.0360560417175293} +02/25/2022 15:00:01 - INFO - codeparrot_training - Step 28883: {'lr': 0.00020310645434423025, 'samples': 14788608, 'steps': 28883, 'loss/train': 2.3703370094299316} +02/25/2022 15:00:07 - INFO - codeparrot_training - Step 28884: {'lr': 0.00020309038240944643, 'samples': 14789120, 'steps': 28884, 'loss/train': 1.0343043804168701} +02/25/2022 15:00:10 - INFO - codeparrot_training - Step 28885: {'lr': 0.0002030743106756085, 'samples': 14789632, 'steps': 28885, 'loss/train': 1.93584144115448} +02/25/2022 15:00:16 - INFO - codeparrot_training - Step 28886: {'lr': 0.00020305823914278527, 'samples': 14790144, 'steps': 28886, 'loss/train': 0.6957326531410217} +02/25/2022 15:00:19 - INFO - codeparrot_training - Step 28887: {'lr': 0.00020304216781104562, 'samples': 14790656, 'steps': 28887, 'loss/train': 8.838553428649902} +02/25/2022 15:00:23 - INFO - codeparrot_training - Step 28888: {'lr': 0.00020302609668045848, 'samples': 14791168, 'steps': 28888, 'loss/train': 1.8295924663543701} +02/25/2022 15:00:28 - INFO - codeparrot_training - Step 28889: {'lr': 0.00020301002575109266, 'samples': 14791680, 'steps': 28889, 'loss/train': 1.9290168285369873} +02/25/2022 15:00:31 - INFO - codeparrot_training - Step 28890: {'lr': 0.00020299395502301689, 'samples': 14792192, 'steps': 28890, 'loss/train': 0.6554014086723328} +02/25/2022 15:00:38 - INFO - codeparrot_training - Step 28891: {'lr': 0.00020297788449630006, 'samples': 14792704, 'steps': 28891, 'loss/train': 1.4383426904678345} +02/25/2022 15:00:43 - INFO - codeparrot_training - Step 28892: {'lr': 0.0002029618141710111, 'samples': 14793216, 'steps': 28892, 'loss/train': 0.5175086259841919} +02/25/2022 15:00:47 - INFO - codeparrot_training - Step 28893: {'lr': 0.00020294574404721878, 'samples': 14793728, 'steps': 28893, 'loss/train': 0.8374757766723633} +02/25/2022 15:00:52 - INFO - codeparrot_training - Step 28894: {'lr': 0.00020292967412499196, 'samples': 14794240, 'steps': 28894, 'loss/train': 2.3668553829193115} +02/25/2022 15:00:56 - INFO - codeparrot_training - Step 28895: {'lr': 0.00020291360440439948, 'samples': 14794752, 'steps': 28895, 'loss/train': 1.3878892660140991} +02/25/2022 15:01:01 - INFO - codeparrot_training - Step 28896: {'lr': 0.00020289753488551016, 'samples': 14795264, 'steps': 28896, 'loss/train': 1.7678484916687012} +02/25/2022 15:01:05 - INFO - codeparrot_training - Step 28897: {'lr': 0.00020288146556839278, 'samples': 14795776, 'steps': 28897, 'loss/train': 1.3984142541885376} +02/25/2022 15:01:10 - INFO - codeparrot_training - Step 28898: {'lr': 0.00020286539645311634, 'samples': 14796288, 'steps': 28898, 'loss/train': 2.6917362213134766} +02/25/2022 15:01:13 - INFO - codeparrot_training - Step 28899: {'lr': 0.00020284932753974956, 'samples': 14796800, 'steps': 28899, 'loss/train': 1.7898812294006348} +02/25/2022 15:01:20 - INFO - codeparrot_training - Step 28900: {'lr': 0.00020283325882836122, 'samples': 14797312, 'steps': 28900, 'loss/train': 1.6224220991134644} +02/25/2022 15:01:23 - INFO - codeparrot_training - Step 28901: {'lr': 0.00020281719031902025, 'samples': 14797824, 'steps': 28901, 'loss/train': 1.9690816402435303} +02/25/2022 15:01:29 - INFO - codeparrot_training - Step 28902: {'lr': 0.0002028011220117955, 'samples': 14798336, 'steps': 28902, 'loss/train': 4.891348838806152} +02/25/2022 15:01:32 - INFO - codeparrot_training - Step 28903: {'lr': 0.00020278505390675572, 'samples': 14798848, 'steps': 28903, 'loss/train': 1.8070660829544067} +02/25/2022 15:01:38 - INFO - codeparrot_training - Step 28904: {'lr': 0.00020276898600396975, 'samples': 14799360, 'steps': 28904, 'loss/train': 2.5544748306274414} +02/25/2022 15:01:41 - INFO - codeparrot_training - Step 28905: {'lr': 0.00020275291830350645, 'samples': 14799872, 'steps': 28905, 'loss/train': 1.1793084144592285} +02/25/2022 15:01:47 - INFO - codeparrot_training - Step 28906: {'lr': 0.00020273685080543464, 'samples': 14800384, 'steps': 28906, 'loss/train': 1.445697546005249} +02/25/2022 15:01:50 - INFO - codeparrot_training - Step 28907: {'lr': 0.00020272078350982325, 'samples': 14800896, 'steps': 28907, 'loss/train': 1.6257308721542358} +02/25/2022 15:01:56 - INFO - codeparrot_training - Step 28908: {'lr': 0.00020270471641674093, 'samples': 14801408, 'steps': 28908, 'loss/train': 0.0301054734736681} +02/25/2022 15:01:59 - INFO - codeparrot_training - Step 28909: {'lr': 0.00020268864952625656, 'samples': 14801920, 'steps': 28909, 'loss/train': 1.8452787399291992} +02/25/2022 15:02:06 - INFO - codeparrot_training - Step 28910: {'lr': 0.000202672582838439, 'samples': 14802432, 'steps': 28910, 'loss/train': 2.0845794677734375} +02/25/2022 15:02:09 - INFO - codeparrot_training - Step 28911: {'lr': 0.00020265651635335719, 'samples': 14802944, 'steps': 28911, 'loss/train': 1.7245670557022095} +02/25/2022 15:02:15 - INFO - codeparrot_training - Step 28912: {'lr': 0.00020264045007107974, 'samples': 14803456, 'steps': 28912, 'loss/train': 1.579886794090271} +02/25/2022 15:02:18 - INFO - codeparrot_training - Step 28913: {'lr': 0.00020262438399167552, 'samples': 14803968, 'steps': 28913, 'loss/train': 1.6542655229568481} +02/25/2022 15:02:24 - INFO - codeparrot_training - Step 28914: {'lr': 0.0002026083181152134, 'samples': 14804480, 'steps': 28914, 'loss/train': 1.8935295343399048} +02/25/2022 15:02:28 - INFO - codeparrot_training - Step 28915: {'lr': 0.00020259225244176232, 'samples': 14804992, 'steps': 28915, 'loss/train': 1.7291104793548584} +02/25/2022 15:02:33 - INFO - codeparrot_training - Step 28916: {'lr': 0.00020257618697139086, 'samples': 14805504, 'steps': 28916, 'loss/train': 1.0121861696243286} +02/25/2022 15:02:37 - INFO - codeparrot_training - Step 28917: {'lr': 0.00020256012170416797, 'samples': 14806016, 'steps': 28917, 'loss/train': 1.8899732828140259} +02/25/2022 15:02:42 - INFO - codeparrot_training - Step 28918: {'lr': 0.00020254405664016245, 'samples': 14806528, 'steps': 28918, 'loss/train': 1.450128197669983} +02/25/2022 15:02:45 - INFO - codeparrot_training - Step 28919: {'lr': 0.0002025279917794431, 'samples': 14807040, 'steps': 28919, 'loss/train': 2.908566951751709} +02/25/2022 15:02:52 - INFO - codeparrot_training - Step 28920: {'lr': 0.0002025119271220789, 'samples': 14807552, 'steps': 28920, 'loss/train': 2.1035959720611572} +02/25/2022 15:02:55 - INFO - codeparrot_training - Step 28921: {'lr': 0.0002024958626681384, 'samples': 14808064, 'steps': 28921, 'loss/train': 1.8606675863265991} +02/25/2022 15:03:01 - INFO - codeparrot_training - Step 28922: {'lr': 0.00020247979841769053, 'samples': 14808576, 'steps': 28922, 'loss/train': 2.044541358947754} +02/25/2022 15:03:04 - INFO - codeparrot_training - Step 28923: {'lr': 0.00020246373437080413, 'samples': 14809088, 'steps': 28923, 'loss/train': 1.2998839616775513} +02/25/2022 15:03:10 - INFO - codeparrot_training - Step 28924: {'lr': 0.0002024476705275481, 'samples': 14809600, 'steps': 28924, 'loss/train': 0.5418252348899841} +02/25/2022 15:03:13 - INFO - codeparrot_training - Step 28925: {'lr': 0.00020243160688799105, 'samples': 14810112, 'steps': 28925, 'loss/train': 1.4740431308746338} +02/25/2022 15:03:19 - INFO - codeparrot_training - Step 28926: {'lr': 0.0002024155434522019, 'samples': 14810624, 'steps': 28926, 'loss/train': 1.7650465965270996} +02/25/2022 15:03:22 - INFO - codeparrot_training - Step 28927: {'lr': 0.00020239948022024941, 'samples': 14811136, 'steps': 28927, 'loss/train': 0.3023514449596405} +02/25/2022 15:03:28 - INFO - codeparrot_training - Step 28928: {'lr': 0.00020238341719220254, 'samples': 14811648, 'steps': 28928, 'loss/train': 2.5048794746398926} +02/25/2022 15:03:31 - INFO - codeparrot_training - Step 28929: {'lr': 0.00020236735436812996, 'samples': 14812160, 'steps': 28929, 'loss/train': 1.2107259035110474} +02/25/2022 15:03:37 - INFO - codeparrot_training - Step 28930: {'lr': 0.00020235129174810045, 'samples': 14812672, 'steps': 28930, 'loss/train': 1.3697885274887085} +02/25/2022 15:03:40 - INFO - codeparrot_training - Step 28931: {'lr': 0.0002023352293321829, 'samples': 14813184, 'steps': 28931, 'loss/train': 1.601084589958191} +02/25/2022 15:03:46 - INFO - codeparrot_training - Step 28932: {'lr': 0.00020231916712044613, 'samples': 14813696, 'steps': 28932, 'loss/train': 1.7738384008407593} +02/25/2022 15:03:50 - INFO - codeparrot_training - Step 28933: {'lr': 0.0002023031051129589, 'samples': 14814208, 'steps': 28933, 'loss/train': 2.0779154300689697} +02/25/2022 15:03:55 - INFO - codeparrot_training - Step 28934: {'lr': 0.00020228704330978997, 'samples': 14814720, 'steps': 28934, 'loss/train': 1.2004581689834595} +02/25/2022 15:03:59 - INFO - codeparrot_training - Step 28935: {'lr': 0.00020227098171100828, 'samples': 14815232, 'steps': 28935, 'loss/train': 0.09606925398111343} +02/25/2022 15:04:05 - INFO - codeparrot_training - Step 28936: {'lr': 0.00020225492031668247, 'samples': 14815744, 'steps': 28936, 'loss/train': 0.42571696639060974} +02/25/2022 15:04:08 - INFO - codeparrot_training - Step 28937: {'lr': 0.00020223885912688146, 'samples': 14816256, 'steps': 28937, 'loss/train': 2.1236870288848877} +02/25/2022 15:04:14 - INFO - codeparrot_training - Step 28938: {'lr': 0.00020222279814167398, 'samples': 14816768, 'steps': 28938, 'loss/train': 2.339834451675415} +02/25/2022 15:04:17 - INFO - codeparrot_training - Step 28939: {'lr': 0.0002022067373611289, 'samples': 14817280, 'steps': 28939, 'loss/train': 1.5197266340255737} +02/25/2022 15:04:23 - INFO - codeparrot_training - Step 28940: {'lr': 0.00020219067678531495, 'samples': 14817792, 'steps': 28940, 'loss/train': 2.158062696456909} +02/25/2022 15:04:26 - INFO - codeparrot_training - Step 28941: {'lr': 0.00020217461641430096, 'samples': 14818304, 'steps': 28941, 'loss/train': 1.1631979942321777} +02/25/2022 15:04:32 - INFO - codeparrot_training - Step 28942: {'lr': 0.0002021585562481558, 'samples': 14818816, 'steps': 28942, 'loss/train': 2.30741286277771} +02/25/2022 15:04:35 - INFO - codeparrot_training - Step 28943: {'lr': 0.0002021424962869481, 'samples': 14819328, 'steps': 28943, 'loss/train': 1.9290807247161865} +02/25/2022 15:04:41 - INFO - codeparrot_training - Step 28944: {'lr': 0.00020212643653074678, 'samples': 14819840, 'steps': 28944, 'loss/train': 1.7719894647598267} +02/25/2022 15:04:44 - INFO - codeparrot_training - Step 28945: {'lr': 0.00020211037697962056, 'samples': 14820352, 'steps': 28945, 'loss/train': 1.351855993270874} +02/25/2022 15:04:50 - INFO - codeparrot_training - Step 28946: {'lr': 0.0002020943176336384, 'samples': 14820864, 'steps': 28946, 'loss/train': 0.9579593539237976} +02/25/2022 15:04:54 - INFO - codeparrot_training - Step 28947: {'lr': 0.00020207825849286886, 'samples': 14821376, 'steps': 28947, 'loss/train': 2.261284828186035} +02/25/2022 15:04:59 - INFO - codeparrot_training - Step 28948: {'lr': 0.00020206219955738088, 'samples': 14821888, 'steps': 28948, 'loss/train': 0.973355233669281} +02/25/2022 15:05:03 - INFO - codeparrot_training - Step 28949: {'lr': 0.0002020461408272432, 'samples': 14822400, 'steps': 28949, 'loss/train': 1.0824620723724365} +02/25/2022 15:05:08 - INFO - codeparrot_training - Step 28950: {'lr': 0.00020203008230252473, 'samples': 14822912, 'steps': 28950, 'loss/train': 1.2817915678024292} +02/25/2022 15:05:12 - INFO - codeparrot_training - Step 28951: {'lr': 0.000202014023983294, 'samples': 14823424, 'steps': 28951, 'loss/train': 2.090498924255371} +02/25/2022 15:05:17 - INFO - codeparrot_training - Step 28952: {'lr': 0.00020199796586962003, 'samples': 14823936, 'steps': 28952, 'loss/train': 1.5789719820022583} +02/25/2022 15:05:21 - INFO - codeparrot_training - Step 28953: {'lr': 0.0002019819079615715, 'samples': 14824448, 'steps': 28953, 'loss/train': 2.967611789703369} +02/25/2022 15:05:26 - INFO - codeparrot_training - Step 28954: {'lr': 0.00020196585025921722, 'samples': 14824960, 'steps': 28954, 'loss/train': 1.1173735857009888} +02/25/2022 15:05:30 - INFO - codeparrot_training - Step 28955: {'lr': 0.0002019497927626261, 'samples': 14825472, 'steps': 28955, 'loss/train': 1.1480498313903809} +02/25/2022 15:05:36 - INFO - codeparrot_training - Step 28956: {'lr': 0.00020193373547186668, 'samples': 14825984, 'steps': 28956, 'loss/train': 1.3480191230773926} +02/25/2022 15:05:39 - INFO - codeparrot_training - Step 28957: {'lr': 0.00020191767838700792, 'samples': 14826496, 'steps': 28957, 'loss/train': 1.5765042304992676} +02/25/2022 15:05:45 - INFO - codeparrot_training - Step 28958: {'lr': 0.00020190162150811853, 'samples': 14827008, 'steps': 28958, 'loss/train': 1.3374444246292114} +02/25/2022 15:05:48 - INFO - codeparrot_training - Step 28959: {'lr': 0.00020188556483526743, 'samples': 14827520, 'steps': 28959, 'loss/train': 1.4985771179199219} +02/25/2022 15:05:54 - INFO - codeparrot_training - Step 28960: {'lr': 0.0002018695083685232, 'samples': 14828032, 'steps': 28960, 'loss/train': 2.812014579772949} +02/25/2022 15:05:57 - INFO - codeparrot_training - Step 28961: {'lr': 0.00020185345210795466, 'samples': 14828544, 'steps': 28961, 'loss/train': 1.8284573554992676} +02/25/2022 15:06:03 - INFO - codeparrot_training - Step 28962: {'lr': 0.00020183739605363069, 'samples': 14829056, 'steps': 28962, 'loss/train': 1.4798394441604614} +02/25/2022 15:06:06 - INFO - codeparrot_training - Step 28963: {'lr': 0.00020182134020562014, 'samples': 14829568, 'steps': 28963, 'loss/train': 1.6920136213302612} +02/25/2022 15:06:12 - INFO - codeparrot_training - Step 28964: {'lr': 0.00020180528456399153, 'samples': 14830080, 'steps': 28964, 'loss/train': 1.226874828338623} +02/25/2022 15:06:15 - INFO - codeparrot_training - Step 28965: {'lr': 0.00020178922912881378, 'samples': 14830592, 'steps': 28965, 'loss/train': 2.674668788909912} +02/25/2022 15:06:23 - INFO - codeparrot_training - Step 28966: {'lr': 0.00020177317390015568, 'samples': 14831104, 'steps': 28966, 'loss/train': 2.2185351848602295} +02/25/2022 15:06:26 - INFO - codeparrot_training - Step 28967: {'lr': 0.00020175711887808598, 'samples': 14831616, 'steps': 28967, 'loss/train': 1.9698344469070435} +02/25/2022 15:06:32 - INFO - codeparrot_training - Step 28968: {'lr': 0.0002017410640626736, 'samples': 14832128, 'steps': 28968, 'loss/train': 1.7015665769577026} +02/25/2022 15:06:37 - INFO - codeparrot_training - Step 28969: {'lr': 0.00020172500945398703, 'samples': 14832640, 'steps': 28969, 'loss/train': 2.2540230751037598} +02/25/2022 15:06:41 - INFO - codeparrot_training - Step 28970: {'lr': 0.00020170895505209523, 'samples': 14833152, 'steps': 28970, 'loss/train': 0.05459139123558998} +02/25/2022 15:06:46 - INFO - codeparrot_training - Step 28971: {'lr': 0.00020169290085706687, 'samples': 14833664, 'steps': 28971, 'loss/train': 1.7054938077926636} +02/25/2022 15:06:50 - INFO - codeparrot_training - Step 28972: {'lr': 0.00020167684686897094, 'samples': 14834176, 'steps': 28972, 'loss/train': 1.4739301204681396} +02/25/2022 15:06:55 - INFO - codeparrot_training - Step 28973: {'lr': 0.00020166079308787593, 'samples': 14834688, 'steps': 28973, 'loss/train': 1.7997164726257324} +02/25/2022 15:06:59 - INFO - codeparrot_training - Step 28974: {'lr': 0.00020164473951385077, 'samples': 14835200, 'steps': 28974, 'loss/train': 1.6903244256973267} +02/25/2022 15:07:04 - INFO - codeparrot_training - Step 28975: {'lr': 0.0002016286861469642, 'samples': 14835712, 'steps': 28975, 'loss/train': 2.206655263900757} +02/25/2022 15:07:08 - INFO - codeparrot_training - Step 28976: {'lr': 0.00020161263298728495, 'samples': 14836224, 'steps': 28976, 'loss/train': 3.481577157974243} +02/25/2022 15:07:14 - INFO - codeparrot_training - Step 28977: {'lr': 0.00020159658003488185, 'samples': 14836736, 'steps': 28977, 'loss/train': 1.877198576927185} +02/25/2022 15:07:17 - INFO - codeparrot_training - Step 28978: {'lr': 0.00020158052728982357, 'samples': 14837248, 'steps': 28978, 'loss/train': 1.6590062379837036} +02/25/2022 15:07:23 - INFO - codeparrot_training - Step 28979: {'lr': 0.000201564474752179, 'samples': 14837760, 'steps': 28979, 'loss/train': 1.7465327978134155} +02/25/2022 15:07:26 - INFO - codeparrot_training - Step 28980: {'lr': 0.00020154842242201682, 'samples': 14838272, 'steps': 28980, 'loss/train': 1.3785573244094849} +02/25/2022 15:07:32 - INFO - codeparrot_training - Step 28981: {'lr': 0.00020153237029940584, 'samples': 14838784, 'steps': 28981, 'loss/train': 1.5153241157531738} +02/25/2022 15:07:35 - INFO - codeparrot_training - Step 28982: {'lr': 0.00020151631838441478, 'samples': 14839296, 'steps': 28982, 'loss/train': 1.3197001218795776} +02/25/2022 15:07:42 - INFO - codeparrot_training - Step 28983: {'lr': 0.00020150026667711238, 'samples': 14839808, 'steps': 28983, 'loss/train': 0.2718430161476135} +02/25/2022 15:07:46 - INFO - codeparrot_training - Step 28984: {'lr': 0.00020148421517756743, 'samples': 14840320, 'steps': 28984, 'loss/train': 1.6264054775238037} +02/25/2022 15:07:51 - INFO - codeparrot_training - Step 28985: {'lr': 0.00020146816388584877, 'samples': 14840832, 'steps': 28985, 'loss/train': 2.099219799041748} +02/25/2022 15:07:55 - INFO - codeparrot_training - Step 28986: {'lr': 0.00020145211280202504, 'samples': 14841344, 'steps': 28986, 'loss/train': 1.719549298286438} +02/25/2022 15:08:00 - INFO - codeparrot_training - Step 28987: {'lr': 0.00020143606192616503, 'samples': 14841856, 'steps': 28987, 'loss/train': 0.9109772443771362} +02/25/2022 15:08:04 - INFO - codeparrot_training - Step 28988: {'lr': 0.0002014200112583375, 'samples': 14842368, 'steps': 28988, 'loss/train': 0.3388788402080536} +02/25/2022 15:08:09 - INFO - codeparrot_training - Step 28989: {'lr': 0.00020140396079861123, 'samples': 14842880, 'steps': 28989, 'loss/train': 1.8075129985809326} +02/25/2022 15:08:13 - INFO - codeparrot_training - Step 28990: {'lr': 0.00020138791054705505, 'samples': 14843392, 'steps': 28990, 'loss/train': 2.0343501567840576} +02/25/2022 15:08:18 - INFO - codeparrot_training - Step 28991: {'lr': 0.0002013718605037375, 'samples': 14843904, 'steps': 28991, 'loss/train': 1.957627773284912} +02/25/2022 15:08:22 - INFO - codeparrot_training - Step 28992: {'lr': 0.0002013558106687275, 'samples': 14844416, 'steps': 28992, 'loss/train': 1.6540087461471558} +02/25/2022 15:08:28 - INFO - codeparrot_training - Step 28993: {'lr': 0.00020133976104209372, 'samples': 14844928, 'steps': 28993, 'loss/train': 1.4951592683792114} +02/25/2022 15:08:32 - INFO - codeparrot_training - Step 28994: {'lr': 0.00020132371162390512, 'samples': 14845440, 'steps': 28994, 'loss/train': 1.3136905431747437} +02/25/2022 15:08:38 - INFO - codeparrot_training - Step 28995: {'lr': 0.0002013076624142301, 'samples': 14845952, 'steps': 28995, 'loss/train': 1.4378952980041504} +02/25/2022 15:08:41 - INFO - codeparrot_training - Step 28996: {'lr': 0.00020129161341313765, 'samples': 14846464, 'steps': 28996, 'loss/train': 0.17468242347240448} +02/25/2022 15:08:46 - INFO - codeparrot_training - Step 28997: {'lr': 0.00020127556462069644, 'samples': 14846976, 'steps': 28997, 'loss/train': 0.5771901607513428} +02/25/2022 15:08:50 - INFO - codeparrot_training - Step 28998: {'lr': 0.00020125951603697535, 'samples': 14847488, 'steps': 28998, 'loss/train': 1.5860698223114014} +02/25/2022 15:08:55 - INFO - codeparrot_training - Step 28999: {'lr': 0.0002012434676620429, 'samples': 14848000, 'steps': 28999, 'loss/train': 0.6781620979309082} +02/25/2022 15:08:55 - INFO - codeparrot_training - Evaluating and saving model checkpoint