diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -15391,3 +15391,1009 @@ Use FP16 precision: False 02/24/2022 20:59:00 - INFO - codeparrot_training - Step 14998: {'lr': 0.000414861056262898, 'samples': 7679488, 'steps': 14998, 'loss/train': 1.7019398212432861} 02/24/2022 20:59:03 - INFO - codeparrot_training - Step 14999: {'lr': 0.0004148487553720375, 'samples': 7680000, 'steps': 14999, 'loss/train': 2.059992551803589} 02/24/2022 20:59:03 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 20:59:20 - WARNING - huggingface_hub.repository - Several commits (15) will be pushed upstream. +02/24/2022 20:59:20 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 20:59:54 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 3a61b16..751fbfb floral-grass-11 -> floral-grass-11 + +02/24/2022 21:00:01 - INFO - codeparrot_training - Step 15000: {'lr': 0.0004148364537750172, 'samples': 7680512, 'steps': 15000, 'loss/train': 1.8882983922958374} +02/24/2022 21:00:04 - INFO - codeparrot_training - Step 15001: {'lr': 0.0004148241514718899, 'samples': 7681024, 'steps': 15001, 'loss/train': 3.1234290599823} +02/24/2022 21:00:11 - INFO - codeparrot_training - Step 15002: {'lr': 0.00041481184846270836, 'samples': 7681536, 'steps': 15002, 'loss/train': 2.020622968673706} +02/24/2022 21:00:14 - INFO - codeparrot_training - Step 15003: {'lr': 0.00041479954474752507, 'samples': 7682048, 'steps': 15003, 'loss/train': 2.7225804328918457} +02/24/2022 21:00:20 - INFO - codeparrot_training - Step 15004: {'lr': 0.0004147872403263929, 'samples': 7682560, 'steps': 15004, 'loss/train': 1.2915410995483398} +02/24/2022 21:00:23 - INFO - codeparrot_training - Step 15005: {'lr': 0.0004147749351993645, 'samples': 7683072, 'steps': 15005, 'loss/train': 3.145185947418213} +02/24/2022 21:00:29 - INFO - codeparrot_training - Step 15006: {'lr': 0.0004147626293664926, 'samples': 7683584, 'steps': 15006, 'loss/train': 2.1505861282348633} +02/24/2022 21:00:32 - INFO - codeparrot_training - Step 15007: {'lr': 0.00041475032282783, 'samples': 7684096, 'steps': 15007, 'loss/train': 1.9363479614257812} +02/24/2022 21:00:38 - INFO - codeparrot_training - Step 15008: {'lr': 0.0004147380155834293, 'samples': 7684608, 'steps': 15008, 'loss/train': 0.9024702906608582} +02/24/2022 21:00:41 - INFO - codeparrot_training - Step 15009: {'lr': 0.00041472570763334316, 'samples': 7685120, 'steps': 15009, 'loss/train': 2.2938032150268555} +02/24/2022 21:00:47 - INFO - codeparrot_training - Step 15010: {'lr': 0.00041471339897762447, 'samples': 7685632, 'steps': 15010, 'loss/train': 2.2601678371429443} +02/24/2022 21:00:50 - INFO - codeparrot_training - Step 15011: {'lr': 0.0004147010896163259, 'samples': 7686144, 'steps': 15011, 'loss/train': 2.4319632053375244} +02/24/2022 21:00:56 - INFO - codeparrot_training - Step 15012: {'lr': 0.00041468877954950006, 'samples': 7686656, 'steps': 15012, 'loss/train': 1.8616952896118164} +02/24/2022 21:00:59 - INFO - codeparrot_training - Step 15013: {'lr': 0.0004146764687771999, 'samples': 7687168, 'steps': 15013, 'loss/train': 2.0138206481933594} +02/24/2022 21:01:06 - INFO - codeparrot_training - Step 15014: {'lr': 0.00041466415729947794, 'samples': 7687680, 'steps': 15014, 'loss/train': 1.6361883878707886} +02/24/2022 21:01:09 - INFO - codeparrot_training - Step 15015: {'lr': 0.0004146518451163871, 'samples': 7688192, 'steps': 15015, 'loss/train': 1.0572295188903809} +02/24/2022 21:01:15 - INFO - codeparrot_training - Step 15016: {'lr': 0.00041463953222798, 'samples': 7688704, 'steps': 15016, 'loss/train': 3.8093860149383545} +02/24/2022 21:01:18 - INFO - codeparrot_training - Step 15017: {'lr': 0.00041462721863430943, 'samples': 7689216, 'steps': 15017, 'loss/train': 1.4273110628128052} +02/24/2022 21:01:24 - INFO - codeparrot_training - Step 15018: {'lr': 0.0004146149043354281, 'samples': 7689728, 'steps': 15018, 'loss/train': 1.517775535583496} +02/24/2022 21:01:27 - INFO - codeparrot_training - Step 15019: {'lr': 0.0004146025893313888, 'samples': 7690240, 'steps': 15019, 'loss/train': 1.854677438735962} +02/24/2022 21:01:33 - INFO - codeparrot_training - Step 15020: {'lr': 0.00041459027362224433, 'samples': 7690752, 'steps': 15020, 'loss/train': 2.0060839653015137} +02/24/2022 21:01:36 - INFO - codeparrot_training - Step 15021: {'lr': 0.0004145779572080473, 'samples': 7691264, 'steps': 15021, 'loss/train': 2.086730718612671} +02/24/2022 21:01:42 - INFO - codeparrot_training - Step 15022: {'lr': 0.0004145656400888506, 'samples': 7691776, 'steps': 15022, 'loss/train': 2.4009227752685547} +02/24/2022 21:01:45 - INFO - codeparrot_training - Step 15023: {'lr': 0.000414553322264707, 'samples': 7692288, 'steps': 15023, 'loss/train': 1.1341274976730347} +02/24/2022 21:01:52 - INFO - codeparrot_training - Step 15024: {'lr': 0.00041454100373566915, 'samples': 7692800, 'steps': 15024, 'loss/train': 1.7949512004852295} +02/24/2022 21:01:55 - INFO - codeparrot_training - Step 15025: {'lr': 0.00041452868450178994, 'samples': 7693312, 'steps': 15025, 'loss/train': 1.8109724521636963} +02/24/2022 21:02:01 - INFO - codeparrot_training - Step 15026: {'lr': 0.00041451636456312207, 'samples': 7693824, 'steps': 15026, 'loss/train': 0.7747088670730591} +02/24/2022 21:02:04 - INFO - codeparrot_training - Step 15027: {'lr': 0.0004145040439197183, 'samples': 7694336, 'steps': 15027, 'loss/train': 2.311444044113159} +02/24/2022 21:02:10 - INFO - codeparrot_training - Step 15028: {'lr': 0.00041449172257163156, 'samples': 7694848, 'steps': 15028, 'loss/train': 2.1977365016937256} +02/24/2022 21:02:13 - INFO - codeparrot_training - Step 15029: {'lr': 0.00041447940051891435, 'samples': 7695360, 'steps': 15029, 'loss/train': 2.079603433609009} +02/24/2022 21:02:19 - INFO - codeparrot_training - Step 15030: {'lr': 0.00041446707776161975, 'samples': 7695872, 'steps': 15030, 'loss/train': 1.5588449239730835} +02/24/2022 21:02:24 - INFO - codeparrot_training - Step 15031: {'lr': 0.00041445475429980033, 'samples': 7696384, 'steps': 15031, 'loss/train': 0.8621600270271301} +02/24/2022 21:02:28 - INFO - codeparrot_training - Step 15032: {'lr': 0.000414442430133509, 'samples': 7696896, 'steps': 15032, 'loss/train': 3.114267110824585} +02/24/2022 21:02:33 - INFO - codeparrot_training - Step 15033: {'lr': 0.0004144301052627985, 'samples': 7697408, 'steps': 15033, 'loss/train': 1.6453732252120972} +02/24/2022 21:02:37 - INFO - codeparrot_training - Step 15034: {'lr': 0.00041441777968772165, 'samples': 7697920, 'steps': 15034, 'loss/train': 2.32963228225708} +02/24/2022 21:02:42 - INFO - codeparrot_training - Step 15035: {'lr': 0.00041440545340833124, 'samples': 7698432, 'steps': 15035, 'loss/train': 2.181037425994873} +02/24/2022 21:02:46 - INFO - codeparrot_training - Step 15036: {'lr': 0.00041439312642468007, 'samples': 7698944, 'steps': 15036, 'loss/train': 2.291114091873169} +02/24/2022 21:02:51 - INFO - codeparrot_training - Step 15037: {'lr': 0.000414380798736821, 'samples': 7699456, 'steps': 15037, 'loss/train': 2.567660093307495} +02/24/2022 21:02:55 - INFO - codeparrot_training - Step 15038: {'lr': 0.0004143684703448067, 'samples': 7699968, 'steps': 15038, 'loss/train': 2.418159246444702} +02/24/2022 21:03:01 - INFO - codeparrot_training - Step 15039: {'lr': 0.0004143561412486901, 'samples': 7700480, 'steps': 15039, 'loss/train': 1.9684832096099854} +02/24/2022 21:03:04 - INFO - codeparrot_training - Step 15040: {'lr': 0.00041434381144852395, 'samples': 7700992, 'steps': 15040, 'loss/train': 1.1164835691452026} +02/24/2022 21:03:10 - INFO - codeparrot_training - Step 15041: {'lr': 0.00041433148094436115, 'samples': 7701504, 'steps': 15041, 'loss/train': 2.4944045543670654} +02/24/2022 21:03:13 - INFO - codeparrot_training - Step 15042: {'lr': 0.0004143191497362544, 'samples': 7702016, 'steps': 15042, 'loss/train': 1.8259201049804688} +02/24/2022 21:03:19 - INFO - codeparrot_training - Step 15043: {'lr': 0.0004143068178242566, 'samples': 7702528, 'steps': 15043, 'loss/train': 5.458270072937012} +02/24/2022 21:03:23 - INFO - codeparrot_training - Step 15044: {'lr': 0.00041429448520842064, 'samples': 7703040, 'steps': 15044, 'loss/train': 1.0957911014556885} +02/24/2022 21:03:28 - INFO - codeparrot_training - Step 15045: {'lr': 0.00041428215188879926, 'samples': 7703552, 'steps': 15045, 'loss/train': 1.8701893091201782} +02/24/2022 21:03:32 - INFO - codeparrot_training - Step 15046: {'lr': 0.0004142698178654453, 'samples': 7704064, 'steps': 15046, 'loss/train': 1.3205199241638184} +02/24/2022 21:03:37 - INFO - codeparrot_training - Step 15047: {'lr': 0.0004142574831384115, 'samples': 7704576, 'steps': 15047, 'loss/train': 1.8461235761642456} +02/24/2022 21:03:41 - INFO - codeparrot_training - Step 15048: {'lr': 0.0004142451477077509, 'samples': 7705088, 'steps': 15048, 'loss/train': 2.698197841644287} +02/24/2022 21:03:47 - INFO - codeparrot_training - Step 15049: {'lr': 0.00041423281157351624, 'samples': 7705600, 'steps': 15049, 'loss/train': 2.1005477905273438} +02/24/2022 21:03:50 - INFO - codeparrot_training - Step 15050: {'lr': 0.00041422047473576033, 'samples': 7706112, 'steps': 15050, 'loss/train': 1.5033564567565918} +02/24/2022 21:03:56 - INFO - codeparrot_training - Step 15051: {'lr': 0.0004142081371945361, 'samples': 7706624, 'steps': 15051, 'loss/train': 2.7540948390960693} +02/24/2022 21:03:59 - INFO - codeparrot_training - Step 15052: {'lr': 0.00041419579894989633, 'samples': 7707136, 'steps': 15052, 'loss/train': 2.084900140762329} +02/24/2022 21:04:05 - INFO - codeparrot_training - Step 15053: {'lr': 0.0004141834600018939, 'samples': 7707648, 'steps': 15053, 'loss/train': 2.363438129425049} +02/24/2022 21:04:08 - INFO - codeparrot_training - Step 15054: {'lr': 0.00041417112035058157, 'samples': 7708160, 'steps': 15054, 'loss/train': 2.3568363189697266} +02/24/2022 21:04:14 - INFO - codeparrot_training - Step 15055: {'lr': 0.00041415877999601236, 'samples': 7708672, 'steps': 15055, 'loss/train': 1.926734447479248} +02/24/2022 21:04:17 - INFO - codeparrot_training - Step 15056: {'lr': 0.0004141464389382391, 'samples': 7709184, 'steps': 15056, 'loss/train': 2.082723379135132} +02/24/2022 21:04:24 - INFO - codeparrot_training - Step 15057: {'lr': 0.0004141340971773147, 'samples': 7709696, 'steps': 15057, 'loss/train': 2.5265214443206787} +02/24/2022 21:04:28 - INFO - codeparrot_training - Step 15058: {'lr': 0.00041412175471329174, 'samples': 7710208, 'steps': 15058, 'loss/train': 2.0780766010284424} +02/24/2022 21:04:33 - INFO - codeparrot_training - Step 15059: {'lr': 0.0004141094115462234, 'samples': 7710720, 'steps': 15059, 'loss/train': 0.3992597162723541} +02/24/2022 21:04:37 - INFO - codeparrot_training - Step 15060: {'lr': 0.00041409706767616246, 'samples': 7711232, 'steps': 15060, 'loss/train': 1.4950170516967773} +02/24/2022 21:04:42 - INFO - codeparrot_training - Step 15061: {'lr': 0.0004140847231031618, 'samples': 7711744, 'steps': 15061, 'loss/train': 1.5474239587783813} +02/24/2022 21:04:46 - INFO - codeparrot_training - Step 15062: {'lr': 0.00041407237782727427, 'samples': 7712256, 'steps': 15062, 'loss/train': 1.1488640308380127} +02/24/2022 21:04:51 - INFO - codeparrot_training - Step 15063: {'lr': 0.0004140600318485527, 'samples': 7712768, 'steps': 15063, 'loss/train': 1.7876847982406616} +02/24/2022 21:04:55 - INFO - codeparrot_training - Step 15064: {'lr': 0.0004140476851670502, 'samples': 7713280, 'steps': 15064, 'loss/train': 1.9944267272949219} +02/24/2022 21:05:00 - INFO - codeparrot_training - Step 15065: {'lr': 0.00041403533778281934, 'samples': 7713792, 'steps': 15065, 'loss/train': 1.2161877155303955} +02/24/2022 21:05:04 - INFO - codeparrot_training - Step 15066: {'lr': 0.0004140229896959132, 'samples': 7714304, 'steps': 15066, 'loss/train': 0.2673460841178894} +02/24/2022 21:05:07 - INFO - codeparrot_training - Step 15067: {'lr': 0.00041401064090638474, 'samples': 7714816, 'steps': 15067, 'loss/train': 2.6707763671875} +02/24/2022 21:05:13 - INFO - codeparrot_training - Step 15068: {'lr': 0.0004139982914142868, 'samples': 7715328, 'steps': 15068, 'loss/train': 2.6089210510253906} +02/24/2022 21:05:19 - INFO - codeparrot_training - Step 15069: {'lr': 0.00041398594121967215, 'samples': 7715840, 'steps': 15069, 'loss/train': 2.262225389480591} +02/24/2022 21:05:23 - INFO - codeparrot_training - Step 15070: {'lr': 0.0004139735903225939, 'samples': 7716352, 'steps': 15070, 'loss/train': 1.618273377418518} +02/24/2022 21:05:28 - INFO - codeparrot_training - Step 15071: {'lr': 0.0004139612387231048, 'samples': 7716864, 'steps': 15071, 'loss/train': 1.9760456085205078} +02/24/2022 21:05:32 - INFO - codeparrot_training - Step 15072: {'lr': 0.0004139488864212578, 'samples': 7717376, 'steps': 15072, 'loss/train': 1.9000672101974487} +02/24/2022 21:05:37 - INFO - codeparrot_training - Step 15073: {'lr': 0.0004139365334171059, 'samples': 7717888, 'steps': 15073, 'loss/train': 2.652770757675171} +02/24/2022 21:05:41 - INFO - codeparrot_training - Step 15074: {'lr': 0.0004139241797107019, 'samples': 7718400, 'steps': 15074, 'loss/train': 2.3371622562408447} +02/24/2022 21:05:46 - INFO - codeparrot_training - Step 15075: {'lr': 0.00041391182530209873, 'samples': 7718912, 'steps': 15075, 'loss/train': 1.8400214910507202} +02/24/2022 21:05:50 - INFO - codeparrot_training - Step 15076: {'lr': 0.0004138994701913494, 'samples': 7719424, 'steps': 15076, 'loss/train': 1.8306312561035156} +02/24/2022 21:05:56 - INFO - codeparrot_training - Step 15077: {'lr': 0.00041388711437850676, 'samples': 7719936, 'steps': 15077, 'loss/train': 1.7445478439331055} +02/24/2022 21:05:59 - INFO - codeparrot_training - Step 15078: {'lr': 0.00041387475786362386, 'samples': 7720448, 'steps': 15078, 'loss/train': 1.8092299699783325} +02/24/2022 21:06:05 - INFO - codeparrot_training - Step 15079: {'lr': 0.0004138624006467534, 'samples': 7720960, 'steps': 15079, 'loss/train': 2.0429253578186035} +02/24/2022 21:06:08 - INFO - codeparrot_training - Step 15080: {'lr': 0.00041385004272794846, 'samples': 7721472, 'steps': 15080, 'loss/train': 1.4502273797988892} +02/24/2022 21:06:14 - INFO - codeparrot_training - Step 15081: {'lr': 0.00041383768410726207, 'samples': 7721984, 'steps': 15081, 'loss/train': 1.8575890064239502} +02/24/2022 21:06:17 - INFO - codeparrot_training - Step 15082: {'lr': 0.000413825324784747, 'samples': 7722496, 'steps': 15082, 'loss/train': 2.518663167953491} +02/24/2022 21:06:23 - INFO - codeparrot_training - Step 15083: {'lr': 0.00041381296476045626, 'samples': 7723008, 'steps': 15083, 'loss/train': 1.9789882898330688} +02/24/2022 21:06:26 - INFO - codeparrot_training - Step 15084: {'lr': 0.0004138006040344428, 'samples': 7723520, 'steps': 15084, 'loss/train': 1.4686317443847656} +02/24/2022 21:06:33 - INFO - codeparrot_training - Step 15085: {'lr': 0.0004137882426067595, 'samples': 7724032, 'steps': 15085, 'loss/train': 1.7022567987442017} +02/24/2022 21:06:36 - INFO - codeparrot_training - Step 15086: {'lr': 0.0004137758804774594, 'samples': 7724544, 'steps': 15086, 'loss/train': 1.742958664894104} +02/24/2022 21:06:42 - INFO - codeparrot_training - Step 15087: {'lr': 0.0004137635176465955, 'samples': 7725056, 'steps': 15087, 'loss/train': 1.699378490447998} +02/24/2022 21:06:45 - INFO - codeparrot_training - Step 15088: {'lr': 0.00041375115411422064, 'samples': 7725568, 'steps': 15088, 'loss/train': 1.7454075813293457} +02/24/2022 21:06:51 - INFO - codeparrot_training - Step 15089: {'lr': 0.0004137387898803878, 'samples': 7726080, 'steps': 15089, 'loss/train': 1.2566828727722168} +02/24/2022 21:06:54 - INFO - codeparrot_training - Step 15090: {'lr': 0.0004137264249451501, 'samples': 7726592, 'steps': 15090, 'loss/train': 2.319958448410034} +02/24/2022 21:07:00 - INFO - codeparrot_training - Step 15091: {'lr': 0.00041371405930856026, 'samples': 7727104, 'steps': 15091, 'loss/train': 2.1931228637695312} +02/24/2022 21:07:03 - INFO - codeparrot_training - Step 15092: {'lr': 0.00041370169297067145, 'samples': 7727616, 'steps': 15092, 'loss/train': 3.3477466106414795} +02/24/2022 21:07:09 - INFO - codeparrot_training - Step 15093: {'lr': 0.0004136893259315365, 'samples': 7728128, 'steps': 15093, 'loss/train': 2.4416987895965576} +02/24/2022 21:07:12 - INFO - codeparrot_training - Step 15094: {'lr': 0.00041367695819120854, 'samples': 7728640, 'steps': 15094, 'loss/train': 1.09297776222229} +02/24/2022 21:07:19 - INFO - codeparrot_training - Step 15095: {'lr': 0.0004136645897497404, 'samples': 7729152, 'steps': 15095, 'loss/train': 0.9568027257919312} +02/24/2022 21:07:23 - INFO - codeparrot_training - Step 15096: {'lr': 0.0004136522206071852, 'samples': 7729664, 'steps': 15096, 'loss/train': 1.05661940574646} +02/24/2022 21:07:28 - INFO - codeparrot_training - Step 15097: {'lr': 0.0004136398507635958, 'samples': 7730176, 'steps': 15097, 'loss/train': 2.524338483810425} +02/24/2022 21:07:31 - INFO - codeparrot_training - Step 15098: {'lr': 0.00041362748021902526, 'samples': 7730688, 'steps': 15098, 'loss/train': 1.827979326248169} +02/24/2022 21:07:37 - INFO - codeparrot_training - Step 15099: {'lr': 0.0004136151089735265, 'samples': 7731200, 'steps': 15099, 'loss/train': 1.1120802164077759} +02/24/2022 21:07:41 - INFO - codeparrot_training - Step 15100: {'lr': 0.00041360273702715263, 'samples': 7731712, 'steps': 15100, 'loss/train': 1.7640358209609985} +02/24/2022 21:07:46 - INFO - codeparrot_training - Step 15101: {'lr': 0.0004135903643799566, 'samples': 7732224, 'steps': 15101, 'loss/train': 1.9734355211257935} +02/24/2022 21:07:50 - INFO - codeparrot_training - Step 15102: {'lr': 0.00041357799103199127, 'samples': 7732736, 'steps': 15102, 'loss/train': 2.2872936725616455} +02/24/2022 21:07:55 - INFO - codeparrot_training - Step 15103: {'lr': 0.00041356561698330984, 'samples': 7733248, 'steps': 15103, 'loss/train': 2.3059182167053223} +02/24/2022 21:07:59 - INFO - codeparrot_training - Step 15104: {'lr': 0.0004135532422339653, 'samples': 7733760, 'steps': 15104, 'loss/train': 1.5279898643493652} +02/24/2022 21:08:05 - INFO - codeparrot_training - Step 15105: {'lr': 0.00041354086678401056, 'samples': 7734272, 'steps': 15105, 'loss/train': 2.68605375289917} +02/24/2022 21:08:09 - INFO - codeparrot_training - Step 15106: {'lr': 0.00041352849063349865, 'samples': 7734784, 'steps': 15106, 'loss/train': 3.3742055892944336} +02/24/2022 21:08:14 - INFO - codeparrot_training - Step 15107: {'lr': 0.0004135161137824827, 'samples': 7735296, 'steps': 15107, 'loss/train': 2.2265923023223877} +02/24/2022 21:08:18 - INFO - codeparrot_training - Step 15108: {'lr': 0.0004135037362310155, 'samples': 7735808, 'steps': 15108, 'loss/train': 0.9467727541923523} +02/24/2022 21:08:23 - INFO - codeparrot_training - Step 15109: {'lr': 0.0004134913579791503, 'samples': 7736320, 'steps': 15109, 'loss/train': 1.1220277547836304} +02/24/2022 21:08:27 - INFO - codeparrot_training - Step 15110: {'lr': 0.00041347897902694003, 'samples': 7736832, 'steps': 15110, 'loss/train': 1.8450486660003662} +02/24/2022 21:08:32 - INFO - codeparrot_training - Step 15111: {'lr': 0.00041346659937443775, 'samples': 7737344, 'steps': 15111, 'loss/train': 2.6501808166503906} +02/24/2022 21:08:36 - INFO - codeparrot_training - Step 15112: {'lr': 0.00041345421902169645, 'samples': 7737856, 'steps': 15112, 'loss/train': 0.6919535994529724} +02/24/2022 21:08:42 - INFO - codeparrot_training - Step 15113: {'lr': 0.0004134418379687691, 'samples': 7738368, 'steps': 15113, 'loss/train': 2.2594401836395264} +02/24/2022 21:08:45 - INFO - codeparrot_training - Step 15114: {'lr': 0.0004134294562157089, 'samples': 7738880, 'steps': 15114, 'loss/train': 1.914897084236145} +02/24/2022 21:08:51 - INFO - codeparrot_training - Step 15115: {'lr': 0.00041341707376256877, 'samples': 7739392, 'steps': 15115, 'loss/train': 2.244562864303589} +02/24/2022 21:08:55 - INFO - codeparrot_training - Step 15116: {'lr': 0.00041340469060940183, 'samples': 7739904, 'steps': 15116, 'loss/train': 3.3928701877593994} +02/24/2022 21:09:00 - INFO - codeparrot_training - Step 15117: {'lr': 0.0004133923067562611, 'samples': 7740416, 'steps': 15117, 'loss/train': 1.800842523574829} +02/24/2022 21:09:04 - INFO - codeparrot_training - Step 15118: {'lr': 0.0004133799222031995, 'samples': 7740928, 'steps': 15118, 'loss/train': 2.7789714336395264} +02/24/2022 21:09:10 - INFO - codeparrot_training - Step 15119: {'lr': 0.0004133675369502703, 'samples': 7741440, 'steps': 15119, 'loss/train': 1.9856935739517212} +02/24/2022 21:09:13 - INFO - codeparrot_training - Step 15120: {'lr': 0.0004133551509975264, 'samples': 7741952, 'steps': 15120, 'loss/train': 0.8502724170684814} +02/24/2022 21:09:19 - INFO - codeparrot_training - Step 15121: {'lr': 0.0004133427643450209, 'samples': 7742464, 'steps': 15121, 'loss/train': 0.7499862909317017} +02/24/2022 21:09:22 - INFO - codeparrot_training - Step 15122: {'lr': 0.0004133303769928068, 'samples': 7742976, 'steps': 15122, 'loss/train': 2.680826425552368} +02/24/2022 21:09:28 - INFO - codeparrot_training - Step 15123: {'lr': 0.00041331798894093735, 'samples': 7743488, 'steps': 15123, 'loss/train': 1.6448231935501099} +02/24/2022 21:09:31 - INFO - codeparrot_training - Step 15124: {'lr': 0.0004133056001894655, 'samples': 7744000, 'steps': 15124, 'loss/train': 2.183262586593628} +02/24/2022 21:09:37 - INFO - codeparrot_training - Step 15125: {'lr': 0.0004132932107384442, 'samples': 7744512, 'steps': 15125, 'loss/train': 2.57751727104187} +02/24/2022 21:09:40 - INFO - codeparrot_training - Step 15126: {'lr': 0.0004132808205879267, 'samples': 7745024, 'steps': 15126, 'loss/train': 2.0060176849365234} +02/24/2022 21:09:46 - INFO - codeparrot_training - Step 15127: {'lr': 0.000413268429737966, 'samples': 7745536, 'steps': 15127, 'loss/train': 1.2702980041503906} +02/24/2022 21:09:49 - INFO - codeparrot_training - Step 15128: {'lr': 0.00041325603818861517, 'samples': 7746048, 'steps': 15128, 'loss/train': 1.5013447999954224} +02/24/2022 21:09:55 - INFO - codeparrot_training - Step 15129: {'lr': 0.00041324364593992735, 'samples': 7746560, 'steps': 15129, 'loss/train': 2.2891414165496826} +02/24/2022 21:09:58 - INFO - codeparrot_training - Step 15130: {'lr': 0.00041323125299195563, 'samples': 7747072, 'steps': 15130, 'loss/train': 0.6653809547424316} +02/24/2022 21:10:04 - INFO - codeparrot_training - Step 15131: {'lr': 0.000413218859344753, 'samples': 7747584, 'steps': 15131, 'loss/train': 0.8936456441879272} +02/24/2022 21:10:08 - INFO - codeparrot_training - Step 15132: {'lr': 0.00041320646499837254, 'samples': 7748096, 'steps': 15132, 'loss/train': 2.230276584625244} +02/24/2022 21:10:13 - INFO - codeparrot_training - Step 15133: {'lr': 0.00041319406995286753, 'samples': 7748608, 'steps': 15133, 'loss/train': 2.176027536392212} +02/24/2022 21:10:17 - INFO - codeparrot_training - Step 15134: {'lr': 0.0004131816742082909, 'samples': 7749120, 'steps': 15134, 'loss/train': 1.561685562133789} +02/24/2022 21:10:22 - INFO - codeparrot_training - Step 15135: {'lr': 0.00041316927776469575, 'samples': 7749632, 'steps': 15135, 'loss/train': 3.352414846420288} +02/24/2022 21:10:26 - INFO - codeparrot_training - Step 15136: {'lr': 0.00041315688062213524, 'samples': 7750144, 'steps': 15136, 'loss/train': 2.368332624435425} +02/24/2022 21:10:31 - INFO - codeparrot_training - Step 15137: {'lr': 0.0004131444827806625, 'samples': 7750656, 'steps': 15137, 'loss/train': 1.6380267143249512} +02/24/2022 21:10:35 - INFO - codeparrot_training - Step 15138: {'lr': 0.00041313208424033056, 'samples': 7751168, 'steps': 15138, 'loss/train': 0.9953458309173584} +02/24/2022 21:10:41 - INFO - codeparrot_training - Step 15139: {'lr': 0.0004131196850011926, 'samples': 7751680, 'steps': 15139, 'loss/train': 1.9391264915466309} +02/24/2022 21:10:44 - INFO - codeparrot_training - Step 15140: {'lr': 0.0004131072850633017, 'samples': 7752192, 'steps': 15140, 'loss/train': 2.290933132171631} +02/24/2022 21:10:51 - INFO - codeparrot_training - Step 15141: {'lr': 0.00041309488442671093, 'samples': 7752704, 'steps': 15141, 'loss/train': 2.3020575046539307} +02/24/2022 21:10:54 - INFO - codeparrot_training - Step 15142: {'lr': 0.00041308248309147356, 'samples': 7753216, 'steps': 15142, 'loss/train': 2.1128060817718506} +02/24/2022 21:10:59 - INFO - codeparrot_training - Step 15143: {'lr': 0.00041307008105764256, 'samples': 7753728, 'steps': 15143, 'loss/train': 2.1988351345062256} +02/24/2022 21:11:03 - INFO - codeparrot_training - Step 15144: {'lr': 0.0004130576783252712, 'samples': 7754240, 'steps': 15144, 'loss/train': 2.371443510055542} +02/24/2022 21:11:09 - INFO - codeparrot_training - Step 15145: {'lr': 0.00041304527489441237, 'samples': 7754752, 'steps': 15145, 'loss/train': 1.2331807613372803} +02/24/2022 21:11:12 - INFO - codeparrot_training - Step 15146: {'lr': 0.0004130328707651195, 'samples': 7755264, 'steps': 15146, 'loss/train': 0.9676528573036194} +02/24/2022 21:11:18 - INFO - codeparrot_training - Step 15147: {'lr': 0.00041302046593744547, 'samples': 7755776, 'steps': 15147, 'loss/train': 1.0516717433929443} +02/24/2022 21:11:21 - INFO - codeparrot_training - Step 15148: {'lr': 0.00041300806041144356, 'samples': 7756288, 'steps': 15148, 'loss/train': 1.3651198148727417} +02/24/2022 21:11:27 - INFO - codeparrot_training - Step 15149: {'lr': 0.0004129956541871669, 'samples': 7756800, 'steps': 15149, 'loss/train': 1.8809645175933838} +02/24/2022 21:11:30 - INFO - codeparrot_training - Step 15150: {'lr': 0.00041298324726466855, 'samples': 7757312, 'steps': 15150, 'loss/train': 1.4244515895843506} +02/24/2022 21:11:36 - INFO - codeparrot_training - Step 15151: {'lr': 0.0004129708396440018, 'samples': 7757824, 'steps': 15151, 'loss/train': 1.4894310235977173} +02/24/2022 21:11:40 - INFO - codeparrot_training - Step 15152: {'lr': 0.00041295843132521973, 'samples': 7758336, 'steps': 15152, 'loss/train': 2.0084636211395264} +02/24/2022 21:11:45 - INFO - codeparrot_training - Step 15153: {'lr': 0.0004129460223083754, 'samples': 7758848, 'steps': 15153, 'loss/train': 1.8689044713974} +02/24/2022 21:11:49 - INFO - codeparrot_training - Step 15154: {'lr': 0.0004129336125935221, 'samples': 7759360, 'steps': 15154, 'loss/train': 0.3137350380420685} +02/24/2022 21:11:54 - INFO - codeparrot_training - Step 15155: {'lr': 0.000412921202180713, 'samples': 7759872, 'steps': 15155, 'loss/train': 1.7021143436431885} +02/24/2022 21:11:58 - INFO - codeparrot_training - Step 15156: {'lr': 0.00041290879107000114, 'samples': 7760384, 'steps': 15156, 'loss/train': 1.8608148097991943} +02/24/2022 21:12:03 - INFO - codeparrot_training - Step 15157: {'lr': 0.00041289637926143974, 'samples': 7760896, 'steps': 15157, 'loss/train': 2.5681891441345215} +02/24/2022 21:12:07 - INFO - codeparrot_training - Step 15158: {'lr': 0.000412883966755082, 'samples': 7761408, 'steps': 15158, 'loss/train': 2.3331186771392822} +02/24/2022 21:12:12 - INFO - codeparrot_training - Step 15159: {'lr': 0.000412871553550981, 'samples': 7761920, 'steps': 15159, 'loss/train': 1.8314324617385864} +02/24/2022 21:12:16 - INFO - codeparrot_training - Step 15160: {'lr': 0.00041285913964919006, 'samples': 7762432, 'steps': 15160, 'loss/train': 2.451927661895752} +02/24/2022 21:12:22 - INFO - codeparrot_training - Step 15161: {'lr': 0.0004128467250497623, 'samples': 7762944, 'steps': 15161, 'loss/train': 8.79805850982666} +02/24/2022 21:12:26 - INFO - codeparrot_training - Step 15162: {'lr': 0.00041283430975275085, 'samples': 7763456, 'steps': 15162, 'loss/train': 1.1373577117919922} +02/24/2022 21:12:31 - INFO - codeparrot_training - Step 15163: {'lr': 0.0004128218937582089, 'samples': 7763968, 'steps': 15163, 'loss/train': 1.5468223094940186} +02/24/2022 21:12:35 - INFO - codeparrot_training - Step 15164: {'lr': 0.00041280947706618965, 'samples': 7764480, 'steps': 15164, 'loss/train': 2.591919183731079} +02/24/2022 21:12:40 - INFO - codeparrot_training - Step 15165: {'lr': 0.00041279705967674636, 'samples': 7764992, 'steps': 15165, 'loss/train': 2.084432363510132} +02/24/2022 21:12:44 - INFO - codeparrot_training - Step 15166: {'lr': 0.00041278464158993214, 'samples': 7765504, 'steps': 15166, 'loss/train': 2.1277060508728027} +02/24/2022 21:12:49 - INFO - codeparrot_training - Step 15167: {'lr': 0.0004127722228058002, 'samples': 7766016, 'steps': 15167, 'loss/train': 1.946176528930664} +02/24/2022 21:12:53 - INFO - codeparrot_training - Step 15168: {'lr': 0.0004127598033244037, 'samples': 7766528, 'steps': 15168, 'loss/train': 1.6518677473068237} +02/24/2022 21:12:58 - INFO - codeparrot_training - Step 15169: {'lr': 0.0004127473831457959, 'samples': 7767040, 'steps': 15169, 'loss/train': 2.1408612728118896} +02/24/2022 21:13:02 - INFO - codeparrot_training - Step 15170: {'lr': 0.00041273496227003004, 'samples': 7767552, 'steps': 15170, 'loss/train': 1.7052603960037231} +02/24/2022 21:13:08 - INFO - codeparrot_training - Step 15171: {'lr': 0.0004127225406971592, 'samples': 7768064, 'steps': 15171, 'loss/train': 2.443181276321411} +02/24/2022 21:13:11 - INFO - codeparrot_training - Step 15172: {'lr': 0.00041271011842723676, 'samples': 7768576, 'steps': 15172, 'loss/train': 0.3734479546546936} +02/24/2022 21:13:17 - INFO - codeparrot_training - Step 15173: {'lr': 0.00041269769546031576, 'samples': 7769088, 'steps': 15173, 'loss/train': 1.757319688796997} +02/24/2022 21:13:20 - INFO - codeparrot_training - Step 15174: {'lr': 0.0004126852717964495, 'samples': 7769600, 'steps': 15174, 'loss/train': 2.8355581760406494} +02/24/2022 21:13:26 - INFO - codeparrot_training - Step 15175: {'lr': 0.0004126728474356912, 'samples': 7770112, 'steps': 15175, 'loss/train': 3.0845136642456055} +02/24/2022 21:13:29 - INFO - codeparrot_training - Step 15176: {'lr': 0.0004126604223780941, 'samples': 7770624, 'steps': 15176, 'loss/train': 1.1025235652923584} +02/24/2022 21:13:35 - INFO - codeparrot_training - Step 15177: {'lr': 0.00041264799662371144, 'samples': 7771136, 'steps': 15177, 'loss/train': 2.0873119831085205} +02/24/2022 21:13:38 - INFO - codeparrot_training - Step 15178: {'lr': 0.0004126355701725963, 'samples': 7771648, 'steps': 15178, 'loss/train': 1.4091343879699707} +02/24/2022 21:13:44 - INFO - codeparrot_training - Step 15179: {'lr': 0.00041262314302480216, 'samples': 7772160, 'steps': 15179, 'loss/train': 2.303143262863159} +02/24/2022 21:13:47 - INFO - codeparrot_training - Step 15180: {'lr': 0.000412610715180382, 'samples': 7772672, 'steps': 15180, 'loss/train': 1.3482143878936768} +02/24/2022 21:13:53 - INFO - codeparrot_training - Step 15181: {'lr': 0.0004125982866393892, 'samples': 7773184, 'steps': 15181, 'loss/train': 0.5258373022079468} +02/24/2022 21:13:56 - INFO - codeparrot_training - Step 15182: {'lr': 0.0004125858574018769, 'samples': 7773696, 'steps': 15182, 'loss/train': 0.338752806186676} +02/24/2022 21:14:02 - INFO - codeparrot_training - Step 15183: {'lr': 0.0004125734274678986, 'samples': 7774208, 'steps': 15183, 'loss/train': 3.331299066543579} +02/24/2022 21:14:06 - INFO - codeparrot_training - Step 15184: {'lr': 0.0004125609968375072, 'samples': 7774720, 'steps': 15184, 'loss/train': 1.6494851112365723} +02/24/2022 21:14:11 - INFO - codeparrot_training - Step 15185: {'lr': 0.00041254856551075616, 'samples': 7775232, 'steps': 15185, 'loss/train': 2.7761707305908203} +02/24/2022 21:14:15 - INFO - codeparrot_training - Step 15186: {'lr': 0.0004125361334876987, 'samples': 7775744, 'steps': 15186, 'loss/train': 2.21018648147583} +02/24/2022 21:14:21 - INFO - codeparrot_training - Step 15187: {'lr': 0.000412523700768388, 'samples': 7776256, 'steps': 15187, 'loss/train': 0.8293166160583496} +02/24/2022 21:14:24 - INFO - codeparrot_training - Step 15188: {'lr': 0.0004125112673528775, 'samples': 7776768, 'steps': 15188, 'loss/train': 0.7854377031326294} +02/24/2022 21:14:30 - INFO - codeparrot_training - Step 15189: {'lr': 0.0004124988332412202, 'samples': 7777280, 'steps': 15189, 'loss/train': 1.8319604396820068} +02/24/2022 21:14:33 - INFO - codeparrot_training - Step 15190: {'lr': 0.00041248639843346953, 'samples': 7777792, 'steps': 15190, 'loss/train': 2.1161608695983887} +02/24/2022 21:14:39 - INFO - codeparrot_training - Step 15191: {'lr': 0.0004124739629296787, 'samples': 7778304, 'steps': 15191, 'loss/train': 2.0222506523132324} +02/24/2022 21:14:42 - INFO - codeparrot_training - Step 15192: {'lr': 0.00041246152672990105, 'samples': 7778816, 'steps': 15192, 'loss/train': 1.7895108461380005} +02/24/2022 21:14:48 - INFO - codeparrot_training - Step 15193: {'lr': 0.00041244908983418985, 'samples': 7779328, 'steps': 15193, 'loss/train': 1.4329543113708496} +02/24/2022 21:14:51 - INFO - codeparrot_training - Step 15194: {'lr': 0.0004124366522425982, 'samples': 7779840, 'steps': 15194, 'loss/train': 2.520211696624756} +02/24/2022 21:14:57 - INFO - codeparrot_training - Step 15195: {'lr': 0.0004124242139551796, 'samples': 7780352, 'steps': 15195, 'loss/train': 1.280103087425232} +02/24/2022 21:15:00 - INFO - codeparrot_training - Step 15196: {'lr': 0.00041241177497198725, 'samples': 7780864, 'steps': 15196, 'loss/train': 1.65938401222229} +02/24/2022 21:15:07 - INFO - codeparrot_training - Step 15197: {'lr': 0.00041239933529307437, 'samples': 7781376, 'steps': 15197, 'loss/train': 1.9791457653045654} +02/24/2022 21:15:10 - INFO - codeparrot_training - Step 15198: {'lr': 0.00041238689491849434, 'samples': 7781888, 'steps': 15198, 'loss/train': 1.8962838649749756} +02/24/2022 21:15:16 - INFO - codeparrot_training - Step 15199: {'lr': 0.00041237445384830043, 'samples': 7782400, 'steps': 15199, 'loss/train': 1.5832501649856567} +02/24/2022 21:15:19 - INFO - codeparrot_training - Step 15200: {'lr': 0.0004123620120825459, 'samples': 7782912, 'steps': 15200, 'loss/train': 2.6603612899780273} +02/24/2022 21:15:25 - INFO - codeparrot_training - Step 15201: {'lr': 0.0004123495696212841, 'samples': 7783424, 'steps': 15201, 'loss/train': 1.180666208267212} +02/24/2022 21:15:28 - INFO - codeparrot_training - Step 15202: {'lr': 0.00041233712646456823, 'samples': 7783936, 'steps': 15202, 'loss/train': 2.764810085296631} +02/24/2022 21:15:34 - INFO - codeparrot_training - Step 15203: {'lr': 0.0004123246826124517, 'samples': 7784448, 'steps': 15203, 'loss/train': 1.659140706062317} +02/24/2022 21:15:37 - INFO - codeparrot_training - Step 15204: {'lr': 0.00041231223806498777, 'samples': 7784960, 'steps': 15204, 'loss/train': 2.4636311531066895} +02/24/2022 21:15:43 - INFO - codeparrot_training - Step 15205: {'lr': 0.0004122997928222298, 'samples': 7785472, 'steps': 15205, 'loss/train': 1.4005694389343262} +02/24/2022 21:15:46 - INFO - codeparrot_training - Step 15206: {'lr': 0.000412287346884231, 'samples': 7785984, 'steps': 15206, 'loss/train': 3.509793758392334} +02/24/2022 21:15:52 - INFO - codeparrot_training - Step 15207: {'lr': 0.00041227490025104474, 'samples': 7786496, 'steps': 15207, 'loss/train': 2.6965408325195312} +02/24/2022 21:15:56 - INFO - codeparrot_training - Step 15208: {'lr': 0.00041226245292272433, 'samples': 7787008, 'steps': 15208, 'loss/train': 2.2454934120178223} +02/24/2022 21:16:02 - INFO - codeparrot_training - Step 15209: {'lr': 0.00041225000489932315, 'samples': 7787520, 'steps': 15209, 'loss/train': 2.4381909370422363} +02/24/2022 21:16:05 - INFO - codeparrot_training - Step 15210: {'lr': 0.00041223755618089445, 'samples': 7788032, 'steps': 15210, 'loss/train': 0.518037736415863} +02/24/2022 21:16:11 - INFO - codeparrot_training - Step 15211: {'lr': 0.0004122251067674915, 'samples': 7788544, 'steps': 15211, 'loss/train': 1.7804791927337646} +02/24/2022 21:16:14 - INFO - codeparrot_training - Step 15212: {'lr': 0.00041221265665916776, 'samples': 7789056, 'steps': 15212, 'loss/train': 3.487273931503296} +02/24/2022 21:16:20 - INFO - codeparrot_training - Step 15213: {'lr': 0.0004122002058559765, 'samples': 7789568, 'steps': 15213, 'loss/train': 1.683398723602295} +02/24/2022 21:16:23 - INFO - codeparrot_training - Step 15214: {'lr': 0.00041218775435797106, 'samples': 7790080, 'steps': 15214, 'loss/train': 2.7925148010253906} +02/24/2022 21:16:29 - INFO - codeparrot_training - Step 15215: {'lr': 0.0004121753021652048, 'samples': 7790592, 'steps': 15215, 'loss/train': 3.0459232330322266} +02/24/2022 21:16:32 - INFO - codeparrot_training - Step 15216: {'lr': 0.0004121628492777311, 'samples': 7791104, 'steps': 15216, 'loss/train': 2.6547303199768066} +02/24/2022 21:16:38 - INFO - codeparrot_training - Step 15217: {'lr': 0.0004121503956956031, 'samples': 7791616, 'steps': 15217, 'loss/train': 2.400583267211914} +02/24/2022 21:16:41 - INFO - codeparrot_training - Step 15218: {'lr': 0.0004121379414188744, 'samples': 7792128, 'steps': 15218, 'loss/train': 2.671135902404785} +02/24/2022 21:16:47 - INFO - codeparrot_training - Step 15219: {'lr': 0.0004121254864475982, 'samples': 7792640, 'steps': 15219, 'loss/train': 2.5322554111480713} +02/24/2022 21:16:51 - INFO - codeparrot_training - Step 15220: {'lr': 0.0004121130307818279, 'samples': 7793152, 'steps': 15220, 'loss/train': 3.069416046142578} +02/24/2022 21:16:57 - INFO - codeparrot_training - Step 15221: {'lr': 0.00041210057442161687, 'samples': 7793664, 'steps': 15221, 'loss/train': 2.3191070556640625} +02/24/2022 21:17:00 - INFO - codeparrot_training - Step 15222: {'lr': 0.0004120881173670184, 'samples': 7794176, 'steps': 15222, 'loss/train': 1.047019600868225} +02/24/2022 21:17:06 - INFO - codeparrot_training - Step 15223: {'lr': 0.000412075659618086, 'samples': 7794688, 'steps': 15223, 'loss/train': 2.5615899562835693} +02/24/2022 21:17:09 - INFO - codeparrot_training - Step 15224: {'lr': 0.0004120632011748728, 'samples': 7795200, 'steps': 15224, 'loss/train': 1.7023859024047852} +02/24/2022 21:17:15 - INFO - codeparrot_training - Step 15225: {'lr': 0.00041205074203743244, 'samples': 7795712, 'steps': 15225, 'loss/train': 0.7751212120056152} +02/24/2022 21:17:18 - INFO - codeparrot_training - Step 15226: {'lr': 0.00041203828220581805, 'samples': 7796224, 'steps': 15226, 'loss/train': 2.151682138442993} +02/24/2022 21:17:24 - INFO - codeparrot_training - Step 15227: {'lr': 0.00041202582168008324, 'samples': 7796736, 'steps': 15227, 'loss/train': 2.54848051071167} +02/24/2022 21:17:27 - INFO - codeparrot_training - Step 15228: {'lr': 0.00041201336046028117, 'samples': 7797248, 'steps': 15228, 'loss/train': 2.7814154624938965} +02/24/2022 21:17:33 - INFO - codeparrot_training - Step 15229: {'lr': 0.0004120008985464654, 'samples': 7797760, 'steps': 15229, 'loss/train': 0.9572920799255371} +02/24/2022 21:17:36 - INFO - codeparrot_training - Step 15230: {'lr': 0.0004119884359386891, 'samples': 7798272, 'steps': 15230, 'loss/train': 2.5589332580566406} +02/24/2022 21:17:42 - INFO - codeparrot_training - Step 15231: {'lr': 0.0004119759726370058, 'samples': 7798784, 'steps': 15231, 'loss/train': 2.0106046199798584} +02/24/2022 21:17:45 - INFO - codeparrot_training - Step 15232: {'lr': 0.0004119635086414689, 'samples': 7799296, 'steps': 15232, 'loss/train': 1.7771512269973755} +02/24/2022 21:17:52 - INFO - codeparrot_training - Step 15233: {'lr': 0.0004119510439521318, 'samples': 7799808, 'steps': 15233, 'loss/train': 1.5864630937576294} +02/24/2022 21:17:55 - INFO - codeparrot_training - Step 15234: {'lr': 0.0004119385785690478, 'samples': 7800320, 'steps': 15234, 'loss/train': 1.471492052078247} +02/24/2022 21:18:01 - INFO - codeparrot_training - Step 15235: {'lr': 0.0004119261124922703, 'samples': 7800832, 'steps': 15235, 'loss/train': 1.107556700706482} +02/24/2022 21:18:04 - INFO - codeparrot_training - Step 15236: {'lr': 0.00041191364572185286, 'samples': 7801344, 'steps': 15236, 'loss/train': 1.073188066482544} +02/24/2022 21:18:10 - INFO - codeparrot_training - Step 15237: {'lr': 0.0004119011782578487, 'samples': 7801856, 'steps': 15237, 'loss/train': 2.5408918857574463} +02/24/2022 21:18:13 - INFO - codeparrot_training - Step 15238: {'lr': 0.00041188871010031135, 'samples': 7802368, 'steps': 15238, 'loss/train': 2.212597370147705} +02/24/2022 21:18:19 - INFO - codeparrot_training - Step 15239: {'lr': 0.0004118762412492941, 'samples': 7802880, 'steps': 15239, 'loss/train': 1.766115665435791} +02/24/2022 21:18:22 - INFO - codeparrot_training - Step 15240: {'lr': 0.00041186377170485057, 'samples': 7803392, 'steps': 15240, 'loss/train': 1.3296129703521729} +02/24/2022 21:18:28 - INFO - codeparrot_training - Step 15241: {'lr': 0.00041185130146703387, 'samples': 7803904, 'steps': 15241, 'loss/train': 2.002227306365967} +02/24/2022 21:18:31 - INFO - codeparrot_training - Step 15242: {'lr': 0.0004118388305358977, 'samples': 7804416, 'steps': 15242, 'loss/train': 2.3698153495788574} +02/24/2022 21:18:37 - INFO - codeparrot_training - Step 15243: {'lr': 0.0004118263589114953, 'samples': 7804928, 'steps': 15243, 'loss/train': 1.9070606231689453} +02/24/2022 21:18:41 - INFO - codeparrot_training - Step 15244: {'lr': 0.00041181388659388026, 'samples': 7805440, 'steps': 15244, 'loss/train': 1.0141035318374634} +02/24/2022 21:18:46 - INFO - codeparrot_training - Step 15245: {'lr': 0.00041180141358310586, 'samples': 7805952, 'steps': 15245, 'loss/train': 2.0231447219848633} +02/24/2022 21:18:50 - INFO - codeparrot_training - Step 15246: {'lr': 0.00041178893987922556, 'samples': 7806464, 'steps': 15246, 'loss/train': 2.5819156169891357} +02/24/2022 21:18:55 - INFO - codeparrot_training - Step 15247: {'lr': 0.0004117764654822929, 'samples': 7806976, 'steps': 15247, 'loss/train': 1.5870237350463867} +02/24/2022 21:18:59 - INFO - codeparrot_training - Step 15248: {'lr': 0.0004117639903923611, 'samples': 7807488, 'steps': 15248, 'loss/train': 2.968144416809082} +02/24/2022 21:19:04 - INFO - codeparrot_training - Step 15249: {'lr': 0.0004117515146094838, 'samples': 7808000, 'steps': 15249, 'loss/train': 2.2506957054138184} +02/24/2022 21:19:08 - INFO - codeparrot_training - Step 15250: {'lr': 0.0004117390381337144, 'samples': 7808512, 'steps': 15250, 'loss/train': 2.5934667587280273} +02/24/2022 21:19:14 - INFO - codeparrot_training - Step 15251: {'lr': 0.00041172656096510624, 'samples': 7809024, 'steps': 15251, 'loss/train': 1.9710956811904907} +02/24/2022 21:19:17 - INFO - codeparrot_training - Step 15252: {'lr': 0.0004117140831037129, 'samples': 7809536, 'steps': 15252, 'loss/train': 3.5833899974823} +02/24/2022 21:19:23 - INFO - codeparrot_training - Step 15253: {'lr': 0.00041170160454958785, 'samples': 7810048, 'steps': 15253, 'loss/train': 1.8081755638122559} +02/24/2022 21:19:26 - INFO - codeparrot_training - Step 15254: {'lr': 0.00041168912530278434, 'samples': 7810560, 'steps': 15254, 'loss/train': 0.6070297360420227} +02/24/2022 21:19:32 - INFO - codeparrot_training - Step 15255: {'lr': 0.00041167664536335605, 'samples': 7811072, 'steps': 15255, 'loss/train': 2.3863115310668945} +02/24/2022 21:19:36 - INFO - codeparrot_training - Step 15256: {'lr': 0.0004116641647313563, 'samples': 7811584, 'steps': 15256, 'loss/train': 1.7708550691604614} +02/24/2022 21:19:41 - INFO - codeparrot_training - Step 15257: {'lr': 0.00041165168340683857, 'samples': 7812096, 'steps': 15257, 'loss/train': 1.7683138847351074} +02/24/2022 21:19:45 - INFO - codeparrot_training - Step 15258: {'lr': 0.0004116392013898564, 'samples': 7812608, 'steps': 15258, 'loss/train': 1.6458642482757568} +02/24/2022 21:19:50 - INFO - codeparrot_training - Step 15259: {'lr': 0.0004116267186804632, 'samples': 7813120, 'steps': 15259, 'loss/train': 1.3484008312225342} +02/24/2022 21:19:54 - INFO - codeparrot_training - Step 15260: {'lr': 0.0004116142352787125, 'samples': 7813632, 'steps': 15260, 'loss/train': 2.22202467918396} +02/24/2022 21:19:59 - INFO - codeparrot_training - Step 15261: {'lr': 0.0004116017511846577, 'samples': 7814144, 'steps': 15261, 'loss/train': 0.2391817569732666} +02/24/2022 21:20:03 - INFO - codeparrot_training - Step 15262: {'lr': 0.00041158926639835234, 'samples': 7814656, 'steps': 15262, 'loss/train': 2.558009386062622} +02/24/2022 21:20:08 - INFO - codeparrot_training - Step 15263: {'lr': 0.00041157678091984987, 'samples': 7815168, 'steps': 15263, 'loss/train': 2.524418354034424} +02/24/2022 21:20:12 - INFO - codeparrot_training - Step 15264: {'lr': 0.0004115642947492038, 'samples': 7815680, 'steps': 15264, 'loss/train': 1.3695374727249146} +02/24/2022 21:20:17 - INFO - codeparrot_training - Step 15265: {'lr': 0.0004115518078864675, 'samples': 7816192, 'steps': 15265, 'loss/train': 2.512526512145996} +02/24/2022 21:20:21 - INFO - codeparrot_training - Step 15266: {'lr': 0.0004115393203316946, 'samples': 7816704, 'steps': 15266, 'loss/train': 1.702041745185852} +02/24/2022 21:20:26 - INFO - codeparrot_training - Step 15267: {'lr': 0.00041152683208493855, 'samples': 7817216, 'steps': 15267, 'loss/train': 2.142240524291992} +02/24/2022 21:20:30 - INFO - codeparrot_training - Step 15268: {'lr': 0.0004115143431462529, 'samples': 7817728, 'steps': 15268, 'loss/train': 1.0065242052078247} +02/24/2022 21:20:36 - INFO - codeparrot_training - Step 15269: {'lr': 0.000411501853515691, 'samples': 7818240, 'steps': 15269, 'loss/train': 1.8284953832626343} +02/24/2022 21:20:39 - INFO - codeparrot_training - Step 15270: {'lr': 0.00041148936319330656, 'samples': 7818752, 'steps': 15270, 'loss/train': 1.7723549604415894} +02/24/2022 21:20:45 - INFO - codeparrot_training - Step 15271: {'lr': 0.0004114768721791529, 'samples': 7819264, 'steps': 15271, 'loss/train': 1.4771944284439087} +02/24/2022 21:20:48 - INFO - codeparrot_training - Step 15272: {'lr': 0.00041146438047328347, 'samples': 7819776, 'steps': 15272, 'loss/train': 2.116975784301758} +02/24/2022 21:20:54 - INFO - codeparrot_training - Step 15273: {'lr': 0.00041145188807575206, 'samples': 7820288, 'steps': 15273, 'loss/train': 2.1834287643432617} +02/24/2022 21:20:57 - INFO - codeparrot_training - Step 15274: {'lr': 0.000411439394986612, 'samples': 7820800, 'steps': 15274, 'loss/train': 0.97948157787323} +02/24/2022 21:21:03 - INFO - codeparrot_training - Step 15275: {'lr': 0.00041142690120591686, 'samples': 7821312, 'steps': 15275, 'loss/train': 3.0842692852020264} +02/24/2022 21:21:06 - INFO - codeparrot_training - Step 15276: {'lr': 0.0004114144067337201, 'samples': 7821824, 'steps': 15276, 'loss/train': 1.8898016214370728} +02/24/2022 21:21:12 - INFO - codeparrot_training - Step 15277: {'lr': 0.0004114019115700752, 'samples': 7822336, 'steps': 15277, 'loss/train': 1.829763650894165} +02/24/2022 21:21:15 - INFO - codeparrot_training - Step 15278: {'lr': 0.00041138941571503587, 'samples': 7822848, 'steps': 15278, 'loss/train': 1.9355000257492065} +02/24/2022 21:21:21 - INFO - codeparrot_training - Step 15279: {'lr': 0.0004113769191686555, 'samples': 7823360, 'steps': 15279, 'loss/train': 1.2846788167953491} +02/24/2022 21:21:24 - INFO - codeparrot_training - Step 15280: {'lr': 0.00041136442193098765, 'samples': 7823872, 'steps': 15280, 'loss/train': 2.2391481399536133} +02/24/2022 21:21:30 - INFO - codeparrot_training - Step 15281: {'lr': 0.00041135192400208585, 'samples': 7824384, 'steps': 15281, 'loss/train': 2.0184123516082764} +02/24/2022 21:21:34 - INFO - codeparrot_training - Step 15282: {'lr': 0.00041133942538200364, 'samples': 7824896, 'steps': 15282, 'loss/train': 2.0375027656555176} +02/24/2022 21:21:39 - INFO - codeparrot_training - Step 15283: {'lr': 0.0004113269260707946, 'samples': 7825408, 'steps': 15283, 'loss/train': 2.2702300548553467} +02/24/2022 21:21:43 - INFO - codeparrot_training - Step 15284: {'lr': 0.0004113144260685122, 'samples': 7825920, 'steps': 15284, 'loss/train': 2.0824499130249023} +02/24/2022 21:21:48 - INFO - codeparrot_training - Step 15285: {'lr': 0.00041130192537521, 'samples': 7826432, 'steps': 15285, 'loss/train': 1.392338752746582} +02/24/2022 21:21:52 - INFO - codeparrot_training - Step 15286: {'lr': 0.0004112894239909416, 'samples': 7826944, 'steps': 15286, 'loss/train': 1.3191852569580078} +02/24/2022 21:21:57 - INFO - codeparrot_training - Step 15287: {'lr': 0.0004112769219157605, 'samples': 7827456, 'steps': 15287, 'loss/train': 2.404175281524658} +02/24/2022 21:22:01 - INFO - codeparrot_training - Step 15288: {'lr': 0.00041126441914972036, 'samples': 7827968, 'steps': 15288, 'loss/train': 2.292914867401123} +02/24/2022 21:22:07 - INFO - codeparrot_training - Step 15289: {'lr': 0.00041125191569287456, 'samples': 7828480, 'steps': 15289, 'loss/train': 1.7711459398269653} +02/24/2022 21:22:10 - INFO - codeparrot_training - Step 15290: {'lr': 0.0004112394115452768, 'samples': 7828992, 'steps': 15290, 'loss/train': 2.0460455417633057} +02/24/2022 21:22:16 - INFO - codeparrot_training - Step 15291: {'lr': 0.00041122690670698054, 'samples': 7829504, 'steps': 15291, 'loss/train': 2.0301671028137207} +02/24/2022 21:22:19 - INFO - codeparrot_training - Step 15292: {'lr': 0.0004112144011780395, 'samples': 7830016, 'steps': 15292, 'loss/train': 1.9984729290008545} +02/24/2022 21:22:25 - INFO - codeparrot_training - Step 15293: {'lr': 0.00041120189495850713, 'samples': 7830528, 'steps': 15293, 'loss/train': 1.0612894296646118} +02/24/2022 21:22:29 - INFO - codeparrot_training - Step 15294: {'lr': 0.000411189388048437, 'samples': 7831040, 'steps': 15294, 'loss/train': 2.338893413543701} +02/24/2022 21:22:34 - INFO - codeparrot_training - Step 15295: {'lr': 0.0004111768804478827, 'samples': 7831552, 'steps': 15295, 'loss/train': 1.4726080894470215} +02/24/2022 21:22:38 - INFO - codeparrot_training - Step 15296: {'lr': 0.00041116437215689785, 'samples': 7832064, 'steps': 15296, 'loss/train': 1.815688133239746} +02/24/2022 21:22:43 - INFO - codeparrot_training - Step 15297: {'lr': 0.000411151863175536, 'samples': 7832576, 'steps': 15297, 'loss/train': 2.408508062362671} +02/24/2022 21:22:47 - INFO - codeparrot_training - Step 15298: {'lr': 0.00041113935350385074, 'samples': 7833088, 'steps': 15298, 'loss/train': 2.378605365753174} +02/24/2022 21:22:52 - INFO - codeparrot_training - Step 15299: {'lr': 0.0004111268431418957, 'samples': 7833600, 'steps': 15299, 'loss/train': 1.9653651714324951} +02/24/2022 21:22:56 - INFO - codeparrot_training - Step 15300: {'lr': 0.0004111143320897244, 'samples': 7834112, 'steps': 15300, 'loss/train': 2.33986496925354} +02/24/2022 21:23:03 - INFO - codeparrot_training - Step 15301: {'lr': 0.0004111018203473904, 'samples': 7834624, 'steps': 15301, 'loss/train': 3.817391872406006} +02/24/2022 21:23:06 - INFO - codeparrot_training - Step 15302: {'lr': 0.0004110893079149474, 'samples': 7835136, 'steps': 15302, 'loss/train': 2.3886022567749023} +02/24/2022 21:23:12 - INFO - codeparrot_training - Step 15303: {'lr': 0.000411076794792449, 'samples': 7835648, 'steps': 15303, 'loss/train': 2.1376419067382812} +02/24/2022 21:23:15 - INFO - codeparrot_training - Step 15304: {'lr': 0.0004110642809799487, 'samples': 7836160, 'steps': 15304, 'loss/train': 1.9003889560699463} +02/24/2022 21:23:21 - INFO - codeparrot_training - Step 15305: {'lr': 0.0004110517664775002, 'samples': 7836672, 'steps': 15305, 'loss/train': 2.674116611480713} +02/24/2022 21:23:24 - INFO - codeparrot_training - Step 15306: {'lr': 0.00041103925128515705, 'samples': 7837184, 'steps': 15306, 'loss/train': 2.4968910217285156} +02/24/2022 21:23:30 - INFO - codeparrot_training - Step 15307: {'lr': 0.0004110267354029729, 'samples': 7837696, 'steps': 15307, 'loss/train': 1.8054479360580444} +02/24/2022 21:23:33 - INFO - codeparrot_training - Step 15308: {'lr': 0.0004110142188310013, 'samples': 7838208, 'steps': 15308, 'loss/train': 0.8322159647941589} +02/24/2022 21:23:39 - INFO - codeparrot_training - Step 15309: {'lr': 0.00041100170156929596, 'samples': 7838720, 'steps': 15309, 'loss/train': 2.217796802520752} +02/24/2022 21:23:42 - INFO - codeparrot_training - Step 15310: {'lr': 0.0004109891836179105, 'samples': 7839232, 'steps': 15310, 'loss/train': 1.836825966835022} +02/24/2022 21:23:48 - INFO - codeparrot_training - Step 15311: {'lr': 0.0004109766649768984, 'samples': 7839744, 'steps': 15311, 'loss/train': 1.6202685832977295} +02/24/2022 21:23:51 - INFO - codeparrot_training - Step 15312: {'lr': 0.00041096414564631347, 'samples': 7840256, 'steps': 15312, 'loss/train': 2.732639789581299} +02/24/2022 21:23:57 - INFO - codeparrot_training - Step 15313: {'lr': 0.00041095162562620915, 'samples': 7840768, 'steps': 15313, 'loss/train': 1.9145981073379517} +02/24/2022 21:24:01 - INFO - codeparrot_training - Step 15314: {'lr': 0.00041093910491663926, 'samples': 7841280, 'steps': 15314, 'loss/train': 1.0388280153274536} +02/24/2022 21:24:06 - INFO - codeparrot_training - Step 15315: {'lr': 0.0004109265835176573, 'samples': 7841792, 'steps': 15315, 'loss/train': 2.090623140335083} +02/24/2022 21:24:09 - INFO - codeparrot_training - Step 15316: {'lr': 0.00041091406142931705, 'samples': 7842304, 'steps': 15316, 'loss/train': 0.30290287733078003} +02/24/2022 21:24:16 - INFO - codeparrot_training - Step 15317: {'lr': 0.00041090153865167196, 'samples': 7842816, 'steps': 15317, 'loss/train': 1.705505609512329} +02/24/2022 21:24:19 - INFO - codeparrot_training - Step 15318: {'lr': 0.0004108890151847758, 'samples': 7843328, 'steps': 15318, 'loss/train': 1.8073900938034058} +02/24/2022 21:24:25 - INFO - codeparrot_training - Step 15319: {'lr': 0.0004108764910286822, 'samples': 7843840, 'steps': 15319, 'loss/train': 2.8852314949035645} +02/24/2022 21:24:28 - INFO - codeparrot_training - Step 15320: {'lr': 0.00041086396618344475, 'samples': 7844352, 'steps': 15320, 'loss/train': 2.709652900695801} +02/24/2022 21:24:34 - INFO - codeparrot_training - Step 15321: {'lr': 0.0004108514406491172, 'samples': 7844864, 'steps': 15321, 'loss/train': 2.0468101501464844} +02/24/2022 21:24:37 - INFO - codeparrot_training - Step 15322: {'lr': 0.0004108389144257531, 'samples': 7845376, 'steps': 15322, 'loss/train': 1.4501640796661377} +02/24/2022 21:24:43 - INFO - codeparrot_training - Step 15323: {'lr': 0.0004108263875134062, 'samples': 7845888, 'steps': 15323, 'loss/train': 1.9559978246688843} +02/24/2022 21:24:46 - INFO - codeparrot_training - Step 15324: {'lr': 0.0004108138599121301, 'samples': 7846400, 'steps': 15324, 'loss/train': 2.1975150108337402} +02/24/2022 21:24:52 - INFO - codeparrot_training - Step 15325: {'lr': 0.00041080133162197855, 'samples': 7846912, 'steps': 15325, 'loss/train': 2.0719447135925293} +02/24/2022 21:24:55 - INFO - codeparrot_training - Step 15326: {'lr': 0.0004107888026430051, 'samples': 7847424, 'steps': 15326, 'loss/train': 2.1070544719696045} +02/24/2022 21:25:01 - INFO - codeparrot_training - Step 15327: {'lr': 0.0004107762729752635, 'samples': 7847936, 'steps': 15327, 'loss/train': 2.381730556488037} +02/24/2022 21:25:05 - INFO - codeparrot_training - Step 15328: {'lr': 0.00041076374261880735, 'samples': 7848448, 'steps': 15328, 'loss/train': 0.9070643782615662} +02/24/2022 21:25:10 - INFO - codeparrot_training - Step 15329: {'lr': 0.0004107512115736904, 'samples': 7848960, 'steps': 15329, 'loss/train': 1.4146337509155273} +02/24/2022 21:25:14 - INFO - codeparrot_training - Step 15330: {'lr': 0.0004107386798399664, 'samples': 7849472, 'steps': 15330, 'loss/train': 1.7019490003585815} +02/24/2022 21:25:19 - INFO - codeparrot_training - Step 15331: {'lr': 0.00041072614741768877, 'samples': 7849984, 'steps': 15331, 'loss/train': 1.81195867061615} +02/24/2022 21:25:23 - INFO - codeparrot_training - Step 15332: {'lr': 0.00041071361430691143, 'samples': 7850496, 'steps': 15332, 'loss/train': 1.6398224830627441} +02/24/2022 21:25:28 - INFO - codeparrot_training - Step 15333: {'lr': 0.00041070108050768805, 'samples': 7851008, 'steps': 15333, 'loss/train': 2.1147563457489014} +02/24/2022 21:25:32 - INFO - codeparrot_training - Step 15334: {'lr': 0.00041068854602007224, 'samples': 7851520, 'steps': 15334, 'loss/train': 1.8375039100646973} +02/24/2022 21:25:37 - INFO - codeparrot_training - Step 15335: {'lr': 0.0004106760108441177, 'samples': 7852032, 'steps': 15335, 'loss/train': 2.073981761932373} +02/24/2022 21:25:41 - INFO - codeparrot_training - Step 15336: {'lr': 0.0004106634749798782, 'samples': 7852544, 'steps': 15336, 'loss/train': 2.662733554840088} +02/24/2022 21:25:48 - INFO - codeparrot_training - Step 15337: {'lr': 0.0004106509384274073, 'samples': 7853056, 'steps': 15337, 'loss/train': 0.6032879948616028} +02/24/2022 21:25:51 - INFO - codeparrot_training - Step 15338: {'lr': 0.0004106384011867589, 'samples': 7853568, 'steps': 15338, 'loss/train': 1.042371392250061} +02/24/2022 21:25:57 - INFO - codeparrot_training - Step 15339: {'lr': 0.00041062586325798654, 'samples': 7854080, 'steps': 15339, 'loss/train': 1.7701929807662964} +02/24/2022 21:26:00 - INFO - codeparrot_training - Step 15340: {'lr': 0.000410613324641144, 'samples': 7854592, 'steps': 15340, 'loss/train': 1.5835610628128052} +02/24/2022 21:26:06 - INFO - codeparrot_training - Step 15341: {'lr': 0.000410600785336285, 'samples': 7855104, 'steps': 15341, 'loss/train': 1.518117070198059} +02/24/2022 21:26:09 - INFO - codeparrot_training - Step 15342: {'lr': 0.0004105882453434632, 'samples': 7855616, 'steps': 15342, 'loss/train': 1.9308040142059326} +02/24/2022 21:26:15 - INFO - codeparrot_training - Step 15343: {'lr': 0.0004105757046627323, 'samples': 7856128, 'steps': 15343, 'loss/train': 2.4660017490386963} +02/24/2022 21:26:18 - INFO - codeparrot_training - Step 15344: {'lr': 0.00041056316329414613, 'samples': 7856640, 'steps': 15344, 'loss/train': 2.105105400085449} +02/24/2022 21:26:24 - INFO - codeparrot_training - Step 15345: {'lr': 0.0004105506212377583, 'samples': 7857152, 'steps': 15345, 'loss/train': 2.8279001712799072} +02/24/2022 21:26:27 - INFO - codeparrot_training - Step 15346: {'lr': 0.0004105380784936227, 'samples': 7857664, 'steps': 15346, 'loss/train': 2.1776533126831055} +02/24/2022 21:26:33 - INFO - codeparrot_training - Step 15347: {'lr': 0.0004105255350617928, 'samples': 7858176, 'steps': 15347, 'loss/train': 2.864000082015991} +02/24/2022 21:26:37 - INFO - codeparrot_training - Step 15348: {'lr': 0.0004105129909423226, 'samples': 7858688, 'steps': 15348, 'loss/train': 1.0162395238876343} +02/24/2022 21:26:42 - INFO - codeparrot_training - Step 15349: {'lr': 0.0004105004461352657, 'samples': 7859200, 'steps': 15349, 'loss/train': 2.198106527328491} +02/24/2022 21:26:46 - INFO - codeparrot_training - Step 15350: {'lr': 0.00041048790064067577, 'samples': 7859712, 'steps': 15350, 'loss/train': 2.8883795738220215} +02/24/2022 21:26:51 - INFO - codeparrot_training - Step 15351: {'lr': 0.0004104753544586067, 'samples': 7860224, 'steps': 15351, 'loss/train': 2.16083025932312} +02/24/2022 21:26:55 - INFO - codeparrot_training - Step 15352: {'lr': 0.0004104628075891121, 'samples': 7860736, 'steps': 15352, 'loss/train': 1.572336196899414} +02/24/2022 21:27:01 - INFO - codeparrot_training - Step 15353: {'lr': 0.00041045026003224593, 'samples': 7861248, 'steps': 15353, 'loss/train': 1.6916844844818115} +02/24/2022 21:27:04 - INFO - codeparrot_training - Step 15354: {'lr': 0.00041043771178806164, 'samples': 7861760, 'steps': 15354, 'loss/train': 3.4172563552856445} +02/24/2022 21:27:10 - INFO - codeparrot_training - Step 15355: {'lr': 0.00041042516285661325, 'samples': 7862272, 'steps': 15355, 'loss/train': 2.7301809787750244} +02/24/2022 21:27:13 - INFO - codeparrot_training - Step 15356: {'lr': 0.00041041261323795437, 'samples': 7862784, 'steps': 15356, 'loss/train': 2.331615447998047} +02/24/2022 21:27:19 - INFO - codeparrot_training - Step 15357: {'lr': 0.00041040006293213883, 'samples': 7863296, 'steps': 15357, 'loss/train': 1.5986061096191406} +02/24/2022 21:27:22 - INFO - codeparrot_training - Step 15358: {'lr': 0.0004103875119392203, 'samples': 7863808, 'steps': 15358, 'loss/train': 1.8355193138122559} +02/24/2022 21:27:28 - INFO - codeparrot_training - Step 15359: {'lr': 0.00041037496025925256, 'samples': 7864320, 'steps': 15359, 'loss/train': 0.35641559958457947} +02/24/2022 21:27:31 - INFO - codeparrot_training - Step 15360: {'lr': 0.0004103624078922895, 'samples': 7864832, 'steps': 15360, 'loss/train': 2.09504771232605} +02/24/2022 21:27:37 - INFO - codeparrot_training - Step 15361: {'lr': 0.0004103498548383847, 'samples': 7865344, 'steps': 15361, 'loss/train': 2.1360926628112793} +02/24/2022 21:27:40 - INFO - codeparrot_training - Step 15362: {'lr': 0.00041033730109759216, 'samples': 7865856, 'steps': 15362, 'loss/train': 2.20161771774292} +02/24/2022 21:27:47 - INFO - codeparrot_training - Step 15363: {'lr': 0.00041032474666996544, 'samples': 7866368, 'steps': 15363, 'loss/train': 1.1102571487426758} +02/24/2022 21:27:50 - INFO - codeparrot_training - Step 15364: {'lr': 0.0004103121915555585, 'samples': 7866880, 'steps': 15364, 'loss/train': 1.4902901649475098} +02/24/2022 21:27:56 - INFO - codeparrot_training - Step 15365: {'lr': 0.00041029963575442494, 'samples': 7867392, 'steps': 15365, 'loss/train': 1.7745418548583984} +02/24/2022 21:27:59 - INFO - codeparrot_training - Step 15366: {'lr': 0.0004102870792666187, 'samples': 7867904, 'steps': 15366, 'loss/train': 0.8546589016914368} +02/24/2022 21:28:05 - INFO - codeparrot_training - Step 15367: {'lr': 0.0004102745220921935, 'samples': 7868416, 'steps': 15367, 'loss/train': 3.093583822250366} +02/24/2022 21:28:08 - INFO - codeparrot_training - Step 15368: {'lr': 0.0004102619642312031, 'samples': 7868928, 'steps': 15368, 'loss/train': 2.066190242767334} +02/24/2022 21:28:14 - INFO - codeparrot_training - Step 15369: {'lr': 0.0004102494056837014, 'samples': 7869440, 'steps': 15369, 'loss/train': 4.045064449310303} +02/24/2022 21:28:17 - INFO - codeparrot_training - Step 15370: {'lr': 0.00041023684644974213, 'samples': 7869952, 'steps': 15370, 'loss/train': 2.705396890640259} +02/24/2022 21:28:23 - INFO - codeparrot_training - Step 15371: {'lr': 0.00041022428652937905, 'samples': 7870464, 'steps': 15371, 'loss/train': 2.214348793029785} +02/24/2022 21:28:26 - INFO - codeparrot_training - Step 15372: {'lr': 0.000410211725922666, 'samples': 7870976, 'steps': 15372, 'loss/train': 1.8127859830856323} +02/24/2022 21:28:33 - INFO - codeparrot_training - Step 15373: {'lr': 0.00041019916462965684, 'samples': 7871488, 'steps': 15373, 'loss/train': 1.5097529888153076} +02/24/2022 21:28:36 - INFO - codeparrot_training - Step 15374: {'lr': 0.0004101866026504053, 'samples': 7872000, 'steps': 15374, 'loss/train': 2.180556058883667} +02/24/2022 21:28:42 - INFO - codeparrot_training - Step 15375: {'lr': 0.00041017403998496523, 'samples': 7872512, 'steps': 15375, 'loss/train': 1.9452483654022217} +02/24/2022 21:28:45 - INFO - codeparrot_training - Step 15376: {'lr': 0.0004101614766333904, 'samples': 7873024, 'steps': 15376, 'loss/train': 1.2768136262893677} +02/24/2022 21:28:51 - INFO - codeparrot_training - Step 15377: {'lr': 0.0004101489125957347, 'samples': 7873536, 'steps': 15377, 'loss/train': 1.8025894165039062} +02/24/2022 21:28:54 - INFO - codeparrot_training - Step 15378: {'lr': 0.0004101363478720519, 'samples': 7874048, 'steps': 15378, 'loss/train': 2.392350673675537} +02/24/2022 21:29:00 - INFO - codeparrot_training - Step 15379: {'lr': 0.0004101237824623958, 'samples': 7874560, 'steps': 15379, 'loss/train': 1.0656155347824097} +02/24/2022 21:29:04 - INFO - codeparrot_training - Step 15380: {'lr': 0.00041011121636682024, 'samples': 7875072, 'steps': 15380, 'loss/train': 2.7382652759552} +02/24/2022 21:29:09 - INFO - codeparrot_training - Step 15381: {'lr': 0.0004100986495853791, 'samples': 7875584, 'steps': 15381, 'loss/train': 1.5157382488250732} +02/24/2022 21:29:12 - INFO - codeparrot_training - Step 15382: {'lr': 0.00041008608211812625, 'samples': 7876096, 'steps': 15382, 'loss/train': 2.0826926231384277} +02/24/2022 21:29:19 - INFO - codeparrot_training - Step 15383: {'lr': 0.00041007351396511537, 'samples': 7876608, 'steps': 15383, 'loss/train': 1.5692298412322998} +02/24/2022 21:29:22 - INFO - codeparrot_training - Step 15384: {'lr': 0.00041006094512640044, 'samples': 7877120, 'steps': 15384, 'loss/train': 1.4416319131851196} +02/24/2022 21:29:28 - INFO - codeparrot_training - Step 15385: {'lr': 0.00041004837560203525, 'samples': 7877632, 'steps': 15385, 'loss/train': 9.425049781799316} +02/24/2022 21:29:31 - INFO - codeparrot_training - Step 15386: {'lr': 0.0004100358053920736, 'samples': 7878144, 'steps': 15386, 'loss/train': 1.5387446880340576} +02/24/2022 21:29:37 - INFO - codeparrot_training - Step 15387: {'lr': 0.00041002323449656943, 'samples': 7878656, 'steps': 15387, 'loss/train': 2.125946283340454} +02/24/2022 21:29:41 - INFO - codeparrot_training - Step 15388: {'lr': 0.00041001066291557653, 'samples': 7879168, 'steps': 15388, 'loss/train': 2.213087797164917} +02/24/2022 21:29:46 - INFO - codeparrot_training - Step 15389: {'lr': 0.0004099980906491487, 'samples': 7879680, 'steps': 15389, 'loss/train': 9.124073028564453} +02/24/2022 21:29:50 - INFO - codeparrot_training - Step 15390: {'lr': 0.0004099855176973399, 'samples': 7880192, 'steps': 15390, 'loss/train': 1.6886916160583496} +02/24/2022 21:29:55 - INFO - codeparrot_training - Step 15391: {'lr': 0.0004099729440602039, 'samples': 7880704, 'steps': 15391, 'loss/train': 2.2797696590423584} +02/24/2022 21:29:59 - INFO - codeparrot_training - Step 15392: {'lr': 0.0004099603697377946, 'samples': 7881216, 'steps': 15392, 'loss/train': 2.404973268508911} +02/24/2022 21:30:05 - INFO - codeparrot_training - Step 15393: {'lr': 0.000409947794730166, 'samples': 7881728, 'steps': 15393, 'loss/train': 2.6005899906158447} +02/24/2022 21:30:09 - INFO - codeparrot_training - Step 15394: {'lr': 0.0004099352190373716, 'samples': 7882240, 'steps': 15394, 'loss/train': 2.1604795455932617} +02/24/2022 21:30:14 - INFO - codeparrot_training - Step 15395: {'lr': 0.0004099226426594657, 'samples': 7882752, 'steps': 15395, 'loss/train': 1.7626302242279053} +02/24/2022 21:30:17 - INFO - codeparrot_training - Step 15396: {'lr': 0.0004099100655965019, 'samples': 7883264, 'steps': 15396, 'loss/train': 2.0931098461151123} +02/24/2022 21:30:23 - INFO - codeparrot_training - Step 15397: {'lr': 0.0004098974878485342, 'samples': 7883776, 'steps': 15397, 'loss/train': 1.9197818040847778} +02/24/2022 21:30:26 - INFO - codeparrot_training - Step 15398: {'lr': 0.0004098849094156164, 'samples': 7884288, 'steps': 15398, 'loss/train': 1.7820371389389038} +02/24/2022 21:30:32 - INFO - codeparrot_training - Step 15399: {'lr': 0.0004098723302978025, 'samples': 7884800, 'steps': 15399, 'loss/train': 1.904728651046753} +02/24/2022 21:30:36 - INFO - codeparrot_training - Step 15400: {'lr': 0.00040985975049514617, 'samples': 7885312, 'steps': 15400, 'loss/train': 1.2827519178390503} +02/24/2022 21:30:41 - INFO - codeparrot_training - Step 15401: {'lr': 0.00040984717000770157, 'samples': 7885824, 'steps': 15401, 'loss/train': 1.6896698474884033} +02/24/2022 21:30:45 - INFO - codeparrot_training - Step 15402: {'lr': 0.00040983458883552237, 'samples': 7886336, 'steps': 15402, 'loss/train': 1.2777215242385864} +02/24/2022 21:30:51 - INFO - codeparrot_training - Step 15403: {'lr': 0.00040982200697866256, 'samples': 7886848, 'steps': 15403, 'loss/train': 2.300790548324585} +02/24/2022 21:30:55 - INFO - codeparrot_training - Step 15404: {'lr': 0.00040980942443717596, 'samples': 7887360, 'steps': 15404, 'loss/train': 2.621476888656616} +02/24/2022 21:31:00 - INFO - codeparrot_training - Step 15405: {'lr': 0.0004097968412111166, 'samples': 7887872, 'steps': 15405, 'loss/train': 1.7218024730682373} +02/24/2022 21:31:04 - INFO - codeparrot_training - Step 15406: {'lr': 0.0004097842573005383, 'samples': 7888384, 'steps': 15406, 'loss/train': 2.013035297393799} +02/24/2022 21:31:10 - INFO - codeparrot_training - Step 15407: {'lr': 0.000409771672705495, 'samples': 7888896, 'steps': 15407, 'loss/train': 2.40028977394104} +02/24/2022 21:31:13 - INFO - codeparrot_training - Step 15408: {'lr': 0.0004097590874260405, 'samples': 7889408, 'steps': 15408, 'loss/train': 2.3176109790802} +02/24/2022 21:31:17 - INFO - codeparrot_training - Step 15409: {'lr': 0.0004097465014622289, 'samples': 7889920, 'steps': 15409, 'loss/train': 2.6842987537384033} +02/24/2022 21:31:22 - INFO - codeparrot_training - Step 15410: {'lr': 0.00040973391481411396, 'samples': 7890432, 'steps': 15410, 'loss/train': 2.919426918029785} +02/24/2022 21:31:26 - INFO - codeparrot_training - Step 15411: {'lr': 0.00040972132748174966, 'samples': 7890944, 'steps': 15411, 'loss/train': 2.180241346359253} +02/24/2022 21:31:31 - INFO - codeparrot_training - Step 15412: {'lr': 0.00040970873946518993, 'samples': 7891456, 'steps': 15412, 'loss/train': 2.3224427700042725} +02/24/2022 21:31:35 - INFO - codeparrot_training - Step 15413: {'lr': 0.00040969615076448865, 'samples': 7891968, 'steps': 15413, 'loss/train': 1.9502239227294922} +02/24/2022 21:31:40 - INFO - codeparrot_training - Step 15414: {'lr': 0.0004096835613796998, 'samples': 7892480, 'steps': 15414, 'loss/train': 1.4000823497772217} +02/24/2022 21:31:44 - INFO - codeparrot_training - Step 15415: {'lr': 0.00040967097131087727, 'samples': 7892992, 'steps': 15415, 'loss/train': 1.8175748586654663} +02/24/2022 21:31:49 - INFO - codeparrot_training - Step 15416: {'lr': 0.00040965838055807493, 'samples': 7893504, 'steps': 15416, 'loss/train': 2.2393364906311035} +02/24/2022 21:31:53 - INFO - codeparrot_training - Step 15417: {'lr': 0.00040964578912134687, 'samples': 7894016, 'steps': 15417, 'loss/train': 1.8740324974060059} +02/24/2022 21:31:58 - INFO - codeparrot_training - Step 15418: {'lr': 0.00040963319700074684, 'samples': 7894528, 'steps': 15418, 'loss/train': 1.4016344547271729} +02/24/2022 21:32:02 - INFO - codeparrot_training - Step 15419: {'lr': 0.00040962060419632906, 'samples': 7895040, 'steps': 15419, 'loss/train': 2.2686307430267334} +02/24/2022 21:32:08 - INFO - codeparrot_training - Step 15420: {'lr': 0.00040960801070814715, 'samples': 7895552, 'steps': 15420, 'loss/train': 1.1163126230239868} +02/24/2022 21:32:11 - INFO - codeparrot_training - Step 15421: {'lr': 0.00040959541653625526, 'samples': 7896064, 'steps': 15421, 'loss/train': 2.352973699569702} +02/24/2022 21:32:17 - INFO - codeparrot_training - Step 15422: {'lr': 0.0004095828216807073, 'samples': 7896576, 'steps': 15422, 'loss/train': 1.7794021368026733} +02/24/2022 21:32:20 - INFO - codeparrot_training - Step 15423: {'lr': 0.00040957022614155714, 'samples': 7897088, 'steps': 15423, 'loss/train': 2.371039628982544} +02/24/2022 21:32:26 - INFO - codeparrot_training - Step 15424: {'lr': 0.0004095576299188589, 'samples': 7897600, 'steps': 15424, 'loss/train': 2.479933738708496} +02/24/2022 21:32:29 - INFO - codeparrot_training - Step 15425: {'lr': 0.0004095450330126663, 'samples': 7898112, 'steps': 15425, 'loss/train': 1.5423216819763184} +02/24/2022 21:32:35 - INFO - codeparrot_training - Step 15426: {'lr': 0.0004095324354230335, 'samples': 7898624, 'steps': 15426, 'loss/train': 1.5639524459838867} +02/24/2022 21:32:38 - INFO - codeparrot_training - Step 15427: {'lr': 0.0004095198371500145, 'samples': 7899136, 'steps': 15427, 'loss/train': 2.206367015838623} +02/24/2022 21:32:44 - INFO - codeparrot_training - Step 15428: {'lr': 0.00040950723819366307, 'samples': 7899648, 'steps': 15428, 'loss/train': 1.9861165285110474} +02/24/2022 21:32:48 - INFO - codeparrot_training - Step 15429: {'lr': 0.00040949463855403326, 'samples': 7900160, 'steps': 15429, 'loss/train': 2.516212224960327} +02/24/2022 21:32:54 - INFO - codeparrot_training - Step 15430: {'lr': 0.00040948203823117915, 'samples': 7900672, 'steps': 15430, 'loss/train': 1.336027979850769} +02/24/2022 21:32:57 - INFO - codeparrot_training - Step 15431: {'lr': 0.00040946943722515455, 'samples': 7901184, 'steps': 15431, 'loss/train': 1.6456513404846191} +02/24/2022 21:33:03 - INFO - codeparrot_training - Step 15432: {'lr': 0.0004094568355360135, 'samples': 7901696, 'steps': 15432, 'loss/train': 3.6719956398010254} +02/24/2022 21:33:06 - INFO - codeparrot_training - Step 15433: {'lr': 0.00040944423316381006, 'samples': 7902208, 'steps': 15433, 'loss/train': 1.9775714874267578} +02/24/2022 21:33:12 - INFO - codeparrot_training - Step 15434: {'lr': 0.0004094316301085982, 'samples': 7902720, 'steps': 15434, 'loss/train': 0.892765998840332} +02/24/2022 21:33:15 - INFO - codeparrot_training - Step 15435: {'lr': 0.00040941902637043183, 'samples': 7903232, 'steps': 15435, 'loss/train': 2.8887033462524414} +02/24/2022 21:33:21 - INFO - codeparrot_training - Step 15436: {'lr': 0.00040940642194936495, 'samples': 7903744, 'steps': 15436, 'loss/train': 2.3517863750457764} +02/24/2022 21:33:25 - INFO - codeparrot_training - Step 15437: {'lr': 0.0004093938168454515, 'samples': 7904256, 'steps': 15437, 'loss/train': 1.8546351194381714} +02/24/2022 21:33:30 - INFO - codeparrot_training - Step 15438: {'lr': 0.00040938121105874573, 'samples': 7904768, 'steps': 15438, 'loss/train': 2.90852952003479} +02/24/2022 21:33:33 - INFO - codeparrot_training - Step 15439: {'lr': 0.0004093686045893013, 'samples': 7905280, 'steps': 15439, 'loss/train': 2.670231342315674} +02/24/2022 21:33:40 - INFO - codeparrot_training - Step 15440: {'lr': 0.00040935599743717243, 'samples': 7905792, 'steps': 15440, 'loss/train': 2.0736353397369385} +02/24/2022 21:33:43 - INFO - codeparrot_training - Step 15441: {'lr': 0.00040934338960241305, 'samples': 7906304, 'steps': 15441, 'loss/train': 2.002852439880371} +02/24/2022 21:33:49 - INFO - codeparrot_training - Step 15442: {'lr': 0.00040933078108507727, 'samples': 7906816, 'steps': 15442, 'loss/train': 2.8125252723693848} +02/24/2022 21:33:54 - INFO - codeparrot_training - Step 15443: {'lr': 0.00040931817188521894, 'samples': 7907328, 'steps': 15443, 'loss/train': 2.2092647552490234} +02/24/2022 21:33:58 - INFO - codeparrot_training - Step 15444: {'lr': 0.00040930556200289214, 'samples': 7907840, 'steps': 15444, 'loss/train': 0.7220437526702881} +02/24/2022 21:34:03 - INFO - codeparrot_training - Step 15445: {'lr': 0.00040929295143815093, 'samples': 7908352, 'steps': 15445, 'loss/train': 1.5290563106536865} +02/24/2022 21:34:07 - INFO - codeparrot_training - Step 15446: {'lr': 0.0004092803401910493, 'samples': 7908864, 'steps': 15446, 'loss/train': 1.9819129705429077} +02/24/2022 21:34:12 - INFO - codeparrot_training - Step 15447: {'lr': 0.00040926772826164126, 'samples': 7909376, 'steps': 15447, 'loss/train': 2.1988632678985596} +02/24/2022 21:34:16 - INFO - codeparrot_training - Step 15448: {'lr': 0.0004092551156499809, 'samples': 7909888, 'steps': 15448, 'loss/train': 2.420659065246582} +02/24/2022 21:34:21 - INFO - codeparrot_training - Step 15449: {'lr': 0.000409242502356122, 'samples': 7910400, 'steps': 15449, 'loss/train': 2.5587782859802246} +02/24/2022 21:34:25 - INFO - codeparrot_training - Step 15450: {'lr': 0.000409229888380119, 'samples': 7910912, 'steps': 15450, 'loss/train': 2.089151382446289} +02/24/2022 21:34:31 - INFO - codeparrot_training - Step 15451: {'lr': 0.00040921727372202565, 'samples': 7911424, 'steps': 15451, 'loss/train': 1.9240504503250122} +02/24/2022 21:34:34 - INFO - codeparrot_training - Step 15452: {'lr': 0.000409204658381896, 'samples': 7911936, 'steps': 15452, 'loss/train': 2.5743579864501953} +02/24/2022 21:34:40 - INFO - codeparrot_training - Step 15453: {'lr': 0.00040919204235978425, 'samples': 7912448, 'steps': 15453, 'loss/train': 2.241792678833008} +02/24/2022 21:34:43 - INFO - codeparrot_training - Step 15454: {'lr': 0.0004091794256557443, 'samples': 7912960, 'steps': 15454, 'loss/train': 2.547938585281372} +02/24/2022 21:34:47 - INFO - codeparrot_training - Step 15455: {'lr': 0.00040916680826983017, 'samples': 7913472, 'steps': 15455, 'loss/train': 2.1530346870422363} +02/24/2022 21:34:53 - INFO - codeparrot_training - Step 15456: {'lr': 0.00040915419020209605, 'samples': 7913984, 'steps': 15456, 'loss/train': 2.095562696456909} +02/24/2022 21:34:56 - INFO - codeparrot_training - Step 15457: {'lr': 0.0004091415714525959, 'samples': 7914496, 'steps': 15457, 'loss/train': 1.792824149131775} +02/24/2022 21:35:02 - INFO - codeparrot_training - Step 15458: {'lr': 0.0004091289520213838, 'samples': 7915008, 'steps': 15458, 'loss/train': 2.257463216781616} +02/24/2022 21:35:05 - INFO - codeparrot_training - Step 15459: {'lr': 0.0004091163319085137, 'samples': 7915520, 'steps': 15459, 'loss/train': 0.46837449073791504} +02/24/2022 21:35:13 - INFO - codeparrot_training - Step 15460: {'lr': 0.0004091037111140399, 'samples': 7916032, 'steps': 15460, 'loss/train': 2.1919593811035156} +02/24/2022 21:35:17 - INFO - codeparrot_training - Step 15461: {'lr': 0.00040909108963801624, 'samples': 7916544, 'steps': 15461, 'loss/train': 1.4890090227127075} +02/24/2022 21:35:22 - INFO - codeparrot_training - Step 15462: {'lr': 0.0004090784674804969, 'samples': 7917056, 'steps': 15462, 'loss/train': 1.857080340385437} +02/24/2022 21:35:26 - INFO - codeparrot_training - Step 15463: {'lr': 0.0004090658446415359, 'samples': 7917568, 'steps': 15463, 'loss/train': 1.6178237199783325} +02/24/2022 21:35:31 - INFO - codeparrot_training - Step 15464: {'lr': 0.0004090532211211874, 'samples': 7918080, 'steps': 15464, 'loss/train': 2.5663673877716064} +02/24/2022 21:35:37 - INFO - codeparrot_training - Step 15465: {'lr': 0.0004090405969195053, 'samples': 7918592, 'steps': 15465, 'loss/train': 1.9339476823806763} +02/24/2022 21:35:40 - INFO - codeparrot_training - Step 15466: {'lr': 0.0004090279720365438, 'samples': 7919104, 'steps': 15466, 'loss/train': 1.789537787437439} +02/24/2022 21:35:46 - INFO - codeparrot_training - Step 15467: {'lr': 0.00040901534647235703, 'samples': 7919616, 'steps': 15467, 'loss/train': 1.8607381582260132} +02/24/2022 21:35:50 - INFO - codeparrot_training - Step 15468: {'lr': 0.00040900272022699897, 'samples': 7920128, 'steps': 15468, 'loss/train': 1.4169374704360962} +02/24/2022 21:35:53 - INFO - codeparrot_training - Step 15469: {'lr': 0.00040899009330052375, 'samples': 7920640, 'steps': 15469, 'loss/train': 2.213528633117676} +02/24/2022 21:36:00 - INFO - codeparrot_training - Step 15470: {'lr': 0.00040897746569298546, 'samples': 7921152, 'steps': 15470, 'loss/train': 1.7157049179077148} +02/24/2022 21:36:04 - INFO - codeparrot_training - Step 15471: {'lr': 0.0004089648374044382, 'samples': 7921664, 'steps': 15471, 'loss/train': 1.051945686340332} +02/24/2022 21:36:09 - INFO - codeparrot_training - Step 15472: {'lr': 0.000408952208434936, 'samples': 7922176, 'steps': 15472, 'loss/train': 1.2381268739700317} +02/24/2022 21:36:13 - INFO - codeparrot_training - Step 15473: {'lr': 0.00040893957878453314, 'samples': 7922688, 'steps': 15473, 'loss/train': 2.5180225372314453} +02/24/2022 21:36:18 - INFO - codeparrot_training - Step 15474: {'lr': 0.0004089269484532834, 'samples': 7923200, 'steps': 15474, 'loss/train': 2.0026073455810547} +02/24/2022 21:36:22 - INFO - codeparrot_training - Step 15475: {'lr': 0.00040891431744124123, 'samples': 7923712, 'steps': 15475, 'loss/train': 2.1056971549987793} +02/24/2022 21:36:27 - INFO - codeparrot_training - Step 15476: {'lr': 0.00040890168574846055, 'samples': 7924224, 'steps': 15476, 'loss/train': 1.2067630290985107} +02/24/2022 21:36:31 - INFO - codeparrot_training - Step 15477: {'lr': 0.0004088890533749955, 'samples': 7924736, 'steps': 15477, 'loss/train': 2.467292308807373} +02/24/2022 21:36:36 - INFO - codeparrot_training - Step 15478: {'lr': 0.0004088764203209002, 'samples': 7925248, 'steps': 15478, 'loss/train': 2.263564348220825} +02/24/2022 21:36:40 - INFO - codeparrot_training - Step 15479: {'lr': 0.0004088637865862287, 'samples': 7925760, 'steps': 15479, 'loss/train': 2.318178653717041} +02/24/2022 21:36:47 - INFO - codeparrot_training - Step 15480: {'lr': 0.0004088511521710352, 'samples': 7926272, 'steps': 15480, 'loss/train': 2.8448283672332764} +02/24/2022 21:36:51 - INFO - codeparrot_training - Step 15481: {'lr': 0.0004088385170753739, 'samples': 7926784, 'steps': 15481, 'loss/train': 1.6736156940460205} +02/24/2022 21:36:56 - INFO - codeparrot_training - Step 15482: {'lr': 0.00040882588129929876, 'samples': 7927296, 'steps': 15482, 'loss/train': 1.9599905014038086} +02/24/2022 21:37:00 - INFO - codeparrot_training - Step 15483: {'lr': 0.000408813244842864, 'samples': 7927808, 'steps': 15483, 'loss/train': 2.5051231384277344} +02/24/2022 21:37:05 - INFO - codeparrot_training - Step 15484: {'lr': 0.0004088006077061237, 'samples': 7928320, 'steps': 15484, 'loss/train': 2.259176254272461} +02/24/2022 21:37:09 - INFO - codeparrot_training - Step 15485: {'lr': 0.00040878796988913204, 'samples': 7928832, 'steps': 15485, 'loss/train': 1.6392414569854736} +02/24/2022 21:37:14 - INFO - codeparrot_training - Step 15486: {'lr': 0.00040877533139194313, 'samples': 7929344, 'steps': 15486, 'loss/train': 2.424464225769043} +02/24/2022 21:37:18 - INFO - codeparrot_training - Step 15487: {'lr': 0.00040876269221461117, 'samples': 7929856, 'steps': 15487, 'loss/train': 2.7809624671936035} +02/24/2022 21:37:23 - INFO - codeparrot_training - Step 15488: {'lr': 0.0004087500523571902, 'samples': 7930368, 'steps': 15488, 'loss/train': 0.3770766854286194} +02/24/2022 21:37:27 - INFO - codeparrot_training - Step 15489: {'lr': 0.0004087374118197344, 'samples': 7930880, 'steps': 15489, 'loss/train': 1.3468271493911743} +02/24/2022 21:37:34 - INFO - codeparrot_training - Step 15490: {'lr': 0.00040872477060229797, 'samples': 7931392, 'steps': 15490, 'loss/train': 1.9579812288284302} +02/24/2022 21:37:37 - INFO - codeparrot_training - Step 15491: {'lr': 0.00040871212870493504, 'samples': 7931904, 'steps': 15491, 'loss/train': 2.244948387145996} +02/24/2022 21:37:43 - INFO - codeparrot_training - Step 15492: {'lr': 0.0004086994861276996, 'samples': 7932416, 'steps': 15492, 'loss/train': 1.9149377346038818} +02/24/2022 21:37:48 - INFO - codeparrot_training - Step 15493: {'lr': 0.00040868684287064617, 'samples': 7932928, 'steps': 15493, 'loss/train': 2.311769962310791} +02/24/2022 21:37:52 - INFO - codeparrot_training - Step 15494: {'lr': 0.0004086741989338285, 'samples': 7933440, 'steps': 15494, 'loss/train': 2.153244733810425} +02/24/2022 21:37:57 - INFO - codeparrot_training - Step 15495: {'lr': 0.0004086615543173011, 'samples': 7933952, 'steps': 15495, 'loss/train': 0.1000339463353157} +02/24/2022 21:38:01 - INFO - codeparrot_training - Step 15496: {'lr': 0.0004086489090211178, 'samples': 7934464, 'steps': 15496, 'loss/train': 2.5734522342681885} +02/24/2022 21:38:06 - INFO - codeparrot_training - Step 15497: {'lr': 0.00040863626304533316, 'samples': 7934976, 'steps': 15497, 'loss/train': 1.5227742195129395} +02/24/2022 21:38:10 - INFO - codeparrot_training - Step 15498: {'lr': 0.000408623616390001, 'samples': 7935488, 'steps': 15498, 'loss/train': 2.099524736404419} +02/24/2022 21:38:16 - INFO - codeparrot_training - Step 15499: {'lr': 0.00040861096905517574, 'samples': 7936000, 'steps': 15499, 'loss/train': 1.5269643068313599} +02/24/2022 21:38:19 - INFO - codeparrot_training - Step 15500: {'lr': 0.0004085983210409114, 'samples': 7936512, 'steps': 15500, 'loss/train': 3.403447389602661} +02/24/2022 21:38:22 - INFO - codeparrot_training - Step 15501: {'lr': 0.00040858567234726217, 'samples': 7937024, 'steps': 15501, 'loss/train': 1.84128999710083} +02/24/2022 21:38:28 - INFO - codeparrot_training - Step 15502: {'lr': 0.00040857302297428233, 'samples': 7937536, 'steps': 15502, 'loss/train': 1.6496556997299194} +02/24/2022 21:38:33 - INFO - codeparrot_training - Step 15503: {'lr': 0.000408560372922026, 'samples': 7938048, 'steps': 15503, 'loss/train': 2.2017412185668945} +02/24/2022 21:38:37 - INFO - codeparrot_training - Step 15504: {'lr': 0.00040854772219054737, 'samples': 7938560, 'steps': 15504, 'loss/train': 1.757104516029358} +02/24/2022 21:38:44 - INFO - codeparrot_training - Step 15505: {'lr': 0.00040853507077990073, 'samples': 7939072, 'steps': 15505, 'loss/train': 1.9060847759246826} +02/24/2022 21:38:48 - INFO - codeparrot_training - Step 15506: {'lr': 0.00040852241869014004, 'samples': 7939584, 'steps': 15506, 'loss/train': 1.3947877883911133} +02/24/2022 21:38:53 - INFO - codeparrot_training - Step 15507: {'lr': 0.00040850976592131974, 'samples': 7940096, 'steps': 15507, 'loss/train': 3.2720694541931152} +02/24/2022 21:38:57 - INFO - codeparrot_training - Step 15508: {'lr': 0.0004084971124734939, 'samples': 7940608, 'steps': 15508, 'loss/train': 2.748504877090454} +02/24/2022 21:39:02 - INFO - codeparrot_training - Step 15509: {'lr': 0.0004084844583467168, 'samples': 7941120, 'steps': 15509, 'loss/train': 2.437459707260132} +02/24/2022 21:39:06 - INFO - codeparrot_training - Step 15510: {'lr': 0.00040847180354104256, 'samples': 7941632, 'steps': 15510, 'loss/train': 2.965855360031128} +02/24/2022 21:39:11 - INFO - codeparrot_training - Step 15511: {'lr': 0.00040845914805652544, 'samples': 7942144, 'steps': 15511, 'loss/train': 1.7638673782348633} +02/24/2022 21:39:15 - INFO - codeparrot_training - Step 15512: {'lr': 0.0004084464918932197, 'samples': 7942656, 'steps': 15512, 'loss/train': 2.1305906772613525} +02/24/2022 21:39:20 - INFO - codeparrot_training - Step 15513: {'lr': 0.0004084338350511795, 'samples': 7943168, 'steps': 15513, 'loss/train': 2.331066846847534} +02/24/2022 21:39:24 - INFO - codeparrot_training - Step 15514: {'lr': 0.00040842117753045893, 'samples': 7943680, 'steps': 15514, 'loss/train': 1.5385148525238037} +02/24/2022 21:39:31 - INFO - codeparrot_training - Step 15515: {'lr': 0.0004084085193311124, 'samples': 7944192, 'steps': 15515, 'loss/train': 2.8428070545196533} +02/24/2022 21:39:34 - INFO - codeparrot_training - Step 15516: {'lr': 0.0004083958604531941, 'samples': 7944704, 'steps': 15516, 'loss/train': 1.7632235288619995} +02/24/2022 21:39:40 - INFO - codeparrot_training - Step 15517: {'lr': 0.0004083832008967583, 'samples': 7945216, 'steps': 15517, 'loss/train': 1.6807687282562256} +02/24/2022 21:39:43 - INFO - codeparrot_training - Step 15518: {'lr': 0.00040837054066185906, 'samples': 7945728, 'steps': 15518, 'loss/train': 1.4972398281097412} +02/24/2022 21:39:49 - INFO - codeparrot_training - Step 15519: {'lr': 0.0004083578797485508, 'samples': 7946240, 'steps': 15519, 'loss/train': 1.4638450145721436} +02/24/2022 21:39:52 - INFO - codeparrot_training - Step 15520: {'lr': 0.00040834521815688753, 'samples': 7946752, 'steps': 15520, 'loss/train': 2.972620964050293} +02/24/2022 21:39:58 - INFO - codeparrot_training - Step 15521: {'lr': 0.00040833255588692375, 'samples': 7947264, 'steps': 15521, 'loss/train': 2.0566892623901367} +02/24/2022 21:40:01 - INFO - codeparrot_training - Step 15522: {'lr': 0.0004083198929387135, 'samples': 7947776, 'steps': 15522, 'loss/train': 3.4358222484588623} +02/24/2022 21:40:07 - INFO - codeparrot_training - Step 15523: {'lr': 0.0004083072293123111, 'samples': 7948288, 'steps': 15523, 'loss/train': 1.9729145765304565} +02/24/2022 21:40:10 - INFO - codeparrot_training - Step 15524: {'lr': 0.00040829456500777084, 'samples': 7948800, 'steps': 15524, 'loss/train': 2.6530888080596924} +02/24/2022 21:40:16 - INFO - codeparrot_training - Step 15525: {'lr': 0.00040828190002514694, 'samples': 7949312, 'steps': 15525, 'loss/train': 1.9839402437210083} +02/24/2022 21:40:19 - INFO - codeparrot_training - Step 15526: {'lr': 0.0004082692343644936, 'samples': 7949824, 'steps': 15526, 'loss/train': 2.1632790565490723} +02/24/2022 21:40:27 - INFO - codeparrot_training - Step 15527: {'lr': 0.00040825656802586513, 'samples': 7950336, 'steps': 15527, 'loss/train': 2.0929946899414062} +02/24/2022 21:40:30 - INFO - codeparrot_training - Step 15528: {'lr': 0.00040824390100931585, 'samples': 7950848, 'steps': 15528, 'loss/train': 1.8413575887680054} +02/24/2022 21:40:36 - INFO - codeparrot_training - Step 15529: {'lr': 0.00040823123331489985, 'samples': 7951360, 'steps': 15529, 'loss/train': 2.7376160621643066} +02/24/2022 21:40:39 - INFO - codeparrot_training - Step 15530: {'lr': 0.0004082185649426715, 'samples': 7951872, 'steps': 15530, 'loss/train': 2.369424819946289} +02/24/2022 21:40:45 - INFO - codeparrot_training - Step 15531: {'lr': 0.0004082058958926851, 'samples': 7952384, 'steps': 15531, 'loss/train': 1.0001769065856934} +02/24/2022 21:40:48 - INFO - codeparrot_training - Step 15532: {'lr': 0.0004081932261649949, 'samples': 7952896, 'steps': 15532, 'loss/train': 2.2717673778533936} +02/24/2022 21:40:54 - INFO - codeparrot_training - Step 15533: {'lr': 0.00040818055575965505, 'samples': 7953408, 'steps': 15533, 'loss/train': 1.7797858715057373} +02/24/2022 21:40:57 - INFO - codeparrot_training - Step 15534: {'lr': 0.0004081678846767199, 'samples': 7953920, 'steps': 15534, 'loss/train': 3.093263626098633} +02/24/2022 21:41:03 - INFO - codeparrot_training - Step 15535: {'lr': 0.00040815521291624393, 'samples': 7954432, 'steps': 15535, 'loss/train': 1.6688371896743774} +02/24/2022 21:41:06 - INFO - codeparrot_training - Step 15536: {'lr': 0.0004081425404782811, 'samples': 7954944, 'steps': 15536, 'loss/train': 1.3239980936050415} +02/24/2022 21:41:14 - INFO - codeparrot_training - Step 15537: {'lr': 0.0004081298673628859, 'samples': 7955456, 'steps': 15537, 'loss/train': 2.282769203186035} +02/24/2022 21:41:17 - INFO - codeparrot_training - Step 15538: {'lr': 0.00040811719357011257, 'samples': 7955968, 'steps': 15538, 'loss/train': 1.4894928932189941} +02/24/2022 21:41:23 - INFO - codeparrot_training - Step 15539: {'lr': 0.00040810451910001537, 'samples': 7956480, 'steps': 15539, 'loss/train': 0.8797029852867126} +02/24/2022 21:41:26 - INFO - codeparrot_training - Step 15540: {'lr': 0.00040809184395264867, 'samples': 7956992, 'steps': 15540, 'loss/train': 2.388523578643799} +02/24/2022 21:41:32 - INFO - codeparrot_training - Step 15541: {'lr': 0.0004080791681280667, 'samples': 7957504, 'steps': 15541, 'loss/train': 1.1261372566223145} +02/24/2022 21:41:35 - INFO - codeparrot_training - Step 15542: {'lr': 0.00040806649162632364, 'samples': 7958016, 'steps': 15542, 'loss/train': 1.809226632118225} +02/24/2022 21:41:41 - INFO - codeparrot_training - Step 15543: {'lr': 0.000408053814447474, 'samples': 7958528, 'steps': 15543, 'loss/train': 1.465032935142517} +02/24/2022 21:41:44 - INFO - codeparrot_training - Step 15544: {'lr': 0.00040804113659157203, 'samples': 7959040, 'steps': 15544, 'loss/train': 2.1486222743988037} +02/24/2022 21:41:50 - INFO - codeparrot_training - Step 15545: {'lr': 0.00040802845805867205, 'samples': 7959552, 'steps': 15545, 'loss/train': 2.0902352333068848} +02/24/2022 21:41:53 - INFO - codeparrot_training - Step 15546: {'lr': 0.0004080157788488282, 'samples': 7960064, 'steps': 15546, 'loss/train': 2.96724271774292} +02/24/2022 21:41:59 - INFO - codeparrot_training - Step 15547: {'lr': 0.0004080030989620951, 'samples': 7960576, 'steps': 15547, 'loss/train': 2.1055681705474854} +02/24/2022 21:42:02 - INFO - codeparrot_training - Step 15548: {'lr': 0.0004079904183985268, 'samples': 7961088, 'steps': 15548, 'loss/train': 1.768223762512207} +02/24/2022 21:42:08 - INFO - codeparrot_training - Step 15549: {'lr': 0.0004079777371581777, 'samples': 7961600, 'steps': 15549, 'loss/train': 1.4073724746704102} +02/24/2022 21:42:11 - INFO - codeparrot_training - Step 15550: {'lr': 0.00040796505524110215, 'samples': 7962112, 'steps': 15550, 'loss/train': 2.4263346195220947} +02/24/2022 21:42:19 - INFO - codeparrot_training - Step 15551: {'lr': 0.00040795237264735454, 'samples': 7962624, 'steps': 15551, 'loss/train': 2.130971670150757} +02/24/2022 21:42:22 - INFO - codeparrot_training - Step 15552: {'lr': 0.00040793968937698905, 'samples': 7963136, 'steps': 15552, 'loss/train': 1.4335674047470093} +02/24/2022 21:42:28 - INFO - codeparrot_training - Step 15553: {'lr': 0.00040792700543006014, 'samples': 7963648, 'steps': 15553, 'loss/train': 2.335895538330078} +02/24/2022 21:42:31 - INFO - codeparrot_training - Step 15554: {'lr': 0.000407914320806622, 'samples': 7964160, 'steps': 15554, 'loss/train': 1.5521836280822754} +02/24/2022 21:42:37 - INFO - codeparrot_training - Step 15555: {'lr': 0.0004079016355067291, 'samples': 7964672, 'steps': 15555, 'loss/train': 2.253251552581787} +02/24/2022 21:42:40 - INFO - codeparrot_training - Step 15556: {'lr': 0.0004078889495304357, 'samples': 7965184, 'steps': 15556, 'loss/train': 1.2411023378372192} +02/24/2022 21:42:46 - INFO - codeparrot_training - Step 15557: {'lr': 0.00040787626287779624, 'samples': 7965696, 'steps': 15557, 'loss/train': 1.9537452459335327} +02/24/2022 21:42:49 - INFO - codeparrot_training - Step 15558: {'lr': 0.0004078635755488649, 'samples': 7966208, 'steps': 15558, 'loss/train': 2.891079902648926} +02/24/2022 21:42:55 - INFO - codeparrot_training - Step 15559: {'lr': 0.00040785088754369627, 'samples': 7966720, 'steps': 15559, 'loss/train': 2.240905523300171} +02/24/2022 21:42:58 - INFO - codeparrot_training - Step 15560: {'lr': 0.00040783819886234445, 'samples': 7967232, 'steps': 15560, 'loss/train': 2.200089693069458} +02/24/2022 21:43:06 - INFO - codeparrot_training - Step 15561: {'lr': 0.000407825509504864, 'samples': 7967744, 'steps': 15561, 'loss/train': 0.8187501430511475} +02/24/2022 21:43:09 - INFO - codeparrot_training - Step 15562: {'lr': 0.00040781281947130897, 'samples': 7968256, 'steps': 15562, 'loss/train': 2.1469154357910156} +02/24/2022 21:43:15 - INFO - codeparrot_training - Step 15563: {'lr': 0.0004078001287617342, 'samples': 7968768, 'steps': 15563, 'loss/train': 2.6158902645111084} +02/24/2022 21:43:19 - INFO - codeparrot_training - Step 15564: {'lr': 0.0004077874373761936, 'samples': 7969280, 'steps': 15564, 'loss/train': 2.2594118118286133} +02/24/2022 21:43:24 - INFO - codeparrot_training - Step 15565: {'lr': 0.0004077747453147418, 'samples': 7969792, 'steps': 15565, 'loss/train': 3.2596192359924316} +02/24/2022 21:43:28 - INFO - codeparrot_training - Step 15566: {'lr': 0.0004077620525774331, 'samples': 7970304, 'steps': 15566, 'loss/train': 2.3571548461914062} +02/24/2022 21:43:31 - INFO - codeparrot_training - Step 15567: {'lr': 0.0004077493591643219, 'samples': 7970816, 'steps': 15567, 'loss/train': 1.845572590827942} +02/24/2022 21:43:37 - INFO - codeparrot_training - Step 15568: {'lr': 0.00040773666507546244, 'samples': 7971328, 'steps': 15568, 'loss/train': 2.420595169067383} +02/24/2022 21:43:40 - INFO - codeparrot_training - Step 15569: {'lr': 0.00040772397031090923, 'samples': 7971840, 'steps': 15569, 'loss/train': 2.204481840133667} +02/24/2022 21:43:46 - INFO - codeparrot_training - Step 15570: {'lr': 0.0004077112748707166, 'samples': 7972352, 'steps': 15570, 'loss/train': 1.5307304859161377} +02/24/2022 21:43:49 - INFO - codeparrot_training - Step 15571: {'lr': 0.000407698578754939, 'samples': 7972864, 'steps': 15571, 'loss/train': 1.8535393476486206} +02/24/2022 21:43:57 - INFO - codeparrot_training - Step 15572: {'lr': 0.0004076858819636307, 'samples': 7973376, 'steps': 15572, 'loss/train': 1.7659270763397217} +02/24/2022 21:44:02 - INFO - codeparrot_training - Step 15573: {'lr': 0.0004076731844968462, 'samples': 7973888, 'steps': 15573, 'loss/train': 2.3016152381896973} +02/24/2022 21:44:06 - INFO - codeparrot_training - Step 15574: {'lr': 0.00040766048635463984, 'samples': 7974400, 'steps': 15574, 'loss/train': 2.24153995513916} +02/24/2022 21:44:09 - INFO - codeparrot_training - Step 15575: {'lr': 0.000407647787537066, 'samples': 7974912, 'steps': 15575, 'loss/train': 2.6576950550079346} +02/24/2022 21:44:14 - INFO - codeparrot_training - Step 15576: {'lr': 0.00040763508804417904, 'samples': 7975424, 'steps': 15576, 'loss/train': 1.8348135948181152} +02/24/2022 21:44:20 - INFO - codeparrot_training - Step 15577: {'lr': 0.0004076223878760335, 'samples': 7975936, 'steps': 15577, 'loss/train': 1.6207209825515747} +02/24/2022 21:44:24 - INFO - codeparrot_training - Step 15578: {'lr': 0.0004076096870326837, 'samples': 7976448, 'steps': 15578, 'loss/train': 2.6876683235168457} +02/24/2022 21:44:29 - INFO - codeparrot_training - Step 15579: {'lr': 0.000407596985514184, 'samples': 7976960, 'steps': 15579, 'loss/train': 1.0152466297149658} +02/24/2022 21:44:33 - INFO - codeparrot_training - Step 15580: {'lr': 0.00040758428332058895, 'samples': 7977472, 'steps': 15580, 'loss/train': 2.602926731109619} +02/24/2022 21:44:38 - INFO - codeparrot_training - Step 15581: {'lr': 0.00040757158045195274, 'samples': 7977984, 'steps': 15581, 'loss/train': 2.8576900959014893} +02/24/2022 21:44:42 - INFO - codeparrot_training - Step 15582: {'lr': 0.00040755887690833005, 'samples': 7978496, 'steps': 15582, 'loss/train': 2.028103828430176} +02/24/2022 21:44:47 - INFO - codeparrot_training - Step 15583: {'lr': 0.00040754617268977503, 'samples': 7979008, 'steps': 15583, 'loss/train': 1.3518954515457153} +02/24/2022 21:44:51 - INFO - codeparrot_training - Step 15584: {'lr': 0.0004075334677963423, 'samples': 7979520, 'steps': 15584, 'loss/train': 2.0328445434570312} +02/24/2022 21:44:56 - INFO - codeparrot_training - Step 15585: {'lr': 0.00040752076222808623, 'samples': 7980032, 'steps': 15585, 'loss/train': 0.06505563855171204} +02/24/2022 21:45:00 - INFO - codeparrot_training - Step 15586: {'lr': 0.00040750805598506115, 'samples': 7980544, 'steps': 15586, 'loss/train': 2.275618076324463} +02/24/2022 21:45:07 - INFO - codeparrot_training - Step 15587: {'lr': 0.00040749534906732167, 'samples': 7981056, 'steps': 15587, 'loss/train': 1.9834320545196533} +02/24/2022 21:45:11 - INFO - codeparrot_training - Step 15588: {'lr': 0.0004074826414749221, 'samples': 7981568, 'steps': 15588, 'loss/train': 2.576552152633667} +02/24/2022 21:45:16 - INFO - codeparrot_training - Step 15589: {'lr': 0.00040746993320791685, 'samples': 7982080, 'steps': 15589, 'loss/train': 0.2182197868824005} +02/24/2022 21:45:20 - INFO - codeparrot_training - Step 15590: {'lr': 0.00040745722426636043, 'samples': 7982592, 'steps': 15590, 'loss/train': 1.162914514541626} +02/24/2022 21:45:23 - INFO - codeparrot_training - Step 15591: {'lr': 0.0004074445146503073, 'samples': 7983104, 'steps': 15591, 'loss/train': 2.2130420207977295} +02/24/2022 21:45:29 - INFO - codeparrot_training - Step 15592: {'lr': 0.00040743180435981187, 'samples': 7983616, 'steps': 15592, 'loss/train': 2.697943687438965} +02/24/2022 21:45:33 - INFO - codeparrot_training - Step 15593: {'lr': 0.0004074190933949286, 'samples': 7984128, 'steps': 15593, 'loss/train': 2.4169728755950928} +02/24/2022 21:45:38 - INFO - codeparrot_training - Step 15594: {'lr': 0.00040740638175571175, 'samples': 7984640, 'steps': 15594, 'loss/train': 1.820448875427246} +02/24/2022 21:45:41 - INFO - codeparrot_training - Step 15595: {'lr': 0.0004073936694422161, 'samples': 7985152, 'steps': 15595, 'loss/train': 0.09863793104887009} +02/24/2022 21:45:47 - INFO - codeparrot_training - Step 15596: {'lr': 0.0004073809564544959, 'samples': 7985664, 'steps': 15596, 'loss/train': 2.099989891052246} +02/24/2022 21:45:51 - INFO - codeparrot_training - Step 15597: {'lr': 0.0004073682427926057, 'samples': 7986176, 'steps': 15597, 'loss/train': 2.8123772144317627} +02/24/2022 21:45:58 - INFO - codeparrot_training - Step 15598: {'lr': 0.00040735552845659986, 'samples': 7986688, 'steps': 15598, 'loss/train': 1.7853034734725952} +02/24/2022 21:46:03 - INFO - codeparrot_training - Step 15599: {'lr': 0.00040734281344653294, 'samples': 7987200, 'steps': 15599, 'loss/train': 1.9252949953079224} +02/24/2022 21:46:07 - INFO - codeparrot_training - Step 15600: {'lr': 0.0004073300977624594, 'samples': 7987712, 'steps': 15600, 'loss/train': 1.0375285148620605} +02/24/2022 21:46:12 - INFO - codeparrot_training - Step 15601: {'lr': 0.0004073173814044336, 'samples': 7988224, 'steps': 15601, 'loss/train': 2.023422956466675} +02/24/2022 21:46:16 - INFO - codeparrot_training - Step 15602: {'lr': 0.0004073046643725101, 'samples': 7988736, 'steps': 15602, 'loss/train': 1.3903703689575195} +02/24/2022 21:46:21 - INFO - codeparrot_training - Step 15603: {'lr': 0.0004072919466667434, 'samples': 7989248, 'steps': 15603, 'loss/train': 3.008213758468628} +02/24/2022 21:46:25 - INFO - codeparrot_training - Step 15604: {'lr': 0.000407279228287188, 'samples': 7989760, 'steps': 15604, 'loss/train': 2.3658792972564697} +02/24/2022 21:46:30 - INFO - codeparrot_training - Step 15605: {'lr': 0.00040726650923389825, 'samples': 7990272, 'steps': 15605, 'loss/train': 3.367514133453369} +02/24/2022 21:46:34 - INFO - codeparrot_training - Step 15606: {'lr': 0.00040725378950692874, 'samples': 7990784, 'steps': 15606, 'loss/train': 2.2920644283294678} +02/24/2022 21:46:41 - INFO - codeparrot_training - Step 15607: {'lr': 0.0004072410691063339, 'samples': 7991296, 'steps': 15607, 'loss/train': 2.5761048793792725} +02/24/2022 21:46:44 - INFO - codeparrot_training - Step 15608: {'lr': 0.00040722834803216834, 'samples': 7991808, 'steps': 15608, 'loss/train': 2.1576735973358154} +02/24/2022 21:46:50 - INFO - codeparrot_training - Step 15609: {'lr': 0.0004072156262844864, 'samples': 7992320, 'steps': 15609, 'loss/train': 2.205754518508911} +02/24/2022 21:46:53 - INFO - codeparrot_training - Step 15610: {'lr': 0.0004072029038633426, 'samples': 7992832, 'steps': 15610, 'loss/train': 2.0891125202178955} +02/24/2022 21:46:59 - INFO - codeparrot_training - Step 15611: {'lr': 0.0004071901807687915, 'samples': 7993344, 'steps': 15611, 'loss/train': 1.7078381776809692} +02/24/2022 21:47:02 - INFO - codeparrot_training - Step 15612: {'lr': 0.0004071774570008876, 'samples': 7993856, 'steps': 15612, 'loss/train': 1.4559953212738037} +02/24/2022 21:47:08 - INFO - codeparrot_training - Step 15613: {'lr': 0.00040716473255968534, 'samples': 7994368, 'steps': 15613, 'loss/train': 2.5761075019836426} +02/24/2022 21:47:11 - INFO - codeparrot_training - Step 15614: {'lr': 0.0004071520074452393, 'samples': 7994880, 'steps': 15614, 'loss/train': 1.5095183849334717} +02/24/2022 21:47:17 - INFO - codeparrot_training - Step 15615: {'lr': 0.000407139281657604, 'samples': 7995392, 'steps': 15615, 'loss/train': 2.1408703327178955} +02/24/2022 21:47:20 - INFO - codeparrot_training - Step 15616: {'lr': 0.0004071265551968338, 'samples': 7995904, 'steps': 15616, 'loss/train': 2.1084792613983154} +02/24/2022 21:47:28 - INFO - codeparrot_training - Step 15617: {'lr': 0.0004071138280629835, 'samples': 7996416, 'steps': 15617, 'loss/train': 2.583139657974243} +02/24/2022 21:47:31 - INFO - codeparrot_training - Step 15618: {'lr': 0.00040710110025610733, 'samples': 7996928, 'steps': 15618, 'loss/train': 1.5119836330413818} +02/24/2022 21:47:37 - INFO - codeparrot_training - Step 15619: {'lr': 0.00040708837177626, 'samples': 7997440, 'steps': 15619, 'loss/train': 2.2536675930023193} +02/24/2022 21:47:40 - INFO - codeparrot_training - Step 15620: {'lr': 0.00040707564262349594, 'samples': 7997952, 'steps': 15620, 'loss/train': 2.496870756149292} +02/24/2022 21:47:46 - INFO - codeparrot_training - Step 15621: {'lr': 0.00040706291279786965, 'samples': 7998464, 'steps': 15621, 'loss/train': 1.6723992824554443} +02/24/2022 21:47:49 - INFO - codeparrot_training - Step 15622: {'lr': 0.0004070501822994358, 'samples': 7998976, 'steps': 15622, 'loss/train': 2.4864156246185303} +02/24/2022 21:47:55 - INFO - codeparrot_training - Step 15623: {'lr': 0.00040703745112824876, 'samples': 7999488, 'steps': 15623, 'loss/train': 2.650505781173706} +02/24/2022 21:47:58 - INFO - codeparrot_training - Step 15624: {'lr': 0.00040702471928436316, 'samples': 8000000, 'steps': 15624, 'loss/train': 2.7441656589508057} +02/24/2022 21:48:04 - INFO - codeparrot_training - Step 15625: {'lr': 0.00040701198676783355, 'samples': 8000512, 'steps': 15625, 'loss/train': 1.6590642929077148} +02/24/2022 21:48:07 - INFO - codeparrot_training - Step 15626: {'lr': 0.00040699925357871446, 'samples': 8001024, 'steps': 15626, 'loss/train': 2.945499897003174} +02/24/2022 21:48:13 - INFO - codeparrot_training - Step 15627: {'lr': 0.00040698651971706037, 'samples': 8001536, 'steps': 15627, 'loss/train': 0.9308986067771912} +02/24/2022 21:48:16 - INFO - codeparrot_training - Step 15628: {'lr': 0.00040697378518292593, 'samples': 8002048, 'steps': 15628, 'loss/train': 2.228546619415283} +02/24/2022 21:48:22 - INFO - codeparrot_training - Step 15629: {'lr': 0.0004069610499763656, 'samples': 8002560, 'steps': 15629, 'loss/train': 1.2791314125061035} +02/24/2022 21:48:26 - INFO - codeparrot_training - Step 15630: {'lr': 0.00040694831409743406, 'samples': 8003072, 'steps': 15630, 'loss/train': 2.3972809314727783} +02/24/2022 21:48:31 - INFO - codeparrot_training - Step 15631: {'lr': 0.00040693557754618566, 'samples': 8003584, 'steps': 15631, 'loss/train': 2.058864116668701} +02/24/2022 21:48:35 - INFO - codeparrot_training - Step 15632: {'lr': 0.00040692284032267515, 'samples': 8004096, 'steps': 15632, 'loss/train': 2.842947244644165} +02/24/2022 21:48:41 - INFO - codeparrot_training - Step 15633: {'lr': 0.00040691010242695696, 'samples': 8004608, 'steps': 15633, 'loss/train': 2.114063024520874} +02/24/2022 21:48:45 - INFO - codeparrot_training - Step 15634: {'lr': 0.00040689736385908574, 'samples': 8005120, 'steps': 15634, 'loss/train': 2.1035783290863037} +02/24/2022 21:48:50 - INFO - codeparrot_training - Step 15635: {'lr': 0.0004068846246191161, 'samples': 8005632, 'steps': 15635, 'loss/train': 1.6431081295013428} +02/24/2022 21:48:54 - INFO - codeparrot_training - Step 15636: {'lr': 0.00040687188470710245, 'samples': 8006144, 'steps': 15636, 'loss/train': 1.4904038906097412} +02/24/2022 21:49:00 - INFO - codeparrot_training - Step 15637: {'lr': 0.00040685914412309955, 'samples': 8006656, 'steps': 15637, 'loss/train': 2.2176451683044434} +02/24/2022 21:49:04 - INFO - codeparrot_training - Step 15638: {'lr': 0.0004068464028671618, 'samples': 8007168, 'steps': 15638, 'loss/train': 2.1982195377349854} +02/24/2022 21:49:07 - INFO - codeparrot_training - Step 15639: {'lr': 0.00040683366093934394, 'samples': 8007680, 'steps': 15639, 'loss/train': 2.6619622707366943} +02/24/2022 21:49:13 - INFO - codeparrot_training - Step 15640: {'lr': 0.0004068209183397004, 'samples': 8008192, 'steps': 15640, 'loss/train': 1.3103344440460205} +02/24/2022 21:49:16 - INFO - codeparrot_training - Step 15641: {'lr': 0.0004068081750682859, 'samples': 8008704, 'steps': 15641, 'loss/train': 1.9242271184921265} +02/24/2022 21:49:22 - INFO - codeparrot_training - Step 15642: {'lr': 0.00040679543112515494, 'samples': 8009216, 'steps': 15642, 'loss/train': 1.9095739126205444} +02/24/2022 21:49:25 - INFO - codeparrot_training - Step 15643: {'lr': 0.00040678268651036213, 'samples': 8009728, 'steps': 15643, 'loss/train': 2.1943652629852295} +02/24/2022 21:49:31 - INFO - codeparrot_training - Step 15644: {'lr': 0.0004067699412239622, 'samples': 8010240, 'steps': 15644, 'loss/train': 0.7311398983001709} +02/24/2022 21:49:35 - INFO - codeparrot_training - Step 15645: {'lr': 0.00040675719526600947, 'samples': 8010752, 'steps': 15645, 'loss/train': 1.8090782165527344} +02/24/2022 21:49:40 - INFO - codeparrot_training - Step 15646: {'lr': 0.0004067444486365587, 'samples': 8011264, 'steps': 15646, 'loss/train': 2.4807727336883545} +02/24/2022 21:49:44 - INFO - codeparrot_training - Step 15647: {'lr': 0.00040673170133566453, 'samples': 8011776, 'steps': 15647, 'loss/train': 1.0575315952301025} +02/24/2022 21:49:49 - INFO - codeparrot_training - Step 15648: {'lr': 0.0004067189533633815, 'samples': 8012288, 'steps': 15648, 'loss/train': 1.96336829662323} +02/24/2022 21:49:53 - INFO - codeparrot_training - Step 15649: {'lr': 0.00040670620471976426, 'samples': 8012800, 'steps': 15649, 'loss/train': 1.8861486911773682} +02/24/2022 21:49:58 - INFO - codeparrot_training - Step 15650: {'lr': 0.0004066934554048674, 'samples': 8013312, 'steps': 15650, 'loss/train': 3.094194173812866} +02/24/2022 21:50:02 - INFO - codeparrot_training - Step 15651: {'lr': 0.00040668070541874553, 'samples': 8013824, 'steps': 15651, 'loss/train': 2.193488836288452} +02/24/2022 21:50:07 - INFO - codeparrot_training - Step 15652: {'lr': 0.00040666795476145326, 'samples': 8014336, 'steps': 15652, 'loss/train': 2.2909913063049316} +02/24/2022 21:50:11 - INFO - codeparrot_training - Step 15653: {'lr': 0.00040665520343304516, 'samples': 8014848, 'steps': 15653, 'loss/train': 0.6748460531234741} +02/24/2022 21:50:16 - INFO - codeparrot_training - Step 15654: {'lr': 0.00040664245143357604, 'samples': 8015360, 'steps': 15654, 'loss/train': 2.3541500568389893} +02/24/2022 21:50:20 - INFO - codeparrot_training - Step 15655: {'lr': 0.0004066296987631003, 'samples': 8015872, 'steps': 15655, 'loss/train': 2.6186037063598633} +02/24/2022 21:50:26 - INFO - codeparrot_training - Step 15656: {'lr': 0.0004066169454216727, 'samples': 8016384, 'steps': 15656, 'loss/train': 2.1786561012268066} +02/24/2022 21:50:29 - INFO - codeparrot_training - Step 15657: {'lr': 0.00040660419140934787, 'samples': 8016896, 'steps': 15657, 'loss/train': 2.1989779472351074} +02/24/2022 21:50:35 - INFO - codeparrot_training - Step 15658: {'lr': 0.0004065914367261804, 'samples': 8017408, 'steps': 15658, 'loss/train': 2.0224032402038574} +02/24/2022 21:50:38 - INFO - codeparrot_training - Step 15659: {'lr': 0.00040657868137222486, 'samples': 8017920, 'steps': 15659, 'loss/train': 2.074057102203369} +02/24/2022 21:50:44 - INFO - codeparrot_training - Step 15660: {'lr': 0.000406565925347536, 'samples': 8018432, 'steps': 15660, 'loss/train': 2.2125167846679688} +02/24/2022 21:50:47 - INFO - codeparrot_training - Step 15661: {'lr': 0.0004065531686521685, 'samples': 8018944, 'steps': 15661, 'loss/train': 1.2297435998916626} +02/24/2022 21:50:53 - INFO - codeparrot_training - Step 15662: {'lr': 0.00040654041128617693, 'samples': 8019456, 'steps': 15662, 'loss/train': 0.8650669455528259} +02/24/2022 21:50:56 - INFO - codeparrot_training - Step 15663: {'lr': 0.0004065276532496158, 'samples': 8019968, 'steps': 15663, 'loss/train': 2.5333142280578613} +02/24/2022 21:51:03 - INFO - codeparrot_training - Step 15664: {'lr': 0.0004065148945425401, 'samples': 8020480, 'steps': 15664, 'loss/train': 1.7127292156219482} +02/24/2022 21:51:06 - INFO - codeparrot_training - Step 15665: {'lr': 0.0004065021351650042, 'samples': 8020992, 'steps': 15665, 'loss/train': 1.736020565032959} +02/24/2022 21:51:12 - INFO - codeparrot_training - Step 15666: {'lr': 0.00040648937511706285, 'samples': 8021504, 'steps': 15666, 'loss/train': 2.829667329788208} +02/24/2022 21:51:15 - INFO - codeparrot_training - Step 15667: {'lr': 0.0004064766143987707, 'samples': 8022016, 'steps': 15667, 'loss/train': 0.9548608660697937} +02/24/2022 21:51:21 - INFO - codeparrot_training - Step 15668: {'lr': 0.00040646385301018243, 'samples': 8022528, 'steps': 15668, 'loss/train': 3.3888297080993652} +02/24/2022 21:51:24 - INFO - codeparrot_training - Step 15669: {'lr': 0.0004064510909513527, 'samples': 8023040, 'steps': 15669, 'loss/train': 2.4436657428741455} +02/24/2022 21:51:30 - INFO - codeparrot_training - Step 15670: {'lr': 0.00040643832822233615, 'samples': 8023552, 'steps': 15670, 'loss/train': 2.4292538166046143} +02/24/2022 21:51:33 - INFO - codeparrot_training - Step 15671: {'lr': 0.0004064255648231875, 'samples': 8024064, 'steps': 15671, 'loss/train': 2.216278076171875} +02/24/2022 21:51:39 - INFO - codeparrot_training - Step 15672: {'lr': 0.00040641280075396144, 'samples': 8024576, 'steps': 15672, 'loss/train': 1.429274559020996} +02/24/2022 21:51:42 - INFO - codeparrot_training - Step 15673: {'lr': 0.00040640003601471255, 'samples': 8025088, 'steps': 15673, 'loss/train': 3.0734481811523438} +02/24/2022 21:51:48 - INFO - codeparrot_training - Step 15674: {'lr': 0.00040638727060549556, 'samples': 8025600, 'steps': 15674, 'loss/train': 1.9044876098632812} +02/24/2022 21:51:52 - INFO - codeparrot_training - Step 15675: {'lr': 0.00040637450452636517, 'samples': 8026112, 'steps': 15675, 'loss/train': 1.3907132148742676} +02/24/2022 21:51:57 - INFO - codeparrot_training - Step 15676: {'lr': 0.00040636173777737613, 'samples': 8026624, 'steps': 15676, 'loss/train': 0.8431764245033264} +02/24/2022 21:52:01 - INFO - codeparrot_training - Step 15677: {'lr': 0.000406348970358583, 'samples': 8027136, 'steps': 15677, 'loss/train': 1.6959254741668701} +02/24/2022 21:52:06 - INFO - codeparrot_training - Step 15678: {'lr': 0.00040633620227004054, 'samples': 8027648, 'steps': 15678, 'loss/train': 2.103938579559326} +02/24/2022 21:52:10 - INFO - codeparrot_training - Step 15679: {'lr': 0.0004063234335118033, 'samples': 8028160, 'steps': 15679, 'loss/train': 1.601721167564392} +02/24/2022 21:52:15 - INFO - codeparrot_training - Step 15680: {'lr': 0.00040631066408392636, 'samples': 8028672, 'steps': 15680, 'loss/train': 0.8082705736160278} +02/24/2022 21:52:19 - INFO - codeparrot_training - Step 15681: {'lr': 0.000406297893986464, 'samples': 8029184, 'steps': 15681, 'loss/train': 1.3983267545700073} +02/24/2022 21:52:25 - INFO - codeparrot_training - Step 15682: {'lr': 0.0004062851232194711, 'samples': 8029696, 'steps': 15682, 'loss/train': 2.9970362186431885} +02/24/2022 21:52:29 - INFO - codeparrot_training - Step 15683: {'lr': 0.00040627235178300236, 'samples': 8030208, 'steps': 15683, 'loss/train': 2.2244744300842285} +02/24/2022 21:52:35 - INFO - codeparrot_training - Step 15684: {'lr': 0.0004062595796771126, 'samples': 8030720, 'steps': 15684, 'loss/train': 1.7679158449172974} +02/24/2022 21:52:38 - INFO - codeparrot_training - Step 15685: {'lr': 0.0004062468069018563, 'samples': 8031232, 'steps': 15685, 'loss/train': 1.208449363708496} +02/24/2022 21:52:42 - INFO - codeparrot_training - Step 15686: {'lr': 0.0004062340334572883, 'samples': 8031744, 'steps': 15686, 'loss/train': 1.7850629091262817} +02/24/2022 21:52:47 - INFO - codeparrot_training - Step 15687: {'lr': 0.0004062212593434634, 'samples': 8032256, 'steps': 15687, 'loss/train': 2.580045700073242} +02/24/2022 21:52:51 - INFO - codeparrot_training - Step 15688: {'lr': 0.0004062084845604361, 'samples': 8032768, 'steps': 15688, 'loss/train': 1.4563696384429932} +02/24/2022 21:52:56 - INFO - codeparrot_training - Step 15689: {'lr': 0.00040619570910826135, 'samples': 8033280, 'steps': 15689, 'loss/train': 2.5587480068206787} +02/24/2022 21:53:00 - INFO - codeparrot_training - Step 15690: {'lr': 0.0004061829329869937, 'samples': 8033792, 'steps': 15690, 'loss/train': 1.687915563583374} +02/24/2022 21:53:06 - INFO - codeparrot_training - Step 15691: {'lr': 0.0004061701561966881, 'samples': 8034304, 'steps': 15691, 'loss/train': 2.7886128425598145} +02/24/2022 21:53:09 - INFO - codeparrot_training - Step 15692: {'lr': 0.000406157378737399, 'samples': 8034816, 'steps': 15692, 'loss/train': 2.4578754901885986} +02/24/2022 21:53:16 - INFO - codeparrot_training - Step 15693: {'lr': 0.00040614460060918136, 'samples': 8035328, 'steps': 15693, 'loss/train': 1.6393136978149414} +02/24/2022 21:53:20 - INFO - codeparrot_training - Step 15694: {'lr': 0.0004061318218120898, 'samples': 8035840, 'steps': 15694, 'loss/train': 2.4925897121429443} +02/24/2022 21:53:25 - INFO - codeparrot_training - Step 15695: {'lr': 0.000406119042346179, 'samples': 8036352, 'steps': 15695, 'loss/train': 2.0249102115631104} +02/24/2022 21:53:29 - INFO - codeparrot_training - Step 15696: {'lr': 0.0004061062622115039, 'samples': 8036864, 'steps': 15696, 'loss/train': 2.170201539993286} +02/24/2022 21:53:34 - INFO - codeparrot_training - Step 15697: {'lr': 0.0004060934814081192, 'samples': 8037376, 'steps': 15697, 'loss/train': 2.5346755981445312} +02/24/2022 21:53:38 - INFO - codeparrot_training - Step 15698: {'lr': 0.00040608069993607954, 'samples': 8037888, 'steps': 15698, 'loss/train': 1.5353726148605347} +02/24/2022 21:53:43 - INFO - codeparrot_training - Step 15699: {'lr': 0.00040606791779543966, 'samples': 8038400, 'steps': 15699, 'loss/train': 2.2519142627716064} +02/24/2022 21:53:47 - INFO - codeparrot_training - Step 15700: {'lr': 0.00040605513498625443, 'samples': 8038912, 'steps': 15700, 'loss/train': 2.1619937419891357} +02/24/2022 21:53:52 - INFO - codeparrot_training - Step 15701: {'lr': 0.00040604235150857855, 'samples': 8039424, 'steps': 15701, 'loss/train': 2.3589928150177} +02/24/2022 21:53:56 - INFO - codeparrot_training - Step 15702: {'lr': 0.00040602956736246677, 'samples': 8039936, 'steps': 15702, 'loss/train': 2.3140981197357178} +02/24/2022 21:54:02 - INFO - codeparrot_training - Step 15703: {'lr': 0.00040601678254797394, 'samples': 8040448, 'steps': 15703, 'loss/train': 2.0368294715881348} +02/24/2022 21:54:05 - INFO - codeparrot_training - Step 15704: {'lr': 0.00040600399706515466, 'samples': 8040960, 'steps': 15704, 'loss/train': 2.1878161430358887} +02/24/2022 21:54:11 - INFO - codeparrot_training - Step 15705: {'lr': 0.0004059912109140638, 'samples': 8041472, 'steps': 15705, 'loss/train': 2.316575288772583} +02/24/2022 21:54:14 - INFO - codeparrot_training - Step 15706: {'lr': 0.00040597842409475615, 'samples': 8041984, 'steps': 15706, 'loss/train': 1.708521842956543} +02/24/2022 21:54:20 - INFO - codeparrot_training - Step 15707: {'lr': 0.00040596563660728646, 'samples': 8042496, 'steps': 15707, 'loss/train': 1.4838895797729492} +02/24/2022 21:54:23 - INFO - codeparrot_training - Step 15708: {'lr': 0.00040595284845170956, 'samples': 8043008, 'steps': 15708, 'loss/train': 1.8247803449630737} +02/24/2022 21:54:29 - INFO - codeparrot_training - Step 15709: {'lr': 0.0004059400596280801, 'samples': 8043520, 'steps': 15709, 'loss/train': 3.237764596939087} +02/24/2022 21:54:32 - INFO - codeparrot_training - Step 15710: {'lr': 0.00040592727013645297, 'samples': 8044032, 'steps': 15710, 'loss/train': 1.5520082712173462} +02/24/2022 21:54:38 - INFO - codeparrot_training - Step 15711: {'lr': 0.0004059144799768829, 'samples': 8044544, 'steps': 15711, 'loss/train': 1.7852360010147095} +02/24/2022 21:54:41 - INFO - codeparrot_training - Step 15712: {'lr': 0.00040590168914942477, 'samples': 8045056, 'steps': 15712, 'loss/train': 1.7346484661102295} +02/24/2022 21:54:48 - INFO - codeparrot_training - Step 15713: {'lr': 0.0004058888976541333, 'samples': 8045568, 'steps': 15713, 'loss/train': 2.3986096382141113} +02/24/2022 21:54:51 - INFO - codeparrot_training - Step 15714: {'lr': 0.00040587610549106326, 'samples': 8046080, 'steps': 15714, 'loss/train': 1.5621291399002075} +02/24/2022 21:54:57 - INFO - codeparrot_training - Step 15715: {'lr': 0.00040586331266026943, 'samples': 8046592, 'steps': 15715, 'loss/train': 2.487247943878174} +02/24/2022 21:55:00 - INFO - codeparrot_training - Step 15716: {'lr': 0.0004058505191618067, 'samples': 8047104, 'steps': 15716, 'loss/train': 3.226485252380371} +02/24/2022 21:55:06 - INFO - codeparrot_training - Step 15717: {'lr': 0.0004058377249957299, 'samples': 8047616, 'steps': 15717, 'loss/train': 9.076168060302734} +02/24/2022 21:55:09 - INFO - codeparrot_training - Step 15718: {'lr': 0.0004058249301620937, 'samples': 8048128, 'steps': 15718, 'loss/train': 1.2968710660934448} +02/24/2022 21:55:15 - INFO - codeparrot_training - Step 15719: {'lr': 0.00040581213466095304, 'samples': 8048640, 'steps': 15719, 'loss/train': 1.729250192642212} +02/24/2022 21:55:18 - INFO - codeparrot_training - Step 15720: {'lr': 0.0004057993384923626, 'samples': 8049152, 'steps': 15720, 'loss/train': 2.3715617656707764} +02/24/2022 21:55:24 - INFO - codeparrot_training - Step 15721: {'lr': 0.0004057865416563773, 'samples': 8049664, 'steps': 15721, 'loss/train': 2.581035852432251} +02/24/2022 21:55:27 - INFO - codeparrot_training - Step 15722: {'lr': 0.0004057737441530519, 'samples': 8050176, 'steps': 15722, 'loss/train': 1.50064218044281} +02/24/2022 21:55:33 - INFO - codeparrot_training - Step 15723: {'lr': 0.0004057609459824412, 'samples': 8050688, 'steps': 15723, 'loss/train': 3.061279058456421} +02/24/2022 21:55:36 - INFO - codeparrot_training - Step 15724: {'lr': 0.00040574814714460015, 'samples': 8051200, 'steps': 15724, 'loss/train': 2.3681302070617676} +02/24/2022 21:55:42 - INFO - codeparrot_training - Step 15725: {'lr': 0.0004057353476395835, 'samples': 8051712, 'steps': 15725, 'loss/train': 2.540001153945923} +02/24/2022 21:55:45 - INFO - codeparrot_training - Step 15726: {'lr': 0.00040572254746744607, 'samples': 8052224, 'steps': 15726, 'loss/train': 2.6839778423309326} +02/24/2022 21:55:52 - INFO - codeparrot_training - Step 15727: {'lr': 0.00040570974662824266, 'samples': 8052736, 'steps': 15727, 'loss/train': 1.0916556119918823} +02/24/2022 21:55:55 - INFO - codeparrot_training - Step 15728: {'lr': 0.00040569694512202815, 'samples': 8053248, 'steps': 15728, 'loss/train': 2.3467535972595215} +02/24/2022 21:56:01 - INFO - codeparrot_training - Step 15729: {'lr': 0.00040568414294885736, 'samples': 8053760, 'steps': 15729, 'loss/train': 1.5626912117004395} +02/24/2022 21:56:04 - INFO - codeparrot_training - Step 15730: {'lr': 0.00040567134010878513, 'samples': 8054272, 'steps': 15730, 'loss/train': 2.3129680156707764} +02/24/2022 21:56:10 - INFO - codeparrot_training - Step 15731: {'lr': 0.00040565853660186633, 'samples': 8054784, 'steps': 15731, 'loss/train': 2.3960771560668945} +02/24/2022 21:56:13 - INFO - codeparrot_training - Step 15732: {'lr': 0.0004056457324281557, 'samples': 8055296, 'steps': 15732, 'loss/train': 1.1214845180511475} +02/24/2022 21:56:19 - INFO - codeparrot_training - Step 15733: {'lr': 0.0004056329275877083, 'samples': 8055808, 'steps': 15733, 'loss/train': 2.5366082191467285} +02/24/2022 21:56:22 - INFO - codeparrot_training - Step 15734: {'lr': 0.00040562012208057886, 'samples': 8056320, 'steps': 15734, 'loss/train': 2.354208469390869} +02/24/2022 21:56:28 - INFO - codeparrot_training - Step 15735: {'lr': 0.0004056073159068222, 'samples': 8056832, 'steps': 15735, 'loss/train': 1.8322428464889526} +02/24/2022 21:56:31 - INFO - codeparrot_training - Step 15736: {'lr': 0.0004055945090664931, 'samples': 8057344, 'steps': 15736, 'loss/train': 1.8507310152053833} +02/24/2022 21:56:37 - INFO - codeparrot_training - Step 15737: {'lr': 0.0004055817015596467, 'samples': 8057856, 'steps': 15737, 'loss/train': 0.676263153553009} +02/24/2022 21:56:40 - INFO - codeparrot_training - Step 15738: {'lr': 0.00040556889338633754, 'samples': 8058368, 'steps': 15738, 'loss/train': 2.2110085487365723} +02/24/2022 21:56:47 - INFO - codeparrot_training - Step 15739: {'lr': 0.00040555608454662074, 'samples': 8058880, 'steps': 15739, 'loss/train': 2.2826342582702637} +02/24/2022 21:56:50 - INFO - codeparrot_training - Step 15740: {'lr': 0.00040554327504055106, 'samples': 8059392, 'steps': 15740, 'loss/train': 4.769762992858887} +02/24/2022 21:56:55 - INFO - codeparrot_training - Step 15741: {'lr': 0.00040553046486818336, 'samples': 8059904, 'steps': 15741, 'loss/train': 0.9186894297599792} +02/24/2022 21:56:59 - INFO - codeparrot_training - Step 15742: {'lr': 0.0004055176540295725, 'samples': 8060416, 'steps': 15742, 'loss/train': 2.7471678256988525} +02/24/2022 21:57:04 - INFO - codeparrot_training - Step 15743: {'lr': 0.00040550484252477347, 'samples': 8060928, 'steps': 15743, 'loss/train': 0.5740983486175537} +02/24/2022 21:57:08 - INFO - codeparrot_training - Step 15744: {'lr': 0.00040549203035384105, 'samples': 8061440, 'steps': 15744, 'loss/train': 1.8564348220825195} +02/24/2022 21:57:13 - INFO - codeparrot_training - Step 15745: {'lr': 0.0004054792175168301, 'samples': 8061952, 'steps': 15745, 'loss/train': 2.8452157974243164} +02/24/2022 21:57:17 - INFO - codeparrot_training - Step 15746: {'lr': 0.00040546640401379556, 'samples': 8062464, 'steps': 15746, 'loss/train': 1.6108318567276} +02/24/2022 21:57:22 - INFO - codeparrot_training - Step 15747: {'lr': 0.0004054535898447924, 'samples': 8062976, 'steps': 15747, 'loss/train': 1.845558524131775} +02/24/2022 21:57:26 - INFO - codeparrot_training - Step 15748: {'lr': 0.0004054407750098753, 'samples': 8063488, 'steps': 15748, 'loss/train': 1.8081797361373901} +02/24/2022 21:57:33 - INFO - codeparrot_training - Step 15749: {'lr': 0.0004054279595090994, 'samples': 8064000, 'steps': 15749, 'loss/train': 2.148766279220581} +02/24/2022 21:57:36 - INFO - codeparrot_training - Step 15750: {'lr': 0.0004054151433425194, 'samples': 8064512, 'steps': 15750, 'loss/train': 2.715588331222534} +02/24/2022 21:57:41 - INFO - codeparrot_training - Step 15751: {'lr': 0.00040540232651019027, 'samples': 8065024, 'steps': 15751, 'loss/train': 2.917924404144287} +02/24/2022 21:57:45 - INFO - codeparrot_training - Step 15752: {'lr': 0.0004053895090121669, 'samples': 8065536, 'steps': 15752, 'loss/train': 1.883616328239441} +02/24/2022 21:57:51 - INFO - codeparrot_training - Step 15753: {'lr': 0.00040537669084850426, 'samples': 8066048, 'steps': 15753, 'loss/train': 1.8007880449295044} +02/24/2022 21:57:56 - INFO - codeparrot_training - Step 15754: {'lr': 0.0004053638720192572, 'samples': 8066560, 'steps': 15754, 'loss/train': 2.740985155105591} +02/24/2022 21:58:00 - INFO - codeparrot_training - Step 15755: {'lr': 0.00040535105252448067, 'samples': 8067072, 'steps': 15755, 'loss/train': 1.8929885625839233} +02/24/2022 21:58:05 - INFO - codeparrot_training - Step 15756: {'lr': 0.0004053382323642295, 'samples': 8067584, 'steps': 15756, 'loss/train': 0.9658442735671997} +02/24/2022 21:58:09 - INFO - codeparrot_training - Step 15757: {'lr': 0.0004053254115385587, 'samples': 8068096, 'steps': 15757, 'loss/train': 1.908900260925293} +02/24/2022 21:58:15 - INFO - codeparrot_training - Step 15758: {'lr': 0.00040531259004752317, 'samples': 8068608, 'steps': 15758, 'loss/train': 2.4591195583343506} +02/24/2022 21:58:19 - INFO - codeparrot_training - Step 15759: {'lr': 0.00040529976789117786, 'samples': 8069120, 'steps': 15759, 'loss/train': 1.210711121559143} +02/24/2022 21:58:24 - INFO - codeparrot_training - Step 15760: {'lr': 0.0004052869450695776, 'samples': 8069632, 'steps': 15760, 'loss/train': 1.6907556056976318} +02/24/2022 21:58:28 - INFO - codeparrot_training - Step 15761: {'lr': 0.00040527412158277744, 'samples': 8070144, 'steps': 15761, 'loss/train': 1.694442868232727} +02/24/2022 21:58:34 - INFO - codeparrot_training - Step 15762: {'lr': 0.00040526129743083216, 'samples': 8070656, 'steps': 15762, 'loss/train': 1.5524027347564697} +02/24/2022 21:58:37 - INFO - codeparrot_training - Step 15763: {'lr': 0.0004052484726137968, 'samples': 8071168, 'steps': 15763, 'loss/train': 2.5131072998046875} +02/24/2022 21:58:43 - INFO - codeparrot_training - Step 15764: {'lr': 0.00040523564713172634, 'samples': 8071680, 'steps': 15764, 'loss/train': 1.8341569900512695} +02/24/2022 21:58:46 - INFO - codeparrot_training - Step 15765: {'lr': 0.0004052228209846756, 'samples': 8072192, 'steps': 15765, 'loss/train': 1.048017144203186} +02/24/2022 21:58:52 - INFO - codeparrot_training - Step 15766: {'lr': 0.0004052099941726996, 'samples': 8072704, 'steps': 15766, 'loss/train': 2.483851194381714} +02/24/2022 21:58:55 - INFO - codeparrot_training - Step 15767: {'lr': 0.0004051971666958533, 'samples': 8073216, 'steps': 15767, 'loss/train': 2.489689588546753} +02/24/2022 21:59:01 - INFO - codeparrot_training - Step 15768: {'lr': 0.0004051843385541916, 'samples': 8073728, 'steps': 15768, 'loss/train': 2.7603371143341064} +02/24/2022 21:59:04 - INFO - codeparrot_training - Step 15769: {'lr': 0.00040517150974776945, 'samples': 8074240, 'steps': 15769, 'loss/train': 1.8423763513565063} +02/24/2022 21:59:10 - INFO - codeparrot_training - Step 15770: {'lr': 0.00040515868027664185, 'samples': 8074752, 'steps': 15770, 'loss/train': 3.674001693725586} +02/24/2022 21:59:13 - INFO - codeparrot_training - Step 15771: {'lr': 0.00040514585014086367, 'samples': 8075264, 'steps': 15771, 'loss/train': 1.7027298212051392} +02/24/2022 21:59:19 - INFO - codeparrot_training - Step 15772: {'lr': 0.00040513301934049005, 'samples': 8075776, 'steps': 15772, 'loss/train': 2.7007603645324707} +02/24/2022 21:59:22 - INFO - codeparrot_training - Step 15773: {'lr': 0.00040512018787557574, 'samples': 8076288, 'steps': 15773, 'loss/train': 1.7654494047164917} +02/24/2022 21:59:29 - INFO - codeparrot_training - Step 15774: {'lr': 0.0004051073557461759, 'samples': 8076800, 'steps': 15774, 'loss/train': 0.9119476675987244} +02/24/2022 21:59:32 - INFO - codeparrot_training - Step 15775: {'lr': 0.00040509452295234527, 'samples': 8077312, 'steps': 15775, 'loss/train': 0.22977015376091003} +02/24/2022 21:59:39 - INFO - codeparrot_training - Step 15776: {'lr': 0.00040508168949413904, 'samples': 8077824, 'steps': 15776, 'loss/train': 2.417126417160034} +02/24/2022 21:59:42 - INFO - codeparrot_training - Step 15777: {'lr': 0.0004050688553716121, 'samples': 8078336, 'steps': 15777, 'loss/train': 2.347332715988159} +02/24/2022 21:59:46 - INFO - codeparrot_training - Step 15778: {'lr': 0.0004050560205848194, 'samples': 8078848, 'steps': 15778, 'loss/train': 1.4732513427734375} +02/24/2022 21:59:51 - INFO - codeparrot_training - Step 15779: {'lr': 0.0004050431851338159, 'samples': 8079360, 'steps': 15779, 'loss/train': 1.280692219734192} +02/24/2022 21:59:55 - INFO - codeparrot_training - Step 15780: {'lr': 0.00040503034901865666, 'samples': 8079872, 'steps': 15780, 'loss/train': 2.910001277923584} +02/24/2022 22:00:00 - INFO - codeparrot_training - Step 15781: {'lr': 0.00040501751223939665, 'samples': 8080384, 'steps': 15781, 'loss/train': 1.4696500301361084} +02/24/2022 22:00:04 - INFO - codeparrot_training - Step 15782: {'lr': 0.00040500467479609084, 'samples': 8080896, 'steps': 15782, 'loss/train': 1.9289052486419678} +02/24/2022 22:00:09 - INFO - codeparrot_training - Step 15783: {'lr': 0.00040499183668879415, 'samples': 8081408, 'steps': 15783, 'loss/train': 2.3125689029693604} +02/24/2022 22:00:13 - INFO - codeparrot_training - Step 15784: {'lr': 0.0004049789979175617, 'samples': 8081920, 'steps': 15784, 'loss/train': 2.5349185466766357} +02/24/2022 22:00:19 - INFO - codeparrot_training - Step 15785: {'lr': 0.00040496615848244845, 'samples': 8082432, 'steps': 15785, 'loss/train': 2.0821986198425293} +02/24/2022 22:00:22 - INFO - codeparrot_training - Step 15786: {'lr': 0.00040495331838350933, 'samples': 8082944, 'steps': 15786, 'loss/train': 2.451448917388916} +02/24/2022 22:00:28 - INFO - codeparrot_training - Step 15787: {'lr': 0.00040494047762079953, 'samples': 8083456, 'steps': 15787, 'loss/train': 2.157874822616577} +02/24/2022 22:00:31 - INFO - codeparrot_training - Step 15788: {'lr': 0.0004049276361943738, 'samples': 8083968, 'steps': 15788, 'loss/train': 4.5446038246154785} +02/24/2022 22:00:37 - INFO - codeparrot_training - Step 15789: {'lr': 0.00040491479410428735, 'samples': 8084480, 'steps': 15789, 'loss/train': 1.0855789184570312} +02/24/2022 22:00:40 - INFO - codeparrot_training - Step 15790: {'lr': 0.00040490195135059503, 'samples': 8084992, 'steps': 15790, 'loss/train': 2.2109575271606445} +02/24/2022 22:00:46 - INFO - codeparrot_training - Step 15791: {'lr': 0.000404889107933352, 'samples': 8085504, 'steps': 15791, 'loss/train': 1.4307068586349487} +02/24/2022 22:00:49 - INFO - codeparrot_training - Step 15792: {'lr': 0.0004048762638526132, 'samples': 8086016, 'steps': 15792, 'loss/train': 3.499948740005493} +02/24/2022 22:00:55 - INFO - codeparrot_training - Step 15793: {'lr': 0.0004048634191084336, 'samples': 8086528, 'steps': 15793, 'loss/train': 2.250875234603882} +02/24/2022 22:00:58 - INFO - codeparrot_training - Step 15794: {'lr': 0.0004048505737008684, 'samples': 8087040, 'steps': 15794, 'loss/train': 1.9795410633087158} +02/24/2022 22:01:04 - INFO - codeparrot_training - Step 15795: {'lr': 0.0004048377276299724, 'samples': 8087552, 'steps': 15795, 'loss/train': 2.4537510871887207} +02/24/2022 22:01:08 - INFO - codeparrot_training - Step 15796: {'lr': 0.00040482488089580083, 'samples': 8088064, 'steps': 15796, 'loss/train': 1.2694814205169678} +02/24/2022 22:01:13 - INFO - codeparrot_training - Step 15797: {'lr': 0.00040481203349840864, 'samples': 8088576, 'steps': 15797, 'loss/train': 2.8642430305480957} +02/24/2022 22:01:17 - INFO - codeparrot_training - Step 15798: {'lr': 0.0004047991854378508, 'samples': 8089088, 'steps': 15798, 'loss/train': 3.310959577560425} +02/24/2022 22:01:23 - INFO - codeparrot_training - Step 15799: {'lr': 0.00040478633671418244, 'samples': 8089600, 'steps': 15799, 'loss/train': 1.7746672630310059} +02/24/2022 22:01:26 - INFO - codeparrot_training - Step 15800: {'lr': 0.00040477348732745853, 'samples': 8090112, 'steps': 15800, 'loss/train': 2.1471316814422607} +02/24/2022 22:01:32 - INFO - codeparrot_training - Step 15801: {'lr': 0.00040476063727773416, 'samples': 8090624, 'steps': 15801, 'loss/train': 1.0932163000106812} +02/24/2022 22:01:35 - INFO - codeparrot_training - Step 15802: {'lr': 0.0004047477865650644, 'samples': 8091136, 'steps': 15802, 'loss/train': 2.674537181854248} +02/24/2022 22:01:40 - INFO - codeparrot_training - Step 15803: {'lr': 0.00040473493518950414, 'samples': 8091648, 'steps': 15803, 'loss/train': 1.7999407052993774} +02/24/2022 22:01:44 - INFO - codeparrot_training - Step 15804: {'lr': 0.00040472208315110866, 'samples': 8092160, 'steps': 15804, 'loss/train': 2.390216827392578} +02/24/2022 22:02:51 - INFO - codeparrot_training - Step 15805: {'lr': 0.0004047092304499329, 'samples': 8092672, 'steps': 15805, 'loss/train': 2.115192174911499} +02/24/2022 22:02:55 - INFO - codeparrot_training - Step 15806: {'lr': 0.0004046963770860319, 'samples': 8093184, 'steps': 15806, 'loss/train': 0.5324437022209167} +02/24/2022 22:03:00 - INFO - codeparrot_training - Step 15807: {'lr': 0.0004046835230594608, 'samples': 8093696, 'steps': 15807, 'loss/train': 1.6067605018615723} +02/24/2022 22:03:04 - INFO - codeparrot_training - Step 15808: {'lr': 0.0004046706683702744, 'samples': 8094208, 'steps': 15808, 'loss/train': 2.509697914123535} +02/24/2022 22:03:09 - INFO - codeparrot_training - Step 15809: {'lr': 0.0004046578130185282, 'samples': 8094720, 'steps': 15809, 'loss/train': 0.5408334136009216} +02/24/2022 22:03:13 - INFO - codeparrot_training - Step 15810: {'lr': 0.00040464495700427694, 'samples': 8095232, 'steps': 15810, 'loss/train': 2.224310874938965} +02/24/2022 22:03:18 - INFO - codeparrot_training - Step 15811: {'lr': 0.0004046321003275759, 'samples': 8095744, 'steps': 15811, 'loss/train': 2.0357437133789062} +02/24/2022 22:03:22 - INFO - codeparrot_training - Step 15812: {'lr': 0.00040461924298847987, 'samples': 8096256, 'steps': 15812, 'loss/train': 2.466475009918213} +02/24/2022 22:03:27 - INFO - codeparrot_training - Step 15813: {'lr': 0.0004046063849870442, 'samples': 8096768, 'steps': 15813, 'loss/train': 1.7881413698196411} +02/24/2022 22:03:31 - INFO - codeparrot_training - Step 15814: {'lr': 0.00040459352632332387, 'samples': 8097280, 'steps': 15814, 'loss/train': 0.9035694003105164} +02/24/2022 22:03:36 - INFO - codeparrot_training - Step 15815: {'lr': 0.0004045806669973739, 'samples': 8097792, 'steps': 15815, 'loss/train': 1.3460819721221924} +02/24/2022 22:03:42 - INFO - codeparrot_training - Step 15816: {'lr': 0.00040456780700924956, 'samples': 8098304, 'steps': 15816, 'loss/train': 2.337918996810913} +02/24/2022 22:03:45 - INFO - codeparrot_training - Step 15817: {'lr': 0.0004045549463590057, 'samples': 8098816, 'steps': 15817, 'loss/train': 0.1956883817911148} +02/24/2022 22:03:51 - INFO - codeparrot_training - Step 15818: {'lr': 0.0004045420850466975, 'samples': 8099328, 'steps': 15818, 'loss/train': 3.039705991744995} +02/24/2022 22:03:54 - INFO - codeparrot_training - Step 15819: {'lr': 0.00040452922307238016, 'samples': 8099840, 'steps': 15819, 'loss/train': 2.5365610122680664} +02/24/2022 22:04:01 - INFO - codeparrot_training - Step 15820: {'lr': 0.00040451636043610875, 'samples': 8100352, 'steps': 15820, 'loss/train': 1.4724889993667603} +02/24/2022 22:04:04 - INFO - codeparrot_training - Step 15821: {'lr': 0.0004045034971379382, 'samples': 8100864, 'steps': 15821, 'loss/train': 1.7322403192520142} +02/24/2022 22:04:10 - INFO - codeparrot_training - Step 15822: {'lr': 0.0004044906331779238, 'samples': 8101376, 'steps': 15822, 'loss/train': 3.248962640762329} +02/24/2022 22:04:13 - INFO - codeparrot_training - Step 15823: {'lr': 0.00040447776855612053, 'samples': 8101888, 'steps': 15823, 'loss/train': 1.9005476236343384} +02/24/2022 22:04:19 - INFO - codeparrot_training - Step 15824: {'lr': 0.0004044649032725836, 'samples': 8102400, 'steps': 15824, 'loss/train': 2.2104201316833496} +02/24/2022 22:04:22 - INFO - codeparrot_training - Step 15825: {'lr': 0.000404452037327368, 'samples': 8102912, 'steps': 15825, 'loss/train': 1.8906991481781006} +02/24/2022 22:04:28 - INFO - codeparrot_training - Step 15826: {'lr': 0.00040443917072052906, 'samples': 8103424, 'steps': 15826, 'loss/train': 1.7152348756790161} +02/24/2022 22:04:31 - INFO - codeparrot_training - Step 15827: {'lr': 0.0004044263034521216, 'samples': 8103936, 'steps': 15827, 'loss/train': 1.5793169736862183} +02/24/2022 22:04:37 - INFO - codeparrot_training - Step 15828: {'lr': 0.000404413435522201, 'samples': 8104448, 'steps': 15828, 'loss/train': 1.8360563516616821} +02/24/2022 22:04:40 - INFO - codeparrot_training - Step 15829: {'lr': 0.00040440056693082224, 'samples': 8104960, 'steps': 15829, 'loss/train': 2.155177354812622} +02/24/2022 22:04:47 - INFO - codeparrot_training - Step 15830: {'lr': 0.0004043876976780404, 'samples': 8105472, 'steps': 15830, 'loss/train': 1.060920238494873} +02/24/2022 22:04:50 - INFO - codeparrot_training - Step 15831: {'lr': 0.0004043748277639108, 'samples': 8105984, 'steps': 15831, 'loss/train': 2.553969144821167} +02/24/2022 22:04:56 - INFO - codeparrot_training - Step 15832: {'lr': 0.0004043619571884884, 'samples': 8106496, 'steps': 15832, 'loss/train': 2.750020980834961} +02/24/2022 22:04:59 - INFO - codeparrot_training - Step 15833: {'lr': 0.0004043490859518284, 'samples': 8107008, 'steps': 15833, 'loss/train': 1.9830418825149536} +02/24/2022 22:05:05 - INFO - codeparrot_training - Step 15834: {'lr': 0.0004043362140539859, 'samples': 8107520, 'steps': 15834, 'loss/train': 0.7290531396865845} +02/24/2022 22:05:08 - INFO - codeparrot_training - Step 15835: {'lr': 0.00040432334149501613, 'samples': 8108032, 'steps': 15835, 'loss/train': 1.781973123550415} +02/24/2022 22:05:14 - INFO - codeparrot_training - Step 15836: {'lr': 0.00040431046827497415, 'samples': 8108544, 'steps': 15836, 'loss/train': 2.0285396575927734} +02/24/2022 22:05:18 - INFO - codeparrot_training - Step 15837: {'lr': 0.00040429759439391513, 'samples': 8109056, 'steps': 15837, 'loss/train': 1.9817787408828735} +02/24/2022 22:05:23 - INFO - codeparrot_training - Step 15838: {'lr': 0.00040428471985189416, 'samples': 8109568, 'steps': 15838, 'loss/train': 2.752528190612793} +02/24/2022 22:05:27 - INFO - codeparrot_training - Step 15839: {'lr': 0.0004042718446489665, 'samples': 8110080, 'steps': 15839, 'loss/train': 1.9874250888824463} +02/24/2022 22:05:33 - INFO - codeparrot_training - Step 15840: {'lr': 0.0004042589687851872, 'samples': 8110592, 'steps': 15840, 'loss/train': 2.269784927368164} +02/24/2022 22:05:36 - INFO - codeparrot_training - Step 15841: {'lr': 0.00040424609226061146, 'samples': 8111104, 'steps': 15841, 'loss/train': 2.017077684402466} +02/24/2022 22:05:42 - INFO - codeparrot_training - Step 15842: {'lr': 0.0004042332150752944, 'samples': 8111616, 'steps': 15842, 'loss/train': 2.5844528675079346} +02/24/2022 22:05:45 - INFO - codeparrot_training - Step 15843: {'lr': 0.0004042203372292913, 'samples': 8112128, 'steps': 15843, 'loss/train': 1.9932184219360352} +02/24/2022 22:05:51 - INFO - codeparrot_training - Step 15844: {'lr': 0.00040420745872265726, 'samples': 8112640, 'steps': 15844, 'loss/train': 2.2793402671813965} +02/24/2022 22:05:54 - INFO - codeparrot_training - Step 15845: {'lr': 0.0004041945795554474, 'samples': 8113152, 'steps': 15845, 'loss/train': 0.9564520716667175} +02/24/2022 22:06:00 - INFO - codeparrot_training - Step 15846: {'lr': 0.0004041816997277169, 'samples': 8113664, 'steps': 15846, 'loss/train': 2.2356438636779785} +02/24/2022 22:06:03 - INFO - codeparrot_training - Step 15847: {'lr': 0.000404168819239521, 'samples': 8114176, 'steps': 15847, 'loss/train': 1.691184639930725} +02/24/2022 22:06:09 - INFO - codeparrot_training - Step 15848: {'lr': 0.0004041559380909148, 'samples': 8114688, 'steps': 15848, 'loss/train': 2.523899793624878} +02/24/2022 22:06:12 - INFO - codeparrot_training - Step 15849: {'lr': 0.00040414305628195347, 'samples': 8115200, 'steps': 15849, 'loss/train': 0.7413461208343506} +02/24/2022 22:06:18 - INFO - codeparrot_training - Step 15850: {'lr': 0.00040413017381269237, 'samples': 8115712, 'steps': 15850, 'loss/train': 2.901477336883545} +02/24/2022 22:06:21 - INFO - codeparrot_training - Step 15851: {'lr': 0.00040411729068318635, 'samples': 8116224, 'steps': 15851, 'loss/train': 1.7757055759429932} +02/24/2022 22:06:27 - INFO - codeparrot_training - Step 15852: {'lr': 0.0004041044068934909, 'samples': 8116736, 'steps': 15852, 'loss/train': 2.344928741455078} +02/24/2022 22:06:30 - INFO - codeparrot_training - Step 15853: {'lr': 0.00040409152244366117, 'samples': 8117248, 'steps': 15853, 'loss/train': 0.5482961535453796} +02/24/2022 22:06:36 - INFO - codeparrot_training - Step 15854: {'lr': 0.00040407863733375217, 'samples': 8117760, 'steps': 15854, 'loss/train': 0.8105165958404541} +02/24/2022 22:06:39 - INFO - codeparrot_training - Step 15855: {'lr': 0.0004040657515638193, 'samples': 8118272, 'steps': 15855, 'loss/train': 1.8301023244857788} +02/24/2022 22:06:46 - INFO - codeparrot_training - Step 15856: {'lr': 0.0004040528651339176, 'samples': 8118784, 'steps': 15856, 'loss/train': 2.081894636154175} +02/24/2022 22:06:49 - INFO - codeparrot_training - Step 15857: {'lr': 0.00040403997804410244, 'samples': 8119296, 'steps': 15857, 'loss/train': 1.9402638673782349} +02/24/2022 22:06:55 - INFO - codeparrot_training - Step 15858: {'lr': 0.00040402709029442883, 'samples': 8119808, 'steps': 15858, 'loss/train': 1.6192325353622437} +02/24/2022 22:06:58 - INFO - codeparrot_training - Step 15859: {'lr': 0.0004040142018849521, 'samples': 8120320, 'steps': 15859, 'loss/train': 2.533816337585449} +02/24/2022 22:07:03 - INFO - codeparrot_training - Step 15860: {'lr': 0.0004040013128157275, 'samples': 8120832, 'steps': 15860, 'loss/train': 2.0810694694519043} +02/24/2022 22:07:07 - INFO - codeparrot_training - Step 15861: {'lr': 0.0004039884230868101, 'samples': 8121344, 'steps': 15861, 'loss/train': 2.0571820735931396} +02/24/2022 22:07:13 - INFO - codeparrot_training - Step 15862: {'lr': 0.0004039755326982552, 'samples': 8121856, 'steps': 15862, 'loss/train': 1.2313518524169922} +02/24/2022 22:07:16 - INFO - codeparrot_training - Step 15863: {'lr': 0.000403962641650118, 'samples': 8122368, 'steps': 15863, 'loss/train': 2.065330743789673} +02/24/2022 22:07:22 - INFO - codeparrot_training - Step 15864: {'lr': 0.0004039497499424538, 'samples': 8122880, 'steps': 15864, 'loss/train': 1.1708543300628662} +02/24/2022 22:07:25 - INFO - codeparrot_training - Step 15865: {'lr': 0.00040393685757531776, 'samples': 8123392, 'steps': 15865, 'loss/train': 2.123807907104492} +02/24/2022 22:07:32 - INFO - codeparrot_training - Step 15866: {'lr': 0.000403923964548765, 'samples': 8123904, 'steps': 15866, 'loss/train': 2.3617241382598877} +02/24/2022 22:07:35 - INFO - codeparrot_training - Step 15867: {'lr': 0.0004039110708628509, 'samples': 8124416, 'steps': 15867, 'loss/train': 1.7418897151947021} +02/24/2022 22:07:41 - INFO - codeparrot_training - Step 15868: {'lr': 0.00040389817651763073, 'samples': 8124928, 'steps': 15868, 'loss/train': 1.7907893657684326} +02/24/2022 22:07:44 - INFO - codeparrot_training - Step 15869: {'lr': 0.0004038852815131595, 'samples': 8125440, 'steps': 15869, 'loss/train': 2.2889366149902344} +02/24/2022 22:07:49 - INFO - codeparrot_training - Step 15870: {'lr': 0.0004038723858494927, 'samples': 8125952, 'steps': 15870, 'loss/train': 2.0409507751464844} +02/24/2022 22:07:53 - INFO - codeparrot_training - Step 15871: {'lr': 0.00040385948952668537, 'samples': 8126464, 'steps': 15871, 'loss/train': 2.307403802871704} +02/24/2022 22:07:59 - INFO - codeparrot_training - Step 15872: {'lr': 0.0004038465925447929, 'samples': 8126976, 'steps': 15872, 'loss/train': 2.6729981899261475} +02/24/2022 22:08:02 - INFO - codeparrot_training - Step 15873: {'lr': 0.00040383369490387043, 'samples': 8127488, 'steps': 15873, 'loss/train': 3.0144176483154297} +02/24/2022 22:08:08 - INFO - codeparrot_training - Step 15874: {'lr': 0.0004038207966039733, 'samples': 8128000, 'steps': 15874, 'loss/train': 1.056424856185913} +02/24/2022 22:08:11 - INFO - codeparrot_training - Step 15875: {'lr': 0.00040380789764515667, 'samples': 8128512, 'steps': 15875, 'loss/train': 2.3025918006896973} +02/24/2022 22:08:17 - INFO - codeparrot_training - Step 15876: {'lr': 0.0004037949980274759, 'samples': 8129024, 'steps': 15876, 'loss/train': 2.5147294998168945} +02/24/2022 22:08:20 - INFO - codeparrot_training - Step 15877: {'lr': 0.0004037820977509862, 'samples': 8129536, 'steps': 15877, 'loss/train': 2.1982905864715576} +02/24/2022 22:08:26 - INFO - codeparrot_training - Step 15878: {'lr': 0.00040376919681574285, 'samples': 8130048, 'steps': 15878, 'loss/train': 2.2790567874908447} +02/24/2022 22:08:30 - INFO - codeparrot_training - Step 15879: {'lr': 0.000403756295221801, 'samples': 8130560, 'steps': 15879, 'loss/train': 1.8554037809371948} +02/24/2022 22:08:35 - INFO - codeparrot_training - Step 15880: {'lr': 0.00040374339296921606, 'samples': 8131072, 'steps': 15880, 'loss/train': 2.983825206756592} +02/24/2022 22:08:39 - INFO - codeparrot_training - Step 15881: {'lr': 0.00040373049005804323, 'samples': 8131584, 'steps': 15881, 'loss/train': 1.3516827821731567} +02/24/2022 22:08:44 - INFO - codeparrot_training - Step 15882: {'lr': 0.00040371758648833776, 'samples': 8132096, 'steps': 15882, 'loss/train': 1.6576025485992432} +02/24/2022 22:08:48 - INFO - codeparrot_training - Step 15883: {'lr': 0.00040370468226015507, 'samples': 8132608, 'steps': 15883, 'loss/train': 2.253601551055908} +02/24/2022 22:08:53 - INFO - codeparrot_training - Step 15884: {'lr': 0.0004036917773735502, 'samples': 8133120, 'steps': 15884, 'loss/train': 2.3390848636627197} +02/24/2022 22:08:57 - INFO - codeparrot_training - Step 15885: {'lr': 0.00040367887182857866, 'samples': 8133632, 'steps': 15885, 'loss/train': 1.6015355587005615} +02/24/2022 22:09:02 - INFO - codeparrot_training - Step 15886: {'lr': 0.00040366596562529554, 'samples': 8134144, 'steps': 15886, 'loss/train': 2.5205025672912598} +02/24/2022 22:09:06 - INFO - codeparrot_training - Step 15887: {'lr': 0.00040365305876375636, 'samples': 8134656, 'steps': 15887, 'loss/train': 1.9925780296325684} +02/24/2022 22:09:12 - INFO - codeparrot_training - Step 15888: {'lr': 0.0004036401512440161, 'samples': 8135168, 'steps': 15888, 'loss/train': 2.685431480407715} +02/24/2022 22:09:16 - INFO - codeparrot_training - Step 15889: {'lr': 0.0004036272430661303, 'samples': 8135680, 'steps': 15889, 'loss/train': 1.1180616617202759} +02/24/2022 22:09:21 - INFO - codeparrot_training - Step 15890: {'lr': 0.0004036143342301542, 'samples': 8136192, 'steps': 15890, 'loss/train': 1.2874945402145386} +02/24/2022 22:09:25 - INFO - codeparrot_training - Step 15891: {'lr': 0.000403601424736143, 'samples': 8136704, 'steps': 15891, 'loss/train': 2.018214464187622} +02/24/2022 22:09:30 - INFO - codeparrot_training - Step 15892: {'lr': 0.0004035885145841521, 'samples': 8137216, 'steps': 15892, 'loss/train': 1.9177649021148682} +02/24/2022 22:09:34 - INFO - codeparrot_training - Step 15893: {'lr': 0.00040357560377423675, 'samples': 8137728, 'steps': 15893, 'loss/train': 1.4360467195510864} +02/24/2022 22:09:39 - INFO - codeparrot_training - Step 15894: {'lr': 0.0004035626923064524, 'samples': 8138240, 'steps': 15894, 'loss/train': 2.937727451324463} +02/24/2022 22:09:43 - INFO - codeparrot_training - Step 15895: {'lr': 0.00040354978018085407, 'samples': 8138752, 'steps': 15895, 'loss/train': 1.301561713218689} +02/24/2022 22:09:48 - INFO - codeparrot_training - Step 15896: {'lr': 0.00040353686739749733, 'samples': 8139264, 'steps': 15896, 'loss/train': 2.101635694503784} +02/24/2022 22:09:52 - INFO - codeparrot_training - Step 15897: {'lr': 0.00040352395395643737, 'samples': 8139776, 'steps': 15897, 'loss/train': 2.8872873783111572} +02/24/2022 22:09:57 - INFO - codeparrot_training - Step 15898: {'lr': 0.00040351103985772964, 'samples': 8140288, 'steps': 15898, 'loss/train': 1.7483012676239014} +02/24/2022 22:10:01 - INFO - codeparrot_training - Step 15899: {'lr': 0.00040349812510142923, 'samples': 8140800, 'steps': 15899, 'loss/train': 1.4598948955535889} +02/24/2022 22:10:06 - INFO - codeparrot_training - Step 15900: {'lr': 0.0004034852096875916, 'samples': 8141312, 'steps': 15900, 'loss/train': 0.5955232381820679} +02/24/2022 22:10:10 - INFO - codeparrot_training - Step 15901: {'lr': 0.0004034722936162721, 'samples': 8141824, 'steps': 15901, 'loss/train': 2.769268035888672} +02/24/2022 22:10:16 - INFO - codeparrot_training - Step 15902: {'lr': 0.00040345937688752607, 'samples': 8142336, 'steps': 15902, 'loss/train': 0.09566738456487656} +02/24/2022 22:10:19 - INFO - codeparrot_training - Step 15903: {'lr': 0.0004034464595014088, 'samples': 8142848, 'steps': 15903, 'loss/train': 1.6336512565612793} +02/24/2022 22:10:25 - INFO - codeparrot_training - Step 15904: {'lr': 0.00040343354145797554, 'samples': 8143360, 'steps': 15904, 'loss/train': 1.7764639854431152} +02/24/2022 22:10:28 - INFO - codeparrot_training - Step 15905: {'lr': 0.0004034206227572818, 'samples': 8143872, 'steps': 15905, 'loss/train': 2.9088022708892822} +02/24/2022 22:10:34 - INFO - codeparrot_training - Step 15906: {'lr': 0.0004034077033993828, 'samples': 8144384, 'steps': 15906, 'loss/train': 2.1309866905212402} +02/24/2022 22:10:37 - INFO - codeparrot_training - Step 15907: {'lr': 0.00040339478338433386, 'samples': 8144896, 'steps': 15907, 'loss/train': 2.089580774307251} +02/24/2022 22:10:43 - INFO - codeparrot_training - Step 15908: {'lr': 0.0004033818627121904, 'samples': 8145408, 'steps': 15908, 'loss/train': 2.587886095046997} +02/24/2022 22:10:46 - INFO - codeparrot_training - Step 15909: {'lr': 0.00040336894138300777, 'samples': 8145920, 'steps': 15909, 'loss/train': 1.3302668333053589} +02/24/2022 22:10:52 - INFO - codeparrot_training - Step 15910: {'lr': 0.0004033560193968413, 'samples': 8146432, 'steps': 15910, 'loss/train': 1.1433871984481812} +02/24/2022 22:10:55 - INFO - codeparrot_training - Step 15911: {'lr': 0.00040334309675374636, 'samples': 8146944, 'steps': 15911, 'loss/train': 2.8243627548217773} +02/24/2022 22:11:01 - INFO - codeparrot_training - Step 15912: {'lr': 0.0004033301734537782, 'samples': 8147456, 'steps': 15912, 'loss/train': 1.226119875907898} +02/24/2022 22:11:04 - INFO - codeparrot_training - Step 15913: {'lr': 0.0004033172494969923, 'samples': 8147968, 'steps': 15913, 'loss/train': 1.8127819299697876} +02/24/2022 22:11:10 - INFO - codeparrot_training - Step 15914: {'lr': 0.000403304324883444, 'samples': 8148480, 'steps': 15914, 'loss/train': 1.9139840602874756} +02/24/2022 22:11:16 - INFO - codeparrot_training - Step 15915: {'lr': 0.00040329139961318863, 'samples': 8148992, 'steps': 15915, 'loss/train': 1.569076657295227} +02/24/2022 22:11:19 - INFO - codeparrot_training - Step 15916: {'lr': 0.00040327847368628163, 'samples': 8149504, 'steps': 15916, 'loss/train': 0.9138757586479187} +02/24/2022 22:11:25 - INFO - codeparrot_training - Step 15917: {'lr': 0.0004032655471027783, 'samples': 8150016, 'steps': 15917, 'loss/train': 1.0135868787765503} +02/24/2022 22:11:28 - INFO - codeparrot_training - Step 15918: {'lr': 0.000403252619862734, 'samples': 8150528, 'steps': 15918, 'loss/train': 2.693162441253662} +02/24/2022 22:11:34 - INFO - codeparrot_training - Step 15919: {'lr': 0.0004032396919662041, 'samples': 8151040, 'steps': 15919, 'loss/train': 2.888972043991089} +02/24/2022 22:11:37 - INFO - codeparrot_training - Step 15920: {'lr': 0.00040322676341324415, 'samples': 8151552, 'steps': 15920, 'loss/train': 1.0917152166366577} +02/24/2022 22:11:43 - INFO - codeparrot_training - Step 15921: {'lr': 0.0004032138342039093, 'samples': 8152064, 'steps': 15921, 'loss/train': 1.5159212350845337} +02/24/2022 22:11:46 - INFO - codeparrot_training - Step 15922: {'lr': 0.0004032009043382551, 'samples': 8152576, 'steps': 15922, 'loss/train': 2.320605754852295} +02/24/2022 22:11:53 - INFO - codeparrot_training - Step 15923: {'lr': 0.0004031879738163368, 'samples': 8153088, 'steps': 15923, 'loss/train': 2.4609010219573975} +02/24/2022 22:11:56 - INFO - codeparrot_training - Step 15924: {'lr': 0.00040317504263820994, 'samples': 8153600, 'steps': 15924, 'loss/train': 1.660744309425354} +02/24/2022 22:12:02 - INFO - codeparrot_training - Step 15925: {'lr': 0.0004031621108039298, 'samples': 8154112, 'steps': 15925, 'loss/train': 2.0532703399658203} +02/24/2022 22:12:05 - INFO - codeparrot_training - Step 15926: {'lr': 0.0004031491783135518, 'samples': 8154624, 'steps': 15926, 'loss/train': 2.271287679672241} +02/24/2022 22:12:11 - INFO - codeparrot_training - Step 15927: {'lr': 0.0004031362451671314, 'samples': 8155136, 'steps': 15927, 'loss/train': 1.370354413986206} +02/24/2022 22:12:14 - INFO - codeparrot_training - Step 15928: {'lr': 0.00040312331136472385, 'samples': 8155648, 'steps': 15928, 'loss/train': 3.042844772338867} +02/24/2022 22:12:20 - INFO - codeparrot_training - Step 15929: {'lr': 0.00040311037690638477, 'samples': 8156160, 'steps': 15929, 'loss/train': 1.244518756866455} +02/24/2022 22:12:23 - INFO - codeparrot_training - Step 15930: {'lr': 0.00040309744179216936, 'samples': 8156672, 'steps': 15930, 'loss/train': 2.078324794769287} +02/24/2022 22:12:29 - INFO - codeparrot_training - Step 15931: {'lr': 0.0004030845060221332, 'samples': 8157184, 'steps': 15931, 'loss/train': 1.725034475326538} +02/24/2022 22:12:32 - INFO - codeparrot_training - Step 15932: {'lr': 0.00040307156959633154, 'samples': 8157696, 'steps': 15932, 'loss/train': 2.2365212440490723} +02/24/2022 22:12:38 - INFO - codeparrot_training - Step 15933: {'lr': 0.00040305863251482, 'samples': 8158208, 'steps': 15933, 'loss/train': 1.8629781007766724} +02/24/2022 22:12:42 - INFO - codeparrot_training - Step 15934: {'lr': 0.00040304569477765375, 'samples': 8158720, 'steps': 15934, 'loss/train': 1.8882840871810913} +02/24/2022 22:12:47 - INFO - codeparrot_training - Step 15935: {'lr': 0.0004030327563848885, 'samples': 8159232, 'steps': 15935, 'loss/train': 1.6045397520065308} +02/24/2022 22:12:51 - INFO - codeparrot_training - Step 15936: {'lr': 0.00040301981733657934, 'samples': 8159744, 'steps': 15936, 'loss/train': 2.470729351043701} +02/24/2022 22:12:56 - INFO - codeparrot_training - Step 15937: {'lr': 0.00040300687763278196, 'samples': 8160256, 'steps': 15937, 'loss/train': 1.779634714126587} +02/24/2022 22:12:59 - INFO - codeparrot_training - Step 15938: {'lr': 0.0004029939372735517, 'samples': 8160768, 'steps': 15938, 'loss/train': 2.12949275970459} +02/24/2022 22:13:05 - INFO - codeparrot_training - Step 15939: {'lr': 0.000402980996258944, 'samples': 8161280, 'steps': 15939, 'loss/train': 2.9251275062561035} +02/24/2022 22:13:08 - INFO - codeparrot_training - Step 15940: {'lr': 0.00040296805458901427, 'samples': 8161792, 'steps': 15940, 'loss/train': 0.7486175894737244} +02/24/2022 22:13:14 - INFO - codeparrot_training - Step 15941: {'lr': 0.0004029551122638179, 'samples': 8162304, 'steps': 15941, 'loss/train': 2.53194522857666} +02/24/2022 22:13:17 - INFO - codeparrot_training - Step 15942: {'lr': 0.0004029421692834105, 'samples': 8162816, 'steps': 15942, 'loss/train': 1.3122670650482178} +02/24/2022 22:13:23 - INFO - codeparrot_training - Step 15943: {'lr': 0.0004029292256478474, 'samples': 8163328, 'steps': 15943, 'loss/train': 2.271367311477661} +02/24/2022 22:13:26 - INFO - codeparrot_training - Step 15944: {'lr': 0.00040291628135718404, 'samples': 8163840, 'steps': 15944, 'loss/train': 1.7397887706756592} +02/24/2022 22:13:32 - INFO - codeparrot_training - Step 15945: {'lr': 0.0004029033364114759, 'samples': 8164352, 'steps': 15945, 'loss/train': 2.0750691890716553} +02/24/2022 22:13:36 - INFO - codeparrot_training - Step 15946: {'lr': 0.00040289039081077837, 'samples': 8164864, 'steps': 15946, 'loss/train': 2.336592674255371} +02/24/2022 22:13:42 - INFO - codeparrot_training - Step 15947: {'lr': 0.00040287744455514703, 'samples': 8165376, 'steps': 15947, 'loss/train': 1.6677109003067017} +02/24/2022 22:13:46 - INFO - codeparrot_training - Step 15948: {'lr': 0.00040286449764463715, 'samples': 8165888, 'steps': 15948, 'loss/train': 1.7711118459701538} +02/24/2022 22:13:51 - INFO - codeparrot_training - Step 15949: {'lr': 0.0004028515500793044, 'samples': 8166400, 'steps': 15949, 'loss/train': 2.3861563205718994} +02/24/2022 22:13:55 - INFO - codeparrot_training - Step 15950: {'lr': 0.0004028386018592041, 'samples': 8166912, 'steps': 15950, 'loss/train': 2.4803884029388428} +02/24/2022 22:14:00 - INFO - codeparrot_training - Step 15951: {'lr': 0.0004028256529843918, 'samples': 8167424, 'steps': 15951, 'loss/train': 1.5475083589553833} +02/24/2022 22:14:04 - INFO - codeparrot_training - Step 15952: {'lr': 0.00040281270345492295, 'samples': 8167936, 'steps': 15952, 'loss/train': 3.054276704788208} +02/24/2022 22:14:09 - INFO - codeparrot_training - Step 15953: {'lr': 0.00040279975327085294, 'samples': 8168448, 'steps': 15953, 'loss/train': 0.556505024433136} +02/24/2022 22:14:13 - INFO - codeparrot_training - Step 15954: {'lr': 0.00040278680243223733, 'samples': 8168960, 'steps': 15954, 'loss/train': 1.144440770149231} +02/24/2022 22:14:18 - INFO - codeparrot_training - Step 15955: {'lr': 0.00040277385093913154, 'samples': 8169472, 'steps': 15955, 'loss/train': 2.0256495475769043} +02/24/2022 22:14:22 - INFO - codeparrot_training - Step 15956: {'lr': 0.0004027608987915912, 'samples': 8169984, 'steps': 15956, 'loss/train': 2.6622085571289062} +02/24/2022 22:14:27 - INFO - codeparrot_training - Step 15957: {'lr': 0.0004027479459896716, 'samples': 8170496, 'steps': 15957, 'loss/train': 1.8714369535446167} +02/24/2022 22:14:31 - INFO - codeparrot_training - Step 15958: {'lr': 0.0004027349925334282, 'samples': 8171008, 'steps': 15958, 'loss/train': 2.069997549057007} +02/24/2022 22:14:38 - INFO - codeparrot_training - Step 15959: {'lr': 0.00040272203842291676, 'samples': 8171520, 'steps': 15959, 'loss/train': 2.0072083473205566} +02/24/2022 22:14:41 - INFO - codeparrot_training - Step 15960: {'lr': 0.00040270908365819247, 'samples': 8172032, 'steps': 15960, 'loss/train': 1.9171329736709595} +02/24/2022 22:14:47 - INFO - codeparrot_training - Step 15961: {'lr': 0.000402696128239311, 'samples': 8172544, 'steps': 15961, 'loss/train': 1.3928046226501465} +02/24/2022 22:14:50 - INFO - codeparrot_training - Step 15962: {'lr': 0.00040268317216632783, 'samples': 8173056, 'steps': 15962, 'loss/train': 2.8390164375305176} +02/24/2022 22:14:56 - INFO - codeparrot_training - Step 15963: {'lr': 0.0004026702154392984, 'samples': 8173568, 'steps': 15963, 'loss/train': 1.405389428138733} +02/24/2022 22:14:59 - INFO - codeparrot_training - Step 15964: {'lr': 0.0004026572580582783, 'samples': 8174080, 'steps': 15964, 'loss/train': 1.5241801738739014} +02/24/2022 22:15:05 - INFO - codeparrot_training - Step 15965: {'lr': 0.000402644300023323, 'samples': 8174592, 'steps': 15965, 'loss/train': 2.094996929168701} +02/24/2022 22:15:08 - INFO - codeparrot_training - Step 15966: {'lr': 0.0004026313413344879, 'samples': 8175104, 'steps': 15966, 'loss/train': 1.3782401084899902} +02/24/2022 22:15:14 - INFO - codeparrot_training - Step 15967: {'lr': 0.0004026183819918286, 'samples': 8175616, 'steps': 15967, 'loss/train': 2.631969690322876} +02/24/2022 22:15:17 - INFO - codeparrot_training - Step 15968: {'lr': 0.00040260542199540064, 'samples': 8176128, 'steps': 15968, 'loss/train': 2.5334632396698} +02/24/2022 22:15:24 - INFO - codeparrot_training - Step 15969: {'lr': 0.00040259246134525953, 'samples': 8176640, 'steps': 15969, 'loss/train': 1.8797965049743652} +02/24/2022 22:15:28 - INFO - codeparrot_training - Step 15970: {'lr': 0.0004025795000414608, 'samples': 8177152, 'steps': 15970, 'loss/train': 2.752537250518799} +02/24/2022 22:15:33 - INFO - codeparrot_training - Step 15971: {'lr': 0.0004025665380840599, 'samples': 8177664, 'steps': 15971, 'loss/train': 2.20194411277771} +02/24/2022 22:15:37 - INFO - codeparrot_training - Step 15972: {'lr': 0.00040255357547311235, 'samples': 8178176, 'steps': 15972, 'loss/train': 2.222989082336426} +02/24/2022 22:15:42 - INFO - codeparrot_training - Step 15973: {'lr': 0.0004025406122086738, 'samples': 8178688, 'steps': 15973, 'loss/train': 3.159116268157959} +02/24/2022 22:15:46 - INFO - codeparrot_training - Step 15974: {'lr': 0.0004025276482907996, 'samples': 8179200, 'steps': 15974, 'loss/train': 2.6125540733337402} +02/24/2022 22:15:51 - INFO - codeparrot_training - Step 15975: {'lr': 0.0004025146837195455, 'samples': 8179712, 'steps': 15975, 'loss/train': 8.890109062194824} +02/24/2022 22:15:55 - INFO - codeparrot_training - Step 15976: {'lr': 0.00040250171849496685, 'samples': 8180224, 'steps': 15976, 'loss/train': 3.0437135696411133} +02/24/2022 22:16:00 - INFO - codeparrot_training - Step 15977: {'lr': 0.0004024887526171193, 'samples': 8180736, 'steps': 15977, 'loss/train': 1.925284743309021} +02/24/2022 22:16:04 - INFO - codeparrot_training - Step 15978: {'lr': 0.0004024757860860584, 'samples': 8181248, 'steps': 15978, 'loss/train': 1.3974815607070923} +02/24/2022 22:16:10 - INFO - codeparrot_training - Step 15979: {'lr': 0.00040246281890183954, 'samples': 8181760, 'steps': 15979, 'loss/train': 0.5565561652183533} +02/24/2022 22:16:13 - INFO - codeparrot_training - Step 15980: {'lr': 0.0004024498510645185, 'samples': 8182272, 'steps': 15980, 'loss/train': 1.920519232749939} +02/24/2022 22:16:19 - INFO - codeparrot_training - Step 15981: {'lr': 0.00040243688257415064, 'samples': 8182784, 'steps': 15981, 'loss/train': 1.2565110921859741} +02/24/2022 22:16:22 - INFO - codeparrot_training - Step 15982: {'lr': 0.00040242391343079157, 'samples': 8183296, 'steps': 15982, 'loss/train': 1.9350614547729492} +02/24/2022 22:16:28 - INFO - codeparrot_training - Step 15983: {'lr': 0.00040241094363449684, 'samples': 8183808, 'steps': 15983, 'loss/train': 1.8248515129089355} +02/24/2022 22:16:31 - INFO - codeparrot_training - Step 15984: {'lr': 0.000402397973185322, 'samples': 8184320, 'steps': 15984, 'loss/train': 2.6002328395843506} +02/24/2022 22:16:37 - INFO - codeparrot_training - Step 15985: {'lr': 0.0004023850020833227, 'samples': 8184832, 'steps': 15985, 'loss/train': 2.369809627532959} +02/24/2022 22:16:40 - INFO - codeparrot_training - Step 15986: {'lr': 0.00040237203032855446, 'samples': 8185344, 'steps': 15986, 'loss/train': 1.9571726322174072} +02/24/2022 22:16:46 - INFO - codeparrot_training - Step 15987: {'lr': 0.00040235905792107275, 'samples': 8185856, 'steps': 15987, 'loss/train': 2.0061137676239014} +02/24/2022 22:16:49 - INFO - codeparrot_training - Step 15988: {'lr': 0.00040234608486093326, 'samples': 8186368, 'steps': 15988, 'loss/train': 2.181933641433716} +02/24/2022 22:16:57 - INFO - codeparrot_training - Step 15989: {'lr': 0.00040233311114819156, 'samples': 8186880, 'steps': 15989, 'loss/train': 2.0181827545166016} +02/24/2022 22:17:00 - INFO - codeparrot_training - Step 15990: {'lr': 0.00040232013678290316, 'samples': 8187392, 'steps': 15990, 'loss/train': 2.413337469100952} +02/24/2022 22:17:04 - INFO - codeparrot_training - Step 15991: {'lr': 0.0004023071617651236, 'samples': 8187904, 'steps': 15991, 'loss/train': 2.2617931365966797} +02/24/2022 22:17:09 - INFO - codeparrot_training - Step 15992: {'lr': 0.0004022941860949085, 'samples': 8188416, 'steps': 15992, 'loss/train': 2.7247140407562256} +02/24/2022 22:17:13 - INFO - codeparrot_training - Step 15993: {'lr': 0.00040228120977231355, 'samples': 8188928, 'steps': 15993, 'loss/train': 2.6344995498657227} +02/24/2022 22:17:18 - INFO - codeparrot_training - Step 15994: {'lr': 0.00040226823279739427, 'samples': 8189440, 'steps': 15994, 'loss/train': 1.873080849647522} +02/24/2022 22:17:22 - INFO - codeparrot_training - Step 15995: {'lr': 0.00040225525517020616, 'samples': 8189952, 'steps': 15995, 'loss/train': 3.998002290725708} +02/24/2022 22:17:27 - INFO - codeparrot_training - Step 15996: {'lr': 0.0004022422768908049, 'samples': 8190464, 'steps': 15996, 'loss/train': 3.5603339672088623} +02/24/2022 22:17:31 - INFO - codeparrot_training - Step 15997: {'lr': 0.00040222929795924613, 'samples': 8190976, 'steps': 15997, 'loss/train': 0.8349915146827698} +02/24/2022 22:17:36 - INFO - codeparrot_training - Step 15998: {'lr': 0.0004022163183755853, 'samples': 8191488, 'steps': 15998, 'loss/train': 1.9355260133743286} +02/24/2022 22:17:40 - INFO - codeparrot_training - Step 15999: {'lr': 0.0004022033381398781, 'samples': 8192000, 'steps': 15999, 'loss/train': 2.3557024002075195} +02/24/2022 22:17:40 - INFO - codeparrot_training - Evaluating and saving model checkpoint