diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -27463,3 +27463,1010 @@ Use FP16 precision: False 02/25/2022 12:33:36 - INFO - codeparrot_training - Step 26998: {'lr': 0.00023368187268935588, 'samples': 13823488, 'steps': 26998, 'loss/train': 2.1118319034576416} 02/25/2022 12:33:41 - INFO - codeparrot_training - Step 26999: {'lr': 0.00023366554515592423, 'samples': 13824000, 'steps': 26999, 'loss/train': 1.4386999607086182} 02/25/2022 12:33:42 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 12:33:59 - WARNING - huggingface_hub.repository - Several commits (27) will be pushed upstream. +02/25/2022 12:33:59 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 12:34:41 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 86c4c54..9d342aa floral-grass-11 -> floral-grass-11 + +02/25/2022 12:34:45 - INFO - codeparrot_training - Step 27000: {'lr': 0.00023364921769246423, 'samples': 13824512, 'steps': 27000, 'loss/train': 1.4343920946121216} +02/25/2022 12:34:51 - INFO - codeparrot_training - Step 27001: {'lr': 0.0002336328902990459, 'samples': 13825024, 'steps': 27001, 'loss/train': 0.8831506371498108} +02/25/2022 12:34:54 - INFO - codeparrot_training - Step 27002: {'lr': 0.0002336165629757389, 'samples': 13825536, 'steps': 27002, 'loss/train': 1.8967748880386353} +02/25/2022 12:35:00 - INFO - codeparrot_training - Step 27003: {'lr': 0.0002336002357226134, 'samples': 13826048, 'steps': 27003, 'loss/train': 1.9600099325180054} +02/25/2022 12:35:03 - INFO - codeparrot_training - Step 27004: {'lr': 0.00023358390853973928, 'samples': 13826560, 'steps': 27004, 'loss/train': 1.4554760456085205} +02/25/2022 12:35:11 - INFO - codeparrot_training - Step 27005: {'lr': 0.0002335675814271865, 'samples': 13827072, 'steps': 27005, 'loss/train': 0.9962917566299438} +02/25/2022 12:35:14 - INFO - codeparrot_training - Step 27006: {'lr': 0.00023355125438502506, 'samples': 13827584, 'steps': 27006, 'loss/train': 2.461592197418213} +02/25/2022 12:35:20 - INFO - codeparrot_training - Step 27007: {'lr': 0.00023353492741332472, 'samples': 13828096, 'steps': 27007, 'loss/train': 1.1618081331253052} +02/25/2022 12:35:23 - INFO - codeparrot_training - Step 27008: {'lr': 0.00023351860051215554, 'samples': 13828608, 'steps': 27008, 'loss/train': 2.4684417247772217} +02/25/2022 12:35:29 - INFO - codeparrot_training - Step 27009: {'lr': 0.00023350227368158742, 'samples': 13829120, 'steps': 27009, 'loss/train': 2.1745755672454834} +02/25/2022 12:35:32 - INFO - codeparrot_training - Step 27010: {'lr': 0.00023348594692169044, 'samples': 13829632, 'steps': 27010, 'loss/train': 1.3420857191085815} +02/25/2022 12:35:38 - INFO - codeparrot_training - Step 27011: {'lr': 0.00023346962023253431, 'samples': 13830144, 'steps': 27011, 'loss/train': 1.9390747547149658} +02/25/2022 12:35:41 - INFO - codeparrot_training - Step 27012: {'lr': 0.00023345329361418906, 'samples': 13830656, 'steps': 27012, 'loss/train': 2.150869131088257} +02/25/2022 12:35:47 - INFO - codeparrot_training - Step 27013: {'lr': 0.0002334369670667247, 'samples': 13831168, 'steps': 27013, 'loss/train': 1.68816339969635} +02/25/2022 12:35:50 - INFO - codeparrot_training - Step 27014: {'lr': 0.00023342064059021116, 'samples': 13831680, 'steps': 27014, 'loss/train': 1.979455590248108} +02/25/2022 12:35:58 - INFO - codeparrot_training - Step 27015: {'lr': 0.00023340431418471824, 'samples': 13832192, 'steps': 27015, 'loss/train': 0.5500654578208923} +02/25/2022 12:36:01 - INFO - codeparrot_training - Step 27016: {'lr': 0.00023338798785031597, 'samples': 13832704, 'steps': 27016, 'loss/train': 1.2035255432128906} +02/25/2022 12:36:07 - INFO - codeparrot_training - Step 27017: {'lr': 0.00023337166158707428, 'samples': 13833216, 'steps': 27017, 'loss/train': 1.976059079170227} +02/25/2022 12:36:10 - INFO - codeparrot_training - Step 27018: {'lr': 0.0002333553353950632, 'samples': 13833728, 'steps': 27018, 'loss/train': 2.331285238265991} +02/25/2022 12:36:16 - INFO - codeparrot_training - Step 27019: {'lr': 0.00023333900927435255, 'samples': 13834240, 'steps': 27019, 'loss/train': 1.1922298669815063} +02/25/2022 12:36:19 - INFO - codeparrot_training - Step 27020: {'lr': 0.00023332268322501226, 'samples': 13834752, 'steps': 27020, 'loss/train': 1.0364292860031128} +02/25/2022 12:36:25 - INFO - codeparrot_training - Step 27021: {'lr': 0.00023330635724711234, 'samples': 13835264, 'steps': 27021, 'loss/train': 2.257805585861206} +02/25/2022 12:36:28 - INFO - codeparrot_training - Step 27022: {'lr': 0.00023329003134072262, 'samples': 13835776, 'steps': 27022, 'loss/train': 2.1434290409088135} +02/25/2022 12:36:34 - INFO - codeparrot_training - Step 27023: {'lr': 0.00023327370550591314, 'samples': 13836288, 'steps': 27023, 'loss/train': 1.5563085079193115} +02/25/2022 12:36:37 - INFO - codeparrot_training - Step 27024: {'lr': 0.0002332573797427538, 'samples': 13836800, 'steps': 27024, 'loss/train': 1.5208728313446045} +02/25/2022 12:36:43 - INFO - codeparrot_training - Step 27025: {'lr': 0.00023324105405131453, 'samples': 13837312, 'steps': 27025, 'loss/train': 2.117149591445923} +02/25/2022 12:36:46 - INFO - codeparrot_training - Step 27026: {'lr': 0.00023322472843166522, 'samples': 13837824, 'steps': 27026, 'loss/train': 1.9643398523330688} +02/25/2022 12:36:52 - INFO - codeparrot_training - Step 27027: {'lr': 0.0002332084028838759, 'samples': 13838336, 'steps': 27027, 'loss/train': 1.7869101762771606} +02/25/2022 12:36:55 - INFO - codeparrot_training - Step 27028: {'lr': 0.00023319207740801645, 'samples': 13838848, 'steps': 27028, 'loss/train': 2.148238182067871} +02/25/2022 12:37:01 - INFO - codeparrot_training - Step 27029: {'lr': 0.00023317575200415677, 'samples': 13839360, 'steps': 27029, 'loss/train': 1.5981947183609009} +02/25/2022 12:37:04 - INFO - codeparrot_training - Step 27030: {'lr': 0.0002331594266723668, 'samples': 13839872, 'steps': 27030, 'loss/train': 1.2656347751617432} +02/25/2022 12:37:12 - INFO - codeparrot_training - Step 27031: {'lr': 0.00023314310141271652, 'samples': 13840384, 'steps': 27031, 'loss/train': 0.17614024877548218} +02/25/2022 12:37:15 - INFO - codeparrot_training - Step 27032: {'lr': 0.00023312677622527595, 'samples': 13840896, 'steps': 27032, 'loss/train': 2.560861349105835} +02/25/2022 12:37:21 - INFO - codeparrot_training - Step 27033: {'lr': 0.00023311045111011478, 'samples': 13841408, 'steps': 27033, 'loss/train': 2.1723649501800537} +02/25/2022 12:37:24 - INFO - codeparrot_training - Step 27034: {'lr': 0.0002330941260673031, 'samples': 13841920, 'steps': 27034, 'loss/train': 0.8607158660888672} +02/25/2022 12:37:30 - INFO - codeparrot_training - Step 27035: {'lr': 0.0002330778010969108, 'samples': 13842432, 'steps': 27035, 'loss/train': 0.4541480839252472} +02/25/2022 12:37:33 - INFO - codeparrot_training - Step 27036: {'lr': 0.00023306147619900794, 'samples': 13842944, 'steps': 27036, 'loss/train': 1.3741477727890015} +02/25/2022 12:37:39 - INFO - codeparrot_training - Step 27037: {'lr': 0.00023304515137366423, 'samples': 13843456, 'steps': 27037, 'loss/train': 1.7156343460083008} +02/25/2022 12:37:42 - INFO - codeparrot_training - Step 27038: {'lr': 0.0002330288266209497, 'samples': 13843968, 'steps': 27038, 'loss/train': 1.9308557510375977} +02/25/2022 12:37:48 - INFO - codeparrot_training - Step 27039: {'lr': 0.0002330125019409343, 'samples': 13844480, 'steps': 27039, 'loss/train': 2.123274564743042} +02/25/2022 12:37:51 - INFO - codeparrot_training - Step 27040: {'lr': 0.00023299617733368806, 'samples': 13844992, 'steps': 27040, 'loss/train': 1.9808441400527954} +02/25/2022 12:37:58 - INFO - codeparrot_training - Step 27041: {'lr': 0.00023297985279928065, 'samples': 13845504, 'steps': 27041, 'loss/train': 2.7426066398620605} +02/25/2022 12:38:02 - INFO - codeparrot_training - Step 27042: {'lr': 0.00023296352833778217, 'samples': 13846016, 'steps': 27042, 'loss/train': 2.7557485103607178} +02/25/2022 12:38:07 - INFO - codeparrot_training - Step 27043: {'lr': 0.00023294720394926254, 'samples': 13846528, 'steps': 27043, 'loss/train': 2.155454635620117} +02/25/2022 12:38:11 - INFO - codeparrot_training - Step 27044: {'lr': 0.00023293087963379168, 'samples': 13847040, 'steps': 27044, 'loss/train': 2.1809327602386475} +02/25/2022 12:38:16 - INFO - codeparrot_training - Step 27045: {'lr': 0.00023291455539143957, 'samples': 13847552, 'steps': 27045, 'loss/train': 1.895490288734436} +02/25/2022 12:38:20 - INFO - codeparrot_training - Step 27046: {'lr': 0.00023289823122227602, 'samples': 13848064, 'steps': 27046, 'loss/train': 1.6413599252700806} +02/25/2022 12:38:26 - INFO - codeparrot_training - Step 27047: {'lr': 0.000232881907126371, 'samples': 13848576, 'steps': 27047, 'loss/train': 3.226123809814453} +02/25/2022 12:38:29 - INFO - codeparrot_training - Step 27048: {'lr': 0.00023286558310379443, 'samples': 13849088, 'steps': 27048, 'loss/train': 2.4897942543029785} +02/25/2022 12:38:35 - INFO - codeparrot_training - Step 27049: {'lr': 0.00023284925915461642, 'samples': 13849600, 'steps': 27049, 'loss/train': 1.9303382635116577} +02/25/2022 12:38:38 - INFO - codeparrot_training - Step 27050: {'lr': 0.00023283293527890658, 'samples': 13850112, 'steps': 27050, 'loss/train': 2.461439847946167} +02/25/2022 12:38:45 - INFO - codeparrot_training - Step 27051: {'lr': 0.00023281661147673502, 'samples': 13850624, 'steps': 27051, 'loss/train': 1.6616508960723877} +02/25/2022 12:38:49 - INFO - codeparrot_training - Step 27052: {'lr': 0.00023280028774817164, 'samples': 13851136, 'steps': 27052, 'loss/train': 2.972146987915039} +02/25/2022 12:38:54 - INFO - codeparrot_training - Step 27053: {'lr': 0.00023278396409328638, 'samples': 13851648, 'steps': 27053, 'loss/train': 2.261521100997925} +02/25/2022 12:38:58 - INFO - codeparrot_training - Step 27054: {'lr': 0.00023276764051214925, 'samples': 13852160, 'steps': 27054, 'loss/train': 2.037123918533325} +02/25/2022 12:39:03 - INFO - codeparrot_training - Step 27055: {'lr': 0.00023275131700482992, 'samples': 13852672, 'steps': 27055, 'loss/train': 1.8184711933135986} +02/25/2022 12:39:09 - INFO - codeparrot_training - Step 27056: {'lr': 0.00023273499357139853, 'samples': 13853184, 'steps': 27056, 'loss/train': 0.9392097592353821} +02/25/2022 12:39:12 - INFO - codeparrot_training - Step 27057: {'lr': 0.00023271867021192494, 'samples': 13853696, 'steps': 27057, 'loss/train': 2.3508214950561523} +02/25/2022 12:39:18 - INFO - codeparrot_training - Step 27058: {'lr': 0.00023270234692647919, 'samples': 13854208, 'steps': 27058, 'loss/train': 2.173530101776123} +02/25/2022 12:39:21 - INFO - codeparrot_training - Step 27059: {'lr': 0.00023268602371513095, 'samples': 13854720, 'steps': 27059, 'loss/train': 1.7950519323349} +02/25/2022 12:39:29 - INFO - codeparrot_training - Step 27060: {'lr': 0.0002326697005779503, 'samples': 13855232, 'steps': 27060, 'loss/train': 1.7231035232543945} +02/25/2022 12:39:32 - INFO - codeparrot_training - Step 27061: {'lr': 0.0002326533775150072, 'samples': 13855744, 'steps': 27061, 'loss/train': 1.129690408706665} +02/25/2022 12:39:37 - INFO - codeparrot_training - Step 27062: {'lr': 0.00023263705452637154, 'samples': 13856256, 'steps': 27062, 'loss/train': 1.405643105506897} +02/25/2022 12:39:41 - INFO - codeparrot_training - Step 27063: {'lr': 0.00023262073161211316, 'samples': 13856768, 'steps': 27063, 'loss/train': 2.770814895629883} +02/25/2022 12:39:46 - INFO - codeparrot_training - Step 27064: {'lr': 0.00023260440877230206, 'samples': 13857280, 'steps': 27064, 'loss/train': 1.6392837762832642} +02/25/2022 12:39:50 - INFO - codeparrot_training - Step 27065: {'lr': 0.0002325880860070082, 'samples': 13857792, 'steps': 27065, 'loss/train': 2.566894292831421} +02/25/2022 12:39:55 - INFO - codeparrot_training - Step 27066: {'lr': 0.00023257176331630135, 'samples': 13858304, 'steps': 27066, 'loss/train': 2.1903414726257324} +02/25/2022 12:39:59 - INFO - codeparrot_training - Step 27067: {'lr': 0.00023255544070025164, 'samples': 13858816, 'steps': 27067, 'loss/train': 2.4081480503082275} +02/25/2022 12:40:04 - INFO - codeparrot_training - Step 27068: {'lr': 0.00023253911815892888, 'samples': 13859328, 'steps': 27068, 'loss/train': 2.0360560417175293} +02/25/2022 12:40:08 - INFO - codeparrot_training - Step 27069: {'lr': 0.00023252279569240292, 'samples': 13859840, 'steps': 27069, 'loss/train': 2.400786876678467} +02/25/2022 12:40:13 - INFO - codeparrot_training - Step 27070: {'lr': 0.00023250647330074376, 'samples': 13860352, 'steps': 27070, 'loss/train': 2.2619104385375977} +02/25/2022 12:40:17 - INFO - codeparrot_training - Step 27071: {'lr': 0.00023249015098402137, 'samples': 13860864, 'steps': 27071, 'loss/train': 1.9302499294281006} +02/25/2022 12:40:24 - INFO - codeparrot_training - Step 27072: {'lr': 0.00023247382874230562, 'samples': 13861376, 'steps': 27072, 'loss/train': 2.0327818393707275} +02/25/2022 12:40:28 - INFO - codeparrot_training - Step 27073: {'lr': 0.00023245750657566636, 'samples': 13861888, 'steps': 27073, 'loss/train': 1.0842987298965454} +02/25/2022 12:40:33 - INFO - codeparrot_training - Step 27074: {'lr': 0.0002324411844841736, 'samples': 13862400, 'steps': 27074, 'loss/train': 0.9487189650535583} +02/25/2022 12:40:37 - INFO - codeparrot_training - Step 27075: {'lr': 0.0002324248624678973, 'samples': 13862912, 'steps': 27075, 'loss/train': 0.672633171081543} +02/25/2022 12:40:42 - INFO - codeparrot_training - Step 27076: {'lr': 0.00023240854052690722, 'samples': 13863424, 'steps': 27076, 'loss/train': 1.6048749685287476} +02/25/2022 12:40:46 - INFO - codeparrot_training - Step 27077: {'lr': 0.00023239221866127333, 'samples': 13863936, 'steps': 27077, 'loss/train': 1.9225966930389404} +02/25/2022 12:40:51 - INFO - codeparrot_training - Step 27078: {'lr': 0.0002323758968710656, 'samples': 13864448, 'steps': 27078, 'loss/train': 2.140515089035034} +02/25/2022 12:40:55 - INFO - codeparrot_training - Step 27079: {'lr': 0.00023235957515635398, 'samples': 13864960, 'steps': 27079, 'loss/train': 2.1723084449768066} +02/25/2022 12:41:00 - INFO - codeparrot_training - Step 27080: {'lr': 0.0002323432535172084, 'samples': 13865472, 'steps': 27080, 'loss/train': 2.795393466949463} +02/25/2022 12:41:04 - INFO - codeparrot_training - Step 27081: {'lr': 0.0002323269319536986, 'samples': 13865984, 'steps': 27081, 'loss/train': 2.176541566848755} +02/25/2022 12:41:09 - INFO - codeparrot_training - Step 27082: {'lr': 0.00023231061046589464, 'samples': 13866496, 'steps': 27082, 'loss/train': 2.1633191108703613} +02/25/2022 12:41:13 - INFO - codeparrot_training - Step 27083: {'lr': 0.0002322942890538664, 'samples': 13867008, 'steps': 27083, 'loss/train': 1.303206205368042} +02/25/2022 12:41:18 - INFO - codeparrot_training - Step 27084: {'lr': 0.0002322779677176839, 'samples': 13867520, 'steps': 27084, 'loss/train': 2.6134519577026367} +02/25/2022 12:41:22 - INFO - codeparrot_training - Step 27085: {'lr': 0.00023226164645741689, 'samples': 13868032, 'steps': 27085, 'loss/train': 2.4531805515289307} +02/25/2022 12:41:27 - INFO - codeparrot_training - Step 27086: {'lr': 0.0002322453252731353, 'samples': 13868544, 'steps': 27086, 'loss/train': 2.132291793823242} +02/25/2022 12:41:31 - INFO - codeparrot_training - Step 27087: {'lr': 0.00023222900416490912, 'samples': 13869056, 'steps': 27087, 'loss/train': 1.0688408613204956} +02/25/2022 12:41:38 - INFO - codeparrot_training - Step 27088: {'lr': 0.00023221268313280836, 'samples': 13869568, 'steps': 27088, 'loss/train': 1.203363299369812} +02/25/2022 12:41:41 - INFO - codeparrot_training - Step 27089: {'lr': 0.00023219636217690267, 'samples': 13870080, 'steps': 27089, 'loss/train': 2.549347400665283} +02/25/2022 12:41:48 - INFO - codeparrot_training - Step 27090: {'lr': 0.00023218004129726218, 'samples': 13870592, 'steps': 27090, 'loss/train': 2.318603754043579} +02/25/2022 12:41:51 - INFO - codeparrot_training - Step 27091: {'lr': 0.00023216372049395667, 'samples': 13871104, 'steps': 27091, 'loss/train': 2.312936305999756} +02/25/2022 12:41:55 - INFO - codeparrot_training - Step 27092: {'lr': 0.00023214739976705614, 'samples': 13871616, 'steps': 27092, 'loss/train': 2.578765869140625} +02/25/2022 12:42:00 - INFO - codeparrot_training - Step 27093: {'lr': 0.00023213107911663062, 'samples': 13872128, 'steps': 27093, 'loss/train': 1.841038465499878} +02/25/2022 12:42:04 - INFO - codeparrot_training - Step 27094: {'lr': 0.00023211475854274975, 'samples': 13872640, 'steps': 27094, 'loss/train': 2.7046167850494385} +02/25/2022 12:42:09 - INFO - codeparrot_training - Step 27095: {'lr': 0.00023209843804548357, 'samples': 13873152, 'steps': 27095, 'loss/train': 2.2283904552459717} +02/25/2022 12:42:13 - INFO - codeparrot_training - Step 27096: {'lr': 0.000232082117624902, 'samples': 13873664, 'steps': 27096, 'loss/train': 2.249000072479248} +02/25/2022 12:42:18 - INFO - codeparrot_training - Step 27097: {'lr': 0.00023206579728107506, 'samples': 13874176, 'steps': 27097, 'loss/train': 1.733628511428833} +02/25/2022 12:42:22 - INFO - codeparrot_training - Step 27098: {'lr': 0.00023204947701407243, 'samples': 13874688, 'steps': 27098, 'loss/train': 1.3768359422683716} +02/25/2022 12:42:29 - INFO - codeparrot_training - Step 27099: {'lr': 0.00023203315682396414, 'samples': 13875200, 'steps': 27099, 'loss/train': 0.6457328200340271} +02/25/2022 12:42:32 - INFO - codeparrot_training - Step 27100: {'lr': 0.00023201683671082013, 'samples': 13875712, 'steps': 27100, 'loss/train': 2.564887762069702} +02/25/2022 12:42:38 - INFO - codeparrot_training - Step 27101: {'lr': 0.00023200051667471031, 'samples': 13876224, 'steps': 27101, 'loss/train': 1.8683359622955322} +02/25/2022 12:42:41 - INFO - codeparrot_training - Step 27102: {'lr': 0.00023198419671570456, 'samples': 13876736, 'steps': 27102, 'loss/train': 1.8324161767959595} +02/25/2022 12:42:47 - INFO - codeparrot_training - Step 27103: {'lr': 0.00023196787683387276, 'samples': 13877248, 'steps': 27103, 'loss/train': 0.6329008936882019} +02/25/2022 12:42:50 - INFO - codeparrot_training - Step 27104: {'lr': 0.00023195155702928483, 'samples': 13877760, 'steps': 27104, 'loss/train': 2.7817440032958984} +02/25/2022 12:42:56 - INFO - codeparrot_training - Step 27105: {'lr': 0.00023193523730201076, 'samples': 13878272, 'steps': 27105, 'loss/train': 2.1233136653900146} +02/25/2022 12:42:59 - INFO - codeparrot_training - Step 27106: {'lr': 0.0002319189176521204, 'samples': 13878784, 'steps': 27106, 'loss/train': 2.404804229736328} +02/25/2022 12:43:05 - INFO - codeparrot_training - Step 27107: {'lr': 0.00023190259807968357, 'samples': 13879296, 'steps': 27107, 'loss/train': 1.9561907052993774} +02/25/2022 12:43:08 - INFO - codeparrot_training - Step 27108: {'lr': 0.00023188627858477035, 'samples': 13879808, 'steps': 27108, 'loss/train': 2.178588628768921} +02/25/2022 12:43:15 - INFO - codeparrot_training - Step 27109: {'lr': 0.00023186995916745051, 'samples': 13880320, 'steps': 27109, 'loss/train': 1.5745863914489746} +02/25/2022 12:43:19 - INFO - codeparrot_training - Step 27110: {'lr': 0.00023185363982779406, 'samples': 13880832, 'steps': 27110, 'loss/train': 1.8510769605636597} +02/25/2022 12:43:24 - INFO - codeparrot_training - Step 27111: {'lr': 0.0002318373205658708, 'samples': 13881344, 'steps': 27111, 'loss/train': 1.5771312713623047} +02/25/2022 12:43:28 - INFO - codeparrot_training - Step 27112: {'lr': 0.00023182100138175073, 'samples': 13881856, 'steps': 27112, 'loss/train': 2.4008595943450928} +02/25/2022 12:43:34 - INFO - codeparrot_training - Step 27113: {'lr': 0.00023180468227550368, 'samples': 13882368, 'steps': 27113, 'loss/train': 2.3287513256073} +02/25/2022 12:43:37 - INFO - codeparrot_training - Step 27114: {'lr': 0.00023178836324719957, 'samples': 13882880, 'steps': 27114, 'loss/train': 2.1267924308776855} +02/25/2022 12:43:43 - INFO - codeparrot_training - Step 27115: {'lr': 0.00023177204429690847, 'samples': 13883392, 'steps': 27115, 'loss/train': 2.1555447578430176} +02/25/2022 12:43:46 - INFO - codeparrot_training - Step 27116: {'lr': 0.00023175572542469998, 'samples': 13883904, 'steps': 27116, 'loss/train': 2.275451183319092} +02/25/2022 12:43:53 - INFO - codeparrot_training - Step 27117: {'lr': 0.0002317394066306442, 'samples': 13884416, 'steps': 27117, 'loss/train': 1.5330990552902222} +02/25/2022 12:43:57 - INFO - codeparrot_training - Step 27118: {'lr': 0.00023172308791481104, 'samples': 13884928, 'steps': 27118, 'loss/train': 2.3393394947052} +02/25/2022 12:44:02 - INFO - codeparrot_training - Step 27119: {'lr': 0.00023170676927727045, 'samples': 13885440, 'steps': 27119, 'loss/train': 8.617653846740723} +02/25/2022 12:44:06 - INFO - codeparrot_training - Step 27120: {'lr': 0.00023169045071809213, 'samples': 13885952, 'steps': 27120, 'loss/train': 2.4430043697357178} +02/25/2022 12:44:11 - INFO - codeparrot_training - Step 27121: {'lr': 0.00023167413223734612, 'samples': 13886464, 'steps': 27121, 'loss/train': 2.00612473487854} +02/25/2022 12:44:15 - INFO - codeparrot_training - Step 27122: {'lr': 0.00023165781383510228, 'samples': 13886976, 'steps': 27122, 'loss/train': 0.4821772575378418} +02/25/2022 12:44:21 - INFO - codeparrot_training - Step 27123: {'lr': 0.00023164149551143067, 'samples': 13887488, 'steps': 27123, 'loss/train': 2.098259687423706} +02/25/2022 12:44:24 - INFO - codeparrot_training - Step 27124: {'lr': 0.00023162517726640097, 'samples': 13888000, 'steps': 27124, 'loss/train': 0.997495174407959} +02/25/2022 12:44:30 - INFO - codeparrot_training - Step 27125: {'lr': 0.00023160885910008318, 'samples': 13888512, 'steps': 27125, 'loss/train': 1.4886780977249146} +02/25/2022 12:44:33 - INFO - codeparrot_training - Step 27126: {'lr': 0.00023159254101254718, 'samples': 13889024, 'steps': 27126, 'loss/train': 2.1669259071350098} +02/25/2022 12:44:39 - INFO - codeparrot_training - Step 27127: {'lr': 0.00023157622300386287, 'samples': 13889536, 'steps': 27127, 'loss/train': 1.8937004804611206} +02/25/2022 12:44:42 - INFO - codeparrot_training - Step 27128: {'lr': 0.00023155990507410032, 'samples': 13890048, 'steps': 27128, 'loss/train': 2.1748459339141846} +02/25/2022 12:44:48 - INFO - codeparrot_training - Step 27129: {'lr': 0.00023154358722332917, 'samples': 13890560, 'steps': 27129, 'loss/train': 2.040041446685791} +02/25/2022 12:44:51 - INFO - codeparrot_training - Step 27130: {'lr': 0.0002315272694516194, 'samples': 13891072, 'steps': 27130, 'loss/train': 2.510655403137207} +02/25/2022 12:44:57 - INFO - codeparrot_training - Step 27131: {'lr': 0.00023151095175904095, 'samples': 13891584, 'steps': 27131, 'loss/train': 2.11201548576355} +02/25/2022 12:45:00 - INFO - codeparrot_training - Step 27132: {'lr': 0.00023149463414566383, 'samples': 13892096, 'steps': 27132, 'loss/train': 1.9070948362350464} +02/25/2022 12:45:06 - INFO - codeparrot_training - Step 27133: {'lr': 0.0002314783166115577, 'samples': 13892608, 'steps': 27133, 'loss/train': 1.5832808017730713} +02/25/2022 12:45:09 - INFO - codeparrot_training - Step 27134: {'lr': 0.0002314619991567926, 'samples': 13893120, 'steps': 27134, 'loss/train': 1.957318663597107} +02/25/2022 12:45:16 - INFO - codeparrot_training - Step 27135: {'lr': 0.0002314456817814384, 'samples': 13893632, 'steps': 27135, 'loss/train': 1.5475295782089233} +02/25/2022 12:45:20 - INFO - codeparrot_training - Step 27136: {'lr': 0.0002314293644855651, 'samples': 13894144, 'steps': 27136, 'loss/train': 1.2550175189971924} +02/25/2022 12:45:25 - INFO - codeparrot_training - Step 27137: {'lr': 0.00023141304726924242, 'samples': 13894656, 'steps': 27137, 'loss/train': 0.10717252641916275} +02/25/2022 12:45:29 - INFO - codeparrot_training - Step 27138: {'lr': 0.00023139673013254034, 'samples': 13895168, 'steps': 27138, 'loss/train': 1.3263659477233887} +02/25/2022 12:45:35 - INFO - codeparrot_training - Step 27139: {'lr': 0.00023138041307552875, 'samples': 13895680, 'steps': 27139, 'loss/train': 1.5434885025024414} +02/25/2022 12:45:38 - INFO - codeparrot_training - Step 27140: {'lr': 0.00023136409609827757, 'samples': 13896192, 'steps': 27140, 'loss/train': 1.9732574224472046} +02/25/2022 12:45:43 - INFO - codeparrot_training - Step 27141: {'lr': 0.00023134777920085677, 'samples': 13896704, 'steps': 27141, 'loss/train': 1.5463266372680664} +02/25/2022 12:45:47 - INFO - codeparrot_training - Step 27142: {'lr': 0.00023133146238333609, 'samples': 13897216, 'steps': 27142, 'loss/train': 2.0716617107391357} +02/25/2022 12:45:52 - INFO - codeparrot_training - Step 27143: {'lr': 0.00023131514564578547, 'samples': 13897728, 'steps': 27143, 'loss/train': 2.009819269180298} +02/25/2022 12:45:56 - INFO - codeparrot_training - Step 27144: {'lr': 0.00023129882898827484, 'samples': 13898240, 'steps': 27144, 'loss/train': 1.7620108127593994} +02/25/2022 12:46:03 - INFO - codeparrot_training - Step 27145: {'lr': 0.0002312825124108742, 'samples': 13898752, 'steps': 27145, 'loss/train': 1.8677525520324707} +02/25/2022 12:46:07 - INFO - codeparrot_training - Step 27146: {'lr': 0.0002312661959136532, 'samples': 13899264, 'steps': 27146, 'loss/train': 1.3752330541610718} +02/25/2022 12:46:13 - INFO - codeparrot_training - Step 27147: {'lr': 0.0002312498794966819, 'samples': 13899776, 'steps': 27147, 'loss/train': 2.3319666385650635} +02/25/2022 12:46:16 - INFO - codeparrot_training - Step 27148: {'lr': 0.0002312335631600302, 'samples': 13900288, 'steps': 27148, 'loss/train': 2.804506301879883} +02/25/2022 12:46:20 - INFO - codeparrot_training - Step 27149: {'lr': 0.000231217246903768, 'samples': 13900800, 'steps': 27149, 'loss/train': 1.6366115808486938} +02/25/2022 12:46:25 - INFO - codeparrot_training - Step 27150: {'lr': 0.00023120093072796506, 'samples': 13901312, 'steps': 27150, 'loss/train': 1.6016454696655273} +02/25/2022 12:46:31 - INFO - codeparrot_training - Step 27151: {'lr': 0.00023118461463269137, 'samples': 13901824, 'steps': 27151, 'loss/train': 1.7640063762664795} +02/25/2022 12:46:34 - INFO - codeparrot_training - Step 27152: {'lr': 0.00023116829861801686, 'samples': 13902336, 'steps': 27152, 'loss/train': 2.5405426025390625} +02/25/2022 12:46:38 - INFO - codeparrot_training - Step 27153: {'lr': 0.0002311519826840113, 'samples': 13902848, 'steps': 27153, 'loss/train': 2.9520487785339355} +02/25/2022 12:46:43 - INFO - codeparrot_training - Step 27154: {'lr': 0.00023113566683074477, 'samples': 13903360, 'steps': 27154, 'loss/train': 1.6504452228546143} +02/25/2022 12:46:47 - INFO - codeparrot_training - Step 27155: {'lr': 0.00023111935105828702, 'samples': 13903872, 'steps': 27155, 'loss/train': 2.4168541431427} +02/25/2022 12:46:55 - INFO - codeparrot_training - Step 27156: {'lr': 0.00023110303536670793, 'samples': 13904384, 'steps': 27156, 'loss/train': 2.660118579864502} +02/25/2022 12:46:58 - INFO - codeparrot_training - Step 27157: {'lr': 0.00023108671975607741, 'samples': 13904896, 'steps': 27157, 'loss/train': 2.314059019088745} +02/25/2022 12:47:04 - INFO - codeparrot_training - Step 27158: {'lr': 0.00023107040422646547, 'samples': 13905408, 'steps': 27158, 'loss/train': 1.920183777809143} +02/25/2022 12:47:07 - INFO - codeparrot_training - Step 27159: {'lr': 0.0002310540887779419, 'samples': 13905920, 'steps': 27159, 'loss/train': 1.5839715003967285} +02/25/2022 12:47:13 - INFO - codeparrot_training - Step 27160: {'lr': 0.0002310377734105765, 'samples': 13906432, 'steps': 27160, 'loss/train': 1.7582656145095825} +02/25/2022 12:47:16 - INFO - codeparrot_training - Step 27161: {'lr': 0.0002310214581244393, 'samples': 13906944, 'steps': 27161, 'loss/train': 1.8979930877685547} +02/25/2022 12:47:22 - INFO - codeparrot_training - Step 27162: {'lr': 0.00023100514291960017, 'samples': 13907456, 'steps': 27162, 'loss/train': 2.1478185653686523} +02/25/2022 12:47:25 - INFO - codeparrot_training - Step 27163: {'lr': 0.00023098882779612904, 'samples': 13907968, 'steps': 27163, 'loss/train': 2.982717514038086} +02/25/2022 12:47:31 - INFO - codeparrot_training - Step 27164: {'lr': 0.00023097251275409564, 'samples': 13908480, 'steps': 27164, 'loss/train': 1.919095754623413} +02/25/2022 12:47:34 - INFO - codeparrot_training - Step 27165: {'lr': 0.00023095619779356998, 'samples': 13908992, 'steps': 27165, 'loss/train': 1.1683413982391357} +02/25/2022 12:47:42 - INFO - codeparrot_training - Step 27166: {'lr': 0.00023093988291462186, 'samples': 13909504, 'steps': 27166, 'loss/train': 2.4578590393066406} +02/25/2022 12:47:45 - INFO - codeparrot_training - Step 27167: {'lr': 0.0002309235681173214, 'samples': 13910016, 'steps': 27167, 'loss/train': 1.0059542655944824} +02/25/2022 12:47:51 - INFO - codeparrot_training - Step 27168: {'lr': 0.00023090725340173818, 'samples': 13910528, 'steps': 27168, 'loss/train': 1.317365050315857} +02/25/2022 12:47:54 - INFO - codeparrot_training - Step 27169: {'lr': 0.00023089093876794224, 'samples': 13911040, 'steps': 27169, 'loss/train': 2.1020314693450928} +02/25/2022 12:48:00 - INFO - codeparrot_training - Step 27170: {'lr': 0.0002308746242160034, 'samples': 13911552, 'steps': 27170, 'loss/train': 1.8312489986419678} +02/25/2022 12:48:03 - INFO - codeparrot_training - Step 27171: {'lr': 0.00023085830974599176, 'samples': 13912064, 'steps': 27171, 'loss/train': 2.5547494888305664} +02/25/2022 12:48:09 - INFO - codeparrot_training - Step 27172: {'lr': 0.00023084199535797692, 'samples': 13912576, 'steps': 27172, 'loss/train': 2.441981792449951} +02/25/2022 12:48:12 - INFO - codeparrot_training - Step 27173: {'lr': 0.00023082568105202889, 'samples': 13913088, 'steps': 27173, 'loss/train': 1.776422142982483} +02/25/2022 12:48:18 - INFO - codeparrot_training - Step 27174: {'lr': 0.00023080936682821755, 'samples': 13913600, 'steps': 27174, 'loss/train': 1.1487456560134888} +02/25/2022 12:48:21 - INFO - codeparrot_training - Step 27175: {'lr': 0.00023079305268661277, 'samples': 13914112, 'steps': 27175, 'loss/train': 2.4722201824188232} +02/25/2022 12:48:27 - INFO - codeparrot_training - Step 27176: {'lr': 0.0002307767386272846, 'samples': 13914624, 'steps': 27176, 'loss/train': 0.8710139393806458} +02/25/2022 12:48:30 - INFO - codeparrot_training - Step 27177: {'lr': 0.00023076042465030265, 'samples': 13915136, 'steps': 27177, 'loss/train': 1.998792052268982} +02/25/2022 12:48:36 - INFO - codeparrot_training - Step 27178: {'lr': 0.00023074411075573692, 'samples': 13915648, 'steps': 27178, 'loss/train': 0.7048488855361938} +02/25/2022 12:48:39 - INFO - codeparrot_training - Step 27179: {'lr': 0.00023072779694365732, 'samples': 13916160, 'steps': 27179, 'loss/train': 1.5979152917861938} +02/25/2022 12:48:45 - INFO - codeparrot_training - Step 27180: {'lr': 0.00023071148321413383, 'samples': 13916672, 'steps': 27180, 'loss/train': 2.3554227352142334} +02/25/2022 12:48:48 - INFO - codeparrot_training - Step 27181: {'lr': 0.00023069516956723607, 'samples': 13917184, 'steps': 27181, 'loss/train': 1.9561262130737305} +02/25/2022 12:48:55 - INFO - codeparrot_training - Step 27182: {'lr': 0.0002306788560030341, 'samples': 13917696, 'steps': 27182, 'loss/train': 1.8612322807312012} +02/25/2022 12:48:59 - INFO - codeparrot_training - Step 27183: {'lr': 0.0002306625425215978, 'samples': 13918208, 'steps': 27183, 'loss/train': 2.59479022026062} +02/25/2022 12:49:04 - INFO - codeparrot_training - Step 27184: {'lr': 0.00023064622912299712, 'samples': 13918720, 'steps': 27184, 'loss/train': 1.6724401712417603} +02/25/2022 12:49:08 - INFO - codeparrot_training - Step 27185: {'lr': 0.00023062991580730176, 'samples': 13919232, 'steps': 27185, 'loss/train': 2.0189878940582275} +02/25/2022 12:49:13 - INFO - codeparrot_training - Step 27186: {'lr': 0.00023061360257458166, 'samples': 13919744, 'steps': 27186, 'loss/train': 1.8643494844436646} +02/25/2022 12:49:17 - INFO - codeparrot_training - Step 27187: {'lr': 0.00023059728942490673, 'samples': 13920256, 'steps': 27187, 'loss/train': 2.181403160095215} +02/25/2022 12:49:22 - INFO - codeparrot_training - Step 27188: {'lr': 0.00023058097635834693, 'samples': 13920768, 'steps': 27188, 'loss/train': 2.475731372833252} +02/25/2022 12:49:26 - INFO - codeparrot_training - Step 27189: {'lr': 0.00023056466337497206, 'samples': 13921280, 'steps': 27189, 'loss/train': 2.5900321006774902} +02/25/2022 12:49:31 - INFO - codeparrot_training - Step 27190: {'lr': 0.00023054835047485193, 'samples': 13921792, 'steps': 27190, 'loss/train': 2.261118173599243} +02/25/2022 12:49:35 - INFO - codeparrot_training - Step 27191: {'lr': 0.00023053203765805648, 'samples': 13922304, 'steps': 27191, 'loss/train': 1.4558427333831787} +02/25/2022 12:49:42 - INFO - codeparrot_training - Step 27192: {'lr': 0.00023051572492465566, 'samples': 13922816, 'steps': 27192, 'loss/train': 1.7706620693206787} +02/25/2022 12:49:45 - INFO - codeparrot_training - Step 27193: {'lr': 0.00023049941227471933, 'samples': 13923328, 'steps': 27193, 'loss/train': 1.2718539237976074} +02/25/2022 12:49:51 - INFO - codeparrot_training - Step 27194: {'lr': 0.0002304830997083172, 'samples': 13923840, 'steps': 27194, 'loss/train': 1.5267246961593628} +02/25/2022 12:49:54 - INFO - codeparrot_training - Step 27195: {'lr': 0.0002304667872255194, 'samples': 13924352, 'steps': 27195, 'loss/train': 1.4698207378387451} +02/25/2022 12:50:00 - INFO - codeparrot_training - Step 27196: {'lr': 0.00023045047482639556, 'samples': 13924864, 'steps': 27196, 'loss/train': 1.576555848121643} +02/25/2022 12:50:03 - INFO - codeparrot_training - Step 27197: {'lr': 0.00023043416251101575, 'samples': 13925376, 'steps': 27197, 'loss/train': 0.3115084171295166} +02/25/2022 12:50:09 - INFO - codeparrot_training - Step 27198: {'lr': 0.00023041785027944975, 'samples': 13925888, 'steps': 27198, 'loss/train': 2.4912776947021484} +02/25/2022 12:50:12 - INFO - codeparrot_training - Step 27199: {'lr': 0.0002304015381317675, 'samples': 13926400, 'steps': 27199, 'loss/train': 0.9662851691246033} +02/25/2022 12:50:18 - INFO - codeparrot_training - Step 27200: {'lr': 0.0002303852260680388, 'samples': 13926912, 'steps': 27200, 'loss/train': 1.828028678894043} +02/25/2022 12:50:21 - INFO - codeparrot_training - Step 27201: {'lr': 0.0002303689140883335, 'samples': 13927424, 'steps': 27201, 'loss/train': 1.5456184148788452} +02/25/2022 12:50:29 - INFO - codeparrot_training - Step 27202: {'lr': 0.00023035260219272168, 'samples': 13927936, 'steps': 27202, 'loss/train': 2.411005735397339} +02/25/2022 12:50:32 - INFO - codeparrot_training - Step 27203: {'lr': 0.00023033629038127302, 'samples': 13928448, 'steps': 27203, 'loss/train': 2.0583863258361816} +02/25/2022 12:50:38 - INFO - codeparrot_training - Step 27204: {'lr': 0.0002303199786540574, 'samples': 13928960, 'steps': 27204, 'loss/train': 2.002237319946289} +02/25/2022 12:50:41 - INFO - codeparrot_training - Step 27205: {'lr': 0.00023030366701114476, 'samples': 13929472, 'steps': 27205, 'loss/train': 2.032560110092163} +02/25/2022 12:50:47 - INFO - codeparrot_training - Step 27206: {'lr': 0.00023028735545260504, 'samples': 13929984, 'steps': 27206, 'loss/train': 1.6462011337280273} +02/25/2022 12:50:50 - INFO - codeparrot_training - Step 27207: {'lr': 0.00023027104397850795, 'samples': 13930496, 'steps': 27207, 'loss/train': 2.0393178462982178} +02/25/2022 12:50:56 - INFO - codeparrot_training - Step 27208: {'lr': 0.00023025473258892344, 'samples': 13931008, 'steps': 27208, 'loss/train': 1.538419246673584} +02/25/2022 12:50:59 - INFO - codeparrot_training - Step 27209: {'lr': 0.00023023842128392136, 'samples': 13931520, 'steps': 27209, 'loss/train': 1.1460812091827393} +02/25/2022 12:51:05 - INFO - codeparrot_training - Step 27210: {'lr': 0.0002302221100635716, 'samples': 13932032, 'steps': 27210, 'loss/train': 1.9465043544769287} +02/25/2022 12:51:08 - INFO - codeparrot_training - Step 27211: {'lr': 0.00023020579892794418, 'samples': 13932544, 'steps': 27211, 'loss/train': 2.175814151763916} +02/25/2022 12:51:16 - INFO - codeparrot_training - Step 27212: {'lr': 0.00023018948787710872, 'samples': 13933056, 'steps': 27212, 'loss/train': 2.371870279312134} +02/25/2022 12:51:19 - INFO - codeparrot_training - Step 27213: {'lr': 0.00023017317691113518, 'samples': 13933568, 'steps': 27213, 'loss/train': 2.3585050106048584} +02/25/2022 12:51:24 - INFO - codeparrot_training - Step 27214: {'lr': 0.00023015686603009347, 'samples': 13934080, 'steps': 27214, 'loss/train': 1.5567394495010376} +02/25/2022 12:51:28 - INFO - codeparrot_training - Step 27215: {'lr': 0.00023014055523405355, 'samples': 13934592, 'steps': 27215, 'loss/train': 1.6095361709594727} +02/25/2022 12:51:33 - INFO - codeparrot_training - Step 27216: {'lr': 0.00023012424452308508, 'samples': 13935104, 'steps': 27216, 'loss/train': 1.4049408435821533} +02/25/2022 12:51:37 - INFO - codeparrot_training - Step 27217: {'lr': 0.00023010793389725804, 'samples': 13935616, 'steps': 27217, 'loss/train': 2.5144336223602295} +02/25/2022 12:51:43 - INFO - codeparrot_training - Step 27218: {'lr': 0.0002300916233566423, 'samples': 13936128, 'steps': 27218, 'loss/train': 1.5060300827026367} +02/25/2022 12:51:46 - INFO - codeparrot_training - Step 27219: {'lr': 0.00023007531290130783, 'samples': 13936640, 'steps': 27219, 'loss/train': 2.409358501434326} +02/25/2022 12:51:51 - INFO - codeparrot_training - Step 27220: {'lr': 0.00023005900253132429, 'samples': 13937152, 'steps': 27220, 'loss/train': 2.524146795272827} +02/25/2022 12:51:55 - INFO - codeparrot_training - Step 27221: {'lr': 0.0002300426922467616, 'samples': 13937664, 'steps': 27221, 'loss/train': 1.2848683595657349} +02/25/2022 12:52:01 - INFO - codeparrot_training - Step 27222: {'lr': 0.00023002638204768975, 'samples': 13938176, 'steps': 27222, 'loss/train': 1.17233145236969} +02/25/2022 12:52:04 - INFO - codeparrot_training - Step 27223: {'lr': 0.0002300100719341785, 'samples': 13938688, 'steps': 27223, 'loss/train': 1.143632411956787} +02/25/2022 12:52:10 - INFO - codeparrot_training - Step 27224: {'lr': 0.00022999376190629788, 'samples': 13939200, 'steps': 27224, 'loss/train': 2.301959753036499} +02/25/2022 12:52:13 - INFO - codeparrot_training - Step 27225: {'lr': 0.0002299774519641175, 'samples': 13939712, 'steps': 27225, 'loss/train': 1.6440844535827637} +02/25/2022 12:52:18 - INFO - codeparrot_training - Step 27226: {'lr': 0.00022996114210770735, 'samples': 13940224, 'steps': 27226, 'loss/train': 1.3331406116485596} +02/25/2022 12:52:22 - INFO - codeparrot_training - Step 27227: {'lr': 0.00022994483233713733, 'samples': 13940736, 'steps': 27227, 'loss/train': 2.461721658706665} +02/25/2022 12:52:29 - INFO - codeparrot_training - Step 27228: {'lr': 0.00022992852265247738, 'samples': 13941248, 'steps': 27228, 'loss/train': 2.0594546794891357} +02/25/2022 12:52:33 - INFO - codeparrot_training - Step 27229: {'lr': 0.00022991221305379718, 'samples': 13941760, 'steps': 27229, 'loss/train': 2.064624071121216} +02/25/2022 12:52:38 - INFO - codeparrot_training - Step 27230: {'lr': 0.00022989590354116664, 'samples': 13942272, 'steps': 27230, 'loss/train': 1.986768364906311} +02/25/2022 12:52:42 - INFO - codeparrot_training - Step 27231: {'lr': 0.0002298795941146557, 'samples': 13942784, 'steps': 27231, 'loss/train': 1.9297752380371094} +02/25/2022 12:52:47 - INFO - codeparrot_training - Step 27232: {'lr': 0.00022986328477433426, 'samples': 13943296, 'steps': 27232, 'loss/train': 2.690293073654175} +02/25/2022 12:52:53 - INFO - codeparrot_training - Step 27233: {'lr': 0.00022984697552027203, 'samples': 13943808, 'steps': 27233, 'loss/train': 2.1552116870880127} +02/25/2022 12:52:56 - INFO - codeparrot_training - Step 27234: {'lr': 0.00022983066635253897, 'samples': 13944320, 'steps': 27234, 'loss/train': 2.2502670288085938} +02/25/2022 12:53:02 - INFO - codeparrot_training - Step 27235: {'lr': 0.00022981435727120498, 'samples': 13944832, 'steps': 27235, 'loss/train': 1.5809038877487183} +02/25/2022 12:53:05 - INFO - codeparrot_training - Step 27236: {'lr': 0.0002297980482763398, 'samples': 13945344, 'steps': 27236, 'loss/train': 1.8932820558547974} +02/25/2022 12:53:13 - INFO - codeparrot_training - Step 27237: {'lr': 0.00022978173936801343, 'samples': 13945856, 'steps': 27237, 'loss/train': 1.3279972076416016} +02/25/2022 12:53:16 - INFO - codeparrot_training - Step 27238: {'lr': 0.00022976543054629563, 'samples': 13946368, 'steps': 27238, 'loss/train': 4.221709728240967} +02/25/2022 12:53:22 - INFO - codeparrot_training - Step 27239: {'lr': 0.00022974912181125633, 'samples': 13946880, 'steps': 27239, 'loss/train': 2.5972936153411865} +02/25/2022 12:53:25 - INFO - codeparrot_training - Step 27240: {'lr': 0.0002297328131629653, 'samples': 13947392, 'steps': 27240, 'loss/train': 1.7605942487716675} +02/25/2022 12:53:31 - INFO - codeparrot_training - Step 27241: {'lr': 0.00022971650460149254, 'samples': 13947904, 'steps': 27241, 'loss/train': 1.8697713613510132} +02/25/2022 12:53:34 - INFO - codeparrot_training - Step 27242: {'lr': 0.0002297001961269078, 'samples': 13948416, 'steps': 27242, 'loss/train': 1.192578911781311} +02/25/2022 12:53:40 - INFO - codeparrot_training - Step 27243: {'lr': 0.000229683887739281, 'samples': 13948928, 'steps': 27243, 'loss/train': 0.8933893442153931} +02/25/2022 12:53:43 - INFO - codeparrot_training - Step 27244: {'lr': 0.0002296675794386819, 'samples': 13949440, 'steps': 27244, 'loss/train': 2.0607001781463623} +02/25/2022 12:53:49 - INFO - codeparrot_training - Step 27245: {'lr': 0.0002296512712251805, 'samples': 13949952, 'steps': 27245, 'loss/train': 1.4638926982879639} +02/25/2022 12:53:52 - INFO - codeparrot_training - Step 27246: {'lr': 0.00022963496309884662, 'samples': 13950464, 'steps': 27246, 'loss/train': 2.27056622505188} +02/25/2022 12:53:58 - INFO - codeparrot_training - Step 27247: {'lr': 0.00022961865505975002, 'samples': 13950976, 'steps': 27247, 'loss/train': 1.632817029953003} +02/25/2022 12:54:01 - INFO - codeparrot_training - Step 27248: {'lr': 0.00022960234710796062, 'samples': 13951488, 'steps': 27248, 'loss/train': 1.8588624000549316} +02/25/2022 12:54:09 - INFO - codeparrot_training - Step 27249: {'lr': 0.00022958603924354832, 'samples': 13952000, 'steps': 27249, 'loss/train': 1.4994919300079346} +02/25/2022 12:54:12 - INFO - codeparrot_training - Step 27250: {'lr': 0.00022956973146658302, 'samples': 13952512, 'steps': 27250, 'loss/train': 1.7028942108154297} +02/25/2022 12:54:17 - INFO - codeparrot_training - Step 27251: {'lr': 0.00022955342377713439, 'samples': 13953024, 'steps': 27251, 'loss/train': 1.4811912775039673} +02/25/2022 12:54:21 - INFO - codeparrot_training - Step 27252: {'lr': 0.00022953711617527243, 'samples': 13953536, 'steps': 27252, 'loss/train': 1.858709692955017} +02/25/2022 12:54:26 - INFO - codeparrot_training - Step 27253: {'lr': 0.00022952080866106694, 'samples': 13954048, 'steps': 27253, 'loss/train': 1.3391867876052856} +02/25/2022 12:54:30 - INFO - codeparrot_training - Step 27254: {'lr': 0.00022950450123458794, 'samples': 13954560, 'steps': 27254, 'loss/train': 2.081342935562134} +02/25/2022 12:54:35 - INFO - codeparrot_training - Step 27255: {'lr': 0.00022948819389590502, 'samples': 13955072, 'steps': 27255, 'loss/train': 1.4298616647720337} +02/25/2022 12:54:39 - INFO - codeparrot_training - Step 27256: {'lr': 0.00022947188664508816, 'samples': 13955584, 'steps': 27256, 'loss/train': 1.8034216165542603} +02/25/2022 12:54:45 - INFO - codeparrot_training - Step 27257: {'lr': 0.00022945557948220725, 'samples': 13956096, 'steps': 27257, 'loss/train': 2.344057559967041} +02/25/2022 12:54:48 - INFO - codeparrot_training - Step 27258: {'lr': 0.00022943927240733216, 'samples': 13956608, 'steps': 27258, 'loss/train': 1.4996782541275024} +02/25/2022 12:54:55 - INFO - codeparrot_training - Step 27259: {'lr': 0.00022942296542053265, 'samples': 13957120, 'steps': 27259, 'loss/train': 1.955847144126892} +02/25/2022 12:54:59 - INFO - codeparrot_training - Step 27260: {'lr': 0.0002294066585218786, 'samples': 13957632, 'steps': 27260, 'loss/train': 3.158378839492798} +02/25/2022 12:55:04 - INFO - codeparrot_training - Step 27261: {'lr': 0.00022939035171143992, 'samples': 13958144, 'steps': 27261, 'loss/train': 1.7249529361724854} +02/25/2022 12:55:08 - INFO - codeparrot_training - Step 27262: {'lr': 0.00022937404498928637, 'samples': 13958656, 'steps': 27262, 'loss/train': 2.572111129760742} +02/25/2022 12:55:13 - INFO - codeparrot_training - Step 27263: {'lr': 0.000229357738355488, 'samples': 13959168, 'steps': 27263, 'loss/train': 1.3766676187515259} +02/25/2022 12:55:17 - INFO - codeparrot_training - Step 27264: {'lr': 0.0002293414318101144, 'samples': 13959680, 'steps': 27264, 'loss/train': 2.379448652267456} +02/25/2022 12:55:22 - INFO - codeparrot_training - Step 27265: {'lr': 0.00022932512535323558, 'samples': 13960192, 'steps': 27265, 'loss/train': 1.549857258796692} +02/25/2022 12:55:26 - INFO - codeparrot_training - Step 27266: {'lr': 0.0002293088189849213, 'samples': 13960704, 'steps': 27266, 'loss/train': 1.948725938796997} +02/25/2022 12:55:31 - INFO - codeparrot_training - Step 27267: {'lr': 0.00022929251270524165, 'samples': 13961216, 'steps': 27267, 'loss/train': 1.6696780920028687} +02/25/2022 12:55:35 - INFO - codeparrot_training - Step 27268: {'lr': 0.00022927620651426616, 'samples': 13961728, 'steps': 27268, 'loss/train': 2.392348051071167} +02/25/2022 12:55:41 - INFO - codeparrot_training - Step 27269: {'lr': 0.00022925990041206478, 'samples': 13962240, 'steps': 27269, 'loss/train': 1.8572660684585571} +02/25/2022 12:55:44 - INFO - codeparrot_training - Step 27270: {'lr': 0.00022924359439870746, 'samples': 13962752, 'steps': 27270, 'loss/train': 2.9471664428710938} +02/25/2022 12:55:50 - INFO - codeparrot_training - Step 27271: {'lr': 0.00022922728847426396, 'samples': 13963264, 'steps': 27271, 'loss/train': 1.9245359897613525} +02/25/2022 12:55:53 - INFO - codeparrot_training - Step 27272: {'lr': 0.00022921098263880427, 'samples': 13963776, 'steps': 27272, 'loss/train': 1.6871438026428223} +02/25/2022 12:55:58 - INFO - codeparrot_training - Step 27273: {'lr': 0.00022919467689239804, 'samples': 13964288, 'steps': 27273, 'loss/train': 1.7842377424240112} +02/25/2022 12:56:02 - INFO - codeparrot_training - Step 27274: {'lr': 0.0002291783712351152, 'samples': 13964800, 'steps': 27274, 'loss/train': 2.3293793201446533} +02/25/2022 12:56:10 - INFO - codeparrot_training - Step 27275: {'lr': 0.0002291620656670256, 'samples': 13965312, 'steps': 27275, 'loss/train': 2.4978673458099365} +02/25/2022 12:56:13 - INFO - codeparrot_training - Step 27276: {'lr': 0.00022914576018819922, 'samples': 13965824, 'steps': 27276, 'loss/train': 1.9577503204345703} +02/25/2022 12:56:19 - INFO - codeparrot_training - Step 27277: {'lr': 0.00022912945479870565, 'samples': 13966336, 'steps': 27277, 'loss/train': 1.4334455728530884} +02/25/2022 12:56:22 - INFO - codeparrot_training - Step 27278: {'lr': 0.00022911314949861488, 'samples': 13966848, 'steps': 27278, 'loss/train': 2.327921152114868} +02/25/2022 12:56:28 - INFO - codeparrot_training - Step 27279: {'lr': 0.00022909684428799675, 'samples': 13967360, 'steps': 27279, 'loss/train': 2.6758856773376465} +02/25/2022 12:56:31 - INFO - codeparrot_training - Step 27280: {'lr': 0.00022908053916692117, 'samples': 13967872, 'steps': 27280, 'loss/train': 2.4031214714050293} +02/25/2022 12:56:37 - INFO - codeparrot_training - Step 27281: {'lr': 0.00022906423413545786, 'samples': 13968384, 'steps': 27281, 'loss/train': 1.354489803314209} +02/25/2022 12:56:40 - INFO - codeparrot_training - Step 27282: {'lr': 0.00022904792919367666, 'samples': 13968896, 'steps': 27282, 'loss/train': 1.9781063795089722} +02/25/2022 12:56:44 - INFO - codeparrot_training - Step 27283: {'lr': 0.0002290316243416476, 'samples': 13969408, 'steps': 27283, 'loss/train': 1.4685373306274414} +02/25/2022 12:56:49 - INFO - codeparrot_training - Step 27284: {'lr': 0.00022901531957944033, 'samples': 13969920, 'steps': 27284, 'loss/train': 1.7582650184631348} +02/25/2022 12:56:53 - INFO - codeparrot_training - Step 27285: {'lr': 0.00022899901490712482, 'samples': 13970432, 'steps': 27285, 'loss/train': 2.623544692993164} +02/25/2022 12:57:00 - INFO - codeparrot_training - Step 27286: {'lr': 0.00022898271032477087, 'samples': 13970944, 'steps': 27286, 'loss/train': 1.838005781173706} +02/25/2022 12:57:04 - INFO - codeparrot_training - Step 27287: {'lr': 0.00022896640583244823, 'samples': 13971456, 'steps': 27287, 'loss/train': 1.701197624206543} +02/25/2022 12:57:09 - INFO - codeparrot_training - Step 27288: {'lr': 0.00022895010143022686, 'samples': 13971968, 'steps': 27288, 'loss/train': 2.727379083633423} +02/25/2022 12:57:13 - INFO - codeparrot_training - Step 27289: {'lr': 0.0002289337971181766, 'samples': 13972480, 'steps': 27289, 'loss/train': 0.43912503123283386} +02/25/2022 12:57:18 - INFO - codeparrot_training - Step 27290: {'lr': 0.00022891749289636725, 'samples': 13972992, 'steps': 27290, 'loss/train': 2.4275786876678467} +02/25/2022 12:57:22 - INFO - codeparrot_training - Step 27291: {'lr': 0.00022890118876486864, 'samples': 13973504, 'steps': 27291, 'loss/train': 1.52302086353302} +02/25/2022 12:57:27 - INFO - codeparrot_training - Step 27292: {'lr': 0.00022888488472375067, 'samples': 13974016, 'steps': 27292, 'loss/train': 1.3313792943954468} +02/25/2022 12:57:31 - INFO - codeparrot_training - Step 27293: {'lr': 0.0002288685807730832, 'samples': 13974528, 'steps': 27293, 'loss/train': 1.9958349466323853} +02/25/2022 12:57:36 - INFO - codeparrot_training - Step 27294: {'lr': 0.00022885227691293595, 'samples': 13975040, 'steps': 27294, 'loss/train': 1.7047677040100098} +02/25/2022 12:57:40 - INFO - codeparrot_training - Step 27295: {'lr': 0.0002288359731433788, 'samples': 13975552, 'steps': 27295, 'loss/train': 2.0008506774902344} +02/25/2022 12:57:47 - INFO - codeparrot_training - Step 27296: {'lr': 0.00022881966946448166, 'samples': 13976064, 'steps': 27296, 'loss/train': 1.6596100330352783} +02/25/2022 12:57:50 - INFO - codeparrot_training - Step 27297: {'lr': 0.0002288033658763143, 'samples': 13976576, 'steps': 27297, 'loss/train': 1.7757279872894287} +02/25/2022 12:57:56 - INFO - codeparrot_training - Step 27298: {'lr': 0.0002287870623789467, 'samples': 13977088, 'steps': 27298, 'loss/train': 1.891709566116333} +02/25/2022 12:57:59 - INFO - codeparrot_training - Step 27299: {'lr': 0.00022877075897244847, 'samples': 13977600, 'steps': 27299, 'loss/train': 1.0397292375564575} +02/25/2022 12:58:05 - INFO - codeparrot_training - Step 27300: {'lr': 0.0002287544556568896, 'samples': 13978112, 'steps': 27300, 'loss/train': 3.0717060565948486} +02/25/2022 12:58:09 - INFO - codeparrot_training - Step 27301: {'lr': 0.00022873815243233987, 'samples': 13978624, 'steps': 27301, 'loss/train': 1.8659656047821045} +02/25/2022 12:58:14 - INFO - codeparrot_training - Step 27302: {'lr': 0.00022872184929886928, 'samples': 13979136, 'steps': 27302, 'loss/train': 2.4029877185821533} +02/25/2022 12:58:18 - INFO - codeparrot_training - Step 27303: {'lr': 0.00022870554625654737, 'samples': 13979648, 'steps': 27303, 'loss/train': 1.7859495878219604} +02/25/2022 12:58:23 - INFO - codeparrot_training - Step 27304: {'lr': 0.0002286892433054442, 'samples': 13980160, 'steps': 27304, 'loss/train': 2.1265759468078613} +02/25/2022 12:58:27 - INFO - codeparrot_training - Step 27305: {'lr': 0.0002286729404456295, 'samples': 13980672, 'steps': 27305, 'loss/train': 2.41939640045166} +02/25/2022 12:58:34 - INFO - codeparrot_training - Step 27306: {'lr': 0.0002286566376771733, 'samples': 13981184, 'steps': 27306, 'loss/train': 1.5045340061187744} +02/25/2022 12:58:37 - INFO - codeparrot_training - Step 27307: {'lr': 0.00022864033500014514, 'samples': 13981696, 'steps': 27307, 'loss/train': 2.2248873710632324} +02/25/2022 12:58:43 - INFO - codeparrot_training - Step 27308: {'lr': 0.00022862403241461502, 'samples': 13982208, 'steps': 27308, 'loss/train': 1.2735166549682617} +02/25/2022 12:58:46 - INFO - codeparrot_training - Step 27309: {'lr': 0.00022860772992065273, 'samples': 13982720, 'steps': 27309, 'loss/train': 1.8813750743865967} +02/25/2022 12:58:52 - INFO - codeparrot_training - Step 27310: {'lr': 0.0002285914275183282, 'samples': 13983232, 'steps': 27310, 'loss/train': 1.3298530578613281} +02/25/2022 12:58:55 - INFO - codeparrot_training - Step 27311: {'lr': 0.00022857512520771124, 'samples': 13983744, 'steps': 27311, 'loss/train': 1.666682481765747} +02/25/2022 12:59:01 - INFO - codeparrot_training - Step 27312: {'lr': 0.00022855882298887154, 'samples': 13984256, 'steps': 27312, 'loss/train': 1.1620745658874512} +02/25/2022 12:59:04 - INFO - codeparrot_training - Step 27313: {'lr': 0.00022854252086187905, 'samples': 13984768, 'steps': 27313, 'loss/train': 1.8478984832763672} +02/25/2022 12:59:10 - INFO - codeparrot_training - Step 27314: {'lr': 0.00022852621882680357, 'samples': 13985280, 'steps': 27314, 'loss/train': 0.6565882563591003} +02/25/2022 12:59:13 - INFO - codeparrot_training - Step 27315: {'lr': 0.0002285099168837151, 'samples': 13985792, 'steps': 27315, 'loss/train': 2.1913156509399414} +02/25/2022 12:59:20 - INFO - codeparrot_training - Step 27316: {'lr': 0.00022849361503268317, 'samples': 13986304, 'steps': 27316, 'loss/train': 2.696385145187378} +02/25/2022 12:59:24 - INFO - codeparrot_training - Step 27317: {'lr': 0.0002284773132737778, 'samples': 13986816, 'steps': 27317, 'loss/train': 1.8935333490371704} +02/25/2022 12:59:30 - INFO - codeparrot_training - Step 27318: {'lr': 0.00022846101160706875, 'samples': 13987328, 'steps': 27318, 'loss/train': 1.5423376560211182} +02/25/2022 12:59:33 - INFO - codeparrot_training - Step 27319: {'lr': 0.00022844471003262597, 'samples': 13987840, 'steps': 27319, 'loss/train': 0.28522610664367676} +02/25/2022 12:59:39 - INFO - codeparrot_training - Step 27320: {'lr': 0.00022842840855051918, 'samples': 13988352, 'steps': 27320, 'loss/train': 1.079040765762329} +02/25/2022 12:59:42 - INFO - codeparrot_training - Step 27321: {'lr': 0.0002284121071608182, 'samples': 13988864, 'steps': 27321, 'loss/train': 0.2602623701095581} +02/25/2022 12:59:48 - INFO - codeparrot_training - Step 27322: {'lr': 0.0002283958058635929, 'samples': 13989376, 'steps': 27322, 'loss/train': 1.2343724966049194} +02/25/2022 12:59:51 - INFO - codeparrot_training - Step 27323: {'lr': 0.00022837950465891317, 'samples': 13989888, 'steps': 27323, 'loss/train': 1.2429561614990234} +02/25/2022 12:59:57 - INFO - codeparrot_training - Step 27324: {'lr': 0.00022836320354684876, 'samples': 13990400, 'steps': 27324, 'loss/train': 0.31624525785446167} +02/25/2022 13:00:00 - INFO - codeparrot_training - Step 27325: {'lr': 0.00022834690252746947, 'samples': 13990912, 'steps': 27325, 'loss/train': 2.132636547088623} +02/25/2022 13:00:06 - INFO - codeparrot_training - Step 27326: {'lr': 0.00022833060160084524, 'samples': 13991424, 'steps': 27326, 'loss/train': 0.9071618318557739} +02/25/2022 13:00:09 - INFO - codeparrot_training - Step 27327: {'lr': 0.00022831430076704573, 'samples': 13991936, 'steps': 27327, 'loss/train': 2.5429437160491943} +02/25/2022 13:00:15 - INFO - codeparrot_training - Step 27328: {'lr': 0.00022829800002614094, 'samples': 13992448, 'steps': 27328, 'loss/train': 1.949150800704956} +02/25/2022 13:00:18 - INFO - codeparrot_training - Step 27329: {'lr': 0.0002282816993782006, 'samples': 13992960, 'steps': 27329, 'loss/train': 1.4601025581359863} +02/25/2022 13:00:24 - INFO - codeparrot_training - Step 27330: {'lr': 0.0002282653988232946, 'samples': 13993472, 'steps': 27330, 'loss/train': 0.41124340891838074} +02/25/2022 13:00:27 - INFO - codeparrot_training - Step 27331: {'lr': 0.0002282490983614927, 'samples': 13993984, 'steps': 27331, 'loss/train': 1.2325631380081177} +02/25/2022 13:00:37 - INFO - codeparrot_training - Step 27332: {'lr': 0.00022823279799286472, 'samples': 13994496, 'steps': 27332, 'loss/train': 1.7864919900894165} +02/25/2022 13:00:40 - INFO - codeparrot_training - Step 27333: {'lr': 0.00022821649771748067, 'samples': 13995008, 'steps': 27333, 'loss/train': 2.319870710372925} +02/25/2022 13:00:44 - INFO - codeparrot_training - Step 27334: {'lr': 0.0002282001975354101, 'samples': 13995520, 'steps': 27334, 'loss/train': 1.877912998199463} +02/25/2022 13:00:49 - INFO - codeparrot_training - Step 27335: {'lr': 0.00022818389744672297, 'samples': 13996032, 'steps': 27335, 'loss/train': 2.3093326091766357} +02/25/2022 13:00:53 - INFO - codeparrot_training - Step 27336: {'lr': 0.00022816759745148906, 'samples': 13996544, 'steps': 27336, 'loss/train': 1.4386746883392334} +02/25/2022 13:00:58 - INFO - codeparrot_training - Step 27337: {'lr': 0.00022815129754977836, 'samples': 13997056, 'steps': 27337, 'loss/train': 2.5465614795684814} +02/25/2022 13:01:02 - INFO - codeparrot_training - Step 27338: {'lr': 0.00022813499774166046, 'samples': 13997568, 'steps': 27338, 'loss/train': 2.212357997894287} +02/25/2022 13:01:07 - INFO - codeparrot_training - Step 27339: {'lr': 0.00022811869802720532, 'samples': 13998080, 'steps': 27339, 'loss/train': 1.942797064781189} +02/25/2022 13:01:11 - INFO - codeparrot_training - Step 27340: {'lr': 0.00022810239840648268, 'samples': 13998592, 'steps': 27340, 'loss/train': 2.6351046562194824} +02/25/2022 13:01:16 - INFO - codeparrot_training - Step 27341: {'lr': 0.00022808609887956254, 'samples': 13999104, 'steps': 27341, 'loss/train': 1.2209769487380981} +02/25/2022 13:01:20 - INFO - codeparrot_training - Step 27342: {'lr': 0.00022806979944651446, 'samples': 13999616, 'steps': 27342, 'loss/train': 1.6552488803863525} +02/25/2022 13:01:27 - INFO - codeparrot_training - Step 27343: {'lr': 0.0002280535001074084, 'samples': 14000128, 'steps': 27343, 'loss/train': 1.8452142477035522} +02/25/2022 13:01:31 - INFO - codeparrot_training - Step 27344: {'lr': 0.00022803720086231422, 'samples': 14000640, 'steps': 27344, 'loss/train': 1.879871129989624} +02/25/2022 13:01:36 - INFO - codeparrot_training - Step 27345: {'lr': 0.00022802090171130166, 'samples': 14001152, 'steps': 27345, 'loss/train': 1.9572960138320923} +02/25/2022 13:01:40 - INFO - codeparrot_training - Step 27346: {'lr': 0.0002280046026544407, 'samples': 14001664, 'steps': 27346, 'loss/train': 1.7477837800979614} +02/25/2022 13:01:45 - INFO - codeparrot_training - Step 27347: {'lr': 0.00022798830369180094, 'samples': 14002176, 'steps': 27347, 'loss/train': 2.547405242919922} +02/25/2022 13:01:49 - INFO - codeparrot_training - Step 27348: {'lr': 0.00022797200482345227, 'samples': 14002688, 'steps': 27348, 'loss/train': 1.622405767440796} +02/25/2022 13:01:54 - INFO - codeparrot_training - Step 27349: {'lr': 0.00022795570604946454, 'samples': 14003200, 'steps': 27349, 'loss/train': 2.198040723800659} +02/25/2022 13:01:58 - INFO - codeparrot_training - Step 27350: {'lr': 0.00022793940736990766, 'samples': 14003712, 'steps': 27350, 'loss/train': 1.9251022338867188} +02/25/2022 13:02:03 - INFO - codeparrot_training - Step 27351: {'lr': 0.0002279231087848513, 'samples': 14004224, 'steps': 27351, 'loss/train': 1.9350048303604126} +02/25/2022 13:02:07 - INFO - codeparrot_training - Step 27352: {'lr': 0.0002279068102943653, 'samples': 14004736, 'steps': 27352, 'loss/train': 2.227863073348999} +02/25/2022 13:02:14 - INFO - codeparrot_training - Step 27353: {'lr': 0.0002278905118985195, 'samples': 14005248, 'steps': 27353, 'loss/train': 1.5180689096450806} +02/25/2022 13:02:18 - INFO - codeparrot_training - Step 27354: {'lr': 0.00022787421359738387, 'samples': 14005760, 'steps': 27354, 'loss/train': 1.5994786024093628} +02/25/2022 13:02:23 - INFO - codeparrot_training - Step 27355: {'lr': 0.00022785791539102794, 'samples': 14006272, 'steps': 27355, 'loss/train': 3.0293421745300293} +02/25/2022 13:02:27 - INFO - codeparrot_training - Step 27356: {'lr': 0.00022784161727952166, 'samples': 14006784, 'steps': 27356, 'loss/train': 2.085681438446045} +02/25/2022 13:02:32 - INFO - codeparrot_training - Step 27357: {'lr': 0.00022782531926293488, 'samples': 14007296, 'steps': 27357, 'loss/train': 2.179945707321167} +02/25/2022 13:02:36 - INFO - codeparrot_training - Step 27358: {'lr': 0.00022780902134133738, 'samples': 14007808, 'steps': 27358, 'loss/train': 1.0897220373153687} +02/25/2022 13:02:41 - INFO - codeparrot_training - Step 27359: {'lr': 0.0002277927235147991, 'samples': 14008320, 'steps': 27359, 'loss/train': 1.6038397550582886} +02/25/2022 13:02:45 - INFO - codeparrot_training - Step 27360: {'lr': 0.00022777642578338965, 'samples': 14008832, 'steps': 27360, 'loss/train': 2.2412548065185547} +02/25/2022 13:02:50 - INFO - codeparrot_training - Step 27361: {'lr': 0.00022776012814717888, 'samples': 14009344, 'steps': 27361, 'loss/train': 1.0821096897125244} +02/25/2022 13:02:54 - INFO - codeparrot_training - Step 27362: {'lr': 0.0002277438306062367, 'samples': 14009856, 'steps': 27362, 'loss/train': 1.806030511856079} +02/25/2022 13:03:01 - INFO - codeparrot_training - Step 27363: {'lr': 0.00022772753316063302, 'samples': 14010368, 'steps': 27363, 'loss/train': 2.419410467147827} +02/25/2022 13:03:04 - INFO - codeparrot_training - Step 27364: {'lr': 0.00022771123581043738, 'samples': 14010880, 'steps': 27364, 'loss/train': 2.57742977142334} +02/25/2022 13:03:10 - INFO - codeparrot_training - Step 27365: {'lr': 0.0002276949385557197, 'samples': 14011392, 'steps': 27365, 'loss/train': 2.073284149169922} +02/25/2022 13:03:13 - INFO - codeparrot_training - Step 27366: {'lr': 0.0002276786413965499, 'samples': 14011904, 'steps': 27366, 'loss/train': 2.0743749141693115} +02/25/2022 13:03:19 - INFO - codeparrot_training - Step 27367: {'lr': 0.00022766234433299764, 'samples': 14012416, 'steps': 27367, 'loss/train': 1.8015978336334229} +02/25/2022 13:03:22 - INFO - codeparrot_training - Step 27368: {'lr': 0.0002276460473651329, 'samples': 14012928, 'steps': 27368, 'loss/train': 1.7960762977600098} +02/25/2022 13:03:28 - INFO - codeparrot_training - Step 27369: {'lr': 0.0002276297504930253, 'samples': 14013440, 'steps': 27369, 'loss/train': 8.626423835754395} +02/25/2022 13:03:31 - INFO - codeparrot_training - Step 27370: {'lr': 0.00022761345371674483, 'samples': 14013952, 'steps': 27370, 'loss/train': 1.6085503101348877} +02/25/2022 13:03:37 - INFO - codeparrot_training - Step 27371: {'lr': 0.00022759715703636114, 'samples': 14014464, 'steps': 27371, 'loss/train': 2.1694164276123047} +02/25/2022 13:03:40 - INFO - codeparrot_training - Step 27372: {'lr': 0.00022758086045194417, 'samples': 14014976, 'steps': 27372, 'loss/train': 2.093355178833008} +02/25/2022 13:03:46 - INFO - codeparrot_training - Step 27373: {'lr': 0.00022756456396356368, 'samples': 14015488, 'steps': 27373, 'loss/train': 2.26033616065979} +02/25/2022 13:03:50 - INFO - codeparrot_training - Step 27374: {'lr': 0.00022754826757128942, 'samples': 14016000, 'steps': 27374, 'loss/train': 2.317751407623291} +02/25/2022 13:03:55 - INFO - codeparrot_training - Step 27375: {'lr': 0.00022753197127519127, 'samples': 14016512, 'steps': 27375, 'loss/train': 0.4168996810913086} +02/25/2022 13:03:59 - INFO - codeparrot_training - Step 27376: {'lr': 0.00022751567507533905, 'samples': 14017024, 'steps': 27376, 'loss/train': 1.4742178916931152} +02/25/2022 13:04:04 - INFO - codeparrot_training - Step 27377: {'lr': 0.00022749937897180257, 'samples': 14017536, 'steps': 27377, 'loss/train': 2.1209206581115723} +02/25/2022 13:04:08 - INFO - codeparrot_training - Step 27378: {'lr': 0.0002274830829646515, 'samples': 14018048, 'steps': 27378, 'loss/train': 0.5565173625946045} +02/25/2022 13:04:15 - INFO - codeparrot_training - Step 27379: {'lr': 0.0002274667870539558, 'samples': 14018560, 'steps': 27379, 'loss/train': 2.070378065109253} +02/25/2022 13:04:18 - INFO - codeparrot_training - Step 27380: {'lr': 0.0002274504912397852, 'samples': 14019072, 'steps': 27380, 'loss/train': 2.1325156688690186} +02/25/2022 13:04:24 - INFO - codeparrot_training - Step 27381: {'lr': 0.00022743419552220963, 'samples': 14019584, 'steps': 27381, 'loss/train': 1.6749143600463867} +02/25/2022 13:04:27 - INFO - codeparrot_training - Step 27382: {'lr': 0.00022741789990129874, 'samples': 14020096, 'steps': 27382, 'loss/train': 1.7678847312927246} +02/25/2022 13:04:33 - INFO - codeparrot_training - Step 27383: {'lr': 0.00022740160437712236, 'samples': 14020608, 'steps': 27383, 'loss/train': 1.9126551151275635} +02/25/2022 13:04:36 - INFO - codeparrot_training - Step 27384: {'lr': 0.00022738530894975034, 'samples': 14021120, 'steps': 27384, 'loss/train': 0.7265369296073914} +02/25/2022 13:04:42 - INFO - codeparrot_training - Step 27385: {'lr': 0.00022736901361925261, 'samples': 14021632, 'steps': 27385, 'loss/train': 1.8522812128067017} +02/25/2022 13:04:45 - INFO - codeparrot_training - Step 27386: {'lr': 0.00022735271838569872, 'samples': 14022144, 'steps': 27386, 'loss/train': 1.4959352016448975} +02/25/2022 13:04:51 - INFO - codeparrot_training - Step 27387: {'lr': 0.00022733642324915856, 'samples': 14022656, 'steps': 27387, 'loss/train': 2.1490225791931152} +02/25/2022 13:04:54 - INFO - codeparrot_training - Step 27388: {'lr': 0.00022732012820970198, 'samples': 14023168, 'steps': 27388, 'loss/train': 2.401810646057129} +02/25/2022 13:05:02 - INFO - codeparrot_training - Step 27389: {'lr': 0.0002273038332673989, 'samples': 14023680, 'steps': 27389, 'loss/train': 2.265002965927124} +02/25/2022 13:05:05 - INFO - codeparrot_training - Step 27390: {'lr': 0.00022728753842231886, 'samples': 14024192, 'steps': 27390, 'loss/train': 1.6038258075714111} +02/25/2022 13:05:10 - INFO - codeparrot_training - Step 27391: {'lr': 0.0002272712436745318, 'samples': 14024704, 'steps': 27391, 'loss/train': 1.2745909690856934} +02/25/2022 13:05:14 - INFO - codeparrot_training - Step 27392: {'lr': 0.0002272549490241075, 'samples': 14025216, 'steps': 27392, 'loss/train': 1.4223980903625488} +02/25/2022 13:05:19 - INFO - codeparrot_training - Step 27393: {'lr': 0.0002272386544711158, 'samples': 14025728, 'steps': 27393, 'loss/train': 2.1912713050842285} +02/25/2022 13:05:23 - INFO - codeparrot_training - Step 27394: {'lr': 0.00022722236001562656, 'samples': 14026240, 'steps': 27394, 'loss/train': 1.4348034858703613} +02/25/2022 13:05:28 - INFO - codeparrot_training - Step 27395: {'lr': 0.00022720606565770942, 'samples': 14026752, 'steps': 27395, 'loss/train': 1.9353013038635254} +02/25/2022 13:05:32 - INFO - codeparrot_training - Step 27396: {'lr': 0.00022718977139743425, 'samples': 14027264, 'steps': 27396, 'loss/train': 1.378192663192749} +02/25/2022 13:05:37 - INFO - codeparrot_training - Step 27397: {'lr': 0.00022717347723487087, 'samples': 14027776, 'steps': 27397, 'loss/train': 1.6723642349243164} +02/25/2022 13:05:41 - INFO - codeparrot_training - Step 27398: {'lr': 0.00022715718317008912, 'samples': 14028288, 'steps': 27398, 'loss/train': 2.1466236114501953} +02/25/2022 13:05:48 - INFO - codeparrot_training - Step 27399: {'lr': 0.00022714088920315867, 'samples': 14028800, 'steps': 27399, 'loss/train': 2.059225559234619} +02/25/2022 13:05:53 - INFO - codeparrot_training - Step 27400: {'lr': 0.00022712459533414943, 'samples': 14029312, 'steps': 27400, 'loss/train': 1.8407567739486694} +02/25/2022 13:05:57 - INFO - codeparrot_training - Step 27401: {'lr': 0.00022710830156313108, 'samples': 14029824, 'steps': 27401, 'loss/train': 1.5227664709091187} +02/25/2022 13:06:02 - INFO - codeparrot_training - Step 27402: {'lr': 0.00022709200789017368, 'samples': 14030336, 'steps': 27402, 'loss/train': 1.9594672918319702} +02/25/2022 13:06:06 - INFO - codeparrot_training - Step 27403: {'lr': 0.00022707571431534668, 'samples': 14030848, 'steps': 27403, 'loss/train': 1.805530309677124} +02/25/2022 13:06:12 - INFO - codeparrot_training - Step 27404: {'lr': 0.00022705942083872004, 'samples': 14031360, 'steps': 27404, 'loss/train': 2.0772674083709717} +02/25/2022 13:06:15 - INFO - codeparrot_training - Step 27405: {'lr': 0.0002270431274603636, 'samples': 14031872, 'steps': 27405, 'loss/train': 2.0689711570739746} +02/25/2022 13:06:19 - INFO - codeparrot_training - Step 27406: {'lr': 0.00022702683418034713, 'samples': 14032384, 'steps': 27406, 'loss/train': 1.2383630275726318} +02/25/2022 13:06:24 - INFO - codeparrot_training - Step 27407: {'lr': 0.00022701054099874044, 'samples': 14032896, 'steps': 27407, 'loss/train': 1.6797540187835693} +02/25/2022 13:06:30 - INFO - codeparrot_training - Step 27408: {'lr': 0.00022699424791561325, 'samples': 14033408, 'steps': 27408, 'loss/train': 3.1989011764526367} +02/25/2022 13:06:33 - INFO - codeparrot_training - Step 27409: {'lr': 0.00022697795493103535, 'samples': 14033920, 'steps': 27409, 'loss/train': 2.340059995651245} +02/25/2022 13:06:40 - INFO - codeparrot_training - Step 27410: {'lr': 0.00022696166204507666, 'samples': 14034432, 'steps': 27410, 'loss/train': 1.4605093002319336} +02/25/2022 13:06:44 - INFO - codeparrot_training - Step 27411: {'lr': 0.00022694536925780688, 'samples': 14034944, 'steps': 27411, 'loss/train': 1.4475537538528442} +02/25/2022 13:06:47 - INFO - codeparrot_training - Step 27412: {'lr': 0.00022692907656929575, 'samples': 14035456, 'steps': 27412, 'loss/train': 0.7247291207313538} +02/25/2022 13:06:53 - INFO - codeparrot_training - Step 27413: {'lr': 0.0002269127839796132, 'samples': 14035968, 'steps': 27413, 'loss/train': 1.9053713083267212} +02/25/2022 13:06:57 - INFO - codeparrot_training - Step 27414: {'lr': 0.00022689649148882894, 'samples': 14036480, 'steps': 27414, 'loss/train': 1.8588893413543701} +02/25/2022 13:07:02 - INFO - codeparrot_training - Step 27415: {'lr': 0.00022688019909701277, 'samples': 14036992, 'steps': 27415, 'loss/train': 1.2183656692504883} +02/25/2022 13:07:06 - INFO - codeparrot_training - Step 27416: {'lr': 0.00022686390680423446, 'samples': 14037504, 'steps': 27416, 'loss/train': 2.2951855659484863} +02/25/2022 13:07:11 - INFO - codeparrot_training - Step 27417: {'lr': 0.00022684761461056385, 'samples': 14038016, 'steps': 27417, 'loss/train': 1.1336044073104858} +02/25/2022 13:07:15 - INFO - codeparrot_training - Step 27418: {'lr': 0.0002268313225160707, 'samples': 14038528, 'steps': 27418, 'loss/train': 1.936003565788269} +02/25/2022 13:07:20 - INFO - codeparrot_training - Step 27419: {'lr': 0.00022681503052082478, 'samples': 14039040, 'steps': 27419, 'loss/train': 1.238242745399475} +02/25/2022 13:07:24 - INFO - codeparrot_training - Step 27420: {'lr': 0.00022679873862489603, 'samples': 14039552, 'steps': 27420, 'loss/train': 2.4736294746398926} +02/25/2022 13:07:31 - INFO - codeparrot_training - Step 27421: {'lr': 0.000226782446828354, 'samples': 14040064, 'steps': 27421, 'loss/train': 1.6786586046218872} +02/25/2022 13:07:34 - INFO - codeparrot_training - Step 27422: {'lr': 0.00022676615513126858, 'samples': 14040576, 'steps': 27422, 'loss/train': 1.5163426399230957} +02/25/2022 13:07:40 - INFO - codeparrot_training - Step 27423: {'lr': 0.00022674986353370957, 'samples': 14041088, 'steps': 27423, 'loss/train': 1.3789907693862915} +02/25/2022 13:07:43 - INFO - codeparrot_training - Step 27424: {'lr': 0.0002267335720357469, 'samples': 14041600, 'steps': 27424, 'loss/train': 2.1570146083831787} +02/25/2022 13:07:49 - INFO - codeparrot_training - Step 27425: {'lr': 0.0002267172806374501, 'samples': 14042112, 'steps': 27425, 'loss/train': 2.1576268672943115} +02/25/2022 13:07:54 - INFO - codeparrot_training - Step 27426: {'lr': 0.0002267009893388891, 'samples': 14042624, 'steps': 27426, 'loss/train': 1.5178781747817993} +02/25/2022 13:07:57 - INFO - codeparrot_training - Step 27427: {'lr': 0.00022668469814013362, 'samples': 14043136, 'steps': 27427, 'loss/train': 2.3554844856262207} +02/25/2022 13:08:03 - INFO - codeparrot_training - Step 27428: {'lr': 0.00022666840704125353, 'samples': 14043648, 'steps': 27428, 'loss/train': 1.4940383434295654} +02/25/2022 13:08:06 - INFO - codeparrot_training - Step 27429: {'lr': 0.00022665211604231864, 'samples': 14044160, 'steps': 27429, 'loss/train': 2.6759610176086426} +02/25/2022 13:08:13 - INFO - codeparrot_training - Step 27430: {'lr': 0.00022663582514339858, 'samples': 14044672, 'steps': 27430, 'loss/train': 1.6067527532577515} +02/25/2022 13:08:17 - INFO - codeparrot_training - Step 27431: {'lr': 0.00022661953434456323, 'samples': 14045184, 'steps': 27431, 'loss/train': 2.3536789417266846} +02/25/2022 13:08:22 - INFO - codeparrot_training - Step 27432: {'lr': 0.00022660324364588236, 'samples': 14045696, 'steps': 27432, 'loss/train': 2.7798709869384766} +02/25/2022 13:08:26 - INFO - codeparrot_training - Step 27433: {'lr': 0.00022658695304742592, 'samples': 14046208, 'steps': 27433, 'loss/train': 2.226771354675293} +02/25/2022 13:08:31 - INFO - codeparrot_training - Step 27434: {'lr': 0.00022657066254926336, 'samples': 14046720, 'steps': 27434, 'loss/train': 2.5460848808288574} +02/25/2022 13:08:35 - INFO - codeparrot_training - Step 27435: {'lr': 0.0002265543721514647, 'samples': 14047232, 'steps': 27435, 'loss/train': 2.075054883956909} +02/25/2022 13:08:40 - INFO - codeparrot_training - Step 27436: {'lr': 0.00022653808185409962, 'samples': 14047744, 'steps': 27436, 'loss/train': 2.302901268005371} +02/25/2022 13:08:44 - INFO - codeparrot_training - Step 27437: {'lr': 0.0002265217916572381, 'samples': 14048256, 'steps': 27437, 'loss/train': 0.9972638487815857} +02/25/2022 13:08:49 - INFO - codeparrot_training - Step 27438: {'lr': 0.00022650550156094962, 'samples': 14048768, 'steps': 27438, 'loss/train': 1.3234316110610962} +02/25/2022 13:08:53 - INFO - codeparrot_training - Step 27439: {'lr': 0.00022648921156530414, 'samples': 14049280, 'steps': 27439, 'loss/train': 0.09567834436893463} +02/25/2022 13:09:00 - INFO - codeparrot_training - Step 27440: {'lr': 0.00022647292167037142, 'samples': 14049792, 'steps': 27440, 'loss/train': 1.3499737977981567} +02/25/2022 13:09:03 - INFO - codeparrot_training - Step 27441: {'lr': 0.0002264566318762212, 'samples': 14050304, 'steps': 27441, 'loss/train': 2.0918891429901123} +02/25/2022 13:09:09 - INFO - codeparrot_training - Step 27442: {'lr': 0.00022644034218292342, 'samples': 14050816, 'steps': 27442, 'loss/train': 1.148611307144165} +02/25/2022 13:09:12 - INFO - codeparrot_training - Step 27443: {'lr': 0.00022642405259054764, 'samples': 14051328, 'steps': 27443, 'loss/train': 1.0863064527511597} +02/25/2022 13:09:18 - INFO - codeparrot_training - Step 27444: {'lr': 0.0002264077630991637, 'samples': 14051840, 'steps': 27444, 'loss/train': 1.713740587234497} +02/25/2022 13:09:21 - INFO - codeparrot_training - Step 27445: {'lr': 0.0002263914737088414, 'samples': 14052352, 'steps': 27445, 'loss/train': 2.2682785987854004} +02/25/2022 13:09:27 - INFO - codeparrot_training - Step 27446: {'lr': 0.00022637518441965068, 'samples': 14052864, 'steps': 27446, 'loss/train': 2.4317922592163086} +02/25/2022 13:09:30 - INFO - codeparrot_training - Step 27447: {'lr': 0.00022635889523166106, 'samples': 14053376, 'steps': 27447, 'loss/train': 1.267711877822876} +02/25/2022 13:09:36 - INFO - codeparrot_training - Step 27448: {'lr': 0.0002263426061449424, 'samples': 14053888, 'steps': 27448, 'loss/train': 3.0414998531341553} +02/25/2022 13:09:39 - INFO - codeparrot_training - Step 27449: {'lr': 0.0002263263171595645, 'samples': 14054400, 'steps': 27449, 'loss/train': 1.6580809354782104} +02/25/2022 13:09:45 - INFO - codeparrot_training - Step 27450: {'lr': 0.00022631002827559727, 'samples': 14054912, 'steps': 27450, 'loss/train': 2.4516777992248535} +02/25/2022 13:09:48 - INFO - codeparrot_training - Step 27451: {'lr': 0.00022629373949311024, 'samples': 14055424, 'steps': 27451, 'loss/train': 2.7914505004882812} +02/25/2022 13:09:54 - INFO - codeparrot_training - Step 27452: {'lr': 0.0002262774508121733, 'samples': 14055936, 'steps': 27452, 'loss/train': 3.0902316570281982} +02/25/2022 13:09:57 - INFO - codeparrot_training - Step 27453: {'lr': 0.00022626116223285628, 'samples': 14056448, 'steps': 27453, 'loss/train': 0.8027417063713074} +02/25/2022 13:10:03 - INFO - codeparrot_training - Step 27454: {'lr': 0.00022624487375522888, 'samples': 14056960, 'steps': 27454, 'loss/train': 0.4505730867385864} +02/25/2022 13:10:06 - INFO - codeparrot_training - Step 27455: {'lr': 0.00022622858537936093, 'samples': 14057472, 'steps': 27455, 'loss/train': 2.1103124618530273} +02/25/2022 13:10:12 - INFO - codeparrot_training - Step 27456: {'lr': 0.00022621229710532208, 'samples': 14057984, 'steps': 27456, 'loss/train': 1.5171276330947876} +02/25/2022 13:10:16 - INFO - codeparrot_training - Step 27457: {'lr': 0.00022619600893318228, 'samples': 14058496, 'steps': 27457, 'loss/train': 2.027768850326538} +02/25/2022 13:10:21 - INFO - codeparrot_training - Step 27458: {'lr': 0.00022617972086301117, 'samples': 14059008, 'steps': 27458, 'loss/train': 2.2001190185546875} +02/25/2022 13:10:25 - INFO - codeparrot_training - Step 27459: {'lr': 0.00022616343289487862, 'samples': 14059520, 'steps': 27459, 'loss/train': 1.8636726140975952} +02/25/2022 13:10:30 - INFO - codeparrot_training - Step 27460: {'lr': 0.00022614714502885435, 'samples': 14060032, 'steps': 27460, 'loss/train': 1.8095732927322388} +02/25/2022 13:10:34 - INFO - codeparrot_training - Step 27461: {'lr': 0.0002261308572650081, 'samples': 14060544, 'steps': 27461, 'loss/train': 2.16621994972229} +02/25/2022 13:10:39 - INFO - codeparrot_training - Step 27462: {'lr': 0.00022611456960340968, 'samples': 14061056, 'steps': 27462, 'loss/train': 1.7925037145614624} +02/25/2022 13:10:43 - INFO - codeparrot_training - Step 27463: {'lr': 0.0002260982820441289, 'samples': 14061568, 'steps': 27463, 'loss/train': 1.3859803676605225} +02/25/2022 13:10:48 - INFO - codeparrot_training - Step 27464: {'lr': 0.0002260819945872355, 'samples': 14062080, 'steps': 27464, 'loss/train': 1.592763900756836} +02/25/2022 13:10:52 - INFO - codeparrot_training - Step 27465: {'lr': 0.0002260657072327992, 'samples': 14062592, 'steps': 27465, 'loss/train': 1.6806966066360474} +02/25/2022 13:10:58 - INFO - codeparrot_training - Step 27466: {'lr': 0.00022604941998088978, 'samples': 14063104, 'steps': 27466, 'loss/train': 2.4284791946411133} +02/25/2022 13:11:01 - INFO - codeparrot_training - Step 27467: {'lr': 0.00022603313283157703, 'samples': 14063616, 'steps': 27467, 'loss/train': 0.4122775197029114} +02/25/2022 13:11:07 - INFO - codeparrot_training - Step 27468: {'lr': 0.00022601684578493083, 'samples': 14064128, 'steps': 27468, 'loss/train': 2.1889266967773438} +02/25/2022 13:11:10 - INFO - codeparrot_training - Step 27469: {'lr': 0.00022600055884102079, 'samples': 14064640, 'steps': 27469, 'loss/train': 1.9168899059295654} +02/25/2022 13:11:16 - INFO - codeparrot_training - Step 27470: {'lr': 0.0002259842719999167, 'samples': 14065152, 'steps': 27470, 'loss/train': 2.676759719848633} +02/25/2022 13:11:19 - INFO - codeparrot_training - Step 27471: {'lr': 0.00022596798526168838, 'samples': 14065664, 'steps': 27471, 'loss/train': 2.6157772541046143} +02/25/2022 13:11:25 - INFO - codeparrot_training - Step 27472: {'lr': 0.0002259516986264057, 'samples': 14066176, 'steps': 27472, 'loss/train': 2.164128065109253} +02/25/2022 13:11:28 - INFO - codeparrot_training - Step 27473: {'lr': 0.00022593541209413814, 'samples': 14066688, 'steps': 27473, 'loss/train': 3.1501500606536865} +02/25/2022 13:11:34 - INFO - codeparrot_training - Step 27474: {'lr': 0.0002259191256649557, 'samples': 14067200, 'steps': 27474, 'loss/train': 0.9105191230773926} +02/25/2022 13:11:37 - INFO - codeparrot_training - Step 27475: {'lr': 0.00022590283933892805, 'samples': 14067712, 'steps': 27475, 'loss/train': 1.7122735977172852} +02/25/2022 13:11:43 - INFO - codeparrot_training - Step 27476: {'lr': 0.00022588655311612496, 'samples': 14068224, 'steps': 27476, 'loss/train': 1.426735520362854} +02/25/2022 13:11:46 - INFO - codeparrot_training - Step 27477: {'lr': 0.00022587026699661636, 'samples': 14068736, 'steps': 27477, 'loss/train': 1.4208389520645142} +02/25/2022 13:11:53 - INFO - codeparrot_training - Step 27478: {'lr': 0.00022585398098047177, 'samples': 14069248, 'steps': 27478, 'loss/train': 1.742519497871399} +02/25/2022 13:11:56 - INFO - codeparrot_training - Step 27479: {'lr': 0.00022583769506776105, 'samples': 14069760, 'steps': 27479, 'loss/train': 1.02162766456604} +02/25/2022 13:12:02 - INFO - codeparrot_training - Step 27480: {'lr': 0.00022582140925855396, 'samples': 14070272, 'steps': 27480, 'loss/train': 1.2883697748184204} +02/25/2022 13:12:05 - INFO - codeparrot_training - Step 27481: {'lr': 0.0002258051235529204, 'samples': 14070784, 'steps': 27481, 'loss/train': 1.4234154224395752} +02/25/2022 13:12:11 - INFO - codeparrot_training - Step 27482: {'lr': 0.00022578883795092988, 'samples': 14071296, 'steps': 27482, 'loss/train': 2.000788450241089} +02/25/2022 13:12:14 - INFO - codeparrot_training - Step 27483: {'lr': 0.00022577255245265232, 'samples': 14071808, 'steps': 27483, 'loss/train': 2.4077391624450684} +02/25/2022 13:12:20 - INFO - codeparrot_training - Step 27484: {'lr': 0.00022575626705815743, 'samples': 14072320, 'steps': 27484, 'loss/train': 2.5601675510406494} +02/25/2022 13:12:23 - INFO - codeparrot_training - Step 27485: {'lr': 0.0002257399817675151, 'samples': 14072832, 'steps': 27485, 'loss/train': 2.8967151641845703} +02/25/2022 13:12:29 - INFO - codeparrot_training - Step 27486: {'lr': 0.00022572369658079488, 'samples': 14073344, 'steps': 27486, 'loss/train': 1.1514675617218018} +02/25/2022 13:12:32 - INFO - codeparrot_training - Step 27487: {'lr': 0.00022570741149806665, 'samples': 14073856, 'steps': 27487, 'loss/train': 0.5491908192634583} +02/25/2022 13:12:38 - INFO - codeparrot_training - Step 27488: {'lr': 0.00022569112651940016, 'samples': 14074368, 'steps': 27488, 'loss/train': 2.202266216278076} +02/25/2022 13:12:42 - INFO - codeparrot_training - Step 27489: {'lr': 0.00022567484164486514, 'samples': 14074880, 'steps': 27489, 'loss/train': 1.4990229606628418} +02/25/2022 13:12:47 - INFO - codeparrot_training - Step 27490: {'lr': 0.0002256585568745315, 'samples': 14075392, 'steps': 27490, 'loss/train': 1.4296541213989258} +02/25/2022 13:12:51 - INFO - codeparrot_training - Step 27491: {'lr': 0.00022564227220846876, 'samples': 14075904, 'steps': 27491, 'loss/train': 1.1576011180877686} +02/25/2022 13:12:56 - INFO - codeparrot_training - Step 27492: {'lr': 0.00022562598764674677, 'samples': 14076416, 'steps': 27492, 'loss/train': 1.212106466293335} +02/25/2022 13:13:00 - INFO - codeparrot_training - Step 27493: {'lr': 0.00022560970318943538, 'samples': 14076928, 'steps': 27493, 'loss/train': 1.497833013534546} +02/25/2022 13:13:05 - INFO - codeparrot_training - Step 27494: {'lr': 0.00022559341883660427, 'samples': 14077440, 'steps': 27494, 'loss/train': 0.039732251316308975} +02/25/2022 13:13:09 - INFO - codeparrot_training - Step 27495: {'lr': 0.00022557713458832318, 'samples': 14077952, 'steps': 27495, 'loss/train': 1.7172825336456299} +02/25/2022 13:13:14 - INFO - codeparrot_training - Step 27496: {'lr': 0.00022556085044466185, 'samples': 14078464, 'steps': 27496, 'loss/train': 2.050816297531128} +02/25/2022 13:13:18 - INFO - codeparrot_training - Step 27497: {'lr': 0.00022554456640569017, 'samples': 14078976, 'steps': 27497, 'loss/train': 1.374283790588379} +02/25/2022 13:13:24 - INFO - codeparrot_training - Step 27498: {'lr': 0.00022552828247147778, 'samples': 14079488, 'steps': 27498, 'loss/train': 0.40927544236183167} +02/25/2022 13:13:27 - INFO - codeparrot_training - Step 27499: {'lr': 0.0002255119986420944, 'samples': 14080000, 'steps': 27499, 'loss/train': 1.6877408027648926} +02/25/2022 13:13:33 - INFO - codeparrot_training - Step 27500: {'lr': 0.00022549571491760985, 'samples': 14080512, 'steps': 27500, 'loss/train': 2.191965341567993} +02/25/2022 13:13:36 - INFO - codeparrot_training - Step 27501: {'lr': 0.00022547943129809392, 'samples': 14081024, 'steps': 27501, 'loss/train': 1.6670523881912231} +02/25/2022 13:13:42 - INFO - codeparrot_training - Step 27502: {'lr': 0.00022546314778361626, 'samples': 14081536, 'steps': 27502, 'loss/train': 2.7569758892059326} +02/25/2022 13:13:45 - INFO - codeparrot_training - Step 27503: {'lr': 0.00022544686437424676, 'samples': 14082048, 'steps': 27503, 'loss/train': 2.689985752105713} +02/25/2022 13:13:51 - INFO - codeparrot_training - Step 27504: {'lr': 0.0002254305810700551, 'samples': 14082560, 'steps': 27504, 'loss/train': 2.3572473526000977} +02/25/2022 13:13:54 - INFO - codeparrot_training - Step 27505: {'lr': 0.00022541429787111095, 'samples': 14083072, 'steps': 27505, 'loss/train': 1.786815881729126} +02/25/2022 13:14:00 - INFO - codeparrot_training - Step 27506: {'lr': 0.00022539801477748414, 'samples': 14083584, 'steps': 27506, 'loss/train': 2.169814348220825} +02/25/2022 13:14:03 - INFO - codeparrot_training - Step 27507: {'lr': 0.00022538173178924452, 'samples': 14084096, 'steps': 27507, 'loss/train': 2.227525234222412} +02/25/2022 13:14:09 - INFO - codeparrot_training - Step 27508: {'lr': 0.00022536544890646172, 'samples': 14084608, 'steps': 27508, 'loss/train': 1.5764561891555786} +02/25/2022 13:14:12 - INFO - codeparrot_training - Step 27509: {'lr': 0.00022534916612920543, 'samples': 14085120, 'steps': 27509, 'loss/train': 2.4514665603637695} +02/25/2022 13:14:18 - INFO - codeparrot_training - Step 27510: {'lr': 0.00022533288345754553, 'samples': 14085632, 'steps': 27510, 'loss/train': 0.28191226720809937} +02/25/2022 13:14:22 - INFO - codeparrot_training - Step 27511: {'lr': 0.00022531660089155178, 'samples': 14086144, 'steps': 27511, 'loss/train': 1.7199852466583252} +02/25/2022 13:14:27 - INFO - codeparrot_training - Step 27512: {'lr': 0.0002253003184312938, 'samples': 14086656, 'steps': 27512, 'loss/train': 2.033536195755005} +02/25/2022 13:14:31 - INFO - codeparrot_training - Step 27513: {'lr': 0.00022528403607684143, 'samples': 14087168, 'steps': 27513, 'loss/train': 2.2799434661865234} +02/25/2022 13:14:37 - INFO - codeparrot_training - Step 27514: {'lr': 0.00022526775382826437, 'samples': 14087680, 'steps': 27514, 'loss/train': 1.563528299331665} +02/25/2022 13:14:41 - INFO - codeparrot_training - Step 27515: {'lr': 0.0002252514716856324, 'samples': 14088192, 'steps': 27515, 'loss/train': 2.22799015045166} +02/25/2022 13:14:46 - INFO - codeparrot_training - Step 27516: {'lr': 0.0002252351896490154, 'samples': 14088704, 'steps': 27516, 'loss/train': 1.7148692607879639} +02/25/2022 13:14:50 - INFO - codeparrot_training - Step 27517: {'lr': 0.00022521890771848286, 'samples': 14089216, 'steps': 27517, 'loss/train': 1.7799091339111328} +02/25/2022 13:14:55 - INFO - codeparrot_training - Step 27518: {'lr': 0.00022520262589410464, 'samples': 14089728, 'steps': 27518, 'loss/train': 1.9172215461730957} +02/25/2022 13:14:59 - INFO - codeparrot_training - Step 27519: {'lr': 0.0002251863441759505, 'samples': 14090240, 'steps': 27519, 'loss/train': 1.4067130088806152} +02/25/2022 13:15:04 - INFO - codeparrot_training - Step 27520: {'lr': 0.00022517006256409032, 'samples': 14090752, 'steps': 27520, 'loss/train': 2.225858449935913} +02/25/2022 13:15:08 - INFO - codeparrot_training - Step 27521: {'lr': 0.00022515378105859358, 'samples': 14091264, 'steps': 27521, 'loss/train': 1.9427075386047363} +02/25/2022 13:15:14 - INFO - codeparrot_training - Step 27522: {'lr': 0.00022513749965953015, 'samples': 14091776, 'steps': 27522, 'loss/train': 1.470167636871338} +02/25/2022 13:15:17 - INFO - codeparrot_training - Step 27523: {'lr': 0.00022512121836696977, 'samples': 14092288, 'steps': 27523, 'loss/train': 2.064854621887207} +02/25/2022 13:15:23 - INFO - codeparrot_training - Step 27524: {'lr': 0.0002251049371809823, 'samples': 14092800, 'steps': 27524, 'loss/train': 0.22656604647636414} +02/25/2022 13:15:27 - INFO - codeparrot_training - Step 27525: {'lr': 0.0002250886561016373, 'samples': 14093312, 'steps': 27525, 'loss/train': 1.4379996061325073} +02/25/2022 13:15:32 - INFO - codeparrot_training - Step 27526: {'lr': 0.00022507237512900454, 'samples': 14093824, 'steps': 27526, 'loss/train': 1.5008889436721802} +02/25/2022 13:15:36 - INFO - codeparrot_training - Step 27527: {'lr': 0.00022505609426315382, 'samples': 14094336, 'steps': 27527, 'loss/train': 1.8783501386642456} +02/25/2022 13:15:41 - INFO - codeparrot_training - Step 27528: {'lr': 0.0002250398135041549, 'samples': 14094848, 'steps': 27528, 'loss/train': 1.4466270208358765} +02/25/2022 13:15:45 - INFO - codeparrot_training - Step 27529: {'lr': 0.00022502353285207757, 'samples': 14095360, 'steps': 27529, 'loss/train': 2.2592251300811768} +02/25/2022 13:15:50 - INFO - codeparrot_training - Step 27530: {'lr': 0.0002250072523069914, 'samples': 14095872, 'steps': 27530, 'loss/train': 1.8453835248947144} +02/25/2022 13:15:54 - INFO - codeparrot_training - Step 27531: {'lr': 0.00022499097186896622, 'samples': 14096384, 'steps': 27531, 'loss/train': 1.4378182888031006} +02/25/2022 13:16:00 - INFO - codeparrot_training - Step 27532: {'lr': 0.00022497469153807175, 'samples': 14096896, 'steps': 27532, 'loss/train': 2.338637590408325} +02/25/2022 13:16:04 - INFO - codeparrot_training - Step 27533: {'lr': 0.0002249584113143779, 'samples': 14097408, 'steps': 27533, 'loss/train': 3.065415143966675} +02/25/2022 13:16:09 - INFO - codeparrot_training - Step 27534: {'lr': 0.00022494213119795414, 'samples': 14097920, 'steps': 27534, 'loss/train': 1.4547630548477173} +02/25/2022 13:16:13 - INFO - codeparrot_training - Step 27535: {'lr': 0.00022492585118887034, 'samples': 14098432, 'steps': 27535, 'loss/train': 1.37712562084198} +02/25/2022 13:16:18 - INFO - codeparrot_training - Step 27536: {'lr': 0.00022490957128719626, 'samples': 14098944, 'steps': 27536, 'loss/train': 2.382920980453491} +02/25/2022 13:16:22 - INFO - codeparrot_training - Step 27537: {'lr': 0.00022489329149300163, 'samples': 14099456, 'steps': 27537, 'loss/train': 2.580874443054199} +02/25/2022 13:16:27 - INFO - codeparrot_training - Step 27538: {'lr': 0.00022487701180635617, 'samples': 14099968, 'steps': 27538, 'loss/train': 1.6556426286697388} +02/25/2022 13:16:31 - INFO - codeparrot_training - Step 27539: {'lr': 0.00022486073222732956, 'samples': 14100480, 'steps': 27539, 'loss/train': 1.8759794235229492} +02/25/2022 13:16:36 - INFO - codeparrot_training - Step 27540: {'lr': 0.00022484445275599158, 'samples': 14100992, 'steps': 27540, 'loss/train': 3.4298949241638184} +02/25/2022 13:16:40 - INFO - codeparrot_training - Step 27541: {'lr': 0.00022482817339241208, 'samples': 14101504, 'steps': 27541, 'loss/train': 2.441953659057617} +02/25/2022 13:16:45 - INFO - codeparrot_training - Step 27542: {'lr': 0.00022481189413666065, 'samples': 14102016, 'steps': 27542, 'loss/train': 2.5302250385284424} +02/25/2022 13:16:49 - INFO - codeparrot_training - Step 27543: {'lr': 0.00022479561498880702, 'samples': 14102528, 'steps': 27543, 'loss/train': 2.505784034729004} +02/25/2022 13:16:54 - INFO - codeparrot_training - Step 27544: {'lr': 0.000224779335948921, 'samples': 14103040, 'steps': 27544, 'loss/train': 2.3544015884399414} +02/25/2022 13:16:58 - INFO - codeparrot_training - Step 27545: {'lr': 0.00022476305701707227, 'samples': 14103552, 'steps': 27545, 'loss/train': 1.5262361764907837} +02/25/2022 13:17:03 - INFO - codeparrot_training - Step 27546: {'lr': 0.00022474677819333064, 'samples': 14104064, 'steps': 27546, 'loss/train': 2.424870729446411} +02/25/2022 13:17:07 - INFO - codeparrot_training - Step 27547: {'lr': 0.00022473049947776576, 'samples': 14104576, 'steps': 27547, 'loss/train': 1.2375322580337524} +02/25/2022 13:17:13 - INFO - codeparrot_training - Step 27548: {'lr': 0.0002247142208704474, 'samples': 14105088, 'steps': 27548, 'loss/train': 2.844965934753418} +02/25/2022 13:17:16 - INFO - codeparrot_training - Step 27549: {'lr': 0.00022469794237144528, 'samples': 14105600, 'steps': 27549, 'loss/train': 2.602905750274658} +02/25/2022 13:17:22 - INFO - codeparrot_training - Step 27550: {'lr': 0.00022468166398082913, 'samples': 14106112, 'steps': 27550, 'loss/train': 0.5203708410263062} +02/25/2022 13:17:25 - INFO - codeparrot_training - Step 27551: {'lr': 0.00022466538569866878, 'samples': 14106624, 'steps': 27551, 'loss/train': 0.8628764748573303} +02/25/2022 13:17:31 - INFO - codeparrot_training - Step 27552: {'lr': 0.00022464910752503382, 'samples': 14107136, 'steps': 27552, 'loss/train': 2.5456087589263916} +02/25/2022 13:17:34 - INFO - codeparrot_training - Step 27553: {'lr': 0.00022463282945999396, 'samples': 14107648, 'steps': 27553, 'loss/train': 2.410552501678467} +02/25/2022 13:17:40 - INFO - codeparrot_training - Step 27554: {'lr': 0.00022461655150361908, 'samples': 14108160, 'steps': 27554, 'loss/train': 0.7931026220321655} +02/25/2022 13:17:44 - INFO - codeparrot_training - Step 27555: {'lr': 0.00022460027365597888, 'samples': 14108672, 'steps': 27555, 'loss/train': 0.6397621035575867} +02/25/2022 13:17:49 - INFO - codeparrot_training - Step 27556: {'lr': 0.00022458399591714296, 'samples': 14109184, 'steps': 27556, 'loss/train': 1.2442971467971802} +02/25/2022 13:17:52 - INFO - codeparrot_training - Step 27557: {'lr': 0.00022456771828718112, 'samples': 14109696, 'steps': 27557, 'loss/train': 1.470343828201294} +02/25/2022 13:17:58 - INFO - codeparrot_training - Step 27558: {'lr': 0.0002245514407661631, 'samples': 14110208, 'steps': 27558, 'loss/train': 2.870821475982666} +02/25/2022 13:18:01 - INFO - codeparrot_training - Step 27559: {'lr': 0.00022453516335415875, 'samples': 14110720, 'steps': 27559, 'loss/train': 2.0164613723754883} +02/25/2022 13:18:08 - INFO - codeparrot_training - Step 27560: {'lr': 0.00022451888605123756, 'samples': 14111232, 'steps': 27560, 'loss/train': 1.6098765134811401} +02/25/2022 13:18:11 - INFO - codeparrot_training - Step 27561: {'lr': 0.00022450260885746934, 'samples': 14111744, 'steps': 27561, 'loss/train': 2.1574995517730713} +02/25/2022 13:18:17 - INFO - codeparrot_training - Step 27562: {'lr': 0.0002244863317729239, 'samples': 14112256, 'steps': 27562, 'loss/train': 0.12546542286872864} +02/25/2022 13:18:20 - INFO - codeparrot_training - Step 27563: {'lr': 0.00022447005479767087, 'samples': 14112768, 'steps': 27563, 'loss/train': 2.8106637001037598} +02/25/2022 13:18:26 - INFO - codeparrot_training - Step 27564: {'lr': 0.00022445377793178014, 'samples': 14113280, 'steps': 27564, 'loss/train': 1.8404120206832886} +02/25/2022 13:18:29 - INFO - codeparrot_training - Step 27565: {'lr': 0.0002244375011753212, 'samples': 14113792, 'steps': 27565, 'loss/train': 2.5010669231414795} +02/25/2022 13:18:35 - INFO - codeparrot_training - Step 27566: {'lr': 0.0002244212245283639, 'samples': 14114304, 'steps': 27566, 'loss/train': 2.2299082279205322} +02/25/2022 13:18:38 - INFO - codeparrot_training - Step 27567: {'lr': 0.00022440494799097797, 'samples': 14114816, 'steps': 27567, 'loss/train': 1.8206995725631714} +02/25/2022 13:18:45 - INFO - codeparrot_training - Step 27568: {'lr': 0.0002243886715632332, 'samples': 14115328, 'steps': 27568, 'loss/train': 2.437713146209717} +02/25/2022 13:18:48 - INFO - codeparrot_training - Step 27569: {'lr': 0.00022437239524519912, 'samples': 14115840, 'steps': 27569, 'loss/train': 2.008140802383423} +02/25/2022 13:18:54 - INFO - codeparrot_training - Step 27570: {'lr': 0.00022435611903694555, 'samples': 14116352, 'steps': 27570, 'loss/train': 1.4823436737060547} +02/25/2022 13:18:57 - INFO - codeparrot_training - Step 27571: {'lr': 0.00022433984293854226, 'samples': 14116864, 'steps': 27571, 'loss/train': 1.2989506721496582} +02/25/2022 13:19:03 - INFO - codeparrot_training - Step 27572: {'lr': 0.00022432356695005902, 'samples': 14117376, 'steps': 27572, 'loss/train': 1.6583824157714844} +02/25/2022 13:19:08 - INFO - codeparrot_training - Step 27573: {'lr': 0.00022430729107156532, 'samples': 14117888, 'steps': 27573, 'loss/train': 2.2382700443267822} +02/25/2022 13:19:12 - INFO - codeparrot_training - Step 27574: {'lr': 0.0002242910153031311, 'samples': 14118400, 'steps': 27574, 'loss/train': 1.2156875133514404} +02/25/2022 13:19:17 - INFO - codeparrot_training - Step 27575: {'lr': 0.00022427473964482597, 'samples': 14118912, 'steps': 27575, 'loss/train': 2.770169496536255} +02/25/2022 13:19:21 - INFO - codeparrot_training - Step 27576: {'lr': 0.00022425846409671968, 'samples': 14119424, 'steps': 27576, 'loss/train': 2.0766983032226562} +02/25/2022 13:19:28 - INFO - codeparrot_training - Step 27577: {'lr': 0.00022424218865888207, 'samples': 14119936, 'steps': 27577, 'loss/train': 2.6259078979492188} +02/25/2022 13:19:31 - INFO - codeparrot_training - Step 27578: {'lr': 0.00022422591333138265, 'samples': 14120448, 'steps': 27578, 'loss/train': 0.47769051790237427} +02/25/2022 13:19:35 - INFO - codeparrot_training - Step 27579: {'lr': 0.0002242096381142912, 'samples': 14120960, 'steps': 27579, 'loss/train': 2.3289449214935303} +02/25/2022 13:19:40 - INFO - codeparrot_training - Step 27580: {'lr': 0.00022419336300767752, 'samples': 14121472, 'steps': 27580, 'loss/train': 1.2013404369354248} +02/25/2022 13:19:44 - INFO - codeparrot_training - Step 27581: {'lr': 0.00022417708801161136, 'samples': 14121984, 'steps': 27581, 'loss/train': 1.5715900659561157} +02/25/2022 13:19:49 - INFO - codeparrot_training - Step 27582: {'lr': 0.00022416081312616224, 'samples': 14122496, 'steps': 27582, 'loss/train': 2.195190668106079} +02/25/2022 13:19:53 - INFO - codeparrot_training - Step 27583: {'lr': 0.0002241445383514, 'samples': 14123008, 'steps': 27583, 'loss/train': 0.9663413763046265} +02/25/2022 13:19:58 - INFO - codeparrot_training - Step 27584: {'lr': 0.00022412826368739438, 'samples': 14123520, 'steps': 27584, 'loss/train': 1.4118077754974365} +02/25/2022 13:20:02 - INFO - codeparrot_training - Step 27585: {'lr': 0.00022411198913421506, 'samples': 14124032, 'steps': 27585, 'loss/train': 1.2280981540679932} +02/25/2022 13:20:07 - INFO - codeparrot_training - Step 27586: {'lr': 0.00022409571469193178, 'samples': 14124544, 'steps': 27586, 'loss/train': 1.1711260080337524} +02/25/2022 13:20:11 - INFO - codeparrot_training - Step 27587: {'lr': 0.00022407944036061418, 'samples': 14125056, 'steps': 27587, 'loss/train': 1.842634677886963} +02/25/2022 13:20:16 - INFO - codeparrot_training - Step 27588: {'lr': 0.00022406316614033205, 'samples': 14125568, 'steps': 27588, 'loss/train': 2.152214288711548} +02/25/2022 13:20:20 - INFO - codeparrot_training - Step 27589: {'lr': 0.0002240468920311551, 'samples': 14126080, 'steps': 27589, 'loss/train': 1.5584194660186768} +02/25/2022 13:20:25 - INFO - codeparrot_training - Step 27590: {'lr': 0.000224030618033153, 'samples': 14126592, 'steps': 27590, 'loss/train': 2.6940431594848633} +02/25/2022 13:20:29 - INFO - codeparrot_training - Step 27591: {'lr': 0.00022401434414639552, 'samples': 14127104, 'steps': 27591, 'loss/train': 2.9364380836486816} +02/25/2022 13:20:34 - INFO - codeparrot_training - Step 27592: {'lr': 0.0002239980703709523, 'samples': 14127616, 'steps': 27592, 'loss/train': 1.4578418731689453} +02/25/2022 13:20:38 - INFO - codeparrot_training - Step 27593: {'lr': 0.00022398179670689305, 'samples': 14128128, 'steps': 27593, 'loss/train': 1.9327623844146729} +02/25/2022 13:20:44 - INFO - codeparrot_training - Step 27594: {'lr': 0.00022396552315428762, 'samples': 14128640, 'steps': 27594, 'loss/train': 0.12504425644874573} +02/25/2022 13:20:47 - INFO - codeparrot_training - Step 27595: {'lr': 0.0002239492497132056, 'samples': 14129152, 'steps': 27595, 'loss/train': 2.0389366149902344} +02/25/2022 13:20:53 - INFO - codeparrot_training - Step 27596: {'lr': 0.00022393297638371667, 'samples': 14129664, 'steps': 27596, 'loss/train': 1.5920017957687378} +02/25/2022 13:20:56 - INFO - codeparrot_training - Step 27597: {'lr': 0.0002239167031658906, 'samples': 14130176, 'steps': 27597, 'loss/train': 2.036208391189575} +02/25/2022 13:21:02 - INFO - codeparrot_training - Step 27598: {'lr': 0.00022390043005979707, 'samples': 14130688, 'steps': 27598, 'loss/train': 1.9205272197723389} +02/25/2022 13:21:07 - INFO - codeparrot_training - Step 27599: {'lr': 0.00022388415706550593, 'samples': 14131200, 'steps': 27599, 'loss/train': 2.547344446182251} +02/25/2022 13:21:11 - INFO - codeparrot_training - Step 27600: {'lr': 0.00022386788418308668, 'samples': 14131712, 'steps': 27600, 'loss/train': 1.4409884214401245} +02/25/2022 13:21:17 - INFO - codeparrot_training - Step 27601: {'lr': 0.0002238516114126091, 'samples': 14132224, 'steps': 27601, 'loss/train': 2.344510793685913} +02/25/2022 13:21:21 - INFO - codeparrot_training - Step 27602: {'lr': 0.0002238353387541429, 'samples': 14132736, 'steps': 27602, 'loss/train': 1.9597134590148926} +02/25/2022 13:21:24 - INFO - codeparrot_training - Step 27603: {'lr': 0.00022381906620775794, 'samples': 14133248, 'steps': 27603, 'loss/train': 2.3054986000061035} +02/25/2022 13:21:30 - INFO - codeparrot_training - Step 27604: {'lr': 0.00022380279377352363, 'samples': 14133760, 'steps': 27604, 'loss/train': 1.4429665803909302} +02/25/2022 13:21:34 - INFO - codeparrot_training - Step 27605: {'lr': 0.0002237865214515099, 'samples': 14134272, 'steps': 27605, 'loss/train': 2.250612258911133} +02/25/2022 13:21:39 - INFO - codeparrot_training - Step 27606: {'lr': 0.00022377024924178632, 'samples': 14134784, 'steps': 27606, 'loss/train': 1.073330044746399} +02/25/2022 13:21:43 - INFO - codeparrot_training - Step 27607: {'lr': 0.00022375397714442281, 'samples': 14135296, 'steps': 27607, 'loss/train': 1.6177748441696167} +02/25/2022 13:21:48 - INFO - codeparrot_training - Step 27608: {'lr': 0.00022373770515948883, 'samples': 14135808, 'steps': 27608, 'loss/train': 2.7134149074554443} +02/25/2022 13:21:52 - INFO - codeparrot_training - Step 27609: {'lr': 0.00022372143328705413, 'samples': 14136320, 'steps': 27609, 'loss/train': 1.0845777988433838} +02/25/2022 13:21:57 - INFO - codeparrot_training - Step 27610: {'lr': 0.0002237051615271885, 'samples': 14136832, 'steps': 27610, 'loss/train': 1.7530558109283447} +02/25/2022 13:22:01 - INFO - codeparrot_training - Step 27611: {'lr': 0.00022368888987996162, 'samples': 14137344, 'steps': 27611, 'loss/train': 2.2539851665496826} +02/25/2022 13:22:06 - INFO - codeparrot_training - Step 27612: {'lr': 0.00022367261834544327, 'samples': 14137856, 'steps': 27612, 'loss/train': 1.0081212520599365} +02/25/2022 13:22:10 - INFO - codeparrot_training - Step 27613: {'lr': 0.00022365634692370296, 'samples': 14138368, 'steps': 27613, 'loss/train': 2.083918809890747} +02/25/2022 13:22:15 - INFO - codeparrot_training - Step 27614: {'lr': 0.0002236400756148105, 'samples': 14138880, 'steps': 27614, 'loss/train': 1.8575726747512817} +02/25/2022 13:22:19 - INFO - codeparrot_training - Step 27615: {'lr': 0.0002236238044188356, 'samples': 14139392, 'steps': 27615, 'loss/train': 2.190290927886963} +02/25/2022 13:22:26 - INFO - codeparrot_training - Step 27616: {'lr': 0.00022360753333584805, 'samples': 14139904, 'steps': 27616, 'loss/train': 1.7230314016342163} +02/25/2022 13:22:29 - INFO - codeparrot_training - Step 27617: {'lr': 0.0002235912623659173, 'samples': 14140416, 'steps': 27617, 'loss/train': 2.0915002822875977} +02/25/2022 13:22:35 - INFO - codeparrot_training - Step 27618: {'lr': 0.00022357499150911324, 'samples': 14140928, 'steps': 27618, 'loss/train': 2.457686185836792} +02/25/2022 13:22:38 - INFO - codeparrot_training - Step 27619: {'lr': 0.0002235587207655055, 'samples': 14141440, 'steps': 27619, 'loss/train': 1.8092637062072754} +02/25/2022 13:22:44 - INFO - codeparrot_training - Step 27620: {'lr': 0.00022354245013516392, 'samples': 14141952, 'steps': 27620, 'loss/train': 2.0804600715637207} +02/25/2022 13:22:47 - INFO - codeparrot_training - Step 27621: {'lr': 0.00022352617961815795, 'samples': 14142464, 'steps': 27621, 'loss/train': 2.0949928760528564} +02/25/2022 13:22:52 - INFO - codeparrot_training - Step 27622: {'lr': 0.00022350990921455747, 'samples': 14142976, 'steps': 27622, 'loss/train': 1.9814836978912354} +02/25/2022 13:22:56 - INFO - codeparrot_training - Step 27623: {'lr': 0.0002234936389244321, 'samples': 14143488, 'steps': 27623, 'loss/train': 1.7035192251205444} +02/25/2022 13:23:01 - INFO - codeparrot_training - Step 27624: {'lr': 0.00022347736874785162, 'samples': 14144000, 'steps': 27624, 'loss/train': 0.5222650766372681} +02/25/2022 13:23:05 - INFO - codeparrot_training - Step 27625: {'lr': 0.00022346109868488567, 'samples': 14144512, 'steps': 27625, 'loss/train': 2.2711825370788574} +02/25/2022 13:23:11 - INFO - codeparrot_training - Step 27626: {'lr': 0.0002234448287356039, 'samples': 14145024, 'steps': 27626, 'loss/train': 2.2342886924743652} +02/25/2022 13:23:15 - INFO - codeparrot_training - Step 27627: {'lr': 0.00022342855890007603, 'samples': 14145536, 'steps': 27627, 'loss/train': 1.1420929431915283} +02/25/2022 13:23:20 - INFO - codeparrot_training - Step 27628: {'lr': 0.00022341228917837185, 'samples': 14146048, 'steps': 27628, 'loss/train': 2.265047073364258} +02/25/2022 13:23:24 - INFO - codeparrot_training - Step 27629: {'lr': 0.00022339601957056097, 'samples': 14146560, 'steps': 27629, 'loss/train': 2.317077398300171} +02/25/2022 13:23:29 - INFO - codeparrot_training - Step 27630: {'lr': 0.00022337975007671304, 'samples': 14147072, 'steps': 27630, 'loss/train': 1.0609493255615234} +02/25/2022 13:23:33 - INFO - codeparrot_training - Step 27631: {'lr': 0.0002233634806968979, 'samples': 14147584, 'steps': 27631, 'loss/train': 1.6585279703140259} +02/25/2022 13:23:38 - INFO - codeparrot_training - Step 27632: {'lr': 0.00022334721143118502, 'samples': 14148096, 'steps': 27632, 'loss/train': 1.9878816604614258} +02/25/2022 13:23:42 - INFO - codeparrot_training - Step 27633: {'lr': 0.00022333094227964436, 'samples': 14148608, 'steps': 27633, 'loss/train': 1.5142663717269897} +02/25/2022 13:23:47 - INFO - codeparrot_training - Step 27634: {'lr': 0.00022331467324234537, 'samples': 14149120, 'steps': 27634, 'loss/train': 0.7418177127838135} +02/25/2022 13:23:51 - INFO - codeparrot_training - Step 27635: {'lr': 0.00022329840431935792, 'samples': 14149632, 'steps': 27635, 'loss/train': 1.6578922271728516} +02/25/2022 13:23:56 - INFO - codeparrot_training - Step 27636: {'lr': 0.00022328213551075154, 'samples': 14150144, 'steps': 27636, 'loss/train': 0.968309760093689} +02/25/2022 13:24:00 - INFO - codeparrot_training - Step 27637: {'lr': 0.00022326586681659607, 'samples': 14150656, 'steps': 27637, 'loss/train': 2.5778255462646484} +02/25/2022 13:24:05 - INFO - codeparrot_training - Step 27638: {'lr': 0.00022324959823696118, 'samples': 14151168, 'steps': 27638, 'loss/train': 0.8112436532974243} +02/25/2022 13:24:09 - INFO - codeparrot_training - Step 27639: {'lr': 0.00022323332977191643, 'samples': 14151680, 'steps': 27639, 'loss/train': 2.3479654788970947} +02/25/2022 13:24:15 - INFO - codeparrot_training - Step 27640: {'lr': 0.00022321706142153163, 'samples': 14152192, 'steps': 27640, 'loss/train': 2.084033250808716} +02/25/2022 13:24:18 - INFO - codeparrot_training - Step 27641: {'lr': 0.00022320079318587639, 'samples': 14152704, 'steps': 27641, 'loss/train': 2.2912707328796387} +02/25/2022 13:24:25 - INFO - codeparrot_training - Step 27642: {'lr': 0.00022318452506502057, 'samples': 14153216, 'steps': 27642, 'loss/train': 1.9373067617416382} +02/25/2022 13:24:28 - INFO - codeparrot_training - Step 27643: {'lr': 0.00022316825705903363, 'samples': 14153728, 'steps': 27643, 'loss/train': 1.187343716621399} +02/25/2022 13:24:34 - INFO - codeparrot_training - Step 27644: {'lr': 0.00022315198916798533, 'samples': 14154240, 'steps': 27644, 'loss/train': 1.7621794939041138} +02/25/2022 13:24:37 - INFO - codeparrot_training - Step 27645: {'lr': 0.0002231357213919454, 'samples': 14154752, 'steps': 27645, 'loss/train': 2.896050214767456} +02/25/2022 13:24:43 - INFO - codeparrot_training - Step 27646: {'lr': 0.0002231194537309835, 'samples': 14155264, 'steps': 27646, 'loss/train': 1.8517706394195557} +02/25/2022 13:24:46 - INFO - codeparrot_training - Step 27647: {'lr': 0.00022310318618516944, 'samples': 14155776, 'steps': 27647, 'loss/train': 1.801209807395935} +02/25/2022 13:24:52 - INFO - codeparrot_training - Step 27648: {'lr': 0.0002230869187545727, 'samples': 14156288, 'steps': 27648, 'loss/train': 1.8718500137329102} +02/25/2022 13:24:55 - INFO - codeparrot_training - Step 27649: {'lr': 0.00022307065143926304, 'samples': 14156800, 'steps': 27649, 'loss/train': 1.6194086074829102} +02/25/2022 13:25:01 - INFO - codeparrot_training - Step 27650: {'lr': 0.00022305438423931017, 'samples': 14157312, 'steps': 27650, 'loss/train': 1.940527319908142} +02/25/2022 13:25:04 - INFO - codeparrot_training - Step 27651: {'lr': 0.00022303811715478384, 'samples': 14157824, 'steps': 27651, 'loss/train': 1.5621339082717896} +02/25/2022 13:25:11 - INFO - codeparrot_training - Step 27652: {'lr': 0.00022302185018575356, 'samples': 14158336, 'steps': 27652, 'loss/train': 1.719569206237793} +02/25/2022 13:25:14 - INFO - codeparrot_training - Step 27653: {'lr': 0.0002230055833322891, 'samples': 14158848, 'steps': 27653, 'loss/train': 1.676282286643982} +02/25/2022 13:25:20 - INFO - codeparrot_training - Step 27654: {'lr': 0.00022298931659446014, 'samples': 14159360, 'steps': 27654, 'loss/train': 1.22589910030365} +02/25/2022 13:25:23 - INFO - codeparrot_training - Step 27655: {'lr': 0.00022297304997233653, 'samples': 14159872, 'steps': 27655, 'loss/train': 2.559494733810425} +02/25/2022 13:25:29 - INFO - codeparrot_training - Step 27656: {'lr': 0.00022295678346598763, 'samples': 14160384, 'steps': 27656, 'loss/train': 2.6130824089050293} +02/25/2022 13:25:32 - INFO - codeparrot_training - Step 27657: {'lr': 0.0002229405170754833, 'samples': 14160896, 'steps': 27657, 'loss/train': 2.796952486038208} +02/25/2022 13:25:38 - INFO - codeparrot_training - Step 27658: {'lr': 0.0002229242508008932, 'samples': 14161408, 'steps': 27658, 'loss/train': 1.19169020652771} +02/25/2022 13:25:41 - INFO - codeparrot_training - Step 27659: {'lr': 0.00022290798464228703, 'samples': 14161920, 'steps': 27659, 'loss/train': 1.6232154369354248} +02/25/2022 13:25:47 - INFO - codeparrot_training - Step 27660: {'lr': 0.00022289171859973456, 'samples': 14162432, 'steps': 27660, 'loss/train': 2.304831027984619} +02/25/2022 13:25:50 - INFO - codeparrot_training - Step 27661: {'lr': 0.00022287545267330524, 'samples': 14162944, 'steps': 27661, 'loss/train': 0.9531778693199158} +02/25/2022 13:25:57 - INFO - codeparrot_training - Step 27662: {'lr': 0.00022285918686306886, 'samples': 14163456, 'steps': 27662, 'loss/train': 2.490593910217285} +02/25/2022 13:26:00 - INFO - codeparrot_training - Step 27663: {'lr': 0.0002228429211690951, 'samples': 14163968, 'steps': 27663, 'loss/train': 1.9293419122695923} +02/25/2022 13:26:05 - INFO - codeparrot_training - Step 27664: {'lr': 0.00022282665559145376, 'samples': 14164480, 'steps': 27664, 'loss/train': 1.6510881185531616} +02/25/2022 13:26:09 - INFO - codeparrot_training - Step 27665: {'lr': 0.0002228103901302143, 'samples': 14164992, 'steps': 27665, 'loss/train': 1.121003270149231} +02/25/2022 13:26:14 - INFO - codeparrot_training - Step 27666: {'lr': 0.0002227941247854465, 'samples': 14165504, 'steps': 27666, 'loss/train': 1.3819140195846558} +02/25/2022 13:26:18 - INFO - codeparrot_training - Step 27667: {'lr': 0.00022277785955722, 'samples': 14166016, 'steps': 27667, 'loss/train': 0.46884042024612427} +02/25/2022 13:26:23 - INFO - codeparrot_training - Step 27668: {'lr': 0.00022276159444560464, 'samples': 14166528, 'steps': 27668, 'loss/train': 2.10331130027771} +02/25/2022 13:26:27 - INFO - codeparrot_training - Step 27669: {'lr': 0.00022274532945066987, 'samples': 14167040, 'steps': 27669, 'loss/train': 1.8764166831970215} +02/25/2022 13:26:32 - INFO - codeparrot_training - Step 27670: {'lr': 0.0002227290645724854, 'samples': 14167552, 'steps': 27670, 'loss/train': 1.1372950077056885} +02/25/2022 13:26:36 - INFO - codeparrot_training - Step 27671: {'lr': 0.00022271279981112105, 'samples': 14168064, 'steps': 27671, 'loss/train': 1.5634915828704834} +02/25/2022 13:26:41 - INFO - codeparrot_training - Step 27672: {'lr': 0.00022269653516664633, 'samples': 14168576, 'steps': 27672, 'loss/train': 1.8037562370300293} +02/25/2022 13:26:45 - INFO - codeparrot_training - Step 27673: {'lr': 0.00022268027063913104, 'samples': 14169088, 'steps': 27673, 'loss/train': 2.0806803703308105} +02/25/2022 13:26:50 - INFO - codeparrot_training - Step 27674: {'lr': 0.00022266400622864474, 'samples': 14169600, 'steps': 27674, 'loss/train': 1.8873063325881958} +02/25/2022 13:26:54 - INFO - codeparrot_training - Step 27675: {'lr': 0.00022264774193525723, 'samples': 14170112, 'steps': 27675, 'loss/train': 0.3434310853481293} +02/25/2022 13:26:59 - INFO - codeparrot_training - Step 27676: {'lr': 0.00022263147775903805, 'samples': 14170624, 'steps': 27676, 'loss/train': 1.2676708698272705} +02/25/2022 13:27:03 - INFO - codeparrot_training - Step 27677: {'lr': 0.00022261521370005698, 'samples': 14171136, 'steps': 27677, 'loss/train': 1.3135557174682617} +02/25/2022 13:27:09 - INFO - codeparrot_training - Step 27678: {'lr': 0.00022259894975838363, 'samples': 14171648, 'steps': 27678, 'loss/train': 2.106416940689087} +02/25/2022 13:27:13 - INFO - codeparrot_training - Step 27679: {'lr': 0.0002225826859340876, 'samples': 14172160, 'steps': 27679, 'loss/train': 2.1538820266723633} +02/25/2022 13:27:18 - INFO - codeparrot_training - Step 27680: {'lr': 0.00022256642222723868, 'samples': 14172672, 'steps': 27680, 'loss/train': 1.9056397676467896} +02/25/2022 13:27:22 - INFO - codeparrot_training - Step 27681: {'lr': 0.00022255015863790656, 'samples': 14173184, 'steps': 27681, 'loss/train': 1.9012720584869385} +02/25/2022 13:27:27 - INFO - codeparrot_training - Step 27682: {'lr': 0.00022253389516616083, 'samples': 14173696, 'steps': 27682, 'loss/train': 1.3327257633209229} +02/25/2022 13:27:31 - INFO - codeparrot_training - Step 27683: {'lr': 0.00022251763181207107, 'samples': 14174208, 'steps': 27683, 'loss/train': 2.3490893840789795} +02/25/2022 13:27:36 - INFO - codeparrot_training - Step 27684: {'lr': 0.0002225013685757071, 'samples': 14174720, 'steps': 27684, 'loss/train': 1.998063325881958} +02/25/2022 13:27:40 - INFO - codeparrot_training - Step 27685: {'lr': 0.00022248510545713851, 'samples': 14175232, 'steps': 27685, 'loss/train': 1.465667724609375} +02/25/2022 13:27:45 - INFO - codeparrot_training - Step 27686: {'lr': 0.00022246884245643512, 'samples': 14175744, 'steps': 27686, 'loss/train': 1.754370927810669} +02/25/2022 13:27:49 - INFO - codeparrot_training - Step 27687: {'lr': 0.00022245257957366634, 'samples': 14176256, 'steps': 27687, 'loss/train': 1.1433238983154297} +02/25/2022 13:27:55 - INFO - codeparrot_training - Step 27688: {'lr': 0.00022243631680890198, 'samples': 14176768, 'steps': 27688, 'loss/train': 1.6080366373062134} +02/25/2022 13:27:59 - INFO - codeparrot_training - Step 27689: {'lr': 0.00022242005416221166, 'samples': 14177280, 'steps': 27689, 'loss/train': 1.9073879718780518} +02/25/2022 13:28:04 - INFO - codeparrot_training - Step 27690: {'lr': 0.00022240379163366523, 'samples': 14177792, 'steps': 27690, 'loss/train': 1.2257037162780762} +02/25/2022 13:28:08 - INFO - codeparrot_training - Step 27691: {'lr': 0.00022238752922333207, 'samples': 14178304, 'steps': 27691, 'loss/train': 1.4254138469696045} +02/25/2022 13:28:13 - INFO - codeparrot_training - Step 27692: {'lr': 0.00022237126693128192, 'samples': 14178816, 'steps': 27692, 'loss/train': 1.561631679534912} +02/25/2022 13:28:17 - INFO - codeparrot_training - Step 27693: {'lr': 0.00022235500475758453, 'samples': 14179328, 'steps': 27693, 'loss/train': 1.6028903722763062} +02/25/2022 13:28:22 - INFO - codeparrot_training - Step 27694: {'lr': 0.0002223387427023095, 'samples': 14179840, 'steps': 27694, 'loss/train': 0.7694383263587952} +02/25/2022 13:28:26 - INFO - codeparrot_training - Step 27695: {'lr': 0.00022232248076552662, 'samples': 14180352, 'steps': 27695, 'loss/train': 1.859541893005371} +02/25/2022 13:28:31 - INFO - codeparrot_training - Step 27696: {'lr': 0.00022230621894730535, 'samples': 14180864, 'steps': 27696, 'loss/train': 2.5287795066833496} +02/25/2022 13:28:35 - INFO - codeparrot_training - Step 27697: {'lr': 0.00022228995724771545, 'samples': 14181376, 'steps': 27697, 'loss/train': 1.9428517818450928} +02/25/2022 13:28:41 - INFO - codeparrot_training - Step 27698: {'lr': 0.00022227369566682657, 'samples': 14181888, 'steps': 27698, 'loss/train': 2.6602895259857178} +02/25/2022 13:28:44 - INFO - codeparrot_training - Step 27699: {'lr': 0.00022225743420470844, 'samples': 14182400, 'steps': 27699, 'loss/train': 4.13417911529541} +02/25/2022 13:28:50 - INFO - codeparrot_training - Step 27700: {'lr': 0.0002222411728614306, 'samples': 14182912, 'steps': 27700, 'loss/train': 2.261253833770752} +02/25/2022 13:28:53 - INFO - codeparrot_training - Step 27701: {'lr': 0.00022222491163706275, 'samples': 14183424, 'steps': 27701, 'loss/train': 1.140095829963684} +02/25/2022 13:28:59 - INFO - codeparrot_training - Step 27702: {'lr': 0.00022220865053167456, 'samples': 14183936, 'steps': 27702, 'loss/train': 2.3190813064575195} +02/25/2022 13:29:02 - INFO - codeparrot_training - Step 27703: {'lr': 0.00022219238954533578, 'samples': 14184448, 'steps': 27703, 'loss/train': 0.9984862208366394} +02/25/2022 13:29:08 - INFO - codeparrot_training - Step 27704: {'lr': 0.0002221761286781159, 'samples': 14184960, 'steps': 27704, 'loss/train': 1.4892514944076538} +02/25/2022 13:29:11 - INFO - codeparrot_training - Step 27705: {'lr': 0.00022215986793008459, 'samples': 14185472, 'steps': 27705, 'loss/train': 2.2885193824768066} +02/25/2022 13:29:17 - INFO - codeparrot_training - Step 27706: {'lr': 0.0002221436073013116, 'samples': 14185984, 'steps': 27706, 'loss/train': 1.7201766967773438} +02/25/2022 13:29:20 - INFO - codeparrot_training - Step 27707: {'lr': 0.00022212734679186651, 'samples': 14186496, 'steps': 27707, 'loss/train': 1.9517145156860352} +02/25/2022 13:29:27 - INFO - codeparrot_training - Step 27708: {'lr': 0.00022211108640181917, 'samples': 14187008, 'steps': 27708, 'loss/train': 2.1709697246551514} +02/25/2022 13:29:30 - INFO - codeparrot_training - Step 27709: {'lr': 0.00022209482613123898, 'samples': 14187520, 'steps': 27709, 'loss/train': 2.9296810626983643} +02/25/2022 13:29:36 - INFO - codeparrot_training - Step 27710: {'lr': 0.0002220785659801957, 'samples': 14188032, 'steps': 27710, 'loss/train': 1.9803681373596191} +02/25/2022 13:29:39 - INFO - codeparrot_training - Step 27711: {'lr': 0.00022206230594875898, 'samples': 14188544, 'steps': 27711, 'loss/train': 1.7493771314620972} +02/25/2022 13:29:45 - INFO - codeparrot_training - Step 27712: {'lr': 0.0002220460460369985, 'samples': 14189056, 'steps': 27712, 'loss/train': 1.181638240814209} +02/25/2022 13:29:48 - INFO - codeparrot_training - Step 27713: {'lr': 0.00022202978624498383, 'samples': 14189568, 'steps': 27713, 'loss/train': 1.7782877683639526} +02/25/2022 13:29:54 - INFO - codeparrot_training - Step 27714: {'lr': 0.00022201352657278466, 'samples': 14190080, 'steps': 27714, 'loss/train': 1.7174371480941772} +02/25/2022 13:29:57 - INFO - codeparrot_training - Step 27715: {'lr': 0.00022199726702047074, 'samples': 14190592, 'steps': 27715, 'loss/train': 1.833799958229065} +02/25/2022 13:30:03 - INFO - codeparrot_training - Step 27716: {'lr': 0.0002219810075881116, 'samples': 14191104, 'steps': 27716, 'loss/train': 0.9955262541770935} +02/25/2022 13:30:06 - INFO - codeparrot_training - Step 27717: {'lr': 0.0002219647482757769, 'samples': 14191616, 'steps': 27717, 'loss/train': 1.6329665184020996} +02/25/2022 13:30:12 - INFO - codeparrot_training - Step 27718: {'lr': 0.00022194848908353634, 'samples': 14192128, 'steps': 27718, 'loss/train': 0.23492471873760223} +02/25/2022 13:30:15 - INFO - codeparrot_training - Step 27719: {'lr': 0.00022193223001145952, 'samples': 14192640, 'steps': 27719, 'loss/train': 1.9844173192977905} +02/25/2022 13:30:21 - INFO - codeparrot_training - Step 27720: {'lr': 0.00022191597105961612, 'samples': 14193152, 'steps': 27720, 'loss/train': 1.7243579626083374} +02/25/2022 13:30:24 - INFO - codeparrot_training - Step 27721: {'lr': 0.00022189971222807582, 'samples': 14193664, 'steps': 27721, 'loss/train': 2.4163992404937744} +02/25/2022 13:30:30 - INFO - codeparrot_training - Step 27722: {'lr': 0.00022188345351690822, 'samples': 14194176, 'steps': 27722, 'loss/train': 1.9439784288406372} +02/25/2022 13:30:33 - INFO - codeparrot_training - Step 27723: {'lr': 0.00022186719492618294, 'samples': 14194688, 'steps': 27723, 'loss/train': 2.3920094966888428} +02/25/2022 13:30:40 - INFO - codeparrot_training - Step 27724: {'lr': 0.00022185093645596965, 'samples': 14195200, 'steps': 27724, 'loss/train': 2.495496988296509} +02/25/2022 13:30:44 - INFO - codeparrot_training - Step 27725: {'lr': 0.0002218346781063381, 'samples': 14195712, 'steps': 27725, 'loss/train': 2.224153995513916} +02/25/2022 13:30:49 - INFO - codeparrot_training - Step 27726: {'lr': 0.0002218184198773578, 'samples': 14196224, 'steps': 27726, 'loss/train': 2.4931423664093018} +02/25/2022 13:30:53 - INFO - codeparrot_training - Step 27727: {'lr': 0.0002218021617690984, 'samples': 14196736, 'steps': 27727, 'loss/train': 2.061262369155884} +02/25/2022 13:30:58 - INFO - codeparrot_training - Step 27728: {'lr': 0.00022178590378162956, 'samples': 14197248, 'steps': 27728, 'loss/train': 1.544169545173645} +02/25/2022 13:31:01 - INFO - codeparrot_training - Step 27729: {'lr': 0.00022176964591502112, 'samples': 14197760, 'steps': 27729, 'loss/train': 2.0824270248413086} +02/25/2022 13:31:07 - INFO - codeparrot_training - Step 27730: {'lr': 0.0002217533881693424, 'samples': 14198272, 'steps': 27730, 'loss/train': 1.380448818206787} +02/25/2022 13:31:11 - INFO - codeparrot_training - Step 27731: {'lr': 0.00022173713054466322, 'samples': 14198784, 'steps': 27731, 'loss/train': 2.2570340633392334} +02/25/2022 13:31:16 - INFO - codeparrot_training - Step 27732: {'lr': 0.00022172087304105317, 'samples': 14199296, 'steps': 27732, 'loss/train': 1.6462903022766113} +02/25/2022 13:31:20 - INFO - codeparrot_training - Step 27733: {'lr': 0.00022170461565858193, 'samples': 14199808, 'steps': 27733, 'loss/train': 1.5064811706542969} +02/25/2022 13:31:25 - INFO - codeparrot_training - Step 27734: {'lr': 0.00022168835839731925, 'samples': 14200320, 'steps': 27734, 'loss/train': 2.2610645294189453} +02/25/2022 13:31:29 - INFO - codeparrot_training - Step 27735: {'lr': 0.00022167210125733454, 'samples': 14200832, 'steps': 27735, 'loss/train': 0.16780051589012146} +02/25/2022 13:31:35 - INFO - codeparrot_training - Step 27736: {'lr': 0.00022165584423869755, 'samples': 14201344, 'steps': 27736, 'loss/train': 2.669257402420044} +02/25/2022 13:31:38 - INFO - codeparrot_training - Step 27737: {'lr': 0.00022163958734147793, 'samples': 14201856, 'steps': 27737, 'loss/train': 2.5278546810150146} +02/25/2022 13:31:44 - INFO - codeparrot_training - Step 27738: {'lr': 0.0002216233305657454, 'samples': 14202368, 'steps': 27738, 'loss/train': 1.9732301235198975} +02/25/2022 13:31:47 - INFO - codeparrot_training - Step 27739: {'lr': 0.00022160707391156943, 'samples': 14202880, 'steps': 27739, 'loss/train': 1.6554172039031982} +02/25/2022 13:31:53 - INFO - codeparrot_training - Step 27740: {'lr': 0.00022159081737901975, 'samples': 14203392, 'steps': 27740, 'loss/train': 0.4357997179031372} +02/25/2022 13:31:56 - INFO - codeparrot_training - Step 27741: {'lr': 0.00022157456096816595, 'samples': 14203904, 'steps': 27741, 'loss/train': 1.1607322692871094} +02/25/2022 13:32:01 - INFO - codeparrot_training - Step 27742: {'lr': 0.00022155830467907774, 'samples': 14204416, 'steps': 27742, 'loss/train': 1.7959980964660645} +02/25/2022 13:32:05 - INFO - codeparrot_training - Step 27743: {'lr': 0.0002215420485118248, 'samples': 14204928, 'steps': 27743, 'loss/train': 1.7010483741760254} +02/25/2022 13:32:11 - INFO - codeparrot_training - Step 27744: {'lr': 0.00022152579246647659, 'samples': 14205440, 'steps': 27744, 'loss/train': 1.758897304534912} +02/25/2022 13:32:14 - INFO - codeparrot_training - Step 27745: {'lr': 0.00022150953654310283, 'samples': 14205952, 'steps': 27745, 'loss/train': 1.5634516477584839} +02/25/2022 13:32:20 - INFO - codeparrot_training - Step 27746: {'lr': 0.0002214932807417732, 'samples': 14206464, 'steps': 27746, 'loss/train': 2.1271111965179443} +02/25/2022 13:32:24 - INFO - codeparrot_training - Step 27747: {'lr': 0.00022147702506255737, 'samples': 14206976, 'steps': 27747, 'loss/train': 1.813204288482666} +02/25/2022 13:32:29 - INFO - codeparrot_training - Step 27748: {'lr': 0.00022146076950552482, 'samples': 14207488, 'steps': 27748, 'loss/train': 2.859630584716797} +02/25/2022 13:32:33 - INFO - codeparrot_training - Step 27749: {'lr': 0.00022144451407074528, 'samples': 14208000, 'steps': 27749, 'loss/train': 2.5048768520355225} +02/25/2022 13:32:38 - INFO - codeparrot_training - Step 27750: {'lr': 0.00022142825875828836, 'samples': 14208512, 'steps': 27750, 'loss/train': 0.8993305563926697} +02/25/2022 13:32:42 - INFO - codeparrot_training - Step 27751: {'lr': 0.00022141200356822383, 'samples': 14209024, 'steps': 27751, 'loss/train': 1.6798063516616821} +02/25/2022 13:32:47 - INFO - codeparrot_training - Step 27752: {'lr': 0.0002213957485006211, 'samples': 14209536, 'steps': 27752, 'loss/train': 0.7667948007583618} +02/25/2022 13:32:51 - INFO - codeparrot_training - Step 27753: {'lr': 0.00022137949355554987, 'samples': 14210048, 'steps': 27753, 'loss/train': 1.3859878778457642} +02/25/2022 13:32:56 - INFO - codeparrot_training - Step 27754: {'lr': 0.0002213632387330798, 'samples': 14210560, 'steps': 27754, 'loss/train': 1.7311384677886963} +02/25/2022 13:33:00 - INFO - codeparrot_training - Step 27755: {'lr': 0.00022134698403328062, 'samples': 14211072, 'steps': 27755, 'loss/train': 2.2139503955841064} +02/25/2022 13:33:06 - INFO - codeparrot_training - Step 27756: {'lr': 0.00022133072945622182, 'samples': 14211584, 'steps': 27756, 'loss/train': 2.4205338954925537} +02/25/2022 13:33:09 - INFO - codeparrot_training - Step 27757: {'lr': 0.00022131447500197305, 'samples': 14212096, 'steps': 27757, 'loss/train': 1.3299930095672607} +02/25/2022 13:33:15 - INFO - codeparrot_training - Step 27758: {'lr': 0.00022129822067060398, 'samples': 14212608, 'steps': 27758, 'loss/train': 1.6065117120742798} +02/25/2022 13:33:18 - INFO - codeparrot_training - Step 27759: {'lr': 0.0002212819664621842, 'samples': 14213120, 'steps': 27759, 'loss/train': 1.617329716682434} +02/25/2022 13:33:24 - INFO - codeparrot_training - Step 27760: {'lr': 0.00022126571237678338, 'samples': 14213632, 'steps': 27760, 'loss/train': 2.0306341648101807} +02/25/2022 13:33:27 - INFO - codeparrot_training - Step 27761: {'lr': 0.0002212494584144711, 'samples': 14214144, 'steps': 27761, 'loss/train': 2.2177698612213135} +02/25/2022 13:33:33 - INFO - codeparrot_training - Step 27762: {'lr': 0.00022123320457531704, 'samples': 14214656, 'steps': 27762, 'loss/train': 2.270902395248413} +02/25/2022 13:33:36 - INFO - codeparrot_training - Step 27763: {'lr': 0.00022121695085939075, 'samples': 14215168, 'steps': 27763, 'loss/train': 1.4836783409118652} +02/25/2022 13:33:42 - INFO - codeparrot_training - Step 27764: {'lr': 0.00022120069726676194, 'samples': 14215680, 'steps': 27764, 'loss/train': 2.291496753692627} +02/25/2022 13:33:45 - INFO - codeparrot_training - Step 27765: {'lr': 0.00022118444379750016, 'samples': 14216192, 'steps': 27765, 'loss/train': 2.231790781021118} +02/25/2022 13:33:51 - INFO - codeparrot_training - Step 27766: {'lr': 0.00022116819045167512, 'samples': 14216704, 'steps': 27766, 'loss/train': 1.5718729496002197} +02/25/2022 13:33:54 - INFO - codeparrot_training - Step 27767: {'lr': 0.00022115193722935638, 'samples': 14217216, 'steps': 27767, 'loss/train': 1.8208876848220825} +02/25/2022 13:34:00 - INFO - codeparrot_training - Step 27768: {'lr': 0.00022113568413061356, 'samples': 14217728, 'steps': 27768, 'loss/train': 1.6542704105377197} +02/25/2022 13:34:03 - INFO - codeparrot_training - Step 27769: {'lr': 0.00022111943115551638, 'samples': 14218240, 'steps': 27769, 'loss/train': 2.653064012527466} +02/25/2022 13:34:09 - INFO - codeparrot_training - Step 27770: {'lr': 0.00022110317830413432, 'samples': 14218752, 'steps': 27770, 'loss/train': 1.756883144378662} +02/25/2022 13:34:12 - INFO - codeparrot_training - Step 27771: {'lr': 0.00022108692557653702, 'samples': 14219264, 'steps': 27771, 'loss/train': 2.680830240249634} +02/25/2022 13:34:19 - INFO - codeparrot_training - Step 27772: {'lr': 0.0002210706729727942, 'samples': 14219776, 'steps': 27772, 'loss/train': 2.6151442527770996} +02/25/2022 13:34:22 - INFO - codeparrot_training - Step 27773: {'lr': 0.00022105442049297552, 'samples': 14220288, 'steps': 27773, 'loss/train': 2.0104522705078125} +02/25/2022 13:34:27 - INFO - codeparrot_training - Step 27774: {'lr': 0.0002210381681371504, 'samples': 14220800, 'steps': 27774, 'loss/train': 1.2885103225708008} +02/25/2022 13:34:31 - INFO - codeparrot_training - Step 27775: {'lr': 0.00022102191590538857, 'samples': 14221312, 'steps': 27775, 'loss/train': 0.9257943630218506} +02/25/2022 13:34:36 - INFO - codeparrot_training - Step 27776: {'lr': 0.00022100566379775965, 'samples': 14221824, 'steps': 27776, 'loss/train': 1.4682072401046753} +02/25/2022 13:34:40 - INFO - codeparrot_training - Step 27777: {'lr': 0.0002209894118143334, 'samples': 14222336, 'steps': 27777, 'loss/train': 1.0441948175430298} +02/25/2022 13:34:45 - INFO - codeparrot_training - Step 27778: {'lr': 0.00022097315995517913, 'samples': 14222848, 'steps': 27778, 'loss/train': 1.6112158298492432} +02/25/2022 13:34:49 - INFO - codeparrot_training - Step 27779: {'lr': 0.00022095690822036666, 'samples': 14223360, 'steps': 27779, 'loss/train': 2.0746285915374756} +02/25/2022 13:34:54 - INFO - codeparrot_training - Step 27780: {'lr': 0.00022094065660996557, 'samples': 14223872, 'steps': 27780, 'loss/train': 1.873259425163269} +02/25/2022 13:34:58 - INFO - codeparrot_training - Step 27781: {'lr': 0.0002209244051240455, 'samples': 14224384, 'steps': 27781, 'loss/train': 1.9643899202346802} +02/25/2022 13:35:04 - INFO - codeparrot_training - Step 27782: {'lr': 0.00022090815376267611, 'samples': 14224896, 'steps': 27782, 'loss/train': 1.788137674331665} +02/25/2022 13:35:09 - INFO - codeparrot_training - Step 27783: {'lr': 0.0002208919025259269, 'samples': 14225408, 'steps': 27783, 'loss/train': 2.2936220169067383} +02/25/2022 13:35:13 - INFO - codeparrot_training - Step 27784: {'lr': 0.00022087565141386747, 'samples': 14225920, 'steps': 27784, 'loss/train': 1.3192137479782104} +02/25/2022 13:35:19 - INFO - codeparrot_training - Step 27785: {'lr': 0.00022085940042656754, 'samples': 14226432, 'steps': 27785, 'loss/train': 1.7573145627975464} +02/25/2022 13:35:22 - INFO - codeparrot_training - Step 27786: {'lr': 0.00022084314956409678, 'samples': 14226944, 'steps': 27786, 'loss/train': 1.472084403038025} +02/25/2022 13:35:25 - INFO - codeparrot_training - Step 27787: {'lr': 0.00022082689882652463, 'samples': 14227456, 'steps': 27787, 'loss/train': 4.177149772644043} +02/25/2022 13:35:31 - INFO - codeparrot_training - Step 27788: {'lr': 0.00022081064821392074, 'samples': 14227968, 'steps': 27788, 'loss/train': 2.5439677238464355} +02/25/2022 13:35:35 - INFO - codeparrot_training - Step 27789: {'lr': 0.0002207943977263548, 'samples': 14228480, 'steps': 27789, 'loss/train': 1.398103952407837} +02/25/2022 13:35:40 - INFO - codeparrot_training - Step 27790: {'lr': 0.00022077814736389648, 'samples': 14228992, 'steps': 27790, 'loss/train': 2.349363327026367} +02/25/2022 13:35:43 - INFO - codeparrot_training - Step 27791: {'lr': 0.00022076189712661522, 'samples': 14229504, 'steps': 27791, 'loss/train': 1.1061385869979858} +02/25/2022 13:35:50 - INFO - codeparrot_training - Step 27792: {'lr': 0.00022074564701458067, 'samples': 14230016, 'steps': 27792, 'loss/train': 1.1201121807098389} +02/25/2022 13:35:53 - INFO - codeparrot_training - Step 27793: {'lr': 0.00022072939702786246, 'samples': 14230528, 'steps': 27793, 'loss/train': 2.7665772438049316} +02/25/2022 13:35:59 - INFO - codeparrot_training - Step 27794: {'lr': 0.00022071314716653028, 'samples': 14231040, 'steps': 27794, 'loss/train': 2.7143783569335938} +02/25/2022 13:36:02 - INFO - codeparrot_training - Step 27795: {'lr': 0.00022069689743065373, 'samples': 14231552, 'steps': 27795, 'loss/train': 1.6863172054290771} +02/25/2022 13:36:08 - INFO - codeparrot_training - Step 27796: {'lr': 0.0002206806478203023, 'samples': 14232064, 'steps': 27796, 'loss/train': 0.559899091720581} +02/25/2022 13:36:12 - INFO - codeparrot_training - Step 27797: {'lr': 0.00022066439833554565, 'samples': 14232576, 'steps': 27797, 'loss/train': 2.693077564239502} +02/25/2022 13:36:17 - INFO - codeparrot_training - Step 27798: {'lr': 0.0002206481489764534, 'samples': 14233088, 'steps': 27798, 'loss/train': 1.8381447792053223} +02/25/2022 13:36:21 - INFO - codeparrot_training - Step 27799: {'lr': 0.00022063189974309524, 'samples': 14233600, 'steps': 27799, 'loss/train': 1.5035371780395508} +02/25/2022 13:36:26 - INFO - codeparrot_training - Step 27800: {'lr': 0.00022061565063554063, 'samples': 14234112, 'steps': 27800, 'loss/train': 0.786113440990448} +02/25/2022 13:36:30 - INFO - codeparrot_training - Step 27801: {'lr': 0.00022059940165385919, 'samples': 14234624, 'steps': 27801, 'loss/train': 2.77750825881958} +02/25/2022 13:36:36 - INFO - codeparrot_training - Step 27802: {'lr': 0.0002205831527981207, 'samples': 14235136, 'steps': 27802, 'loss/train': 1.6873021125793457} +02/25/2022 13:36:39 - INFO - codeparrot_training - Step 27803: {'lr': 0.00022056690406839453, 'samples': 14235648, 'steps': 27803, 'loss/train': 1.3729861974716187} +02/25/2022 13:36:45 - INFO - codeparrot_training - Step 27804: {'lr': 0.00022055065546475048, 'samples': 14236160, 'steps': 27804, 'loss/train': 2.353654146194458} +02/25/2022 13:36:48 - INFO - codeparrot_training - Step 27805: {'lr': 0.000220534406987258, 'samples': 14236672, 'steps': 27805, 'loss/train': 1.5033620595932007} +02/25/2022 13:36:54 - INFO - codeparrot_training - Step 27806: {'lr': 0.00022051815863598684, 'samples': 14237184, 'steps': 27806, 'loss/train': 2.432969331741333} +02/25/2022 13:36:57 - INFO - codeparrot_training - Step 27807: {'lr': 0.00022050191041100647, 'samples': 14237696, 'steps': 27807, 'loss/train': 1.6492234468460083} +02/25/2022 13:37:03 - INFO - codeparrot_training - Step 27808: {'lr': 0.0002204856623123866, 'samples': 14238208, 'steps': 27808, 'loss/train': 1.7908642292022705} +02/25/2022 13:37:06 - INFO - codeparrot_training - Step 27809: {'lr': 0.0002204694143401968, 'samples': 14238720, 'steps': 27809, 'loss/train': 4.232202529907227} +02/25/2022 13:37:12 - INFO - codeparrot_training - Step 27810: {'lr': 0.00022045316649450656, 'samples': 14239232, 'steps': 27810, 'loss/train': 1.6632262468338013} +02/25/2022 13:37:15 - INFO - codeparrot_training - Step 27811: {'lr': 0.00022043691877538557, 'samples': 14239744, 'steps': 27811, 'loss/train': 1.4221631288528442} +02/25/2022 13:37:21 - INFO - codeparrot_training - Step 27812: {'lr': 0.0002204206711829035, 'samples': 14240256, 'steps': 27812, 'loss/train': 1.8015223741531372} +02/25/2022 13:37:24 - INFO - codeparrot_training - Step 27813: {'lr': 0.0002204044237171299, 'samples': 14240768, 'steps': 27813, 'loss/train': 1.09882652759552} +02/25/2022 13:37:30 - INFO - codeparrot_training - Step 27814: {'lr': 0.00022038817637813428, 'samples': 14241280, 'steps': 27814, 'loss/train': 2.1022181510925293} +02/25/2022 13:37:33 - INFO - codeparrot_training - Step 27815: {'lr': 0.00022037192916598633, 'samples': 14241792, 'steps': 27815, 'loss/train': 1.5143153667449951} +02/25/2022 13:37:39 - INFO - codeparrot_training - Step 27816: {'lr': 0.0002203556820807556, 'samples': 14242304, 'steps': 27816, 'loss/train': 1.922299861907959} +02/25/2022 13:37:42 - INFO - codeparrot_training - Step 27817: {'lr': 0.00022033943512251184, 'samples': 14242816, 'steps': 27817, 'loss/train': 2.7210869789123535} +02/25/2022 13:37:48 - INFO - codeparrot_training - Step 27818: {'lr': 0.00022032318829132442, 'samples': 14243328, 'steps': 27818, 'loss/train': 1.6526563167572021} +02/25/2022 13:37:52 - INFO - codeparrot_training - Step 27819: {'lr': 0.00022030694158726302, 'samples': 14243840, 'steps': 27819, 'loss/train': 1.861775279045105} +02/25/2022 13:37:57 - INFO - codeparrot_training - Step 27820: {'lr': 0.00022029069501039726, 'samples': 14244352, 'steps': 27820, 'loss/train': 2.442487955093384} +02/25/2022 13:38:01 - INFO - codeparrot_training - Step 27821: {'lr': 0.00022027444856079684, 'samples': 14244864, 'steps': 27821, 'loss/train': 0.662951648235321} +02/25/2022 13:38:06 - INFO - codeparrot_training - Step 27822: {'lr': 0.00022025820223853113, 'samples': 14245376, 'steps': 27822, 'loss/train': 2.484048366546631} +02/25/2022 13:38:10 - INFO - codeparrot_training - Step 27823: {'lr': 0.00022024195604366983, 'samples': 14245888, 'steps': 27823, 'loss/train': 2.381814956665039} +02/25/2022 13:38:15 - INFO - codeparrot_training - Step 27824: {'lr': 0.00022022570997628254, 'samples': 14246400, 'steps': 27824, 'loss/train': 2.6971611976623535} +02/25/2022 13:38:19 - INFO - codeparrot_training - Step 27825: {'lr': 0.00022020946403643899, 'samples': 14246912, 'steps': 27825, 'loss/train': 2.3923892974853516} +02/25/2022 13:38:24 - INFO - codeparrot_training - Step 27826: {'lr': 0.00022019321822420852, 'samples': 14247424, 'steps': 27826, 'loss/train': 2.2321488857269287} +02/25/2022 13:38:28 - INFO - codeparrot_training - Step 27827: {'lr': 0.00022017697253966082, 'samples': 14247936, 'steps': 27827, 'loss/train': 2.395418882369995} +02/25/2022 13:38:34 - INFO - codeparrot_training - Step 27828: {'lr': 0.00022016072698286551, 'samples': 14248448, 'steps': 27828, 'loss/train': 1.973207712173462} +02/25/2022 13:38:37 - INFO - codeparrot_training - Step 27829: {'lr': 0.00022014448155389217, 'samples': 14248960, 'steps': 27829, 'loss/train': 0.8779698610305786} +02/25/2022 13:38:43 - INFO - codeparrot_training - Step 27830: {'lr': 0.0002201282362528105, 'samples': 14249472, 'steps': 27830, 'loss/train': 1.4527925252914429} +02/25/2022 13:38:46 - INFO - codeparrot_training - Step 27831: {'lr': 0.00022011199107968988, 'samples': 14249984, 'steps': 27831, 'loss/train': 2.2932536602020264} +02/25/2022 13:38:52 - INFO - codeparrot_training - Step 27832: {'lr': 0.00022009574603459997, 'samples': 14250496, 'steps': 27832, 'loss/train': 1.9249000549316406} +02/25/2022 13:38:56 - INFO - codeparrot_training - Step 27833: {'lr': 0.0002200795011176104, 'samples': 14251008, 'steps': 27833, 'loss/train': 1.3450337648391724} +02/25/2022 13:39:01 - INFO - codeparrot_training - Step 27834: {'lr': 0.00022006325632879087, 'samples': 14251520, 'steps': 27834, 'loss/train': 2.4711389541625977} +02/25/2022 13:39:04 - INFO - codeparrot_training - Step 27835: {'lr': 0.00022004701166821074, 'samples': 14252032, 'steps': 27835, 'loss/train': 2.395136833190918} +02/25/2022 13:39:10 - INFO - codeparrot_training - Step 27836: {'lr': 0.0002200307671359397, 'samples': 14252544, 'steps': 27836, 'loss/train': 1.8185944557189941} +02/25/2022 13:39:13 - INFO - codeparrot_training - Step 27837: {'lr': 0.0002200145227320473, 'samples': 14253056, 'steps': 27837, 'loss/train': 1.2070010900497437} +02/25/2022 13:39:20 - INFO - codeparrot_training - Step 27838: {'lr': 0.00021999827845660332, 'samples': 14253568, 'steps': 27838, 'loss/train': 1.2528291940689087} +02/25/2022 13:39:23 - INFO - codeparrot_training - Step 27839: {'lr': 0.00021998203430967706, 'samples': 14254080, 'steps': 27839, 'loss/train': 1.6341338157653809} +02/25/2022 13:39:29 - INFO - codeparrot_training - Step 27840: {'lr': 0.00021996579029133823, 'samples': 14254592, 'steps': 27840, 'loss/train': 2.2483770847320557} +02/25/2022 13:39:32 - INFO - codeparrot_training - Step 27841: {'lr': 0.00021994954640165644, 'samples': 14255104, 'steps': 27841, 'loss/train': 1.8836464881896973} +02/25/2022 13:39:38 - INFO - codeparrot_training - Step 27842: {'lr': 0.00021993330264070127, 'samples': 14255616, 'steps': 27842, 'loss/train': 1.5909324884414673} +02/25/2022 13:39:42 - INFO - codeparrot_training - Step 27843: {'lr': 0.0002199170590085423, 'samples': 14256128, 'steps': 27843, 'loss/train': 2.447415828704834} +02/25/2022 13:39:47 - INFO - codeparrot_training - Step 27844: {'lr': 0.00021990081550524903, 'samples': 14256640, 'steps': 27844, 'loss/train': 0.5877178311347961} +02/25/2022 13:39:51 - INFO - codeparrot_training - Step 27845: {'lr': 0.00021988457213089114, 'samples': 14257152, 'steps': 27845, 'loss/train': 0.4085596799850464} +02/25/2022 13:39:56 - INFO - codeparrot_training - Step 27846: {'lr': 0.0002198683288855382, 'samples': 14257664, 'steps': 27846, 'loss/train': 2.3782947063446045} +02/25/2022 13:40:00 - INFO - codeparrot_training - Step 27847: {'lr': 0.0002198520857692598, 'samples': 14258176, 'steps': 27847, 'loss/train': 1.4745988845825195} +02/25/2022 13:40:06 - INFO - codeparrot_training - Step 27848: {'lr': 0.00021983584278212543, 'samples': 14258688, 'steps': 27848, 'loss/train': 2.190983295440674} +02/25/2022 13:40:10 - INFO - codeparrot_training - Step 27849: {'lr': 0.00021981959992420475, 'samples': 14259200, 'steps': 27849, 'loss/train': 1.711606502532959} +02/25/2022 13:40:15 - INFO - codeparrot_training - Step 27850: {'lr': 0.0002198033571955673, 'samples': 14259712, 'steps': 27850, 'loss/train': 1.4930320978164673} +02/25/2022 13:40:18 - INFO - codeparrot_training - Step 27851: {'lr': 0.0002197871145962827, 'samples': 14260224, 'steps': 27851, 'loss/train': 2.3477847576141357} +02/25/2022 13:40:24 - INFO - codeparrot_training - Step 27852: {'lr': 0.00021977087212642052, 'samples': 14260736, 'steps': 27852, 'loss/train': 1.8510905504226685} +02/25/2022 13:40:27 - INFO - codeparrot_training - Step 27853: {'lr': 0.00021975462978605036, 'samples': 14261248, 'steps': 27853, 'loss/train': 1.3851715326309204} +02/25/2022 13:40:33 - INFO - codeparrot_training - Step 27854: {'lr': 0.00021973838757524168, 'samples': 14261760, 'steps': 27854, 'loss/train': 0.8404471278190613} +02/25/2022 13:40:37 - INFO - codeparrot_training - Step 27855: {'lr': 0.00021972214549406414, 'samples': 14262272, 'steps': 27855, 'loss/train': 1.822780966758728} +02/25/2022 13:40:42 - INFO - codeparrot_training - Step 27856: {'lr': 0.00021970590354258743, 'samples': 14262784, 'steps': 27856, 'loss/train': 1.6116869449615479} +02/25/2022 13:40:46 - INFO - codeparrot_training - Step 27857: {'lr': 0.0002196896617208809, 'samples': 14263296, 'steps': 27857, 'loss/train': 0.8649618029594421} +02/25/2022 13:40:51 - INFO - codeparrot_training - Step 27858: {'lr': 0.00021967342002901424, 'samples': 14263808, 'steps': 27858, 'loss/train': 2.0634584426879883} +02/25/2022 13:40:55 - INFO - codeparrot_training - Step 27859: {'lr': 0.00021965717846705702, 'samples': 14264320, 'steps': 27859, 'loss/train': 1.420082688331604} +02/25/2022 13:41:00 - INFO - codeparrot_training - Step 27860: {'lr': 0.00021964093703507893, 'samples': 14264832, 'steps': 27860, 'loss/train': 1.009238362312317} +02/25/2022 13:41:04 - INFO - codeparrot_training - Step 27861: {'lr': 0.00021962469573314928, 'samples': 14265344, 'steps': 27861, 'loss/train': 2.0793399810791016} +02/25/2022 13:41:09 - INFO - codeparrot_training - Step 27862: {'lr': 0.00021960845456133783, 'samples': 14265856, 'steps': 27862, 'loss/train': 1.3136025667190552} +02/25/2022 13:41:13 - INFO - codeparrot_training - Step 27863: {'lr': 0.0002195922135197141, 'samples': 14266368, 'steps': 27863, 'loss/train': 1.86370050907135} +02/25/2022 13:41:19 - INFO - codeparrot_training - Step 27864: {'lr': 0.00021957597260834763, 'samples': 14266880, 'steps': 27864, 'loss/train': 2.7393109798431396} +02/25/2022 13:41:22 - INFO - codeparrot_training - Step 27865: {'lr': 0.00021955973182730818, 'samples': 14267392, 'steps': 27865, 'loss/train': 1.668273687362671} +02/25/2022 13:41:28 - INFO - codeparrot_training - Step 27866: {'lr': 0.00021954349117666506, 'samples': 14267904, 'steps': 27866, 'loss/train': 1.5442965030670166} +02/25/2022 13:41:31 - INFO - codeparrot_training - Step 27867: {'lr': 0.00021952725065648796, 'samples': 14268416, 'steps': 27867, 'loss/train': 2.2530710697174072} +02/25/2022 13:41:37 - INFO - codeparrot_training - Step 27868: {'lr': 0.00021951101026684643, 'samples': 14268928, 'steps': 27868, 'loss/train': 2.1887032985687256} +02/25/2022 13:41:40 - INFO - codeparrot_training - Step 27869: {'lr': 0.00021949477000781018, 'samples': 14269440, 'steps': 27869, 'loss/train': 2.6383321285247803} +02/25/2022 13:41:46 - INFO - codeparrot_training - Step 27870: {'lr': 0.0002194785298794485, 'samples': 14269952, 'steps': 27870, 'loss/train': 1.1898847818374634} +02/25/2022 13:41:50 - INFO - codeparrot_training - Step 27871: {'lr': 0.00021946228988183115, 'samples': 14270464, 'steps': 27871, 'loss/train': 1.9612748622894287} +02/25/2022 13:41:55 - INFO - codeparrot_training - Step 27872: {'lr': 0.00021944605001502761, 'samples': 14270976, 'steps': 27872, 'loss/train': 0.6058495044708252} +02/25/2022 13:41:59 - INFO - codeparrot_training - Step 27873: {'lr': 0.00021942981027910763, 'samples': 14271488, 'steps': 27873, 'loss/train': 1.4544835090637207} +02/25/2022 13:42:05 - INFO - codeparrot_training - Step 27874: {'lr': 0.00021941357067414052, 'samples': 14272000, 'steps': 27874, 'loss/train': 1.8225550651550293} +02/25/2022 13:42:08 - INFO - codeparrot_training - Step 27875: {'lr': 0.00021939733120019599, 'samples': 14272512, 'steps': 27875, 'loss/train': 1.949411153793335} +02/25/2022 13:42:14 - INFO - codeparrot_training - Step 27876: {'lr': 0.0002193810918573435, 'samples': 14273024, 'steps': 27876, 'loss/train': 1.260650873184204} +02/25/2022 13:42:17 - INFO - codeparrot_training - Step 27877: {'lr': 0.00021936485264565275, 'samples': 14273536, 'steps': 27877, 'loss/train': 2.270949125289917} +02/25/2022 13:42:23 - INFO - codeparrot_training - Step 27878: {'lr': 0.00021934861356519335, 'samples': 14274048, 'steps': 27878, 'loss/train': 2.3504598140716553} +02/25/2022 13:42:26 - INFO - codeparrot_training - Step 27879: {'lr': 0.00021933237461603462, 'samples': 14274560, 'steps': 27879, 'loss/train': 0.7530683875083923} +02/25/2022 13:42:32 - INFO - codeparrot_training - Step 27880: {'lr': 0.00021931613579824626, 'samples': 14275072, 'steps': 27880, 'loss/train': 1.8609378337860107} +02/25/2022 13:42:35 - INFO - codeparrot_training - Step 27881: {'lr': 0.00021929989711189787, 'samples': 14275584, 'steps': 27881, 'loss/train': 1.4496419429779053} +02/25/2022 13:42:41 - INFO - codeparrot_training - Step 27882: {'lr': 0.00021928365855705906, 'samples': 14276096, 'steps': 27882, 'loss/train': 2.198673725128174} +02/25/2022 13:42:44 - INFO - codeparrot_training - Step 27883: {'lr': 0.00021926742013379918, 'samples': 14276608, 'steps': 27883, 'loss/train': 1.1382017135620117} +02/25/2022 13:42:50 - INFO - codeparrot_training - Step 27884: {'lr': 0.00021925118184218793, 'samples': 14277120, 'steps': 27884, 'loss/train': 1.882972240447998} +02/25/2022 13:42:54 - INFO - codeparrot_training - Step 27885: {'lr': 0.00021923494368229486, 'samples': 14277632, 'steps': 27885, 'loss/train': 1.4330956935882568} +02/25/2022 13:42:59 - INFO - codeparrot_training - Step 27886: {'lr': 0.00021921870565418962, 'samples': 14278144, 'steps': 27886, 'loss/train': 1.6983836889266968} +02/25/2022 13:43:03 - INFO - codeparrot_training - Step 27887: {'lr': 0.00021920246775794156, 'samples': 14278656, 'steps': 27887, 'loss/train': 1.799717903137207} +02/25/2022 13:43:08 - INFO - codeparrot_training - Step 27888: {'lr': 0.00021918622999362035, 'samples': 14279168, 'steps': 27888, 'loss/train': 1.973542332649231} +02/25/2022 13:43:12 - INFO - codeparrot_training - Step 27889: {'lr': 0.00021916999236129558, 'samples': 14279680, 'steps': 27889, 'loss/train': 2.15346622467041} +02/25/2022 13:43:17 - INFO - codeparrot_training - Step 27890: {'lr': 0.00021915375486103675, 'samples': 14280192, 'steps': 27890, 'loss/train': 1.8841772079467773} +02/25/2022 13:43:21 - INFO - codeparrot_training - Step 27891: {'lr': 0.00021913751749291346, 'samples': 14280704, 'steps': 27891, 'loss/train': 1.1203455924987793} +02/25/2022 13:43:26 - INFO - codeparrot_training - Step 27892: {'lr': 0.00021912128025699523, 'samples': 14281216, 'steps': 27892, 'loss/train': 0.9009745121002197} +02/25/2022 13:43:30 - INFO - codeparrot_training - Step 27893: {'lr': 0.00021910504315335167, 'samples': 14281728, 'steps': 27893, 'loss/train': 1.8594086170196533} +02/25/2022 13:43:36 - INFO - codeparrot_training - Step 27894: {'lr': 0.00021908880618205223, 'samples': 14282240, 'steps': 27894, 'loss/train': 1.881860613822937} +02/25/2022 13:43:39 - INFO - codeparrot_training - Step 27895: {'lr': 0.0002190725693431666, 'samples': 14282752, 'steps': 27895, 'loss/train': 1.39854896068573} +02/25/2022 13:43:45 - INFO - codeparrot_training - Step 27896: {'lr': 0.00021905633263676424, 'samples': 14283264, 'steps': 27896, 'loss/train': 1.7973921298980713} +02/25/2022 13:43:48 - INFO - codeparrot_training - Step 27897: {'lr': 0.0002190400960629147, 'samples': 14283776, 'steps': 27897, 'loss/train': 2.187462091445923} +02/25/2022 13:43:54 - INFO - codeparrot_training - Step 27898: {'lr': 0.00021902385962168752, 'samples': 14284288, 'steps': 27898, 'loss/train': 2.111696481704712} +02/25/2022 13:43:58 - INFO - codeparrot_training - Step 27899: {'lr': 0.00021900762331315238, 'samples': 14284800, 'steps': 27899, 'loss/train': 1.4585720300674438} +02/25/2022 13:44:03 - INFO - codeparrot_training - Step 27900: {'lr': 0.00021899138713737875, 'samples': 14285312, 'steps': 27900, 'loss/train': 2.317547082901001} +02/25/2022 13:44:07 - INFO - codeparrot_training - Step 27901: {'lr': 0.00021897515109443609, 'samples': 14285824, 'steps': 27901, 'loss/train': 0.6042247414588928} +02/25/2022 13:44:12 - INFO - codeparrot_training - Step 27902: {'lr': 0.00021895891518439403, 'samples': 14286336, 'steps': 27902, 'loss/train': 1.846108078956604} +02/25/2022 13:44:16 - INFO - codeparrot_training - Step 27903: {'lr': 0.00021894267940732211, 'samples': 14286848, 'steps': 27903, 'loss/train': 2.2347066402435303} +02/25/2022 13:44:22 - INFO - codeparrot_training - Step 27904: {'lr': 0.00021892644376329001, 'samples': 14287360, 'steps': 27904, 'loss/train': 1.7606449127197266} +02/25/2022 13:44:25 - INFO - codeparrot_training - Step 27905: {'lr': 0.00021891020825236707, 'samples': 14287872, 'steps': 27905, 'loss/train': 2.2213141918182373} +02/25/2022 13:44:31 - INFO - codeparrot_training - Step 27906: {'lr': 0.0002188939728746229, 'samples': 14288384, 'steps': 27906, 'loss/train': 2.346630811691284} +02/25/2022 13:44:34 - INFO - codeparrot_training - Step 27907: {'lr': 0.0002188777376301271, 'samples': 14288896, 'steps': 27907, 'loss/train': 1.9391924142837524} +02/25/2022 13:44:40 - INFO - codeparrot_training - Step 27908: {'lr': 0.00021886150251894927, 'samples': 14289408, 'steps': 27908, 'loss/train': 2.502089500427246} +02/25/2022 13:44:43 - INFO - codeparrot_training - Step 27909: {'lr': 0.00021884526754115878, 'samples': 14289920, 'steps': 27909, 'loss/train': 0.837447464466095} +02/25/2022 13:44:49 - INFO - codeparrot_training - Step 27910: {'lr': 0.00021882903269682526, 'samples': 14290432, 'steps': 27910, 'loss/train': 1.8735260963439941} +02/25/2022 13:44:52 - INFO - codeparrot_training - Step 27911: {'lr': 0.00021881279798601828, 'samples': 14290944, 'steps': 27911, 'loss/train': 1.3187624216079712} +02/25/2022 13:44:58 - INFO - codeparrot_training - Step 27912: {'lr': 0.00021879656340880734, 'samples': 14291456, 'steps': 27912, 'loss/train': 2.862642526626587} +02/25/2022 13:45:02 - INFO - codeparrot_training - Step 27913: {'lr': 0.00021878032896526216, 'samples': 14291968, 'steps': 27913, 'loss/train': 3.502828598022461} +02/25/2022 13:45:05 - INFO - codeparrot_training - Step 27914: {'lr': 0.00021876409465545202, 'samples': 14292480, 'steps': 27914, 'loss/train': 2.9667201042175293} +02/25/2022 13:45:10 - INFO - codeparrot_training - Step 27915: {'lr': 0.00021874786047944658, 'samples': 14292992, 'steps': 27915, 'loss/train': 1.2065900564193726} +02/25/2022 13:45:14 - INFO - codeparrot_training - Step 27916: {'lr': 0.00021873162643731535, 'samples': 14293504, 'steps': 27916, 'loss/train': 1.3866487741470337} +02/25/2022 13:45:19 - INFO - codeparrot_training - Step 27917: {'lr': 0.00021871539252912807, 'samples': 14294016, 'steps': 27917, 'loss/train': 2.808729410171509} +02/25/2022 13:45:23 - INFO - codeparrot_training - Step 27918: {'lr': 0.00021869915875495397, 'samples': 14294528, 'steps': 27918, 'loss/train': 1.9199239015579224} +02/25/2022 13:45:28 - INFO - codeparrot_training - Step 27919: {'lr': 0.00021868292511486274, 'samples': 14295040, 'steps': 27919, 'loss/train': 1.946401834487915} +02/25/2022 13:45:32 - INFO - codeparrot_training - Step 27920: {'lr': 0.00021866669160892392, 'samples': 14295552, 'steps': 27920, 'loss/train': 1.187985897064209} +02/25/2022 13:45:37 - INFO - codeparrot_training - Step 27921: {'lr': 0.00021865045823720713, 'samples': 14296064, 'steps': 27921, 'loss/train': 2.733861207962036} +02/25/2022 13:45:41 - INFO - codeparrot_training - Step 27922: {'lr': 0.00021863422499978174, 'samples': 14296576, 'steps': 27922, 'loss/train': 2.041826009750366} +02/25/2022 13:45:46 - INFO - codeparrot_training - Step 27923: {'lr': 0.00021861799189671737, 'samples': 14297088, 'steps': 27923, 'loss/train': 1.9650715589523315} +02/25/2022 13:45:49 - INFO - codeparrot_training - Dataset epoch: 1 +02/25/2022 13:45:54 - INFO - codeparrot_training - Step 27924: {'lr': 0.00021860175892808353, 'samples': 14297600, 'steps': 27924, 'loss/train': 1.7330572605133057} +02/25/2022 13:45:57 - INFO - codeparrot_training - Step 27925: {'lr': 0.00021858552609394983, 'samples': 14298112, 'steps': 27925, 'loss/train': 2.0157382488250732} +02/25/2022 13:46:01 - INFO - codeparrot_training - Step 27926: {'lr': 0.00021856929339438583, 'samples': 14298624, 'steps': 27926, 'loss/train': 2.117175579071045} +02/25/2022 13:46:06 - INFO - codeparrot_training - Step 27927: {'lr': 0.00021855306082946093, 'samples': 14299136, 'steps': 27927, 'loss/train': 1.6364167928695679} +02/25/2022 13:46:10 - INFO - codeparrot_training - Step 27928: {'lr': 0.00021853682839924468, 'samples': 14299648, 'steps': 27928, 'loss/train': 1.7808254957199097} +02/25/2022 13:46:16 - INFO - codeparrot_training - Step 27929: {'lr': 0.00021852059610380677, 'samples': 14300160, 'steps': 27929, 'loss/train': 1.8157947063446045} +02/25/2022 13:46:20 - INFO - codeparrot_training - Step 27930: {'lr': 0.00021850436394321658, 'samples': 14300672, 'steps': 27930, 'loss/train': 0.9372547268867493} +02/25/2022 13:46:25 - INFO - codeparrot_training - Step 27931: {'lr': 0.00021848813191754365, 'samples': 14301184, 'steps': 27931, 'loss/train': 1.892242670059204} +02/25/2022 13:46:29 - INFO - codeparrot_training - Step 27932: {'lr': 0.00021847190002685757, 'samples': 14301696, 'steps': 27932, 'loss/train': 2.2397842407226562} +02/25/2022 13:46:34 - INFO - codeparrot_training - Step 27933: {'lr': 0.00021845566827122793, 'samples': 14302208, 'steps': 27933, 'loss/train': 0.16233785450458527} +02/25/2022 13:46:38 - INFO - codeparrot_training - Step 27934: {'lr': 0.0002184394366507242, 'samples': 14302720, 'steps': 27934, 'loss/train': 2.0924746990203857} +02/25/2022 13:46:43 - INFO - codeparrot_training - Step 27935: {'lr': 0.0002184232051654158, 'samples': 14303232, 'steps': 27935, 'loss/train': 1.54048752784729} +02/25/2022 13:46:47 - INFO - codeparrot_training - Step 27936: {'lr': 0.00021840697381537245, 'samples': 14303744, 'steps': 27936, 'loss/train': 2.669825792312622} +02/25/2022 13:46:52 - INFO - codeparrot_training - Step 27937: {'lr': 0.0002183907426006635, 'samples': 14304256, 'steps': 27937, 'loss/train': 1.4830563068389893} +02/25/2022 13:46:56 - INFO - codeparrot_training - Step 27938: {'lr': 0.00021837451152135863, 'samples': 14304768, 'steps': 27938, 'loss/train': 1.3057787418365479} +02/25/2022 13:47:01 - INFO - codeparrot_training - Step 27939: {'lr': 0.00021835828057752732, 'samples': 14305280, 'steps': 27939, 'loss/train': 1.0629407167434692} +02/25/2022 13:47:05 - INFO - codeparrot_training - Step 27940: {'lr': 0.0002183420497692391, 'samples': 14305792, 'steps': 27940, 'loss/train': 2.2861998081207275} +02/25/2022 13:47:11 - INFO - codeparrot_training - Step 27941: {'lr': 0.00021832581909656342, 'samples': 14306304, 'steps': 27941, 'loss/train': 0.4250176250934601} +02/25/2022 13:47:14 - INFO - codeparrot_training - Step 27942: {'lr': 0.0002183095885595699, 'samples': 14306816, 'steps': 27942, 'loss/train': 1.5778084993362427} +02/25/2022 13:47:20 - INFO - codeparrot_training - Step 27943: {'lr': 0.00021829335815832813, 'samples': 14307328, 'steps': 27943, 'loss/train': 2.0959837436676025} +02/25/2022 13:47:23 - INFO - codeparrot_training - Step 27944: {'lr': 0.00021827712789290743, 'samples': 14307840, 'steps': 27944, 'loss/train': 1.300779938697815} +02/25/2022 13:47:29 - INFO - codeparrot_training - Step 27945: {'lr': 0.0002182608977633775, 'samples': 14308352, 'steps': 27945, 'loss/train': 1.7630836963653564} +02/25/2022 13:47:34 - INFO - codeparrot_training - Step 27946: {'lr': 0.00021824466776980772, 'samples': 14308864, 'steps': 27946, 'loss/train': 1.1993800401687622} +02/25/2022 13:47:38 - INFO - codeparrot_training - Step 27947: {'lr': 0.00021822843791226786, 'samples': 14309376, 'steps': 27947, 'loss/train': 0.8562624454498291} +02/25/2022 13:47:43 - INFO - codeparrot_training - Step 27948: {'lr': 0.00021821220819082714, 'samples': 14309888, 'steps': 27948, 'loss/train': 1.0514682531356812} +02/25/2022 13:47:47 - INFO - codeparrot_training - Step 27949: {'lr': 0.00021819597860555525, 'samples': 14310400, 'steps': 27949, 'loss/train': 1.1465142965316772} +02/25/2022 13:47:53 - INFO - codeparrot_training - Step 27950: {'lr': 0.00021817974915652172, 'samples': 14310912, 'steps': 27950, 'loss/train': 2.1919548511505127} +02/25/2022 13:47:56 - INFO - codeparrot_training - Step 27951: {'lr': 0.000218163519843796, 'samples': 14311424, 'steps': 27951, 'loss/train': 2.164722204208374} +02/25/2022 13:48:02 - INFO - codeparrot_training - Step 27952: {'lr': 0.00021814729066744776, 'samples': 14311936, 'steps': 27952, 'loss/train': 1.4317976236343384} +02/25/2022 13:48:05 - INFO - codeparrot_training - Step 27953: {'lr': 0.00021813106162754632, 'samples': 14312448, 'steps': 27953, 'loss/train': 2.4428582191467285} +02/25/2022 13:48:11 - INFO - codeparrot_training - Step 27954: {'lr': 0.00021811483272416127, 'samples': 14312960, 'steps': 27954, 'loss/train': 2.5472686290740967} +02/25/2022 13:48:15 - INFO - codeparrot_training - Step 27955: {'lr': 0.00021809860395736216, 'samples': 14313472, 'steps': 27955, 'loss/train': 2.027923107147217} +02/25/2022 13:48:20 - INFO - codeparrot_training - Step 27956: {'lr': 0.00021808237532721864, 'samples': 14313984, 'steps': 27956, 'loss/train': 1.3858634233474731} +02/25/2022 13:48:24 - INFO - codeparrot_training - Step 27957: {'lr': 0.00021806614683379994, 'samples': 14314496, 'steps': 27957, 'loss/train': 2.2144172191619873} +02/25/2022 13:48:29 - INFO - codeparrot_training - Step 27958: {'lr': 0.00021804991847717577, 'samples': 14315008, 'steps': 27958, 'loss/train': 1.7784770727157593} +02/25/2022 13:48:33 - INFO - codeparrot_training - Step 27959: {'lr': 0.00021803369025741556, 'samples': 14315520, 'steps': 27959, 'loss/train': 1.1121454238891602} +02/25/2022 13:48:39 - INFO - codeparrot_training - Step 27960: {'lr': 0.0002180174621745889, 'samples': 14316032, 'steps': 27960, 'loss/train': 2.161804437637329} +02/25/2022 13:48:42 - INFO - codeparrot_training - Step 27961: {'lr': 0.00021800123422876537, 'samples': 14316544, 'steps': 27961, 'loss/train': 2.4507758617401123} +02/25/2022 13:48:48 - INFO - codeparrot_training - Step 27962: {'lr': 0.00021798500642001428, 'samples': 14317056, 'steps': 27962, 'loss/train': 1.5650053024291992} +02/25/2022 13:48:51 - INFO - codeparrot_training - Step 27963: {'lr': 0.00021796877874840525, 'samples': 14317568, 'steps': 27963, 'loss/train': 1.986311912536621} +02/25/2022 13:48:57 - INFO - codeparrot_training - Step 27964: {'lr': 0.0002179525512140078, 'samples': 14318080, 'steps': 27964, 'loss/train': 0.8944240808486938} +02/25/2022 13:49:00 - INFO - codeparrot_training - Step 27965: {'lr': 0.0002179363238168916, 'samples': 14318592, 'steps': 27965, 'loss/train': 1.7376790046691895} +02/25/2022 13:49:06 - INFO - codeparrot_training - Step 27966: {'lr': 0.00021792009655712585, 'samples': 14319104, 'steps': 27966, 'loss/train': 1.2342053651809692} +02/25/2022 13:49:09 - INFO - codeparrot_training - Step 27967: {'lr': 0.00021790386943478025, 'samples': 14319616, 'steps': 27967, 'loss/train': 2.6067380905151367} +02/25/2022 13:49:15 - INFO - codeparrot_training - Step 27968: {'lr': 0.00021788764244992426, 'samples': 14320128, 'steps': 27968, 'loss/train': 2.0745351314544678} +02/25/2022 13:49:18 - INFO - codeparrot_training - Step 27969: {'lr': 0.00021787141560262752, 'samples': 14320640, 'steps': 27969, 'loss/train': 1.684960126876831} +02/25/2022 13:49:24 - INFO - codeparrot_training - Step 27970: {'lr': 0.00021785518889295936, 'samples': 14321152, 'steps': 27970, 'loss/train': 0.7392134070396423} +02/25/2022 13:49:27 - INFO - codeparrot_training - Step 27971: {'lr': 0.00021783896232098932, 'samples': 14321664, 'steps': 27971, 'loss/train': 0.48542502522468567} +02/25/2022 13:49:33 - INFO - codeparrot_training - Step 27972: {'lr': 0.00021782273588678697, 'samples': 14322176, 'steps': 27972, 'loss/train': 1.3457531929016113} +02/25/2022 13:49:37 - INFO - codeparrot_training - Step 27973: {'lr': 0.00021780650959042186, 'samples': 14322688, 'steps': 27973, 'loss/train': 1.9171829223632812} +02/25/2022 13:49:42 - INFO - codeparrot_training - Step 27974: {'lr': 0.00021779028343196343, 'samples': 14323200, 'steps': 27974, 'loss/train': 1.6579058170318604} +02/25/2022 13:49:46 - INFO - codeparrot_training - Step 27975: {'lr': 0.00021777405741148115, 'samples': 14323712, 'steps': 27975, 'loss/train': 0.8871021270751953} +02/25/2022 13:49:51 - INFO - codeparrot_training - Step 27976: {'lr': 0.00021775783152904463, 'samples': 14324224, 'steps': 27976, 'loss/train': 2.3760933876037598} +02/25/2022 13:49:55 - INFO - codeparrot_training - Step 27977: {'lr': 0.00021774160578472328, 'samples': 14324736, 'steps': 27977, 'loss/train': 1.3642617464065552} +02/25/2022 13:50:00 - INFO - codeparrot_training - Step 27978: {'lr': 0.00021772538017858668, 'samples': 14325248, 'steps': 27978, 'loss/train': 1.0976110696792603} +02/25/2022 13:50:04 - INFO - codeparrot_training - Step 27979: {'lr': 0.00021770915471070428, 'samples': 14325760, 'steps': 27979, 'loss/train': 1.7964541912078857} +02/25/2022 13:50:09 - INFO - codeparrot_training - Step 27980: {'lr': 0.00021769292938114563, 'samples': 14326272, 'steps': 27980, 'loss/train': 2.5346992015838623} +02/25/2022 13:50:13 - INFO - codeparrot_training - Step 27981: {'lr': 0.00021767670418998015, 'samples': 14326784, 'steps': 27981, 'loss/train': 2.2278366088867188} +02/25/2022 13:50:18 - INFO - codeparrot_training - Step 27982: {'lr': 0.0002176604791372775, 'samples': 14327296, 'steps': 27982, 'loss/train': 1.601348876953125} +02/25/2022 13:50:22 - INFO - codeparrot_training - Step 27983: {'lr': 0.00021764425422310705, 'samples': 14327808, 'steps': 27983, 'loss/train': 1.6496527194976807} +02/25/2022 13:50:27 - INFO - codeparrot_training - Step 27984: {'lr': 0.00021762802944753828, 'samples': 14328320, 'steps': 27984, 'loss/train': 1.5080457925796509} +02/25/2022 13:50:31 - INFO - codeparrot_training - Step 27985: {'lr': 0.0002176118048106408, 'samples': 14328832, 'steps': 27985, 'loss/train': 1.2395840883255005} +02/25/2022 13:50:37 - INFO - codeparrot_training - Step 27986: {'lr': 0.00021759558031248403, 'samples': 14329344, 'steps': 27986, 'loss/train': 0.9466220140457153} +02/25/2022 13:50:43 - INFO - codeparrot_training - Step 27987: {'lr': 0.00021757935595313762, 'samples': 14329856, 'steps': 27987, 'loss/train': 1.5399867296218872} +02/25/2022 13:50:46 - INFO - codeparrot_training - Step 27988: {'lr': 0.00021756313173267085, 'samples': 14330368, 'steps': 27988, 'loss/train': 2.2046821117401123} +02/25/2022 13:50:52 - INFO - codeparrot_training - Step 27989: {'lr': 0.00021754690765115331, 'samples': 14330880, 'steps': 27989, 'loss/train': 2.1169803142547607} +02/25/2022 13:50:55 - INFO - codeparrot_training - Step 27990: {'lr': 0.00021753068370865454, 'samples': 14331392, 'steps': 27990, 'loss/train': 1.42330002784729} +02/25/2022 13:51:01 - INFO - codeparrot_training - Step 27991: {'lr': 0.0002175144599052441, 'samples': 14331904, 'steps': 27991, 'loss/train': 1.930044174194336} +02/25/2022 13:51:04 - INFO - codeparrot_training - Step 27992: {'lr': 0.0002174982362409913, 'samples': 14332416, 'steps': 27992, 'loss/train': 1.9812712669372559} +02/25/2022 13:51:10 - INFO - codeparrot_training - Step 27993: {'lr': 0.00021748201271596575, 'samples': 14332928, 'steps': 27993, 'loss/train': 1.6960402727127075} +02/25/2022 13:51:13 - INFO - codeparrot_training - Step 27994: {'lr': 0.00021746578933023688, 'samples': 14333440, 'steps': 27994, 'loss/train': 1.3897360563278198} +02/25/2022 13:51:19 - INFO - codeparrot_training - Step 27995: {'lr': 0.0002174495660838744, 'samples': 14333952, 'steps': 27995, 'loss/train': 1.360162615776062} +02/25/2022 13:51:23 - INFO - codeparrot_training - Step 27996: {'lr': 0.0002174333429769475, 'samples': 14334464, 'steps': 27996, 'loss/train': 2.202347993850708} +02/25/2022 13:51:28 - INFO - codeparrot_training - Step 27997: {'lr': 0.00021741712000952583, 'samples': 14334976, 'steps': 27997, 'loss/train': 1.8725557327270508} +02/25/2022 13:51:32 - INFO - codeparrot_training - Step 27998: {'lr': 0.00021740089718167886, 'samples': 14335488, 'steps': 27998, 'loss/train': 1.9601777791976929} +02/25/2022 13:51:37 - INFO - codeparrot_training - Step 27999: {'lr': 0.0002173846744934761, 'samples': 14336000, 'steps': 27999, 'loss/train': 1.8428465127944946} +02/25/2022 13:51:37 - INFO - codeparrot_training - Evaluating and saving model checkpoint