diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -11367,3 +11367,1009 @@ Use FP16 precision: False 02/24/2022 15:47:48 - INFO - codeparrot_training - Step 10998: {'lr': 0.000457885582288069, 'samples': 5631488, 'steps': 10998, 'loss/train': 1.8267676830291748} 02/24/2022 15:47:54 - INFO - codeparrot_training - Step 10999: {'lr': 0.0004578764931270911, 'samples': 5632000, 'steps': 10999, 'loss/train': 1.6772180795669556} 02/24/2022 15:47:54 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 15:48:11 - WARNING - huggingface_hub.repository - Several commits (11) will be pushed upstream. +02/24/2022 15:48:11 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 15:48:44 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + c2659cd..3d9f7da floral-grass-11 -> floral-grass-11 + +02/24/2022 15:48:49 - INFO - codeparrot_training - Step 11000: {'lr': 0.00045786740307563633, 'samples': 5632512, 'steps': 11000, 'loss/train': 2.46366024017334} +02/24/2022 15:48:54 - INFO - codeparrot_training - Step 11001: {'lr': 0.0004578583121337436, 'samples': 5633024, 'steps': 11001, 'loss/train': 2.297736644744873} +02/24/2022 15:48:58 - INFO - codeparrot_training - Step 11002: {'lr': 0.0004578492203014518, 'samples': 5633536, 'steps': 11002, 'loss/train': 2.6836161613464355} +02/24/2022 15:49:03 - INFO - codeparrot_training - Step 11003: {'lr': 0.00045784012757880006, 'samples': 5634048, 'steps': 11003, 'loss/train': 3.5049614906311035} +02/24/2022 15:49:07 - INFO - codeparrot_training - Step 11004: {'lr': 0.00045783103396582713, 'samples': 5634560, 'steps': 11004, 'loss/train': 2.30288028717041} +02/24/2022 15:49:13 - INFO - codeparrot_training - Step 11005: {'lr': 0.0004578219394625721, 'samples': 5635072, 'steps': 11005, 'loss/train': 2.979309558868408} +02/24/2022 15:49:16 - INFO - codeparrot_training - Step 11006: {'lr': 0.0004578128440690738, 'samples': 5635584, 'steps': 11006, 'loss/train': 1.7592554092407227} +02/24/2022 15:49:22 - INFO - codeparrot_training - Step 11007: {'lr': 0.00045780374778537134, 'samples': 5636096, 'steps': 11007, 'loss/train': 1.4693125486373901} +02/24/2022 15:49:26 - INFO - codeparrot_training - Step 11008: {'lr': 0.00045779465061150356, 'samples': 5636608, 'steps': 11008, 'loss/train': 1.9468352794647217} +02/24/2022 15:49:31 - INFO - codeparrot_training - Step 11009: {'lr': 0.0004577855525475095, 'samples': 5637120, 'steps': 11009, 'loss/train': 2.276190996170044} +02/24/2022 15:49:35 - INFO - codeparrot_training - Step 11010: {'lr': 0.0004577764535934281, 'samples': 5637632, 'steps': 11010, 'loss/train': 1.9736546277999878} +02/24/2022 15:49:40 - INFO - codeparrot_training - Step 11011: {'lr': 0.00045776735374929834, 'samples': 5638144, 'steps': 11011, 'loss/train': 2.3329107761383057} +02/24/2022 15:49:44 - INFO - codeparrot_training - Step 11012: {'lr': 0.00045775825301515923, 'samples': 5638656, 'steps': 11012, 'loss/train': 1.7059834003448486} +02/24/2022 15:49:50 - INFO - codeparrot_training - Step 11013: {'lr': 0.00045774915139104973, 'samples': 5639168, 'steps': 11013, 'loss/train': 1.292406678199768} +02/24/2022 15:49:54 - INFO - codeparrot_training - Step 11014: {'lr': 0.0004577400488770088, 'samples': 5639680, 'steps': 11014, 'loss/train': 1.446625828742981} +02/24/2022 15:49:57 - INFO - codeparrot_training - Step 11015: {'lr': 0.0004577309454730755, 'samples': 5640192, 'steps': 11015, 'loss/train': 3.102872371673584} +02/24/2022 15:50:02 - INFO - codeparrot_training - Step 11016: {'lr': 0.00045772184117928884, 'samples': 5640704, 'steps': 11016, 'loss/train': 2.154526710510254} +02/24/2022 15:50:06 - INFO - codeparrot_training - Step 11017: {'lr': 0.00045771273599568767, 'samples': 5641216, 'steps': 11017, 'loss/train': 2.4339072704315186} +02/24/2022 15:50:11 - INFO - codeparrot_training - Step 11018: {'lr': 0.0004577036299223112, 'samples': 5641728, 'steps': 11018, 'loss/train': 2.4836628437042236} +02/24/2022 15:50:15 - INFO - codeparrot_training - Step 11019: {'lr': 0.0004576945229591982, 'samples': 5642240, 'steps': 11019, 'loss/train': 2.18385648727417} +02/24/2022 15:50:21 - INFO - codeparrot_training - Step 11020: {'lr': 0.0004576854151063879, 'samples': 5642752, 'steps': 11020, 'loss/train': 1.7703357934951782} +02/24/2022 15:50:25 - INFO - codeparrot_training - Step 11021: {'lr': 0.0004576763063639192, 'samples': 5643264, 'steps': 11021, 'loss/train': 1.8063669204711914} +02/24/2022 15:50:30 - INFO - codeparrot_training - Step 11022: {'lr': 0.0004576671967318312, 'samples': 5643776, 'steps': 11022, 'loss/train': 1.3862773180007935} +02/24/2022 15:50:33 - INFO - codeparrot_training - Step 11023: {'lr': 0.0004576580862101628, 'samples': 5644288, 'steps': 11023, 'loss/train': 1.5508840084075928} +02/24/2022 15:50:39 - INFO - codeparrot_training - Step 11024: {'lr': 0.00045764897479895315, 'samples': 5644800, 'steps': 11024, 'loss/train': 2.3896539211273193} +02/24/2022 15:50:43 - INFO - codeparrot_training - Step 11025: {'lr': 0.00045763986249824126, 'samples': 5645312, 'steps': 11025, 'loss/train': 1.9844008684158325} +02/24/2022 15:50:48 - INFO - codeparrot_training - Step 11026: {'lr': 0.00045763074930806606, 'samples': 5645824, 'steps': 11026, 'loss/train': 1.9455288648605347} +02/24/2022 15:50:52 - INFO - codeparrot_training - Step 11027: {'lr': 0.0004576216352284667, 'samples': 5646336, 'steps': 11027, 'loss/train': 1.5345526933670044} +02/24/2022 15:50:57 - INFO - codeparrot_training - Step 11028: {'lr': 0.0004576125202594822, 'samples': 5646848, 'steps': 11028, 'loss/train': 4.6256422996521} +02/24/2022 15:51:01 - INFO - codeparrot_training - Step 11029: {'lr': 0.0004576034044011515, 'samples': 5647360, 'steps': 11029, 'loss/train': 1.9145762920379639} +02/24/2022 15:51:06 - INFO - codeparrot_training - Step 11030: {'lr': 0.00045759428765351377, 'samples': 5647872, 'steps': 11030, 'loss/train': 2.9236321449279785} +02/24/2022 15:51:10 - INFO - codeparrot_training - Step 11031: {'lr': 0.0004575851700166081, 'samples': 5648384, 'steps': 11031, 'loss/train': 3.1974756717681885} +02/24/2022 15:51:15 - INFO - codeparrot_training - Step 11032: {'lr': 0.0004575760514904734, 'samples': 5648896, 'steps': 11032, 'loss/train': 2.6109697818756104} +02/24/2022 15:51:19 - INFO - codeparrot_training - Step 11033: {'lr': 0.0004575669320751489, 'samples': 5649408, 'steps': 11033, 'loss/train': 1.1202114820480347} +02/24/2022 15:51:25 - INFO - codeparrot_training - Step 11034: {'lr': 0.00045755781177067345, 'samples': 5649920, 'steps': 11034, 'loss/train': 2.89687180519104} +02/24/2022 15:51:29 - INFO - codeparrot_training - Step 11035: {'lr': 0.00045754869057708635, 'samples': 5650432, 'steps': 11035, 'loss/train': 2.5690252780914307} +02/24/2022 15:51:34 - INFO - codeparrot_training - Step 11036: {'lr': 0.00045753956849442647, 'samples': 5650944, 'steps': 11036, 'loss/train': 2.1626386642456055} +02/24/2022 15:51:38 - INFO - codeparrot_training - Step 11037: {'lr': 0.00045753044552273306, 'samples': 5651456, 'steps': 11037, 'loss/train': 1.834442377090454} +02/24/2022 15:51:43 - INFO - codeparrot_training - Step 11038: {'lr': 0.0004575213216620451, 'samples': 5651968, 'steps': 11038, 'loss/train': 1.7041386365890503} +02/24/2022 15:51:47 - INFO - codeparrot_training - Step 11039: {'lr': 0.0004575121969124016, 'samples': 5652480, 'steps': 11039, 'loss/train': 2.4016833305358887} +02/24/2022 15:51:52 - INFO - codeparrot_training - Step 11040: {'lr': 0.00045750307127384186, 'samples': 5652992, 'steps': 11040, 'loss/train': 2.4795610904693604} +02/24/2022 15:51:56 - INFO - codeparrot_training - Step 11041: {'lr': 0.0004574939447464048, 'samples': 5653504, 'steps': 11041, 'loss/train': 2.1513404846191406} +02/24/2022 15:52:01 - INFO - codeparrot_training - Step 11042: {'lr': 0.0004574848173301296, 'samples': 5654016, 'steps': 11042, 'loss/train': 2.031777858734131} +02/24/2022 15:52:05 - INFO - codeparrot_training - Step 11043: {'lr': 0.0004574756890250553, 'samples': 5654528, 'steps': 11043, 'loss/train': 1.6828848123550415} +02/24/2022 15:52:11 - INFO - codeparrot_training - Step 11044: {'lr': 0.00045746655983122105, 'samples': 5655040, 'steps': 11044, 'loss/train': 2.246591567993164} +02/24/2022 15:52:14 - INFO - codeparrot_training - Step 11045: {'lr': 0.0004574574297486659, 'samples': 5655552, 'steps': 11045, 'loss/train': 3.035916805267334} +02/24/2022 15:52:20 - INFO - codeparrot_training - Step 11046: {'lr': 0.00045744829877742907, 'samples': 5656064, 'steps': 11046, 'loss/train': 1.0365140438079834} +02/24/2022 15:52:23 - INFO - codeparrot_training - Step 11047: {'lr': 0.0004574391669175495, 'samples': 5656576, 'steps': 11047, 'loss/train': 1.1585502624511719} +02/24/2022 15:52:29 - INFO - codeparrot_training - Step 11048: {'lr': 0.0004574300341690665, 'samples': 5657088, 'steps': 11048, 'loss/train': 0.404594749212265} +02/24/2022 15:52:33 - INFO - codeparrot_training - Step 11049: {'lr': 0.000457420900532019, 'samples': 5657600, 'steps': 11049, 'loss/train': 2.6901657581329346} +02/24/2022 15:52:38 - INFO - codeparrot_training - Step 11050: {'lr': 0.0004574117660064463, 'samples': 5658112, 'steps': 11050, 'loss/train': 1.9391030073165894} +02/24/2022 15:52:41 - INFO - codeparrot_training - Step 11051: {'lr': 0.0004574026305923875, 'samples': 5658624, 'steps': 11051, 'loss/train': 1.7978076934814453} +02/24/2022 15:52:47 - INFO - codeparrot_training - Step 11052: {'lr': 0.0004573934942898816, 'samples': 5659136, 'steps': 11052, 'loss/train': 1.687556505203247} +02/24/2022 15:52:50 - INFO - codeparrot_training - Step 11053: {'lr': 0.0004573843570989679, 'samples': 5659648, 'steps': 11053, 'loss/train': 2.8811450004577637} +02/24/2022 15:52:56 - INFO - codeparrot_training - Step 11054: {'lr': 0.00045737521901968535, 'samples': 5660160, 'steps': 11054, 'loss/train': 3.078970432281494} +02/24/2022 15:53:00 - INFO - codeparrot_training - Step 11055: {'lr': 0.00045736608005207327, 'samples': 5660672, 'steps': 11055, 'loss/train': 2.828577756881714} +02/24/2022 15:53:06 - INFO - codeparrot_training - Step 11056: {'lr': 0.0004573569401961708, 'samples': 5661184, 'steps': 11056, 'loss/train': 2.040673017501831} +02/24/2022 15:53:10 - INFO - codeparrot_training - Step 11057: {'lr': 0.000457347799452017, 'samples': 5661696, 'steps': 11057, 'loss/train': 1.9416203498840332} +02/24/2022 15:53:16 - INFO - codeparrot_training - Step 11058: {'lr': 0.000457338657819651, 'samples': 5662208, 'steps': 11058, 'loss/train': 2.4838335514068604} +02/24/2022 15:53:19 - INFO - codeparrot_training - Step 11059: {'lr': 0.00045732951529911216, 'samples': 5662720, 'steps': 11059, 'loss/train': 2.3747220039367676} +02/24/2022 15:53:25 - INFO - codeparrot_training - Step 11060: {'lr': 0.0004573203718904394, 'samples': 5663232, 'steps': 11060, 'loss/train': 2.607297658920288} +02/24/2022 15:53:28 - INFO - codeparrot_training - Step 11061: {'lr': 0.00045731122759367206, 'samples': 5663744, 'steps': 11061, 'loss/train': 3.3181095123291016} +02/24/2022 15:53:34 - INFO - codeparrot_training - Step 11062: {'lr': 0.00045730208240884926, 'samples': 5664256, 'steps': 11062, 'loss/train': 2.0406525135040283} +02/24/2022 15:53:37 - INFO - codeparrot_training - Step 11063: {'lr': 0.0004572929363360101, 'samples': 5664768, 'steps': 11063, 'loss/train': 2.2629809379577637} +02/24/2022 15:53:43 - INFO - codeparrot_training - Step 11064: {'lr': 0.0004572837893751939, 'samples': 5665280, 'steps': 11064, 'loss/train': 3.5304272174835205} +02/24/2022 15:53:46 - INFO - codeparrot_training - Step 11065: {'lr': 0.0004572746415264397, 'samples': 5665792, 'steps': 11065, 'loss/train': 1.5774226188659668} +02/24/2022 15:53:53 - INFO - codeparrot_training - Step 11066: {'lr': 0.0004572654927897868, 'samples': 5666304, 'steps': 11066, 'loss/train': 1.3631993532180786} +02/24/2022 15:53:56 - INFO - codeparrot_training - Step 11067: {'lr': 0.0004572563431652743, 'samples': 5666816, 'steps': 11067, 'loss/train': 2.773375988006592} +02/24/2022 15:54:02 - INFO - codeparrot_training - Step 11068: {'lr': 0.00045724719265294143, 'samples': 5667328, 'steps': 11068, 'loss/train': 2.2670059204101562} +02/24/2022 15:54:05 - INFO - codeparrot_training - Step 11069: {'lr': 0.00045723804125282744, 'samples': 5667840, 'steps': 11069, 'loss/train': 2.206634759902954} +02/24/2022 15:54:11 - INFO - codeparrot_training - Step 11070: {'lr': 0.0004572288889649715, 'samples': 5668352, 'steps': 11070, 'loss/train': 2.9405438899993896} +02/24/2022 15:54:14 - INFO - codeparrot_training - Step 11071: {'lr': 0.00045721973578941277, 'samples': 5668864, 'steps': 11071, 'loss/train': 2.217150926589966} +02/24/2022 15:54:20 - INFO - codeparrot_training - Step 11072: {'lr': 0.00045721058172619043, 'samples': 5669376, 'steps': 11072, 'loss/train': 2.799722671508789} +02/24/2022 15:54:23 - INFO - codeparrot_training - Step 11073: {'lr': 0.00045720142677534387, 'samples': 5669888, 'steps': 11073, 'loss/train': 1.9608020782470703} +02/24/2022 15:54:29 - INFO - codeparrot_training - Step 11074: {'lr': 0.00045719227093691216, 'samples': 5670400, 'steps': 11074, 'loss/train': 2.630922317504883} +02/24/2022 15:54:32 - INFO - codeparrot_training - Step 11075: {'lr': 0.0004571831142109345, 'samples': 5670912, 'steps': 11075, 'loss/train': 2.085696220397949} +02/24/2022 15:54:38 - INFO - codeparrot_training - Step 11076: {'lr': 0.0004571739565974502, 'samples': 5671424, 'steps': 11076, 'loss/train': 2.1132121086120605} +02/24/2022 15:54:41 - INFO - codeparrot_training - Step 11077: {'lr': 0.0004571647980964985, 'samples': 5671936, 'steps': 11077, 'loss/train': 3.8341281414031982} +02/24/2022 15:54:47 - INFO - codeparrot_training - Step 11078: {'lr': 0.0004571556387081185, 'samples': 5672448, 'steps': 11078, 'loss/train': 1.9625836610794067} +02/24/2022 15:54:50 - INFO - codeparrot_training - Step 11079: {'lr': 0.0004571464784323496, 'samples': 5672960, 'steps': 11079, 'loss/train': 2.8605496883392334} +02/24/2022 15:54:56 - INFO - codeparrot_training - Step 11080: {'lr': 0.0004571373172692309, 'samples': 5673472, 'steps': 11080, 'loss/train': 2.9483017921447754} +02/24/2022 15:55:00 - INFO - codeparrot_training - Step 11081: {'lr': 0.0004571281552188018, 'samples': 5673984, 'steps': 11081, 'loss/train': 1.8165431022644043} +02/24/2022 15:55:06 - INFO - codeparrot_training - Step 11082: {'lr': 0.0004571189922811013, 'samples': 5674496, 'steps': 11082, 'loss/train': 1.9531326293945312} +02/24/2022 15:55:09 - INFO - codeparrot_training - Step 11083: {'lr': 0.00045710982845616893, 'samples': 5675008, 'steps': 11083, 'loss/train': 2.7474465370178223} +02/24/2022 15:55:15 - INFO - codeparrot_training - Step 11084: {'lr': 0.0004571006637440438, 'samples': 5675520, 'steps': 11084, 'loss/train': 3.15207576751709} +02/24/2022 15:55:18 - INFO - codeparrot_training - Step 11085: {'lr': 0.00045709149814476515, 'samples': 5676032, 'steps': 11085, 'loss/train': 2.6176068782806396} +02/24/2022 15:55:24 - INFO - codeparrot_training - Step 11086: {'lr': 0.0004570823316583723, 'samples': 5676544, 'steps': 11086, 'loss/train': 1.690643548965454} +02/24/2022 15:55:27 - INFO - codeparrot_training - Step 11087: {'lr': 0.00045707316428490453, 'samples': 5677056, 'steps': 11087, 'loss/train': 1.8370248079299927} +02/24/2022 15:55:33 - INFO - codeparrot_training - Step 11088: {'lr': 0.0004570639960244011, 'samples': 5677568, 'steps': 11088, 'loss/train': 2.0845835208892822} +02/24/2022 15:55:36 - INFO - codeparrot_training - Step 11089: {'lr': 0.00045705482687690113, 'samples': 5678080, 'steps': 11089, 'loss/train': 2.016822099685669} +02/24/2022 15:55:42 - INFO - codeparrot_training - Step 11090: {'lr': 0.00045704565684244415, 'samples': 5678592, 'steps': 11090, 'loss/train': 1.3957535028457642} +02/24/2022 15:55:45 - INFO - codeparrot_training - Step 11091: {'lr': 0.0004570364859210693, 'samples': 5679104, 'steps': 11091, 'loss/train': 0.9114518761634827} +02/24/2022 15:55:51 - INFO - codeparrot_training - Step 11092: {'lr': 0.0004570273141128158, 'samples': 5679616, 'steps': 11092, 'loss/train': 1.5152301788330078} +02/24/2022 15:55:55 - INFO - codeparrot_training - Step 11093: {'lr': 0.00045701814141772313, 'samples': 5680128, 'steps': 11093, 'loss/train': 1.3387616872787476} +02/24/2022 15:56:00 - INFO - codeparrot_training - Step 11094: {'lr': 0.0004570089678358305, 'samples': 5680640, 'steps': 11094, 'loss/train': 1.7889165878295898} +02/24/2022 15:56:04 - INFO - codeparrot_training - Step 11095: {'lr': 0.000456999793367177, 'samples': 5681152, 'steps': 11095, 'loss/train': 2.937065601348877} +02/24/2022 15:56:09 - INFO - codeparrot_training - Step 11096: {'lr': 0.0004569906180118023, 'samples': 5681664, 'steps': 11096, 'loss/train': 2.175995111465454} +02/24/2022 15:56:13 - INFO - codeparrot_training - Step 11097: {'lr': 0.0004569814417697454, 'samples': 5682176, 'steps': 11097, 'loss/train': 1.1215555667877197} +02/24/2022 15:56:18 - INFO - codeparrot_training - Step 11098: {'lr': 0.0004569722646410458, 'samples': 5682688, 'steps': 11098, 'loss/train': 1.065351128578186} +02/24/2022 15:56:22 - INFO - codeparrot_training - Step 11099: {'lr': 0.0004569630866257428, 'samples': 5683200, 'steps': 11099, 'loss/train': 0.2542405128479004} +02/24/2022 15:56:28 - INFO - codeparrot_training - Step 11100: {'lr': 0.00045695390772387557, 'samples': 5683712, 'steps': 11100, 'loss/train': 1.764005184173584} +02/24/2022 15:56:31 - INFO - codeparrot_training - Step 11101: {'lr': 0.00045694472793548346, 'samples': 5684224, 'steps': 11101, 'loss/train': 2.918515682220459} +02/24/2022 15:56:37 - INFO - codeparrot_training - Step 11102: {'lr': 0.0004569355472606059, 'samples': 5684736, 'steps': 11102, 'loss/train': 2.4069817066192627} +02/24/2022 15:56:41 - INFO - codeparrot_training - Step 11103: {'lr': 0.0004569263656992822, 'samples': 5685248, 'steps': 11103, 'loss/train': 2.837353229522705} +02/24/2022 15:56:46 - INFO - codeparrot_training - Step 11104: {'lr': 0.0004569171832515516, 'samples': 5685760, 'steps': 11104, 'loss/train': 2.0978844165802} +02/24/2022 15:56:50 - INFO - codeparrot_training - Step 11105: {'lr': 0.0004569079999174536, 'samples': 5686272, 'steps': 11105, 'loss/train': 3.774681568145752} +02/24/2022 15:56:55 - INFO - codeparrot_training - Step 11106: {'lr': 0.0004568988156970273, 'samples': 5686784, 'steps': 11106, 'loss/train': 2.0071756839752197} +02/24/2022 15:56:59 - INFO - codeparrot_training - Step 11107: {'lr': 0.00045688963059031226, 'samples': 5687296, 'steps': 11107, 'loss/train': 2.8168768882751465} +02/24/2022 15:57:04 - INFO - codeparrot_training - Step 11108: {'lr': 0.00045688044459734766, 'samples': 5687808, 'steps': 11108, 'loss/train': 1.557365894317627} +02/24/2022 15:57:08 - INFO - codeparrot_training - Step 11109: {'lr': 0.00045687125771817294, 'samples': 5688320, 'steps': 11109, 'loss/train': 1.7244561910629272} +02/24/2022 15:57:13 - INFO - codeparrot_training - Step 11110: {'lr': 0.00045686206995282754, 'samples': 5688832, 'steps': 11110, 'loss/train': 2.27374529838562} +02/24/2022 15:57:19 - INFO - codeparrot_training - Step 11111: {'lr': 0.00045685288130135063, 'samples': 5689344, 'steps': 11111, 'loss/train': 2.531306743621826} +02/24/2022 15:57:23 - INFO - codeparrot_training - Step 11112: {'lr': 0.00045684369176378164, 'samples': 5689856, 'steps': 11112, 'loss/train': 0.8942400217056274} +02/24/2022 15:57:28 - INFO - codeparrot_training - Step 11113: {'lr': 0.00045683450134016, 'samples': 5690368, 'steps': 11113, 'loss/train': 1.580580472946167} +02/24/2022 15:57:32 - INFO - codeparrot_training - Step 11114: {'lr': 0.0004568253100305251, 'samples': 5690880, 'steps': 11114, 'loss/train': 2.280998706817627} +02/24/2022 15:57:37 - INFO - codeparrot_training - Step 11115: {'lr': 0.0004568161178349161, 'samples': 5691392, 'steps': 11115, 'loss/train': 1.9796950817108154} +02/24/2022 15:57:41 - INFO - codeparrot_training - Step 11116: {'lr': 0.0004568069247533726, 'samples': 5691904, 'steps': 11116, 'loss/train': 1.0214215517044067} +02/24/2022 15:57:46 - INFO - codeparrot_training - Step 11117: {'lr': 0.0004567977307859339, 'samples': 5692416, 'steps': 11117, 'loss/train': 2.094801664352417} +02/24/2022 15:57:50 - INFO - codeparrot_training - Step 11118: {'lr': 0.0004567885359326394, 'samples': 5692928, 'steps': 11118, 'loss/train': 1.8895941972732544} +02/24/2022 15:57:55 - INFO - codeparrot_training - Step 11119: {'lr': 0.00045677934019352844, 'samples': 5693440, 'steps': 11119, 'loss/train': 2.691798210144043} +02/24/2022 15:57:59 - INFO - codeparrot_training - Step 11120: {'lr': 0.00045677014356864043, 'samples': 5693952, 'steps': 11120, 'loss/train': 1.1120326519012451} +02/24/2022 15:58:04 - INFO - codeparrot_training - Step 11121: {'lr': 0.00045676094605801487, 'samples': 5694464, 'steps': 11121, 'loss/train': 1.6420798301696777} +02/24/2022 15:58:08 - INFO - codeparrot_training - Step 11122: {'lr': 0.00045675174766169105, 'samples': 5694976, 'steps': 11122, 'loss/train': 2.191995143890381} +02/24/2022 15:58:14 - INFO - codeparrot_training - Step 11123: {'lr': 0.0004567425483797083, 'samples': 5695488, 'steps': 11123, 'loss/train': 2.5102522373199463} +02/24/2022 15:58:17 - INFO - codeparrot_training - Step 11124: {'lr': 0.0004567333482121062, 'samples': 5696000, 'steps': 11124, 'loss/train': 4.087818622589111} +02/24/2022 15:58:23 - INFO - codeparrot_training - Step 11125: {'lr': 0.0004567241471589241, 'samples': 5696512, 'steps': 11125, 'loss/train': 2.909599542617798} +02/24/2022 15:58:26 - INFO - codeparrot_training - Step 11126: {'lr': 0.0004567149452202013, 'samples': 5697024, 'steps': 11126, 'loss/train': 2.11537504196167} +02/24/2022 15:58:33 - INFO - codeparrot_training - Step 11127: {'lr': 0.0004567057423959774, 'samples': 5697536, 'steps': 11127, 'loss/train': 2.41999888420105} +02/24/2022 15:58:36 - INFO - codeparrot_training - Step 11128: {'lr': 0.0004566965386862917, 'samples': 5698048, 'steps': 11128, 'loss/train': 2.5000498294830322} +02/24/2022 15:58:42 - INFO - codeparrot_training - Step 11129: {'lr': 0.0004566873340911837, 'samples': 5698560, 'steps': 11129, 'loss/train': 2.079416275024414} +02/24/2022 15:58:45 - INFO - codeparrot_training - Step 11130: {'lr': 0.00045667812861069275, 'samples': 5699072, 'steps': 11130, 'loss/train': 1.9394596815109253} +02/24/2022 15:58:51 - INFO - codeparrot_training - Step 11131: {'lr': 0.00045666892224485836, 'samples': 5699584, 'steps': 11131, 'loss/train': 3.117314338684082} +02/24/2022 15:58:54 - INFO - codeparrot_training - Step 11132: {'lr': 0.0004566597149937199, 'samples': 5700096, 'steps': 11132, 'loss/train': 4.116604328155518} +02/24/2022 15:59:00 - INFO - codeparrot_training - Step 11133: {'lr': 0.0004566505068573168, 'samples': 5700608, 'steps': 11133, 'loss/train': 3.082699775695801} +02/24/2022 15:59:03 - INFO - codeparrot_training - Step 11134: {'lr': 0.00045664129783568866, 'samples': 5701120, 'steps': 11134, 'loss/train': 1.719556212425232} +02/24/2022 15:59:09 - INFO - codeparrot_training - Step 11135: {'lr': 0.00045663208792887474, 'samples': 5701632, 'steps': 11135, 'loss/train': 0.7279012799263} +02/24/2022 15:59:12 - INFO - codeparrot_training - Step 11136: {'lr': 0.0004566228771369146, 'samples': 5702144, 'steps': 11136, 'loss/train': 2.2131645679473877} +02/24/2022 15:59:18 - INFO - codeparrot_training - Step 11137: {'lr': 0.00045661366545984763, 'samples': 5702656, 'steps': 11137, 'loss/train': 1.8204002380371094} +02/24/2022 15:59:21 - INFO - codeparrot_training - Step 11138: {'lr': 0.00045660445289771336, 'samples': 5703168, 'steps': 11138, 'loss/train': 1.9106638431549072} +02/24/2022 15:59:28 - INFO - codeparrot_training - Step 11139: {'lr': 0.00045659523945055114, 'samples': 5703680, 'steps': 11139, 'loss/train': 2.272371292114258} +02/24/2022 15:59:31 - INFO - codeparrot_training - Step 11140: {'lr': 0.0004565860251184006, 'samples': 5704192, 'steps': 11140, 'loss/train': 2.1323587894439697} +02/24/2022 15:59:36 - INFO - codeparrot_training - Step 11141: {'lr': 0.0004565768099013011, 'samples': 5704704, 'steps': 11141, 'loss/train': 1.0485512018203735} +02/24/2022 15:59:40 - INFO - codeparrot_training - Step 11142: {'lr': 0.00045656759379929213, 'samples': 5705216, 'steps': 11142, 'loss/train': 1.9648613929748535} +02/24/2022 15:59:46 - INFO - codeparrot_training - Step 11143: {'lr': 0.0004565583768124132, 'samples': 5705728, 'steps': 11143, 'loss/train': 2.54841947555542} +02/24/2022 15:59:49 - INFO - codeparrot_training - Step 11144: {'lr': 0.0004565491589407038, 'samples': 5706240, 'steps': 11144, 'loss/train': 2.3678460121154785} +02/24/2022 15:59:54 - INFO - codeparrot_training - Step 11145: {'lr': 0.0004565399401842034, 'samples': 5706752, 'steps': 11145, 'loss/train': 2.0219571590423584} +02/24/2022 15:59:58 - INFO - codeparrot_training - Step 11146: {'lr': 0.0004565307205429514, 'samples': 5707264, 'steps': 11146, 'loss/train': 1.5630725622177124} +02/24/2022 16:00:03 - INFO - codeparrot_training - Step 11147: {'lr': 0.00045652150001698744, 'samples': 5707776, 'steps': 11147, 'loss/train': 2.420351982116699} +02/24/2022 16:00:07 - INFO - codeparrot_training - Step 11148: {'lr': 0.00045651227860635094, 'samples': 5708288, 'steps': 11148, 'loss/train': 2.3975882530212402} +02/24/2022 16:00:13 - INFO - codeparrot_training - Step 11149: {'lr': 0.00045650305631108137, 'samples': 5708800, 'steps': 11149, 'loss/train': 2.3108203411102295} +02/24/2022 16:00:17 - INFO - codeparrot_training - Step 11150: {'lr': 0.0004564938331312183, 'samples': 5709312, 'steps': 11150, 'loss/train': 2.3248162269592285} +02/24/2022 16:00:22 - INFO - codeparrot_training - Step 11151: {'lr': 0.00045648460906680123, 'samples': 5709824, 'steps': 11151, 'loss/train': 2.3360297679901123} +02/24/2022 16:00:26 - INFO - codeparrot_training - Step 11152: {'lr': 0.00045647538411786964, 'samples': 5710336, 'steps': 11152, 'loss/train': 2.2024340629577637} +02/24/2022 16:00:31 - INFO - codeparrot_training - Step 11153: {'lr': 0.00045646615828446316, 'samples': 5710848, 'steps': 11153, 'loss/train': 1.7577011585235596} +02/24/2022 16:00:35 - INFO - codeparrot_training - Step 11154: {'lr': 0.00045645693156662104, 'samples': 5711360, 'steps': 11154, 'loss/train': 3.6065192222595215} +02/24/2022 16:00:40 - INFO - codeparrot_training - Step 11155: {'lr': 0.0004564477039643831, 'samples': 5711872, 'steps': 11155, 'loss/train': 1.989689826965332} +02/24/2022 16:00:44 - INFO - codeparrot_training - Step 11156: {'lr': 0.0004564384754777888, 'samples': 5712384, 'steps': 11156, 'loss/train': 0.5113958120346069} +02/24/2022 16:00:49 - INFO - codeparrot_training - Step 11157: {'lr': 0.0004564292461068775, 'samples': 5712896, 'steps': 11157, 'loss/train': 1.392565369606018} +02/24/2022 16:00:53 - INFO - codeparrot_training - Step 11158: {'lr': 0.00045642001585168896, 'samples': 5713408, 'steps': 11158, 'loss/train': 2.381349802017212} +02/24/2022 16:00:59 - INFO - codeparrot_training - Step 11159: {'lr': 0.0004564107847122626, 'samples': 5713920, 'steps': 11159, 'loss/train': 1.497516393661499} +02/24/2022 16:01:03 - INFO - codeparrot_training - Step 11160: {'lr': 0.0004564015526886379, 'samples': 5714432, 'steps': 11160, 'loss/train': 1.8312599658966064} +02/24/2022 16:01:08 - INFO - codeparrot_training - Step 11161: {'lr': 0.0004563923197808546, 'samples': 5714944, 'steps': 11161, 'loss/train': 1.6338485479354858} +02/24/2022 16:01:12 - INFO - codeparrot_training - Step 11162: {'lr': 0.00045638308598895205, 'samples': 5715456, 'steps': 11162, 'loss/train': 1.7676514387130737} +02/24/2022 16:01:17 - INFO - codeparrot_training - Step 11163: {'lr': 0.0004563738513129699, 'samples': 5715968, 'steps': 11163, 'loss/train': 2.410141706466675} +02/24/2022 16:01:21 - INFO - codeparrot_training - Step 11164: {'lr': 0.0004563646157529477, 'samples': 5716480, 'steps': 11164, 'loss/train': 2.0964887142181396} +02/24/2022 16:01:26 - INFO - codeparrot_training - Step 11165: {'lr': 0.0004563553793089251, 'samples': 5716992, 'steps': 11165, 'loss/train': 2.0609984397888184} +02/24/2022 16:01:29 - INFO - codeparrot_training - Step 11166: {'lr': 0.00045634614198094154, 'samples': 5717504, 'steps': 11166, 'loss/train': 0.6208281517028809} +02/24/2022 16:01:35 - INFO - codeparrot_training - Step 11167: {'lr': 0.0004563369037690366, 'samples': 5718016, 'steps': 11167, 'loss/train': 3.829484224319458} +02/24/2022 16:01:41 - INFO - codeparrot_training - Step 11168: {'lr': 0.00045632766467324995, 'samples': 5718528, 'steps': 11168, 'loss/train': 2.1517574787139893} +02/24/2022 16:01:44 - INFO - codeparrot_training - Step 11169: {'lr': 0.00045631842469362103, 'samples': 5719040, 'steps': 11169, 'loss/train': 2.7359609603881836} +02/24/2022 16:01:50 - INFO - codeparrot_training - Step 11170: {'lr': 0.00045630918383018947, 'samples': 5719552, 'steps': 11170, 'loss/train': 1.998534917831421} +02/24/2022 16:01:53 - INFO - codeparrot_training - Step 11171: {'lr': 0.00045629994208299496, 'samples': 5720064, 'steps': 11171, 'loss/train': 2.9150965213775635} +02/24/2022 16:01:59 - INFO - codeparrot_training - Step 11172: {'lr': 0.0004562906994520769, 'samples': 5720576, 'steps': 11172, 'loss/train': 1.0025087594985962} +02/24/2022 16:02:03 - INFO - codeparrot_training - Step 11173: {'lr': 0.0004562814559374751, 'samples': 5721088, 'steps': 11173, 'loss/train': 1.4408923387527466} +02/24/2022 16:02:08 - INFO - codeparrot_training - Step 11174: {'lr': 0.000456272211539229, 'samples': 5721600, 'steps': 11174, 'loss/train': 0.9095917344093323} +02/24/2022 16:02:12 - INFO - codeparrot_training - Step 11175: {'lr': 0.00045626296625737823, 'samples': 5722112, 'steps': 11175, 'loss/train': 1.359091877937317} +02/24/2022 16:02:17 - INFO - codeparrot_training - Step 11176: {'lr': 0.0004562537200919624, 'samples': 5722624, 'steps': 11176, 'loss/train': 1.0864464044570923} +02/24/2022 16:02:21 - INFO - codeparrot_training - Step 11177: {'lr': 0.00045624447304302117, 'samples': 5723136, 'steps': 11177, 'loss/train': 2.413727283477783} +02/24/2022 16:02:27 - INFO - codeparrot_training - Step 11178: {'lr': 0.00045623522511059405, 'samples': 5723648, 'steps': 11178, 'loss/train': 2.8752694129943848} +02/24/2022 16:02:30 - INFO - codeparrot_training - Step 11179: {'lr': 0.00045622597629472073, 'samples': 5724160, 'steps': 11179, 'loss/train': 3.9342124462127686} +02/24/2022 16:02:36 - INFO - codeparrot_training - Step 11180: {'lr': 0.0004562167265954409, 'samples': 5724672, 'steps': 11180, 'loss/train': 1.9125604629516602} +02/24/2022 16:02:39 - INFO - codeparrot_training - Step 11181: {'lr': 0.000456207476012794, 'samples': 5725184, 'steps': 11181, 'loss/train': 1.9933083057403564} +02/24/2022 16:02:45 - INFO - codeparrot_training - Step 11182: {'lr': 0.0004561982245468198, 'samples': 5725696, 'steps': 11182, 'loss/train': 2.228924512863159} +02/24/2022 16:02:48 - INFO - codeparrot_training - Step 11183: {'lr': 0.0004561889721975578, 'samples': 5726208, 'steps': 11183, 'loss/train': 0.5284889936447144} +02/24/2022 16:02:54 - INFO - codeparrot_training - Step 11184: {'lr': 0.0004561797189650478, 'samples': 5726720, 'steps': 11184, 'loss/train': 1.4588679075241089} +02/24/2022 16:02:58 - INFO - codeparrot_training - Step 11185: {'lr': 0.0004561704648493293, 'samples': 5727232, 'steps': 11185, 'loss/train': 1.699294924736023} +02/24/2022 16:03:03 - INFO - codeparrot_training - Step 11186: {'lr': 0.00045616120985044205, 'samples': 5727744, 'steps': 11186, 'loss/train': 1.9070420265197754} +02/24/2022 16:03:07 - INFO - codeparrot_training - Step 11187: {'lr': 0.0004561519539684256, 'samples': 5728256, 'steps': 11187, 'loss/train': 1.4898838996887207} +02/24/2022 16:03:12 - INFO - codeparrot_training - Step 11188: {'lr': 0.00045614269720331964, 'samples': 5728768, 'steps': 11188, 'loss/train': 1.5310871601104736} +02/24/2022 16:03:16 - INFO - codeparrot_training - Step 11189: {'lr': 0.00045613343955516386, 'samples': 5729280, 'steps': 11189, 'loss/train': 2.431586265563965} +02/24/2022 16:03:21 - INFO - codeparrot_training - Step 11190: {'lr': 0.00045612418102399785, 'samples': 5729792, 'steps': 11190, 'loss/train': 1.268957257270813} +02/24/2022 16:03:25 - INFO - codeparrot_training - Step 11191: {'lr': 0.00045611492160986127, 'samples': 5730304, 'steps': 11191, 'loss/train': 1.8090838193893433} +02/24/2022 16:03:30 - INFO - codeparrot_training - Step 11192: {'lr': 0.00045610566131279386, 'samples': 5730816, 'steps': 11192, 'loss/train': 2.358632802963257} +02/24/2022 16:03:34 - INFO - codeparrot_training - Step 11193: {'lr': 0.00045609640013283525, 'samples': 5731328, 'steps': 11193, 'loss/train': 2.8368661403656006} +02/24/2022 16:03:40 - INFO - codeparrot_training - Step 11194: {'lr': 0.00045608713807002507, 'samples': 5731840, 'steps': 11194, 'loss/train': 2.0189576148986816} +02/24/2022 16:03:44 - INFO - codeparrot_training - Step 11195: {'lr': 0.000456077875124403, 'samples': 5732352, 'steps': 11195, 'loss/train': 1.6095008850097656} +02/24/2022 16:03:49 - INFO - codeparrot_training - Step 11196: {'lr': 0.00045606861129600883, 'samples': 5732864, 'steps': 11196, 'loss/train': 1.506212592124939} +02/24/2022 16:03:53 - INFO - codeparrot_training - Step 11197: {'lr': 0.00045605934658488214, 'samples': 5733376, 'steps': 11197, 'loss/train': 1.74277925491333} +02/24/2022 16:03:58 - INFO - codeparrot_training - Step 11198: {'lr': 0.0004560500809910626, 'samples': 5733888, 'steps': 11198, 'loss/train': 2.550379753112793} +02/24/2022 16:04:02 - INFO - codeparrot_training - Step 11199: {'lr': 0.00045604081451459, 'samples': 5734400, 'steps': 11199, 'loss/train': 0.9492202997207642} +02/24/2022 16:04:07 - INFO - codeparrot_training - Step 11200: {'lr': 0.0004560315471555039, 'samples': 5734912, 'steps': 11200, 'loss/train': 2.018465280532837} +02/24/2022 16:04:11 - INFO - codeparrot_training - Step 11201: {'lr': 0.00045602227891384416, 'samples': 5735424, 'steps': 11201, 'loss/train': 1.4421931505203247} +02/24/2022 16:04:16 - INFO - codeparrot_training - Step 11202: {'lr': 0.00045601300978965033, 'samples': 5735936, 'steps': 11202, 'loss/train': 1.4803434610366821} +02/24/2022 16:04:20 - INFO - codeparrot_training - Step 11203: {'lr': 0.00045600373978296223, 'samples': 5736448, 'steps': 11203, 'loss/train': 1.64203679561615} +02/24/2022 16:04:26 - INFO - codeparrot_training - Step 11204: {'lr': 0.0004559944688938195, 'samples': 5736960, 'steps': 11204, 'loss/train': 2.1750948429107666} +02/24/2022 16:04:29 - INFO - codeparrot_training - Step 11205: {'lr': 0.0004559851971222618, 'samples': 5737472, 'steps': 11205, 'loss/train': 2.479008674621582} +02/24/2022 16:04:35 - INFO - codeparrot_training - Step 11206: {'lr': 0.00045597592446832905, 'samples': 5737984, 'steps': 11206, 'loss/train': 2.1021623611450195} +02/24/2022 16:04:39 - INFO - codeparrot_training - Step 11207: {'lr': 0.0004559666509320608, 'samples': 5738496, 'steps': 11207, 'loss/train': 2.791691780090332} +02/24/2022 16:04:44 - INFO - codeparrot_training - Step 11208: {'lr': 0.0004559573765134968, 'samples': 5739008, 'steps': 11208, 'loss/train': 2.5238149166107178} +02/24/2022 16:04:48 - INFO - codeparrot_training - Step 11209: {'lr': 0.0004559481012126768, 'samples': 5739520, 'steps': 11209, 'loss/train': 2.6910507678985596} +02/24/2022 16:04:53 - INFO - codeparrot_training - Step 11210: {'lr': 0.00045593882502964055, 'samples': 5740032, 'steps': 11210, 'loss/train': 2.5532312393188477} +02/24/2022 16:04:57 - INFO - codeparrot_training - Step 11211: {'lr': 0.00045592954796442784, 'samples': 5740544, 'steps': 11211, 'loss/train': 2.537386178970337} +02/24/2022 16:05:02 - INFO - codeparrot_training - Step 11212: {'lr': 0.0004559202700170782, 'samples': 5741056, 'steps': 11212, 'loss/train': 2.9790687561035156} +02/24/2022 16:05:06 - INFO - codeparrot_training - Step 11213: {'lr': 0.00045591099118763156, 'samples': 5741568, 'steps': 11213, 'loss/train': 2.4957096576690674} +02/24/2022 16:05:11 - INFO - codeparrot_training - Step 11214: {'lr': 0.0004559017114761276, 'samples': 5742080, 'steps': 11214, 'loss/train': 1.2813564538955688} +02/24/2022 16:05:15 - INFO - codeparrot_training - Step 11215: {'lr': 0.00045589243088260613, 'samples': 5742592, 'steps': 11215, 'loss/train': 2.0508625507354736} +02/24/2022 16:05:20 - INFO - codeparrot_training - Step 11216: {'lr': 0.00045588314940710683, 'samples': 5743104, 'steps': 11216, 'loss/train': 2.667091131210327} +02/24/2022 16:05:24 - INFO - codeparrot_training - Step 11217: {'lr': 0.00045587386704966956, 'samples': 5743616, 'steps': 11217, 'loss/train': 2.8601770401000977} +02/24/2022 16:05:29 - INFO - codeparrot_training - Step 11218: {'lr': 0.00045586458381033395, 'samples': 5744128, 'steps': 11218, 'loss/train': 1.357225775718689} +02/24/2022 16:05:33 - INFO - codeparrot_training - Step 11219: {'lr': 0.00045585529968913984, 'samples': 5744640, 'steps': 11219, 'loss/train': 2.775374174118042} +02/24/2022 16:05:39 - INFO - codeparrot_training - Step 11220: {'lr': 0.00045584601468612703, 'samples': 5745152, 'steps': 11220, 'loss/train': 2.3387062549591064} +02/24/2022 16:05:42 - INFO - codeparrot_training - Step 11221: {'lr': 0.0004558367288013352, 'samples': 5745664, 'steps': 11221, 'loss/train': 3.0921833515167236} +02/24/2022 16:05:48 - INFO - codeparrot_training - Step 11222: {'lr': 0.00045582744203480417, 'samples': 5746176, 'steps': 11222, 'loss/train': 2.494013547897339} +02/24/2022 16:05:51 - INFO - codeparrot_training - Step 11223: {'lr': 0.0004558181543865738, 'samples': 5746688, 'steps': 11223, 'loss/train': 1.687036395072937} +02/24/2022 16:05:57 - INFO - codeparrot_training - Step 11224: {'lr': 0.0004558088658566838, 'samples': 5747200, 'steps': 11224, 'loss/train': 1.4024417400360107} +02/24/2022 16:06:00 - INFO - codeparrot_training - Step 11225: {'lr': 0.000455799576445174, 'samples': 5747712, 'steps': 11225, 'loss/train': 2.1459367275238037} +02/24/2022 16:06:06 - INFO - codeparrot_training - Step 11226: {'lr': 0.00045579028615208404, 'samples': 5748224, 'steps': 11226, 'loss/train': 2.2687666416168213} +02/24/2022 16:06:09 - INFO - codeparrot_training - Step 11227: {'lr': 0.000455780994977454, 'samples': 5748736, 'steps': 11227, 'loss/train': 1.6789630651474} +02/24/2022 16:06:15 - INFO - codeparrot_training - Step 11228: {'lr': 0.0004557717029213234, 'samples': 5749248, 'steps': 11228, 'loss/train': 1.9144201278686523} +02/24/2022 16:06:18 - INFO - codeparrot_training - Step 11229: {'lr': 0.00045576240998373226, 'samples': 5749760, 'steps': 11229, 'loss/train': 2.738412618637085} +02/24/2022 16:06:25 - INFO - codeparrot_training - Step 11230: {'lr': 0.00045575311616472024, 'samples': 5750272, 'steps': 11230, 'loss/train': 1.6283036470413208} +02/24/2022 16:06:28 - INFO - codeparrot_training - Step 11231: {'lr': 0.0004557438214643272, 'samples': 5750784, 'steps': 11231, 'loss/train': 2.378791332244873} +02/24/2022 16:06:33 - INFO - codeparrot_training - Step 11232: {'lr': 0.00045573452588259296, 'samples': 5751296, 'steps': 11232, 'loss/train': 2.9709901809692383} +02/24/2022 16:06:37 - INFO - codeparrot_training - Step 11233: {'lr': 0.0004557252294195573, 'samples': 5751808, 'steps': 11233, 'loss/train': 2.266359806060791} +02/24/2022 16:06:42 - INFO - codeparrot_training - Step 11234: {'lr': 0.00045571593207526016, 'samples': 5752320, 'steps': 11234, 'loss/train': 1.540785312652588} +02/24/2022 16:06:46 - INFO - codeparrot_training - Step 11235: {'lr': 0.00045570663384974125, 'samples': 5752832, 'steps': 11235, 'loss/train': 2.662827491760254} +02/24/2022 16:06:52 - INFO - codeparrot_training - Step 11236: {'lr': 0.00045569733474304044, 'samples': 5753344, 'steps': 11236, 'loss/train': 2.371854066848755} +02/24/2022 16:06:55 - INFO - codeparrot_training - Step 11237: {'lr': 0.0004556880347551976, 'samples': 5753856, 'steps': 11237, 'loss/train': 1.1751444339752197} +02/24/2022 16:07:00 - INFO - codeparrot_training - Step 11238: {'lr': 0.0004556787338862525, 'samples': 5754368, 'steps': 11238, 'loss/train': 1.4755576848983765} +02/24/2022 16:07:04 - INFO - codeparrot_training - Step 11239: {'lr': 0.000455669432136245, 'samples': 5754880, 'steps': 11239, 'loss/train': 2.0230255126953125} +02/24/2022 16:07:11 - INFO - codeparrot_training - Step 11240: {'lr': 0.00045566012950521497, 'samples': 5755392, 'steps': 11240, 'loss/train': 2.75071120262146} +02/24/2022 16:07:14 - INFO - codeparrot_training - Step 11241: {'lr': 0.0004556508259932022, 'samples': 5755904, 'steps': 11241, 'loss/train': 3.0111143589019775} +02/24/2022 16:07:20 - INFO - codeparrot_training - Step 11242: {'lr': 0.0004556415216002467, 'samples': 5756416, 'steps': 11242, 'loss/train': 1.4160038232803345} +02/24/2022 16:07:23 - INFO - codeparrot_training - Step 11243: {'lr': 0.0004556322163263882, 'samples': 5756928, 'steps': 11243, 'loss/train': 1.7679202556610107} +02/24/2022 16:07:29 - INFO - codeparrot_training - Step 11244: {'lr': 0.00045562291017166653, 'samples': 5757440, 'steps': 11244, 'loss/train': 2.0016725063323975} +02/24/2022 16:07:32 - INFO - codeparrot_training - Step 11245: {'lr': 0.0004556136031361216, 'samples': 5757952, 'steps': 11245, 'loss/train': 2.387760877609253} +02/24/2022 16:07:38 - INFO - codeparrot_training - Step 11246: {'lr': 0.0004556042952197933, 'samples': 5758464, 'steps': 11246, 'loss/train': 1.3299553394317627} +02/24/2022 16:07:41 - INFO - codeparrot_training - Step 11247: {'lr': 0.00045559498642272153, 'samples': 5758976, 'steps': 11247, 'loss/train': 2.1121790409088135} +02/24/2022 16:07:47 - INFO - codeparrot_training - Step 11248: {'lr': 0.0004555856767449461, 'samples': 5759488, 'steps': 11248, 'loss/train': 0.45214852690696716} +02/24/2022 16:07:50 - INFO - codeparrot_training - Step 11249: {'lr': 0.00045557636618650686, 'samples': 5760000, 'steps': 11249, 'loss/train': 1.3540096282958984} +02/24/2022 16:07:56 - INFO - codeparrot_training - Step 11250: {'lr': 0.00045556705474744376, 'samples': 5760512, 'steps': 11250, 'loss/train': 1.697840929031372} +02/24/2022 16:08:00 - INFO - codeparrot_training - Step 11251: {'lr': 0.00045555774242779675, 'samples': 5761024, 'steps': 11251, 'loss/train': 3.0322370529174805} +02/24/2022 16:08:06 - INFO - codeparrot_training - Step 11252: {'lr': 0.0004555484292276055, 'samples': 5761536, 'steps': 11252, 'loss/train': 2.3741860389709473} +02/24/2022 16:08:10 - INFO - codeparrot_training - Step 11253: {'lr': 0.0004555391151469102, 'samples': 5762048, 'steps': 11253, 'loss/train': 1.6380577087402344} +02/24/2022 16:08:13 - INFO - codeparrot_training - Step 11254: {'lr': 0.00045552980018575054, 'samples': 5762560, 'steps': 11254, 'loss/train': 1.7942290306091309} +02/24/2022 16:08:21 - INFO - codeparrot_training - Step 11255: {'lr': 0.0004555204843441665, 'samples': 5763072, 'steps': 11255, 'loss/train': 1.5617128610610962} +02/24/2022 16:08:26 - INFO - codeparrot_training - Step 11256: {'lr': 0.0004555111676221979, 'samples': 5763584, 'steps': 11256, 'loss/train': 2.575984477996826} +02/24/2022 16:08:30 - INFO - codeparrot_training - Step 11257: {'lr': 0.00045550185001988475, 'samples': 5764096, 'steps': 11257, 'loss/train': 1.7319233417510986} +02/24/2022 16:08:36 - INFO - codeparrot_training - Step 11258: {'lr': 0.00045549253153726694, 'samples': 5764608, 'steps': 11258, 'loss/train': 2.1164321899414062} +02/24/2022 16:08:39 - INFO - codeparrot_training - Step 11259: {'lr': 0.00045548321217438436, 'samples': 5765120, 'steps': 11259, 'loss/train': 2.0037665367126465} +02/24/2022 16:08:45 - INFO - codeparrot_training - Step 11260: {'lr': 0.00045547389193127696, 'samples': 5765632, 'steps': 11260, 'loss/train': 2.569166898727417} +02/24/2022 16:08:48 - INFO - codeparrot_training - Step 11261: {'lr': 0.00045546457080798463, 'samples': 5766144, 'steps': 11261, 'loss/train': 8.904472351074219} +02/24/2022 16:08:54 - INFO - codeparrot_training - Step 11262: {'lr': 0.00045545524880454734, 'samples': 5766656, 'steps': 11262, 'loss/train': 1.5161077976226807} +02/24/2022 16:08:57 - INFO - codeparrot_training - Step 11263: {'lr': 0.000455445925921005, 'samples': 5767168, 'steps': 11263, 'loss/train': 2.1050312519073486} +02/24/2022 16:09:05 - INFO - codeparrot_training - Step 11264: {'lr': 0.00045543660215739755, 'samples': 5767680, 'steps': 11264, 'loss/train': 1.7605210542678833} +02/24/2022 16:09:08 - INFO - codeparrot_training - Step 11265: {'lr': 0.00045542727751376495, 'samples': 5768192, 'steps': 11265, 'loss/train': 2.0572397708892822} +02/24/2022 16:09:14 - INFO - codeparrot_training - Step 11266: {'lr': 0.00045541795199014715, 'samples': 5768704, 'steps': 11266, 'loss/train': 2.817103624343872} +02/24/2022 16:09:17 - INFO - codeparrot_training - Step 11267: {'lr': 0.00045540862558658403, 'samples': 5769216, 'steps': 11267, 'loss/train': 2.798259735107422} +02/24/2022 16:09:23 - INFO - codeparrot_training - Step 11268: {'lr': 0.00045539929830311555, 'samples': 5769728, 'steps': 11268, 'loss/train': 2.311845302581787} +02/24/2022 16:09:26 - INFO - codeparrot_training - Step 11269: {'lr': 0.00045538997013978166, 'samples': 5770240, 'steps': 11269, 'loss/train': 0.5399313569068909} +02/24/2022 16:09:32 - INFO - codeparrot_training - Step 11270: {'lr': 0.0004553806410966225, 'samples': 5770752, 'steps': 11270, 'loss/train': 3.216301202774048} +02/24/2022 16:09:35 - INFO - codeparrot_training - Step 11271: {'lr': 0.0004553713111736778, 'samples': 5771264, 'steps': 11271, 'loss/train': 1.0413765907287598} +02/24/2022 16:09:41 - INFO - codeparrot_training - Step 11272: {'lr': 0.0004553619803709876, 'samples': 5771776, 'steps': 11272, 'loss/train': 1.5031672716140747} +02/24/2022 16:09:44 - INFO - codeparrot_training - Step 11273: {'lr': 0.00045535264868859195, 'samples': 5772288, 'steps': 11273, 'loss/train': 1.1409170627593994} +02/24/2022 16:09:50 - INFO - codeparrot_training - Step 11274: {'lr': 0.0004553433161265307, 'samples': 5772800, 'steps': 11274, 'loss/train': 0.9867144823074341} +02/24/2022 16:09:53 - INFO - codeparrot_training - Step 11275: {'lr': 0.00045533398268484396, 'samples': 5773312, 'steps': 11275, 'loss/train': 2.376530408859253} +02/24/2022 16:10:00 - INFO - codeparrot_training - Step 11276: {'lr': 0.00045532464836357155, 'samples': 5773824, 'steps': 11276, 'loss/train': 1.2782824039459229} +02/24/2022 16:10:04 - INFO - codeparrot_training - Step 11277: {'lr': 0.0004553153131627536, 'samples': 5774336, 'steps': 11277, 'loss/train': 0.9035613536834717} +02/24/2022 16:10:09 - INFO - codeparrot_training - Step 11278: {'lr': 0.00045530597708243, 'samples': 5774848, 'steps': 11278, 'loss/train': 2.114393711090088} +02/24/2022 16:10:13 - INFO - codeparrot_training - Step 11279: {'lr': 0.0004552966401226408, 'samples': 5775360, 'steps': 11279, 'loss/train': 1.7024188041687012} +02/24/2022 16:10:19 - INFO - codeparrot_training - Step 11280: {'lr': 0.000455287302283426, 'samples': 5775872, 'steps': 11280, 'loss/train': 2.1052308082580566} +02/24/2022 16:10:22 - INFO - codeparrot_training - Step 11281: {'lr': 0.00045527796356482566, 'samples': 5776384, 'steps': 11281, 'loss/train': 0.893500566482544} +02/24/2022 16:10:28 - INFO - codeparrot_training - Step 11282: {'lr': 0.00045526862396687957, 'samples': 5776896, 'steps': 11282, 'loss/train': 2.396361827850342} +02/24/2022 16:10:31 - INFO - codeparrot_training - Step 11283: {'lr': 0.000455259283489628, 'samples': 5777408, 'steps': 11283, 'loss/train': 1.5983911752700806} +02/24/2022 16:10:37 - INFO - codeparrot_training - Step 11284: {'lr': 0.0004552499421331107, 'samples': 5777920, 'steps': 11284, 'loss/train': 1.8370423316955566} +02/24/2022 16:10:40 - INFO - codeparrot_training - Step 11285: {'lr': 0.0004552405998973679, 'samples': 5778432, 'steps': 11285, 'loss/train': 1.6687424182891846} +02/24/2022 16:10:48 - INFO - codeparrot_training - Step 11286: {'lr': 0.0004552312567824395, 'samples': 5778944, 'steps': 11286, 'loss/train': 2.222606658935547} +02/24/2022 16:10:51 - INFO - codeparrot_training - Step 11287: {'lr': 0.00045522191278836563, 'samples': 5779456, 'steps': 11287, 'loss/train': 1.79940664768219} +02/24/2022 16:10:57 - INFO - codeparrot_training - Step 11288: {'lr': 0.00045521256791518616, 'samples': 5779968, 'steps': 11288, 'loss/train': 2.376446485519409} +02/24/2022 16:11:00 - INFO - codeparrot_training - Step 11289: {'lr': 0.0004552032221629413, 'samples': 5780480, 'steps': 11289, 'loss/train': 1.4775240421295166} +02/24/2022 16:11:06 - INFO - codeparrot_training - Step 11290: {'lr': 0.000455193875531671, 'samples': 5780992, 'steps': 11290, 'loss/train': 2.178053855895996} +02/24/2022 16:11:09 - INFO - codeparrot_training - Step 11291: {'lr': 0.00045518452802141524, 'samples': 5781504, 'steps': 11291, 'loss/train': 1.578875184059143} +02/24/2022 16:11:15 - INFO - codeparrot_training - Step 11292: {'lr': 0.0004551751796322141, 'samples': 5782016, 'steps': 11292, 'loss/train': 2.116464614868164} +02/24/2022 16:11:18 - INFO - codeparrot_training - Step 11293: {'lr': 0.00045516583036410777, 'samples': 5782528, 'steps': 11293, 'loss/train': 2.084806442260742} +02/24/2022 16:11:24 - INFO - codeparrot_training - Step 11294: {'lr': 0.00045515648021713604, 'samples': 5783040, 'steps': 11294, 'loss/train': 1.847497582435608} +02/24/2022 16:11:27 - INFO - codeparrot_training - Step 11295: {'lr': 0.0004551471291913391, 'samples': 5783552, 'steps': 11295, 'loss/train': 2.00285267829895} +02/24/2022 16:11:34 - INFO - codeparrot_training - Step 11296: {'lr': 0.00045513777728675703, 'samples': 5784064, 'steps': 11296, 'loss/train': 1.6667442321777344} +02/24/2022 16:11:38 - INFO - codeparrot_training - Step 11297: {'lr': 0.0004551284245034298, 'samples': 5784576, 'steps': 11297, 'loss/train': 1.6538283824920654} +02/24/2022 16:11:43 - INFO - codeparrot_training - Step 11298: {'lr': 0.00045511907084139767, 'samples': 5785088, 'steps': 11298, 'loss/train': 0.9538973569869995} +02/24/2022 16:11:47 - INFO - codeparrot_training - Step 11299: {'lr': 0.0004551097163007005, 'samples': 5785600, 'steps': 11299, 'loss/train': 3.656559705734253} +02/24/2022 16:11:52 - INFO - codeparrot_training - Step 11300: {'lr': 0.0004551003608813784, 'samples': 5786112, 'steps': 11300, 'loss/train': 2.665013074874878} +02/24/2022 16:11:56 - INFO - codeparrot_training - Step 11301: {'lr': 0.00045509100458347154, 'samples': 5786624, 'steps': 11301, 'loss/train': 1.9199810028076172} +02/24/2022 16:12:01 - INFO - codeparrot_training - Step 11302: {'lr': 0.0004550816474070199, 'samples': 5787136, 'steps': 11302, 'loss/train': 2.007091760635376} +02/24/2022 16:12:05 - INFO - codeparrot_training - Step 11303: {'lr': 0.0004550722893520636, 'samples': 5787648, 'steps': 11303, 'loss/train': 1.9990875720977783} +02/24/2022 16:12:10 - INFO - codeparrot_training - Step 11304: {'lr': 0.0004550629304186428, 'samples': 5788160, 'steps': 11304, 'loss/train': 1.3781377077102661} +02/24/2022 16:12:14 - INFO - codeparrot_training - Step 11305: {'lr': 0.0004550535706067974, 'samples': 5788672, 'steps': 11305, 'loss/train': 2.8855373859405518} +02/24/2022 16:12:19 - INFO - codeparrot_training - Step 11306: {'lr': 0.0004550442099165677, 'samples': 5789184, 'steps': 11306, 'loss/train': 2.104621410369873} +02/24/2022 16:12:23 - INFO - codeparrot_training - Step 11307: {'lr': 0.0004550348483479937, 'samples': 5789696, 'steps': 11307, 'loss/train': 1.7566425800323486} +02/24/2022 16:12:28 - INFO - codeparrot_training - Step 11308: {'lr': 0.00045502548590111553, 'samples': 5790208, 'steps': 11308, 'loss/train': 2.3883728981018066} +02/24/2022 16:12:32 - INFO - codeparrot_training - Step 11309: {'lr': 0.0004550161225759732, 'samples': 5790720, 'steps': 11309, 'loss/train': 2.064016342163086} +02/24/2022 16:12:38 - INFO - codeparrot_training - Step 11310: {'lr': 0.000455006758372607, 'samples': 5791232, 'steps': 11310, 'loss/train': 2.9384281635284424} +02/24/2022 16:12:41 - INFO - codeparrot_training - Step 11311: {'lr': 0.00045499739329105696, 'samples': 5791744, 'steps': 11311, 'loss/train': 1.8259005546569824} +02/24/2022 16:12:49 - INFO - codeparrot_training - Step 11312: {'lr': 0.00045498802733136306, 'samples': 5792256, 'steps': 11312, 'loss/train': 2.3329851627349854} +02/24/2022 16:12:52 - INFO - codeparrot_training - Step 11313: {'lr': 0.00045497866049356564, 'samples': 5792768, 'steps': 11313, 'loss/train': 1.7216702699661255} +02/24/2022 16:12:56 - INFO - codeparrot_training - Step 11314: {'lr': 0.0004549692927777047, 'samples': 5793280, 'steps': 11314, 'loss/train': 2.485945224761963} +02/24/2022 16:13:01 - INFO - codeparrot_training - Step 11315: {'lr': 0.00045495992418382035, 'samples': 5793792, 'steps': 11315, 'loss/train': 2.65579891204834} +02/24/2022 16:13:04 - INFO - codeparrot_training - Step 11316: {'lr': 0.0004549505547119529, 'samples': 5794304, 'steps': 11316, 'loss/train': 2.4479236602783203} +02/24/2022 16:13:10 - INFO - codeparrot_training - Step 11317: {'lr': 0.00045494118436214225, 'samples': 5794816, 'steps': 11317, 'loss/train': 2.8074517250061035} +02/24/2022 16:13:13 - INFO - codeparrot_training - Step 11318: {'lr': 0.00045493181313442866, 'samples': 5795328, 'steps': 11318, 'loss/train': 2.9917047023773193} +02/24/2022 16:13:19 - INFO - codeparrot_training - Step 11319: {'lr': 0.00045492244102885224, 'samples': 5795840, 'steps': 11319, 'loss/train': 3.807037353515625} +02/24/2022 16:13:22 - INFO - codeparrot_training - Step 11320: {'lr': 0.00045491306804545316, 'samples': 5796352, 'steps': 11320, 'loss/train': 1.3030238151550293} +02/24/2022 16:13:28 - INFO - codeparrot_training - Step 11321: {'lr': 0.0004549036941842716, 'samples': 5796864, 'steps': 11321, 'loss/train': 2.7639851570129395} +02/24/2022 16:13:31 - INFO - codeparrot_training - Step 11322: {'lr': 0.0004548943194453476, 'samples': 5797376, 'steps': 11322, 'loss/train': 2.7611591815948486} +02/24/2022 16:13:39 - INFO - codeparrot_training - Step 11323: {'lr': 0.0004548849438287214, 'samples': 5797888, 'steps': 11323, 'loss/train': 1.9540956020355225} +02/24/2022 16:13:44 - INFO - codeparrot_training - Step 11324: {'lr': 0.00045487556733443327, 'samples': 5798400, 'steps': 11324, 'loss/train': 2.3607444763183594} +02/24/2022 16:13:48 - INFO - codeparrot_training - Step 11325: {'lr': 0.00045486618996252315, 'samples': 5798912, 'steps': 11325, 'loss/train': 2.5105535984039307} +02/24/2022 16:13:53 - INFO - codeparrot_training - Step 11326: {'lr': 0.0004548568117130314, 'samples': 5799424, 'steps': 11326, 'loss/train': 1.458544373512268} +02/24/2022 16:13:57 - INFO - codeparrot_training - Step 11327: {'lr': 0.00045484743258599803, 'samples': 5799936, 'steps': 11327, 'loss/train': 2.256856918334961} +02/24/2022 16:14:02 - INFO - codeparrot_training - Step 11328: {'lr': 0.0004548380525814634, 'samples': 5800448, 'steps': 11328, 'loss/train': 1.4750778675079346} +02/24/2022 16:14:06 - INFO - codeparrot_training - Step 11329: {'lr': 0.0004548286716994676, 'samples': 5800960, 'steps': 11329, 'loss/train': 1.6293151378631592} +02/24/2022 16:14:11 - INFO - codeparrot_training - Step 11330: {'lr': 0.0004548192899400507, 'samples': 5801472, 'steps': 11330, 'loss/train': 1.9873050451278687} +02/24/2022 16:14:15 - INFO - codeparrot_training - Step 11331: {'lr': 0.0004548099073032531, 'samples': 5801984, 'steps': 11331, 'loss/train': 2.73116397857666} +02/24/2022 16:14:22 - INFO - codeparrot_training - Step 11332: {'lr': 0.00045480052378911483, 'samples': 5802496, 'steps': 11332, 'loss/train': 1.5129947662353516} +02/24/2022 16:14:25 - INFO - codeparrot_training - Step 11333: {'lr': 0.0004547911393976762, 'samples': 5803008, 'steps': 11333, 'loss/train': 1.450227975845337} +02/24/2022 16:14:31 - INFO - codeparrot_training - Step 11334: {'lr': 0.00045478175412897733, 'samples': 5803520, 'steps': 11334, 'loss/train': 2.6027538776397705} +02/24/2022 16:14:34 - INFO - codeparrot_training - Step 11335: {'lr': 0.00045477236798305846, 'samples': 5804032, 'steps': 11335, 'loss/train': 2.1613070964813232} +02/24/2022 16:14:40 - INFO - codeparrot_training - Step 11336: {'lr': 0.00045476298095995985, 'samples': 5804544, 'steps': 11336, 'loss/train': 2.1947970390319824} +02/24/2022 16:14:43 - INFO - codeparrot_training - Step 11337: {'lr': 0.0004547535930597215, 'samples': 5805056, 'steps': 11337, 'loss/train': 1.9178993701934814} +02/24/2022 16:14:49 - INFO - codeparrot_training - Step 11338: {'lr': 0.0004547442042823839, 'samples': 5805568, 'steps': 11338, 'loss/train': 2.148998975753784} +02/24/2022 16:14:52 - INFO - codeparrot_training - Step 11339: {'lr': 0.0004547348146279871, 'samples': 5806080, 'steps': 11339, 'loss/train': 2.377134084701538} +02/24/2022 16:14:58 - INFO - codeparrot_training - Step 11340: {'lr': 0.00045472542409657135, 'samples': 5806592, 'steps': 11340, 'loss/train': 2.880746603012085} +02/24/2022 16:15:01 - INFO - codeparrot_training - Step 11341: {'lr': 0.00045471603268817696, 'samples': 5807104, 'steps': 11341, 'loss/train': 2.8682913780212402} +02/24/2022 16:15:07 - INFO - codeparrot_training - Step 11342: {'lr': 0.000454706640402844, 'samples': 5807616, 'steps': 11342, 'loss/train': 2.337826728820801} +02/24/2022 16:15:10 - INFO - codeparrot_training - Step 11343: {'lr': 0.00045469724724061286, 'samples': 5808128, 'steps': 11343, 'loss/train': 2.1685545444488525} +02/24/2022 16:15:16 - INFO - codeparrot_training - Step 11344: {'lr': 0.0004546878532015236, 'samples': 5808640, 'steps': 11344, 'loss/train': 2.416700601577759} +02/24/2022 16:15:19 - INFO - codeparrot_training - Step 11345: {'lr': 0.00045467845828561673, 'samples': 5809152, 'steps': 11345, 'loss/train': 1.496660590171814} +02/24/2022 16:15:27 - INFO - codeparrot_training - Step 11346: {'lr': 0.0004546690624929322, 'samples': 5809664, 'steps': 11346, 'loss/train': 1.6992676258087158} +02/24/2022 16:15:30 - INFO - codeparrot_training - Step 11347: {'lr': 0.0004546596658235105, 'samples': 5810176, 'steps': 11347, 'loss/train': 2.159672737121582} +02/24/2022 16:15:36 - INFO - codeparrot_training - Step 11348: {'lr': 0.00045465026827739175, 'samples': 5810688, 'steps': 11348, 'loss/train': 1.746140956878662} +02/24/2022 16:15:39 - INFO - codeparrot_training - Step 11349: {'lr': 0.00045464086985461615, 'samples': 5811200, 'steps': 11349, 'loss/train': 1.85089910030365} +02/24/2022 16:15:44 - INFO - codeparrot_training - Step 11350: {'lr': 0.0004546314705552241, 'samples': 5811712, 'steps': 11350, 'loss/train': 2.0434978008270264} +02/24/2022 16:15:48 - INFO - codeparrot_training - Step 11351: {'lr': 0.00045462207037925593, 'samples': 5812224, 'steps': 11351, 'loss/train': 2.6612701416015625} +02/24/2022 16:15:54 - INFO - codeparrot_training - Step 11352: {'lr': 0.0004546126693267516, 'samples': 5812736, 'steps': 11352, 'loss/train': 0.9742980003356934} +02/24/2022 16:15:57 - INFO - codeparrot_training - Step 11353: {'lr': 0.0004546032673977517, 'samples': 5813248, 'steps': 11353, 'loss/train': 7.829508304595947} +02/24/2022 16:16:03 - INFO - codeparrot_training - Step 11354: {'lr': 0.0004545938645922963, 'samples': 5813760, 'steps': 11354, 'loss/train': 2.8821663856506348} +02/24/2022 16:16:06 - INFO - codeparrot_training - Step 11355: {'lr': 0.0004545844609104258, 'samples': 5814272, 'steps': 11355, 'loss/train': 1.7871776819229126} +02/24/2022 16:16:10 - INFO - codeparrot_training - Step 11356: {'lr': 0.0004545750563521804, 'samples': 5814784, 'steps': 11356, 'loss/train': 2.7154881954193115} +02/24/2022 16:16:16 - INFO - codeparrot_training - Step 11357: {'lr': 0.0004545656509176004, 'samples': 5815296, 'steps': 11357, 'loss/train': 2.296501636505127} +02/24/2022 16:16:23 - INFO - codeparrot_training - Step 11358: {'lr': 0.0004545562446067261, 'samples': 5815808, 'steps': 11358, 'loss/train': 0.6772823929786682} +02/24/2022 16:16:26 - INFO - codeparrot_training - Step 11359: {'lr': 0.00045454683741959787, 'samples': 5816320, 'steps': 11359, 'loss/train': 2.5950610637664795} +02/24/2022 16:16:32 - INFO - codeparrot_training - Step 11360: {'lr': 0.0004545374293562559, 'samples': 5816832, 'steps': 11360, 'loss/train': 1.4573802947998047} +02/24/2022 16:16:35 - INFO - codeparrot_training - Step 11361: {'lr': 0.00045452802041674045, 'samples': 5817344, 'steps': 11361, 'loss/train': 0.7797079682350159} +02/24/2022 16:16:41 - INFO - codeparrot_training - Step 11362: {'lr': 0.000454518610601092, 'samples': 5817856, 'steps': 11362, 'loss/train': 0.794467031955719} +02/24/2022 16:16:44 - INFO - codeparrot_training - Step 11363: {'lr': 0.0004545091999093508, 'samples': 5818368, 'steps': 11363, 'loss/train': 2.0298173427581787} +02/24/2022 16:16:50 - INFO - codeparrot_training - Step 11364: {'lr': 0.00045449978834155705, 'samples': 5818880, 'steps': 11364, 'loss/train': 1.0094642639160156} +02/24/2022 16:16:53 - INFO - codeparrot_training - Step 11365: {'lr': 0.00045449037589775123, 'samples': 5819392, 'steps': 11365, 'loss/train': 1.0958517789840698} +02/24/2022 16:16:59 - INFO - codeparrot_training - Step 11366: {'lr': 0.00045448096257797344, 'samples': 5819904, 'steps': 11366, 'loss/train': 2.5343425273895264} +02/24/2022 16:17:02 - INFO - codeparrot_training - Step 11367: {'lr': 0.0004544715483822642, 'samples': 5820416, 'steps': 11367, 'loss/train': 1.2284151315689087} +02/24/2022 16:17:09 - INFO - codeparrot_training - Step 11368: {'lr': 0.00045446213331066376, 'samples': 5820928, 'steps': 11368, 'loss/train': 1.1153687238693237} +02/24/2022 16:17:13 - INFO - codeparrot_training - Step 11369: {'lr': 0.0004544527173632125, 'samples': 5821440, 'steps': 11369, 'loss/train': 2.504488468170166} +02/24/2022 16:17:18 - INFO - codeparrot_training - Step 11370: {'lr': 0.00045444330053995074, 'samples': 5821952, 'steps': 11370, 'loss/train': 3.0068347454071045} +02/24/2022 16:17:22 - INFO - codeparrot_training - Step 11371: {'lr': 0.00045443388284091877, 'samples': 5822464, 'steps': 11371, 'loss/train': 1.8943393230438232} +02/24/2022 16:17:27 - INFO - codeparrot_training - Step 11372: {'lr': 0.0004544244642661569, 'samples': 5822976, 'steps': 11372, 'loss/train': 2.1895809173583984} +02/24/2022 16:17:31 - INFO - codeparrot_training - Step 11373: {'lr': 0.0004544150448157056, 'samples': 5823488, 'steps': 11373, 'loss/train': 1.8967657089233398} +02/24/2022 16:17:36 - INFO - codeparrot_training - Step 11374: {'lr': 0.0004544056244896052, 'samples': 5824000, 'steps': 11374, 'loss/train': 2.8636271953582764} +02/24/2022 16:17:40 - INFO - codeparrot_training - Step 11375: {'lr': 0.00045439620328789593, 'samples': 5824512, 'steps': 11375, 'loss/train': 1.1131142377853394} +02/24/2022 16:17:45 - INFO - codeparrot_training - Step 11376: {'lr': 0.00045438678121061826, 'samples': 5825024, 'steps': 11376, 'loss/train': 1.6215119361877441} +02/24/2022 16:17:49 - INFO - codeparrot_training - Step 11377: {'lr': 0.0004543773582578125, 'samples': 5825536, 'steps': 11377, 'loss/train': 1.6648873090744019} +02/24/2022 16:17:56 - INFO - codeparrot_training - Step 11378: {'lr': 0.00045436793442951907, 'samples': 5826048, 'steps': 11378, 'loss/train': 1.9659498929977417} +02/24/2022 16:18:00 - INFO - codeparrot_training - Step 11379: {'lr': 0.0004543585097257783, 'samples': 5826560, 'steps': 11379, 'loss/train': 2.6243858337402344} +02/24/2022 16:18:05 - INFO - codeparrot_training - Step 11380: {'lr': 0.0004543490841466306, 'samples': 5827072, 'steps': 11380, 'loss/train': 2.2534351348876953} +02/24/2022 16:18:09 - INFO - codeparrot_training - Step 11381: {'lr': 0.00045433965769211616, 'samples': 5827584, 'steps': 11381, 'loss/train': 3.113907814025879} +02/24/2022 16:18:14 - INFO - codeparrot_training - Step 11382: {'lr': 0.00045433023036227566, 'samples': 5828096, 'steps': 11382, 'loss/train': 2.194993257522583} +02/24/2022 16:18:18 - INFO - codeparrot_training - Step 11383: {'lr': 0.00045432080215714927, 'samples': 5828608, 'steps': 11383, 'loss/train': 2.3546202182769775} +02/24/2022 16:18:23 - INFO - codeparrot_training - Step 11384: {'lr': 0.00045431137307677753, 'samples': 5829120, 'steps': 11384, 'loss/train': 1.6404659748077393} +02/24/2022 16:18:27 - INFO - codeparrot_training - Step 11385: {'lr': 0.00045430194312120066, 'samples': 5829632, 'steps': 11385, 'loss/train': 2.023556709289551} +02/24/2022 16:18:32 - INFO - codeparrot_training - Step 11386: {'lr': 0.0004542925122904591, 'samples': 5830144, 'steps': 11386, 'loss/train': 2.4634549617767334} +02/24/2022 16:18:36 - INFO - codeparrot_training - Step 11387: {'lr': 0.00045428308058459335, 'samples': 5830656, 'steps': 11387, 'loss/train': 2.151942014694214} +02/24/2022 16:18:43 - INFO - codeparrot_training - Step 11388: {'lr': 0.00045427364800364374, 'samples': 5831168, 'steps': 11388, 'loss/train': 2.8610587120056152} +02/24/2022 16:18:46 - INFO - codeparrot_training - Step 11389: {'lr': 0.00045426421454765065, 'samples': 5831680, 'steps': 11389, 'loss/train': 0.571302592754364} +02/24/2022 16:18:52 - INFO - codeparrot_training - Step 11390: {'lr': 0.0004542547802166546, 'samples': 5832192, 'steps': 11390, 'loss/train': 1.377107858657837} +02/24/2022 16:18:55 - INFO - codeparrot_training - Step 11391: {'lr': 0.00045424534501069594, 'samples': 5832704, 'steps': 11391, 'loss/train': 2.3089232444763184} +02/24/2022 16:19:01 - INFO - codeparrot_training - Step 11392: {'lr': 0.00045423590892981503, 'samples': 5833216, 'steps': 11392, 'loss/train': 2.594899892807007} +02/24/2022 16:19:04 - INFO - codeparrot_training - Step 11393: {'lr': 0.0004542264719740523, 'samples': 5833728, 'steps': 11393, 'loss/train': 0.6809685826301575} +02/24/2022 16:19:10 - INFO - codeparrot_training - Step 11394: {'lr': 0.0004542170341434483, 'samples': 5834240, 'steps': 11394, 'loss/train': 1.1828583478927612} +02/24/2022 16:19:13 - INFO - codeparrot_training - Step 11395: {'lr': 0.00045420759543804326, 'samples': 5834752, 'steps': 11395, 'loss/train': 1.4844894409179688} +02/24/2022 16:19:19 - INFO - codeparrot_training - Step 11396: {'lr': 0.0004541981558578778, 'samples': 5835264, 'steps': 11396, 'loss/train': 2.7818479537963867} +02/24/2022 16:19:22 - INFO - codeparrot_training - Step 11397: {'lr': 0.0004541887154029922, 'samples': 5835776, 'steps': 11397, 'loss/train': 1.0714787244796753} +02/24/2022 16:19:28 - INFO - codeparrot_training - Step 11398: {'lr': 0.0004541792740734271, 'samples': 5836288, 'steps': 11398, 'loss/train': 1.311425805091858} +02/24/2022 16:19:32 - INFO - codeparrot_training - Step 11399: {'lr': 0.0004541698318692228, 'samples': 5836800, 'steps': 11399, 'loss/train': 2.1383049488067627} +02/24/2022 16:19:37 - INFO - codeparrot_training - Step 11400: {'lr': 0.0004541603887904198, 'samples': 5837312, 'steps': 11400, 'loss/train': 1.4966486692428589} +02/24/2022 16:19:41 - INFO - codeparrot_training - Step 11401: {'lr': 0.0004541509448370584, 'samples': 5837824, 'steps': 11401, 'loss/train': 1.64617121219635} +02/24/2022 16:19:46 - INFO - codeparrot_training - Step 11402: {'lr': 0.00045414150000917927, 'samples': 5838336, 'steps': 11402, 'loss/train': 2.2392823696136475} +02/24/2022 16:19:50 - INFO - codeparrot_training - Step 11403: {'lr': 0.0004541320543068227, 'samples': 5838848, 'steps': 11403, 'loss/train': 2.1169583797454834} +02/24/2022 16:19:55 - INFO - codeparrot_training - Step 11404: {'lr': 0.00045412260773002933, 'samples': 5839360, 'steps': 11404, 'loss/train': 2.5532174110412598} +02/24/2022 16:19:58 - INFO - codeparrot_training - Step 11405: {'lr': 0.0004541131602788395, 'samples': 5839872, 'steps': 11405, 'loss/train': 1.873200535774231} +02/24/2022 16:20:06 - INFO - codeparrot_training - Step 11406: {'lr': 0.00045410371195329365, 'samples': 5840384, 'steps': 11406, 'loss/train': 2.221608877182007} +02/24/2022 16:20:09 - INFO - codeparrot_training - Step 11407: {'lr': 0.00045409426275343234, 'samples': 5840896, 'steps': 11407, 'loss/train': 3.276916027069092} +02/24/2022 16:20:15 - INFO - codeparrot_training - Step 11408: {'lr': 0.00045408481267929604, 'samples': 5841408, 'steps': 11408, 'loss/train': 1.904630184173584} +02/24/2022 16:20:20 - INFO - codeparrot_training - Step 11409: {'lr': 0.0004540753617309251, 'samples': 5841920, 'steps': 11409, 'loss/train': 2.037550449371338} +02/24/2022 16:20:24 - INFO - codeparrot_training - Step 11410: {'lr': 0.0004540659099083602, 'samples': 5842432, 'steps': 11410, 'loss/train': 0.9711087942123413} +02/24/2022 16:20:29 - INFO - codeparrot_training - Step 11411: {'lr': 0.0004540564572116418, 'samples': 5842944, 'steps': 11411, 'loss/train': 1.0147775411605835} +02/24/2022 16:20:33 - INFO - codeparrot_training - Step 11412: {'lr': 0.0004540470036408102, 'samples': 5843456, 'steps': 11412, 'loss/train': 2.869175672531128} +02/24/2022 16:20:40 - INFO - codeparrot_training - Step 11413: {'lr': 0.0004540375491959061, 'samples': 5843968, 'steps': 11413, 'loss/train': 3.204055070877075} +02/24/2022 16:20:44 - INFO - codeparrot_training - Step 11414: {'lr': 0.00045402809387697, 'samples': 5844480, 'steps': 11414, 'loss/train': 2.127800941467285} +02/24/2022 16:20:49 - INFO - codeparrot_training - Step 11415: {'lr': 0.00045401863768404217, 'samples': 5844992, 'steps': 11415, 'loss/train': 1.7463570833206177} +02/24/2022 16:20:53 - INFO - codeparrot_training - Step 11416: {'lr': 0.0004540091806171634, 'samples': 5845504, 'steps': 11416, 'loss/train': 3.21770977973938} +02/24/2022 16:20:58 - INFO - codeparrot_training - Step 11417: {'lr': 0.000453999722676374, 'samples': 5846016, 'steps': 11417, 'loss/train': 2.4473228454589844} +02/24/2022 16:21:02 - INFO - codeparrot_training - Step 11418: {'lr': 0.0004539902638617146, 'samples': 5846528, 'steps': 11418, 'loss/train': 2.504650831222534} +02/24/2022 16:21:07 - INFO - codeparrot_training - Step 11419: {'lr': 0.0004539808041732257, 'samples': 5847040, 'steps': 11419, 'loss/train': 0.8776189684867859} +02/24/2022 16:21:11 - INFO - codeparrot_training - Step 11420: {'lr': 0.0004539713436109478, 'samples': 5847552, 'steps': 11420, 'loss/train': 2.253885269165039} +02/24/2022 16:21:16 - INFO - codeparrot_training - Step 11421: {'lr': 0.00045396188217492145, 'samples': 5848064, 'steps': 11421, 'loss/train': 2.1496660709381104} +02/24/2022 16:21:20 - INFO - codeparrot_training - Step 11422: {'lr': 0.00045395241986518714, 'samples': 5848576, 'steps': 11422, 'loss/train': 1.4651926755905151} +02/24/2022 16:21:27 - INFO - codeparrot_training - Step 11423: {'lr': 0.0004539429566817854, 'samples': 5849088, 'steps': 11423, 'loss/train': 2.836231231689453} +02/24/2022 16:21:30 - INFO - codeparrot_training - Step 11424: {'lr': 0.00045393349262475686, 'samples': 5849600, 'steps': 11424, 'loss/train': 1.9236829280853271} +02/24/2022 16:21:36 - INFO - codeparrot_training - Step 11425: {'lr': 0.000453924027694142, 'samples': 5850112, 'steps': 11425, 'loss/train': 1.5504837036132812} +02/24/2022 16:21:40 - INFO - codeparrot_training - Step 11426: {'lr': 0.00045391456188998124, 'samples': 5850624, 'steps': 11426, 'loss/train': 2.539696455001831} +02/24/2022 16:21:45 - INFO - codeparrot_training - Step 11427: {'lr': 0.00045390509521231535, 'samples': 5851136, 'steps': 11427, 'loss/train': 3.041769027709961} +02/24/2022 16:21:49 - INFO - codeparrot_training - Step 11428: {'lr': 0.00045389562766118475, 'samples': 5851648, 'steps': 11428, 'loss/train': 2.6716833114624023} +02/24/2022 16:21:54 - INFO - codeparrot_training - Step 11429: {'lr': 0.00045388615923663004, 'samples': 5852160, 'steps': 11429, 'loss/train': 1.4363834857940674} +02/24/2022 16:21:58 - INFO - codeparrot_training - Step 11430: {'lr': 0.0004538766899386917, 'samples': 5852672, 'steps': 11430, 'loss/train': 2.0188353061676025} +02/24/2022 16:22:03 - INFO - codeparrot_training - Step 11431: {'lr': 0.00045386721976741043, 'samples': 5853184, 'steps': 11431, 'loss/train': 2.653979778289795} +02/24/2022 16:22:07 - INFO - codeparrot_training - Step 11432: {'lr': 0.0004538577487228267, 'samples': 5853696, 'steps': 11432, 'loss/train': 2.52278470993042} +02/24/2022 16:22:12 - INFO - codeparrot_training - Step 11433: {'lr': 0.0004538482768049811, 'samples': 5854208, 'steps': 11433, 'loss/train': 1.777289628982544} +02/24/2022 16:22:16 - INFO - codeparrot_training - Step 11434: {'lr': 0.00045383880401391423, 'samples': 5854720, 'steps': 11434, 'loss/train': 2.0804903507232666} +02/24/2022 16:22:23 - INFO - codeparrot_training - Step 11435: {'lr': 0.00045382933034966667, 'samples': 5855232, 'steps': 11435, 'loss/train': 2.8244993686676025} +02/24/2022 16:22:26 - INFO - codeparrot_training - Step 11436: {'lr': 0.0004538198558122789, 'samples': 5855744, 'steps': 11436, 'loss/train': 2.139791250228882} +02/24/2022 16:22:32 - INFO - codeparrot_training - Step 11437: {'lr': 0.0004538103804017917, 'samples': 5856256, 'steps': 11437, 'loss/train': 1.7734744548797607} +02/24/2022 16:22:35 - INFO - codeparrot_training - Step 11438: {'lr': 0.00045380090411824547, 'samples': 5856768, 'steps': 11438, 'loss/train': 1.9485567808151245} +02/24/2022 16:22:41 - INFO - codeparrot_training - Step 11439: {'lr': 0.0004537914269616809, 'samples': 5857280, 'steps': 11439, 'loss/train': 2.719700574874878} +02/24/2022 16:22:44 - INFO - codeparrot_training - Step 11440: {'lr': 0.00045378194893213854, 'samples': 5857792, 'steps': 11440, 'loss/train': 1.1889444589614868} +02/24/2022 16:22:50 - INFO - codeparrot_training - Step 11441: {'lr': 0.00045377247002965904, 'samples': 5858304, 'steps': 11441, 'loss/train': 1.3447911739349365} +02/24/2022 16:22:53 - INFO - codeparrot_training - Step 11442: {'lr': 0.000453762990254283, 'samples': 5858816, 'steps': 11442, 'loss/train': 1.1948394775390625} +02/24/2022 16:22:59 - INFO - codeparrot_training - Step 11443: {'lr': 0.000453753509606051, 'samples': 5859328, 'steps': 11443, 'loss/train': 0.699555516242981} +02/24/2022 16:23:02 - INFO - codeparrot_training - Step 11444: {'lr': 0.0004537440280850037, 'samples': 5859840, 'steps': 11444, 'loss/train': 1.7442504167556763} +02/24/2022 16:23:10 - INFO - codeparrot_training - Step 11445: {'lr': 0.00045373454569118166, 'samples': 5860352, 'steps': 11445, 'loss/train': 3.653651475906372} +02/24/2022 16:23:13 - INFO - codeparrot_training - Step 11446: {'lr': 0.0004537250624246255, 'samples': 5860864, 'steps': 11446, 'loss/train': 1.8762763738632202} +02/24/2022 16:23:19 - INFO - codeparrot_training - Step 11447: {'lr': 0.00045371557828537585, 'samples': 5861376, 'steps': 11447, 'loss/train': 2.6544928550720215} +02/24/2022 16:23:22 - INFO - codeparrot_training - Step 11448: {'lr': 0.0004537060932734733, 'samples': 5861888, 'steps': 11448, 'loss/train': 2.730875015258789} +02/24/2022 16:23:28 - INFO - codeparrot_training - Step 11449: {'lr': 0.0004536966073889587, 'samples': 5862400, 'steps': 11449, 'loss/train': 2.141685724258423} +02/24/2022 16:23:31 - INFO - codeparrot_training - Step 11450: {'lr': 0.00045368712063187237, 'samples': 5862912, 'steps': 11450, 'loss/train': 1.514075756072998} +02/24/2022 16:23:37 - INFO - codeparrot_training - Step 11451: {'lr': 0.0004536776330022552, 'samples': 5863424, 'steps': 11451, 'loss/train': 2.724309206008911} +02/24/2022 16:23:40 - INFO - codeparrot_training - Step 11452: {'lr': 0.0004536681445001476, 'samples': 5863936, 'steps': 11452, 'loss/train': 1.3461116552352905} +02/24/2022 16:23:45 - INFO - codeparrot_training - Step 11453: {'lr': 0.0004536586551255904, 'samples': 5864448, 'steps': 11453, 'loss/train': 0.9570274353027344} +02/24/2022 16:23:49 - INFO - codeparrot_training - Step 11454: {'lr': 0.0004536491648786242, 'samples': 5864960, 'steps': 11454, 'loss/train': 2.104924201965332} +02/24/2022 16:23:55 - INFO - codeparrot_training - Step 11455: {'lr': 0.0004536396737592896, 'samples': 5865472, 'steps': 11455, 'loss/train': 1.296418309211731} +02/24/2022 16:23:58 - INFO - codeparrot_training - Step 11456: {'lr': 0.0004536301817676274, 'samples': 5865984, 'steps': 11456, 'loss/train': 1.55818510055542} +02/24/2022 16:24:04 - INFO - codeparrot_training - Step 11457: {'lr': 0.00045362068890367804, 'samples': 5866496, 'steps': 11457, 'loss/train': 0.9206820726394653} +02/24/2022 16:24:07 - INFO - codeparrot_training - Step 11458: {'lr': 0.0004536111951674824, 'samples': 5867008, 'steps': 11458, 'loss/train': 2.4314370155334473} +02/24/2022 16:24:12 - INFO - codeparrot_training - Step 11459: {'lr': 0.000453601700559081, 'samples': 5867520, 'steps': 11459, 'loss/train': 2.436211347579956} +02/24/2022 16:24:16 - INFO - codeparrot_training - Step 11460: {'lr': 0.00045359220507851456, 'samples': 5868032, 'steps': 11460, 'loss/train': 1.680085301399231} +02/24/2022 16:24:23 - INFO - codeparrot_training - Step 11461: {'lr': 0.0004535827087258238, 'samples': 5868544, 'steps': 11461, 'loss/train': 3.255981922149658} +02/24/2022 16:24:29 - INFO - codeparrot_training - Step 11462: {'lr': 0.00045357321150104934, 'samples': 5869056, 'steps': 11462, 'loss/train': 1.8159868717193604} +02/24/2022 16:24:32 - INFO - codeparrot_training - Step 11463: {'lr': 0.0004535637134042319, 'samples': 5869568, 'steps': 11463, 'loss/train': 1.8969529867172241} +02/24/2022 16:24:38 - INFO - codeparrot_training - Step 11464: {'lr': 0.00045355421443541214, 'samples': 5870080, 'steps': 11464, 'loss/train': 1.7035434246063232} +02/24/2022 16:24:41 - INFO - codeparrot_training - Step 11465: {'lr': 0.00045354471459463076, 'samples': 5870592, 'steps': 11465, 'loss/train': 2.244917869567871} +02/24/2022 16:24:47 - INFO - codeparrot_training - Step 11466: {'lr': 0.0004535352138819284, 'samples': 5871104, 'steps': 11466, 'loss/train': 1.6521811485290527} +02/24/2022 16:24:50 - INFO - codeparrot_training - Step 11467: {'lr': 0.0004535257122973459, 'samples': 5871616, 'steps': 11467, 'loss/train': 1.382369875907898} +02/24/2022 16:24:56 - INFO - codeparrot_training - Step 11468: {'lr': 0.0004535162098409238, 'samples': 5872128, 'steps': 11468, 'loss/train': 1.3611055612564087} +02/24/2022 16:24:59 - INFO - codeparrot_training - Step 11469: {'lr': 0.000453506706512703, 'samples': 5872640, 'steps': 11469, 'loss/train': 2.5269525051116943} +02/24/2022 16:25:07 - INFO - codeparrot_training - Step 11470: {'lr': 0.00045349720231272395, 'samples': 5873152, 'steps': 11470, 'loss/train': 1.9398821592330933} +02/24/2022 16:25:10 - INFO - codeparrot_training - Step 11471: {'lr': 0.0004534876972410276, 'samples': 5873664, 'steps': 11471, 'loss/train': 2.157519817352295} +02/24/2022 16:25:16 - INFO - codeparrot_training - Step 11472: {'lr': 0.0004534781912976545, 'samples': 5874176, 'steps': 11472, 'loss/train': 2.940617084503174} +02/24/2022 16:25:19 - INFO - codeparrot_training - Step 11473: {'lr': 0.00045346868448264553, 'samples': 5874688, 'steps': 11473, 'loss/train': 2.435476779937744} +02/24/2022 16:25:25 - INFO - codeparrot_training - Step 11474: {'lr': 0.00045345917679604126, 'samples': 5875200, 'steps': 11474, 'loss/train': 2.280003309249878} +02/24/2022 16:25:28 - INFO - codeparrot_training - Step 11475: {'lr': 0.0004534496682378825, 'samples': 5875712, 'steps': 11475, 'loss/train': 1.8867299556732178} +02/24/2022 16:25:34 - INFO - codeparrot_training - Step 11476: {'lr': 0.00045344015880821, 'samples': 5876224, 'steps': 11476, 'loss/train': 4.021996974945068} +02/24/2022 16:25:37 - INFO - codeparrot_training - Step 11477: {'lr': 0.0004534306485070644, 'samples': 5876736, 'steps': 11477, 'loss/train': 2.0361852645874023} +02/24/2022 16:25:43 - INFO - codeparrot_training - Step 11478: {'lr': 0.0004534211373344864, 'samples': 5877248, 'steps': 11478, 'loss/train': 2.842474937438965} +02/24/2022 16:25:46 - INFO - codeparrot_training - Step 11479: {'lr': 0.00045341162529051704, 'samples': 5877760, 'steps': 11479, 'loss/train': 0.6766850352287292} +02/24/2022 16:25:53 - INFO - codeparrot_training - Step 11480: {'lr': 0.0004534021123751968, 'samples': 5878272, 'steps': 11480, 'loss/train': 1.167156457901001} +02/24/2022 16:25:57 - INFO - codeparrot_training - Step 11481: {'lr': 0.0004533925985885664, 'samples': 5878784, 'steps': 11481, 'loss/train': 3.0065672397613525} +02/24/2022 16:26:02 - INFO - codeparrot_training - Step 11482: {'lr': 0.00045338308393066685, 'samples': 5879296, 'steps': 11482, 'loss/train': 1.7414146661758423} +02/24/2022 16:26:06 - INFO - codeparrot_training - Step 11483: {'lr': 0.00045337356840153864, 'samples': 5879808, 'steps': 11483, 'loss/train': 1.34998619556427} +02/24/2022 16:26:11 - INFO - codeparrot_training - Step 11484: {'lr': 0.00045336405200122266, 'samples': 5880320, 'steps': 11484, 'loss/train': 0.5113691687583923} +02/24/2022 16:26:15 - INFO - codeparrot_training - Step 11485: {'lr': 0.0004533545347297597, 'samples': 5880832, 'steps': 11485, 'loss/train': 1.5709724426269531} +02/24/2022 16:26:20 - INFO - codeparrot_training - Step 11486: {'lr': 0.0004533450165871904, 'samples': 5881344, 'steps': 11486, 'loss/train': 3.4390742778778076} +02/24/2022 16:26:24 - INFO - codeparrot_training - Step 11487: {'lr': 0.00045333549757355573, 'samples': 5881856, 'steps': 11487, 'loss/train': 1.904098391532898} +02/24/2022 16:26:30 - INFO - codeparrot_training - Step 11488: {'lr': 0.0004533259776888963, 'samples': 5882368, 'steps': 11488, 'loss/train': 0.5446158647537231} +02/24/2022 16:26:33 - INFO - codeparrot_training - Step 11489: {'lr': 0.00045331645693325295, 'samples': 5882880, 'steps': 11489, 'loss/train': 3.0803136825561523} +02/24/2022 16:26:41 - INFO - codeparrot_training - Step 11490: {'lr': 0.0004533069353066664, 'samples': 5883392, 'steps': 11490, 'loss/train': 1.295883059501648} +02/24/2022 16:26:44 - INFO - codeparrot_training - Step 11491: {'lr': 0.0004532974128091776, 'samples': 5883904, 'steps': 11491, 'loss/train': 1.9756821393966675} +02/24/2022 16:26:49 - INFO - codeparrot_training - Step 11492: {'lr': 0.00045328788944082717, 'samples': 5884416, 'steps': 11492, 'loss/train': 1.6426712274551392} +02/24/2022 16:26:53 - INFO - codeparrot_training - Step 11493: {'lr': 0.000453278365201656, 'samples': 5884928, 'steps': 11493, 'loss/train': 0.24516497552394867} +02/24/2022 16:26:58 - INFO - codeparrot_training - Step 11494: {'lr': 0.00045326884009170486, 'samples': 5885440, 'steps': 11494, 'loss/train': 2.5774052143096924} +02/24/2022 16:27:02 - INFO - codeparrot_training - Step 11495: {'lr': 0.0004532593141110145, 'samples': 5885952, 'steps': 11495, 'loss/train': 1.4146664142608643} +02/24/2022 16:27:07 - INFO - codeparrot_training - Step 11496: {'lr': 0.00045324978725962584, 'samples': 5886464, 'steps': 11496, 'loss/train': 2.5068442821502686} +02/24/2022 16:27:11 - INFO - codeparrot_training - Step 11497: {'lr': 0.0004532402595375796, 'samples': 5886976, 'steps': 11497, 'loss/train': 2.1259090900421143} +02/24/2022 16:27:16 - INFO - codeparrot_training - Step 11498: {'lr': 0.0004532307309449167, 'samples': 5887488, 'steps': 11498, 'loss/train': 2.485146999359131} +02/24/2022 16:27:20 - INFO - codeparrot_training - Step 11499: {'lr': 0.00045322120148167777, 'samples': 5888000, 'steps': 11499, 'loss/train': 2.150620222091675} +02/24/2022 16:27:25 - INFO - codeparrot_training - Step 11500: {'lr': 0.0004532116711479038, 'samples': 5888512, 'steps': 11500, 'loss/train': 1.8521806001663208} +02/24/2022 16:27:29 - INFO - codeparrot_training - Step 11501: {'lr': 0.00045320213994363555, 'samples': 5889024, 'steps': 11501, 'loss/train': 2.786912679672241} +02/24/2022 16:27:34 - INFO - codeparrot_training - Step 11502: {'lr': 0.00045319260786891394, 'samples': 5889536, 'steps': 11502, 'loss/train': 0.6149995923042297} +02/24/2022 16:27:38 - INFO - codeparrot_training - Step 11503: {'lr': 0.0004531830749237796, 'samples': 5890048, 'steps': 11503, 'loss/train': 0.7495447993278503} +02/24/2022 16:27:44 - INFO - codeparrot_training - Step 11504: {'lr': 0.00045317354110827344, 'samples': 5890560, 'steps': 11504, 'loss/train': 1.61348557472229} +02/24/2022 16:27:47 - INFO - codeparrot_training - Step 11505: {'lr': 0.0004531640064224365, 'samples': 5891072, 'steps': 11505, 'loss/train': 1.9044476747512817} +02/24/2022 16:27:54 - INFO - codeparrot_training - Step 11506: {'lr': 0.00045315447086630937, 'samples': 5891584, 'steps': 11506, 'loss/train': 2.6038100719451904} +02/24/2022 16:27:58 - INFO - codeparrot_training - Step 11507: {'lr': 0.000453144934439933, 'samples': 5892096, 'steps': 11507, 'loss/train': 2.211012601852417} +02/24/2022 16:28:03 - INFO - codeparrot_training - Step 11508: {'lr': 0.0004531353971433483, 'samples': 5892608, 'steps': 11508, 'loss/train': 3.507301092147827} +02/24/2022 16:28:07 - INFO - codeparrot_training - Step 11509: {'lr': 0.000453125858976596, 'samples': 5893120, 'steps': 11509, 'loss/train': 1.801110029220581} +02/24/2022 16:28:13 - INFO - codeparrot_training - Step 11510: {'lr': 0.000453116319939717, 'samples': 5893632, 'steps': 11510, 'loss/train': 2.172649383544922} +02/24/2022 16:28:16 - INFO - codeparrot_training - Step 11511: {'lr': 0.0004531067800327523, 'samples': 5894144, 'steps': 11511, 'loss/train': 2.3457870483398438} +02/24/2022 16:28:22 - INFO - codeparrot_training - Step 11512: {'lr': 0.0004530972392557425, 'samples': 5894656, 'steps': 11512, 'loss/train': 1.9584993124008179} +02/24/2022 16:28:25 - INFO - codeparrot_training - Step 11513: {'lr': 0.0004530876976087288, 'samples': 5895168, 'steps': 11513, 'loss/train': 2.7640602588653564} +02/24/2022 16:28:31 - INFO - codeparrot_training - Step 11514: {'lr': 0.00045307815509175177, 'samples': 5895680, 'steps': 11514, 'loss/train': 3.1797080039978027} +02/24/2022 16:28:34 - INFO - codeparrot_training - Step 11515: {'lr': 0.00045306861170485235, 'samples': 5896192, 'steps': 11515, 'loss/train': 1.0782145261764526} +02/24/2022 16:28:41 - INFO - codeparrot_training - Step 11516: {'lr': 0.00045305906744807156, 'samples': 5896704, 'steps': 11516, 'loss/train': 1.984292984008789} +02/24/2022 16:28:45 - INFO - codeparrot_training - Step 11517: {'lr': 0.0004530495223214502, 'samples': 5897216, 'steps': 11517, 'loss/train': 2.7023088932037354} +02/24/2022 16:28:51 - INFO - codeparrot_training - Step 11518: {'lr': 0.00045303997632502915, 'samples': 5897728, 'steps': 11518, 'loss/train': 2.081519603729248} +02/24/2022 16:28:54 - INFO - codeparrot_training - Step 11519: {'lr': 0.00045303042945884933, 'samples': 5898240, 'steps': 11519, 'loss/train': 9.124129295349121} +02/24/2022 16:29:00 - INFO - codeparrot_training - Step 11520: {'lr': 0.0004530208817229516, 'samples': 5898752, 'steps': 11520, 'loss/train': 3.5649006366729736} +02/24/2022 16:29:03 - INFO - codeparrot_training - Step 11521: {'lr': 0.00045301133311737685, 'samples': 5899264, 'steps': 11521, 'loss/train': 1.4435570240020752} +02/24/2022 16:29:09 - INFO - codeparrot_training - Step 11522: {'lr': 0.00045300178364216605, 'samples': 5899776, 'steps': 11522, 'loss/train': 2.2180709838867188} +02/24/2022 16:29:12 - INFO - codeparrot_training - Step 11523: {'lr': 0.00045299223329736004, 'samples': 5900288, 'steps': 11523, 'loss/train': 2.1526942253112793} +02/24/2022 16:29:18 - INFO - codeparrot_training - Step 11524: {'lr': 0.00045298268208299983, 'samples': 5900800, 'steps': 11524, 'loss/train': 3.007046937942505} +02/24/2022 16:29:21 - INFO - codeparrot_training - Step 11525: {'lr': 0.0004529731299991262, 'samples': 5901312, 'steps': 11525, 'loss/train': 2.2633237838745117} +02/24/2022 16:29:28 - INFO - codeparrot_training - Step 11526: {'lr': 0.00045296357704578016, 'samples': 5901824, 'steps': 11526, 'loss/train': 0.4225294291973114} +02/24/2022 16:29:32 - INFO - codeparrot_training - Step 11527: {'lr': 0.0004529540232230026, 'samples': 5902336, 'steps': 11527, 'loss/train': 2.5396106243133545} +02/24/2022 16:29:37 - INFO - codeparrot_training - Step 11528: {'lr': 0.00045294446853083446, 'samples': 5902848, 'steps': 11528, 'loss/train': 1.1456432342529297} +02/24/2022 16:29:41 - INFO - codeparrot_training - Step 11529: {'lr': 0.0004529349129693166, 'samples': 5903360, 'steps': 11529, 'loss/train': 2.259439468383789} +02/24/2022 16:29:46 - INFO - codeparrot_training - Step 11530: {'lr': 0.0004529253565384901, 'samples': 5903872, 'steps': 11530, 'loss/train': 2.5124430656433105} +02/24/2022 16:29:50 - INFO - codeparrot_training - Step 11531: {'lr': 0.00045291579923839576, 'samples': 5904384, 'steps': 11531, 'loss/train': 2.6305928230285645} +02/24/2022 16:29:55 - INFO - codeparrot_training - Step 11532: {'lr': 0.0004529062410690745, 'samples': 5904896, 'steps': 11532, 'loss/train': 1.2240854501724243} +02/24/2022 16:29:59 - INFO - codeparrot_training - Step 11533: {'lr': 0.00045289668203056743, 'samples': 5905408, 'steps': 11533, 'loss/train': 2.1779584884643555} +02/24/2022 16:30:04 - INFO - codeparrot_training - Step 11534: {'lr': 0.00045288712212291537, 'samples': 5905920, 'steps': 11534, 'loss/train': 1.7150356769561768} +02/24/2022 16:30:08 - INFO - codeparrot_training - Step 11535: {'lr': 0.0004528775613461593, 'samples': 5906432, 'steps': 11535, 'loss/train': 1.0887821912765503} +02/24/2022 16:30:13 - INFO - codeparrot_training - Step 11536: {'lr': 0.0004528679997003403, 'samples': 5906944, 'steps': 11536, 'loss/train': 2.1326656341552734} +02/24/2022 16:30:17 - INFO - codeparrot_training - Step 11537: {'lr': 0.000452858437185499, 'samples': 5907456, 'steps': 11537, 'loss/train': 1.573224663734436} +02/24/2022 16:30:22 - INFO - codeparrot_training - Step 11538: {'lr': 0.00045284887380167674, 'samples': 5907968, 'steps': 11538, 'loss/train': 2.3158607482910156} +02/24/2022 16:30:26 - INFO - codeparrot_training - Step 11539: {'lr': 0.0004528393095489142, 'samples': 5908480, 'steps': 11539, 'loss/train': 2.030322313308716} +02/24/2022 16:30:32 - INFO - codeparrot_training - Step 11540: {'lr': 0.0004528297444272525, 'samples': 5908992, 'steps': 11540, 'loss/train': 2.8145506381988525} +02/24/2022 16:30:35 - INFO - codeparrot_training - Step 11541: {'lr': 0.0004528201784367326, 'samples': 5909504, 'steps': 11541, 'loss/train': 2.621253252029419} +02/24/2022 16:30:42 - INFO - codeparrot_training - Step 11542: {'lr': 0.00045281061157739544, 'samples': 5910016, 'steps': 11542, 'loss/train': 2.1283960342407227} +02/24/2022 16:30:46 - INFO - codeparrot_training - Step 11543: {'lr': 0.000452801043849282, 'samples': 5910528, 'steps': 11543, 'loss/train': 2.3949320316314697} +02/24/2022 16:30:51 - INFO - codeparrot_training - Step 11544: {'lr': 0.00045279147525243335, 'samples': 5911040, 'steps': 11544, 'loss/train': 2.003279209136963} +02/24/2022 16:30:55 - INFO - codeparrot_training - Step 11545: {'lr': 0.0004527819057868904, 'samples': 5911552, 'steps': 11545, 'loss/train': 2.2588186264038086} +02/24/2022 16:31:00 - INFO - codeparrot_training - Step 11546: {'lr': 0.00045277233545269415, 'samples': 5912064, 'steps': 11546, 'loss/train': 1.4942573308944702} +02/24/2022 16:31:04 - INFO - codeparrot_training - Step 11547: {'lr': 0.00045276276424988554, 'samples': 5912576, 'steps': 11547, 'loss/train': 2.608991861343384} +02/24/2022 16:31:10 - INFO - codeparrot_training - Step 11548: {'lr': 0.0004527531921785057, 'samples': 5913088, 'steps': 11548, 'loss/train': 2.479332208633423} +02/24/2022 16:31:13 - INFO - codeparrot_training - Step 11549: {'lr': 0.00045274361923859554, 'samples': 5913600, 'steps': 11549, 'loss/train': 3.089203357696533} +02/24/2022 16:31:19 - INFO - codeparrot_training - Step 11550: {'lr': 0.0004527340454301961, 'samples': 5914112, 'steps': 11550, 'loss/train': 2.0475707054138184} +02/24/2022 16:31:22 - INFO - codeparrot_training - Step 11551: {'lr': 0.0004527244707533483, 'samples': 5914624, 'steps': 11551, 'loss/train': 2.8862383365631104} +02/24/2022 16:31:29 - INFO - codeparrot_training - Step 11552: {'lr': 0.00045271489520809337, 'samples': 5915136, 'steps': 11552, 'loss/train': 3.0720741748809814} +02/24/2022 16:31:33 - INFO - codeparrot_training - Step 11553: {'lr': 0.0004527053187944722, 'samples': 5915648, 'steps': 11553, 'loss/train': 2.62570858001709} +02/24/2022 16:31:38 - INFO - codeparrot_training - Step 11554: {'lr': 0.00045269574151252567, 'samples': 5916160, 'steps': 11554, 'loss/train': 2.2534070014953613} +02/24/2022 16:31:42 - INFO - codeparrot_training - Step 11555: {'lr': 0.00045268616336229504, 'samples': 5916672, 'steps': 11555, 'loss/train': 2.829556941986084} +02/24/2022 16:31:47 - INFO - codeparrot_training - Step 11556: {'lr': 0.0004526765843438213, 'samples': 5917184, 'steps': 11556, 'loss/train': 3.0566415786743164} +02/24/2022 16:31:51 - INFO - codeparrot_training - Step 11557: {'lr': 0.0004526670044571454, 'samples': 5917696, 'steps': 11557, 'loss/train': 1.6900790929794312} +02/24/2022 16:31:56 - INFO - codeparrot_training - Step 11558: {'lr': 0.00045265742370230835, 'samples': 5918208, 'steps': 11558, 'loss/train': 2.8533871173858643} +02/24/2022 16:32:00 - INFO - codeparrot_training - Step 11559: {'lr': 0.00045264784207935127, 'samples': 5918720, 'steps': 11559, 'loss/train': 2.1948347091674805} +02/24/2022 16:32:05 - INFO - codeparrot_training - Step 11560: {'lr': 0.0004526382595883152, 'samples': 5919232, 'steps': 11560, 'loss/train': 2.42537522315979} +02/24/2022 16:32:09 - INFO - codeparrot_training - Step 11561: {'lr': 0.0004526286762292411, 'samples': 5919744, 'steps': 11561, 'loss/train': 2.9881153106689453} +02/24/2022 16:32:16 - INFO - codeparrot_training - Step 11562: {'lr': 0.00045261909200217023, 'samples': 5920256, 'steps': 11562, 'loss/train': 2.443436622619629} +02/24/2022 16:32:20 - INFO - codeparrot_training - Step 11563: {'lr': 0.0004526095069071434, 'samples': 5920768, 'steps': 11563, 'loss/train': 0.5942236185073853} +02/24/2022 16:32:25 - INFO - codeparrot_training - Step 11564: {'lr': 0.0004525999209442018, 'samples': 5921280, 'steps': 11564, 'loss/train': 9.764789581298828} +02/24/2022 16:32:29 - INFO - codeparrot_training - Step 11565: {'lr': 0.0004525903341133865, 'samples': 5921792, 'steps': 11565, 'loss/train': 1.7377638816833496} +02/24/2022 16:32:34 - INFO - codeparrot_training - Step 11566: {'lr': 0.0004525807464147385, 'samples': 5922304, 'steps': 11566, 'loss/train': 2.552253484725952} +02/24/2022 16:32:38 - INFO - codeparrot_training - Step 11567: {'lr': 0.00045257115784829897, 'samples': 5922816, 'steps': 11567, 'loss/train': 1.2742775678634644} +02/24/2022 16:32:43 - INFO - codeparrot_training - Step 11568: {'lr': 0.00045256156841410884, 'samples': 5923328, 'steps': 11568, 'loss/train': 2.5633201599121094} +02/24/2022 16:32:47 - INFO - codeparrot_training - Step 11569: {'lr': 0.0004525519781122093, 'samples': 5923840, 'steps': 11569, 'loss/train': 2.5515196323394775} +02/24/2022 16:32:52 - INFO - codeparrot_training - Step 11570: {'lr': 0.00045254238694264145, 'samples': 5924352, 'steps': 11570, 'loss/train': 0.513909637928009} +02/24/2022 16:32:56 - INFO - codeparrot_training - Step 11571: {'lr': 0.00045253279490544627, 'samples': 5924864, 'steps': 11571, 'loss/train': 2.1761553287506104} +02/24/2022 16:33:03 - INFO - codeparrot_training - Step 11572: {'lr': 0.0004525232020006649, 'samples': 5925376, 'steps': 11572, 'loss/train': 2.4223105907440186} +02/24/2022 16:33:07 - INFO - codeparrot_training - Step 11573: {'lr': 0.00045251360822833855, 'samples': 5925888, 'steps': 11573, 'loss/train': 2.2352139949798584} +02/24/2022 16:33:12 - INFO - codeparrot_training - Step 11574: {'lr': 0.00045250401358850814, 'samples': 5926400, 'steps': 11574, 'loss/train': 2.3021950721740723} +02/24/2022 16:33:16 - INFO - codeparrot_training - Step 11575: {'lr': 0.00045249441808121484, 'samples': 5926912, 'steps': 11575, 'loss/train': 2.0894877910614014} +02/24/2022 16:33:21 - INFO - codeparrot_training - Step 11576: {'lr': 0.0004524848217064997, 'samples': 5927424, 'steps': 11576, 'loss/train': 1.4455124139785767} +02/24/2022 16:33:27 - INFO - codeparrot_training - Step 11577: {'lr': 0.0004524752244644039, 'samples': 5927936, 'steps': 11577, 'loss/train': 2.033019542694092} +02/24/2022 16:33:30 - INFO - codeparrot_training - Step 11578: {'lr': 0.0004524656263549686, 'samples': 5928448, 'steps': 11578, 'loss/train': 2.201353073120117} +02/24/2022 16:33:36 - INFO - codeparrot_training - Step 11579: {'lr': 0.0004524560273782348, 'samples': 5928960, 'steps': 11579, 'loss/train': 1.791818618774414} +02/24/2022 16:33:39 - INFO - codeparrot_training - Step 11580: {'lr': 0.00045244642753424364, 'samples': 5929472, 'steps': 11580, 'loss/train': 1.9452608823776245} +02/24/2022 16:33:47 - INFO - codeparrot_training - Step 11581: {'lr': 0.0004524368268230363, 'samples': 5929984, 'steps': 11581, 'loss/train': 2.6028406620025635} +02/24/2022 16:33:50 - INFO - codeparrot_training - Step 11582: {'lr': 0.00045242722524465386, 'samples': 5930496, 'steps': 11582, 'loss/train': 8.990501403808594} +02/24/2022 16:33:56 - INFO - codeparrot_training - Step 11583: {'lr': 0.00045241762279913745, 'samples': 5931008, 'steps': 11583, 'loss/train': 1.931323766708374} +02/24/2022 16:33:59 - INFO - codeparrot_training - Step 11584: {'lr': 0.0004524080194865283, 'samples': 5931520, 'steps': 11584, 'loss/train': 3.437455177307129} +02/24/2022 16:34:05 - INFO - codeparrot_training - Step 11585: {'lr': 0.00045239841530686736, 'samples': 5932032, 'steps': 11585, 'loss/train': 2.2022390365600586} +02/24/2022 16:34:08 - INFO - codeparrot_training - Step 11586: {'lr': 0.000452388810260196, 'samples': 5932544, 'steps': 11586, 'loss/train': 4.043230056762695} +02/24/2022 16:34:14 - INFO - codeparrot_training - Step 11587: {'lr': 0.0004523792043465551, 'samples': 5933056, 'steps': 11587, 'loss/train': 2.509674310684204} +02/24/2022 16:34:17 - INFO - codeparrot_training - Step 11588: {'lr': 0.00045236959756598605, 'samples': 5933568, 'steps': 11588, 'loss/train': 2.6086578369140625} +02/24/2022 16:34:23 - INFO - codeparrot_training - Step 11589: {'lr': 0.0004523599899185299, 'samples': 5934080, 'steps': 11589, 'loss/train': 2.2144455909729004} +02/24/2022 16:34:26 - INFO - codeparrot_training - Step 11590: {'lr': 0.0004523503814042277, 'samples': 5934592, 'steps': 11590, 'loss/train': 2.5732262134552} +02/24/2022 16:34:32 - INFO - codeparrot_training - Step 11591: {'lr': 0.00045234077202312086, 'samples': 5935104, 'steps': 11591, 'loss/train': 1.3404759168624878} +02/24/2022 16:34:35 - INFO - codeparrot_training - Step 11592: {'lr': 0.00045233116177525036, 'samples': 5935616, 'steps': 11592, 'loss/train': 1.8101704120635986} +02/24/2022 16:34:41 - INFO - codeparrot_training - Step 11593: {'lr': 0.00045232155066065737, 'samples': 5936128, 'steps': 11593, 'loss/train': 4.502933025360107} +02/24/2022 16:34:44 - INFO - codeparrot_training - Step 11594: {'lr': 0.00045231193867938314, 'samples': 5936640, 'steps': 11594, 'loss/train': 2.710329294204712} +02/24/2022 16:34:50 - INFO - codeparrot_training - Step 11595: {'lr': 0.0004523023258314688, 'samples': 5937152, 'steps': 11595, 'loss/train': 3.074796199798584} +02/24/2022 16:34:53 - INFO - codeparrot_training - Step 11596: {'lr': 0.00045229271211695554, 'samples': 5937664, 'steps': 11596, 'loss/train': 3.2067344188690186} +02/24/2022 16:35:01 - INFO - codeparrot_training - Step 11597: {'lr': 0.00045228309753588447, 'samples': 5938176, 'steps': 11597, 'loss/train': 2.750671625137329} +02/24/2022 16:35:04 - INFO - codeparrot_training - Step 11598: {'lr': 0.0004522734820882969, 'samples': 5938688, 'steps': 11598, 'loss/train': 1.4830482006072998} +02/24/2022 16:35:10 - INFO - codeparrot_training - Step 11599: {'lr': 0.00045226386577423394, 'samples': 5939200, 'steps': 11599, 'loss/train': 0.29062801599502563} +02/24/2022 16:35:13 - INFO - codeparrot_training - Step 11600: {'lr': 0.0004522542485937369, 'samples': 5939712, 'steps': 11600, 'loss/train': 2.089154005050659} +02/24/2022 16:35:19 - INFO - codeparrot_training - Step 11601: {'lr': 0.0004522446305468468, 'samples': 5940224, 'steps': 11601, 'loss/train': 2.1879985332489014} +02/24/2022 16:35:22 - INFO - codeparrot_training - Step 11602: {'lr': 0.00045223501163360494, 'samples': 5940736, 'steps': 11602, 'loss/train': 1.8698228597640991} +02/24/2022 16:35:28 - INFO - codeparrot_training - Step 11603: {'lr': 0.0004522253918540524, 'samples': 5941248, 'steps': 11603, 'loss/train': 2.562459945678711} +02/24/2022 16:35:31 - INFO - codeparrot_training - Step 11604: {'lr': 0.00045221577120823064, 'samples': 5941760, 'steps': 11604, 'loss/train': 0.799372673034668} +02/24/2022 16:35:37 - INFO - codeparrot_training - Step 11605: {'lr': 0.00045220614969618066, 'samples': 5942272, 'steps': 11605, 'loss/train': 1.231338620185852} +02/24/2022 16:35:40 - INFO - codeparrot_training - Step 11606: {'lr': 0.0004521965273179438, 'samples': 5942784, 'steps': 11606, 'loss/train': 1.9436644315719604} +02/24/2022 16:35:50 - INFO - codeparrot_training - Step 11607: {'lr': 0.00045218690407356117, 'samples': 5943296, 'steps': 11607, 'loss/train': 2.1516036987304688} +02/24/2022 16:35:54 - INFO - codeparrot_training - Step 11608: {'lr': 0.00045217727996307405, 'samples': 5943808, 'steps': 11608, 'loss/train': 1.9412357807159424} +02/24/2022 16:35:59 - INFO - codeparrot_training - Step 11609: {'lr': 0.0004521676549865237, 'samples': 5944320, 'steps': 11609, 'loss/train': 1.489535927772522} +02/24/2022 16:36:02 - INFO - codeparrot_training - Step 11610: {'lr': 0.0004521580291439513, 'samples': 5944832, 'steps': 11610, 'loss/train': 2.8663041591644287} +02/24/2022 16:36:08 - INFO - codeparrot_training - Step 11611: {'lr': 0.00045214840243539803, 'samples': 5945344, 'steps': 11611, 'loss/train': 2.324397563934326} +02/24/2022 16:36:11 - INFO - codeparrot_training - Step 11612: {'lr': 0.00045213877486090524, 'samples': 5945856, 'steps': 11612, 'loss/train': 2.4788997173309326} +02/24/2022 16:36:17 - INFO - codeparrot_training - Step 11613: {'lr': 0.0004521291464205141, 'samples': 5946368, 'steps': 11613, 'loss/train': 1.1995437145233154} +02/24/2022 16:36:20 - INFO - codeparrot_training - Step 11614: {'lr': 0.0004521195171142659, 'samples': 5946880, 'steps': 11614, 'loss/train': 2.288405656814575} +02/24/2022 16:36:26 - INFO - codeparrot_training - Step 11615: {'lr': 0.0004521098869422019, 'samples': 5947392, 'steps': 11615, 'loss/train': 2.9078524112701416} +02/24/2022 16:36:29 - INFO - codeparrot_training - Step 11616: {'lr': 0.00045210025590436333, 'samples': 5947904, 'steps': 11616, 'loss/train': 2.2069733142852783} +02/24/2022 16:36:37 - INFO - codeparrot_training - Step 11617: {'lr': 0.00045209062400079135, 'samples': 5948416, 'steps': 11617, 'loss/train': 1.4807133674621582} +02/24/2022 16:36:40 - INFO - codeparrot_training - Step 11618: {'lr': 0.00045208099123152735, 'samples': 5948928, 'steps': 11618, 'loss/train': 2.1155927181243896} +02/24/2022 16:36:46 - INFO - codeparrot_training - Step 11619: {'lr': 0.00045207135759661255, 'samples': 5949440, 'steps': 11619, 'loss/train': 2.448025941848755} +02/24/2022 16:36:49 - INFO - codeparrot_training - Step 11620: {'lr': 0.0004520617230960883, 'samples': 5949952, 'steps': 11620, 'loss/train': 2.3883309364318848} +02/24/2022 16:36:55 - INFO - codeparrot_training - Step 11621: {'lr': 0.0004520520877299957, 'samples': 5950464, 'steps': 11621, 'loss/train': 2.565316915512085} +02/24/2022 16:36:58 - INFO - codeparrot_training - Step 11622: {'lr': 0.00045204245149837606, 'samples': 5950976, 'steps': 11622, 'loss/train': 3.1621944904327393} +02/24/2022 16:37:04 - INFO - codeparrot_training - Step 11623: {'lr': 0.00045203281440127087, 'samples': 5951488, 'steps': 11623, 'loss/train': 0.9470974206924438} +02/24/2022 16:37:07 - INFO - codeparrot_training - Step 11624: {'lr': 0.00045202317643872113, 'samples': 5952000, 'steps': 11624, 'loss/train': 2.404804229736328} +02/24/2022 16:37:13 - INFO - codeparrot_training - Step 11625: {'lr': 0.0004520135376107683, 'samples': 5952512, 'steps': 11625, 'loss/train': 2.227313995361328} +02/24/2022 16:37:16 - INFO - codeparrot_training - Step 11626: {'lr': 0.00045200389791745364, 'samples': 5953024, 'steps': 11626, 'loss/train': 2.097395181655884} +02/24/2022 16:37:22 - INFO - codeparrot_training - Step 11627: {'lr': 0.0004519942573588184, 'samples': 5953536, 'steps': 11627, 'loss/train': 2.2431325912475586} +02/24/2022 16:37:26 - INFO - codeparrot_training - Step 11628: {'lr': 0.00045198461593490394, 'samples': 5954048, 'steps': 11628, 'loss/train': 6.506643772125244} +02/24/2022 16:37:31 - INFO - codeparrot_training - Step 11629: {'lr': 0.0004519749736457515, 'samples': 5954560, 'steps': 11629, 'loss/train': 1.7646405696868896} +02/24/2022 16:37:35 - INFO - codeparrot_training - Step 11630: {'lr': 0.00045196533049140234, 'samples': 5955072, 'steps': 11630, 'loss/train': 0.810029149055481} +02/24/2022 16:37:40 - INFO - codeparrot_training - Step 11631: {'lr': 0.0004519556864718979, 'samples': 5955584, 'steps': 11631, 'loss/train': 2.208754777908325} +02/24/2022 16:37:43 - INFO - codeparrot_training - Step 11632: {'lr': 0.00045194604158727936, 'samples': 5956096, 'steps': 11632, 'loss/train': 2.148669481277466} +02/24/2022 16:37:51 - INFO - codeparrot_training - Step 11633: {'lr': 0.0004519363958375882, 'samples': 5956608, 'steps': 11633, 'loss/train': 1.9196035861968994} +02/24/2022 16:37:54 - INFO - codeparrot_training - Step 11634: {'lr': 0.00045192674922286556, 'samples': 5957120, 'steps': 11634, 'loss/train': 2.1501853466033936} +02/24/2022 16:38:00 - INFO - codeparrot_training - Step 11635: {'lr': 0.00045191710174315294, 'samples': 5957632, 'steps': 11635, 'loss/train': 2.6451566219329834} +02/24/2022 16:38:03 - INFO - codeparrot_training - Step 11636: {'lr': 0.0004519074533984915, 'samples': 5958144, 'steps': 11636, 'loss/train': 1.8730019330978394} +02/24/2022 16:38:09 - INFO - codeparrot_training - Step 11637: {'lr': 0.0004518978041889227, 'samples': 5958656, 'steps': 11637, 'loss/train': 1.8419873714447021} +02/24/2022 16:38:12 - INFO - codeparrot_training - Step 11638: {'lr': 0.00045188815411448767, 'samples': 5959168, 'steps': 11638, 'loss/train': 3.911593437194824} +02/24/2022 16:38:18 - INFO - codeparrot_training - Step 11639: {'lr': 0.00045187850317522806, 'samples': 5959680, 'steps': 11639, 'loss/train': 2.027243137359619} +02/24/2022 16:38:21 - INFO - codeparrot_training - Step 11640: {'lr': 0.00045186885137118494, 'samples': 5960192, 'steps': 11640, 'loss/train': 2.275195360183716} +02/24/2022 16:38:27 - INFO - codeparrot_training - Step 11641: {'lr': 0.0004518591987023999, 'samples': 5960704, 'steps': 11641, 'loss/train': 2.997321605682373} +02/24/2022 16:38:30 - INFO - codeparrot_training - Step 11642: {'lr': 0.000451849545168914, 'samples': 5961216, 'steps': 11642, 'loss/train': 2.1607887744903564} +02/24/2022 16:38:38 - INFO - codeparrot_training - Step 11643: {'lr': 0.00045183989077076883, 'samples': 5961728, 'steps': 11643, 'loss/train': 1.8438206911087036} +02/24/2022 16:38:41 - INFO - codeparrot_training - Step 11644: {'lr': 0.00045183023550800564, 'samples': 5962240, 'steps': 11644, 'loss/train': 2.06140398979187} +02/24/2022 16:38:47 - INFO - codeparrot_training - Step 11645: {'lr': 0.0004518205793806658, 'samples': 5962752, 'steps': 11645, 'loss/train': 2.292933225631714} +02/24/2022 16:38:50 - INFO - codeparrot_training - Step 11646: {'lr': 0.0004518109223887907, 'samples': 5963264, 'steps': 11646, 'loss/train': 2.528597593307495} +02/24/2022 16:38:56 - INFO - codeparrot_training - Step 11647: {'lr': 0.0004518012645324217, 'samples': 5963776, 'steps': 11647, 'loss/train': 2.747081756591797} +02/24/2022 16:38:59 - INFO - codeparrot_training - Step 11648: {'lr': 0.00045179160581160005, 'samples': 5964288, 'steps': 11648, 'loss/train': 2.287997007369995} +02/24/2022 16:39:05 - INFO - codeparrot_training - Step 11649: {'lr': 0.0004517819462263674, 'samples': 5964800, 'steps': 11649, 'loss/train': 2.0138418674468994} +02/24/2022 16:39:08 - INFO - codeparrot_training - Step 11650: {'lr': 0.0004517722857767649, 'samples': 5965312, 'steps': 11650, 'loss/train': 1.583605408668518} +02/24/2022 16:39:14 - INFO - codeparrot_training - Step 11651: {'lr': 0.0004517626244628339, 'samples': 5965824, 'steps': 11651, 'loss/train': 0.8320340514183044} +02/24/2022 16:39:17 - INFO - codeparrot_training - Step 11652: {'lr': 0.000451752962284616, 'samples': 5966336, 'steps': 11652, 'loss/train': 2.384977340698242} +02/24/2022 16:39:23 - INFO - codeparrot_training - Step 11653: {'lr': 0.0004517432992421524, 'samples': 5966848, 'steps': 11653, 'loss/train': 1.457533836364746} +02/24/2022 16:39:26 - INFO - codeparrot_training - Step 11654: {'lr': 0.00045173363533548464, 'samples': 5967360, 'steps': 11654, 'loss/train': 1.8976963758468628} +02/24/2022 16:39:34 - INFO - codeparrot_training - Step 11655: {'lr': 0.00045172397056465405, 'samples': 5967872, 'steps': 11655, 'loss/train': 2.159296989440918} +02/24/2022 16:39:37 - INFO - codeparrot_training - Step 11656: {'lr': 0.000451714304929702, 'samples': 5968384, 'steps': 11656, 'loss/train': 1.856048822402954} +02/24/2022 16:39:43 - INFO - codeparrot_training - Step 11657: {'lr': 0.0004517046384306699, 'samples': 5968896, 'steps': 11657, 'loss/train': 1.6358002424240112} +02/24/2022 16:39:46 - INFO - codeparrot_training - Step 11658: {'lr': 0.00045169497106759915, 'samples': 5969408, 'steps': 11658, 'loss/train': 1.6984803676605225} +02/24/2022 16:39:52 - INFO - codeparrot_training - Step 11659: {'lr': 0.0004516853028405312, 'samples': 5969920, 'steps': 11659, 'loss/train': 0.12451069056987762} +02/24/2022 16:39:55 - INFO - codeparrot_training - Step 11660: {'lr': 0.0004516756337495075, 'samples': 5970432, 'steps': 11660, 'loss/train': 2.5406110286712646} +02/24/2022 16:40:01 - INFO - codeparrot_training - Step 11661: {'lr': 0.00045166596379456935, 'samples': 5970944, 'steps': 11661, 'loss/train': 2.4823601245880127} +02/24/2022 16:40:04 - INFO - codeparrot_training - Step 11662: {'lr': 0.0004516562929757584, 'samples': 5971456, 'steps': 11662, 'loss/train': 2.3144519329071045} +02/24/2022 16:40:10 - INFO - codeparrot_training - Step 11663: {'lr': 0.0004516466212931158, 'samples': 5971968, 'steps': 11663, 'loss/train': 1.219640851020813} +02/24/2022 16:40:13 - INFO - codeparrot_training - Step 11664: {'lr': 0.00045163694874668316, 'samples': 5972480, 'steps': 11664, 'loss/train': 2.5993688106536865} +02/24/2022 16:40:21 - INFO - codeparrot_training - Step 11665: {'lr': 0.0004516272753365018, 'samples': 5972992, 'steps': 11665, 'loss/train': 2.4267616271972656} +02/24/2022 16:40:24 - INFO - codeparrot_training - Step 11666: {'lr': 0.0004516176010626132, 'samples': 5973504, 'steps': 11666, 'loss/train': 0.460162878036499} +02/24/2022 16:40:30 - INFO - codeparrot_training - Step 11667: {'lr': 0.00045160792592505893, 'samples': 5974016, 'steps': 11667, 'loss/train': 1.9066812992095947} +02/24/2022 16:40:33 - INFO - codeparrot_training - Step 11668: {'lr': 0.0004515982499238802, 'samples': 5974528, 'steps': 11668, 'loss/train': 2.975303888320923} +02/24/2022 16:40:39 - INFO - codeparrot_training - Step 11669: {'lr': 0.0004515885730591187, 'samples': 5975040, 'steps': 11669, 'loss/train': 1.0526678562164307} +02/24/2022 16:40:42 - INFO - codeparrot_training - Step 11670: {'lr': 0.0004515788953308156, 'samples': 5975552, 'steps': 11670, 'loss/train': 2.3513824939727783} +02/24/2022 16:40:48 - INFO - codeparrot_training - Step 11671: {'lr': 0.00045156921673901267, 'samples': 5976064, 'steps': 11671, 'loss/train': 2.775592565536499} +02/24/2022 16:40:51 - INFO - codeparrot_training - Step 11672: {'lr': 0.0004515595372837512, 'samples': 5976576, 'steps': 11672, 'loss/train': 2.224604606628418} +02/24/2022 16:40:57 - INFO - codeparrot_training - Step 11673: {'lr': 0.00045154985696507267, 'samples': 5977088, 'steps': 11673, 'loss/train': 1.5069808959960938} +02/24/2022 16:41:00 - INFO - codeparrot_training - Step 11674: {'lr': 0.0004515401757830185, 'samples': 5977600, 'steps': 11674, 'loss/train': 3.1457831859588623} +02/24/2022 16:41:08 - INFO - codeparrot_training - Step 11675: {'lr': 0.0004515304937376302, 'samples': 5978112, 'steps': 11675, 'loss/train': 1.969599723815918} +02/24/2022 16:41:11 - INFO - codeparrot_training - Step 11676: {'lr': 0.00045152081082894935, 'samples': 5978624, 'steps': 11676, 'loss/train': 1.2151014804840088} +02/24/2022 16:41:16 - INFO - codeparrot_training - Step 11677: {'lr': 0.00045151112705701723, 'samples': 5979136, 'steps': 11677, 'loss/train': 1.9878487586975098} +02/24/2022 16:41:20 - INFO - codeparrot_training - Step 11678: {'lr': 0.00045150144242187554, 'samples': 5979648, 'steps': 11678, 'loss/train': 2.6872684955596924} +02/24/2022 16:41:25 - INFO - codeparrot_training - Step 11679: {'lr': 0.0004514917569235656, 'samples': 5980160, 'steps': 11679, 'loss/train': 3.4789350032806396} +02/24/2022 16:41:29 - INFO - codeparrot_training - Step 11680: {'lr': 0.00045148207056212896, 'samples': 5980672, 'steps': 11680, 'loss/train': 1.7429273128509521} +02/24/2022 16:41:35 - INFO - codeparrot_training - Step 11681: {'lr': 0.0004514723833376071, 'samples': 5981184, 'steps': 11681, 'loss/train': 1.8754760026931763} +02/24/2022 16:41:38 - INFO - codeparrot_training - Step 11682: {'lr': 0.00045146269525004153, 'samples': 5981696, 'steps': 11682, 'loss/train': 1.0769193172454834} +02/24/2022 16:41:44 - INFO - codeparrot_training - Step 11683: {'lr': 0.00045145300629947374, 'samples': 5982208, 'steps': 11683, 'loss/train': 2.6670479774475098} +02/24/2022 16:41:47 - INFO - codeparrot_training - Step 11684: {'lr': 0.0004514433164859453, 'samples': 5982720, 'steps': 11684, 'loss/train': 2.9119553565979004} +02/24/2022 16:41:53 - INFO - codeparrot_training - Step 11685: {'lr': 0.00045143362580949754, 'samples': 5983232, 'steps': 11685, 'loss/train': 1.419554591178894} +02/24/2022 16:41:56 - INFO - codeparrot_training - Step 11686: {'lr': 0.00045142393427017214, 'samples': 5983744, 'steps': 11686, 'loss/train': 1.2289128303527832} +02/24/2022 16:42:02 - INFO - codeparrot_training - Step 11687: {'lr': 0.0004514142418680106, 'samples': 5984256, 'steps': 11687, 'loss/train': 1.5331218242645264} +02/24/2022 16:42:05 - INFO - codeparrot_training - Step 11688: {'lr': 0.00045140454860305435, 'samples': 5984768, 'steps': 11688, 'loss/train': 2.273690700531006} +02/24/2022 16:42:11 - INFO - codeparrot_training - Step 11689: {'lr': 0.000451394854475345, 'samples': 5985280, 'steps': 11689, 'loss/train': 1.7599692344665527} +02/24/2022 16:42:14 - INFO - codeparrot_training - Step 11690: {'lr': 0.0004513851594849241, 'samples': 5985792, 'steps': 11690, 'loss/train': 1.0157694816589355} +02/24/2022 16:42:21 - INFO - codeparrot_training - Step 11691: {'lr': 0.000451375463631833, 'samples': 5986304, 'steps': 11691, 'loss/train': 2.3648629188537598} +02/24/2022 16:42:25 - INFO - codeparrot_training - Step 11692: {'lr': 0.0004513657669161134, 'samples': 5986816, 'steps': 11692, 'loss/train': 1.7742458581924438} +02/24/2022 16:42:30 - INFO - codeparrot_training - Step 11693: {'lr': 0.0004513560693378068, 'samples': 5987328, 'steps': 11693, 'loss/train': 2.3537003993988037} +02/24/2022 16:42:36 - INFO - codeparrot_training - Step 11694: {'lr': 0.00045134637089695484, 'samples': 5987840, 'steps': 11694, 'loss/train': 1.635169506072998} +02/24/2022 16:42:39 - INFO - codeparrot_training - Step 11695: {'lr': 0.0004513366715935988, 'samples': 5988352, 'steps': 11695, 'loss/train': 2.943849563598633} +02/24/2022 16:42:45 - INFO - codeparrot_training - Step 11696: {'lr': 0.00045132697142778044, 'samples': 5988864, 'steps': 11696, 'loss/train': 2.0444397926330566} +02/24/2022 16:42:48 - INFO - codeparrot_training - Step 11697: {'lr': 0.00045131727039954137, 'samples': 5989376, 'steps': 11697, 'loss/train': 1.8476845026016235} +02/24/2022 16:42:54 - INFO - codeparrot_training - Step 11698: {'lr': 0.00045130756850892296, 'samples': 5989888, 'steps': 11698, 'loss/train': 1.385201096534729} +02/24/2022 16:42:57 - INFO - codeparrot_training - Step 11699: {'lr': 0.00045129786575596683, 'samples': 5990400, 'steps': 11699, 'loss/train': 2.6225171089172363} +02/24/2022 16:43:04 - INFO - codeparrot_training - Step 11700: {'lr': 0.00045128816214071453, 'samples': 5990912, 'steps': 11700, 'loss/train': 1.614296793937683} +02/24/2022 16:43:08 - INFO - codeparrot_training - Step 11701: {'lr': 0.00045127845766320773, 'samples': 5991424, 'steps': 11701, 'loss/train': 0.1792672872543335} +02/24/2022 16:43:13 - INFO - codeparrot_training - Step 11702: {'lr': 0.0004512687523234879, 'samples': 5991936, 'steps': 11702, 'loss/train': 2.3859634399414062} +02/24/2022 16:43:17 - INFO - codeparrot_training - Step 11703: {'lr': 0.0004512590461215967, 'samples': 5992448, 'steps': 11703, 'loss/train': 2.563322067260742} +02/24/2022 16:43:22 - INFO - codeparrot_training - Step 11704: {'lr': 0.0004512493390575756, 'samples': 5992960, 'steps': 11704, 'loss/train': 1.340606927871704} +02/24/2022 16:43:26 - INFO - codeparrot_training - Step 11705: {'lr': 0.0004512396311314662, 'samples': 5993472, 'steps': 11705, 'loss/train': 2.1104423999786377} +02/24/2022 16:43:31 - INFO - codeparrot_training - Step 11706: {'lr': 0.00045122992234331017, 'samples': 5993984, 'steps': 11706, 'loss/train': 2.493543863296509} +02/24/2022 16:43:35 - INFO - codeparrot_training - Step 11707: {'lr': 0.00045122021269314907, 'samples': 5994496, 'steps': 11707, 'loss/train': 2.080507278442383} +02/24/2022 16:43:40 - INFO - codeparrot_training - Step 11708: {'lr': 0.0004512105021810244, 'samples': 5995008, 'steps': 11708, 'loss/train': 2.3038547039031982} +02/24/2022 16:43:44 - INFO - codeparrot_training - Step 11709: {'lr': 0.0004512007908069779, 'samples': 5995520, 'steps': 11709, 'loss/train': 2.1975417137145996} +02/24/2022 16:43:51 - INFO - codeparrot_training - Step 11710: {'lr': 0.0004511910785710511, 'samples': 5996032, 'steps': 11710, 'loss/train': 1.5053011178970337} +02/24/2022 16:43:55 - INFO - codeparrot_training - Step 11711: {'lr': 0.0004511813654732856, 'samples': 5996544, 'steps': 11711, 'loss/train': 1.2798415422439575} +02/24/2022 16:44:00 - INFO - codeparrot_training - Step 11712: {'lr': 0.00045117165151372296, 'samples': 5997056, 'steps': 11712, 'loss/train': 2.2211251258850098} +02/24/2022 16:44:04 - INFO - codeparrot_training - Step 11713: {'lr': 0.0004511619366924049, 'samples': 5997568, 'steps': 11713, 'loss/train': 1.8240395784378052} +02/24/2022 16:44:09 - INFO - codeparrot_training - Step 11714: {'lr': 0.00045115222100937293, 'samples': 5998080, 'steps': 11714, 'loss/train': 1.8571007251739502} +02/24/2022 16:44:13 - INFO - codeparrot_training - Step 11715: {'lr': 0.00045114250446466874, 'samples': 5998592, 'steps': 11715, 'loss/train': 2.3949577808380127} +02/24/2022 16:44:18 - INFO - codeparrot_training - Step 11716: {'lr': 0.00045113278705833396, 'samples': 5999104, 'steps': 11716, 'loss/train': 2.4111225605010986} +02/24/2022 16:44:22 - INFO - codeparrot_training - Step 11717: {'lr': 0.00045112306879041016, 'samples': 5999616, 'steps': 11717, 'loss/train': 2.1929476261138916} +02/24/2022 16:44:27 - INFO - codeparrot_training - Step 11718: {'lr': 0.000451113349660939, 'samples': 6000128, 'steps': 11718, 'loss/train': 2.0388059616088867} +02/24/2022 16:44:31 - INFO - codeparrot_training - Step 11719: {'lr': 0.0004511036296699621, 'samples': 6000640, 'steps': 11719, 'loss/train': 1.0479477643966675} +02/24/2022 16:44:38 - INFO - codeparrot_training - Step 11720: {'lr': 0.0004510939088175211, 'samples': 6001152, 'steps': 11720, 'loss/train': 1.0591456890106201} +02/24/2022 16:44:41 - INFO - codeparrot_training - Step 11721: {'lr': 0.00045108418710365774, 'samples': 6001664, 'steps': 11721, 'loss/train': 1.9625349044799805} +02/24/2022 16:44:47 - INFO - codeparrot_training - Step 11722: {'lr': 0.0004510744645284135, 'samples': 6002176, 'steps': 11722, 'loss/train': 1.909144401550293} +02/24/2022 16:44:50 - INFO - codeparrot_training - Step 11723: {'lr': 0.00045106474109183004, 'samples': 6002688, 'steps': 11723, 'loss/train': 1.3695712089538574} +02/24/2022 16:44:56 - INFO - codeparrot_training - Step 11724: {'lr': 0.00045105501679394916, 'samples': 6003200, 'steps': 11724, 'loss/train': 1.601331353187561} +02/24/2022 16:45:00 - INFO - codeparrot_training - Step 11725: {'lr': 0.00045104529163481245, 'samples': 6003712, 'steps': 11725, 'loss/train': 1.130944848060608} +02/24/2022 16:45:05 - INFO - codeparrot_training - Step 11726: {'lr': 0.0004510355656144615, 'samples': 6004224, 'steps': 11726, 'loss/train': 1.0114940404891968} +02/24/2022 16:45:09 - INFO - codeparrot_training - Step 11727: {'lr': 0.000451025838732938, 'samples': 6004736, 'steps': 11727, 'loss/train': 0.564415454864502} +02/24/2022 16:45:14 - INFO - codeparrot_training - Step 11728: {'lr': 0.0004510161109902837, 'samples': 6005248, 'steps': 11728, 'loss/train': 2.2881507873535156} +02/24/2022 16:45:18 - INFO - codeparrot_training - Step 11729: {'lr': 0.00045100638238654013, 'samples': 6005760, 'steps': 11729, 'loss/train': 1.274651288986206} +02/24/2022 16:45:23 - INFO - codeparrot_training - Step 11730: {'lr': 0.00045099665292174917, 'samples': 6006272, 'steps': 11730, 'loss/train': 1.4472706317901611} +02/24/2022 16:45:27 - INFO - codeparrot_training - Step 11731: {'lr': 0.00045098692259595233, 'samples': 6006784, 'steps': 11731, 'loss/train': 0.3856137990951538} +02/24/2022 16:45:32 - INFO - codeparrot_training - Step 11732: {'lr': 0.00045097719140919126, 'samples': 6007296, 'steps': 11732, 'loss/train': 2.1976161003112793} +02/24/2022 16:45:36 - INFO - codeparrot_training - Step 11733: {'lr': 0.00045096745936150774, 'samples': 6007808, 'steps': 11733, 'loss/train': 0.322746604681015} +02/24/2022 16:45:41 - INFO - codeparrot_training - Step 11734: {'lr': 0.00045095772645294347, 'samples': 6008320, 'steps': 11734, 'loss/train': 0.7797043919563293} +02/24/2022 16:45:45 - INFO - codeparrot_training - Step 11735: {'lr': 0.00045094799268354007, 'samples': 6008832, 'steps': 11735, 'loss/train': 2.494370222091675} +02/24/2022 16:45:52 - INFO - codeparrot_training - Step 11736: {'lr': 0.00045093825805333934, 'samples': 6009344, 'steps': 11736, 'loss/train': 2.7397022247314453} +02/24/2022 16:45:56 - INFO - codeparrot_training - Step 11737: {'lr': 0.0004509285225623829, 'samples': 6009856, 'steps': 11737, 'loss/train': 1.7188767194747925} +02/24/2022 16:46:01 - INFO - codeparrot_training - Step 11738: {'lr': 0.0004509187862107125, 'samples': 6010368, 'steps': 11738, 'loss/train': 3.339857578277588} +02/24/2022 16:46:05 - INFO - codeparrot_training - Step 11739: {'lr': 0.0004509090489983697, 'samples': 6010880, 'steps': 11739, 'loss/train': 1.6962321996688843} +02/24/2022 16:46:10 - INFO - codeparrot_training - Step 11740: {'lr': 0.0004508993109253964, 'samples': 6011392, 'steps': 11740, 'loss/train': 1.9202880859375} +02/24/2022 16:46:14 - INFO - codeparrot_training - Step 11741: {'lr': 0.00045088957199183427, 'samples': 6011904, 'steps': 11741, 'loss/train': 1.823652982711792} +02/24/2022 16:46:19 - INFO - codeparrot_training - Step 11742: {'lr': 0.000450879832197725, 'samples': 6012416, 'steps': 11742, 'loss/train': 1.4651334285736084} +02/24/2022 16:46:23 - INFO - codeparrot_training - Step 11743: {'lr': 0.0004508700915431103, 'samples': 6012928, 'steps': 11743, 'loss/train': 1.7653381824493408} +02/24/2022 16:46:28 - INFO - codeparrot_training - Step 11744: {'lr': 0.0004508603500280319, 'samples': 6013440, 'steps': 11744, 'loss/train': 0.8583138585090637} +02/24/2022 16:46:32 - INFO - codeparrot_training - Step 11745: {'lr': 0.00045085060765253157, 'samples': 6013952, 'steps': 11745, 'loss/train': 2.611677646636963} +02/24/2022 16:46:39 - INFO - codeparrot_training - Step 11746: {'lr': 0.00045084086441665093, 'samples': 6014464, 'steps': 11746, 'loss/train': 1.703393578529358} +02/24/2022 16:46:43 - INFO - codeparrot_training - Step 11747: {'lr': 0.00045083112032043196, 'samples': 6014976, 'steps': 11747, 'loss/train': 0.5210812091827393} +02/24/2022 16:46:48 - INFO - codeparrot_training - Step 11748: {'lr': 0.0004508213753639161, 'samples': 6015488, 'steps': 11748, 'loss/train': 2.5765676498413086} +02/24/2022 16:46:52 - INFO - codeparrot_training - Step 11749: {'lr': 0.0004508116295471453, 'samples': 6016000, 'steps': 11749, 'loss/train': 1.7753199338912964} +02/24/2022 16:46:57 - INFO - codeparrot_training - Step 11750: {'lr': 0.0004508018828701612, 'samples': 6016512, 'steps': 11750, 'loss/train': 1.9259554147720337} +02/24/2022 16:47:01 - INFO - codeparrot_training - Step 11751: {'lr': 0.0004507921353330057, 'samples': 6017024, 'steps': 11751, 'loss/train': 2.5465378761291504} +02/24/2022 16:47:06 - INFO - codeparrot_training - Step 11752: {'lr': 0.0004507823869357204, 'samples': 6017536, 'steps': 11752, 'loss/train': 1.3102563619613647} +02/24/2022 16:47:10 - INFO - codeparrot_training - Step 11753: {'lr': 0.00045077263767834703, 'samples': 6018048, 'steps': 11753, 'loss/train': 1.4255242347717285} +02/24/2022 16:47:15 - INFO - codeparrot_training - Step 11754: {'lr': 0.00045076288756092754, 'samples': 6018560, 'steps': 11754, 'loss/train': 1.6492993831634521} +02/24/2022 16:47:19 - INFO - codeparrot_training - Step 11755: {'lr': 0.0004507531365835035, 'samples': 6019072, 'steps': 11755, 'loss/train': 2.3588688373565674} +02/24/2022 16:47:26 - INFO - codeparrot_training - Step 11756: {'lr': 0.00045074338474611683, 'samples': 6019584, 'steps': 11756, 'loss/train': 2.6841821670532227} +02/24/2022 16:47:30 - INFO - codeparrot_training - Step 11757: {'lr': 0.00045073363204880916, 'samples': 6020096, 'steps': 11757, 'loss/train': 2.7651474475860596} +02/24/2022 16:47:35 - INFO - codeparrot_training - Step 11758: {'lr': 0.0004507238784916224, 'samples': 6020608, 'steps': 11758, 'loss/train': 2.136057138442993} +02/24/2022 16:47:39 - INFO - codeparrot_training - Step 11759: {'lr': 0.0004507141240745983, 'samples': 6021120, 'steps': 11759, 'loss/train': 2.7878289222717285} +02/24/2022 16:47:44 - INFO - codeparrot_training - Step 11760: {'lr': 0.0004507043687977787, 'samples': 6021632, 'steps': 11760, 'loss/train': 2.0140068531036377} +02/24/2022 16:47:48 - INFO - codeparrot_training - Step 11761: {'lr': 0.00045069461266120515, 'samples': 6022144, 'steps': 11761, 'loss/train': 1.6322925090789795} +02/24/2022 16:47:53 - INFO - codeparrot_training - Step 11762: {'lr': 0.0004506848556649197, 'samples': 6022656, 'steps': 11762, 'loss/train': 2.4961063861846924} +02/24/2022 16:47:57 - INFO - codeparrot_training - Step 11763: {'lr': 0.0004506750978089641, 'samples': 6023168, 'steps': 11763, 'loss/train': 1.8917371034622192} +02/24/2022 16:48:02 - INFO - codeparrot_training - Step 11764: {'lr': 0.00045066533909338005, 'samples': 6023680, 'steps': 11764, 'loss/train': 1.9698625802993774} +02/24/2022 16:48:05 - INFO - codeparrot_training - Step 11765: {'lr': 0.00045065557951820935, 'samples': 6024192, 'steps': 11765, 'loss/train': 2.764829397201538} +02/24/2022 16:48:13 - INFO - codeparrot_training - Step 11766: {'lr': 0.0004506458190834939, 'samples': 6024704, 'steps': 11766, 'loss/train': 2.0404257774353027} +02/24/2022 16:48:16 - INFO - codeparrot_training - Step 11767: {'lr': 0.0004506360577892755, 'samples': 6025216, 'steps': 11767, 'loss/train': 0.7921394109725952} +02/24/2022 16:48:22 - INFO - codeparrot_training - Step 11768: {'lr': 0.00045062629563559595, 'samples': 6025728, 'steps': 11768, 'loss/train': 1.5200183391571045} +02/24/2022 16:48:25 - INFO - codeparrot_training - Step 11769: {'lr': 0.00045061653262249703, 'samples': 6026240, 'steps': 11769, 'loss/train': 1.6946516036987305} +02/24/2022 16:48:31 - INFO - codeparrot_training - Step 11770: {'lr': 0.0004506067687500206, 'samples': 6026752, 'steps': 11770, 'loss/train': 1.5203983783721924} +02/24/2022 16:48:34 - INFO - codeparrot_training - Step 11771: {'lr': 0.00045059700401820846, 'samples': 6027264, 'steps': 11771, 'loss/train': 2.541382074356079} +02/24/2022 16:48:40 - INFO - codeparrot_training - Step 11772: {'lr': 0.00045058723842710246, 'samples': 6027776, 'steps': 11772, 'loss/train': 2.290221929550171} +02/24/2022 16:48:43 - INFO - codeparrot_training - Step 11773: {'lr': 0.0004505774719767444, 'samples': 6028288, 'steps': 11773, 'loss/train': 1.734066367149353} +02/24/2022 16:48:49 - INFO - codeparrot_training - Step 11774: {'lr': 0.0004505677046671761, 'samples': 6028800, 'steps': 11774, 'loss/train': 2.921297311782837} +02/24/2022 16:48:52 - INFO - codeparrot_training - Step 11775: {'lr': 0.0004505579364984396, 'samples': 6029312, 'steps': 11775, 'loss/train': 2.769432544708252} +02/24/2022 16:49:00 - INFO - codeparrot_training - Step 11776: {'lr': 0.0004505481674705764, 'samples': 6029824, 'steps': 11776, 'loss/train': 2.7122838497161865} +02/24/2022 16:49:03 - INFO - codeparrot_training - Step 11777: {'lr': 0.0004505383975836286, 'samples': 6030336, 'steps': 11777, 'loss/train': 2.342007875442505} +02/24/2022 16:49:09 - INFO - codeparrot_training - Step 11778: {'lr': 0.00045052862683763806, 'samples': 6030848, 'steps': 11778, 'loss/train': 2.0701496601104736} +02/24/2022 16:49:12 - INFO - codeparrot_training - Step 11779: {'lr': 0.0004505188552326465, 'samples': 6031360, 'steps': 11779, 'loss/train': 1.4011969566345215} +02/24/2022 16:49:17 - INFO - codeparrot_training - Step 11780: {'lr': 0.00045050908276869585, 'samples': 6031872, 'steps': 11780, 'loss/train': 2.5028140544891357} +02/24/2022 16:49:21 - INFO - codeparrot_training - Step 11781: {'lr': 0.00045049930944582783, 'samples': 6032384, 'steps': 11781, 'loss/train': 2.1235408782958984} +02/24/2022 16:49:27 - INFO - codeparrot_training - Step 11782: {'lr': 0.0004504895352640846, 'samples': 6032896, 'steps': 11782, 'loss/train': 2.391179084777832} +02/24/2022 16:49:30 - INFO - codeparrot_training - Step 11783: {'lr': 0.0004504797602235078, 'samples': 6033408, 'steps': 11783, 'loss/train': 1.413794994354248} +02/24/2022 16:49:35 - INFO - codeparrot_training - Step 11784: {'lr': 0.0004504699843241394, 'samples': 6033920, 'steps': 11784, 'loss/train': 2.193089246749878} +02/24/2022 16:49:39 - INFO - codeparrot_training - Step 11785: {'lr': 0.0004504602075660212, 'samples': 6034432, 'steps': 11785, 'loss/train': 1.088712453842163} +02/24/2022 16:49:44 - INFO - codeparrot_training - Step 11786: {'lr': 0.00045045042994919514, 'samples': 6034944, 'steps': 11786, 'loss/train': 1.206525206565857} +02/24/2022 16:49:48 - INFO - codeparrot_training - Step 11787: {'lr': 0.00045044065147370303, 'samples': 6035456, 'steps': 11787, 'loss/train': 2.6903977394104004} +02/24/2022 16:49:54 - INFO - codeparrot_training - Step 11788: {'lr': 0.0004504308721395869, 'samples': 6035968, 'steps': 11788, 'loss/train': 2.8004629611968994} +02/24/2022 16:49:58 - INFO - codeparrot_training - Step 11789: {'lr': 0.0004504210919468886, 'samples': 6036480, 'steps': 11789, 'loss/train': 1.1341214179992676} +02/24/2022 16:50:03 - INFO - codeparrot_training - Step 11790: {'lr': 0.0004504113108956499, 'samples': 6036992, 'steps': 11790, 'loss/train': 1.790855050086975} +02/24/2022 16:50:07 - INFO - codeparrot_training - Step 11791: {'lr': 0.0004504015289859128, 'samples': 6037504, 'steps': 11791, 'loss/train': 2.1810033321380615} +02/24/2022 16:50:12 - INFO - codeparrot_training - Step 11792: {'lr': 0.00045039174621771915, 'samples': 6038016, 'steps': 11792, 'loss/train': 2.7970197200775146} +02/24/2022 16:50:16 - INFO - codeparrot_training - Step 11793: {'lr': 0.0004503819625911109, 'samples': 6038528, 'steps': 11793, 'loss/train': 1.2452048063278198} +02/24/2022 16:50:21 - INFO - codeparrot_training - Step 11794: {'lr': 0.00045037217810613004, 'samples': 6039040, 'steps': 11794, 'loss/train': 3.1736698150634766} +02/24/2022 16:50:25 - INFO - codeparrot_training - Step 11795: {'lr': 0.0004503623927628183, 'samples': 6039552, 'steps': 11795, 'loss/train': 1.923999309539795} +02/24/2022 16:50:30 - INFO - codeparrot_training - Step 11796: {'lr': 0.0004503526065612177, 'samples': 6040064, 'steps': 11796, 'loss/train': 1.8024576902389526} +02/24/2022 16:50:34 - INFO - codeparrot_training - Step 11797: {'lr': 0.0004503428195013702, 'samples': 6040576, 'steps': 11797, 'loss/train': 1.5015267133712769} +02/24/2022 16:50:40 - INFO - codeparrot_training - Step 11798: {'lr': 0.00045033303158331764, 'samples': 6041088, 'steps': 11798, 'loss/train': 2.173752546310425} +02/24/2022 16:50:43 - INFO - codeparrot_training - Step 11799: {'lr': 0.00045032324280710204, 'samples': 6041600, 'steps': 11799, 'loss/train': 2.2663638591766357} +02/24/2022 16:50:49 - INFO - codeparrot_training - Step 11800: {'lr': 0.0004503134531727652, 'samples': 6042112, 'steps': 11800, 'loss/train': 2.9252853393554688} +02/24/2022 16:50:52 - INFO - codeparrot_training - Step 11801: {'lr': 0.00045030366268034917, 'samples': 6042624, 'steps': 11801, 'loss/train': 1.0678006410598755} +02/24/2022 16:50:58 - INFO - codeparrot_training - Step 11802: {'lr': 0.00045029387132989587, 'samples': 6043136, 'steps': 11802, 'loss/train': 2.732165575027466} +02/24/2022 16:51:01 - INFO - codeparrot_training - Step 11803: {'lr': 0.0004502840791214472, 'samples': 6043648, 'steps': 11803, 'loss/train': 2.073514461517334} +02/24/2022 16:51:07 - INFO - codeparrot_training - Step 11804: {'lr': 0.00045027428605504507, 'samples': 6044160, 'steps': 11804, 'loss/train': 2.5620059967041016} +02/24/2022 16:51:10 - INFO - codeparrot_training - Step 11805: {'lr': 0.00045026449213073154, 'samples': 6044672, 'steps': 11805, 'loss/train': 1.0992964506149292} +02/24/2022 16:51:16 - INFO - codeparrot_training - Step 11806: {'lr': 0.00045025469734854856, 'samples': 6045184, 'steps': 11806, 'loss/train': 1.7111836671829224} +02/24/2022 16:51:19 - INFO - codeparrot_training - Step 11807: {'lr': 0.00045024490170853806, 'samples': 6045696, 'steps': 11807, 'loss/train': 3.2235193252563477} +02/24/2022 16:51:26 - INFO - codeparrot_training - Step 11808: {'lr': 0.000450235105210742, 'samples': 6046208, 'steps': 11808, 'loss/train': 2.294288158416748} +02/24/2022 16:51:30 - INFO - codeparrot_training - Step 11809: {'lr': 0.0004502253078552022, 'samples': 6046720, 'steps': 11809, 'loss/train': 2.357346534729004} +02/24/2022 16:51:35 - INFO - codeparrot_training - Step 11810: {'lr': 0.00045021550964196086, 'samples': 6047232, 'steps': 11810, 'loss/train': 2.650636911392212} +02/24/2022 16:51:39 - INFO - codeparrot_training - Step 11811: {'lr': 0.0004502057105710598, 'samples': 6047744, 'steps': 11811, 'loss/train': 2.0565428733825684} +02/24/2022 16:51:44 - INFO - codeparrot_training - Step 11812: {'lr': 0.00045019591064254105, 'samples': 6048256, 'steps': 11812, 'loss/train': 1.7709914445877075} +02/24/2022 16:51:48 - INFO - codeparrot_training - Step 11813: {'lr': 0.00045018610985644663, 'samples': 6048768, 'steps': 11813, 'loss/train': 2.0230753421783447} +02/24/2022 16:51:53 - INFO - codeparrot_training - Step 11814: {'lr': 0.00045017630821281854, 'samples': 6049280, 'steps': 11814, 'loss/train': 2.387709379196167} +02/24/2022 16:51:57 - INFO - codeparrot_training - Step 11815: {'lr': 0.0004501665057116986, 'samples': 6049792, 'steps': 11815, 'loss/train': 2.8525302410125732} +02/24/2022 16:52:02 - INFO - codeparrot_training - Step 11816: {'lr': 0.00045015670235312895, 'samples': 6050304, 'steps': 11816, 'loss/train': 3.3121023178100586} +02/24/2022 16:52:06 - INFO - codeparrot_training - Step 11817: {'lr': 0.00045014689813715147, 'samples': 6050816, 'steps': 11817, 'loss/train': 2.3521053791046143} +02/24/2022 16:52:12 - INFO - codeparrot_training - Step 11818: {'lr': 0.00045013709306380837, 'samples': 6051328, 'steps': 11818, 'loss/train': 2.2122199535369873} +02/24/2022 16:52:15 - INFO - codeparrot_training - Step 11819: {'lr': 0.00045012728713314146, 'samples': 6051840, 'steps': 11819, 'loss/train': 1.578689455986023} +02/24/2022 16:52:21 - INFO - codeparrot_training - Step 11820: {'lr': 0.00045011748034519275, 'samples': 6052352, 'steps': 11820, 'loss/train': 1.8257839679718018} +02/24/2022 16:52:24 - INFO - codeparrot_training - Step 11821: {'lr': 0.00045010767270000436, 'samples': 6052864, 'steps': 11821, 'loss/train': 1.7844138145446777} +02/24/2022 16:52:30 - INFO - codeparrot_training - Step 11822: {'lr': 0.00045009786419761825, 'samples': 6053376, 'steps': 11822, 'loss/train': 2.06961727142334} +02/24/2022 16:52:33 - INFO - codeparrot_training - Step 11823: {'lr': 0.00045008805483807637, 'samples': 6053888, 'steps': 11823, 'loss/train': 2.0493924617767334} +02/24/2022 16:52:39 - INFO - codeparrot_training - Step 11824: {'lr': 0.0004500782446214208, 'samples': 6054400, 'steps': 11824, 'loss/train': 1.7303874492645264} +02/24/2022 16:52:42 - INFO - codeparrot_training - Step 11825: {'lr': 0.00045006843354769354, 'samples': 6054912, 'steps': 11825, 'loss/train': 1.4131265878677368} +02/24/2022 16:52:48 - INFO - codeparrot_training - Step 11826: {'lr': 0.0004500586216169367, 'samples': 6055424, 'steps': 11826, 'loss/train': 2.13592267036438} +02/24/2022 16:52:51 - INFO - codeparrot_training - Step 11827: {'lr': 0.0004500488088291923, 'samples': 6055936, 'steps': 11827, 'loss/train': 2.2689785957336426} +02/24/2022 16:52:57 - INFO - codeparrot_training - Step 11828: {'lr': 0.0004500389951845022, 'samples': 6056448, 'steps': 11828, 'loss/train': 2.451796293258667} +02/24/2022 16:53:00 - INFO - codeparrot_training - Step 11829: {'lr': 0.00045002918068290864, 'samples': 6056960, 'steps': 11829, 'loss/train': 2.7076683044433594} +02/24/2022 16:53:06 - INFO - codeparrot_training - Step 11830: {'lr': 0.00045001936532445354, 'samples': 6057472, 'steps': 11830, 'loss/train': 2.344712495803833} +02/24/2022 16:53:09 - INFO - codeparrot_training - Step 11831: {'lr': 0.000450009549109179, 'samples': 6057984, 'steps': 11831, 'loss/train': 0.39552760124206543} +02/24/2022 16:53:15 - INFO - codeparrot_training - Step 11832: {'lr': 0.0004499997320371271, 'samples': 6058496, 'steps': 11832, 'loss/train': 0.9307349324226379} +02/24/2022 16:53:18 - INFO - codeparrot_training - Step 11833: {'lr': 0.0004499899141083399, 'samples': 6059008, 'steps': 11833, 'loss/train': 1.6942479610443115} +02/24/2022 16:53:25 - INFO - codeparrot_training - Step 11834: {'lr': 0.0004499800953228593, 'samples': 6059520, 'steps': 11834, 'loss/train': 1.9799652099609375} +02/24/2022 16:53:28 - INFO - codeparrot_training - Step 11835: {'lr': 0.00044997027568072754, 'samples': 6060032, 'steps': 11835, 'loss/train': 3.412726640701294} +02/24/2022 16:53:34 - INFO - codeparrot_training - Step 11836: {'lr': 0.00044996045518198657, 'samples': 6060544, 'steps': 11836, 'loss/train': 1.8916040658950806} +02/24/2022 16:53:37 - INFO - codeparrot_training - Step 11837: {'lr': 0.00044995063382667855, 'samples': 6061056, 'steps': 11837, 'loss/train': 3.093398332595825} +02/24/2022 16:53:43 - INFO - codeparrot_training - Step 11838: {'lr': 0.0004499408116148455, 'samples': 6061568, 'steps': 11838, 'loss/train': 1.9078476428985596} +02/24/2022 16:53:48 - INFO - codeparrot_training - Step 11839: {'lr': 0.00044993098854652954, 'samples': 6062080, 'steps': 11839, 'loss/train': 2.662196636199951} +02/24/2022 16:53:52 - INFO - codeparrot_training - Step 11840: {'lr': 0.0004499211646217727, 'samples': 6062592, 'steps': 11840, 'loss/train': 2.1113712787628174} +02/24/2022 16:53:57 - INFO - codeparrot_training - Step 11841: {'lr': 0.000449911339840617, 'samples': 6063104, 'steps': 11841, 'loss/train': 2.349804162979126} +02/24/2022 16:54:01 - INFO - codeparrot_training - Step 11842: {'lr': 0.00044990151420310463, 'samples': 6063616, 'steps': 11842, 'loss/train': 1.6631113290786743} +02/24/2022 16:54:08 - INFO - codeparrot_training - Step 11843: {'lr': 0.0004498916877092776, 'samples': 6064128, 'steps': 11843, 'loss/train': 2.216902256011963} +02/24/2022 16:54:11 - INFO - codeparrot_training - Step 11844: {'lr': 0.00044988186035917817, 'samples': 6064640, 'steps': 11844, 'loss/train': 2.565882444381714} +02/24/2022 16:54:16 - INFO - codeparrot_training - Step 11845: {'lr': 0.00044987203215284823, 'samples': 6065152, 'steps': 11845, 'loss/train': 3.2680232524871826} +02/24/2022 16:54:20 - INFO - codeparrot_training - Step 11846: {'lr': 0.00044986220309033, 'samples': 6065664, 'steps': 11846, 'loss/train': 1.5452680587768555} +02/24/2022 16:54:25 - INFO - codeparrot_training - Step 11847: {'lr': 0.00044985237317166554, 'samples': 6066176, 'steps': 11847, 'loss/train': 2.617960214614868} +02/24/2022 16:54:29 - INFO - codeparrot_training - Step 11848: {'lr': 0.00044984254239689703, 'samples': 6066688, 'steps': 11848, 'loss/train': 2.240201950073242} +02/24/2022 16:54:34 - INFO - codeparrot_training - Step 11849: {'lr': 0.00044983271076606644, 'samples': 6067200, 'steps': 11849, 'loss/train': 1.4940757751464844} +02/24/2022 16:54:38 - INFO - codeparrot_training - Step 11850: {'lr': 0.000449822878279216, 'samples': 6067712, 'steps': 11850, 'loss/train': 2.268136501312256} +02/24/2022 16:54:43 - INFO - codeparrot_training - Step 11851: {'lr': 0.00044981304493638786, 'samples': 6068224, 'steps': 11851, 'loss/train': 0.21787181496620178} +02/24/2022 16:54:47 - INFO - codeparrot_training - Step 11852: {'lr': 0.00044980321073762405, 'samples': 6068736, 'steps': 11852, 'loss/train': 1.5555155277252197} +02/24/2022 16:54:52 - INFO - codeparrot_training - Step 11853: {'lr': 0.0004497933756829667, 'samples': 6069248, 'steps': 11853, 'loss/train': 0.5362102389335632} +02/24/2022 16:54:56 - INFO - codeparrot_training - Step 11854: {'lr': 0.000449783539772458, 'samples': 6069760, 'steps': 11854, 'loss/train': 8.661615371704102} +02/24/2022 16:55:02 - INFO - codeparrot_training - Step 11855: {'lr': 0.00044977370300614, 'samples': 6070272, 'steps': 11855, 'loss/train': 2.184626340866089} +02/24/2022 16:55:06 - INFO - codeparrot_training - Step 11856: {'lr': 0.00044976386538405494, 'samples': 6070784, 'steps': 11856, 'loss/train': 1.9198890924453735} +02/24/2022 16:55:11 - INFO - codeparrot_training - Step 11857: {'lr': 0.0004497540269062449, 'samples': 6071296, 'steps': 11857, 'loss/train': 1.743076205253601} +02/24/2022 16:55:15 - INFO - codeparrot_training - Step 11858: {'lr': 0.00044974418757275206, 'samples': 6071808, 'steps': 11858, 'loss/train': 1.8038883209228516} +02/24/2022 16:55:20 - INFO - codeparrot_training - Step 11859: {'lr': 0.00044973434738361853, 'samples': 6072320, 'steps': 11859, 'loss/train': 1.8507227897644043} +02/24/2022 16:55:24 - INFO - codeparrot_training - Step 11860: {'lr': 0.0004497245063388865, 'samples': 6072832, 'steps': 11860, 'loss/train': 1.4610706567764282} +02/24/2022 16:55:29 - INFO - codeparrot_training - Step 11861: {'lr': 0.0004497146644385981, 'samples': 6073344, 'steps': 11861, 'loss/train': 1.0989198684692383} +02/24/2022 16:55:33 - INFO - codeparrot_training - Step 11862: {'lr': 0.00044970482168279547, 'samples': 6073856, 'steps': 11862, 'loss/train': 1.9074040651321411} +02/24/2022 16:55:38 - INFO - codeparrot_training - Step 11863: {'lr': 0.0004496949780715208, 'samples': 6074368, 'steps': 11863, 'loss/train': 1.4850995540618896} +02/24/2022 16:55:41 - INFO - codeparrot_training - Step 11864: {'lr': 0.00044968513360481624, 'samples': 6074880, 'steps': 11864, 'loss/train': 2.1204400062561035} +02/24/2022 16:55:48 - INFO - codeparrot_training - Step 11865: {'lr': 0.000449675288282724, 'samples': 6075392, 'steps': 11865, 'loss/train': 2.1871776580810547} +02/24/2022 16:55:51 - INFO - codeparrot_training - Step 11866: {'lr': 0.0004496654421052862, 'samples': 6075904, 'steps': 11866, 'loss/train': 2.3699865341186523} +02/24/2022 16:55:57 - INFO - codeparrot_training - Step 11867: {'lr': 0.00044965559507254504, 'samples': 6076416, 'steps': 11867, 'loss/train': 2.8026418685913086} +02/24/2022 16:56:00 - INFO - codeparrot_training - Step 11868: {'lr': 0.0004496457471845428, 'samples': 6076928, 'steps': 11868, 'loss/train': 1.7902960777282715} +02/24/2022 16:56:06 - INFO - codeparrot_training - Step 11869: {'lr': 0.0004496358984413215, 'samples': 6077440, 'steps': 11869, 'loss/train': 2.4285483360290527} +02/24/2022 16:56:09 - INFO - codeparrot_training - Step 11870: {'lr': 0.0004496260488429234, 'samples': 6077952, 'steps': 11870, 'loss/train': 1.9883067607879639} +02/24/2022 16:56:15 - INFO - codeparrot_training - Step 11871: {'lr': 0.0004496161983893907, 'samples': 6078464, 'steps': 11871, 'loss/train': 2.715402364730835} +02/24/2022 16:56:18 - INFO - codeparrot_training - Step 11872: {'lr': 0.0004496063470807656, 'samples': 6078976, 'steps': 11872, 'loss/train': 2.8464128971099854} +02/24/2022 16:56:24 - INFO - codeparrot_training - Step 11873: {'lr': 0.0004495964949170903, 'samples': 6079488, 'steps': 11873, 'loss/train': 2.8089919090270996} +02/24/2022 16:56:27 - INFO - codeparrot_training - Step 11874: {'lr': 0.000449586641898407, 'samples': 6080000, 'steps': 11874, 'loss/train': 1.1081875562667847} +02/24/2022 16:56:34 - INFO - codeparrot_training - Step 11875: {'lr': 0.0004495767880247579, 'samples': 6080512, 'steps': 11875, 'loss/train': 3.8264901638031006} +02/24/2022 16:56:37 - INFO - codeparrot_training - Step 11876: {'lr': 0.0004495669332961852, 'samples': 6081024, 'steps': 11876, 'loss/train': 2.336467981338501} +02/24/2022 16:56:43 - INFO - codeparrot_training - Step 11877: {'lr': 0.0004495570777127311, 'samples': 6081536, 'steps': 11877, 'loss/train': 1.2555092573165894} +02/24/2022 16:56:46 - INFO - codeparrot_training - Step 11878: {'lr': 0.00044954722127443786, 'samples': 6082048, 'steps': 11878, 'loss/train': 2.372148036956787} +02/24/2022 16:56:52 - INFO - codeparrot_training - Step 11879: {'lr': 0.0004495373639813477, 'samples': 6082560, 'steps': 11879, 'loss/train': 2.5146970748901367} +02/24/2022 16:56:55 - INFO - codeparrot_training - Step 11880: {'lr': 0.00044952750583350287, 'samples': 6083072, 'steps': 11880, 'loss/train': 2.2158963680267334} +02/24/2022 16:57:01 - INFO - codeparrot_training - Step 11881: {'lr': 0.00044951764683094555, 'samples': 6083584, 'steps': 11881, 'loss/train': 2.6528873443603516} +02/24/2022 16:57:04 - INFO - codeparrot_training - Step 11882: {'lr': 0.000449507786973718, 'samples': 6084096, 'steps': 11882, 'loss/train': 2.0729784965515137} +02/24/2022 16:57:10 - INFO - codeparrot_training - Step 11883: {'lr': 0.0004494979262618624, 'samples': 6084608, 'steps': 11883, 'loss/train': 1.4883427619934082} +02/24/2022 16:57:13 - INFO - codeparrot_training - Step 11884: {'lr': 0.00044948806469542095, 'samples': 6085120, 'steps': 11884, 'loss/train': 1.772378921508789} +02/24/2022 16:57:19 - INFO - codeparrot_training - Step 11885: {'lr': 0.0004494782022744361, 'samples': 6085632, 'steps': 11885, 'loss/train': 1.4385184049606323} +02/24/2022 16:57:22 - INFO - codeparrot_training - Step 11886: {'lr': 0.0004494683389989499, 'samples': 6086144, 'steps': 11886, 'loss/train': 1.2901983261108398} +02/24/2022 16:57:28 - INFO - codeparrot_training - Step 11887: {'lr': 0.0004494584748690047, 'samples': 6086656, 'steps': 11887, 'loss/train': 2.284496784210205} +02/24/2022 16:57:31 - INFO - codeparrot_training - Step 11888: {'lr': 0.00044944860988464276, 'samples': 6087168, 'steps': 11888, 'loss/train': 1.466820240020752} +02/24/2022 16:57:38 - INFO - codeparrot_training - Step 11889: {'lr': 0.0004494387440459063, 'samples': 6087680, 'steps': 11889, 'loss/train': 1.2328132390975952} +02/24/2022 16:57:41 - INFO - codeparrot_training - Step 11890: {'lr': 0.00044942887735283755, 'samples': 6088192, 'steps': 11890, 'loss/train': 3.2682878971099854} +02/24/2022 16:57:47 - INFO - codeparrot_training - Step 11891: {'lr': 0.00044941900980547886, 'samples': 6088704, 'steps': 11891, 'loss/train': 2.589184284210205} +02/24/2022 16:57:50 - INFO - codeparrot_training - Step 11892: {'lr': 0.00044940914140387245, 'samples': 6089216, 'steps': 11892, 'loss/train': 1.2583006620407104} +02/24/2022 16:57:56 - INFO - codeparrot_training - Step 11893: {'lr': 0.00044939927214806055, 'samples': 6089728, 'steps': 11893, 'loss/train': 1.39590322971344} +02/24/2022 16:57:59 - INFO - codeparrot_training - Step 11894: {'lr': 0.0004493894020380855, 'samples': 6090240, 'steps': 11894, 'loss/train': 1.5713070631027222} +02/24/2022 16:58:03 - INFO - codeparrot_training - Step 11895: {'lr': 0.0004493795310739896, 'samples': 6090752, 'steps': 11895, 'loss/train': 1.9005963802337646} +02/24/2022 16:58:08 - INFO - codeparrot_training - Step 11896: {'lr': 0.00044936965925581506, 'samples': 6091264, 'steps': 11896, 'loss/train': 1.2607002258300781} +02/24/2022 16:58:12 - INFO - codeparrot_training - Step 11897: {'lr': 0.0004493597865836042, 'samples': 6091776, 'steps': 11897, 'loss/train': 1.736268401145935} +02/24/2022 16:58:17 - INFO - codeparrot_training - Step 11898: {'lr': 0.00044934991305739936, 'samples': 6092288, 'steps': 11898, 'loss/train': 2.399876832962036} +02/24/2022 16:58:21 - INFO - codeparrot_training - Step 11899: {'lr': 0.00044934003867724284, 'samples': 6092800, 'steps': 11899, 'loss/train': 2.5512542724609375} +02/24/2022 16:58:26 - INFO - codeparrot_training - Step 11900: {'lr': 0.0004493301634431768, 'samples': 6093312, 'steps': 11900, 'loss/train': 0.8661245703697205} +02/24/2022 16:58:30 - INFO - codeparrot_training - Step 11901: {'lr': 0.00044932028735524367, 'samples': 6093824, 'steps': 11901, 'loss/train': 1.3039920330047607} +02/24/2022 16:58:36 - INFO - codeparrot_training - Step 11902: {'lr': 0.0004493104104134857, 'samples': 6094336, 'steps': 11902, 'loss/train': 2.297304391860962} +02/24/2022 16:58:39 - INFO - codeparrot_training - Step 11903: {'lr': 0.0004493005326179452, 'samples': 6094848, 'steps': 11903, 'loss/train': 2.4173314571380615} +02/24/2022 16:58:45 - INFO - codeparrot_training - Step 11904: {'lr': 0.00044929065396866457, 'samples': 6095360, 'steps': 11904, 'loss/train': 2.2927234172821045} +02/24/2022 16:58:48 - INFO - codeparrot_training - Step 11905: {'lr': 0.00044928077446568606, 'samples': 6095872, 'steps': 11905, 'loss/train': 2.6560232639312744} +02/24/2022 16:58:54 - INFO - codeparrot_training - Step 11906: {'lr': 0.000449270894109052, 'samples': 6096384, 'steps': 11906, 'loss/train': 2.1722636222839355} +02/24/2022 16:58:57 - INFO - codeparrot_training - Step 11907: {'lr': 0.0004492610128988046, 'samples': 6096896, 'steps': 11907, 'loss/train': 1.3685529232025146} +02/24/2022 16:59:03 - INFO - codeparrot_training - Step 11908: {'lr': 0.00044925113083498636, 'samples': 6097408, 'steps': 11908, 'loss/train': 1.2973147630691528} +02/24/2022 16:59:08 - INFO - codeparrot_training - Step 11909: {'lr': 0.00044924124791763956, 'samples': 6097920, 'steps': 11909, 'loss/train': 3.2404911518096924} +02/24/2022 16:59:12 - INFO - codeparrot_training - Step 11910: {'lr': 0.0004492313641468065, 'samples': 6098432, 'steps': 11910, 'loss/train': 1.7464429140090942} +02/24/2022 16:59:18 - INFO - codeparrot_training - Step 11911: {'lr': 0.00044922147952252957, 'samples': 6098944, 'steps': 11911, 'loss/train': 0.7987387180328369} +02/24/2022 16:59:22 - INFO - codeparrot_training - Step 11912: {'lr': 0.000449211594044851, 'samples': 6099456, 'steps': 11912, 'loss/train': 2.8895716667175293} +02/24/2022 16:59:27 - INFO - codeparrot_training - Step 11913: {'lr': 0.0004492017077138133, 'samples': 6099968, 'steps': 11913, 'loss/train': 1.7488881349563599} +02/24/2022 16:59:31 - INFO - codeparrot_training - Step 11914: {'lr': 0.00044919182052945866, 'samples': 6100480, 'steps': 11914, 'loss/train': 1.0501044988632202} +02/24/2022 16:59:36 - INFO - codeparrot_training - Step 11915: {'lr': 0.00044918193249182957, 'samples': 6100992, 'steps': 11915, 'loss/train': 1.9240261316299438} +02/24/2022 16:59:40 - INFO - codeparrot_training - Step 11916: {'lr': 0.0004491720436009683, 'samples': 6101504, 'steps': 11916, 'loss/train': 1.406111478805542} +02/24/2022 16:59:45 - INFO - codeparrot_training - Step 11917: {'lr': 0.0004491621538569173, 'samples': 6102016, 'steps': 11917, 'loss/train': 0.8316056728363037} +02/24/2022 16:59:49 - INFO - codeparrot_training - Step 11918: {'lr': 0.0004491522632597188, 'samples': 6102528, 'steps': 11918, 'loss/train': 1.0837879180908203} +02/24/2022 16:59:54 - INFO - codeparrot_training - Step 11919: {'lr': 0.0004491423718094153, 'samples': 6103040, 'steps': 11919, 'loss/train': 1.7377420663833618} +02/24/2022 16:59:58 - INFO - codeparrot_training - Step 11920: {'lr': 0.00044913247950604905, 'samples': 6103552, 'steps': 11920, 'loss/train': 0.8596081137657166} +02/24/2022 17:00:03 - INFO - codeparrot_training - Step 11921: {'lr': 0.0004491225863496625, 'samples': 6104064, 'steps': 11921, 'loss/train': 1.740216851234436} +02/24/2022 17:00:07 - INFO - codeparrot_training - Step 11922: {'lr': 0.0004491126923402981, 'samples': 6104576, 'steps': 11922, 'loss/train': 1.1672195196151733} +02/24/2022 17:00:12 - INFO - codeparrot_training - Step 11923: {'lr': 0.0004491027974779981, 'samples': 6105088, 'steps': 11923, 'loss/train': 1.5681185722351074} +02/24/2022 17:00:16 - INFO - codeparrot_training - Step 11924: {'lr': 0.00044909290176280495, 'samples': 6105600, 'steps': 11924, 'loss/train': 2.9294259548187256} +02/24/2022 17:00:21 - INFO - codeparrot_training - Step 11925: {'lr': 0.000449083005194761, 'samples': 6106112, 'steps': 11925, 'loss/train': 1.2542585134506226} +02/24/2022 17:00:25 - INFO - codeparrot_training - Step 11926: {'lr': 0.0004490731077739087, 'samples': 6106624, 'steps': 11926, 'loss/train': 2.0052225589752197} +02/24/2022 17:00:31 - INFO - codeparrot_training - Step 11927: {'lr': 0.0004490632095002904, 'samples': 6107136, 'steps': 11927, 'loss/train': 2.3806068897247314} +02/24/2022 17:00:35 - INFO - codeparrot_training - Step 11928: {'lr': 0.00044905331037394853, 'samples': 6107648, 'steps': 11928, 'loss/train': 2.7983317375183105} +02/24/2022 17:00:40 - INFO - codeparrot_training - Step 11929: {'lr': 0.00044904341039492544, 'samples': 6108160, 'steps': 11929, 'loss/train': 0.13732248544692993} +02/24/2022 17:00:44 - INFO - codeparrot_training - Step 11930: {'lr': 0.00044903350956326365, 'samples': 6108672, 'steps': 11930, 'loss/train': 2.2320008277893066} +02/24/2022 17:00:49 - INFO - codeparrot_training - Step 11931: {'lr': 0.0004490236078790055, 'samples': 6109184, 'steps': 11931, 'loss/train': 2.022023916244507} +02/24/2022 17:00:53 - INFO - codeparrot_training - Step 11932: {'lr': 0.0004490137053421934, 'samples': 6109696, 'steps': 11932, 'loss/train': 1.7930134534835815} +02/24/2022 17:00:58 - INFO - codeparrot_training - Step 11933: {'lr': 0.00044900380195286974, 'samples': 6110208, 'steps': 11933, 'loss/train': 1.9943904876708984} +02/24/2022 17:01:02 - INFO - codeparrot_training - Step 11934: {'lr': 0.00044899389771107704, 'samples': 6110720, 'steps': 11934, 'loss/train': 1.4732900857925415} +02/24/2022 17:01:07 - INFO - codeparrot_training - Step 11935: {'lr': 0.00044898399261685765, 'samples': 6111232, 'steps': 11935, 'loss/train': 1.4811468124389648} +02/24/2022 17:01:10 - INFO - codeparrot_training - Step 11936: {'lr': 0.00044897408667025397, 'samples': 6111744, 'steps': 11936, 'loss/train': 1.9996992349624634} +02/24/2022 17:01:17 - INFO - codeparrot_training - Step 11937: {'lr': 0.00044896417987130854, 'samples': 6112256, 'steps': 11937, 'loss/train': 1.7018797397613525} +02/24/2022 17:01:20 - INFO - codeparrot_training - Step 11938: {'lr': 0.0004489542722200637, 'samples': 6112768, 'steps': 11938, 'loss/train': 2.303635358810425} +02/24/2022 17:01:26 - INFO - codeparrot_training - Step 11939: {'lr': 0.000448944363716562, 'samples': 6113280, 'steps': 11939, 'loss/train': 2.693312644958496} +02/24/2022 17:01:29 - INFO - codeparrot_training - Step 11940: {'lr': 0.0004489344543608458, 'samples': 6113792, 'steps': 11940, 'loss/train': 1.2922580242156982} +02/24/2022 17:01:35 - INFO - codeparrot_training - Step 11941: {'lr': 0.00044892454415295746, 'samples': 6114304, 'steps': 11941, 'loss/train': 2.4177403450012207} +02/24/2022 17:01:39 - INFO - codeparrot_training - Step 11942: {'lr': 0.0004489146330929397, 'samples': 6114816, 'steps': 11942, 'loss/train': 2.755218267440796} +02/24/2022 17:01:42 - INFO - codeparrot_training - Step 11943: {'lr': 0.0004489047211808347, 'samples': 6115328, 'steps': 11943, 'loss/train': 1.957213282585144} +02/24/2022 17:01:48 - INFO - codeparrot_training - Step 11944: {'lr': 0.0004488948084166851, 'samples': 6115840, 'steps': 11944, 'loss/train': 2.2804155349731445} +02/24/2022 17:01:51 - INFO - codeparrot_training - Step 11945: {'lr': 0.00044888489480053324, 'samples': 6116352, 'steps': 11945, 'loss/train': 2.0375568866729736} +02/24/2022 17:01:57 - INFO - codeparrot_training - Step 11946: {'lr': 0.00044887498033242167, 'samples': 6116864, 'steps': 11946, 'loss/train': 1.9059584140777588} +02/24/2022 17:02:00 - INFO - codeparrot_training - Step 11947: {'lr': 0.0004488650650123929, 'samples': 6117376, 'steps': 11947, 'loss/train': 1.1402106285095215} +02/24/2022 17:02:06 - INFO - codeparrot_training - Step 11948: {'lr': 0.00044885514884048926, 'samples': 6117888, 'steps': 11948, 'loss/train': 2.292330026626587} +02/24/2022 17:02:10 - INFO - codeparrot_training - Step 11949: {'lr': 0.0004488452318167533, 'samples': 6118400, 'steps': 11949, 'loss/train': 2.6049935817718506} +02/24/2022 17:02:15 - INFO - codeparrot_training - Step 11950: {'lr': 0.00044883531394122753, 'samples': 6118912, 'steps': 11950, 'loss/train': 0.7761608958244324} +02/24/2022 17:02:19 - INFO - codeparrot_training - Step 11951: {'lr': 0.00044882539521395436, 'samples': 6119424, 'steps': 11951, 'loss/train': 2.873300075531006} +02/24/2022 17:02:24 - INFO - codeparrot_training - Step 11952: {'lr': 0.0004488154756349764, 'samples': 6119936, 'steps': 11952, 'loss/train': 4.269147872924805} +02/24/2022 17:02:28 - INFO - codeparrot_training - Step 11953: {'lr': 0.0004488055552043361, 'samples': 6120448, 'steps': 11953, 'loss/train': 2.1729979515075684} +02/24/2022 17:02:33 - INFO - codeparrot_training - Step 11954: {'lr': 0.0004487956339220759, 'samples': 6120960, 'steps': 11954, 'loss/train': 0.8693559765815735} +02/24/2022 17:02:37 - INFO - codeparrot_training - Step 11955: {'lr': 0.00044878571178823826, 'samples': 6121472, 'steps': 11955, 'loss/train': 1.254451870918274} +02/24/2022 17:02:42 - INFO - codeparrot_training - Step 11956: {'lr': 0.00044877578880286585, 'samples': 6121984, 'steps': 11956, 'loss/train': 0.19196289777755737} +02/24/2022 17:02:46 - INFO - codeparrot_training - Step 11957: {'lr': 0.000448765864966001, 'samples': 6122496, 'steps': 11957, 'loss/train': 2.527200698852539} +02/24/2022 17:02:52 - INFO - codeparrot_training - Step 11958: {'lr': 0.00044875594027768634, 'samples': 6123008, 'steps': 11958, 'loss/train': 1.9800655841827393} +02/24/2022 17:02:56 - INFO - codeparrot_training - Step 11959: {'lr': 0.00044874601473796435, 'samples': 6123520, 'steps': 11959, 'loss/train': 0.3376052975654602} +02/24/2022 17:03:01 - INFO - codeparrot_training - Step 11960: {'lr': 0.00044873608834687754, 'samples': 6124032, 'steps': 11960, 'loss/train': 2.2521958351135254} +02/24/2022 17:03:04 - INFO - codeparrot_training - Step 11961: {'lr': 0.0004487261611044684, 'samples': 6124544, 'steps': 11961, 'loss/train': 1.5861146450042725} +02/24/2022 17:03:10 - INFO - codeparrot_training - Step 11962: {'lr': 0.0004487162330107795, 'samples': 6125056, 'steps': 11962, 'loss/train': 2.6860108375549316} +02/24/2022 17:03:14 - INFO - codeparrot_training - Step 11963: {'lr': 0.0004487063040658534, 'samples': 6125568, 'steps': 11963, 'loss/train': 2.2602968215942383} +02/24/2022 17:03:19 - INFO - codeparrot_training - Step 11964: {'lr': 0.00044869637426973256, 'samples': 6126080, 'steps': 11964, 'loss/train': 2.0055363178253174} +02/24/2022 17:03:23 - INFO - codeparrot_training - Step 11965: {'lr': 0.0004486864436224595, 'samples': 6126592, 'steps': 11965, 'loss/train': 1.4304295778274536} +02/24/2022 17:03:29 - INFO - codeparrot_training - Step 11966: {'lr': 0.0004486765121240769, 'samples': 6127104, 'steps': 11966, 'loss/train': 2.6333274841308594} +02/24/2022 17:03:32 - INFO - codeparrot_training - Step 11967: {'lr': 0.0004486665797746271, 'samples': 6127616, 'steps': 11967, 'loss/train': 0.22088871896266937} +02/24/2022 17:03:38 - INFO - codeparrot_training - Step 11968: {'lr': 0.00044865664657415286, 'samples': 6128128, 'steps': 11968, 'loss/train': 2.322220802307129} +02/24/2022 17:03:42 - INFO - codeparrot_training - Step 11969: {'lr': 0.00044864671252269663, 'samples': 6128640, 'steps': 11969, 'loss/train': 2.567811965942383} +02/24/2022 17:03:47 - INFO - codeparrot_training - Step 11970: {'lr': 0.00044863677762030087, 'samples': 6129152, 'steps': 11970, 'loss/train': 1.8674544095993042} +02/24/2022 17:03:51 - INFO - codeparrot_training - Step 11971: {'lr': 0.0004486268418670083, 'samples': 6129664, 'steps': 11971, 'loss/train': 1.2539787292480469} +02/24/2022 17:03:56 - INFO - codeparrot_training - Step 11972: {'lr': 0.00044861690526286135, 'samples': 6130176, 'steps': 11972, 'loss/train': 2.8972015380859375} +02/24/2022 17:04:00 - INFO - codeparrot_training - Step 11973: {'lr': 0.00044860696780790266, 'samples': 6130688, 'steps': 11973, 'loss/train': 1.7852134704589844} +02/24/2022 17:04:06 - INFO - codeparrot_training - Step 11974: {'lr': 0.00044859702950217486, 'samples': 6131200, 'steps': 11974, 'loss/train': 3.0962789058685303} +02/24/2022 17:04:09 - INFO - codeparrot_training - Step 11975: {'lr': 0.00044858709034572035, 'samples': 6131712, 'steps': 11975, 'loss/train': 2.5287649631500244} +02/24/2022 17:04:15 - INFO - codeparrot_training - Step 11976: {'lr': 0.00044857715033858183, 'samples': 6132224, 'steps': 11976, 'loss/train': 2.1950368881225586} +02/24/2022 17:04:18 - INFO - codeparrot_training - Step 11977: {'lr': 0.0004485672094808019, 'samples': 6132736, 'steps': 11977, 'loss/train': 1.9684149026870728} +02/24/2022 17:04:24 - INFO - codeparrot_training - Step 11978: {'lr': 0.0004485572677724231, 'samples': 6133248, 'steps': 11978, 'loss/train': 1.745532512664795} +02/24/2022 17:04:27 - INFO - codeparrot_training - Step 11979: {'lr': 0.00044854732521348796, 'samples': 6133760, 'steps': 11979, 'loss/train': 1.880918025970459} +02/24/2022 17:04:33 - INFO - codeparrot_training - Step 11980: {'lr': 0.0004485373818040391, 'samples': 6134272, 'steps': 11980, 'loss/train': 1.9257597923278809} +02/24/2022 17:04:36 - INFO - codeparrot_training - Step 11981: {'lr': 0.00044852743754411915, 'samples': 6134784, 'steps': 11981, 'loss/train': 1.7359751462936401} +02/24/2022 17:04:42 - INFO - codeparrot_training - Step 11982: {'lr': 0.00044851749243377085, 'samples': 6135296, 'steps': 11982, 'loss/train': 1.5540597438812256} +02/24/2022 17:04:45 - INFO - codeparrot_training - Step 11983: {'lr': 0.0004485075464730365, 'samples': 6135808, 'steps': 11983, 'loss/train': 1.6910945177078247} +02/24/2022 17:04:51 - INFO - codeparrot_training - Step 11984: {'lr': 0.0004484975996619589, 'samples': 6136320, 'steps': 11984, 'loss/train': 0.38395506143569946} +02/24/2022 17:04:55 - INFO - codeparrot_training - Step 11985: {'lr': 0.0004484876520005805, 'samples': 6136832, 'steps': 11985, 'loss/train': 1.7679054737091064} +02/24/2022 17:05:01 - INFO - codeparrot_training - Step 11986: {'lr': 0.0004484777034889441, 'samples': 6137344, 'steps': 11986, 'loss/train': 2.9316844940185547} +02/24/2022 17:05:04 - INFO - codeparrot_training - Step 11987: {'lr': 0.0004484677541270923, 'samples': 6137856, 'steps': 11987, 'loss/train': 1.8936922550201416} +02/24/2022 17:05:10 - INFO - codeparrot_training - Step 11988: {'lr': 0.00044845780391506763, 'samples': 6138368, 'steps': 11988, 'loss/train': 3.8764872550964355} +02/24/2022 17:05:13 - INFO - codeparrot_training - Step 11989: {'lr': 0.0004484478528529128, 'samples': 6138880, 'steps': 11989, 'loss/train': 2.5315418243408203} +02/24/2022 17:05:19 - INFO - codeparrot_training - Step 11990: {'lr': 0.00044843790094067026, 'samples': 6139392, 'steps': 11990, 'loss/train': 1.575141191482544} +02/24/2022 17:05:23 - INFO - codeparrot_training - Step 11991: {'lr': 0.00044842794817838286, 'samples': 6139904, 'steps': 11991, 'loss/train': 2.0063159465789795} +02/24/2022 17:05:26 - INFO - codeparrot_training - Step 11992: {'lr': 0.0004484179945660931, 'samples': 6140416, 'steps': 11992, 'loss/train': 1.4903126955032349} +02/24/2022 17:05:32 - INFO - codeparrot_training - Step 11993: {'lr': 0.00044840804010384366, 'samples': 6140928, 'steps': 11993, 'loss/train': 2.07969069480896} +02/24/2022 17:05:35 - INFO - codeparrot_training - Step 11994: {'lr': 0.00044839808479167723, 'samples': 6141440, 'steps': 11994, 'loss/train': 2.9866795539855957} +02/24/2022 17:05:41 - INFO - codeparrot_training - Step 11995: {'lr': 0.00044838812862963627, 'samples': 6141952, 'steps': 11995, 'loss/train': 2.889941453933716} +02/24/2022 17:05:47 - INFO - codeparrot_training - Step 11996: {'lr': 0.00044837817161776366, 'samples': 6142464, 'steps': 11996, 'loss/train': 1.9506334066390991} +02/24/2022 17:05:51 - INFO - codeparrot_training - Step 11997: {'lr': 0.00044836821375610194, 'samples': 6142976, 'steps': 11997, 'loss/train': 1.876620888710022} +02/24/2022 17:05:56 - INFO - codeparrot_training - Step 11998: {'lr': 0.0004483582550446938, 'samples': 6143488, 'steps': 11998, 'loss/train': 1.6544023752212524} +02/24/2022 17:06:00 - INFO - codeparrot_training - Step 11999: {'lr': 0.0004483482954835819, 'samples': 6144000, 'steps': 11999, 'loss/train': 1.3312416076660156} +02/24/2022 17:06:00 - INFO - codeparrot_training - Evaluating and saving model checkpoint