diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -5331,3 +5331,1009 @@ Use FP16 precision: False 02/24/2022 08:01:42 - INFO - codeparrot_training - Step 4998: {'lr': 0.0004952027023159617, 'samples': 2559488, 'steps': 4998, 'loss/train': 3.246948719024658} 02/24/2022 08:01:48 - INFO - codeparrot_training - Step 4999: {'lr': 0.0004951995117335631, 'samples': 2560000, 'steps': 4999, 'loss/train': 2.412139415740967} 02/24/2022 08:01:48 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 08:02:04 - WARNING - huggingface_hub.repository - Several commits (5) will be pushed upstream. +02/24/2022 08:02:04 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 08:02:36 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + f6d96e1..8f2bfb2 floral-grass-11 -> floral-grass-11 + +02/24/2022 08:02:40 - INFO - codeparrot_training - Step 5000: {'lr': 0.0004951963201008077, 'samples': 2560512, 'steps': 5000, 'loss/train': 2.296778917312622} +02/24/2022 08:02:47 - INFO - codeparrot_training - Step 5001: {'lr': 0.000495193127417709, 'samples': 2561024, 'steps': 5001, 'loss/train': 2.5641069412231445} +02/24/2022 08:02:50 - INFO - codeparrot_training - Step 5002: {'lr': 0.0004951899336842809, 'samples': 2561536, 'steps': 5002, 'loss/train': 2.7109029293060303} +02/24/2022 08:02:56 - INFO - codeparrot_training - Step 5003: {'lr': 0.0004951867389005369, 'samples': 2562048, 'steps': 5003, 'loss/train': 2.5077733993530273} +02/24/2022 08:02:59 - INFO - codeparrot_training - Step 5004: {'lr': 0.0004951835430664908, 'samples': 2562560, 'steps': 5004, 'loss/train': 2.3653955459594727} +02/24/2022 08:03:05 - INFO - codeparrot_training - Step 5005: {'lr': 0.0004951803461821562, 'samples': 2563072, 'steps': 5005, 'loss/train': 2.838548183441162} +02/24/2022 08:03:09 - INFO - codeparrot_training - Step 5006: {'lr': 0.0004951771482475469, 'samples': 2563584, 'steps': 5006, 'loss/train': 2.699648141860962} +02/24/2022 08:03:14 - INFO - codeparrot_training - Step 5007: {'lr': 0.0004951739492626766, 'samples': 2564096, 'steps': 5007, 'loss/train': 2.0335335731506348} +02/24/2022 08:03:18 - INFO - codeparrot_training - Step 5008: {'lr': 0.0004951707492275589, 'samples': 2564608, 'steps': 5008, 'loss/train': 2.9375572204589844} +02/24/2022 08:03:23 - INFO - codeparrot_training - Step 5009: {'lr': 0.0004951675481422075, 'samples': 2565120, 'steps': 5009, 'loss/train': 2.2810325622558594} +02/24/2022 08:03:27 - INFO - codeparrot_training - Step 5010: {'lr': 0.0004951643460066363, 'samples': 2565632, 'steps': 5010, 'loss/train': 4.218005180358887} +02/24/2022 08:03:33 - INFO - codeparrot_training - Step 5011: {'lr': 0.0004951611428208589, 'samples': 2566144, 'steps': 5011, 'loss/train': 3.0080645084381104} +02/24/2022 08:03:36 - INFO - codeparrot_training - Step 5012: {'lr': 0.0004951579385848889, 'samples': 2566656, 'steps': 5012, 'loss/train': 2.662175416946411} +02/24/2022 08:03:42 - INFO - codeparrot_training - Step 5013: {'lr': 0.0004951547332987401, 'samples': 2567168, 'steps': 5013, 'loss/train': 1.3648183345794678} +02/24/2022 08:03:45 - INFO - codeparrot_training - Step 5014: {'lr': 0.0004951515269624265, 'samples': 2567680, 'steps': 5014, 'loss/train': 2.1121575832366943} +02/24/2022 08:03:51 - INFO - codeparrot_training - Step 5015: {'lr': 0.0004951483195759614, 'samples': 2568192, 'steps': 5015, 'loss/train': 3.2036707401275635} +02/24/2022 08:03:54 - INFO - codeparrot_training - Step 5016: {'lr': 0.0004951451111393588, 'samples': 2568704, 'steps': 5016, 'loss/train': 2.3789610862731934} +02/24/2022 08:04:00 - INFO - codeparrot_training - Step 5017: {'lr': 0.0004951419016526324, 'samples': 2569216, 'steps': 5017, 'loss/train': 2.780557155609131} +02/24/2022 08:04:03 - INFO - codeparrot_training - Step 5018: {'lr': 0.0004951386911157959, 'samples': 2569728, 'steps': 5018, 'loss/train': 2.697455883026123} +02/24/2022 08:04:09 - INFO - codeparrot_training - Step 5019: {'lr': 0.0004951354795288631, 'samples': 2570240, 'steps': 5019, 'loss/train': 3.0525665283203125} +02/24/2022 08:04:13 - INFO - codeparrot_training - Step 5020: {'lr': 0.0004951322668918477, 'samples': 2570752, 'steps': 5020, 'loss/train': 2.4270029067993164} +02/24/2022 08:04:16 - INFO - codeparrot_training - Step 5021: {'lr': 0.0004951290532047637, 'samples': 2571264, 'steps': 5021, 'loss/train': 0.8056251406669617} +02/24/2022 08:04:22 - INFO - codeparrot_training - Step 5022: {'lr': 0.0004951258384676244, 'samples': 2571776, 'steps': 5022, 'loss/train': 2.556169033050537} +02/24/2022 08:04:26 - INFO - codeparrot_training - Step 5023: {'lr': 0.0004951226226804441, 'samples': 2572288, 'steps': 5023, 'loss/train': 2.4295411109924316} +02/24/2022 08:04:31 - INFO - codeparrot_training - Step 5024: {'lr': 0.0004951194058432361, 'samples': 2572800, 'steps': 5024, 'loss/train': 2.9989123344421387} +02/24/2022 08:04:35 - INFO - codeparrot_training - Step 5025: {'lr': 0.0004951161879560146, 'samples': 2573312, 'steps': 5025, 'loss/train': 3.6421656608581543} +02/24/2022 08:04:40 - INFO - codeparrot_training - Step 5026: {'lr': 0.000495112969018793, 'samples': 2573824, 'steps': 5026, 'loss/train': 2.5617172718048096} +02/24/2022 08:04:44 - INFO - codeparrot_training - Step 5027: {'lr': 0.0004951097490315853, 'samples': 2574336, 'steps': 5027, 'loss/train': 2.223910331726074} +02/24/2022 08:04:49 - INFO - codeparrot_training - Step 5028: {'lr': 0.0004951065279944054, 'samples': 2574848, 'steps': 5028, 'loss/train': 3.514998197555542} +02/24/2022 08:04:53 - INFO - codeparrot_training - Step 5029: {'lr': 0.0004951033059072668, 'samples': 2575360, 'steps': 5029, 'loss/train': 2.5756258964538574} +02/24/2022 08:04:58 - INFO - codeparrot_training - Step 5030: {'lr': 0.0004951000827701836, 'samples': 2575872, 'steps': 5030, 'loss/train': 1.9040237665176392} +02/24/2022 08:05:02 - INFO - codeparrot_training - Step 5031: {'lr': 0.0004950968585831694, 'samples': 2576384, 'steps': 5031, 'loss/train': 1.8934390544891357} +02/24/2022 08:05:08 - INFO - codeparrot_training - Step 5032: {'lr': 0.0004950936333462381, 'samples': 2576896, 'steps': 5032, 'loss/train': 2.160876989364624} +02/24/2022 08:05:12 - INFO - codeparrot_training - Step 5033: {'lr': 0.0004950904070594036, 'samples': 2577408, 'steps': 5033, 'loss/train': 2.021130323410034} +02/24/2022 08:05:17 - INFO - codeparrot_training - Step 5034: {'lr': 0.0004950871797226795, 'samples': 2577920, 'steps': 5034, 'loss/train': 2.519498586654663} +02/24/2022 08:05:21 - INFO - codeparrot_training - Step 5035: {'lr': 0.0004950839513360798, 'samples': 2578432, 'steps': 5035, 'loss/train': 2.521667242050171} +02/24/2022 08:05:26 - INFO - codeparrot_training - Step 5036: {'lr': 0.0004950807218996182, 'samples': 2578944, 'steps': 5036, 'loss/train': 1.9231854677200317} +02/24/2022 08:05:30 - INFO - codeparrot_training - Step 5037: {'lr': 0.0004950774914133086, 'samples': 2579456, 'steps': 5037, 'loss/train': 1.0685920715332031} +02/24/2022 08:05:35 - INFO - codeparrot_training - Step 5038: {'lr': 0.0004950742598771649, 'samples': 2579968, 'steps': 5038, 'loss/train': 2.0484297275543213} +02/24/2022 08:05:41 - INFO - codeparrot_training - Step 5039: {'lr': 0.0004950710272912009, 'samples': 2580480, 'steps': 5039, 'loss/train': 2.969024658203125} +02/24/2022 08:05:44 - INFO - codeparrot_training - Step 5040: {'lr': 0.0004950677936554305, 'samples': 2580992, 'steps': 5040, 'loss/train': 2.8437583446502686} +02/24/2022 08:05:50 - INFO - codeparrot_training - Step 5041: {'lr': 0.0004950645589698674, 'samples': 2581504, 'steps': 5041, 'loss/train': 2.2863240242004395} +02/24/2022 08:05:53 - INFO - codeparrot_training - Step 5042: {'lr': 0.0004950613232345256, 'samples': 2582016, 'steps': 5042, 'loss/train': 2.68483304977417} +02/24/2022 08:05:59 - INFO - codeparrot_training - Step 5043: {'lr': 0.0004950580864494188, 'samples': 2582528, 'steps': 5043, 'loss/train': 3.0379183292388916} +02/24/2022 08:06:02 - INFO - codeparrot_training - Step 5044: {'lr': 0.0004950548486145611, 'samples': 2583040, 'steps': 5044, 'loss/train': 2.3883142471313477} +02/24/2022 08:06:08 - INFO - codeparrot_training - Step 5045: {'lr': 0.0004950516097299662, 'samples': 2583552, 'steps': 5045, 'loss/train': 2.279304027557373} +02/24/2022 08:06:11 - INFO - codeparrot_training - Step 5046: {'lr': 0.000495048369795648, 'samples': 2584064, 'steps': 5046, 'loss/train': 1.8159793615341187} +02/24/2022 08:06:18 - INFO - codeparrot_training - Step 5047: {'lr': 0.0004950451288116204, 'samples': 2584576, 'steps': 5047, 'loss/train': 2.7002248764038086} +02/24/2022 08:06:21 - INFO - codeparrot_training - Step 5048: {'lr': 0.0004950418867778973, 'samples': 2585088, 'steps': 5048, 'loss/train': 2.738889694213867} +02/24/2022 08:06:27 - INFO - codeparrot_training - Step 5049: {'lr': 0.0004950386436944925, 'samples': 2585600, 'steps': 5049, 'loss/train': 2.8263957500457764} +02/24/2022 08:06:30 - INFO - codeparrot_training - Step 5050: {'lr': 0.0004950353995614201, 'samples': 2586112, 'steps': 5050, 'loss/train': 2.439209461212158} +02/24/2022 08:06:36 - INFO - codeparrot_training - Step 5051: {'lr': 0.0004950321543786937, 'samples': 2586624, 'steps': 5051, 'loss/train': 2.6172542572021484} +02/24/2022 08:06:39 - INFO - codeparrot_training - Step 5052: {'lr': 0.0004950289081463273, 'samples': 2587136, 'steps': 5052, 'loss/train': 3.2669732570648193} +02/24/2022 08:06:45 - INFO - codeparrot_training - Step 5053: {'lr': 0.0004950256608643351, 'samples': 2587648, 'steps': 5053, 'loss/train': 1.2929632663726807} +02/24/2022 08:06:48 - INFO - codeparrot_training - Step 5054: {'lr': 0.0004950224125327307, 'samples': 2588160, 'steps': 5054, 'loss/train': 2.0164365768432617} +02/24/2022 08:06:54 - INFO - codeparrot_training - Step 5055: {'lr': 0.000495019163151528, 'samples': 2588672, 'steps': 5055, 'loss/train': 3.002439022064209} +02/24/2022 08:06:57 - INFO - codeparrot_training - Step 5056: {'lr': 0.0004950159127207411, 'samples': 2589184, 'steps': 5056, 'loss/train': 2.2478277683258057} +02/24/2022 08:07:04 - INFO - codeparrot_training - Step 5057: {'lr': 0.0004950126612403838, 'samples': 2589696, 'steps': 5057, 'loss/train': 3.1365468502044678} +02/24/2022 08:07:07 - INFO - codeparrot_training - Step 5058: {'lr': 0.00049500940871047, 'samples': 2590208, 'steps': 5058, 'loss/train': 3.099783420562744} +02/24/2022 08:07:12 - INFO - codeparrot_training - Step 5059: {'lr': 0.0004950061551310138, 'samples': 2590720, 'steps': 5059, 'loss/train': 2.2945163249969482} +02/24/2022 08:07:16 - INFO - codeparrot_training - Step 5060: {'lr': 0.0004950029005020289, 'samples': 2591232, 'steps': 5060, 'loss/train': 1.7013286352157593} +02/24/2022 08:07:21 - INFO - codeparrot_training - Step 5061: {'lr': 0.0004949996448235294, 'samples': 2591744, 'steps': 5061, 'loss/train': 1.8031805753707886} +02/24/2022 08:07:25 - INFO - codeparrot_training - Step 5062: {'lr': 0.0004949963880955293, 'samples': 2592256, 'steps': 5062, 'loss/train': 2.8380470275878906} +02/24/2022 08:07:30 - INFO - codeparrot_training - Step 5063: {'lr': 0.0004949931303180424, 'samples': 2592768, 'steps': 5063, 'loss/train': 2.81138277053833} +02/24/2022 08:07:34 - INFO - codeparrot_training - Step 5064: {'lr': 0.0004949898714910828, 'samples': 2593280, 'steps': 5064, 'loss/train': 2.8589231967926025} +02/24/2022 08:07:39 - INFO - codeparrot_training - Step 5065: {'lr': 0.0004949866116146643, 'samples': 2593792, 'steps': 5065, 'loss/train': 1.7443885803222656} +02/24/2022 08:07:43 - INFO - codeparrot_training - Step 5066: {'lr': 0.000494983350688801, 'samples': 2594304, 'steps': 5066, 'loss/train': 2.3759613037109375} +02/24/2022 08:07:49 - INFO - codeparrot_training - Step 5067: {'lr': 0.0004949800887135067, 'samples': 2594816, 'steps': 5067, 'loss/train': 1.7359416484832764} +02/24/2022 08:07:53 - INFO - codeparrot_training - Step 5068: {'lr': 0.0004949768256887956, 'samples': 2595328, 'steps': 5068, 'loss/train': 3.2318248748779297} +02/24/2022 08:07:58 - INFO - codeparrot_training - Step 5069: {'lr': 0.0004949735616146816, 'samples': 2595840, 'steps': 5069, 'loss/train': 3.1380720138549805} +02/24/2022 08:08:01 - INFO - codeparrot_training - Step 5070: {'lr': 0.0004949702964911787, 'samples': 2596352, 'steps': 5070, 'loss/train': 2.6741809844970703} +02/24/2022 08:08:07 - INFO - codeparrot_training - Step 5071: {'lr': 0.0004949670303183006, 'samples': 2596864, 'steps': 5071, 'loss/train': 3.599745512008667} +02/24/2022 08:08:10 - INFO - codeparrot_training - Step 5072: {'lr': 0.0004949637630960618, 'samples': 2597376, 'steps': 5072, 'loss/train': 5.534940719604492} +02/24/2022 08:08:16 - INFO - codeparrot_training - Step 5073: {'lr': 0.0004949604948244758, 'samples': 2597888, 'steps': 5073, 'loss/train': 2.1387131214141846} +02/24/2022 08:08:20 - INFO - codeparrot_training - Step 5074: {'lr': 0.0004949572255035569, 'samples': 2598400, 'steps': 5074, 'loss/train': 2.5684547424316406} +02/24/2022 08:08:25 - INFO - codeparrot_training - Step 5075: {'lr': 0.0004949539551333191, 'samples': 2598912, 'steps': 5075, 'loss/train': 1.5825589895248413} +02/24/2022 08:08:28 - INFO - codeparrot_training - Step 5076: {'lr': 0.0004949506837137763, 'samples': 2599424, 'steps': 5076, 'loss/train': 2.939244270324707} +02/24/2022 08:08:35 - INFO - codeparrot_training - Step 5077: {'lr': 0.0004949474112449424, 'samples': 2599936, 'steps': 5077, 'loss/train': 3.7290453910827637} +02/24/2022 08:08:38 - INFO - codeparrot_training - Step 5078: {'lr': 0.0004949441377268318, 'samples': 2600448, 'steps': 5078, 'loss/train': 2.5886638164520264} +02/24/2022 08:08:44 - INFO - codeparrot_training - Step 5079: {'lr': 0.0004949408631594582, 'samples': 2600960, 'steps': 5079, 'loss/train': 2.021549940109253} +02/24/2022 08:08:47 - INFO - codeparrot_training - Step 5080: {'lr': 0.0004949375875428357, 'samples': 2601472, 'steps': 5080, 'loss/train': 2.831801414489746} +02/24/2022 08:08:53 - INFO - codeparrot_training - Step 5081: {'lr': 0.0004949343108769784, 'samples': 2601984, 'steps': 5081, 'loss/train': 2.555068254470825} +02/24/2022 08:08:56 - INFO - codeparrot_training - Step 5082: {'lr': 0.0004949310331619002, 'samples': 2602496, 'steps': 5082, 'loss/train': 2.103515148162842} +02/24/2022 08:09:02 - INFO - codeparrot_training - Step 5083: {'lr': 0.0004949277543976153, 'samples': 2603008, 'steps': 5083, 'loss/train': 2.3618087768554688} +02/24/2022 08:09:05 - INFO - codeparrot_training - Step 5084: {'lr': 0.0004949244745841377, 'samples': 2603520, 'steps': 5084, 'loss/train': 3.752011775970459} +02/24/2022 08:09:11 - INFO - codeparrot_training - Step 5085: {'lr': 0.0004949211937214814, 'samples': 2604032, 'steps': 5085, 'loss/train': 1.4301141500473022} +02/24/2022 08:09:15 - INFO - codeparrot_training - Step 5086: {'lr': 0.0004949179118096604, 'samples': 2604544, 'steps': 5086, 'loss/train': 3.508284330368042} +02/24/2022 08:09:20 - INFO - codeparrot_training - Step 5087: {'lr': 0.0004949146288486889, 'samples': 2605056, 'steps': 5087, 'loss/train': 2.7847771644592285} +02/24/2022 08:09:24 - INFO - codeparrot_training - Step 5088: {'lr': 0.0004949113448385809, 'samples': 2605568, 'steps': 5088, 'loss/train': 1.6906352043151855} +02/24/2022 08:09:29 - INFO - codeparrot_training - Step 5089: {'lr': 0.0004949080597793505, 'samples': 2606080, 'steps': 5089, 'loss/train': 2.9376049041748047} +02/24/2022 08:09:33 - INFO - codeparrot_training - Step 5090: {'lr': 0.0004949047736710116, 'samples': 2606592, 'steps': 5090, 'loss/train': 2.1915385723114014} +02/24/2022 08:09:38 - INFO - codeparrot_training - Step 5091: {'lr': 0.0004949014865135786, 'samples': 2607104, 'steps': 5091, 'loss/train': 2.7079694271087646} +02/24/2022 08:09:41 - INFO - codeparrot_training - Step 5092: {'lr': 0.0004948981983070652, 'samples': 2607616, 'steps': 5092, 'loss/train': 1.000334620475769} +02/24/2022 08:09:48 - INFO - codeparrot_training - Step 5093: {'lr': 0.0004948949090514858, 'samples': 2608128, 'steps': 5093, 'loss/train': 1.335888385772705} +02/24/2022 08:09:51 - INFO - codeparrot_training - Step 5094: {'lr': 0.0004948916187468544, 'samples': 2608640, 'steps': 5094, 'loss/train': 2.8289735317230225} +02/24/2022 08:09:57 - INFO - codeparrot_training - Step 5095: {'lr': 0.000494888327393185, 'samples': 2609152, 'steps': 5095, 'loss/train': 2.2796788215637207} +02/24/2022 08:10:00 - INFO - codeparrot_training - Step 5096: {'lr': 0.0004948850349904919, 'samples': 2609664, 'steps': 5096, 'loss/train': 2.2301361560821533} +02/24/2022 08:10:06 - INFO - codeparrot_training - Step 5097: {'lr': 0.000494881741538789, 'samples': 2610176, 'steps': 5097, 'loss/train': 2.9230470657348633} +02/24/2022 08:10:09 - INFO - codeparrot_training - Step 5098: {'lr': 0.0004948784470380904, 'samples': 2610688, 'steps': 5098, 'loss/train': 2.715867757797241} +02/24/2022 08:10:15 - INFO - codeparrot_training - Step 5099: {'lr': 0.0004948751514884103, 'samples': 2611200, 'steps': 5099, 'loss/train': 2.704303503036499} +02/24/2022 08:10:18 - INFO - codeparrot_training - Step 5100: {'lr': 0.0004948718548897628, 'samples': 2611712, 'steps': 5100, 'loss/train': 3.723182201385498} +02/24/2022 08:10:24 - INFO - codeparrot_training - Step 5101: {'lr': 0.0004948685572421621, 'samples': 2612224, 'steps': 5101, 'loss/train': 2.602644443511963} +02/24/2022 08:10:27 - INFO - codeparrot_training - Step 5102: {'lr': 0.0004948652585456222, 'samples': 2612736, 'steps': 5102, 'loss/train': 2.9009594917297363} +02/24/2022 08:10:33 - INFO - codeparrot_training - Step 5103: {'lr': 0.0004948619588001574, 'samples': 2613248, 'steps': 5103, 'loss/train': 1.9833265542984009} +02/24/2022 08:10:37 - INFO - codeparrot_training - Step 5104: {'lr': 0.0004948586580057816, 'samples': 2613760, 'steps': 5104, 'loss/train': 1.3734633922576904} +02/24/2022 08:10:42 - INFO - codeparrot_training - Step 5105: {'lr': 0.0004948553561625091, 'samples': 2614272, 'steps': 5105, 'loss/train': 2.327197790145874} +02/24/2022 08:10:46 - INFO - codeparrot_training - Step 5106: {'lr': 0.000494852053270354, 'samples': 2614784, 'steps': 5106, 'loss/train': 1.6684679985046387} +02/24/2022 08:10:51 - INFO - codeparrot_training - Step 5107: {'lr': 0.0004948487493293305, 'samples': 2615296, 'steps': 5107, 'loss/train': 2.6789066791534424} +02/24/2022 08:10:55 - INFO - codeparrot_training - Step 5108: {'lr': 0.0004948454443394527, 'samples': 2615808, 'steps': 5108, 'loss/train': 3.3280553817749023} +02/24/2022 08:11:01 - INFO - codeparrot_training - Step 5109: {'lr': 0.0004948421383007347, 'samples': 2616320, 'steps': 5109, 'loss/train': 4.2329487800598145} +02/24/2022 08:11:04 - INFO - codeparrot_training - Step 5110: {'lr': 0.0004948388312131908, 'samples': 2616832, 'steps': 5110, 'loss/train': 1.8370198011398315} +02/24/2022 08:11:10 - INFO - codeparrot_training - Step 5111: {'lr': 0.0004948355230768349, 'samples': 2617344, 'steps': 5111, 'loss/train': 1.8667925596237183} +02/24/2022 08:11:13 - INFO - codeparrot_training - Step 5112: {'lr': 0.0004948322138916816, 'samples': 2617856, 'steps': 5112, 'loss/train': 2.9620187282562256} +02/24/2022 08:11:19 - INFO - codeparrot_training - Step 5113: {'lr': 0.0004948289036577447, 'samples': 2618368, 'steps': 5113, 'loss/train': 2.476203680038452} +02/24/2022 08:11:23 - INFO - codeparrot_training - Step 5114: {'lr': 0.0004948255923750385, 'samples': 2618880, 'steps': 5114, 'loss/train': 2.6586132049560547} +02/24/2022 08:11:28 - INFO - codeparrot_training - Step 5115: {'lr': 0.0004948222800435773, 'samples': 2619392, 'steps': 5115, 'loss/train': 4.017131805419922} +02/24/2022 08:11:32 - INFO - codeparrot_training - Step 5116: {'lr': 0.0004948189666633752, 'samples': 2619904, 'steps': 5116, 'loss/train': 1.6821726560592651} +02/24/2022 08:11:37 - INFO - codeparrot_training - Step 5117: {'lr': 0.0004948156522344463, 'samples': 2620416, 'steps': 5117, 'loss/train': 1.9291718006134033} +02/24/2022 08:11:41 - INFO - codeparrot_training - Step 5118: {'lr': 0.0004948123367568049, 'samples': 2620928, 'steps': 5118, 'loss/train': 3.2326972484588623} +02/24/2022 08:11:46 - INFO - codeparrot_training - Step 5119: {'lr': 0.0004948090202304652, 'samples': 2621440, 'steps': 5119, 'loss/train': 2.32305645942688} +02/24/2022 08:11:50 - INFO - codeparrot_training - Step 5120: {'lr': 0.0004948057026554415, 'samples': 2621952, 'steps': 5120, 'loss/train': 2.8522584438323975} +02/24/2022 08:11:55 - INFO - codeparrot_training - Step 5121: {'lr': 0.0004948023840317477, 'samples': 2622464, 'steps': 5121, 'loss/train': 2.4217073917388916} +02/24/2022 08:11:59 - INFO - codeparrot_training - Step 5122: {'lr': 0.0004947990643593983, 'samples': 2622976, 'steps': 5122, 'loss/train': 2.274604320526123} +02/24/2022 08:12:05 - INFO - codeparrot_training - Step 5123: {'lr': 0.0004947957436384076, 'samples': 2623488, 'steps': 5123, 'loss/train': 2.744251012802124} +02/24/2022 08:12:08 - INFO - codeparrot_training - Step 5124: {'lr': 0.0004947924218687894, 'samples': 2624000, 'steps': 5124, 'loss/train': 3.4140748977661133} +02/24/2022 08:12:14 - INFO - codeparrot_training - Step 5125: {'lr': 0.0004947890990505585, 'samples': 2624512, 'steps': 5125, 'loss/train': 2.790090799331665} +02/24/2022 08:12:17 - INFO - codeparrot_training - Step 5126: {'lr': 0.0004947857751837286, 'samples': 2625024, 'steps': 5126, 'loss/train': 2.408137798309326} +02/24/2022 08:12:23 - INFO - codeparrot_training - Step 5127: {'lr': 0.0004947824502683142, 'samples': 2625536, 'steps': 5127, 'loss/train': 2.5618083477020264} +02/24/2022 08:12:26 - INFO - codeparrot_training - Step 5128: {'lr': 0.0004947791243043296, 'samples': 2626048, 'steps': 5128, 'loss/train': 1.8575185537338257} +02/24/2022 08:12:32 - INFO - codeparrot_training - Step 5129: {'lr': 0.0004947757972917889, 'samples': 2626560, 'steps': 5129, 'loss/train': 2.426994800567627} +02/24/2022 08:12:35 - INFO - codeparrot_training - Step 5130: {'lr': 0.0004947724692307064, 'samples': 2627072, 'steps': 5130, 'loss/train': 2.640988826751709} +02/24/2022 08:12:41 - INFO - codeparrot_training - Step 5131: {'lr': 0.0004947691401210963, 'samples': 2627584, 'steps': 5131, 'loss/train': 2.1888606548309326} +02/24/2022 08:12:44 - INFO - codeparrot_training - Step 5132: {'lr': 0.0004947658099629731, 'samples': 2628096, 'steps': 5132, 'loss/train': 3.2527060508728027} +02/24/2022 08:12:50 - INFO - codeparrot_training - Step 5133: {'lr': 0.0004947624787563507, 'samples': 2628608, 'steps': 5133, 'loss/train': 2.5574615001678467} +02/24/2022 08:12:54 - INFO - codeparrot_training - Step 5134: {'lr': 0.0004947591465012436, 'samples': 2629120, 'steps': 5134, 'loss/train': 3.0218966007232666} +02/24/2022 08:12:59 - INFO - codeparrot_training - Step 5135: {'lr': 0.0004947558131976661, 'samples': 2629632, 'steps': 5135, 'loss/train': 2.7285172939300537} +02/24/2022 08:13:03 - INFO - codeparrot_training - Step 5136: {'lr': 0.0004947524788456324, 'samples': 2630144, 'steps': 5136, 'loss/train': 3.1026570796966553} +02/24/2022 08:13:08 - INFO - codeparrot_training - Step 5137: {'lr': 0.0004947491434451569, 'samples': 2630656, 'steps': 5137, 'loss/train': 2.9265973567962646} +02/24/2022 08:13:12 - INFO - codeparrot_training - Step 5138: {'lr': 0.0004947458069962537, 'samples': 2631168, 'steps': 5138, 'loss/train': 1.492129921913147} +02/24/2022 08:13:18 - INFO - codeparrot_training - Step 5139: {'lr': 0.0004947424694989371, 'samples': 2631680, 'steps': 5139, 'loss/train': 2.335777997970581} +02/24/2022 08:13:21 - INFO - codeparrot_training - Step 5140: {'lr': 0.0004947391309532216, 'samples': 2632192, 'steps': 5140, 'loss/train': 3.307926893234253} +02/24/2022 08:13:27 - INFO - codeparrot_training - Step 5141: {'lr': 0.0004947357913591213, 'samples': 2632704, 'steps': 5141, 'loss/train': 1.5015305280685425} +02/24/2022 08:13:30 - INFO - codeparrot_training - Step 5142: {'lr': 0.0004947324507166505, 'samples': 2633216, 'steps': 5142, 'loss/train': 1.8070907592773438} +02/24/2022 08:13:36 - INFO - codeparrot_training - Step 5143: {'lr': 0.0004947291090258238, 'samples': 2633728, 'steps': 5143, 'loss/train': 2.8901987075805664} +02/24/2022 08:13:39 - INFO - codeparrot_training - Step 5144: {'lr': 0.0004947257662866551, 'samples': 2634240, 'steps': 5144, 'loss/train': 3.076181650161743} +02/24/2022 08:13:45 - INFO - codeparrot_training - Step 5145: {'lr': 0.0004947224224991591, 'samples': 2634752, 'steps': 5145, 'loss/train': 2.332066774368286} +02/24/2022 08:13:48 - INFO - codeparrot_training - Step 5146: {'lr': 0.0004947190776633499, 'samples': 2635264, 'steps': 5146, 'loss/train': 2.256387710571289} +02/24/2022 08:13:54 - INFO - codeparrot_training - Step 5147: {'lr': 0.0004947157317792418, 'samples': 2635776, 'steps': 5147, 'loss/train': 2.681453227996826} +02/24/2022 08:13:57 - INFO - codeparrot_training - Step 5148: {'lr': 0.0004947123848468493, 'samples': 2636288, 'steps': 5148, 'loss/train': 2.7483694553375244} +02/24/2022 08:14:03 - INFO - codeparrot_training - Step 5149: {'lr': 0.0004947090368661866, 'samples': 2636800, 'steps': 5149, 'loss/train': 1.2899115085601807} +02/24/2022 08:14:07 - INFO - codeparrot_training - Step 5150: {'lr': 0.0004947056878372681, 'samples': 2637312, 'steps': 5150, 'loss/train': 1.7127106189727783} +02/24/2022 08:14:12 - INFO - codeparrot_training - Step 5151: {'lr': 0.0004947023377601082, 'samples': 2637824, 'steps': 5151, 'loss/train': 2.629730701446533} +02/24/2022 08:14:16 - INFO - codeparrot_training - Step 5152: {'lr': 0.0004946989866347211, 'samples': 2638336, 'steps': 5152, 'loss/train': 1.9000917673110962} +02/24/2022 08:14:21 - INFO - codeparrot_training - Step 5153: {'lr': 0.0004946956344611212, 'samples': 2638848, 'steps': 5153, 'loss/train': 3.3763539791107178} +02/24/2022 08:14:27 - INFO - codeparrot_training - Step 5154: {'lr': 0.000494692281239323, 'samples': 2639360, 'steps': 5154, 'loss/train': 2.3680930137634277} +02/24/2022 08:14:30 - INFO - codeparrot_training - Step 5155: {'lr': 0.0004946889269693408, 'samples': 2639872, 'steps': 5155, 'loss/train': 2.266359567642212} +02/24/2022 08:14:36 - INFO - codeparrot_training - Step 5156: {'lr': 0.0004946855716511888, 'samples': 2640384, 'steps': 5156, 'loss/train': 3.2081191539764404} +02/24/2022 08:14:39 - INFO - codeparrot_training - Step 5157: {'lr': 0.0004946822152848816, 'samples': 2640896, 'steps': 5157, 'loss/train': 2.1639814376831055} +02/24/2022 08:14:45 - INFO - codeparrot_training - Step 5158: {'lr': 0.0004946788578704335, 'samples': 2641408, 'steps': 5158, 'loss/train': 2.309159994125366} +02/24/2022 08:14:49 - INFO - codeparrot_training - Step 5159: {'lr': 0.0004946754994078588, 'samples': 2641920, 'steps': 5159, 'loss/train': 1.926787257194519} +02/24/2022 08:14:54 - INFO - codeparrot_training - Step 5160: {'lr': 0.000494672139897172, 'samples': 2642432, 'steps': 5160, 'loss/train': 3.462728261947632} +02/24/2022 08:14:58 - INFO - codeparrot_training - Step 5161: {'lr': 0.0004946687793383874, 'samples': 2642944, 'steps': 5161, 'loss/train': 2.4011433124542236} +02/24/2022 08:15:01 - INFO - codeparrot_training - Step 5162: {'lr': 0.0004946654177315194, 'samples': 2643456, 'steps': 5162, 'loss/train': 0.5060781240463257} +02/24/2022 08:15:07 - INFO - codeparrot_training - Step 5163: {'lr': 0.0004946620550765826, 'samples': 2643968, 'steps': 5163, 'loss/train': 2.1764190196990967} +02/24/2022 08:15:10 - INFO - codeparrot_training - Step 5164: {'lr': 0.0004946586913735911, 'samples': 2644480, 'steps': 5164, 'loss/train': 1.2177103757858276} +02/24/2022 08:15:16 - INFO - codeparrot_training - Step 5165: {'lr': 0.0004946553266225595, 'samples': 2644992, 'steps': 5165, 'loss/train': 2.6867284774780273} +02/24/2022 08:15:21 - INFO - codeparrot_training - Step 5166: {'lr': 0.0004946519608235022, 'samples': 2645504, 'steps': 5166, 'loss/train': 2.9138760566711426} +02/24/2022 08:15:25 - INFO - codeparrot_training - Step 5167: {'lr': 0.0004946485939764336, 'samples': 2646016, 'steps': 5167, 'loss/train': 2.5797221660614014} +02/24/2022 08:15:28 - INFO - codeparrot_training - Step 5168: {'lr': 0.000494645226081368, 'samples': 2646528, 'steps': 5168, 'loss/train': 3.369985342025757} +02/24/2022 08:15:35 - INFO - codeparrot_training - Step 5169: {'lr': 0.00049464185713832, 'samples': 2647040, 'steps': 5169, 'loss/train': 2.85191011428833} +02/24/2022 08:15:40 - INFO - codeparrot_training - Step 5170: {'lr': 0.000494638487147304, 'samples': 2647552, 'steps': 5170, 'loss/train': 2.664747953414917} +02/24/2022 08:15:44 - INFO - codeparrot_training - Step 5171: {'lr': 0.0004946351161083344, 'samples': 2648064, 'steps': 5171, 'loss/train': 2.5004470348358154} +02/24/2022 08:15:50 - INFO - codeparrot_training - Step 5172: {'lr': 0.0004946317440214257, 'samples': 2648576, 'steps': 5172, 'loss/train': 2.210859775543213} +02/24/2022 08:15:53 - INFO - codeparrot_training - Step 5173: {'lr': 0.000494628370886592, 'samples': 2649088, 'steps': 5173, 'loss/train': 2.713085412979126} +02/24/2022 08:15:57 - INFO - codeparrot_training - Step 5174: {'lr': 0.0004946249967038483, 'samples': 2649600, 'steps': 5174, 'loss/train': 2.681502342224121} +02/24/2022 08:16:02 - INFO - codeparrot_training - Step 5175: {'lr': 0.0004946216214732088, 'samples': 2650112, 'steps': 5175, 'loss/train': 2.96237850189209} +02/24/2022 08:16:06 - INFO - codeparrot_training - Step 5176: {'lr': 0.0004946182451946878, 'samples': 2650624, 'steps': 5176, 'loss/train': 1.3448158502578735} +02/24/2022 08:16:11 - INFO - codeparrot_training - Step 5177: {'lr': 0.0004946148678683001, 'samples': 2651136, 'steps': 5177, 'loss/train': 1.241605520248413} +02/24/2022 08:16:15 - INFO - codeparrot_training - Step 5178: {'lr': 0.0004946114894940599, 'samples': 2651648, 'steps': 5178, 'loss/train': 2.018709182739258} +02/24/2022 08:16:20 - INFO - codeparrot_training - Step 5179: {'lr': 0.0004946081100719817, 'samples': 2652160, 'steps': 5179, 'loss/train': 2.4574363231658936} +02/24/2022 08:16:24 - INFO - codeparrot_training - Step 5180: {'lr': 0.00049460472960208, 'samples': 2652672, 'steps': 5180, 'loss/train': 3.193479061126709} +02/24/2022 08:16:29 - INFO - codeparrot_training - Step 5181: {'lr': 0.0004946013480843694, 'samples': 2653184, 'steps': 5181, 'loss/train': 1.2273881435394287} +02/24/2022 08:16:33 - INFO - codeparrot_training - Step 5182: {'lr': 0.0004945979655188642, 'samples': 2653696, 'steps': 5182, 'loss/train': 1.6777675151824951} +02/24/2022 08:16:38 - INFO - codeparrot_training - Step 5183: {'lr': 0.0004945945819055791, 'samples': 2654208, 'steps': 5183, 'loss/train': 1.6399005651474} +02/24/2022 08:16:42 - INFO - codeparrot_training - Step 5184: {'lr': 0.0004945911972445284, 'samples': 2654720, 'steps': 5184, 'loss/train': 2.7347631454467773} +02/24/2022 08:16:48 - INFO - codeparrot_training - Step 5185: {'lr': 0.0004945878115357267, 'samples': 2655232, 'steps': 5185, 'loss/train': 2.2540037631988525} +02/24/2022 08:16:51 - INFO - codeparrot_training - Step 5186: {'lr': 0.0004945844247791886, 'samples': 2655744, 'steps': 5186, 'loss/train': 8.276962280273438} +02/24/2022 08:16:58 - INFO - codeparrot_training - Step 5187: {'lr': 0.0004945810369749283, 'samples': 2656256, 'steps': 5187, 'loss/train': 4.214089393615723} +02/24/2022 08:17:01 - INFO - codeparrot_training - Step 5188: {'lr': 0.0004945776481229605, 'samples': 2656768, 'steps': 5188, 'loss/train': 1.785870909690857} +02/24/2022 08:17:04 - INFO - codeparrot_training - Step 5189: {'lr': 0.0004945742582232999, 'samples': 2657280, 'steps': 5189, 'loss/train': 2.70741868019104} +02/24/2022 08:17:10 - INFO - codeparrot_training - Step 5190: {'lr': 0.0004945708672759606, 'samples': 2657792, 'steps': 5190, 'loss/train': 2.295666217803955} +02/24/2022 08:17:13 - INFO - codeparrot_training - Step 5191: {'lr': 0.0004945674752809575, 'samples': 2658304, 'steps': 5191, 'loss/train': 3.018218517303467} +02/24/2022 08:17:19 - INFO - codeparrot_training - Step 5192: {'lr': 0.000494564082238305, 'samples': 2658816, 'steps': 5192, 'loss/train': 1.5652316808700562} +02/24/2022 08:17:22 - INFO - codeparrot_training - Step 5193: {'lr': 0.0004945606881480176, 'samples': 2659328, 'steps': 5193, 'loss/train': 2.667050361633301} +02/24/2022 08:17:28 - INFO - codeparrot_training - Step 5194: {'lr': 0.0004945572930101098, 'samples': 2659840, 'steps': 5194, 'loss/train': 2.329467535018921} +02/24/2022 08:17:31 - INFO - codeparrot_training - Step 5195: {'lr': 0.0004945538968245964, 'samples': 2660352, 'steps': 5195, 'loss/train': 2.79240083694458} +02/24/2022 08:17:37 - INFO - codeparrot_training - Step 5196: {'lr': 0.0004945504995914917, 'samples': 2660864, 'steps': 5196, 'loss/train': 1.2632068395614624} +02/24/2022 08:17:40 - INFO - codeparrot_training - Step 5197: {'lr': 0.0004945471013108102, 'samples': 2661376, 'steps': 5197, 'loss/train': 2.8748514652252197} +02/24/2022 08:17:46 - INFO - codeparrot_training - Step 5198: {'lr': 0.0004945437019825668, 'samples': 2661888, 'steps': 5198, 'loss/train': 1.6277357339859009} +02/24/2022 08:17:50 - INFO - codeparrot_training - Step 5199: {'lr': 0.0004945403016067756, 'samples': 2662400, 'steps': 5199, 'loss/train': 2.3230233192443848} +02/24/2022 08:17:56 - INFO - codeparrot_training - Step 5200: {'lr': 0.0004945369001834514, 'samples': 2662912, 'steps': 5200, 'loss/train': 2.9772768020629883} +02/24/2022 08:17:59 - INFO - codeparrot_training - Step 5201: {'lr': 0.0004945334977126089, 'samples': 2663424, 'steps': 5201, 'loss/train': 1.9875446557998657} +02/24/2022 08:18:05 - INFO - codeparrot_training - Step 5202: {'lr': 0.0004945300941942624, 'samples': 2663936, 'steps': 5202, 'loss/train': 2.912196397781372} +02/24/2022 08:18:08 - INFO - codeparrot_training - Step 5203: {'lr': 0.0004945266896284268, 'samples': 2664448, 'steps': 5203, 'loss/train': 3.1419756412506104} +02/24/2022 08:18:14 - INFO - codeparrot_training - Step 5204: {'lr': 0.0004945232840151164, 'samples': 2664960, 'steps': 5204, 'loss/train': 2.4587762355804443} +02/24/2022 08:18:17 - INFO - codeparrot_training - Step 5205: {'lr': 0.0004945198773543459, 'samples': 2665472, 'steps': 5205, 'loss/train': 2.140803098678589} +02/24/2022 08:18:23 - INFO - codeparrot_training - Step 5206: {'lr': 0.0004945164696461299, 'samples': 2665984, 'steps': 5206, 'loss/train': 2.219256639480591} +02/24/2022 08:18:26 - INFO - codeparrot_training - Step 5207: {'lr': 0.000494513060890483, 'samples': 2666496, 'steps': 5207, 'loss/train': 2.2424747943878174} +02/24/2022 08:18:32 - INFO - codeparrot_training - Step 5208: {'lr': 0.0004945096510874197, 'samples': 2667008, 'steps': 5208, 'loss/train': 2.0000693798065186} +02/24/2022 08:18:36 - INFO - codeparrot_training - Step 5209: {'lr': 0.0004945062402369548, 'samples': 2667520, 'steps': 5209, 'loss/train': 3.330228805541992} +02/24/2022 08:18:42 - INFO - codeparrot_training - Step 5210: {'lr': 0.0004945028283391028, 'samples': 2668032, 'steps': 5210, 'loss/train': 0.6820445656776428} +02/24/2022 08:18:45 - INFO - codeparrot_training - Step 5211: {'lr': 0.0004944994153938783, 'samples': 2668544, 'steps': 5211, 'loss/train': 2.586907386779785} +02/24/2022 08:18:51 - INFO - codeparrot_training - Step 5212: {'lr': 0.0004944960014012959, 'samples': 2669056, 'steps': 5212, 'loss/train': 3.126750946044922} +02/24/2022 08:18:54 - INFO - codeparrot_training - Step 5213: {'lr': 0.0004944925863613704, 'samples': 2669568, 'steps': 5213, 'loss/train': 1.5274075269699097} +02/24/2022 08:19:00 - INFO - codeparrot_training - Step 5214: {'lr': 0.0004944891702741161, 'samples': 2670080, 'steps': 5214, 'loss/train': 3.2495155334472656} +02/24/2022 08:19:03 - INFO - codeparrot_training - Step 5215: {'lr': 0.0004944857531395479, 'samples': 2670592, 'steps': 5215, 'loss/train': 2.186718702316284} +02/24/2022 08:19:09 - INFO - codeparrot_training - Step 5216: {'lr': 0.0004944823349576805, 'samples': 2671104, 'steps': 5216, 'loss/train': 2.643941879272461} +02/24/2022 08:19:13 - INFO - codeparrot_training - Step 5217: {'lr': 0.0004944789157285283, 'samples': 2671616, 'steps': 5217, 'loss/train': 2.964428424835205} +02/24/2022 08:19:18 - INFO - codeparrot_training - Step 5218: {'lr': 0.0004944754954521061, 'samples': 2672128, 'steps': 5218, 'loss/train': 3.1117196083068848} +02/24/2022 08:19:22 - INFO - codeparrot_training - Step 5219: {'lr': 0.0004944720741284285, 'samples': 2672640, 'steps': 5219, 'loss/train': 3.036386251449585} +02/24/2022 08:19:27 - INFO - codeparrot_training - Step 5220: {'lr': 0.00049446865175751, 'samples': 2673152, 'steps': 5220, 'loss/train': 2.484792709350586} +02/24/2022 08:19:31 - INFO - codeparrot_training - Step 5221: {'lr': 0.0004944652283393656, 'samples': 2673664, 'steps': 5221, 'loss/train': 2.8877463340759277} +02/24/2022 08:19:36 - INFO - codeparrot_training - Step 5222: {'lr': 0.0004944618038740098, 'samples': 2674176, 'steps': 5222, 'loss/train': 2.831866502761841} +02/24/2022 08:19:40 - INFO - codeparrot_training - Step 5223: {'lr': 0.0004944583783614571, 'samples': 2674688, 'steps': 5223, 'loss/train': 2.8418331146240234} +02/24/2022 08:19:45 - INFO - codeparrot_training - Step 5224: {'lr': 0.0004944549518017225, 'samples': 2675200, 'steps': 5224, 'loss/train': 3.592921257019043} +02/24/2022 08:19:49 - INFO - codeparrot_training - Step 5225: {'lr': 0.0004944515241948204, 'samples': 2675712, 'steps': 5225, 'loss/train': 2.6743624210357666} +02/24/2022 08:19:54 - INFO - codeparrot_training - Step 5226: {'lr': 0.0004944480955407656, 'samples': 2676224, 'steps': 5226, 'loss/train': 1.7635077238082886} +02/24/2022 08:19:58 - INFO - codeparrot_training - Step 5227: {'lr': 0.0004944446658395728, 'samples': 2676736, 'steps': 5227, 'loss/train': 2.057612895965576} +02/24/2022 08:20:03 - INFO - codeparrot_training - Step 5228: {'lr': 0.0004944412350912567, 'samples': 2677248, 'steps': 5228, 'loss/train': 1.861214518547058} +02/24/2022 08:20:07 - INFO - codeparrot_training - Step 5229: {'lr': 0.000494437803295832, 'samples': 2677760, 'steps': 5229, 'loss/train': 2.5382578372955322} +02/24/2022 08:20:12 - INFO - codeparrot_training - Step 5230: {'lr': 0.0004944343704533133, 'samples': 2678272, 'steps': 5230, 'loss/train': 1.9002410173416138} +02/24/2022 08:20:16 - INFO - codeparrot_training - Step 5231: {'lr': 0.0004944309365637154, 'samples': 2678784, 'steps': 5231, 'loss/train': 2.0640037059783936} +02/24/2022 08:20:22 - INFO - codeparrot_training - Step 5232: {'lr': 0.000494427501627053, 'samples': 2679296, 'steps': 5232, 'loss/train': 5.441315174102783} +02/24/2022 08:20:25 - INFO - codeparrot_training - Step 5233: {'lr': 0.0004944240656433407, 'samples': 2679808, 'steps': 5233, 'loss/train': 1.594360589981079} +02/24/2022 08:20:31 - INFO - codeparrot_training - Step 5234: {'lr': 0.0004944206286125935, 'samples': 2680320, 'steps': 5234, 'loss/train': 1.448689579963684} +02/24/2022 08:20:34 - INFO - codeparrot_training - Step 5235: {'lr': 0.0004944171905348258, 'samples': 2680832, 'steps': 5235, 'loss/train': 8.80094051361084} +02/24/2022 08:20:40 - INFO - codeparrot_training - Step 5236: {'lr': 0.0004944137514100525, 'samples': 2681344, 'steps': 5236, 'loss/train': 5.9965009689331055} +02/24/2022 08:20:43 - INFO - codeparrot_training - Step 5237: {'lr': 0.0004944103112382883, 'samples': 2681856, 'steps': 5237, 'loss/train': 2.855015993118286} +02/24/2022 08:20:49 - INFO - codeparrot_training - Step 5238: {'lr': 0.0004944068700195479, 'samples': 2682368, 'steps': 5238, 'loss/train': 3.7001540660858154} +02/24/2022 08:20:52 - INFO - codeparrot_training - Step 5239: {'lr': 0.0004944034277538462, 'samples': 2682880, 'steps': 5239, 'loss/train': 1.8518186807632446} +02/24/2022 08:20:58 - INFO - codeparrot_training - Step 5240: {'lr': 0.0004943999844411977, 'samples': 2683392, 'steps': 5240, 'loss/train': 3.2561490535736084} +02/24/2022 08:21:01 - INFO - codeparrot_training - Step 5241: {'lr': 0.0004943965400816173, 'samples': 2683904, 'steps': 5241, 'loss/train': 2.6126956939697266} +02/24/2022 08:21:07 - INFO - codeparrot_training - Step 5242: {'lr': 0.0004943930946751197, 'samples': 2684416, 'steps': 5242, 'loss/train': 2.2587852478027344} +02/24/2022 08:21:11 - INFO - codeparrot_training - Step 5243: {'lr': 0.0004943896482217197, 'samples': 2684928, 'steps': 5243, 'loss/train': 2.234565496444702} +02/24/2022 08:21:17 - INFO - codeparrot_training - Step 5244: {'lr': 0.0004943862007214322, 'samples': 2685440, 'steps': 5244, 'loss/train': 2.7044122219085693} +02/24/2022 08:21:20 - INFO - codeparrot_training - Step 5245: {'lr': 0.0004943827521742716, 'samples': 2685952, 'steps': 5245, 'loss/train': 2.0423200130462646} +02/24/2022 08:21:26 - INFO - codeparrot_training - Step 5246: {'lr': 0.000494379302580253, 'samples': 2686464, 'steps': 5246, 'loss/train': 2.8277180194854736} +02/24/2022 08:21:29 - INFO - codeparrot_training - Step 5247: {'lr': 0.000494375851939391, 'samples': 2686976, 'steps': 5247, 'loss/train': 2.1107726097106934} +02/24/2022 08:21:35 - INFO - codeparrot_training - Step 5248: {'lr': 0.0004943724002517005, 'samples': 2687488, 'steps': 5248, 'loss/train': 1.5515531301498413} +02/24/2022 08:21:38 - INFO - codeparrot_training - Step 5249: {'lr': 0.0004943689475171962, 'samples': 2688000, 'steps': 5249, 'loss/train': 2.673150062561035} +02/24/2022 08:21:44 - INFO - codeparrot_training - Step 5250: {'lr': 0.000494365493735893, 'samples': 2688512, 'steps': 5250, 'loss/train': 4.120074272155762} +02/24/2022 08:21:47 - INFO - codeparrot_training - Step 5251: {'lr': 0.0004943620389078055, 'samples': 2689024, 'steps': 5251, 'loss/train': 2.5042974948883057} +02/24/2022 08:21:53 - INFO - codeparrot_training - Step 5252: {'lr': 0.0004943585830329487, 'samples': 2689536, 'steps': 5252, 'loss/train': 1.8689889907836914} +02/24/2022 08:21:56 - INFO - codeparrot_training - Step 5253: {'lr': 0.0004943551261113373, 'samples': 2690048, 'steps': 5253, 'loss/train': 3.1838936805725098} +02/24/2022 08:22:02 - INFO - codeparrot_training - Step 5254: {'lr': 0.0004943516681429861, 'samples': 2690560, 'steps': 5254, 'loss/train': 3.069115400314331} +02/24/2022 08:22:06 - INFO - codeparrot_training - Step 5255: {'lr': 0.0004943482091279101, 'samples': 2691072, 'steps': 5255, 'loss/train': 1.9096789360046387} +02/24/2022 08:22:11 - INFO - codeparrot_training - Step 5256: {'lr': 0.0004943447490661238, 'samples': 2691584, 'steps': 5256, 'loss/train': 2.5676307678222656} +02/24/2022 08:22:15 - INFO - codeparrot_training - Step 5257: {'lr': 0.0004943412879576422, 'samples': 2692096, 'steps': 5257, 'loss/train': 3.0552666187286377} +02/24/2022 08:22:20 - INFO - codeparrot_training - Step 5258: {'lr': 0.0004943378258024802, 'samples': 2692608, 'steps': 5258, 'loss/train': 2.801168918609619} +02/24/2022 08:22:24 - INFO - codeparrot_training - Step 5259: {'lr': 0.0004943343626006524, 'samples': 2693120, 'steps': 5259, 'loss/train': 2.228573799133301} +02/24/2022 08:22:29 - INFO - codeparrot_training - Step 5260: {'lr': 0.000494330898352174, 'samples': 2693632, 'steps': 5260, 'loss/train': 2.013396978378296} +02/24/2022 08:22:33 - INFO - codeparrot_training - Step 5261: {'lr': 0.0004943274330570594, 'samples': 2694144, 'steps': 5261, 'loss/train': 2.8861701488494873} +02/24/2022 08:22:38 - INFO - codeparrot_training - Step 5262: {'lr': 0.0004943239667153237, 'samples': 2694656, 'steps': 5262, 'loss/train': 2.649702548980713} +02/24/2022 08:22:42 - INFO - codeparrot_training - Step 5263: {'lr': 0.0004943204993269818, 'samples': 2695168, 'steps': 5263, 'loss/train': 2.612884283065796} +02/24/2022 08:22:48 - INFO - codeparrot_training - Step 5264: {'lr': 0.0004943170308920483, 'samples': 2695680, 'steps': 5264, 'loss/train': 2.069972038269043} +02/24/2022 08:22:51 - INFO - codeparrot_training - Step 5265: {'lr': 0.0004943135614105384, 'samples': 2696192, 'steps': 5265, 'loss/train': 3.1454107761383057} +02/24/2022 08:22:57 - INFO - codeparrot_training - Step 5266: {'lr': 0.0004943100908824667, 'samples': 2696704, 'steps': 5266, 'loss/train': 2.6592893600463867} +02/24/2022 08:23:00 - INFO - codeparrot_training - Step 5267: {'lr': 0.0004943066193078482, 'samples': 2697216, 'steps': 5267, 'loss/train': 2.364511489868164} +02/24/2022 08:23:06 - INFO - codeparrot_training - Step 5268: {'lr': 0.0004943031466866976, 'samples': 2697728, 'steps': 5268, 'loss/train': 2.4398560523986816} +02/24/2022 08:23:09 - INFO - codeparrot_training - Step 5269: {'lr': 0.00049429967301903, 'samples': 2698240, 'steps': 5269, 'loss/train': 2.7816364765167236} +02/24/2022 08:23:15 - INFO - codeparrot_training - Step 5270: {'lr': 0.0004942961983048601, 'samples': 2698752, 'steps': 5270, 'loss/train': 2.5790302753448486} +02/24/2022 08:23:20 - INFO - codeparrot_training - Step 5271: {'lr': 0.0004942927225442029, 'samples': 2699264, 'steps': 5271, 'loss/train': 2.625483989715576} +02/24/2022 08:23:24 - INFO - codeparrot_training - Step 5272: {'lr': 0.0004942892457370732, 'samples': 2699776, 'steps': 5272, 'loss/train': 2.663005828857422} +02/24/2022 08:23:29 - INFO - codeparrot_training - Step 5273: {'lr': 0.000494285767883486, 'samples': 2700288, 'steps': 5273, 'loss/train': 2.2452316284179688} +02/24/2022 08:23:33 - INFO - codeparrot_training - Step 5274: {'lr': 0.0004942822889834562, 'samples': 2700800, 'steps': 5274, 'loss/train': 1.6624966859817505} +02/24/2022 08:23:38 - INFO - codeparrot_training - Step 5275: {'lr': 0.0004942788090369985, 'samples': 2701312, 'steps': 5275, 'loss/train': 2.4746954441070557} +02/24/2022 08:23:42 - INFO - codeparrot_training - Step 5276: {'lr': 0.0004942753280441281, 'samples': 2701824, 'steps': 5276, 'loss/train': 1.6270679235458374} +02/24/2022 08:23:47 - INFO - codeparrot_training - Step 5277: {'lr': 0.0004942718460048596, 'samples': 2702336, 'steps': 5277, 'loss/train': 2.141404628753662} +02/24/2022 08:23:51 - INFO - codeparrot_training - Step 5278: {'lr': 0.0004942683629192082, 'samples': 2702848, 'steps': 5278, 'loss/train': 1.053605079650879} +02/24/2022 08:23:57 - INFO - codeparrot_training - Step 5279: {'lr': 0.0004942648787871886, 'samples': 2703360, 'steps': 5279, 'loss/train': 1.9970529079437256} +02/24/2022 08:24:00 - INFO - codeparrot_training - Step 5280: {'lr': 0.000494261393608816, 'samples': 2703872, 'steps': 5280, 'loss/train': 1.7869811058044434} +02/24/2022 08:24:06 - INFO - codeparrot_training - Step 5281: {'lr': 0.0004942579073841049, 'samples': 2704384, 'steps': 5281, 'loss/train': 1.0113152265548706} +02/24/2022 08:24:09 - INFO - codeparrot_training - Step 5282: {'lr': 0.0004942544201130706, 'samples': 2704896, 'steps': 5282, 'loss/train': 1.235727310180664} +02/24/2022 08:24:15 - INFO - codeparrot_training - Step 5283: {'lr': 0.000494250931795728, 'samples': 2705408, 'steps': 5283, 'loss/train': 0.6270483136177063} +02/24/2022 08:24:18 - INFO - codeparrot_training - Step 5284: {'lr': 0.0004942474424320919, 'samples': 2705920, 'steps': 5284, 'loss/train': 1.5817296504974365} +02/24/2022 08:24:24 - INFO - codeparrot_training - Step 5285: {'lr': 0.0004942439520221774, 'samples': 2706432, 'steps': 5285, 'loss/train': 2.4787728786468506} +02/24/2022 08:24:27 - INFO - codeparrot_training - Step 5286: {'lr': 0.0004942404605659991, 'samples': 2706944, 'steps': 5286, 'loss/train': 2.474702835083008} +02/24/2022 08:24:33 - INFO - codeparrot_training - Step 5287: {'lr': 0.0004942369680635724, 'samples': 2707456, 'steps': 5287, 'loss/train': 1.1714990139007568} +02/24/2022 08:24:36 - INFO - codeparrot_training - Step 5288: {'lr': 0.0004942334745149122, 'samples': 2707968, 'steps': 5288, 'loss/train': 2.6758017539978027} +02/24/2022 08:24:43 - INFO - codeparrot_training - Step 5289: {'lr': 0.0004942299799200332, 'samples': 2708480, 'steps': 5289, 'loss/train': 2.175381898880005} +02/24/2022 08:24:46 - INFO - codeparrot_training - Step 5290: {'lr': 0.0004942264842789506, 'samples': 2708992, 'steps': 5290, 'loss/train': 2.2731714248657227} +02/24/2022 08:24:52 - INFO - codeparrot_training - Step 5291: {'lr': 0.0004942229875916792, 'samples': 2709504, 'steps': 5291, 'loss/train': 1.9897421598434448} +02/24/2022 08:24:55 - INFO - codeparrot_training - Step 5292: {'lr': 0.0004942194898582341, 'samples': 2710016, 'steps': 5292, 'loss/train': 1.783277153968811} +02/24/2022 08:25:01 - INFO - codeparrot_training - Step 5293: {'lr': 0.0004942159910786303, 'samples': 2710528, 'steps': 5293, 'loss/train': 3.5113205909729004} +02/24/2022 08:25:04 - INFO - codeparrot_training - Step 5294: {'lr': 0.0004942124912528827, 'samples': 2711040, 'steps': 5294, 'loss/train': 2.5687026977539062} +02/24/2022 08:25:10 - INFO - codeparrot_training - Step 5295: {'lr': 0.0004942089903810064, 'samples': 2711552, 'steps': 5295, 'loss/train': 1.251427412033081} +02/24/2022 08:25:13 - INFO - codeparrot_training - Step 5296: {'lr': 0.0004942054884630162, 'samples': 2712064, 'steps': 5296, 'loss/train': 3.1526901721954346} +02/24/2022 08:25:19 - INFO - codeparrot_training - Step 5297: {'lr': 0.0004942019854989274, 'samples': 2712576, 'steps': 5297, 'loss/train': 2.863186836242676} +02/24/2022 08:25:22 - INFO - codeparrot_training - Step 5298: {'lr': 0.0004941984814887546, 'samples': 2713088, 'steps': 5298, 'loss/train': 1.3744895458221436} +02/24/2022 08:25:28 - INFO - codeparrot_training - Step 5299: {'lr': 0.0004941949764325133, 'samples': 2713600, 'steps': 5299, 'loss/train': 2.7279162406921387} +02/24/2022 08:25:32 - INFO - codeparrot_training - Step 5300: {'lr': 0.0004941914703302181, 'samples': 2714112, 'steps': 5300, 'loss/train': 4.265825271606445} +02/24/2022 08:25:37 - INFO - codeparrot_training - Step 5301: {'lr': 0.0004941879631818843, 'samples': 2714624, 'steps': 5301, 'loss/train': 3.0422871112823486} +02/24/2022 08:25:41 - INFO - codeparrot_training - Step 5302: {'lr': 0.0004941844549875267, 'samples': 2715136, 'steps': 5302, 'loss/train': 2.122356653213501} +02/24/2022 08:25:46 - INFO - codeparrot_training - Step 5303: {'lr': 0.0004941809457471605, 'samples': 2715648, 'steps': 5303, 'loss/train': 2.3341476917266846} +02/24/2022 08:25:50 - INFO - codeparrot_training - Step 5304: {'lr': 0.0004941774354608006, 'samples': 2716160, 'steps': 5304, 'loss/train': 2.035097360610962} +02/24/2022 08:25:55 - INFO - codeparrot_training - Step 5305: {'lr': 0.0004941739241284621, 'samples': 2716672, 'steps': 5305, 'loss/train': 2.286090612411499} +02/24/2022 08:25:59 - INFO - codeparrot_training - Step 5306: {'lr': 0.0004941704117501601, 'samples': 2717184, 'steps': 5306, 'loss/train': 1.87517511844635} +02/24/2022 08:26:04 - INFO - codeparrot_training - Step 5307: {'lr': 0.0004941668983259095, 'samples': 2717696, 'steps': 5307, 'loss/train': 1.714046597480774} +02/24/2022 08:26:08 - INFO - codeparrot_training - Step 5308: {'lr': 0.0004941633838557256, 'samples': 2718208, 'steps': 5308, 'loss/train': 2.1482901573181152} +02/24/2022 08:26:14 - INFO - codeparrot_training - Step 5309: {'lr': 0.0004941598683396232, 'samples': 2718720, 'steps': 5309, 'loss/train': 3.1135590076446533} +02/24/2022 08:26:17 - INFO - codeparrot_training - Step 5310: {'lr': 0.0004941563517776174, 'samples': 2719232, 'steps': 5310, 'loss/train': 2.7894322872161865} +02/24/2022 08:26:23 - INFO - codeparrot_training - Step 5311: {'lr': 0.0004941528341697234, 'samples': 2719744, 'steps': 5311, 'loss/train': 2.3022868633270264} +02/24/2022 08:26:26 - INFO - codeparrot_training - Step 5312: {'lr': 0.0004941493155159562, 'samples': 2720256, 'steps': 5312, 'loss/train': 0.8195092678070068} +02/24/2022 08:26:32 - INFO - codeparrot_training - Step 5313: {'lr': 0.0004941457958163308, 'samples': 2720768, 'steps': 5313, 'loss/train': 2.694387674331665} +02/24/2022 08:26:35 - INFO - codeparrot_training - Step 5314: {'lr': 0.0004941422750708623, 'samples': 2721280, 'steps': 5314, 'loss/train': 2.1537344455718994} +02/24/2022 08:26:41 - INFO - codeparrot_training - Step 5315: {'lr': 0.0004941387532795659, 'samples': 2721792, 'steps': 5315, 'loss/train': 3.722724437713623} +02/24/2022 08:26:45 - INFO - codeparrot_training - Step 5316: {'lr': 0.0004941352304424566, 'samples': 2722304, 'steps': 5316, 'loss/train': 2.6303935050964355} +02/24/2022 08:26:50 - INFO - codeparrot_training - Step 5317: {'lr': 0.0004941317065595495, 'samples': 2722816, 'steps': 5317, 'loss/train': 1.962284803390503} +02/24/2022 08:26:56 - INFO - codeparrot_training - Step 5318: {'lr': 0.0004941281816308596, 'samples': 2723328, 'steps': 5318, 'loss/train': 2.6302943229675293} +02/24/2022 08:26:59 - INFO - codeparrot_training - Step 5319: {'lr': 0.0004941246556564021, 'samples': 2723840, 'steps': 5319, 'loss/train': 1.9872844219207764} +02/24/2022 08:27:05 - INFO - codeparrot_training - Step 5320: {'lr': 0.0004941211286361922, 'samples': 2724352, 'steps': 5320, 'loss/train': 2.951214551925659} +02/24/2022 08:27:08 - INFO - codeparrot_training - Step 5321: {'lr': 0.0004941176005702448, 'samples': 2724864, 'steps': 5321, 'loss/train': 0.1776892989873886} +02/24/2022 08:27:14 - INFO - codeparrot_training - Step 5322: {'lr': 0.0004941140714585752, 'samples': 2725376, 'steps': 5322, 'loss/train': 3.4030232429504395} +02/24/2022 08:27:17 - INFO - codeparrot_training - Step 5323: {'lr': 0.0004941105413011984, 'samples': 2725888, 'steps': 5323, 'loss/train': 2.8220651149749756} +02/24/2022 08:27:24 - INFO - codeparrot_training - Step 5324: {'lr': 0.0004941070100981295, 'samples': 2726400, 'steps': 5324, 'loss/train': 2.2283096313476562} +02/24/2022 08:27:27 - INFO - codeparrot_training - Step 5325: {'lr': 0.0004941034778493837, 'samples': 2726912, 'steps': 5325, 'loss/train': 2.513944387435913} +02/24/2022 08:27:30 - INFO - codeparrot_training - Step 5326: {'lr': 0.0004940999445549762, 'samples': 2727424, 'steps': 5326, 'loss/train': 0.9540320038795471} +02/24/2022 08:27:36 - INFO - codeparrot_training - Step 5327: {'lr': 0.0004940964102149219, 'samples': 2727936, 'steps': 5327, 'loss/train': 2.9037890434265137} +02/24/2022 08:27:39 - INFO - codeparrot_training - Step 5328: {'lr': 0.0004940928748292363, 'samples': 2728448, 'steps': 5328, 'loss/train': 2.8778762817382812} +02/24/2022 08:27:45 - INFO - codeparrot_training - Step 5329: {'lr': 0.0004940893383979341, 'samples': 2728960, 'steps': 5329, 'loss/train': 1.5792394876480103} +02/24/2022 08:27:48 - INFO - codeparrot_training - Step 5330: {'lr': 0.0004940858009210308, 'samples': 2729472, 'steps': 5330, 'loss/train': 1.6234550476074219} +02/24/2022 08:27:54 - INFO - codeparrot_training - Step 5331: {'lr': 0.0004940822623985414, 'samples': 2729984, 'steps': 5331, 'loss/train': 3.0696349143981934} +02/24/2022 08:27:57 - INFO - codeparrot_training - Step 5332: {'lr': 0.0004940787228304811, 'samples': 2730496, 'steps': 5332, 'loss/train': 3.5931174755096436} +02/24/2022 08:28:03 - INFO - codeparrot_training - Step 5333: {'lr': 0.0004940751822168651, 'samples': 2731008, 'steps': 5333, 'loss/train': 2.6041266918182373} +02/24/2022 08:28:09 - INFO - codeparrot_training - Step 5334: {'lr': 0.0004940716405577086, 'samples': 2731520, 'steps': 5334, 'loss/train': 2.4724690914154053} +02/24/2022 08:28:13 - INFO - codeparrot_training - Step 5335: {'lr': 0.0004940680978530265, 'samples': 2732032, 'steps': 5335, 'loss/train': 2.6429800987243652} +02/24/2022 08:28:18 - INFO - codeparrot_training - Step 5336: {'lr': 0.0004940645541028343, 'samples': 2732544, 'steps': 5336, 'loss/train': 1.3455263376235962} +02/24/2022 08:28:22 - INFO - codeparrot_training - Step 5337: {'lr': 0.0004940610093071469, 'samples': 2733056, 'steps': 5337, 'loss/train': 4.012539386749268} +02/24/2022 08:28:27 - INFO - codeparrot_training - Step 5338: {'lr': 0.0004940574634659798, 'samples': 2733568, 'steps': 5338, 'loss/train': 2.4877099990844727} +02/24/2022 08:28:31 - INFO - codeparrot_training - Step 5339: {'lr': 0.000494053916579348, 'samples': 2734080, 'steps': 5339, 'loss/train': 2.3281702995300293} +02/24/2022 08:28:36 - INFO - codeparrot_training - Step 5340: {'lr': 0.0004940503686472667, 'samples': 2734592, 'steps': 5340, 'loss/train': 2.581996202468872} +02/24/2022 08:28:40 - INFO - codeparrot_training - Step 5341: {'lr': 0.0004940468196697511, 'samples': 2735104, 'steps': 5341, 'loss/train': 2.3619203567504883} +02/24/2022 08:28:43 - INFO - codeparrot_training - Step 5342: {'lr': 0.0004940432696468164, 'samples': 2735616, 'steps': 5342, 'loss/train': 3.4700541496276855} +02/24/2022 08:28:49 - INFO - codeparrot_training - Step 5343: {'lr': 0.0004940397185784778, 'samples': 2736128, 'steps': 5343, 'loss/train': 3.0689940452575684} +02/24/2022 08:28:55 - INFO - codeparrot_training - Step 5344: {'lr': 0.0004940361664647506, 'samples': 2736640, 'steps': 5344, 'loss/train': 2.58054780960083} +02/24/2022 08:28:58 - INFO - codeparrot_training - Step 5345: {'lr': 0.0004940326133056499, 'samples': 2737152, 'steps': 5345, 'loss/train': 0.9639036655426025} +02/24/2022 08:29:04 - INFO - codeparrot_training - Step 5346: {'lr': 0.000494029059101191, 'samples': 2737664, 'steps': 5346, 'loss/train': 1.3676584959030151} +02/24/2022 08:29:07 - INFO - codeparrot_training - Step 5347: {'lr': 0.0004940255038513891, 'samples': 2738176, 'steps': 5347, 'loss/train': 2.4100656509399414} +02/24/2022 08:29:11 - INFO - codeparrot_training - Step 5348: {'lr': 0.0004940219475562593, 'samples': 2738688, 'steps': 5348, 'loss/train': 0.26062944531440735} +02/24/2022 08:29:16 - INFO - codeparrot_training - Step 5349: {'lr': 0.0004940183902158172, 'samples': 2739200, 'steps': 5349, 'loss/train': 3.099381923675537} +02/24/2022 08:29:22 - INFO - codeparrot_training - Step 5350: {'lr': 0.0004940148318300777, 'samples': 2739712, 'steps': 5350, 'loss/train': 2.632383346557617} +02/24/2022 08:29:25 - INFO - codeparrot_training - Step 5351: {'lr': 0.0004940112723990561, 'samples': 2740224, 'steps': 5351, 'loss/train': 3.2728219032287598} +02/24/2022 08:29:31 - INFO - codeparrot_training - Step 5352: {'lr': 0.0004940077119227678, 'samples': 2740736, 'steps': 5352, 'loss/train': 2.341137409210205} +02/24/2022 08:29:34 - INFO - codeparrot_training - Step 5353: {'lr': 0.0004940041504012279, 'samples': 2741248, 'steps': 5353, 'loss/train': 3.1377363204956055} +02/24/2022 08:29:41 - INFO - codeparrot_training - Step 5354: {'lr': 0.0004940005878344517, 'samples': 2741760, 'steps': 5354, 'loss/train': 3.3415699005126953} +02/24/2022 08:29:44 - INFO - codeparrot_training - Step 5355: {'lr': 0.0004939970242224544, 'samples': 2742272, 'steps': 5355, 'loss/train': 2.1214072704315186} +02/24/2022 08:29:50 - INFO - codeparrot_training - Step 5356: {'lr': 0.0004939934595652513, 'samples': 2742784, 'steps': 5356, 'loss/train': 1.4875153303146362} +02/24/2022 08:29:53 - INFO - codeparrot_training - Step 5357: {'lr': 0.0004939898938628578, 'samples': 2743296, 'steps': 5357, 'loss/train': 2.943678379058838} +02/24/2022 08:29:59 - INFO - codeparrot_training - Step 5358: {'lr': 0.000493986327115289, 'samples': 2743808, 'steps': 5358, 'loss/train': 2.513535737991333} +02/24/2022 08:30:02 - INFO - codeparrot_training - Step 5359: {'lr': 0.0004939827593225602, 'samples': 2744320, 'steps': 5359, 'loss/train': 1.2540435791015625} +02/24/2022 08:30:08 - INFO - codeparrot_training - Step 5360: {'lr': 0.0004939791904846869, 'samples': 2744832, 'steps': 5360, 'loss/train': 0.37946397066116333} +02/24/2022 08:30:11 - INFO - codeparrot_training - Step 5361: {'lr': 0.0004939756206016841, 'samples': 2745344, 'steps': 5361, 'loss/train': 1.523438811302185} +02/24/2022 08:30:17 - INFO - codeparrot_training - Step 5362: {'lr': 0.0004939720496735672, 'samples': 2745856, 'steps': 5362, 'loss/train': 1.9133459329605103} +02/24/2022 08:30:20 - INFO - codeparrot_training - Step 5363: {'lr': 0.0004939684777003516, 'samples': 2746368, 'steps': 5363, 'loss/train': 1.180401086807251} +02/24/2022 08:30:26 - INFO - codeparrot_training - Step 5364: {'lr': 0.0004939649046820524, 'samples': 2746880, 'steps': 5364, 'loss/train': 2.737797498703003} +02/24/2022 08:30:29 - INFO - codeparrot_training - Step 5365: {'lr': 0.0004939613306186851, 'samples': 2747392, 'steps': 5365, 'loss/train': 1.868486762046814} +02/24/2022 08:30:35 - INFO - codeparrot_training - Step 5366: {'lr': 0.0004939577555102649, 'samples': 2747904, 'steps': 5366, 'loss/train': 2.5798048973083496} +02/24/2022 08:30:38 - INFO - codeparrot_training - Step 5367: {'lr': 0.0004939541793568072, 'samples': 2748416, 'steps': 5367, 'loss/train': 2.7518157958984375} +02/24/2022 08:30:44 - INFO - codeparrot_training - Step 5368: {'lr': 0.000493950602158327, 'samples': 2748928, 'steps': 5368, 'loss/train': 3.122177839279175} +02/24/2022 08:30:47 - INFO - codeparrot_training - Step 5369: {'lr': 0.0004939470239148403, 'samples': 2749440, 'steps': 5369, 'loss/train': 1.493192195892334} +02/24/2022 08:30:54 - INFO - codeparrot_training - Step 5370: {'lr': 0.0004939434446263617, 'samples': 2749952, 'steps': 5370, 'loss/train': 3.5367205142974854} +02/24/2022 08:30:57 - INFO - codeparrot_training - Step 5371: {'lr': 0.000493939864292907, 'samples': 2750464, 'steps': 5371, 'loss/train': 2.0677380561828613} +02/24/2022 08:31:03 - INFO - codeparrot_training - Step 5372: {'lr': 0.0004939362829144913, 'samples': 2750976, 'steps': 5372, 'loss/train': 3.0955259799957275} +02/24/2022 08:31:06 - INFO - codeparrot_training - Step 5373: {'lr': 0.00049393270049113, 'samples': 2751488, 'steps': 5373, 'loss/train': 1.9950581789016724} +02/24/2022 08:31:12 - INFO - codeparrot_training - Step 5374: {'lr': 0.0004939291170228385, 'samples': 2752000, 'steps': 5374, 'loss/train': 2.679351568222046} +02/24/2022 08:31:15 - INFO - codeparrot_training - Step 5375: {'lr': 0.0004939255325096321, 'samples': 2752512, 'steps': 5375, 'loss/train': 2.3657546043395996} +02/24/2022 08:31:21 - INFO - codeparrot_training - Step 5376: {'lr': 0.0004939219469515262, 'samples': 2753024, 'steps': 5376, 'loss/train': 2.3725273609161377} +02/24/2022 08:31:24 - INFO - codeparrot_training - Step 5377: {'lr': 0.0004939183603485363, 'samples': 2753536, 'steps': 5377, 'loss/train': 2.000995397567749} +02/24/2022 08:31:30 - INFO - codeparrot_training - Step 5378: {'lr': 0.0004939147727006773, 'samples': 2754048, 'steps': 5378, 'loss/train': 2.109294891357422} +02/24/2022 08:31:33 - INFO - codeparrot_training - Step 5379: {'lr': 0.000493911184007965, 'samples': 2754560, 'steps': 5379, 'loss/train': 3.860227584838867} +02/24/2022 08:31:39 - INFO - codeparrot_training - Step 5380: {'lr': 0.0004939075942704147, 'samples': 2755072, 'steps': 5380, 'loss/train': 2.5177245140075684} +02/24/2022 08:31:43 - INFO - codeparrot_training - Step 5381: {'lr': 0.0004939040034880416, 'samples': 2755584, 'steps': 5381, 'loss/train': 2.001504898071289} +02/24/2022 08:31:48 - INFO - codeparrot_training - Step 5382: {'lr': 0.0004939004116608612, 'samples': 2756096, 'steps': 5382, 'loss/train': 3.1134586334228516} +02/24/2022 08:31:52 - INFO - codeparrot_training - Step 5383: {'lr': 0.000493896818788889, 'samples': 2756608, 'steps': 5383, 'loss/train': 1.1286685466766357} +02/24/2022 08:31:57 - INFO - codeparrot_training - Step 5384: {'lr': 0.0004938932248721401, 'samples': 2757120, 'steps': 5384, 'loss/train': 2.4488136768341064} +02/24/2022 08:32:01 - INFO - codeparrot_training - Step 5385: {'lr': 0.0004938896299106302, 'samples': 2757632, 'steps': 5385, 'loss/train': 1.8871541023254395} +02/24/2022 08:32:06 - INFO - codeparrot_training - Step 5386: {'lr': 0.0004938860339043746, 'samples': 2758144, 'steps': 5386, 'loss/train': 0.17116351425647736} +02/24/2022 08:32:10 - INFO - codeparrot_training - Step 5387: {'lr': 0.0004938824368533886, 'samples': 2758656, 'steps': 5387, 'loss/train': 2.8661715984344482} +02/24/2022 08:32:15 - INFO - codeparrot_training - Step 5388: {'lr': 0.0004938788387576878, 'samples': 2759168, 'steps': 5388, 'loss/train': 1.751478672027588} +02/24/2022 08:32:19 - INFO - codeparrot_training - Step 5389: {'lr': 0.0004938752396172873, 'samples': 2759680, 'steps': 5389, 'loss/train': 1.6210495233535767} +02/24/2022 08:32:26 - INFO - codeparrot_training - Step 5390: {'lr': 0.0004938716394322028, 'samples': 2760192, 'steps': 5390, 'loss/train': 1.0106014013290405} +02/24/2022 08:32:30 - INFO - codeparrot_training - Step 5391: {'lr': 0.0004938680382024497, 'samples': 2760704, 'steps': 5391, 'loss/train': 2.8358685970306396} +02/24/2022 08:32:35 - INFO - codeparrot_training - Step 5392: {'lr': 0.0004938644359280433, 'samples': 2761216, 'steps': 5392, 'loss/train': 2.2548182010650635} +02/24/2022 08:32:39 - INFO - codeparrot_training - Step 5393: {'lr': 0.000493860832608999, 'samples': 2761728, 'steps': 5393, 'loss/train': 2.348198652267456} +02/24/2022 08:32:44 - INFO - codeparrot_training - Step 5394: {'lr': 0.0004938572282453326, 'samples': 2762240, 'steps': 5394, 'loss/train': 2.4740381240844727} +02/24/2022 08:32:48 - INFO - codeparrot_training - Step 5395: {'lr': 0.000493853622837059, 'samples': 2762752, 'steps': 5395, 'loss/train': 2.233853816986084} +02/24/2022 08:32:54 - INFO - codeparrot_training - Step 5396: {'lr': 0.000493850016384194, 'samples': 2763264, 'steps': 5396, 'loss/train': 0.5000482201576233} +02/24/2022 08:32:57 - INFO - codeparrot_training - Step 5397: {'lr': 0.000493846408886753, 'samples': 2763776, 'steps': 5397, 'loss/train': 2.442148208618164} +02/24/2022 08:33:03 - INFO - codeparrot_training - Step 5398: {'lr': 0.0004938428003447514, 'samples': 2764288, 'steps': 5398, 'loss/train': 2.3250579833984375} +02/24/2022 08:33:06 - INFO - codeparrot_training - Step 5399: {'lr': 0.0004938391907582046, 'samples': 2764800, 'steps': 5399, 'loss/train': 3.1738271713256836} +02/24/2022 08:33:14 - INFO - codeparrot_training - Step 5400: {'lr': 0.0004938355801271282, 'samples': 2765312, 'steps': 5400, 'loss/train': 2.0156095027923584} +02/24/2022 08:33:17 - INFO - codeparrot_training - Step 5401: {'lr': 0.0004938319684515375, 'samples': 2765824, 'steps': 5401, 'loss/train': 1.7207632064819336} +02/24/2022 08:33:23 - INFO - codeparrot_training - Step 5402: {'lr': 0.0004938283557314483, 'samples': 2766336, 'steps': 5402, 'loss/train': 1.1186944246292114} +02/24/2022 08:33:26 - INFO - codeparrot_training - Step 5403: {'lr': 0.0004938247419668757, 'samples': 2766848, 'steps': 5403, 'loss/train': 3.0246737003326416} +02/24/2022 08:33:32 - INFO - codeparrot_training - Step 5404: {'lr': 0.0004938211271578352, 'samples': 2767360, 'steps': 5404, 'loss/train': 3.333437204360962} +02/24/2022 08:33:35 - INFO - codeparrot_training - Step 5405: {'lr': 0.0004938175113043426, 'samples': 2767872, 'steps': 5405, 'loss/train': 3.0279159545898438} +02/24/2022 08:33:41 - INFO - codeparrot_training - Step 5406: {'lr': 0.0004938138944064131, 'samples': 2768384, 'steps': 5406, 'loss/train': 2.5381946563720703} +02/24/2022 08:33:44 - INFO - codeparrot_training - Step 5407: {'lr': 0.0004938102764640624, 'samples': 2768896, 'steps': 5407, 'loss/train': 1.55325186252594} +02/24/2022 08:33:50 - INFO - codeparrot_training - Step 5408: {'lr': 0.0004938066574773058, 'samples': 2769408, 'steps': 5408, 'loss/train': 2.9902753829956055} +02/24/2022 08:33:53 - INFO - codeparrot_training - Step 5409: {'lr': 0.000493803037446159, 'samples': 2769920, 'steps': 5409, 'loss/train': 2.5365686416625977} +02/24/2022 08:34:01 - INFO - codeparrot_training - Step 5410: {'lr': 0.0004937994163706374, 'samples': 2770432, 'steps': 5410, 'loss/train': 3.490772247314453} +02/24/2022 08:34:04 - INFO - codeparrot_training - Step 5411: {'lr': 0.0004937957942507564, 'samples': 2770944, 'steps': 5411, 'loss/train': 1.6447222232818604} +02/24/2022 08:34:10 - INFO - codeparrot_training - Step 5412: {'lr': 0.0004937921710865317, 'samples': 2771456, 'steps': 5412, 'loss/train': 2.1643147468566895} +02/24/2022 08:34:13 - INFO - codeparrot_training - Step 5413: {'lr': 0.0004937885468779787, 'samples': 2771968, 'steps': 5413, 'loss/train': 2.4104678630828857} +02/24/2022 08:34:19 - INFO - codeparrot_training - Step 5414: {'lr': 0.000493784921625113, 'samples': 2772480, 'steps': 5414, 'loss/train': 1.894930124282837} +02/24/2022 08:34:22 - INFO - codeparrot_training - Step 5415: {'lr': 0.0004937812953279502, 'samples': 2772992, 'steps': 5415, 'loss/train': 2.163015365600586} +02/24/2022 08:34:28 - INFO - codeparrot_training - Step 5416: {'lr': 0.0004937776679865057, 'samples': 2773504, 'steps': 5416, 'loss/train': 3.095672130584717} +02/24/2022 08:34:31 - INFO - codeparrot_training - Step 5417: {'lr': 0.000493774039600795, 'samples': 2774016, 'steps': 5417, 'loss/train': 2.0287392139434814} +02/24/2022 08:34:37 - INFO - codeparrot_training - Step 5418: {'lr': 0.0004937704101708338, 'samples': 2774528, 'steps': 5418, 'loss/train': 2.356771230697632} +02/24/2022 08:34:40 - INFO - codeparrot_training - Step 5419: {'lr': 0.0004937667796966374, 'samples': 2775040, 'steps': 5419, 'loss/train': 2.3700101375579834} +02/24/2022 08:34:46 - INFO - codeparrot_training - Step 5420: {'lr': 0.0004937631481782218, 'samples': 2775552, 'steps': 5420, 'loss/train': 2.759166717529297} +02/24/2022 08:34:49 - INFO - codeparrot_training - Step 5421: {'lr': 0.000493759515615602, 'samples': 2776064, 'steps': 5421, 'loss/train': 2.496807336807251} +02/24/2022 08:34:57 - INFO - codeparrot_training - Step 5422: {'lr': 0.000493755882008794, 'samples': 2776576, 'steps': 5422, 'loss/train': 2.534860372543335} +02/24/2022 08:35:00 - INFO - codeparrot_training - Step 5423: {'lr': 0.0004937522473578132, 'samples': 2777088, 'steps': 5423, 'loss/train': 1.8650087118148804} +02/24/2022 08:35:06 - INFO - codeparrot_training - Step 5424: {'lr': 0.0004937486116626752, 'samples': 2777600, 'steps': 5424, 'loss/train': 2.7128074169158936} +02/24/2022 08:35:09 - INFO - codeparrot_training - Step 5425: {'lr': 0.0004937449749233954, 'samples': 2778112, 'steps': 5425, 'loss/train': 1.823857307434082} +02/24/2022 08:35:15 - INFO - codeparrot_training - Step 5426: {'lr': 0.0004937413371399897, 'samples': 2778624, 'steps': 5426, 'loss/train': 2.4024977684020996} +02/24/2022 08:35:18 - INFO - codeparrot_training - Step 5427: {'lr': 0.0004937376983124734, 'samples': 2779136, 'steps': 5427, 'loss/train': 1.462932825088501} +02/24/2022 08:35:24 - INFO - codeparrot_training - Step 5428: {'lr': 0.0004937340584408622, 'samples': 2779648, 'steps': 5428, 'loss/train': 1.3764538764953613} +02/24/2022 08:35:27 - INFO - codeparrot_training - Step 5429: {'lr': 0.0004937304175251717, 'samples': 2780160, 'steps': 5429, 'loss/train': 2.4385201930999756} +02/24/2022 08:35:33 - INFO - codeparrot_training - Step 5430: {'lr': 0.0004937267755654174, 'samples': 2780672, 'steps': 5430, 'loss/train': 3.247731924057007} +02/24/2022 08:35:36 - INFO - codeparrot_training - Step 5431: {'lr': 0.0004937231325616152, 'samples': 2781184, 'steps': 5431, 'loss/train': 1.6430755853652954} +02/24/2022 08:35:42 - INFO - codeparrot_training - Step 5432: {'lr': 0.0004937194885137803, 'samples': 2781696, 'steps': 5432, 'loss/train': 2.4172794818878174} +02/24/2022 08:35:45 - INFO - codeparrot_training - Step 5433: {'lr': 0.0004937158434219286, 'samples': 2782208, 'steps': 5433, 'loss/train': 2.495945930480957} +02/24/2022 08:35:51 - INFO - codeparrot_training - Step 5434: {'lr': 0.0004937121972860755, 'samples': 2782720, 'steps': 5434, 'loss/train': 3.0654258728027344} +02/24/2022 08:35:54 - INFO - codeparrot_training - Step 5435: {'lr': 0.0004937085501062369, 'samples': 2783232, 'steps': 5435, 'loss/train': 1.7805092334747314} +02/24/2022 08:36:01 - INFO - codeparrot_training - Step 5436: {'lr': 0.0004937049018824282, 'samples': 2783744, 'steps': 5436, 'loss/train': 2.5049848556518555} +02/24/2022 08:36:07 - INFO - codeparrot_training - Step 5437: {'lr': 0.000493701252614665, 'samples': 2784256, 'steps': 5437, 'loss/train': 2.8795433044433594} +02/24/2022 08:36:11 - INFO - codeparrot_training - Step 5438: {'lr': 0.0004936976023029631, 'samples': 2784768, 'steps': 5438, 'loss/train': 2.858910083770752} +02/24/2022 08:36:16 - INFO - codeparrot_training - Step 5439: {'lr': 0.000493693950947338, 'samples': 2785280, 'steps': 5439, 'loss/train': 2.472738742828369} +02/24/2022 08:36:20 - INFO - codeparrot_training - Step 5440: {'lr': 0.0004936902985478055, 'samples': 2785792, 'steps': 5440, 'loss/train': 2.654956102371216} +02/24/2022 08:36:25 - INFO - codeparrot_training - Step 5441: {'lr': 0.000493686645104381, 'samples': 2786304, 'steps': 5441, 'loss/train': 3.063326358795166} +02/24/2022 08:36:29 - INFO - codeparrot_training - Step 5442: {'lr': 0.0004936829906170804, 'samples': 2786816, 'steps': 5442, 'loss/train': 1.4038671255111694} +02/24/2022 08:36:34 - INFO - codeparrot_training - Step 5443: {'lr': 0.0004936793350859192, 'samples': 2787328, 'steps': 5443, 'loss/train': 2.1081933975219727} +02/24/2022 08:36:38 - INFO - codeparrot_training - Step 5444: {'lr': 0.0004936756785109131, 'samples': 2787840, 'steps': 5444, 'loss/train': 2.282024383544922} +02/24/2022 08:36:45 - INFO - codeparrot_training - Step 5445: {'lr': 0.0004936720208920778, 'samples': 2788352, 'steps': 5445, 'loss/train': 2.9123265743255615} +02/24/2022 08:36:48 - INFO - codeparrot_training - Step 5446: {'lr': 0.0004936683622294289, 'samples': 2788864, 'steps': 5446, 'loss/train': 3.220599412918091} +02/24/2022 08:36:54 - INFO - codeparrot_training - Step 5447: {'lr': 0.0004936647025229822, 'samples': 2789376, 'steps': 5447, 'loss/train': 1.7034388780593872} +02/24/2022 08:36:57 - INFO - codeparrot_training - Step 5448: {'lr': 0.0004936610417727532, 'samples': 2789888, 'steps': 5448, 'loss/train': 0.8039000034332275} +02/24/2022 08:37:03 - INFO - codeparrot_training - Step 5449: {'lr': 0.0004936573799787575, 'samples': 2790400, 'steps': 5449, 'loss/train': 3.2148349285125732} +02/24/2022 08:37:06 - INFO - codeparrot_training - Step 5450: {'lr': 0.0004936537171410112, 'samples': 2790912, 'steps': 5450, 'loss/train': 2.899169683456421} +02/24/2022 08:37:12 - INFO - codeparrot_training - Step 5451: {'lr': 0.0004936500532595297, 'samples': 2791424, 'steps': 5451, 'loss/train': 3.7909696102142334} +02/24/2022 08:37:16 - INFO - codeparrot_training - Step 5452: {'lr': 0.0004936463883343287, 'samples': 2791936, 'steps': 5452, 'loss/train': 2.4795773029327393} +02/24/2022 08:37:19 - INFO - codeparrot_training - Step 5453: {'lr': 0.000493642722365424, 'samples': 2792448, 'steps': 5453, 'loss/train': 1.709165096282959} +02/24/2022 08:37:25 - INFO - codeparrot_training - Step 5454: {'lr': 0.0004936390553528313, 'samples': 2792960, 'steps': 5454, 'loss/train': 2.9883267879486084} +02/24/2022 08:37:32 - INFO - codeparrot_training - Step 5455: {'lr': 0.0004936353872965661, 'samples': 2793472, 'steps': 5455, 'loss/train': 2.1667511463165283} +02/24/2022 08:37:35 - INFO - codeparrot_training - Step 5456: {'lr': 0.0004936317181966443, 'samples': 2793984, 'steps': 5456, 'loss/train': 1.7340718507766724} +02/24/2022 08:37:41 - INFO - codeparrot_training - Step 5457: {'lr': 0.0004936280480530816, 'samples': 2794496, 'steps': 5457, 'loss/train': 1.9111151695251465} +02/24/2022 08:37:44 - INFO - codeparrot_training - Step 5458: {'lr': 0.0004936243768658937, 'samples': 2795008, 'steps': 5458, 'loss/train': 1.3520731925964355} +02/24/2022 08:37:50 - INFO - codeparrot_training - Step 5459: {'lr': 0.0004936207046350963, 'samples': 2795520, 'steps': 5459, 'loss/train': 2.566462516784668} +02/24/2022 08:37:53 - INFO - codeparrot_training - Step 5460: {'lr': 0.0004936170313607053, 'samples': 2796032, 'steps': 5460, 'loss/train': 2.4731314182281494} +02/24/2022 08:37:57 - INFO - codeparrot_training - Step 5461: {'lr': 0.0004936133570427361, 'samples': 2796544, 'steps': 5461, 'loss/train': 2.474146842956543} +02/24/2022 08:38:02 - INFO - codeparrot_training - Step 5462: {'lr': 0.0004936096816812046, 'samples': 2797056, 'steps': 5462, 'loss/train': 2.897639274597168} +02/24/2022 08:38:06 - INFO - codeparrot_training - Step 5463: {'lr': 0.0004936060052761268, 'samples': 2797568, 'steps': 5463, 'loss/train': 2.1374096870422363} +02/24/2022 08:38:11 - INFO - codeparrot_training - Step 5464: {'lr': 0.0004936023278275182, 'samples': 2798080, 'steps': 5464, 'loss/train': 2.5332071781158447} +02/24/2022 08:38:17 - INFO - codeparrot_training - Step 5465: {'lr': 0.0004935986493353944, 'samples': 2798592, 'steps': 5465, 'loss/train': 2.5430822372436523} +02/24/2022 08:38:20 - INFO - codeparrot_training - Step 5466: {'lr': 0.0004935949697997715, 'samples': 2799104, 'steps': 5466, 'loss/train': 2.320448637008667} +02/24/2022 08:38:28 - INFO - codeparrot_training - Step 5467: {'lr': 0.000493591289220665, 'samples': 2799616, 'steps': 5467, 'loss/train': 3.023228406906128} +02/24/2022 08:38:31 - INFO - codeparrot_training - Step 5468: {'lr': 0.0004935876075980908, 'samples': 2800128, 'steps': 5468, 'loss/train': 1.9866042137145996} +02/24/2022 08:38:37 - INFO - codeparrot_training - Step 5469: {'lr': 0.0004935839249320647, 'samples': 2800640, 'steps': 5469, 'loss/train': 2.737163543701172} +02/24/2022 08:38:40 - INFO - codeparrot_training - Step 5470: {'lr': 0.0004935802412226024, 'samples': 2801152, 'steps': 5470, 'loss/train': 1.5813744068145752} +02/24/2022 08:38:46 - INFO - codeparrot_training - Step 5471: {'lr': 0.0004935765564697195, 'samples': 2801664, 'steps': 5471, 'loss/train': 2.9298453330993652} +02/24/2022 08:38:49 - INFO - codeparrot_training - Step 5472: {'lr': 0.0004935728706734322, 'samples': 2802176, 'steps': 5472, 'loss/train': 2.430553674697876} +02/24/2022 08:38:55 - INFO - codeparrot_training - Step 5473: {'lr': 0.000493569183833756, 'samples': 2802688, 'steps': 5473, 'loss/train': 2.3587841987609863} +02/24/2022 08:38:58 - INFO - codeparrot_training - Step 5474: {'lr': 0.0004935654959507068, 'samples': 2803200, 'steps': 5474, 'loss/train': 2.5076725482940674} +02/24/2022 08:39:03 - INFO - codeparrot_training - Step 5475: {'lr': 0.0004935618070243003, 'samples': 2803712, 'steps': 5475, 'loss/train': 2.501732110977173} +02/24/2022 08:39:07 - INFO - codeparrot_training - Step 5476: {'lr': 0.0004935581170545523, 'samples': 2804224, 'steps': 5476, 'loss/train': 1.607458472251892} +02/24/2022 08:39:13 - INFO - codeparrot_training - Step 5477: {'lr': 0.0004935544260414787, 'samples': 2804736, 'steps': 5477, 'loss/train': 2.6318142414093018} +02/24/2022 08:39:16 - INFO - codeparrot_training - Step 5478: {'lr': 0.0004935507339850953, 'samples': 2805248, 'steps': 5478, 'loss/train': 2.7555673122406006} +02/24/2022 08:39:21 - INFO - codeparrot_training - Step 5479: {'lr': 0.0004935470408854179, 'samples': 2805760, 'steps': 5479, 'loss/train': 0.8822837471961975} +02/24/2022 08:39:25 - INFO - codeparrot_training - Step 5480: {'lr': 0.0004935433467424624, 'samples': 2806272, 'steps': 5480, 'loss/train': 2.194122552871704} +02/24/2022 08:39:32 - INFO - codeparrot_training - Step 5481: {'lr': 0.0004935396515562444, 'samples': 2806784, 'steps': 5481, 'loss/train': 2.570009708404541} +02/24/2022 08:39:36 - INFO - codeparrot_training - Step 5482: {'lr': 0.0004935359553267798, 'samples': 2807296, 'steps': 5482, 'loss/train': 2.700437307357788} +02/24/2022 08:39:41 - INFO - codeparrot_training - Step 5483: {'lr': 0.0004935322580540847, 'samples': 2807808, 'steps': 5483, 'loss/train': 2.5435824394226074} +02/24/2022 08:39:45 - INFO - codeparrot_training - Step 5484: {'lr': 0.0004935285597381747, 'samples': 2808320, 'steps': 5484, 'loss/train': 2.6107141971588135} +02/24/2022 08:39:50 - INFO - codeparrot_training - Step 5485: {'lr': 0.0004935248603790656, 'samples': 2808832, 'steps': 5485, 'loss/train': 1.681152582168579} +02/24/2022 08:39:54 - INFO - codeparrot_training - Step 5486: {'lr': 0.0004935211599767733, 'samples': 2809344, 'steps': 5486, 'loss/train': 2.324976921081543} +02/24/2022 08:39:59 - INFO - codeparrot_training - Step 5487: {'lr': 0.0004935174585313138, 'samples': 2809856, 'steps': 5487, 'loss/train': 3.132422685623169} +02/24/2022 08:40:03 - INFO - codeparrot_training - Step 5488: {'lr': 0.0004935137560427027, 'samples': 2810368, 'steps': 5488, 'loss/train': 2.7801859378814697} +02/24/2022 08:40:08 - INFO - codeparrot_training - Step 5489: {'lr': 0.000493510052510956, 'samples': 2810880, 'steps': 5489, 'loss/train': 2.628154754638672} +02/24/2022 08:40:12 - INFO - codeparrot_training - Step 5490: {'lr': 0.0004935063479360897, 'samples': 2811392, 'steps': 5490, 'loss/train': 5.426364421844482} +02/24/2022 08:40:19 - INFO - codeparrot_training - Step 5491: {'lr': 0.0004935026423181194, 'samples': 2811904, 'steps': 5491, 'loss/train': 1.8185917139053345} +02/24/2022 08:40:22 - INFO - codeparrot_training - Step 5492: {'lr': 0.0004934989356570611, 'samples': 2812416, 'steps': 5492, 'loss/train': 1.8836909532546997} +02/24/2022 08:40:28 - INFO - codeparrot_training - Step 5493: {'lr': 0.0004934952279529308, 'samples': 2812928, 'steps': 5493, 'loss/train': 2.1510279178619385} +02/24/2022 08:40:31 - INFO - codeparrot_training - Step 5494: {'lr': 0.0004934915192057441, 'samples': 2813440, 'steps': 5494, 'loss/train': 2.4905660152435303} +02/24/2022 08:40:37 - INFO - codeparrot_training - Step 5495: {'lr': 0.0004934878094155172, 'samples': 2813952, 'steps': 5495, 'loss/train': 2.4707770347595215} +02/24/2022 08:40:40 - INFO - codeparrot_training - Step 5496: {'lr': 0.0004934840985822657, 'samples': 2814464, 'steps': 5496, 'loss/train': 1.2118127346038818} +02/24/2022 08:40:46 - INFO - codeparrot_training - Step 5497: {'lr': 0.0004934803867060058, 'samples': 2814976, 'steps': 5497, 'loss/train': 3.060567855834961} +02/24/2022 08:40:49 - INFO - codeparrot_training - Step 5498: {'lr': 0.0004934766737867531, 'samples': 2815488, 'steps': 5498, 'loss/train': 2.6628003120422363} +02/24/2022 08:40:55 - INFO - codeparrot_training - Step 5499: {'lr': 0.0004934729598245237, 'samples': 2816000, 'steps': 5499, 'loss/train': 2.2565956115722656} +02/24/2022 08:40:58 - INFO - codeparrot_training - Step 5500: {'lr': 0.0004934692448193334, 'samples': 2816512, 'steps': 5500, 'loss/train': 2.064164161682129} +02/24/2022 08:41:04 - INFO - codeparrot_training - Step 5501: {'lr': 0.0004934655287711982, 'samples': 2817024, 'steps': 5501, 'loss/train': 2.3882791996002197} +02/24/2022 08:41:07 - INFO - codeparrot_training - Step 5502: {'lr': 0.0004934618116801341, 'samples': 2817536, 'steps': 5502, 'loss/train': 3.4337117671966553} +02/24/2022 08:41:15 - INFO - codeparrot_training - Step 5503: {'lr': 0.0004934580935461567, 'samples': 2818048, 'steps': 5503, 'loss/train': 3.1718528270721436} +02/24/2022 08:41:18 - INFO - codeparrot_training - Step 5504: {'lr': 0.0004934543743692822, 'samples': 2818560, 'steps': 5504, 'loss/train': 2.7611584663391113} +02/24/2022 08:41:24 - INFO - codeparrot_training - Step 5505: {'lr': 0.0004934506541495265, 'samples': 2819072, 'steps': 5505, 'loss/train': 1.4494328498840332} +02/24/2022 08:41:27 - INFO - codeparrot_training - Step 5506: {'lr': 0.0004934469328869056, 'samples': 2819584, 'steps': 5506, 'loss/train': 2.5046980381011963} +02/24/2022 08:41:33 - INFO - codeparrot_training - Step 5507: {'lr': 0.0004934432105814352, 'samples': 2820096, 'steps': 5507, 'loss/train': 3.164855718612671} +02/24/2022 08:41:36 - INFO - codeparrot_training - Step 5508: {'lr': 0.0004934394872331314, 'samples': 2820608, 'steps': 5508, 'loss/train': 3.4603705406188965} +02/24/2022 08:41:42 - INFO - codeparrot_training - Step 5509: {'lr': 0.0004934357628420101, 'samples': 2821120, 'steps': 5509, 'loss/train': 2.5381319522857666} +02/24/2022 08:41:45 - INFO - codeparrot_training - Step 5510: {'lr': 0.0004934320374080874, 'samples': 2821632, 'steps': 5510, 'loss/train': 3.3778696060180664} +02/24/2022 08:41:51 - INFO - codeparrot_training - Step 5511: {'lr': 0.000493428310931379, 'samples': 2822144, 'steps': 5511, 'loss/train': 2.120063066482544} +02/24/2022 08:41:54 - INFO - codeparrot_training - Step 5512: {'lr': 0.0004934245834119013, 'samples': 2822656, 'steps': 5512, 'loss/train': 2.2954940795898438} +02/24/2022 08:42:02 - INFO - codeparrot_training - Step 5513: {'lr': 0.0004934208548496697, 'samples': 2823168, 'steps': 5513, 'loss/train': 3.1563899517059326} +02/24/2022 08:42:05 - INFO - codeparrot_training - Step 5514: {'lr': 0.0004934171252447006, 'samples': 2823680, 'steps': 5514, 'loss/train': 1.0124396085739136} +02/24/2022 08:42:11 - INFO - codeparrot_training - Step 5515: {'lr': 0.0004934133945970097, 'samples': 2824192, 'steps': 5515, 'loss/train': 3.836822271347046} +02/24/2022 08:42:14 - INFO - codeparrot_training - Step 5516: {'lr': 0.0004934096629066133, 'samples': 2824704, 'steps': 5516, 'loss/train': 0.8433687090873718} +02/24/2022 08:42:20 - INFO - codeparrot_training - Step 5517: {'lr': 0.000493405930173527, 'samples': 2825216, 'steps': 5517, 'loss/train': 2.610063076019287} +02/24/2022 08:42:23 - INFO - codeparrot_training - Step 5518: {'lr': 0.0004934021963977671, 'samples': 2825728, 'steps': 5518, 'loss/train': 1.5580424070358276} +02/24/2022 08:42:29 - INFO - codeparrot_training - Step 5519: {'lr': 0.0004933984615793494, 'samples': 2826240, 'steps': 5519, 'loss/train': 1.7483354806900024} +02/24/2022 08:42:32 - INFO - codeparrot_training - Step 5520: {'lr': 0.0004933947257182901, 'samples': 2826752, 'steps': 5520, 'loss/train': 2.165771007537842} +02/24/2022 08:42:38 - INFO - codeparrot_training - Step 5521: {'lr': 0.000493390988814605, 'samples': 2827264, 'steps': 5521, 'loss/train': 3.6925840377807617} +02/24/2022 08:42:41 - INFO - codeparrot_training - Step 5522: {'lr': 0.0004933872508683101, 'samples': 2827776, 'steps': 5522, 'loss/train': 2.4756855964660645} +02/24/2022 08:42:47 - INFO - codeparrot_training - Step 5523: {'lr': 0.0004933835118794217, 'samples': 2828288, 'steps': 5523, 'loss/train': 0.5385767817497253} +02/24/2022 08:42:50 - INFO - codeparrot_training - Step 5524: {'lr': 0.0004933797718479555, 'samples': 2828800, 'steps': 5524, 'loss/train': 1.9447957277297974} +02/24/2022 08:42:56 - INFO - codeparrot_training - Step 5525: {'lr': 0.0004933760307739277, 'samples': 2829312, 'steps': 5525, 'loss/train': 4.624439239501953} +02/24/2022 08:43:00 - INFO - codeparrot_training - Step 5526: {'lr': 0.0004933722886573542, 'samples': 2829824, 'steps': 5526, 'loss/train': 1.0215610265731812} +02/24/2022 08:43:05 - INFO - codeparrot_training - Step 5527: {'lr': 0.0004933685454982511, 'samples': 2830336, 'steps': 5527, 'loss/train': 1.5295915603637695} +02/24/2022 08:43:08 - INFO - codeparrot_training - Step 5528: {'lr': 0.0004933648012966344, 'samples': 2830848, 'steps': 5528, 'loss/train': 2.2978732585906982} +02/24/2022 08:43:16 - INFO - codeparrot_training - Step 5529: {'lr': 0.0004933610560525203, 'samples': 2831360, 'steps': 5529, 'loss/train': 3.312633991241455} +02/24/2022 08:43:19 - INFO - codeparrot_training - Step 5530: {'lr': 0.0004933573097659246, 'samples': 2831872, 'steps': 5530, 'loss/train': 1.7465201616287231} +02/24/2022 08:43:25 - INFO - codeparrot_training - Step 5531: {'lr': 0.0004933535624368634, 'samples': 2832384, 'steps': 5531, 'loss/train': 2.419267177581787} +02/24/2022 08:43:28 - INFO - codeparrot_training - Step 5532: {'lr': 0.0004933498140653529, 'samples': 2832896, 'steps': 5532, 'loss/train': 3.0108892917633057} +02/24/2022 08:43:34 - INFO - codeparrot_training - Step 5533: {'lr': 0.0004933460646514092, 'samples': 2833408, 'steps': 5533, 'loss/train': 2.615257501602173} +02/24/2022 08:43:37 - INFO - codeparrot_training - Step 5534: {'lr': 0.000493342314195048, 'samples': 2833920, 'steps': 5534, 'loss/train': 3.0479815006256104} +02/24/2022 08:43:43 - INFO - codeparrot_training - Step 5535: {'lr': 0.0004933385626962858, 'samples': 2834432, 'steps': 5535, 'loss/train': 0.3741433322429657} +02/24/2022 08:43:46 - INFO - codeparrot_training - Step 5536: {'lr': 0.0004933348101551383, 'samples': 2834944, 'steps': 5536, 'loss/train': 1.148425579071045} +02/24/2022 08:43:52 - INFO - codeparrot_training - Step 5537: {'lr': 0.0004933310565716218, 'samples': 2835456, 'steps': 5537, 'loss/train': 3.763272523880005} +02/24/2022 08:43:55 - INFO - codeparrot_training - Step 5538: {'lr': 0.0004933273019457524, 'samples': 2835968, 'steps': 5538, 'loss/train': 2.1809885501861572} +02/24/2022 08:44:02 - INFO - codeparrot_training - Step 5539: {'lr': 0.0004933235462775459, 'samples': 2836480, 'steps': 5539, 'loss/train': 2.2543070316314697} +02/24/2022 08:44:06 - INFO - codeparrot_training - Step 5540: {'lr': 0.0004933197895670187, 'samples': 2836992, 'steps': 5540, 'loss/train': 3.752925157546997} +02/24/2022 08:44:11 - INFO - codeparrot_training - Step 5541: {'lr': 0.0004933160318141869, 'samples': 2837504, 'steps': 5541, 'loss/train': 2.4720349311828613} +02/24/2022 08:44:15 - INFO - codeparrot_training - Step 5542: {'lr': 0.0004933122730190663, 'samples': 2838016, 'steps': 5542, 'loss/train': 2.054579496383667} +02/24/2022 08:44:20 - INFO - codeparrot_training - Step 5543: {'lr': 0.0004933085131816733, 'samples': 2838528, 'steps': 5543, 'loss/train': 1.9743494987487793} +02/24/2022 08:44:24 - INFO - codeparrot_training - Step 5544: {'lr': 0.0004933047523020239, 'samples': 2839040, 'steps': 5544, 'loss/train': 3.0286362171173096} +02/24/2022 08:44:29 - INFO - codeparrot_training - Step 5545: {'lr': 0.0004933009903801341, 'samples': 2839552, 'steps': 5545, 'loss/train': 2.624907970428467} +02/24/2022 08:44:33 - INFO - codeparrot_training - Step 5546: {'lr': 0.0004932972274160202, 'samples': 2840064, 'steps': 5546, 'loss/train': 2.792562484741211} +02/24/2022 08:44:38 - INFO - codeparrot_training - Step 5547: {'lr': 0.0004932934634096982, 'samples': 2840576, 'steps': 5547, 'loss/train': 3.149885654449463} +02/24/2022 08:44:42 - INFO - codeparrot_training - Step 5548: {'lr': 0.0004932896983611843, 'samples': 2841088, 'steps': 5548, 'loss/train': 3.6530075073242188} +02/24/2022 08:44:49 - INFO - codeparrot_training - Step 5549: {'lr': 0.0004932859322704944, 'samples': 2841600, 'steps': 5549, 'loss/train': 1.6506565809249878} +02/24/2022 08:44:52 - INFO - codeparrot_training - Step 5550: {'lr': 0.000493282165137645, 'samples': 2842112, 'steps': 5550, 'loss/train': 2.8397183418273926} +02/24/2022 08:44:58 - INFO - codeparrot_training - Step 5551: {'lr': 0.0004932783969626521, 'samples': 2842624, 'steps': 5551, 'loss/train': 2.3374781608581543} +02/24/2022 08:45:01 - INFO - codeparrot_training - Step 5552: {'lr': 0.0004932746277455317, 'samples': 2843136, 'steps': 5552, 'loss/train': 2.3526647090911865} +02/24/2022 08:45:07 - INFO - codeparrot_training - Step 5553: {'lr': 0.0004932708574863, 'samples': 2843648, 'steps': 5553, 'loss/train': 2.5887060165405273} +02/24/2022 08:45:10 - INFO - codeparrot_training - Step 5554: {'lr': 0.0004932670861849733, 'samples': 2844160, 'steps': 5554, 'loss/train': 2.2699503898620605} +02/24/2022 08:45:16 - INFO - codeparrot_training - Step 5555: {'lr': 0.0004932633138415675, 'samples': 2844672, 'steps': 5555, 'loss/train': 2.6233675479888916} +02/24/2022 08:45:19 - INFO - codeparrot_training - Step 5556: {'lr': 0.000493259540456099, 'samples': 2845184, 'steps': 5556, 'loss/train': 2.745791435241699} +02/24/2022 08:45:25 - INFO - codeparrot_training - Step 5557: {'lr': 0.0004932557660285839, 'samples': 2845696, 'steps': 5557, 'loss/train': 2.101487398147583} +02/24/2022 08:45:28 - INFO - codeparrot_training - Step 5558: {'lr': 0.0004932519905590383, 'samples': 2846208, 'steps': 5558, 'loss/train': 2.3263118267059326} +02/24/2022 08:45:35 - INFO - codeparrot_training - Step 5559: {'lr': 0.0004932482140474785, 'samples': 2846720, 'steps': 5559, 'loss/train': 1.1069170236587524} +02/24/2022 08:45:38 - INFO - codeparrot_training - Step 5560: {'lr': 0.0004932444364939204, 'samples': 2847232, 'steps': 5560, 'loss/train': 2.67093563079834} +02/24/2022 08:45:44 - INFO - codeparrot_training - Step 5561: {'lr': 0.0004932406578983806, 'samples': 2847744, 'steps': 5561, 'loss/train': 2.045226573944092} +02/24/2022 08:45:47 - INFO - codeparrot_training - Step 5562: {'lr': 0.0004932368782608749, 'samples': 2848256, 'steps': 5562, 'loss/train': 3.291518449783325} +02/24/2022 08:45:53 - INFO - codeparrot_training - Step 5563: {'lr': 0.0004932330975814198, 'samples': 2848768, 'steps': 5563, 'loss/train': 2.0810651779174805} +02/24/2022 08:45:56 - INFO - codeparrot_training - Step 5564: {'lr': 0.0004932293158600312, 'samples': 2849280, 'steps': 5564, 'loss/train': 1.5393812656402588} +02/24/2022 08:46:02 - INFO - codeparrot_training - Step 5565: {'lr': 0.0004932255330967255, 'samples': 2849792, 'steps': 5565, 'loss/train': 2.9816529750823975} +02/24/2022 08:46:05 - INFO - codeparrot_training - Step 5566: {'lr': 0.0004932217492915189, 'samples': 2850304, 'steps': 5566, 'loss/train': 1.493029236793518} +02/24/2022 08:46:11 - INFO - codeparrot_training - Step 5567: {'lr': 0.0004932179644444274, 'samples': 2850816, 'steps': 5567, 'loss/train': 2.513721227645874} +02/24/2022 08:46:14 - INFO - codeparrot_training - Step 5568: {'lr': 0.0004932141785554676, 'samples': 2851328, 'steps': 5568, 'loss/train': 2.5890614986419678} +02/24/2022 08:46:20 - INFO - codeparrot_training - Step 5569: {'lr': 0.0004932103916246553, 'samples': 2851840, 'steps': 5569, 'loss/train': 1.7538347244262695} +02/24/2022 08:46:25 - INFO - codeparrot_training - Step 5570: {'lr': 0.000493206603652007, 'samples': 2852352, 'steps': 5570, 'loss/train': 1.822554588317871} +02/24/2022 08:46:28 - INFO - codeparrot_training - Step 5571: {'lr': 0.0004932028146375388, 'samples': 2852864, 'steps': 5571, 'loss/train': 1.128174901008606} +02/24/2022 08:46:34 - INFO - codeparrot_training - Step 5572: {'lr': 0.000493199024581267, 'samples': 2853376, 'steps': 5572, 'loss/train': 2.5253806114196777} +02/24/2022 08:46:38 - INFO - codeparrot_training - Step 5573: {'lr': 0.0004931952334832077, 'samples': 2853888, 'steps': 5573, 'loss/train': 0.4618869423866272} +02/24/2022 08:46:44 - INFO - codeparrot_training - Step 5574: {'lr': 0.0004931914413433773, 'samples': 2854400, 'steps': 5574, 'loss/train': 3.4767470359802246} +02/24/2022 08:46:47 - INFO - codeparrot_training - Step 5575: {'lr': 0.0004931876481617921, 'samples': 2854912, 'steps': 5575, 'loss/train': 2.4454307556152344} +02/24/2022 08:46:53 - INFO - codeparrot_training - Step 5576: {'lr': 0.0004931838539384681, 'samples': 2855424, 'steps': 5576, 'loss/train': 1.6719249486923218} +02/24/2022 08:46:56 - INFO - codeparrot_training - Step 5577: {'lr': 0.0004931800586734218, 'samples': 2855936, 'steps': 5577, 'loss/train': 2.5418787002563477} +02/24/2022 08:47:02 - INFO - codeparrot_training - Step 5578: {'lr': 0.0004931762623666692, 'samples': 2856448, 'steps': 5578, 'loss/train': 1.7851618528366089} +02/24/2022 08:47:05 - INFO - codeparrot_training - Step 5579: {'lr': 0.0004931724650182268, 'samples': 2856960, 'steps': 5579, 'loss/train': 0.44172510504722595} +02/24/2022 08:47:11 - INFO - codeparrot_training - Step 5580: {'lr': 0.0004931686666281108, 'samples': 2857472, 'steps': 5580, 'loss/train': 2.741643190383911} +02/24/2022 08:47:14 - INFO - codeparrot_training - Step 5581: {'lr': 0.0004931648671963373, 'samples': 2857984, 'steps': 5581, 'loss/train': 2.207061767578125} +02/24/2022 08:47:20 - INFO - codeparrot_training - Step 5582: {'lr': 0.000493161066722923, 'samples': 2858496, 'steps': 5582, 'loss/train': 1.6693904399871826} +02/24/2022 08:47:23 - INFO - codeparrot_training - Step 5583: {'lr': 0.0004931572652078837, 'samples': 2859008, 'steps': 5583, 'loss/train': 1.9169400930404663} +02/24/2022 08:47:29 - INFO - codeparrot_training - Step 5584: {'lr': 0.0004931534626512359, 'samples': 2859520, 'steps': 5584, 'loss/train': 3.8445916175842285} +02/24/2022 08:47:33 - INFO - codeparrot_training - Step 5585: {'lr': 0.0004931496590529959, 'samples': 2860032, 'steps': 5585, 'loss/train': 2.5335116386413574} +02/24/2022 08:47:38 - INFO - codeparrot_training - Step 5586: {'lr': 0.0004931458544131799, 'samples': 2860544, 'steps': 5586, 'loss/train': 3.1453921794891357} +02/24/2022 08:47:42 - INFO - codeparrot_training - Step 5587: {'lr': 0.0004931420487318044, 'samples': 2861056, 'steps': 5587, 'loss/train': 2.750887155532837} +02/24/2022 08:47:47 - INFO - codeparrot_training - Step 5588: {'lr': 0.0004931382420088855, 'samples': 2861568, 'steps': 5588, 'loss/train': 0.7489635348320007} +02/24/2022 08:47:51 - INFO - codeparrot_training - Step 5589: {'lr': 0.0004931344342444396, 'samples': 2862080, 'steps': 5589, 'loss/train': 2.8323230743408203} +02/24/2022 08:47:56 - INFO - codeparrot_training - Step 5590: {'lr': 0.000493130625438483, 'samples': 2862592, 'steps': 5590, 'loss/train': 1.8913551568984985} +02/24/2022 08:48:00 - INFO - codeparrot_training - Step 5591: {'lr': 0.000493126815591032, 'samples': 2863104, 'steps': 5591, 'loss/train': 3.3016164302825928} +02/24/2022 08:48:05 - INFO - codeparrot_training - Step 5592: {'lr': 0.0004931230047021028, 'samples': 2863616, 'steps': 5592, 'loss/train': 1.8535507917404175} +02/24/2022 08:48:09 - INFO - codeparrot_training - Step 5593: {'lr': 0.000493119192771712, 'samples': 2864128, 'steps': 5593, 'loss/train': 2.1000936031341553} +02/24/2022 08:48:15 - INFO - codeparrot_training - Step 5594: {'lr': 0.0004931153797998757, 'samples': 2864640, 'steps': 5594, 'loss/train': 1.4673081636428833} +02/24/2022 08:48:18 - INFO - codeparrot_training - Step 5595: {'lr': 0.0004931115657866103, 'samples': 2865152, 'steps': 5595, 'loss/train': 1.965951681137085} +02/24/2022 08:48:24 - INFO - codeparrot_training - Step 5596: {'lr': 0.0004931077507319322, 'samples': 2865664, 'steps': 5596, 'loss/train': 1.1762901544570923} +02/24/2022 08:48:27 - INFO - codeparrot_training - Step 5597: {'lr': 0.0004931039346358577, 'samples': 2866176, 'steps': 5597, 'loss/train': 2.6841182708740234} +02/24/2022 08:48:33 - INFO - codeparrot_training - Step 5598: {'lr': 0.0004931001174984032, 'samples': 2866688, 'steps': 5598, 'loss/train': 2.2216854095458984} +02/24/2022 08:48:36 - INFO - codeparrot_training - Step 5599: {'lr': 0.0004930962993195848, 'samples': 2867200, 'steps': 5599, 'loss/train': 2.5812947750091553} +02/24/2022 08:48:42 - INFO - codeparrot_training - Step 5600: {'lr': 0.0004930924800994192, 'samples': 2867712, 'steps': 5600, 'loss/train': 2.8477280139923096} +02/24/2022 08:48:45 - INFO - codeparrot_training - Step 5601: {'lr': 0.0004930886598379225, 'samples': 2868224, 'steps': 5601, 'loss/train': 1.5769703388214111} +02/24/2022 08:48:51 - INFO - codeparrot_training - Step 5602: {'lr': 0.0004930848385351112, 'samples': 2868736, 'steps': 5602, 'loss/train': 3.0094215869903564} +02/24/2022 08:48:54 - INFO - codeparrot_training - Step 5603: {'lr': 0.0004930810161910017, 'samples': 2869248, 'steps': 5603, 'loss/train': 2.762162208557129} +02/24/2022 08:49:00 - INFO - codeparrot_training - Step 5604: {'lr': 0.0004930771928056102, 'samples': 2869760, 'steps': 5604, 'loss/train': 2.9988224506378174} +02/24/2022 08:49:04 - INFO - codeparrot_training - Step 5605: {'lr': 0.0004930733683789533, 'samples': 2870272, 'steps': 5605, 'loss/train': 2.5743329524993896} +02/24/2022 08:49:09 - INFO - codeparrot_training - Step 5606: {'lr': 0.0004930695429110473, 'samples': 2870784, 'steps': 5606, 'loss/train': 2.397738218307495} +02/24/2022 08:49:13 - INFO - codeparrot_training - Step 5607: {'lr': 0.0004930657164019085, 'samples': 2871296, 'steps': 5607, 'loss/train': 3.0244300365448} +02/24/2022 08:49:18 - INFO - codeparrot_training - Step 5608: {'lr': 0.0004930618888515534, 'samples': 2871808, 'steps': 5608, 'loss/train': 2.8468844890594482} +02/24/2022 08:49:22 - INFO - codeparrot_training - Step 5609: {'lr': 0.0004930580602599983, 'samples': 2872320, 'steps': 5609, 'loss/train': 1.5977821350097656} +02/24/2022 08:49:28 - INFO - codeparrot_training - Step 5610: {'lr': 0.0004930542306272596, 'samples': 2872832, 'steps': 5610, 'loss/train': 2.6236846446990967} +02/24/2022 08:49:31 - INFO - codeparrot_training - Step 5611: {'lr': 0.0004930503999533538, 'samples': 2873344, 'steps': 5611, 'loss/train': 2.5708155632019043} +02/24/2022 08:49:37 - INFO - codeparrot_training - Step 5612: {'lr': 0.0004930465682382973, 'samples': 2873856, 'steps': 5612, 'loss/train': 1.275734305381775} +02/24/2022 08:49:40 - INFO - codeparrot_training - Step 5613: {'lr': 0.0004930427354821064, 'samples': 2874368, 'steps': 5613, 'loss/train': 1.1773325204849243} +02/24/2022 08:49:46 - INFO - codeparrot_training - Step 5614: {'lr': 0.0004930389016847977, 'samples': 2874880, 'steps': 5614, 'loss/train': 1.0492737293243408} +02/24/2022 08:49:49 - INFO - codeparrot_training - Step 5615: {'lr': 0.0004930350668463874, 'samples': 2875392, 'steps': 5615, 'loss/train': 2.9726195335388184} +02/24/2022 08:49:55 - INFO - codeparrot_training - Step 5616: {'lr': 0.0004930312309668922, 'samples': 2875904, 'steps': 5616, 'loss/train': 1.563391923904419} +02/24/2022 08:49:58 - INFO - codeparrot_training - Step 5617: {'lr': 0.0004930273940463283, 'samples': 2876416, 'steps': 5617, 'loss/train': 2.0094175338745117} +02/24/2022 08:50:04 - INFO - codeparrot_training - Step 5618: {'lr': 0.0004930235560847121, 'samples': 2876928, 'steps': 5618, 'loss/train': 2.9715640544891357} +02/24/2022 08:50:07 - INFO - codeparrot_training - Step 5619: {'lr': 0.0004930197170820603, 'samples': 2877440, 'steps': 5619, 'loss/train': 2.5607075691223145} +02/24/2022 08:50:13 - INFO - codeparrot_training - Step 5620: {'lr': 0.0004930158770383891, 'samples': 2877952, 'steps': 5620, 'loss/train': 0.5485643744468689} +02/24/2022 08:50:17 - INFO - codeparrot_training - Step 5621: {'lr': 0.0004930120359537153, 'samples': 2878464, 'steps': 5621, 'loss/train': 2.1816604137420654} +02/24/2022 08:50:22 - INFO - codeparrot_training - Step 5622: {'lr': 0.0004930081938280548, 'samples': 2878976, 'steps': 5622, 'loss/train': 1.880281686782837} +02/24/2022 08:50:26 - INFO - codeparrot_training - Step 5623: {'lr': 0.0004930043506614245, 'samples': 2879488, 'steps': 5623, 'loss/train': 2.9321084022521973} +02/24/2022 08:50:31 - INFO - codeparrot_training - Step 5624: {'lr': 0.0004930005064538406, 'samples': 2880000, 'steps': 5624, 'loss/train': 3.324977159500122} +02/24/2022 08:50:37 - INFO - codeparrot_training - Step 5625: {'lr': 0.0004929966612053199, 'samples': 2880512, 'steps': 5625, 'loss/train': 2.425710678100586} +02/24/2022 08:50:40 - INFO - codeparrot_training - Step 5626: {'lr': 0.0004929928149158785, 'samples': 2881024, 'steps': 5626, 'loss/train': 1.9112578630447388} +02/24/2022 08:50:46 - INFO - codeparrot_training - Step 5627: {'lr': 0.0004929889675855332, 'samples': 2881536, 'steps': 5627, 'loss/train': 2.19138240814209} +02/24/2022 08:50:49 - INFO - codeparrot_training - Step 5628: {'lr': 0.0004929851192143001, 'samples': 2882048, 'steps': 5628, 'loss/train': 2.4410400390625} +02/24/2022 08:50:56 - INFO - codeparrot_training - Step 5629: {'lr': 0.0004929812698021961, 'samples': 2882560, 'steps': 5629, 'loss/train': 3.1160783767700195} +02/24/2022 08:50:59 - INFO - codeparrot_training - Step 5630: {'lr': 0.0004929774193492373, 'samples': 2883072, 'steps': 5630, 'loss/train': 2.5817558765411377} +02/24/2022 08:51:05 - INFO - codeparrot_training - Step 5631: {'lr': 0.0004929735678554406, 'samples': 2883584, 'steps': 5631, 'loss/train': 2.8050694465637207} +02/24/2022 08:51:08 - INFO - codeparrot_training - Step 5632: {'lr': 0.0004929697153208221, 'samples': 2884096, 'steps': 5632, 'loss/train': 1.891499400138855} +02/24/2022 08:51:14 - INFO - codeparrot_training - Step 5633: {'lr': 0.0004929658617453986, 'samples': 2884608, 'steps': 5633, 'loss/train': 2.8051838874816895} +02/24/2022 08:51:17 - INFO - codeparrot_training - Step 5634: {'lr': 0.0004929620071291865, 'samples': 2885120, 'steps': 5634, 'loss/train': 2.0820560455322266} +02/24/2022 08:51:23 - INFO - codeparrot_training - Step 5635: {'lr': 0.0004929581514722023, 'samples': 2885632, 'steps': 5635, 'loss/train': 2.417781352996826} +02/24/2022 08:51:26 - INFO - codeparrot_training - Step 5636: {'lr': 0.0004929542947744625, 'samples': 2886144, 'steps': 5636, 'loss/train': 2.3993985652923584} +02/24/2022 08:51:32 - INFO - codeparrot_training - Step 5637: {'lr': 0.0004929504370359837, 'samples': 2886656, 'steps': 5637, 'loss/train': 2.83490252494812} +02/24/2022 08:51:35 - INFO - codeparrot_training - Step 5638: {'lr': 0.0004929465782567824, 'samples': 2887168, 'steps': 5638, 'loss/train': 3.0768420696258545} +02/24/2022 08:51:41 - INFO - codeparrot_training - Step 5639: {'lr': 0.000492942718436875, 'samples': 2887680, 'steps': 5639, 'loss/train': 2.4285888671875} +02/24/2022 08:51:44 - INFO - codeparrot_training - Step 5640: {'lr': 0.0004929388575762782, 'samples': 2888192, 'steps': 5640, 'loss/train': 1.8348264694213867} +02/24/2022 08:51:50 - INFO - codeparrot_training - Step 5641: {'lr': 0.0004929349956750085, 'samples': 2888704, 'steps': 5641, 'loss/train': 2.9415781497955322} +02/24/2022 08:51:54 - INFO - codeparrot_training - Step 5642: {'lr': 0.0004929311327330823, 'samples': 2889216, 'steps': 5642, 'loss/train': 1.738094687461853} +02/24/2022 08:51:59 - INFO - codeparrot_training - Step 5643: {'lr': 0.0004929272687505163, 'samples': 2889728, 'steps': 5643, 'loss/train': 3.554286003112793} +02/24/2022 08:52:03 - INFO - codeparrot_training - Step 5644: {'lr': 0.0004929234037273271, 'samples': 2890240, 'steps': 5644, 'loss/train': 2.46764874458313} +02/24/2022 08:52:08 - INFO - codeparrot_training - Step 5645: {'lr': 0.0004929195376635311, 'samples': 2890752, 'steps': 5645, 'loss/train': 2.29890513420105} +02/24/2022 08:52:12 - INFO - codeparrot_training - Step 5646: {'lr': 0.000492915670559145, 'samples': 2891264, 'steps': 5646, 'loss/train': 3.413724899291992} +02/24/2022 08:52:17 - INFO - codeparrot_training - Step 5647: {'lr': 0.0004929118024141853, 'samples': 2891776, 'steps': 5647, 'loss/train': 1.6830378770828247} +02/24/2022 08:52:21 - INFO - codeparrot_training - Step 5648: {'lr': 0.0004929079332286685, 'samples': 2892288, 'steps': 5648, 'loss/train': 2.7947282791137695} +02/24/2022 08:52:26 - INFO - codeparrot_training - Step 5649: {'lr': 0.0004929040630026112, 'samples': 2892800, 'steps': 5649, 'loss/train': 2.3367056846618652} +02/24/2022 08:52:30 - INFO - codeparrot_training - Step 5650: {'lr': 0.0004929001917360302, 'samples': 2893312, 'steps': 5650, 'loss/train': 2.1625170707702637} +02/24/2022 08:52:35 - INFO - codeparrot_training - Step 5651: {'lr': 0.0004928963194289419, 'samples': 2893824, 'steps': 5651, 'loss/train': 2.3080220222473145} +02/24/2022 08:52:39 - INFO - codeparrot_training - Step 5652: {'lr': 0.0004928924460813627, 'samples': 2894336, 'steps': 5652, 'loss/train': 2.702507257461548} +02/24/2022 08:52:44 - INFO - codeparrot_training - Step 5653: {'lr': 0.0004928885716933096, 'samples': 2894848, 'steps': 5653, 'loss/train': 3.0107593536376953} +02/24/2022 08:52:48 - INFO - codeparrot_training - Step 5654: {'lr': 0.0004928846962647988, 'samples': 2895360, 'steps': 5654, 'loss/train': 2.6054599285125732} +02/24/2022 08:52:54 - INFO - codeparrot_training - Step 5655: {'lr': 0.0004928808197958472, 'samples': 2895872, 'steps': 5655, 'loss/train': 1.2903093099594116} +02/24/2022 08:52:57 - INFO - codeparrot_training - Step 5656: {'lr': 0.0004928769422864712, 'samples': 2896384, 'steps': 5656, 'loss/train': 2.8213772773742676} +02/24/2022 08:53:03 - INFO - codeparrot_training - Step 5657: {'lr': 0.0004928730637366877, 'samples': 2896896, 'steps': 5657, 'loss/train': 1.6182501316070557} +02/24/2022 08:53:06 - INFO - codeparrot_training - Step 5658: {'lr': 0.000492869184146513, 'samples': 2897408, 'steps': 5658, 'loss/train': 1.7428618669509888} +02/24/2022 08:53:12 - INFO - codeparrot_training - Step 5659: {'lr': 0.0004928653035159638, 'samples': 2897920, 'steps': 5659, 'loss/train': 1.595118522644043} +02/24/2022 08:53:16 - INFO - codeparrot_training - Step 5660: {'lr': 0.0004928614218450568, 'samples': 2898432, 'steps': 5660, 'loss/train': 2.697014570236206} +02/24/2022 08:53:21 - INFO - codeparrot_training - Step 5661: {'lr': 0.0004928575391338085, 'samples': 2898944, 'steps': 5661, 'loss/train': 1.9353317022323608} +02/24/2022 08:53:25 - INFO - codeparrot_training - Step 5662: {'lr': 0.0004928536553822357, 'samples': 2899456, 'steps': 5662, 'loss/train': 3.699150800704956} +02/24/2022 08:53:30 - INFO - codeparrot_training - Step 5663: {'lr': 0.0004928497705903549, 'samples': 2899968, 'steps': 5663, 'loss/train': 3.097060441970825} +02/24/2022 08:53:34 - INFO - codeparrot_training - Step 5664: {'lr': 0.0004928458847581828, 'samples': 2900480, 'steps': 5664, 'loss/train': 2.043487548828125} +02/24/2022 08:53:39 - INFO - codeparrot_training - Step 5665: {'lr': 0.0004928419978857361, 'samples': 2900992, 'steps': 5665, 'loss/train': 2.7488622665405273} +02/24/2022 08:53:43 - INFO - codeparrot_training - Step 5666: {'lr': 0.0004928381099730314, 'samples': 2901504, 'steps': 5666, 'loss/train': 2.8547468185424805} +02/24/2022 08:53:49 - INFO - codeparrot_training - Step 5667: {'lr': 0.0004928342210200853, 'samples': 2902016, 'steps': 5667, 'loss/train': 1.4548543691635132} +02/24/2022 08:53:53 - INFO - codeparrot_training - Step 5668: {'lr': 0.0004928303310269145, 'samples': 2902528, 'steps': 5668, 'loss/train': 2.502410411834717} +02/24/2022 08:53:58 - INFO - codeparrot_training - Step 5669: {'lr': 0.0004928264399935357, 'samples': 2903040, 'steps': 5669, 'loss/train': 2.077807664871216} +02/24/2022 08:54:01 - INFO - codeparrot_training - Step 5670: {'lr': 0.0004928225479199655, 'samples': 2903552, 'steps': 5670, 'loss/train': 2.462615728378296} +02/24/2022 08:54:07 - INFO - codeparrot_training - Step 5671: {'lr': 0.0004928186548062206, 'samples': 2904064, 'steps': 5671, 'loss/train': 3.0940253734588623} +02/24/2022 08:54:10 - INFO - codeparrot_training - Step 5672: {'lr': 0.0004928147606523179, 'samples': 2904576, 'steps': 5672, 'loss/train': 2.4129199981689453} +02/24/2022 08:54:16 - INFO - codeparrot_training - Step 5673: {'lr': 0.0004928108654582736, 'samples': 2905088, 'steps': 5673, 'loss/train': 3.663492202758789} +02/24/2022 08:54:19 - INFO - codeparrot_training - Step 5674: {'lr': 0.0004928069692241048, 'samples': 2905600, 'steps': 5674, 'loss/train': 2.200190544128418} +02/24/2022 08:54:25 - INFO - codeparrot_training - Step 5675: {'lr': 0.000492803071949828, 'samples': 2906112, 'steps': 5675, 'loss/train': 3.141918659210205} +02/24/2022 08:54:28 - INFO - codeparrot_training - Step 5676: {'lr': 0.0004927991736354599, 'samples': 2906624, 'steps': 5676, 'loss/train': 2.6103997230529785} +02/24/2022 08:54:35 - INFO - codeparrot_training - Step 5677: {'lr': 0.0004927952742810173, 'samples': 2907136, 'steps': 5677, 'loss/train': 3.0645596981048584} +02/24/2022 08:54:38 - INFO - codeparrot_training - Step 5678: {'lr': 0.0004927913738865167, 'samples': 2907648, 'steps': 5678, 'loss/train': 2.324155569076538} +02/24/2022 08:54:43 - INFO - codeparrot_training - Step 5679: {'lr': 0.0004927874724519751, 'samples': 2908160, 'steps': 5679, 'loss/train': 2.973525047302246} +02/24/2022 08:54:47 - INFO - codeparrot_training - Step 5680: {'lr': 0.000492783569977409, 'samples': 2908672, 'steps': 5680, 'loss/train': 1.9238383769989014} +02/24/2022 08:54:52 - INFO - codeparrot_training - Step 5681: {'lr': 0.0004927796664628353, 'samples': 2909184, 'steps': 5681, 'loss/train': 2.038911819458008} +02/24/2022 08:54:56 - INFO - codeparrot_training - Step 5682: {'lr': 0.0004927757619082704, 'samples': 2909696, 'steps': 5682, 'loss/train': 1.9652752876281738} +02/24/2022 08:55:01 - INFO - codeparrot_training - Step 5683: {'lr': 0.0004927718563137313, 'samples': 2910208, 'steps': 5683, 'loss/train': 2.848663330078125} +02/24/2022 08:55:05 - INFO - codeparrot_training - Step 5684: {'lr': 0.0004927679496792347, 'samples': 2910720, 'steps': 5684, 'loss/train': 2.696091413497925} +02/24/2022 08:55:10 - INFO - codeparrot_training - Step 5685: {'lr': 0.0004927640420047973, 'samples': 2911232, 'steps': 5685, 'loss/train': 0.9404380917549133} +02/24/2022 08:55:14 - INFO - codeparrot_training - Step 5686: {'lr': 0.0004927601332904358, 'samples': 2911744, 'steps': 5686, 'loss/train': 2.1705567836761475} +02/24/2022 08:55:20 - INFO - codeparrot_training - Step 5687: {'lr': 0.0004927562235361669, 'samples': 2912256, 'steps': 5687, 'loss/train': 2.154585123062134} +02/24/2022 08:55:24 - INFO - codeparrot_training - Step 5688: {'lr': 0.0004927523127420076, 'samples': 2912768, 'steps': 5688, 'loss/train': 2.619330644607544} +02/24/2022 08:55:29 - INFO - codeparrot_training - Step 5689: {'lr': 0.0004927484009079743, 'samples': 2913280, 'steps': 5689, 'loss/train': 2.8479294776916504} +02/24/2022 08:55:33 - INFO - codeparrot_training - Step 5690: {'lr': 0.000492744488034084, 'samples': 2913792, 'steps': 5690, 'loss/train': 2.0539581775665283} +02/24/2022 08:55:38 - INFO - codeparrot_training - Step 5691: {'lr': 0.0004927405741203534, 'samples': 2914304, 'steps': 5691, 'loss/train': 3.403379201889038} +02/24/2022 08:55:42 - INFO - codeparrot_training - Step 5692: {'lr': 0.0004927366591667993, 'samples': 2914816, 'steps': 5692, 'loss/train': 1.4618353843688965} +02/24/2022 08:55:47 - INFO - codeparrot_training - Step 5693: {'lr': 0.0004927327431734383, 'samples': 2915328, 'steps': 5693, 'loss/train': 2.345198392868042} +02/24/2022 08:55:50 - INFO - codeparrot_training - Step 5694: {'lr': 0.0004927288261402875, 'samples': 2915840, 'steps': 5694, 'loss/train': 1.8501535654067993} +02/24/2022 08:55:56 - INFO - codeparrot_training - Step 5695: {'lr': 0.0004927249080673633, 'samples': 2916352, 'steps': 5695, 'loss/train': 2.8291263580322266} +02/24/2022 08:55:59 - INFO - codeparrot_training - Step 5696: {'lr': 0.0004927209889546828, 'samples': 2916864, 'steps': 5696, 'loss/train': 2.2671685218811035} +02/24/2022 08:56:05 - INFO - codeparrot_training - Step 5697: {'lr': 0.0004927170688022625, 'samples': 2917376, 'steps': 5697, 'loss/train': 1.8576054573059082} +02/24/2022 08:56:08 - INFO - codeparrot_training - Step 5698: {'lr': 0.0004927131476101195, 'samples': 2917888, 'steps': 5698, 'loss/train': 2.2511613368988037} +02/24/2022 08:56:14 - INFO - codeparrot_training - Step 5699: {'lr': 0.0004927092253782704, 'samples': 2918400, 'steps': 5699, 'loss/train': 3.254352331161499} +02/24/2022 08:56:17 - INFO - codeparrot_training - Step 5700: {'lr': 0.0004927053021067321, 'samples': 2918912, 'steps': 5700, 'loss/train': 2.630722999572754} +02/24/2022 08:56:23 - INFO - codeparrot_training - Step 5701: {'lr': 0.0004927013777955212, 'samples': 2919424, 'steps': 5701, 'loss/train': 2.9244372844696045} +02/24/2022 08:56:26 - INFO - codeparrot_training - Step 5702: {'lr': 0.0004926974524446548, 'samples': 2919936, 'steps': 5702, 'loss/train': 2.6032893657684326} +02/24/2022 08:56:33 - INFO - codeparrot_training - Step 5703: {'lr': 0.0004926935260541496, 'samples': 2920448, 'steps': 5703, 'loss/train': 2.5975043773651123} +02/24/2022 08:56:37 - INFO - codeparrot_training - Step 5704: {'lr': 0.0004926895986240222, 'samples': 2920960, 'steps': 5704, 'loss/train': 3.2763800621032715} +02/24/2022 08:56:42 - INFO - codeparrot_training - Step 5705: {'lr': 0.0004926856701542898, 'samples': 2921472, 'steps': 5705, 'loss/train': 1.6149892807006836} +02/24/2022 08:56:46 - INFO - codeparrot_training - Step 5706: {'lr': 0.000492681740644969, 'samples': 2921984, 'steps': 5706, 'loss/train': 3.00669264793396} +02/24/2022 08:56:51 - INFO - codeparrot_training - Step 5707: {'lr': 0.0004926778100960767, 'samples': 2922496, 'steps': 5707, 'loss/train': 2.7209267616271973} +02/24/2022 08:56:55 - INFO - codeparrot_training - Step 5708: {'lr': 0.0004926738785076297, 'samples': 2923008, 'steps': 5708, 'loss/train': 1.271486759185791} +02/24/2022 08:57:00 - INFO - codeparrot_training - Step 5709: {'lr': 0.0004926699458796448, 'samples': 2923520, 'steps': 5709, 'loss/train': 2.8055872917175293} +02/24/2022 08:57:04 - INFO - codeparrot_training - Step 5710: {'lr': 0.0004926660122121391, 'samples': 2924032, 'steps': 5710, 'loss/train': 1.2185041904449463} +02/24/2022 08:57:09 - INFO - codeparrot_training - Step 5711: {'lr': 0.0004926620775051291, 'samples': 2924544, 'steps': 5711, 'loss/train': 1.8776848316192627} +02/24/2022 08:57:13 - INFO - codeparrot_training - Step 5712: {'lr': 0.0004926581417586318, 'samples': 2925056, 'steps': 5712, 'loss/train': 1.6651860475540161} +02/24/2022 08:57:19 - INFO - codeparrot_training - Step 5713: {'lr': 0.0004926542049726642, 'samples': 2925568, 'steps': 5713, 'loss/train': 3.009711503982544} +02/24/2022 08:57:22 - INFO - codeparrot_training - Step 5714: {'lr': 0.0004926502671472429, 'samples': 2926080, 'steps': 5714, 'loss/train': 3.661102294921875} +02/24/2022 08:57:28 - INFO - codeparrot_training - Step 5715: {'lr': 0.000492646328282385, 'samples': 2926592, 'steps': 5715, 'loss/train': 3.2587244510650635} +02/24/2022 08:57:31 - INFO - codeparrot_training - Step 5716: {'lr': 0.0004926423883781073, 'samples': 2927104, 'steps': 5716, 'loss/train': 2.594268321990967} +02/24/2022 08:57:37 - INFO - codeparrot_training - Step 5717: {'lr': 0.0004926384474344265, 'samples': 2927616, 'steps': 5717, 'loss/train': 2.8316903114318848} +02/24/2022 08:57:40 - INFO - codeparrot_training - Step 5718: {'lr': 0.0004926345054513598, 'samples': 2928128, 'steps': 5718, 'loss/train': 2.4430084228515625} +02/24/2022 08:57:46 - INFO - codeparrot_training - Step 5719: {'lr': 0.0004926305624289238, 'samples': 2928640, 'steps': 5719, 'loss/train': 2.468344211578369} +02/24/2022 08:57:49 - INFO - codeparrot_training - Step 5720: {'lr': 0.0004926266183671356, 'samples': 2929152, 'steps': 5720, 'loss/train': 2.630418539047241} +02/24/2022 08:57:55 - INFO - codeparrot_training - Step 5721: {'lr': 0.000492622673266012, 'samples': 2929664, 'steps': 5721, 'loss/train': 0.7120659351348877} +02/24/2022 08:58:01 - INFO - codeparrot_training - Step 5722: {'lr': 0.0004926187271255698, 'samples': 2930176, 'steps': 5722, 'loss/train': 1.9260408878326416} +02/24/2022 08:58:04 - INFO - codeparrot_training - Step 5723: {'lr': 0.0004926147799458262, 'samples': 2930688, 'steps': 5723, 'loss/train': 1.9589388370513916} +02/24/2022 08:58:10 - INFO - codeparrot_training - Step 5724: {'lr': 0.0004926108317267979, 'samples': 2931200, 'steps': 5724, 'loss/train': 2.889728546142578} +02/24/2022 08:58:13 - INFO - codeparrot_training - Step 5725: {'lr': 0.0004926068824685017, 'samples': 2931712, 'steps': 5725, 'loss/train': 2.8991611003875732} +02/24/2022 08:58:19 - INFO - codeparrot_training - Step 5726: {'lr': 0.0004926029321709548, 'samples': 2932224, 'steps': 5726, 'loss/train': 1.6453745365142822} +02/24/2022 08:58:22 - INFO - codeparrot_training - Step 5727: {'lr': 0.0004925989808341738, 'samples': 2932736, 'steps': 5727, 'loss/train': 2.4324159622192383} +02/24/2022 08:58:28 - INFO - codeparrot_training - Step 5728: {'lr': 0.0004925950284581759, 'samples': 2933248, 'steps': 5728, 'loss/train': 2.519422769546509} +02/24/2022 08:58:31 - INFO - codeparrot_training - Step 5729: {'lr': 0.0004925910750429779, 'samples': 2933760, 'steps': 5729, 'loss/train': 3.4216084480285645} +02/24/2022 08:58:37 - INFO - codeparrot_training - Step 5730: {'lr': 0.0004925871205885968, 'samples': 2934272, 'steps': 5730, 'loss/train': 3.5515549182891846} +02/24/2022 08:58:40 - INFO - codeparrot_training - Step 5731: {'lr': 0.0004925831650950495, 'samples': 2934784, 'steps': 5731, 'loss/train': 2.1032602787017822} +02/24/2022 08:58:46 - INFO - codeparrot_training - Step 5732: {'lr': 0.000492579208562353, 'samples': 2935296, 'steps': 5732, 'loss/train': 1.753894329071045} +02/24/2022 08:58:49 - INFO - codeparrot_training - Step 5733: {'lr': 0.0004925752509905241, 'samples': 2935808, 'steps': 5733, 'loss/train': 3.2489020824432373} +02/24/2022 08:58:55 - INFO - codeparrot_training - Step 5734: {'lr': 0.0004925712923795799, 'samples': 2936320, 'steps': 5734, 'loss/train': 3.059749126434326} +02/24/2022 08:58:59 - INFO - codeparrot_training - Step 5735: {'lr': 0.0004925673327295374, 'samples': 2936832, 'steps': 5735, 'loss/train': 2.2254936695098877} +02/24/2022 08:59:04 - INFO - codeparrot_training - Step 5736: {'lr': 0.0004925633720404132, 'samples': 2937344, 'steps': 5736, 'loss/train': 2.7345802783966064} +02/24/2022 08:59:08 - INFO - codeparrot_training - Step 5737: {'lr': 0.0004925594103122248, 'samples': 2937856, 'steps': 5737, 'loss/train': 2.271477222442627} +02/24/2022 08:59:13 - INFO - codeparrot_training - Step 5738: {'lr': 0.0004925554475449888, 'samples': 2938368, 'steps': 5738, 'loss/train': 1.21036696434021} +02/24/2022 08:59:17 - INFO - codeparrot_training - Step 5739: {'lr': 0.0004925514837387223, 'samples': 2938880, 'steps': 5739, 'loss/train': 3.161942720413208} +02/24/2022 08:59:22 - INFO - codeparrot_training - Step 5740: {'lr': 0.0004925475188934423, 'samples': 2939392, 'steps': 5740, 'loss/train': 1.8844009637832642} +02/24/2022 08:59:26 - INFO - codeparrot_training - Step 5741: {'lr': 0.0004925435530091656, 'samples': 2939904, 'steps': 5741, 'loss/train': 2.592146635055542} +02/24/2022 08:59:31 - INFO - codeparrot_training - Step 5742: {'lr': 0.0004925395860859096, 'samples': 2940416, 'steps': 5742, 'loss/train': 2.1591148376464844} +02/24/2022 08:59:35 - INFO - codeparrot_training - Step 5743: {'lr': 0.0004925356181236908, 'samples': 2940928, 'steps': 5743, 'loss/train': 2.389815330505371} +02/24/2022 08:59:40 - INFO - codeparrot_training - Step 5744: {'lr': 0.0004925316491225265, 'samples': 2941440, 'steps': 5744, 'loss/train': 2.967702627182007} +02/24/2022 08:59:44 - INFO - codeparrot_training - Step 5745: {'lr': 0.0004925276790824336, 'samples': 2941952, 'steps': 5745, 'loss/train': 2.732590913772583} +02/24/2022 08:59:49 - INFO - codeparrot_training - Step 5746: {'lr': 0.0004925237080034291, 'samples': 2942464, 'steps': 5746, 'loss/train': 3.098123788833618} +02/24/2022 08:59:53 - INFO - codeparrot_training - Step 5747: {'lr': 0.0004925197358855301, 'samples': 2942976, 'steps': 5747, 'loss/train': 1.696014165878296} +02/24/2022 08:59:59 - INFO - codeparrot_training - Step 5748: {'lr': 0.0004925157627287536, 'samples': 2943488, 'steps': 5748, 'loss/train': 1.1901603937149048} +02/24/2022 09:00:02 - INFO - codeparrot_training - Step 5749: {'lr': 0.0004925117885331166, 'samples': 2944000, 'steps': 5749, 'loss/train': 2.0456440448760986} +02/24/2022 09:00:08 - INFO - codeparrot_training - Step 5750: {'lr': 0.000492507813298636, 'samples': 2944512, 'steps': 5750, 'loss/train': 1.910574197769165} +02/24/2022 09:00:11 - INFO - codeparrot_training - Step 5751: {'lr': 0.000492503837025329, 'samples': 2945024, 'steps': 5751, 'loss/train': 2.3957343101501465} +02/24/2022 09:00:17 - INFO - codeparrot_training - Step 5752: {'lr': 0.0004924998597132125, 'samples': 2945536, 'steps': 5752, 'loss/train': 2.528535842895508} +02/24/2022 09:00:21 - INFO - codeparrot_training - Step 5753: {'lr': 0.0004924958813623037, 'samples': 2946048, 'steps': 5753, 'loss/train': 2.163933753967285} +02/24/2022 09:00:26 - INFO - codeparrot_training - Step 5754: {'lr': 0.0004924919019726195, 'samples': 2946560, 'steps': 5754, 'loss/train': 2.6796388626098633} +02/24/2022 09:00:29 - INFO - codeparrot_training - Step 5755: {'lr': 0.000492487921544177, 'samples': 2947072, 'steps': 5755, 'loss/train': 2.167558193206787} +02/24/2022 09:00:35 - INFO - codeparrot_training - Step 5756: {'lr': 0.0004924839400769932, 'samples': 2947584, 'steps': 5756, 'loss/train': 2.3096165657043457} +02/24/2022 09:00:38 - INFO - codeparrot_training - Step 5757: {'lr': 0.0004924799575710852, 'samples': 2948096, 'steps': 5757, 'loss/train': 1.3764539957046509} +02/24/2022 09:00:45 - INFO - codeparrot_training - Step 5758: {'lr': 0.0004924759740264701, 'samples': 2948608, 'steps': 5758, 'loss/train': 2.897132635116577} +02/24/2022 09:00:48 - INFO - codeparrot_training - Step 5759: {'lr': 0.000492471989443165, 'samples': 2949120, 'steps': 5759, 'loss/train': 2.8032610416412354} +02/24/2022 09:00:54 - INFO - codeparrot_training - Step 5760: {'lr': 0.0004924680038211868, 'samples': 2949632, 'steps': 5760, 'loss/train': 1.8311803340911865} +02/24/2022 09:00:57 - INFO - codeparrot_training - Step 5761: {'lr': 0.0004924640171605526, 'samples': 2950144, 'steps': 5761, 'loss/train': 2.805656671524048} +02/24/2022 09:01:02 - INFO - codeparrot_training - Step 5762: {'lr': 0.0004924600294612796, 'samples': 2950656, 'steps': 5762, 'loss/train': 3.264230966567993} +02/24/2022 09:01:06 - INFO - codeparrot_training - Step 5763: {'lr': 0.0004924560407233848, 'samples': 2951168, 'steps': 5763, 'loss/train': 2.720665454864502} +02/24/2022 09:01:11 - INFO - codeparrot_training - Step 5764: {'lr': 0.0004924520509468854, 'samples': 2951680, 'steps': 5764, 'loss/train': 1.5945589542388916} +02/24/2022 09:01:15 - INFO - codeparrot_training - Step 5765: {'lr': 0.0004924480601317982, 'samples': 2952192, 'steps': 5765, 'loss/train': 3.415029525756836} +02/24/2022 09:01:20 - INFO - codeparrot_training - Step 5766: {'lr': 0.0004924440682781407, 'samples': 2952704, 'steps': 5766, 'loss/train': 2.8304691314697266} +02/24/2022 09:01:26 - INFO - codeparrot_training - Step 5767: {'lr': 0.0004924400753859297, 'samples': 2953216, 'steps': 5767, 'loss/train': 3.212505578994751} +02/24/2022 09:01:29 - INFO - codeparrot_training - Step 5768: {'lr': 0.0004924360814551825, 'samples': 2953728, 'steps': 5768, 'loss/train': 2.7774264812469482} +02/24/2022 09:01:33 - INFO - codeparrot_training - Step 5769: {'lr': 0.000492432086485916, 'samples': 2954240, 'steps': 5769, 'loss/train': 2.588395357131958} +02/24/2022 09:01:39 - INFO - codeparrot_training - Step 5770: {'lr': 0.0004924280904781475, 'samples': 2954752, 'steps': 5770, 'loss/train': 1.9354759454727173} +02/24/2022 09:01:45 - INFO - codeparrot_training - Step 5771: {'lr': 0.0004924240934318939, 'samples': 2955264, 'steps': 5771, 'loss/train': 2.613406181335449} +02/24/2022 09:01:48 - INFO - codeparrot_training - Step 5772: {'lr': 0.0004924200953471727, 'samples': 2955776, 'steps': 5772, 'loss/train': 2.8672473430633545} +02/24/2022 09:01:51 - INFO - codeparrot_training - Step 5773: {'lr': 0.0004924160962240005, 'samples': 2956288, 'steps': 5773, 'loss/train': 2.887533664703369} +02/24/2022 09:01:57 - INFO - codeparrot_training - Step 5774: {'lr': 0.0004924120960623949, 'samples': 2956800, 'steps': 5774, 'loss/train': 2.5469424724578857} +02/24/2022 09:02:00 - INFO - codeparrot_training - Step 5775: {'lr': 0.0004924080948623729, 'samples': 2957312, 'steps': 5775, 'loss/train': 2.3984427452087402} +02/24/2022 09:02:06 - INFO - codeparrot_training - Step 5776: {'lr': 0.0004924040926239515, 'samples': 2957824, 'steps': 5776, 'loss/train': 2.1007635593414307} +02/24/2022 09:02:09 - INFO - codeparrot_training - Step 5777: {'lr': 0.000492400089347148, 'samples': 2958336, 'steps': 5777, 'loss/train': 1.9850976467132568} +02/24/2022 09:02:15 - INFO - codeparrot_training - Step 5778: {'lr': 0.0004923960850319794, 'samples': 2958848, 'steps': 5778, 'loss/train': 2.062241315841675} +02/24/2022 09:02:18 - INFO - codeparrot_training - Step 5779: {'lr': 0.000492392079678463, 'samples': 2959360, 'steps': 5779, 'loss/train': 3.2259674072265625} +02/24/2022 09:02:24 - INFO - codeparrot_training - Step 5780: {'lr': 0.0004923880732866159, 'samples': 2959872, 'steps': 5780, 'loss/train': 2.1912457942962646} +02/24/2022 09:02:28 - INFO - codeparrot_training - Step 5781: {'lr': 0.0004923840658564553, 'samples': 2960384, 'steps': 5781, 'loss/train': 2.5156610012054443} +02/24/2022 09:02:33 - INFO - codeparrot_training - Step 5782: {'lr': 0.0004923800573879983, 'samples': 2960896, 'steps': 5782, 'loss/train': 1.65301513671875} +02/24/2022 09:02:39 - INFO - codeparrot_training - Step 5783: {'lr': 0.000492376047881262, 'samples': 2961408, 'steps': 5783, 'loss/train': 1.8982449769973755} +02/24/2022 09:02:42 - INFO - codeparrot_training - Step 5784: {'lr': 0.0004923720373362638, 'samples': 2961920, 'steps': 5784, 'loss/train': 2.152921199798584} +02/24/2022 09:02:48 - INFO - codeparrot_training - Step 5785: {'lr': 0.0004923680257530207, 'samples': 2962432, 'steps': 5785, 'loss/train': 2.8000853061676025} +02/24/2022 09:02:52 - INFO - codeparrot_training - Step 5786: {'lr': 0.0004923640131315499, 'samples': 2962944, 'steps': 5786, 'loss/train': 0.4488138258457184} +02/24/2022 09:02:55 - INFO - codeparrot_training - Step 5787: {'lr': 0.0004923599994718687, 'samples': 2963456, 'steps': 5787, 'loss/train': 0.4960688352584839} +02/24/2022 09:03:01 - INFO - codeparrot_training - Step 5788: {'lr': 0.0004923559847739941, 'samples': 2963968, 'steps': 5788, 'loss/train': 2.007481336593628} +02/24/2022 09:03:04 - INFO - codeparrot_training - Step 5789: {'lr': 0.0004923519690379436, 'samples': 2964480, 'steps': 5789, 'loss/train': 2.0681345462799072} +02/24/2022 09:03:10 - INFO - codeparrot_training - Step 5790: {'lr': 0.0004923479522637341, 'samples': 2964992, 'steps': 5790, 'loss/train': 2.125170946121216} +02/24/2022 09:03:14 - INFO - codeparrot_training - Step 5791: {'lr': 0.0004923439344513829, 'samples': 2965504, 'steps': 5791, 'loss/train': 1.9498525857925415} +02/24/2022 09:03:19 - INFO - codeparrot_training - Step 5792: {'lr': 0.0004923399156009073, 'samples': 2966016, 'steps': 5792, 'loss/train': 2.004350185394287} +02/24/2022 09:03:23 - INFO - codeparrot_training - Step 5793: {'lr': 0.0004923358957123245, 'samples': 2966528, 'steps': 5793, 'loss/train': 3.4744937419891357} +02/24/2022 09:03:29 - INFO - codeparrot_training - Step 5794: {'lr': 0.0004923318747856515, 'samples': 2967040, 'steps': 5794, 'loss/train': 2.7088301181793213} +02/24/2022 09:03:32 - INFO - codeparrot_training - Step 5795: {'lr': 0.0004923278528209059, 'samples': 2967552, 'steps': 5795, 'loss/train': 3.662992000579834} +02/24/2022 09:03:38 - INFO - codeparrot_training - Step 5796: {'lr': 0.0004923238298181047, 'samples': 2968064, 'steps': 5796, 'loss/train': 2.6054458618164062} +02/24/2022 09:03:41 - INFO - codeparrot_training - Step 5797: {'lr': 0.0004923198057772651, 'samples': 2968576, 'steps': 5797, 'loss/train': 0.2809189558029175} +02/24/2022 09:03:47 - INFO - codeparrot_training - Step 5798: {'lr': 0.0004923157806984044, 'samples': 2969088, 'steps': 5798, 'loss/train': 2.4262919425964355} +02/24/2022 09:03:50 - INFO - codeparrot_training - Step 5799: {'lr': 0.0004923117545815398, 'samples': 2969600, 'steps': 5799, 'loss/train': 1.0630701780319214} +02/24/2022 09:03:56 - INFO - codeparrot_training - Step 5800: {'lr': 0.0004923077274266886, 'samples': 2970112, 'steps': 5800, 'loss/train': 2.783456802368164} +02/24/2022 09:03:59 - INFO - codeparrot_training - Step 5801: {'lr': 0.0004923036992338681, 'samples': 2970624, 'steps': 5801, 'loss/train': 2.1558947563171387} +02/24/2022 09:04:05 - INFO - codeparrot_training - Step 5802: {'lr': 0.0004922996700030954, 'samples': 2971136, 'steps': 5802, 'loss/train': 2.67423939704895} +02/24/2022 09:04:09 - INFO - codeparrot_training - Step 5803: {'lr': 0.000492295639734388, 'samples': 2971648, 'steps': 5803, 'loss/train': 8.31851863861084} +02/24/2022 09:04:15 - INFO - codeparrot_training - Step 5804: {'lr': 0.0004922916084277629, 'samples': 2972160, 'steps': 5804, 'loss/train': 2.4561474323272705} +02/24/2022 09:04:18 - INFO - codeparrot_training - Step 5805: {'lr': 0.0004922875760832375, 'samples': 2972672, 'steps': 5805, 'loss/train': 2.176706075668335} +02/24/2022 09:04:24 - INFO - codeparrot_training - Step 5806: {'lr': 0.000492283542700829, 'samples': 2973184, 'steps': 5806, 'loss/train': 3.096372604370117} +02/24/2022 09:04:27 - INFO - codeparrot_training - Step 5807: {'lr': 0.0004922795082805549, 'samples': 2973696, 'steps': 5807, 'loss/train': 1.5772497653961182} +02/24/2022 09:04:33 - INFO - codeparrot_training - Step 5808: {'lr': 0.0004922754728224322, 'samples': 2974208, 'steps': 5808, 'loss/train': 2.438209056854248} +02/24/2022 09:04:36 - INFO - codeparrot_training - Step 5809: {'lr': 0.0004922714363264783, 'samples': 2974720, 'steps': 5809, 'loss/train': 0.3431764543056488} +02/24/2022 09:04:42 - INFO - codeparrot_training - Step 5810: {'lr': 0.0004922673987927106, 'samples': 2975232, 'steps': 5810, 'loss/train': 1.535483479499817} +02/24/2022 09:04:45 - INFO - codeparrot_training - Step 5811: {'lr': 0.0004922633602211462, 'samples': 2975744, 'steps': 5811, 'loss/train': 2.461210012435913} +02/24/2022 09:04:51 - INFO - codeparrot_training - Step 5812: {'lr': 0.0004922593206118025, 'samples': 2976256, 'steps': 5812, 'loss/train': 2.38917875289917} +02/24/2022 09:04:54 - INFO - codeparrot_training - Step 5813: {'lr': 0.0004922552799646968, 'samples': 2976768, 'steps': 5813, 'loss/train': 3.163112163543701} +02/24/2022 09:04:59 - INFO - codeparrot_training - Step 5814: {'lr': 0.0004922512382798463, 'samples': 2977280, 'steps': 5814, 'loss/train': 2.611781358718872} +02/24/2022 09:05:03 - INFO - codeparrot_training - Step 5815: {'lr': 0.0004922471955572686, 'samples': 2977792, 'steps': 5815, 'loss/train': 2.7317473888397217} +02/24/2022 09:05:09 - INFO - codeparrot_training - Step 5816: {'lr': 0.0004922431517969808, 'samples': 2978304, 'steps': 5816, 'loss/train': 1.324861764907837} +02/24/2022 09:05:13 - INFO - codeparrot_training - Step 5817: {'lr': 0.0004922391069990002, 'samples': 2978816, 'steps': 5817, 'loss/train': 3.162651777267456} +02/24/2022 09:05:18 - INFO - codeparrot_training - Step 5818: {'lr': 0.0004922350611633442, 'samples': 2979328, 'steps': 5818, 'loss/train': 1.4453734159469604} +02/24/2022 09:05:22 - INFO - codeparrot_training - Step 5819: {'lr': 0.0004922310142900302, 'samples': 2979840, 'steps': 5819, 'loss/train': 2.142991542816162} +02/24/2022 09:05:27 - INFO - codeparrot_training - Step 5820: {'lr': 0.0004922269663790753, 'samples': 2980352, 'steps': 5820, 'loss/train': 2.0249812602996826} +02/24/2022 09:05:31 - INFO - codeparrot_training - Step 5821: {'lr': 0.0004922229174304971, 'samples': 2980864, 'steps': 5821, 'loss/train': 2.174739360809326} +02/24/2022 09:05:36 - INFO - codeparrot_training - Step 5822: {'lr': 0.0004922188674443128, 'samples': 2981376, 'steps': 5822, 'loss/train': 2.2752199172973633} +02/24/2022 09:05:40 - INFO - codeparrot_training - Step 5823: {'lr': 0.0004922148164205398, 'samples': 2981888, 'steps': 5823, 'loss/train': 2.7619760036468506} +02/24/2022 09:05:45 - INFO - codeparrot_training - Step 5824: {'lr': 0.0004922107643591954, 'samples': 2982400, 'steps': 5824, 'loss/train': 2.9031381607055664} +02/24/2022 09:05:49 - INFO - codeparrot_training - Step 5825: {'lr': 0.000492206711260297, 'samples': 2982912, 'steps': 5825, 'loss/train': 1.8890568017959595} +02/24/2022 09:05:56 - INFO - codeparrot_training - Step 5826: {'lr': 0.000492202657123862, 'samples': 2983424, 'steps': 5826, 'loss/train': 3.002413511276245} +02/24/2022 09:05:59 - INFO - codeparrot_training - Step 5827: {'lr': 0.0004921986019499078, 'samples': 2983936, 'steps': 5827, 'loss/train': 3.159044027328491} +02/24/2022 09:06:05 - INFO - codeparrot_training - Step 5828: {'lr': 0.0004921945457384516, 'samples': 2984448, 'steps': 5828, 'loss/train': 2.5415422916412354} +02/24/2022 09:06:08 - INFO - codeparrot_training - Step 5829: {'lr': 0.0004921904884895108, 'samples': 2984960, 'steps': 5829, 'loss/train': 1.8086621761322021} +02/24/2022 09:06:14 - INFO - codeparrot_training - Step 5830: {'lr': 0.000492186430203103, 'samples': 2985472, 'steps': 5830, 'loss/train': 2.4877140522003174} +02/24/2022 09:06:17 - INFO - codeparrot_training - Step 5831: {'lr': 0.0004921823708792453, 'samples': 2985984, 'steps': 5831, 'loss/train': 0.10805436223745346} +02/24/2022 09:06:23 - INFO - codeparrot_training - Step 5832: {'lr': 0.0004921783105179552, 'samples': 2986496, 'steps': 5832, 'loss/train': 2.1557843685150146} +02/24/2022 09:06:26 - INFO - codeparrot_training - Step 5833: {'lr': 0.0004921742491192502, 'samples': 2987008, 'steps': 5833, 'loss/train': 1.9289510250091553} +02/24/2022 09:06:32 - INFO - codeparrot_training - Step 5834: {'lr': 0.0004921701866831477, 'samples': 2987520, 'steps': 5834, 'loss/train': 2.871187925338745} +02/24/2022 09:06:35 - INFO - codeparrot_training - Step 5835: {'lr': 0.000492166123209665, 'samples': 2988032, 'steps': 5835, 'loss/train': 3.156506299972534} +02/24/2022 09:06:41 - INFO - codeparrot_training - Step 5836: {'lr': 0.0004921620586988193, 'samples': 2988544, 'steps': 5836, 'loss/train': 2.2434442043304443} +02/24/2022 09:06:45 - INFO - codeparrot_training - Step 5837: {'lr': 0.0004921579931506285, 'samples': 2989056, 'steps': 5837, 'loss/train': 2.22975754737854} +02/24/2022 09:06:50 - INFO - codeparrot_training - Step 5838: {'lr': 0.0004921539265651096, 'samples': 2989568, 'steps': 5838, 'loss/train': 3.5447118282318115} +02/24/2022 09:06:54 - INFO - codeparrot_training - Step 5839: {'lr': 0.0004921498589422803, 'samples': 2990080, 'steps': 5839, 'loss/train': 2.172454595565796} +02/24/2022 09:06:59 - INFO - codeparrot_training - Step 5840: {'lr': 0.0004921457902821578, 'samples': 2990592, 'steps': 5840, 'loss/train': 2.519798994064331} +02/24/2022 09:07:03 - INFO - codeparrot_training - Step 5841: {'lr': 0.0004921417205847597, 'samples': 2991104, 'steps': 5841, 'loss/train': 3.081843852996826} +02/24/2022 09:07:08 - INFO - codeparrot_training - Step 5842: {'lr': 0.0004921376498501032, 'samples': 2991616, 'steps': 5842, 'loss/train': 2.781508684158325} +02/24/2022 09:07:12 - INFO - codeparrot_training - Step 5843: {'lr': 0.000492133578078206, 'samples': 2992128, 'steps': 5843, 'loss/train': 2.0008127689361572} +02/24/2022 09:07:17 - INFO - codeparrot_training - Step 5844: {'lr': 0.0004921295052690855, 'samples': 2992640, 'steps': 5844, 'loss/train': 2.194403886795044} +02/24/2022 09:07:22 - INFO - codeparrot_training - Step 5845: {'lr': 0.000492125431422759, 'samples': 2993152, 'steps': 5845, 'loss/train': 2.6696548461914062} +02/24/2022 09:07:26 - INFO - codeparrot_training - Step 5846: {'lr': 0.0004921213565392441, 'samples': 2993664, 'steps': 5846, 'loss/train': 1.3034484386444092} +02/24/2022 09:07:31 - INFO - codeparrot_training - Step 5847: {'lr': 0.000492117280618558, 'samples': 2994176, 'steps': 5847, 'loss/train': 2.9973793029785156} +02/24/2022 09:07:35 - INFO - codeparrot_training - Step 5848: {'lr': 0.0004921132036607186, 'samples': 2994688, 'steps': 5848, 'loss/train': 1.3244411945343018} +02/24/2022 09:07:41 - INFO - codeparrot_training - Step 5849: {'lr': 0.0004921091256657429, 'samples': 2995200, 'steps': 5849, 'loss/train': 3.166919469833374} +02/24/2022 09:07:44 - INFO - codeparrot_training - Step 5850: {'lr': 0.0004921050466336487, 'samples': 2995712, 'steps': 5850, 'loss/train': 2.2517757415771484} +02/24/2022 09:07:50 - INFO - codeparrot_training - Step 5851: {'lr': 0.0004921009665644535, 'samples': 2996224, 'steps': 5851, 'loss/train': 1.4505754709243774} +02/24/2022 09:07:54 - INFO - codeparrot_training - Step 5852: {'lr': 0.0004920968854581745, 'samples': 2996736, 'steps': 5852, 'loss/train': 2.563232898712158} +02/24/2022 09:07:59 - INFO - codeparrot_training - Step 5853: {'lr': 0.0004920928033148292, 'samples': 2997248, 'steps': 5853, 'loss/train': 0.4264124035835266} +02/24/2022 09:08:03 - INFO - codeparrot_training - Step 5854: {'lr': 0.0004920887201344353, 'samples': 2997760, 'steps': 5854, 'loss/train': 2.3094394207000732} +02/24/2022 09:08:08 - INFO - codeparrot_training - Step 5855: {'lr': 0.0004920846359170103, 'samples': 2998272, 'steps': 5855, 'loss/train': 2.546543598175049} +02/24/2022 09:08:12 - INFO - codeparrot_training - Step 5856: {'lr': 0.0004920805506625714, 'samples': 2998784, 'steps': 5856, 'loss/train': 1.9825351238250732} +02/24/2022 09:08:17 - INFO - codeparrot_training - Step 5857: {'lr': 0.0004920764643711364, 'samples': 2999296, 'steps': 5857, 'loss/train': 2.429476499557495} +02/24/2022 09:08:21 - INFO - codeparrot_training - Step 5858: {'lr': 0.0004920723770427226, 'samples': 2999808, 'steps': 5858, 'loss/train': 1.733453392982483} +02/24/2022 09:08:26 - INFO - codeparrot_training - Step 5859: {'lr': 0.0004920682886773478, 'samples': 3000320, 'steps': 5859, 'loss/train': 1.9622443914413452} +02/24/2022 09:08:30 - INFO - codeparrot_training - Step 5860: {'lr': 0.000492064199275029, 'samples': 3000832, 'steps': 5860, 'loss/train': 2.8610122203826904} +02/24/2022 09:08:36 - INFO - codeparrot_training - Step 5861: {'lr': 0.0004920601088357844, 'samples': 3001344, 'steps': 5861, 'loss/train': 2.8463919162750244} +02/24/2022 09:08:40 - INFO - codeparrot_training - Step 5862: {'lr': 0.0004920560173596309, 'samples': 3001856, 'steps': 5862, 'loss/train': 2.971566915512085} +02/24/2022 09:08:45 - INFO - codeparrot_training - Step 5863: {'lr': 0.0004920519248465864, 'samples': 3002368, 'steps': 5863, 'loss/train': 2.667762041091919} +02/24/2022 09:08:49 - INFO - codeparrot_training - Step 5864: {'lr': 0.0004920478312966683, 'samples': 3002880, 'steps': 5864, 'loss/train': 2.155449867248535} +02/24/2022 09:08:54 - INFO - codeparrot_training - Step 5865: {'lr': 0.0004920437367098941, 'samples': 3003392, 'steps': 5865, 'loss/train': 2.5240321159362793} +02/24/2022 09:08:58 - INFO - codeparrot_training - Step 5866: {'lr': 0.0004920396410862815, 'samples': 3003904, 'steps': 5866, 'loss/train': 3.0880231857299805} +02/24/2022 09:09:03 - INFO - codeparrot_training - Step 5867: {'lr': 0.0004920355444258479, 'samples': 3004416, 'steps': 5867, 'loss/train': 3.0169613361358643} +02/24/2022 09:09:07 - INFO - codeparrot_training - Step 5868: {'lr': 0.0004920314467286108, 'samples': 3004928, 'steps': 5868, 'loss/train': 1.5566153526306152} +02/24/2022 09:09:12 - INFO - codeparrot_training - Step 5869: {'lr': 0.0004920273479945878, 'samples': 3005440, 'steps': 5869, 'loss/train': 2.141596555709839} +02/24/2022 09:09:16 - INFO - codeparrot_training - Step 5870: {'lr': 0.0004920232482237966, 'samples': 3005952, 'steps': 5870, 'loss/train': 1.9738441705703735} +02/24/2022 09:09:23 - INFO - codeparrot_training - Step 5871: {'lr': 0.0004920191474162547, 'samples': 3006464, 'steps': 5871, 'loss/train': 2.539581298828125} +02/24/2022 09:09:26 - INFO - codeparrot_training - Step 5872: {'lr': 0.0004920150455719795, 'samples': 3006976, 'steps': 5872, 'loss/train': 3.6093921661376953} +02/24/2022 09:09:32 - INFO - codeparrot_training - Step 5873: {'lr': 0.0004920109426909887, 'samples': 3007488, 'steps': 5873, 'loss/train': 3.1514780521392822} +02/24/2022 09:09:35 - INFO - codeparrot_training - Step 5874: {'lr': 0.0004920068387733, 'samples': 3008000, 'steps': 5874, 'loss/train': 2.913638114929199} +02/24/2022 09:09:41 - INFO - codeparrot_training - Step 5875: {'lr': 0.0004920027338189307, 'samples': 3008512, 'steps': 5875, 'loss/train': 2.3762590885162354} +02/24/2022 09:09:44 - INFO - codeparrot_training - Step 5876: {'lr': 0.0004919986278278986, 'samples': 3009024, 'steps': 5876, 'loss/train': 2.9139466285705566} +02/24/2022 09:09:50 - INFO - codeparrot_training - Step 5877: {'lr': 0.0004919945208002212, 'samples': 3009536, 'steps': 5877, 'loss/train': 2.5311338901519775} +02/24/2022 09:09:53 - INFO - codeparrot_training - Step 5878: {'lr': 0.0004919904127359162, 'samples': 3010048, 'steps': 5878, 'loss/train': 2.225706100463867} +02/24/2022 09:09:59 - INFO - codeparrot_training - Step 5879: {'lr': 0.000491986303635001, 'samples': 3010560, 'steps': 5879, 'loss/train': 0.3612838387489319} +02/24/2022 09:10:02 - INFO - codeparrot_training - Step 5880: {'lr': 0.0004919821934974933, 'samples': 3011072, 'steps': 5880, 'loss/train': 2.4800500869750977} +02/24/2022 09:10:08 - INFO - codeparrot_training - Step 5881: {'lr': 0.0004919780823234108, 'samples': 3011584, 'steps': 5881, 'loss/train': 2.014688014984131} +02/24/2022 09:10:12 - INFO - codeparrot_training - Step 5882: {'lr': 0.000491973970112771, 'samples': 3012096, 'steps': 5882, 'loss/train': 2.718477725982666} +02/24/2022 09:10:17 - INFO - codeparrot_training - Step 5883: {'lr': 0.0004919698568655916, 'samples': 3012608, 'steps': 5883, 'loss/train': 2.08742356300354} +02/24/2022 09:10:21 - INFO - codeparrot_training - Step 5884: {'lr': 0.0004919657425818901, 'samples': 3013120, 'steps': 5884, 'loss/train': 3.067800760269165} +02/24/2022 09:10:26 - INFO - codeparrot_training - Step 5885: {'lr': 0.0004919616272616842, 'samples': 3013632, 'steps': 5885, 'loss/train': 2.602139949798584} +02/24/2022 09:10:30 - INFO - codeparrot_training - Step 5886: {'lr': 0.0004919575109049915, 'samples': 3014144, 'steps': 5886, 'loss/train': 2.448864459991455} +02/24/2022 09:10:35 - INFO - codeparrot_training - Step 5887: {'lr': 0.0004919533935118296, 'samples': 3014656, 'steps': 5887, 'loss/train': 2.2215683460235596} +02/24/2022 09:10:39 - INFO - codeparrot_training - Step 5888: {'lr': 0.0004919492750822163, 'samples': 3015168, 'steps': 5888, 'loss/train': 2.8769874572753906} +02/24/2022 09:10:44 - INFO - codeparrot_training - Step 5889: {'lr': 0.0004919451556161692, 'samples': 3015680, 'steps': 5889, 'loss/train': 2.2274057865142822} +02/24/2022 09:10:48 - INFO - codeparrot_training - Step 5890: {'lr': 0.0004919410351137058, 'samples': 3016192, 'steps': 5890, 'loss/train': 1.046937346458435} +02/24/2022 09:10:53 - INFO - codeparrot_training - Step 5891: {'lr': 0.0004919369135748438, 'samples': 3016704, 'steps': 5891, 'loss/train': 1.8074312210083008} +02/24/2022 09:10:57 - INFO - codeparrot_training - Step 5892: {'lr': 0.0004919327909996008, 'samples': 3017216, 'steps': 5892, 'loss/train': 2.5137054920196533} +02/24/2022 09:11:02 - INFO - codeparrot_training - Step 5893: {'lr': 0.0004919286673879948, 'samples': 3017728, 'steps': 5893, 'loss/train': 2.2542898654937744} +02/24/2022 09:11:06 - INFO - codeparrot_training - Step 5894: {'lr': 0.000491924542740043, 'samples': 3018240, 'steps': 5894, 'loss/train': 2.457650899887085} +02/24/2022 09:11:11 - INFO - codeparrot_training - Step 5895: {'lr': 0.0004919204170557634, 'samples': 3018752, 'steps': 5895, 'loss/train': 2.6880722045898438} +02/24/2022 09:11:14 - INFO - codeparrot_training - Step 5896: {'lr': 0.0004919162903351734, 'samples': 3019264, 'steps': 5896, 'loss/train': 2.5171055793762207} +02/24/2022 09:11:21 - INFO - codeparrot_training - Step 5897: {'lr': 0.000491912162578291, 'samples': 3019776, 'steps': 5897, 'loss/train': 2.186033010482788} +02/24/2022 09:11:24 - INFO - codeparrot_training - Step 5898: {'lr': 0.0004919080337851336, 'samples': 3020288, 'steps': 5898, 'loss/train': 1.3456863164901733} +02/24/2022 09:11:30 - INFO - codeparrot_training - Step 5899: {'lr': 0.000491903903955719, 'samples': 3020800, 'steps': 5899, 'loss/train': 1.9215354919433594} +02/24/2022 09:11:33 - INFO - codeparrot_training - Step 5900: {'lr': 0.0004918997730900649, 'samples': 3021312, 'steps': 5900, 'loss/train': 1.5159803628921509} +02/24/2022 09:11:39 - INFO - codeparrot_training - Step 5901: {'lr': 0.000491895641188189, 'samples': 3021824, 'steps': 5901, 'loss/train': 2.7701447010040283} +02/24/2022 09:11:42 - INFO - codeparrot_training - Step 5902: {'lr': 0.000491891508250109, 'samples': 3022336, 'steps': 5902, 'loss/train': 3.80869722366333} +02/24/2022 09:11:48 - INFO - codeparrot_training - Step 5903: {'lr': 0.0004918873742758426, 'samples': 3022848, 'steps': 5903, 'loss/train': 2.234260320663452} +02/24/2022 09:11:51 - INFO - codeparrot_training - Step 5904: {'lr': 0.0004918832392654074, 'samples': 3023360, 'steps': 5904, 'loss/train': 2.6218695640563965} +02/24/2022 09:11:57 - INFO - codeparrot_training - Step 5905: {'lr': 0.0004918791032188214, 'samples': 3023872, 'steps': 5905, 'loss/train': 3.7347638607025146} +02/24/2022 09:12:00 - INFO - codeparrot_training - Step 5906: {'lr': 0.0004918749661361019, 'samples': 3024384, 'steps': 5906, 'loss/train': 2.1661012172698975} +02/24/2022 09:12:06 - INFO - codeparrot_training - Step 5907: {'lr': 0.000491870828017267, 'samples': 3024896, 'steps': 5907, 'loss/train': 2.8488426208496094} +02/24/2022 09:12:10 - INFO - codeparrot_training - Step 5908: {'lr': 0.0004918666888623342, 'samples': 3025408, 'steps': 5908, 'loss/train': 2.373432159423828} +02/24/2022 09:12:15 - INFO - codeparrot_training - Step 5909: {'lr': 0.0004918625486713214, 'samples': 3025920, 'steps': 5909, 'loss/train': 1.8840291500091553} +02/24/2022 09:12:19 - INFO - codeparrot_training - Step 5910: {'lr': 0.0004918584074442462, 'samples': 3026432, 'steps': 5910, 'loss/train': 1.9829857349395752} +02/24/2022 09:12:24 - INFO - codeparrot_training - Step 5911: {'lr': 0.0004918542651811263, 'samples': 3026944, 'steps': 5911, 'loss/train': 2.4264886379241943} +02/24/2022 09:12:28 - INFO - codeparrot_training - Step 5912: {'lr': 0.0004918501218819796, 'samples': 3027456, 'steps': 5912, 'loss/train': 3.1741394996643066} +02/24/2022 09:12:33 - INFO - codeparrot_training - Step 5913: {'lr': 0.0004918459775468238, 'samples': 3027968, 'steps': 5913, 'loss/train': 2.344528913497925} +02/24/2022 09:12:39 - INFO - codeparrot_training - Step 5914: {'lr': 0.0004918418321756766, 'samples': 3028480, 'steps': 5914, 'loss/train': 2.5269615650177} +02/24/2022 09:12:42 - INFO - codeparrot_training - Step 5915: {'lr': 0.0004918376857685557, 'samples': 3028992, 'steps': 5915, 'loss/train': 1.707935094833374} +02/24/2022 09:12:48 - INFO - codeparrot_training - Step 5916: {'lr': 0.000491833538325479, 'samples': 3029504, 'steps': 5916, 'loss/train': 1.5108113288879395} +02/24/2022 09:12:52 - INFO - codeparrot_training - Step 5917: {'lr': 0.0004918293898464643, 'samples': 3030016, 'steps': 5917, 'loss/train': 1.932183861732483} +02/24/2022 09:12:57 - INFO - codeparrot_training - Step 5918: {'lr': 0.0004918252403315292, 'samples': 3030528, 'steps': 5918, 'loss/train': 2.0883026123046875} +02/24/2022 09:13:01 - INFO - codeparrot_training - Step 5919: {'lr': 0.0004918210897806916, 'samples': 3031040, 'steps': 5919, 'loss/train': 2.218614101409912} +02/24/2022 09:13:07 - INFO - codeparrot_training - Step 5920: {'lr': 0.0004918169381939692, 'samples': 3031552, 'steps': 5920, 'loss/train': 2.778390645980835} +02/24/2022 09:13:10 - INFO - codeparrot_training - Step 5921: {'lr': 0.0004918127855713799, 'samples': 3032064, 'steps': 5921, 'loss/train': 2.4599533081054688} +02/24/2022 09:13:16 - INFO - codeparrot_training - Step 5922: {'lr': 0.0004918086319129413, 'samples': 3032576, 'steps': 5922, 'loss/train': 2.545726776123047} +02/24/2022 09:13:19 - INFO - codeparrot_training - Step 5923: {'lr': 0.0004918044772186714, 'samples': 3033088, 'steps': 5923, 'loss/train': 2.906093120574951} +02/24/2022 09:13:22 - INFO - codeparrot_training - Step 5924: {'lr': 0.0004918003214885877, 'samples': 3033600, 'steps': 5924, 'loss/train': 3.477184295654297} +02/24/2022 09:13:28 - INFO - codeparrot_training - Step 5925: {'lr': 0.0004917961647227084, 'samples': 3034112, 'steps': 5925, 'loss/train': 2.5634021759033203} +02/24/2022 09:13:36 - INFO - codeparrot_training - Step 5926: {'lr': 0.0004917920069210511, 'samples': 3034624, 'steps': 5926, 'loss/train': 2.973698616027832} +02/24/2022 09:13:39 - INFO - codeparrot_training - Step 5927: {'lr': 0.0004917878480836336, 'samples': 3035136, 'steps': 5927, 'loss/train': 2.3524036407470703} +02/24/2022 09:13:45 - INFO - codeparrot_training - Step 5928: {'lr': 0.0004917836882104737, 'samples': 3035648, 'steps': 5928, 'loss/train': 2.9325084686279297} +02/24/2022 09:13:48 - INFO - codeparrot_training - Step 5929: {'lr': 0.0004917795273015892, 'samples': 3036160, 'steps': 5929, 'loss/train': 2.26198148727417} +02/24/2022 09:13:54 - INFO - codeparrot_training - Step 5930: {'lr': 0.0004917753653569981, 'samples': 3036672, 'steps': 5930, 'loss/train': 3.487244129180908} +02/24/2022 09:13:57 - INFO - codeparrot_training - Step 5931: {'lr': 0.000491771202376718, 'samples': 3037184, 'steps': 5931, 'loss/train': 3.7557060718536377} +02/24/2022 09:14:03 - INFO - codeparrot_training - Step 5932: {'lr': 0.000491767038360767, 'samples': 3037696, 'steps': 5932, 'loss/train': 2.0902822017669678} +02/24/2022 09:14:06 - INFO - codeparrot_training - Step 5933: {'lr': 0.0004917628733091626, 'samples': 3038208, 'steps': 5933, 'loss/train': 3.6177351474761963} +02/24/2022 09:14:12 - INFO - codeparrot_training - Step 5934: {'lr': 0.000491758707221923, 'samples': 3038720, 'steps': 5934, 'loss/train': 3.8787155151367188} +02/24/2022 09:14:15 - INFO - codeparrot_training - Step 5935: {'lr': 0.0004917545400990657, 'samples': 3039232, 'steps': 5935, 'loss/train': 1.9852008819580078} +02/24/2022 09:14:23 - INFO - codeparrot_training - Step 5936: {'lr': 0.0004917503719406087, 'samples': 3039744, 'steps': 5936, 'loss/train': 2.1472153663635254} +02/24/2022 09:14:26 - INFO - codeparrot_training - Step 5937: {'lr': 0.00049174620274657, 'samples': 3040256, 'steps': 5937, 'loss/train': 2.3118090629577637} +02/24/2022 09:14:32 - INFO - codeparrot_training - Step 5938: {'lr': 0.0004917420325169673, 'samples': 3040768, 'steps': 5938, 'loss/train': 2.198652505874634} +02/24/2022 09:14:35 - INFO - codeparrot_training - Step 5939: {'lr': 0.0004917378612518185, 'samples': 3041280, 'steps': 5939, 'loss/train': 1.9445152282714844} +02/24/2022 09:14:41 - INFO - codeparrot_training - Step 5940: {'lr': 0.0004917336889511414, 'samples': 3041792, 'steps': 5940, 'loss/train': 2.235792875289917} +02/24/2022 09:14:44 - INFO - codeparrot_training - Step 5941: {'lr': 0.0004917295156149539, 'samples': 3042304, 'steps': 5941, 'loss/train': 1.3855277299880981} +02/24/2022 09:14:50 - INFO - codeparrot_training - Step 5942: {'lr': 0.000491725341243274, 'samples': 3042816, 'steps': 5942, 'loss/train': 2.03230619430542} +02/24/2022 09:14:53 - INFO - codeparrot_training - Step 5943: {'lr': 0.0004917211658361196, 'samples': 3043328, 'steps': 5943, 'loss/train': 7.5376973152160645} +02/24/2022 09:14:59 - INFO - codeparrot_training - Step 5944: {'lr': 0.0004917169893935083, 'samples': 3043840, 'steps': 5944, 'loss/train': 1.9802172183990479} +02/24/2022 09:15:02 - INFO - codeparrot_training - Step 5945: {'lr': 0.0004917128119154582, 'samples': 3044352, 'steps': 5945, 'loss/train': 2.066563606262207} +02/24/2022 09:15:10 - INFO - codeparrot_training - Step 5946: {'lr': 0.0004917086334019872, 'samples': 3044864, 'steps': 5946, 'loss/train': 2.6638426780700684} +02/24/2022 09:15:13 - INFO - codeparrot_training - Step 5947: {'lr': 0.0004917044538531131, 'samples': 3045376, 'steps': 5947, 'loss/train': 2.811230421066284} +02/24/2022 09:15:16 - INFO - codeparrot_training - Step 5948: {'lr': 0.000491700273268854, 'samples': 3045888, 'steps': 5948, 'loss/train': 2.620330572128296} +02/24/2022 09:15:22 - INFO - codeparrot_training - Step 5949: {'lr': 0.0004916960916492276, 'samples': 3046400, 'steps': 5949, 'loss/train': 3.8698835372924805} +02/24/2022 09:15:25 - INFO - codeparrot_training - Step 5950: {'lr': 0.0004916919089942519, 'samples': 3046912, 'steps': 5950, 'loss/train': 2.5785202980041504} +02/24/2022 09:15:31 - INFO - codeparrot_training - Step 5951: {'lr': 0.0004916877253039448, 'samples': 3047424, 'steps': 5951, 'loss/train': 3.1925880908966064} +02/24/2022 09:15:37 - INFO - codeparrot_training - Step 5952: {'lr': 0.0004916835405783242, 'samples': 3047936, 'steps': 5952, 'loss/train': 1.9864548444747925} +02/24/2022 09:15:40 - INFO - codeparrot_training - Step 5953: {'lr': 0.0004916793548174081, 'samples': 3048448, 'steps': 5953, 'loss/train': 2.669029951095581} +02/24/2022 09:15:46 - INFO - codeparrot_training - Step 5954: {'lr': 0.0004916751680212145, 'samples': 3048960, 'steps': 5954, 'loss/train': 2.4319956302642822} +02/24/2022 09:15:49 - INFO - codeparrot_training - Step 5955: {'lr': 0.000491670980189761, 'samples': 3049472, 'steps': 5955, 'loss/train': 3.0009117126464844} +02/24/2022 09:15:55 - INFO - codeparrot_training - Step 5956: {'lr': 0.0004916667913230659, 'samples': 3049984, 'steps': 5956, 'loss/train': 2.445373296737671} +02/24/2022 09:15:58 - INFO - codeparrot_training - Step 5957: {'lr': 0.000491662601421147, 'samples': 3050496, 'steps': 5957, 'loss/train': 2.3539483547210693} +02/24/2022 09:16:05 - INFO - codeparrot_training - Step 5958: {'lr': 0.0004916584104840222, 'samples': 3051008, 'steps': 5958, 'loss/train': 3.514528512954712} +02/24/2022 09:16:09 - INFO - codeparrot_training - Step 5959: {'lr': 0.0004916542185117095, 'samples': 3051520, 'steps': 5959, 'loss/train': 2.8166308403015137} +02/24/2022 09:16:14 - INFO - codeparrot_training - Step 5960: {'lr': 0.0004916500255042268, 'samples': 3052032, 'steps': 5960, 'loss/train': 2.885915994644165} +02/24/2022 09:16:18 - INFO - codeparrot_training - Step 5961: {'lr': 0.0004916458314615923, 'samples': 3052544, 'steps': 5961, 'loss/train': 1.8288182020187378} +02/24/2022 09:16:23 - INFO - codeparrot_training - Step 5962: {'lr': 0.0004916416363838237, 'samples': 3053056, 'steps': 5962, 'loss/train': 3.064462661743164} +02/24/2022 09:16:27 - INFO - codeparrot_training - Step 5963: {'lr': 0.000491637440270939, 'samples': 3053568, 'steps': 5963, 'loss/train': 1.8593496084213257} +02/24/2022 09:16:32 - INFO - codeparrot_training - Step 5964: {'lr': 0.0004916332431229562, 'samples': 3054080, 'steps': 5964, 'loss/train': 2.246675968170166} +02/24/2022 09:16:36 - INFO - codeparrot_training - Step 5965: {'lr': 0.0004916290449398934, 'samples': 3054592, 'steps': 5965, 'loss/train': 2.6514439582824707} +02/24/2022 09:16:41 - INFO - codeparrot_training - Step 5966: {'lr': 0.0004916248457217686, 'samples': 3055104, 'steps': 5966, 'loss/train': 0.20919479429721832} +02/24/2022 09:16:45 - INFO - codeparrot_training - Step 5967: {'lr': 0.0004916206454685995, 'samples': 3055616, 'steps': 5967, 'loss/train': 2.311065196990967} +02/24/2022 09:16:50 - INFO - codeparrot_training - Step 5968: {'lr': 0.0004916164441804044, 'samples': 3056128, 'steps': 5968, 'loss/train': 1.8287731409072876} +02/24/2022 09:16:54 - INFO - codeparrot_training - Step 5969: {'lr': 0.0004916122418572011, 'samples': 3056640, 'steps': 5969, 'loss/train': 3.03713321685791} +02/24/2022 09:16:59 - INFO - codeparrot_training - Step 5970: {'lr': 0.0004916080384990077, 'samples': 3057152, 'steps': 5970, 'loss/train': 2.46460223197937} +02/24/2022 09:17:03 - INFO - codeparrot_training - Step 5971: {'lr': 0.0004916038341058423, 'samples': 3057664, 'steps': 5971, 'loss/train': 3.3667428493499756} +02/24/2022 09:17:09 - INFO - codeparrot_training - Step 5972: {'lr': 0.0004915996286777226, 'samples': 3058176, 'steps': 5972, 'loss/train': 2.676967144012451} +02/24/2022 09:17:12 - INFO - codeparrot_training - Step 5973: {'lr': 0.0004915954222146669, 'samples': 3058688, 'steps': 5973, 'loss/train': 2.5330419540405273} +02/24/2022 09:17:20 - INFO - codeparrot_training - Step 5974: {'lr': 0.0004915912147166932, 'samples': 3059200, 'steps': 5974, 'loss/train': 2.5565476417541504} +02/24/2022 09:17:23 - INFO - codeparrot_training - Step 5975: {'lr': 0.0004915870061838193, 'samples': 3059712, 'steps': 5975, 'loss/train': 2.807097911834717} +02/24/2022 09:17:29 - INFO - codeparrot_training - Step 5976: {'lr': 0.0004915827966160634, 'samples': 3060224, 'steps': 5976, 'loss/train': 2.2697389125823975} +02/24/2022 09:17:32 - INFO - codeparrot_training - Step 5977: {'lr': 0.0004915785860134436, 'samples': 3060736, 'steps': 5977, 'loss/train': 2.375128984451294} +02/24/2022 09:17:38 - INFO - codeparrot_training - Step 5978: {'lr': 0.0004915743743759779, 'samples': 3061248, 'steps': 5978, 'loss/train': 2.197355270385742} +02/24/2022 09:17:41 - INFO - codeparrot_training - Step 5979: {'lr': 0.0004915701617036842, 'samples': 3061760, 'steps': 5979, 'loss/train': 2.348787307739258} +02/24/2022 09:17:47 - INFO - codeparrot_training - Step 5980: {'lr': 0.0004915659479965806, 'samples': 3062272, 'steps': 5980, 'loss/train': 2.3333446979522705} +02/24/2022 09:17:50 - INFO - codeparrot_training - Step 5981: {'lr': 0.0004915617332546852, 'samples': 3062784, 'steps': 5981, 'loss/train': 2.1261661052703857} +02/24/2022 09:17:56 - INFO - codeparrot_training - Step 5982: {'lr': 0.0004915575174780161, 'samples': 3063296, 'steps': 5982, 'loss/train': 1.4645841121673584} +02/24/2022 09:17:59 - INFO - codeparrot_training - Step 5983: {'lr': 0.0004915533006665912, 'samples': 3063808, 'steps': 5983, 'loss/train': 2.9423398971557617} +02/24/2022 09:18:06 - INFO - codeparrot_training - Step 5984: {'lr': 0.0004915490828204287, 'samples': 3064320, 'steps': 5984, 'loss/train': 2.4179799556732178} +02/24/2022 09:18:10 - INFO - codeparrot_training - Step 5985: {'lr': 0.0004915448639395466, 'samples': 3064832, 'steps': 5985, 'loss/train': 2.138633966445923} +02/24/2022 09:18:15 - INFO - codeparrot_training - Step 5986: {'lr': 0.0004915406440239631, 'samples': 3065344, 'steps': 5986, 'loss/train': 3.034088611602783} +02/24/2022 09:18:19 - INFO - codeparrot_training - Step 5987: {'lr': 0.0004915364230736961, 'samples': 3065856, 'steps': 5987, 'loss/train': 8.288673400878906} +02/24/2022 09:18:24 - INFO - codeparrot_training - Step 5988: {'lr': 0.0004915322010887637, 'samples': 3066368, 'steps': 5988, 'loss/train': 2.684619665145874} +02/24/2022 09:18:28 - INFO - codeparrot_training - Step 5989: {'lr': 0.0004915279780691843, 'samples': 3066880, 'steps': 5989, 'loss/train': 2.3704662322998047} +02/24/2022 09:18:33 - INFO - codeparrot_training - Step 5990: {'lr': 0.0004915237540149755, 'samples': 3067392, 'steps': 5990, 'loss/train': 1.8893766403198242} +02/24/2022 09:18:37 - INFO - codeparrot_training - Step 5991: {'lr': 0.0004915195289261557, 'samples': 3067904, 'steps': 5991, 'loss/train': 2.4566521644592285} +02/24/2022 09:18:42 - INFO - codeparrot_training - Step 5992: {'lr': 0.0004915153028027429, 'samples': 3068416, 'steps': 5992, 'loss/train': 2.708566188812256} +02/24/2022 09:18:46 - INFO - codeparrot_training - Step 5993: {'lr': 0.0004915110756447552, 'samples': 3068928, 'steps': 5993, 'loss/train': 2.671201467514038} +02/24/2022 09:18:53 - INFO - codeparrot_training - Step 5994: {'lr': 0.0004915068474522109, 'samples': 3069440, 'steps': 5994, 'loss/train': 2.5488717555999756} +02/24/2022 09:18:56 - INFO - codeparrot_training - Step 5995: {'lr': 0.0004915026182251278, 'samples': 3069952, 'steps': 5995, 'loss/train': 1.259892225265503} +02/24/2022 09:19:02 - INFO - codeparrot_training - Step 5996: {'lr': 0.0004914983879635242, 'samples': 3070464, 'steps': 5996, 'loss/train': 2.0499367713928223} +02/24/2022 09:19:05 - INFO - codeparrot_training - Step 5997: {'lr': 0.0004914941566674183, 'samples': 3070976, 'steps': 5997, 'loss/train': 2.102783679962158} +02/24/2022 09:19:11 - INFO - codeparrot_training - Step 5998: {'lr': 0.0004914899243368279, 'samples': 3071488, 'steps': 5998, 'loss/train': 1.7195990085601807} +02/24/2022 09:19:14 - INFO - codeparrot_training - Step 5999: {'lr': 0.0004914856909717715, 'samples': 3072000, 'steps': 5999, 'loss/train': 2.396481513977051} +02/24/2022 09:19:14 - INFO - codeparrot_training - Evaluating and saving model checkpoint