diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -9355,3 +9355,1009 @@ Use FP16 precision: False 02/24/2022 13:12:30 - INFO - codeparrot_training - Step 8998: {'lr': 0.00047423265732571295, 'samples': 4607488, 'steps': 8998, 'loss/train': 2.1104156970977783} 02/24/2022 13:12:34 - INFO - codeparrot_training - Step 8999: {'lr': 0.00047422542183469775, 'samples': 4608000, 'steps': 8999, 'loss/train': 3.3685567378997803} 02/24/2022 13:12:34 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 13:12:51 - WARNING - huggingface_hub.repository - Several commits (9) will be pushed upstream. +02/24/2022 13:12:51 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 13:13:25 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 869a374..8cd23e2 floral-grass-11 -> floral-grass-11 + +02/24/2022 13:13:31 - INFO - codeparrot_training - Step 9000: {'lr': 0.0004742181853831721, 'samples': 4608512, 'steps': 9000, 'loss/train': 2.7164244651794434} +02/24/2022 13:13:35 - INFO - codeparrot_training - Step 9001: {'lr': 0.00047421094797116687, 'samples': 4609024, 'steps': 9001, 'loss/train': 2.5897510051727295} +02/24/2022 13:13:40 - INFO - codeparrot_training - Step 9002: {'lr': 0.00047420370959871315, 'samples': 4609536, 'steps': 9002, 'loss/train': 2.2958545684814453} +02/24/2022 13:13:44 - INFO - codeparrot_training - Step 9003: {'lr': 0.000474196470265842, 'samples': 4610048, 'steps': 9003, 'loss/train': 2.6940038204193115} +02/24/2022 13:13:49 - INFO - codeparrot_training - Step 9004: {'lr': 0.0004741892299725843, 'samples': 4610560, 'steps': 9004, 'loss/train': 1.2317273616790771} +02/24/2022 13:13:53 - INFO - codeparrot_training - Step 9005: {'lr': 0.0004741819887189711, 'samples': 4611072, 'steps': 9005, 'loss/train': 2.348780632019043} +02/24/2022 13:13:59 - INFO - codeparrot_training - Step 9006: {'lr': 0.00047417474650503347, 'samples': 4611584, 'steps': 9006, 'loss/train': 1.9111452102661133} +02/24/2022 13:14:02 - INFO - codeparrot_training - Step 9007: {'lr': 0.00047416750333080244, 'samples': 4612096, 'steps': 9007, 'loss/train': 1.8345046043395996} +02/24/2022 13:14:08 - INFO - codeparrot_training - Step 9008: {'lr': 0.000474160259196309, 'samples': 4612608, 'steps': 9008, 'loss/train': 2.2105023860931396} +02/24/2022 13:14:11 - INFO - codeparrot_training - Step 9009: {'lr': 0.00047415301410158416, 'samples': 4613120, 'steps': 9009, 'loss/train': 2.2949531078338623} +02/24/2022 13:14:17 - INFO - codeparrot_training - Step 9010: {'lr': 0.00047414576804665897, 'samples': 4613632, 'steps': 9010, 'loss/train': 2.5287442207336426} +02/24/2022 13:14:20 - INFO - codeparrot_training - Step 9011: {'lr': 0.0004741385210315645, 'samples': 4614144, 'steps': 9011, 'loss/train': 3.376943826675415} +02/24/2022 13:14:26 - INFO - codeparrot_training - Step 9012: {'lr': 0.0004741312730563318, 'samples': 4614656, 'steps': 9012, 'loss/train': 2.552826404571533} +02/24/2022 13:14:29 - INFO - codeparrot_training - Step 9013: {'lr': 0.00047412402412099185, 'samples': 4615168, 'steps': 9013, 'loss/train': 1.4551596641540527} +02/24/2022 13:14:35 - INFO - codeparrot_training - Step 9014: {'lr': 0.00047411677422557586, 'samples': 4615680, 'steps': 9014, 'loss/train': 2.709442377090454} +02/24/2022 13:14:39 - INFO - codeparrot_training - Step 9015: {'lr': 0.0004741095233701147, 'samples': 4616192, 'steps': 9015, 'loss/train': 2.6846683025360107} +02/24/2022 13:14:44 - INFO - codeparrot_training - Step 9016: {'lr': 0.00047410227155463946, 'samples': 4616704, 'steps': 9016, 'loss/train': 1.409234642982483} +02/24/2022 13:14:48 - INFO - codeparrot_training - Step 9017: {'lr': 0.00047409501877918134, 'samples': 4617216, 'steps': 9017, 'loss/train': 2.198273181915283} +02/24/2022 13:14:53 - INFO - codeparrot_training - Step 9018: {'lr': 0.00047408776504377127, 'samples': 4617728, 'steps': 9018, 'loss/train': 3.9294626712799072} +02/24/2022 13:14:57 - INFO - codeparrot_training - Step 9019: {'lr': 0.00047408051034844036, 'samples': 4618240, 'steps': 9019, 'loss/train': 1.6127426624298096} +02/24/2022 13:15:03 - INFO - codeparrot_training - Step 9020: {'lr': 0.00047407325469321973, 'samples': 4618752, 'steps': 9020, 'loss/train': 1.723608374595642} +02/24/2022 13:15:06 - INFO - codeparrot_training - Step 9021: {'lr': 0.00047406599807814034, 'samples': 4619264, 'steps': 9021, 'loss/train': 1.8285869359970093} +02/24/2022 13:15:12 - INFO - codeparrot_training - Step 9022: {'lr': 0.00047405874050323346, 'samples': 4619776, 'steps': 9022, 'loss/train': 2.238917112350464} +02/24/2022 13:15:15 - INFO - codeparrot_training - Step 9023: {'lr': 0.00047405148196853005, 'samples': 4620288, 'steps': 9023, 'loss/train': 2.512418270111084} +02/24/2022 13:15:21 - INFO - codeparrot_training - Step 9024: {'lr': 0.0004740442224740612, 'samples': 4620800, 'steps': 9024, 'loss/train': 2.4134669303894043} +02/24/2022 13:15:24 - INFO - codeparrot_training - Step 9025: {'lr': 0.00047403696201985814, 'samples': 4621312, 'steps': 9025, 'loss/train': 2.0683298110961914} +02/24/2022 13:15:30 - INFO - codeparrot_training - Step 9026: {'lr': 0.0004740297006059517, 'samples': 4621824, 'steps': 9026, 'loss/train': 4.153469085693359} +02/24/2022 13:15:33 - INFO - codeparrot_training - Step 9027: {'lr': 0.00047402243823237335, 'samples': 4622336, 'steps': 9027, 'loss/train': 1.373235821723938} +02/24/2022 13:15:39 - INFO - codeparrot_training - Step 9028: {'lr': 0.0004740151748991539, 'samples': 4622848, 'steps': 9028, 'loss/train': 2.228574752807617} +02/24/2022 13:15:42 - INFO - codeparrot_training - Step 9029: {'lr': 0.00047400791060632464, 'samples': 4623360, 'steps': 9029, 'loss/train': 2.196526050567627} +02/24/2022 13:15:48 - INFO - codeparrot_training - Step 9030: {'lr': 0.0004740006453539166, 'samples': 4623872, 'steps': 9030, 'loss/train': 1.4205386638641357} +02/24/2022 13:15:55 - INFO - codeparrot_training - Step 9031: {'lr': 0.0004739933791419609, 'samples': 4624384, 'steps': 9031, 'loss/train': 2.9199771881103516} +02/24/2022 13:15:58 - INFO - codeparrot_training - Step 9032: {'lr': 0.0004739861119704887, 'samples': 4624896, 'steps': 9032, 'loss/train': 3.2379813194274902} +02/24/2022 13:16:04 - INFO - codeparrot_training - Step 9033: {'lr': 0.00047397884383953114, 'samples': 4625408, 'steps': 9033, 'loss/train': 2.1234307289123535} +02/24/2022 13:16:07 - INFO - codeparrot_training - Step 9034: {'lr': 0.0004739715747491193, 'samples': 4625920, 'steps': 9034, 'loss/train': 0.5818557739257812} +02/24/2022 13:16:13 - INFO - codeparrot_training - Step 9035: {'lr': 0.00047396430469928436, 'samples': 4626432, 'steps': 9035, 'loss/train': 1.9213566780090332} +02/24/2022 13:16:16 - INFO - codeparrot_training - Step 9036: {'lr': 0.0004739570336900575, 'samples': 4626944, 'steps': 9036, 'loss/train': 1.8206393718719482} +02/24/2022 13:16:22 - INFO - codeparrot_training - Step 9037: {'lr': 0.00047394976172146974, 'samples': 4627456, 'steps': 9037, 'loss/train': 2.1901352405548096} +02/24/2022 13:16:25 - INFO - codeparrot_training - Step 9038: {'lr': 0.0004739424887935524, 'samples': 4627968, 'steps': 9038, 'loss/train': 2.1107277870178223} +02/24/2022 13:16:31 - INFO - codeparrot_training - Step 9039: {'lr': 0.0004739352149063365, 'samples': 4628480, 'steps': 9039, 'loss/train': 2.2411656379699707} +02/24/2022 13:16:34 - INFO - codeparrot_training - Step 9040: {'lr': 0.0004739279400598532, 'samples': 4628992, 'steps': 9040, 'loss/train': 1.7671642303466797} +02/24/2022 13:16:40 - INFO - codeparrot_training - Step 9041: {'lr': 0.0004739206642541338, 'samples': 4629504, 'steps': 9041, 'loss/train': 1.9934426546096802} +02/24/2022 13:16:44 - INFO - codeparrot_training - Step 9042: {'lr': 0.0004739133874892093, 'samples': 4630016, 'steps': 9042, 'loss/train': 1.3417733907699585} +02/24/2022 13:16:50 - INFO - codeparrot_training - Step 9043: {'lr': 0.0004739061097651111, 'samples': 4630528, 'steps': 9043, 'loss/train': 0.42076340317726135} +02/24/2022 13:16:53 - INFO - codeparrot_training - Step 9044: {'lr': 0.00047389883108187004, 'samples': 4631040, 'steps': 9044, 'loss/train': 2.7384181022644043} +02/24/2022 13:16:58 - INFO - codeparrot_training - Step 9045: {'lr': 0.0004738915514395176, 'samples': 4631552, 'steps': 9045, 'loss/train': 1.4291678667068481} +02/24/2022 13:17:02 - INFO - codeparrot_training - Step 9046: {'lr': 0.0004738842708380847, 'samples': 4632064, 'steps': 9046, 'loss/train': 2.880378484725952} +02/24/2022 13:17:07 - INFO - codeparrot_training - Step 9047: {'lr': 0.0004738769892776028, 'samples': 4632576, 'steps': 9047, 'loss/train': 1.7809022665023804} +02/24/2022 13:17:11 - INFO - codeparrot_training - Step 9048: {'lr': 0.00047386970675810297, 'samples': 4633088, 'steps': 9048, 'loss/train': 1.3483664989471436} +02/24/2022 13:17:16 - INFO - codeparrot_training - Step 9049: {'lr': 0.00047386242327961635, 'samples': 4633600, 'steps': 9049, 'loss/train': 1.6377965211868286} +02/24/2022 13:17:20 - INFO - codeparrot_training - Step 9050: {'lr': 0.0004738551388421742, 'samples': 4634112, 'steps': 9050, 'loss/train': 1.6631475687026978} +02/24/2022 13:17:26 - INFO - codeparrot_training - Step 9051: {'lr': 0.00047384785344580784, 'samples': 4634624, 'steps': 9051, 'loss/train': 2.241671323776245} +02/24/2022 13:17:30 - INFO - codeparrot_training - Step 9052: {'lr': 0.00047384056709054824, 'samples': 4635136, 'steps': 9052, 'loss/train': 2.9415793418884277} +02/24/2022 13:17:35 - INFO - codeparrot_training - Step 9053: {'lr': 0.0004738332797764267, 'samples': 4635648, 'steps': 9053, 'loss/train': 2.354640007019043} +02/24/2022 13:17:39 - INFO - codeparrot_training - Step 9054: {'lr': 0.0004738259915034745, 'samples': 4636160, 'steps': 9054, 'loss/train': 1.0778690576553345} +02/24/2022 13:17:44 - INFO - codeparrot_training - Step 9055: {'lr': 0.00047381870227172285, 'samples': 4636672, 'steps': 9055, 'loss/train': 1.864437460899353} +02/24/2022 13:17:48 - INFO - codeparrot_training - Step 9056: {'lr': 0.0004738114120812029, 'samples': 4637184, 'steps': 9056, 'loss/train': 4.310451984405518} +02/24/2022 13:17:53 - INFO - codeparrot_training - Step 9057: {'lr': 0.000473804120931946, 'samples': 4637696, 'steps': 9057, 'loss/train': 1.8198201656341553} +02/24/2022 13:17:57 - INFO - codeparrot_training - Step 9058: {'lr': 0.0004737968288239832, 'samples': 4638208, 'steps': 9058, 'loss/train': 0.6944505572319031} +02/24/2022 13:18:02 - INFO - codeparrot_training - Step 9059: {'lr': 0.00047378953575734594, 'samples': 4638720, 'steps': 9059, 'loss/train': 2.7053723335266113} +02/24/2022 13:18:06 - INFO - codeparrot_training - Step 9060: {'lr': 0.0004737822417320654, 'samples': 4639232, 'steps': 9060, 'loss/train': 2.2772247791290283} +02/24/2022 13:18:11 - INFO - codeparrot_training - Step 9061: {'lr': 0.00047377494674817275, 'samples': 4639744, 'steps': 9061, 'loss/train': 1.6539815664291382} +02/24/2022 13:18:15 - INFO - codeparrot_training - Step 9062: {'lr': 0.00047376765080569925, 'samples': 4640256, 'steps': 9062, 'loss/train': 2.4359467029571533} +02/24/2022 13:18:21 - INFO - codeparrot_training - Step 9063: {'lr': 0.0004737603539046762, 'samples': 4640768, 'steps': 9063, 'loss/train': 2.046271800994873} +02/24/2022 13:18:24 - INFO - codeparrot_training - Step 9064: {'lr': 0.0004737530560451349, 'samples': 4641280, 'steps': 9064, 'loss/train': 2.2490618228912354} +02/24/2022 13:18:28 - INFO - codeparrot_training - Step 9065: {'lr': 0.00047374575722710656, 'samples': 4641792, 'steps': 9065, 'loss/train': 2.4609973430633545} +02/24/2022 13:18:33 - INFO - codeparrot_training - Step 9066: {'lr': 0.0004737384574506224, 'samples': 4642304, 'steps': 9066, 'loss/train': 1.7486547231674194} +02/24/2022 13:18:37 - INFO - codeparrot_training - Step 9067: {'lr': 0.0004737311567157137, 'samples': 4642816, 'steps': 9067, 'loss/train': 2.636962890625} +02/24/2022 13:18:43 - INFO - codeparrot_training - Step 9068: {'lr': 0.00047372385502241176, 'samples': 4643328, 'steps': 9068, 'loss/train': 2.1486315727233887} +02/24/2022 13:18:46 - INFO - codeparrot_training - Step 9069: {'lr': 0.00047371655237074794, 'samples': 4643840, 'steps': 9069, 'loss/train': 2.4533753395080566} +02/24/2022 13:18:52 - INFO - codeparrot_training - Step 9070: {'lr': 0.0004737092487607534, 'samples': 4644352, 'steps': 9070, 'loss/train': 2.2555012702941895} +02/24/2022 13:18:55 - INFO - codeparrot_training - Step 9071: {'lr': 0.00047370194419245955, 'samples': 4644864, 'steps': 9071, 'loss/train': 2.153978109359741} +02/24/2022 13:19:01 - INFO - codeparrot_training - Step 9072: {'lr': 0.00047369463866589755, 'samples': 4645376, 'steps': 9072, 'loss/train': 2.525999069213867} +02/24/2022 13:19:05 - INFO - codeparrot_training - Step 9073: {'lr': 0.00047368733218109874, 'samples': 4645888, 'steps': 9073, 'loss/train': 2.533388614654541} +02/24/2022 13:19:10 - INFO - codeparrot_training - Step 9074: {'lr': 0.00047368002473809447, 'samples': 4646400, 'steps': 9074, 'loss/train': 2.483051300048828} +02/24/2022 13:19:13 - INFO - codeparrot_training - Step 9075: {'lr': 0.0004736727163369159, 'samples': 4646912, 'steps': 9075, 'loss/train': 3.273833751678467} +02/24/2022 13:19:19 - INFO - codeparrot_training - Step 9076: {'lr': 0.00047366540697759454, 'samples': 4647424, 'steps': 9076, 'loss/train': 2.524005889892578} +02/24/2022 13:19:22 - INFO - codeparrot_training - Step 9077: {'lr': 0.00047365809666016155, 'samples': 4647936, 'steps': 9077, 'loss/train': 3.258361577987671} +02/24/2022 13:19:29 - INFO - codeparrot_training - Step 9078: {'lr': 0.00047365078538464826, 'samples': 4648448, 'steps': 9078, 'loss/train': 2.655883312225342} +02/24/2022 13:19:32 - INFO - codeparrot_training - Step 9079: {'lr': 0.0004736434731510861, 'samples': 4648960, 'steps': 9079, 'loss/train': 2.2690420150756836} +02/24/2022 13:19:38 - INFO - codeparrot_training - Step 9080: {'lr': 0.00047363615995950624, 'samples': 4649472, 'steps': 9080, 'loss/train': 2.4604878425598145} +02/24/2022 13:19:41 - INFO - codeparrot_training - Step 9081: {'lr': 0.0004736288458099401, 'samples': 4649984, 'steps': 9081, 'loss/train': 2.1250627040863037} +02/24/2022 13:19:47 - INFO - codeparrot_training - Step 9082: {'lr': 0.0004736215307024191, 'samples': 4650496, 'steps': 9082, 'loss/train': 2.5968241691589355} +02/24/2022 13:19:52 - INFO - codeparrot_training - Step 9083: {'lr': 0.0004736142146369744, 'samples': 4651008, 'steps': 9083, 'loss/train': 2.5302371978759766} +02/24/2022 13:19:56 - INFO - codeparrot_training - Step 9084: {'lr': 0.0004736068976136374, 'samples': 4651520, 'steps': 9084, 'loss/train': 2.263505220413208} +02/24/2022 13:20:01 - INFO - codeparrot_training - Step 9085: {'lr': 0.00047359957963243943, 'samples': 4652032, 'steps': 9085, 'loss/train': 1.1718926429748535} +02/24/2022 13:20:05 - INFO - codeparrot_training - Step 9086: {'lr': 0.0004735922606934119, 'samples': 4652544, 'steps': 9086, 'loss/train': 1.6612155437469482} +02/24/2022 13:20:11 - INFO - codeparrot_training - Step 9087: {'lr': 0.0004735849407965861, 'samples': 4653056, 'steps': 9087, 'loss/train': 1.924526333808899} +02/24/2022 13:20:15 - INFO - codeparrot_training - Step 9088: {'lr': 0.00047357761994199345, 'samples': 4653568, 'steps': 9088, 'loss/train': 1.666778564453125} +02/24/2022 13:20:20 - INFO - codeparrot_training - Step 9089: {'lr': 0.00047357029812966525, 'samples': 4654080, 'steps': 9089, 'loss/train': 2.3335912227630615} +02/24/2022 13:20:24 - INFO - codeparrot_training - Step 9090: {'lr': 0.0004735629753596328, 'samples': 4654592, 'steps': 9090, 'loss/train': 2.532266855239868} +02/24/2022 13:20:29 - INFO - codeparrot_training - Step 9091: {'lr': 0.00047355565163192763, 'samples': 4655104, 'steps': 9091, 'loss/train': 2.408735752105713} +02/24/2022 13:20:33 - INFO - codeparrot_training - Step 9092: {'lr': 0.00047354832694658104, 'samples': 4655616, 'steps': 9092, 'loss/train': 2.320756196975708} +02/24/2022 13:20:38 - INFO - codeparrot_training - Step 9093: {'lr': 0.00047354100130362443, 'samples': 4656128, 'steps': 9093, 'loss/train': 2.1825854778289795} +02/24/2022 13:20:42 - INFO - codeparrot_training - Step 9094: {'lr': 0.00047353367470308913, 'samples': 4656640, 'steps': 9094, 'loss/train': 1.8231948614120483} +02/24/2022 13:20:47 - INFO - codeparrot_training - Step 9095: {'lr': 0.0004735263471450065, 'samples': 4657152, 'steps': 9095, 'loss/train': 2.6250102519989014} +02/24/2022 13:20:51 - INFO - codeparrot_training - Step 9096: {'lr': 0.00047351901862940807, 'samples': 4657664, 'steps': 9096, 'loss/train': 2.6300764083862305} +02/24/2022 13:20:57 - INFO - codeparrot_training - Step 9097: {'lr': 0.000473511689156325, 'samples': 4658176, 'steps': 9097, 'loss/train': 1.944015622138977} +02/24/2022 13:21:01 - INFO - codeparrot_training - Step 9098: {'lr': 0.0004735043587257889, 'samples': 4658688, 'steps': 9098, 'loss/train': 1.8121219873428345} +02/24/2022 13:21:06 - INFO - codeparrot_training - Step 9099: {'lr': 0.00047349702733783113, 'samples': 4659200, 'steps': 9099, 'loss/train': 0.78632652759552} +02/24/2022 13:21:10 - INFO - codeparrot_training - Step 9100: {'lr': 0.00047348969499248306, 'samples': 4659712, 'steps': 9100, 'loss/train': 1.459047794342041} +02/24/2022 13:21:15 - INFO - codeparrot_training - Step 9101: {'lr': 0.0004734823616897761, 'samples': 4660224, 'steps': 9101, 'loss/train': 1.4093104600906372} +02/24/2022 13:21:19 - INFO - codeparrot_training - Step 9102: {'lr': 0.0004734750274297416, 'samples': 4660736, 'steps': 9102, 'loss/train': 1.9987393617630005} +02/24/2022 13:21:25 - INFO - codeparrot_training - Step 9103: {'lr': 0.0004734676922124111, 'samples': 4661248, 'steps': 9103, 'loss/train': 2.505825996398926} +02/24/2022 13:21:28 - INFO - codeparrot_training - Step 9104: {'lr': 0.00047346035603781597, 'samples': 4661760, 'steps': 9104, 'loss/train': 1.9600998163223267} +02/24/2022 13:21:34 - INFO - codeparrot_training - Step 9105: {'lr': 0.0004734530189059876, 'samples': 4662272, 'steps': 9105, 'loss/train': 2.631686210632324} +02/24/2022 13:21:37 - INFO - codeparrot_training - Step 9106: {'lr': 0.0004734456808169575, 'samples': 4662784, 'steps': 9106, 'loss/train': 1.004799485206604} +02/24/2022 13:21:43 - INFO - codeparrot_training - Step 9107: {'lr': 0.00047343834177075695, 'samples': 4663296, 'steps': 9107, 'loss/train': 2.3275928497314453} +02/24/2022 13:21:46 - INFO - codeparrot_training - Step 9108: {'lr': 0.0004734310017674176, 'samples': 4663808, 'steps': 9108, 'loss/train': 1.5682642459869385} +02/24/2022 13:21:52 - INFO - codeparrot_training - Step 9109: {'lr': 0.00047342366080697077, 'samples': 4664320, 'steps': 9109, 'loss/train': 2.061352014541626} +02/24/2022 13:21:55 - INFO - codeparrot_training - Step 9110: {'lr': 0.00047341631888944794, 'samples': 4664832, 'steps': 9110, 'loss/train': 1.8658485412597656} +02/24/2022 13:22:01 - INFO - codeparrot_training - Step 9111: {'lr': 0.0004734089760148805, 'samples': 4665344, 'steps': 9111, 'loss/train': 2.7326509952545166} +02/24/2022 13:22:04 - INFO - codeparrot_training - Step 9112: {'lr': 0.0004734016321832999, 'samples': 4665856, 'steps': 9112, 'loss/train': 2.248201608657837} +02/24/2022 13:22:10 - INFO - codeparrot_training - Step 9113: {'lr': 0.0004733942873947377, 'samples': 4666368, 'steps': 9113, 'loss/train': 1.2828941345214844} +02/24/2022 13:22:13 - INFO - codeparrot_training - Step 9114: {'lr': 0.00047338694164922535, 'samples': 4666880, 'steps': 9114, 'loss/train': 2.3066506385803223} +02/24/2022 13:22:19 - INFO - codeparrot_training - Step 9115: {'lr': 0.0004733795949467942, 'samples': 4667392, 'steps': 9115, 'loss/train': 2.111845016479492} +02/24/2022 13:22:23 - INFO - codeparrot_training - Step 9116: {'lr': 0.0004733722472874759, 'samples': 4667904, 'steps': 9116, 'loss/train': 2.4675910472869873} +02/24/2022 13:22:28 - INFO - codeparrot_training - Step 9117: {'lr': 0.0004733648986713017, 'samples': 4668416, 'steps': 9117, 'loss/train': 2.8457374572753906} +02/24/2022 13:22:32 - INFO - codeparrot_training - Step 9118: {'lr': 0.00047335754909830327, 'samples': 4668928, 'steps': 9118, 'loss/train': 1.828161358833313} +02/24/2022 13:22:37 - INFO - codeparrot_training - Step 9119: {'lr': 0.00047335019856851204, 'samples': 4669440, 'steps': 9119, 'loss/train': 1.8511936664581299} +02/24/2022 13:22:41 - INFO - codeparrot_training - Step 9120: {'lr': 0.0004733428470819594, 'samples': 4669952, 'steps': 9120, 'loss/train': 4.304659366607666} +02/24/2022 13:22:46 - INFO - codeparrot_training - Step 9121: {'lr': 0.000473335494638677, 'samples': 4670464, 'steps': 9121, 'loss/train': 1.58551025390625} +02/24/2022 13:22:50 - INFO - codeparrot_training - Step 9122: {'lr': 0.00047332814123869616, 'samples': 4670976, 'steps': 9122, 'loss/train': 1.7067691087722778} +02/24/2022 13:22:55 - INFO - codeparrot_training - Step 9123: {'lr': 0.0004733207868820486, 'samples': 4671488, 'steps': 9123, 'loss/train': 2.051353931427002} +02/24/2022 13:22:59 - INFO - codeparrot_training - Step 9124: {'lr': 0.0004733134315687656, 'samples': 4672000, 'steps': 9124, 'loss/train': 1.982812762260437} +02/24/2022 13:23:05 - INFO - codeparrot_training - Step 9125: {'lr': 0.00047330607529887884, 'samples': 4672512, 'steps': 9125, 'loss/train': 1.914789080619812} +02/24/2022 13:23:08 - INFO - codeparrot_training - Step 9126: {'lr': 0.00047329871807241976, 'samples': 4673024, 'steps': 9126, 'loss/train': 1.613128423690796} +02/24/2022 13:23:14 - INFO - codeparrot_training - Step 9127: {'lr': 0.00047329135988941984, 'samples': 4673536, 'steps': 9127, 'loss/train': 1.704093098640442} +02/24/2022 13:23:17 - INFO - codeparrot_training - Step 9128: {'lr': 0.00047328400074991064, 'samples': 4674048, 'steps': 9128, 'loss/train': 2.713627576828003} +02/24/2022 13:23:23 - INFO - codeparrot_training - Step 9129: {'lr': 0.00047327664065392375, 'samples': 4674560, 'steps': 9129, 'loss/train': 1.7202262878417969} +02/24/2022 13:23:26 - INFO - codeparrot_training - Step 9130: {'lr': 0.0004732692796014905, 'samples': 4675072, 'steps': 9130, 'loss/train': 1.0255662202835083} +02/24/2022 13:23:32 - INFO - codeparrot_training - Step 9131: {'lr': 0.00047326191759264265, 'samples': 4675584, 'steps': 9131, 'loss/train': 2.1260640621185303} +02/24/2022 13:23:35 - INFO - codeparrot_training - Step 9132: {'lr': 0.00047325455462741164, 'samples': 4676096, 'steps': 9132, 'loss/train': 1.1545182466506958} +02/24/2022 13:23:41 - INFO - codeparrot_training - Step 9133: {'lr': 0.00047324719070582894, 'samples': 4676608, 'steps': 9133, 'loss/train': 2.8290140628814697} +02/24/2022 13:23:44 - INFO - codeparrot_training - Step 9134: {'lr': 0.00047323982582792625, 'samples': 4677120, 'steps': 9134, 'loss/train': 1.455802083015442} +02/24/2022 13:23:50 - INFO - codeparrot_training - Step 9135: {'lr': 0.00047323245999373497, 'samples': 4677632, 'steps': 9135, 'loss/train': 2.5370726585388184} +02/24/2022 13:23:54 - INFO - codeparrot_training - Step 9136: {'lr': 0.0004732250932032867, 'samples': 4678144, 'steps': 9136, 'loss/train': 2.597081184387207} +02/24/2022 13:23:59 - INFO - codeparrot_training - Step 9137: {'lr': 0.0004732177254566131, 'samples': 4678656, 'steps': 9137, 'loss/train': 1.2114132642745972} +02/24/2022 13:24:02 - INFO - codeparrot_training - Step 9138: {'lr': 0.0004732103567537456, 'samples': 4679168, 'steps': 9138, 'loss/train': 2.1426267623901367} +02/24/2022 13:24:10 - INFO - codeparrot_training - Step 9139: {'lr': 0.00047320298709471574, 'samples': 4679680, 'steps': 9139, 'loss/train': 1.139854907989502} +02/24/2022 13:24:13 - INFO - codeparrot_training - Step 9140: {'lr': 0.0004731956164795552, 'samples': 4680192, 'steps': 9140, 'loss/train': 2.542071580886841} +02/24/2022 13:24:19 - INFO - codeparrot_training - Step 9141: {'lr': 0.0004731882449082956, 'samples': 4680704, 'steps': 9141, 'loss/train': 2.5392870903015137} +02/24/2022 13:24:22 - INFO - codeparrot_training - Step 9142: {'lr': 0.0004731808723809683, 'samples': 4681216, 'steps': 9142, 'loss/train': 1.6529693603515625} +02/24/2022 13:24:28 - INFO - codeparrot_training - Step 9143: {'lr': 0.0004731734988976051, 'samples': 4681728, 'steps': 9143, 'loss/train': 2.2855355739593506} +02/24/2022 13:24:32 - INFO - codeparrot_training - Step 9144: {'lr': 0.00047316612445823746, 'samples': 4682240, 'steps': 9144, 'loss/train': 2.11826491355896} +02/24/2022 13:24:37 - INFO - codeparrot_training - Step 9145: {'lr': 0.000473158749062897, 'samples': 4682752, 'steps': 9145, 'loss/train': 2.2974278926849365} +02/24/2022 13:24:40 - INFO - codeparrot_training - Step 9146: {'lr': 0.00047315137271161537, 'samples': 4683264, 'steps': 9146, 'loss/train': 2.2753851413726807} +02/24/2022 13:24:46 - INFO - codeparrot_training - Step 9147: {'lr': 0.00047314399540442407, 'samples': 4683776, 'steps': 9147, 'loss/train': 0.22249168157577515} +02/24/2022 13:24:50 - INFO - codeparrot_training - Step 9148: {'lr': 0.00047313661714135476, 'samples': 4684288, 'steps': 9148, 'loss/train': 2.55893874168396} +02/24/2022 13:24:56 - INFO - codeparrot_training - Step 9149: {'lr': 0.000473129237922439, 'samples': 4684800, 'steps': 9149, 'loss/train': 1.9013292789459229} +02/24/2022 13:25:00 - INFO - codeparrot_training - Step 9150: {'lr': 0.0004731218577477085, 'samples': 4685312, 'steps': 9150, 'loss/train': 2.48695969581604} +02/24/2022 13:25:05 - INFO - codeparrot_training - Step 9151: {'lr': 0.0004731144766171948, 'samples': 4685824, 'steps': 9151, 'loss/train': 1.3969038724899292} +02/24/2022 13:25:09 - INFO - codeparrot_training - Step 9152: {'lr': 0.0004731070945309295, 'samples': 4686336, 'steps': 9152, 'loss/train': 3.5837178230285645} +02/24/2022 13:25:14 - INFO - codeparrot_training - Step 9153: {'lr': 0.00047309971148894425, 'samples': 4686848, 'steps': 9153, 'loss/train': 2.50506854057312} +02/24/2022 13:25:18 - INFO - codeparrot_training - Step 9154: {'lr': 0.00047309232749127074, 'samples': 4687360, 'steps': 9154, 'loss/train': 0.8776212334632874} +02/24/2022 13:25:23 - INFO - codeparrot_training - Step 9155: {'lr': 0.0004730849425379404, 'samples': 4687872, 'steps': 9155, 'loss/train': 2.5917952060699463} +02/24/2022 13:25:27 - INFO - codeparrot_training - Step 9156: {'lr': 0.0004730775566289851, 'samples': 4688384, 'steps': 9156, 'loss/train': 2.4982125759124756} +02/24/2022 13:25:32 - INFO - codeparrot_training - Step 9157: {'lr': 0.0004730701697644364, 'samples': 4688896, 'steps': 9157, 'loss/train': 2.920562982559204} +02/24/2022 13:25:36 - INFO - codeparrot_training - Step 9158: {'lr': 0.00047306278194432597, 'samples': 4689408, 'steps': 9158, 'loss/train': 1.4460726976394653} +02/24/2022 13:25:43 - INFO - codeparrot_training - Step 9159: {'lr': 0.0004730553931686853, 'samples': 4689920, 'steps': 9159, 'loss/train': 0.9634991884231567} +02/24/2022 13:25:46 - INFO - codeparrot_training - Step 9160: {'lr': 0.00047304800343754615, 'samples': 4690432, 'steps': 9160, 'loss/train': 2.0099990367889404} +02/24/2022 13:25:52 - INFO - codeparrot_training - Step 9161: {'lr': 0.00047304061275094025, 'samples': 4690944, 'steps': 9161, 'loss/train': 2.5363290309906006} +02/24/2022 13:25:55 - INFO - codeparrot_training - Step 9162: {'lr': 0.0004730332211088992, 'samples': 4691456, 'steps': 9162, 'loss/train': 0.39027178287506104} +02/24/2022 13:26:01 - INFO - codeparrot_training - Step 9163: {'lr': 0.0004730258285114546, 'samples': 4691968, 'steps': 9163, 'loss/train': 2.634152412414551} +02/24/2022 13:26:04 - INFO - codeparrot_training - Step 9164: {'lr': 0.0004730184349586382, 'samples': 4692480, 'steps': 9164, 'loss/train': 2.871751070022583} +02/24/2022 13:26:10 - INFO - codeparrot_training - Step 9165: {'lr': 0.0004730110404504816, 'samples': 4692992, 'steps': 9165, 'loss/train': 1.150871753692627} +02/24/2022 13:26:13 - INFO - codeparrot_training - Step 9166: {'lr': 0.00047300364498701654, 'samples': 4693504, 'steps': 9166, 'loss/train': 1.7111626863479614} +02/24/2022 13:26:19 - INFO - codeparrot_training - Step 9167: {'lr': 0.00047299624856827474, 'samples': 4694016, 'steps': 9167, 'loss/train': 0.24416518211364746} +02/24/2022 13:26:22 - INFO - codeparrot_training - Step 9168: {'lr': 0.0004729888511942877, 'samples': 4694528, 'steps': 9168, 'loss/train': 1.2803181409835815} +02/24/2022 13:26:29 - INFO - codeparrot_training - Step 9169: {'lr': 0.0004729814528650873, 'samples': 4695040, 'steps': 9169, 'loss/train': 2.793639898300171} +02/24/2022 13:26:32 - INFO - codeparrot_training - Step 9170: {'lr': 0.00047297405358070517, 'samples': 4695552, 'steps': 9170, 'loss/train': 2.7124135494232178} +02/24/2022 13:26:38 - INFO - codeparrot_training - Step 9171: {'lr': 0.00047296665334117295, 'samples': 4696064, 'steps': 9171, 'loss/train': 1.1330863237380981} +02/24/2022 13:26:41 - INFO - codeparrot_training - Step 9172: {'lr': 0.0004729592521465224, 'samples': 4696576, 'steps': 9172, 'loss/train': 0.8860397338867188} +02/24/2022 13:26:47 - INFO - codeparrot_training - Step 9173: {'lr': 0.00047295184999678524, 'samples': 4697088, 'steps': 9173, 'loss/train': 2.290264844894409} +02/24/2022 13:26:52 - INFO - codeparrot_training - Step 9174: {'lr': 0.00047294444689199313, 'samples': 4697600, 'steps': 9174, 'loss/train': 1.532676100730896} +02/24/2022 13:26:56 - INFO - codeparrot_training - Step 9175: {'lr': 0.0004729370428321778, 'samples': 4698112, 'steps': 9175, 'loss/train': 2.995622396469116} +02/24/2022 13:27:01 - INFO - codeparrot_training - Step 9176: {'lr': 0.000472929637817371, 'samples': 4698624, 'steps': 9176, 'loss/train': 2.861117362976074} +02/24/2022 13:27:05 - INFO - codeparrot_training - Step 9177: {'lr': 0.0004729222318476044, 'samples': 4699136, 'steps': 9177, 'loss/train': 2.28058123588562} +02/24/2022 13:27:10 - INFO - codeparrot_training - Step 9178: {'lr': 0.0004729148249229097, 'samples': 4699648, 'steps': 9178, 'loss/train': 1.9214019775390625} +02/24/2022 13:27:13 - INFO - codeparrot_training - Step 9179: {'lr': 0.0004729074170433187, 'samples': 4700160, 'steps': 9179, 'loss/train': 1.9854960441589355} +02/24/2022 13:27:19 - INFO - codeparrot_training - Step 9180: {'lr': 0.0004729000082088631, 'samples': 4700672, 'steps': 9180, 'loss/train': 1.4668793678283691} +02/24/2022 13:27:22 - INFO - codeparrot_training - Step 9181: {'lr': 0.0004728925984195748, 'samples': 4701184, 'steps': 9181, 'loss/train': 1.8076584339141846} +02/24/2022 13:27:28 - INFO - codeparrot_training - Step 9182: {'lr': 0.00047288518767548516, 'samples': 4701696, 'steps': 9182, 'loss/train': 2.297683000564575} +02/24/2022 13:27:31 - INFO - codeparrot_training - Step 9183: {'lr': 0.0004728777759766263, 'samples': 4702208, 'steps': 9183, 'loss/train': 1.3928289413452148} +02/24/2022 13:27:38 - INFO - codeparrot_training - Step 9184: {'lr': 0.00047287036332302967, 'samples': 4702720, 'steps': 9184, 'loss/train': 3.4282398223876953} +02/24/2022 13:27:42 - INFO - codeparrot_training - Step 9185: {'lr': 0.0004728629497147273, 'samples': 4703232, 'steps': 9185, 'loss/train': 0.30095282196998596} +02/24/2022 13:27:47 - INFO - codeparrot_training - Step 9186: {'lr': 0.00047285553515175077, 'samples': 4703744, 'steps': 9186, 'loss/train': 2.145885705947876} +02/24/2022 13:27:51 - INFO - codeparrot_training - Step 9187: {'lr': 0.0004728481196341319, 'samples': 4704256, 'steps': 9187, 'loss/train': 0.6703117489814758} +02/24/2022 13:27:56 - INFO - codeparrot_training - Step 9188: {'lr': 0.0004728407031619025, 'samples': 4704768, 'steps': 9188, 'loss/train': 1.5894571542739868} +02/24/2022 13:28:00 - INFO - codeparrot_training - Step 9189: {'lr': 0.0004728332857350942, 'samples': 4705280, 'steps': 9189, 'loss/train': 2.1085352897644043} +02/24/2022 13:28:05 - INFO - codeparrot_training - Step 9190: {'lr': 0.00047282586735373887, 'samples': 4705792, 'steps': 9190, 'loss/train': 2.5246129035949707} +02/24/2022 13:28:09 - INFO - codeparrot_training - Step 9191: {'lr': 0.0004728184480178683, 'samples': 4706304, 'steps': 9191, 'loss/train': 2.775531530380249} +02/24/2022 13:28:14 - INFO - codeparrot_training - Step 9192: {'lr': 0.00047281102772751425, 'samples': 4706816, 'steps': 9192, 'loss/train': 1.6418699026107788} +02/24/2022 13:28:18 - INFO - codeparrot_training - Step 9193: {'lr': 0.0004728036064827086, 'samples': 4707328, 'steps': 9193, 'loss/train': 2.6444785594940186} +02/24/2022 13:28:25 - INFO - codeparrot_training - Step 9194: {'lr': 0.00047279618428348294, 'samples': 4707840, 'steps': 9194, 'loss/train': 1.7925957441329956} +02/24/2022 13:28:28 - INFO - codeparrot_training - Step 9195: {'lr': 0.00047278876112986923, 'samples': 4708352, 'steps': 9195, 'loss/train': 1.8629181385040283} +02/24/2022 13:28:34 - INFO - codeparrot_training - Step 9196: {'lr': 0.0004727813370218992, 'samples': 4708864, 'steps': 9196, 'loss/train': 2.582183599472046} +02/24/2022 13:28:37 - INFO - codeparrot_training - Step 9197: {'lr': 0.00047277391195960463, 'samples': 4709376, 'steps': 9197, 'loss/train': 4.056845188140869} +02/24/2022 13:28:43 - INFO - codeparrot_training - Step 9198: {'lr': 0.00047276648594301733, 'samples': 4709888, 'steps': 9198, 'loss/train': 2.2332284450531006} +02/24/2022 13:28:46 - INFO - codeparrot_training - Step 9199: {'lr': 0.0004727590589721692, 'samples': 4710400, 'steps': 9199, 'loss/train': 2.9682910442352295} +02/24/2022 13:28:52 - INFO - codeparrot_training - Step 9200: {'lr': 0.00047275163104709196, 'samples': 4710912, 'steps': 9200, 'loss/train': 2.4550609588623047} +02/24/2022 13:28:55 - INFO - codeparrot_training - Step 9201: {'lr': 0.0004727442021678175, 'samples': 4711424, 'steps': 9201, 'loss/train': 2.3716583251953125} +02/24/2022 13:29:01 - INFO - codeparrot_training - Step 9202: {'lr': 0.0004727367723343776, 'samples': 4711936, 'steps': 9202, 'loss/train': 1.4867007732391357} +02/24/2022 13:29:04 - INFO - codeparrot_training - Step 9203: {'lr': 0.0004727293415468041, 'samples': 4712448, 'steps': 9203, 'loss/train': 1.2493293285369873} +02/24/2022 13:29:11 - INFO - codeparrot_training - Step 9204: {'lr': 0.00047272190980512875, 'samples': 4712960, 'steps': 9204, 'loss/train': 2.5243260860443115} +02/24/2022 13:29:14 - INFO - codeparrot_training - Step 9205: {'lr': 0.0004727144771093835, 'samples': 4713472, 'steps': 9205, 'loss/train': 2.4383633136749268} +02/24/2022 13:29:20 - INFO - codeparrot_training - Step 9206: {'lr': 0.00047270704345960023, 'samples': 4713984, 'steps': 9206, 'loss/train': 1.977097511291504} +02/24/2022 13:29:23 - INFO - codeparrot_training - Step 9207: {'lr': 0.00047269960885581064, 'samples': 4714496, 'steps': 9207, 'loss/train': 2.731149196624756} +02/24/2022 13:29:29 - INFO - codeparrot_training - Step 9208: {'lr': 0.00047269217329804663, 'samples': 4715008, 'steps': 9208, 'loss/train': 2.0739552974700928} +02/24/2022 13:29:32 - INFO - codeparrot_training - Step 9209: {'lr': 0.00047268473678634007, 'samples': 4715520, 'steps': 9209, 'loss/train': 1.8604692220687866} +02/24/2022 13:29:38 - INFO - codeparrot_training - Step 9210: {'lr': 0.00047267729932072284, 'samples': 4716032, 'steps': 9210, 'loss/train': 2.654740571975708} +02/24/2022 13:29:42 - INFO - codeparrot_training - Step 9211: {'lr': 0.00047266986090122677, 'samples': 4716544, 'steps': 9211, 'loss/train': 2.5369644165039062} +02/24/2022 13:29:47 - INFO - codeparrot_training - Step 9212: {'lr': 0.0004726624215278836, 'samples': 4717056, 'steps': 9212, 'loss/train': 3.6587746143341064} +02/24/2022 13:29:51 - INFO - codeparrot_training - Step 9213: {'lr': 0.00047265498120072546, 'samples': 4717568, 'steps': 9213, 'loss/train': 2.129279136657715} +02/24/2022 13:29:54 - INFO - codeparrot_training - Step 9214: {'lr': 0.00047264753991978404, 'samples': 4718080, 'steps': 9214, 'loss/train': 3.0930986404418945} +02/24/2022 13:30:00 - INFO - codeparrot_training - Step 9215: {'lr': 0.00047264009768509127, 'samples': 4718592, 'steps': 9215, 'loss/train': 1.4839849472045898} +02/24/2022 13:30:03 - INFO - codeparrot_training - Step 9216: {'lr': 0.000472632654496679, 'samples': 4719104, 'steps': 9216, 'loss/train': 2.642756462097168} +02/24/2022 13:30:09 - INFO - codeparrot_training - Step 9217: {'lr': 0.00047262521035457914, 'samples': 4719616, 'steps': 9217, 'loss/train': 1.7101268768310547} +02/24/2022 13:30:15 - INFO - codeparrot_training - Step 9218: {'lr': 0.00047261776525882353, 'samples': 4720128, 'steps': 9218, 'loss/train': 3.559933662414551} +02/24/2022 13:30:18 - INFO - codeparrot_training - Step 9219: {'lr': 0.00047261031920944413, 'samples': 4720640, 'steps': 9219, 'loss/train': 2.0281102657318115} +02/24/2022 13:30:24 - INFO - codeparrot_training - Step 9220: {'lr': 0.0004726028722064728, 'samples': 4721152, 'steps': 9220, 'loss/train': 1.7223827838897705} +02/24/2022 13:30:27 - INFO - codeparrot_training - Step 9221: {'lr': 0.0004725954242499415, 'samples': 4721664, 'steps': 9221, 'loss/train': 3.576780080795288} +02/24/2022 13:30:33 - INFO - codeparrot_training - Step 9222: {'lr': 0.00047258797533988205, 'samples': 4722176, 'steps': 9222, 'loss/train': 1.093144178390503} +02/24/2022 13:30:37 - INFO - codeparrot_training - Step 9223: {'lr': 0.00047258052547632636, 'samples': 4722688, 'steps': 9223, 'loss/train': 2.557705879211426} +02/24/2022 13:30:42 - INFO - codeparrot_training - Step 9224: {'lr': 0.0004725730746593064, 'samples': 4723200, 'steps': 9224, 'loss/train': 1.4148427248001099} +02/24/2022 13:30:45 - INFO - codeparrot_training - Step 9225: {'lr': 0.0004725656228888541, 'samples': 4723712, 'steps': 9225, 'loss/train': 1.9137229919433594} +02/24/2022 13:30:51 - INFO - codeparrot_training - Step 9226: {'lr': 0.0004725581701650014, 'samples': 4724224, 'steps': 9226, 'loss/train': 1.8964622020721436} +02/24/2022 13:30:55 - INFO - codeparrot_training - Step 9227: {'lr': 0.00047255071648778004, 'samples': 4724736, 'steps': 9227, 'loss/train': 1.9602665901184082} +02/24/2022 13:31:00 - INFO - codeparrot_training - Step 9228: {'lr': 0.00047254326185722207, 'samples': 4725248, 'steps': 9228, 'loss/train': 2.1882286071777344} +02/24/2022 13:31:04 - INFO - codeparrot_training - Step 9229: {'lr': 0.00047253580627335944, 'samples': 4725760, 'steps': 9229, 'loss/train': 2.884420394897461} +02/24/2022 13:31:10 - INFO - codeparrot_training - Step 9230: {'lr': 0.00047252834973622414, 'samples': 4726272, 'steps': 9230, 'loss/train': 3.4539942741394043} +02/24/2022 13:31:13 - INFO - codeparrot_training - Step 9231: {'lr': 0.00047252089224584804, 'samples': 4726784, 'steps': 9231, 'loss/train': 2.046510934829712} +02/24/2022 13:31:19 - INFO - codeparrot_training - Step 9232: {'lr': 0.0004725134338022631, 'samples': 4727296, 'steps': 9232, 'loss/train': 2.272949695587158} +02/24/2022 13:31:22 - INFO - codeparrot_training - Step 9233: {'lr': 0.00047250597440550124, 'samples': 4727808, 'steps': 9233, 'loss/train': 2.060175657272339} +02/24/2022 13:31:28 - INFO - codeparrot_training - Step 9234: {'lr': 0.0004724985140555945, 'samples': 4728320, 'steps': 9234, 'loss/train': 1.7731454372406006} +02/24/2022 13:31:31 - INFO - codeparrot_training - Step 9235: {'lr': 0.0004724910527525748, 'samples': 4728832, 'steps': 9235, 'loss/train': 2.637456178665161} +02/24/2022 13:31:37 - INFO - codeparrot_training - Step 9236: {'lr': 0.0004724835904964739, 'samples': 4729344, 'steps': 9236, 'loss/train': 2.847226858139038} +02/24/2022 13:31:40 - INFO - codeparrot_training - Step 9237: {'lr': 0.00047247612728732407, 'samples': 4729856, 'steps': 9237, 'loss/train': 2.606318235397339} +02/24/2022 13:31:46 - INFO - codeparrot_training - Step 9238: {'lr': 0.0004724686631251572, 'samples': 4730368, 'steps': 9238, 'loss/train': 1.946047306060791} +02/24/2022 13:31:49 - INFO - codeparrot_training - Step 9239: {'lr': 0.00047246119801000507, 'samples': 4730880, 'steps': 9239, 'loss/train': 3.4506382942199707} +02/24/2022 13:31:56 - INFO - codeparrot_training - Step 9240: {'lr': 0.00047245373194189995, 'samples': 4731392, 'steps': 9240, 'loss/train': 1.6544857025146484} +02/24/2022 13:31:59 - INFO - codeparrot_training - Step 9241: {'lr': 0.0004724462649208736, 'samples': 4731904, 'steps': 9241, 'loss/train': 0.7858849763870239} +02/24/2022 13:32:04 - INFO - codeparrot_training - Step 9242: {'lr': 0.0004724387969469581, 'samples': 4732416, 'steps': 9242, 'loss/train': 2.2347378730773926} +02/24/2022 13:32:08 - INFO - codeparrot_training - Step 9243: {'lr': 0.00047243132802018544, 'samples': 4732928, 'steps': 9243, 'loss/train': 1.0245441198349} +02/24/2022 13:32:14 - INFO - codeparrot_training - Step 9244: {'lr': 0.00047242385814058764, 'samples': 4733440, 'steps': 9244, 'loss/train': 2.605638027191162} +02/24/2022 13:32:17 - INFO - codeparrot_training - Step 9245: {'lr': 0.0004724163873081966, 'samples': 4733952, 'steps': 9245, 'loss/train': 2.2589056491851807} +02/24/2022 13:32:23 - INFO - codeparrot_training - Step 9246: {'lr': 0.00047240891552304443, 'samples': 4734464, 'steps': 9246, 'loss/train': 1.3129860162734985} +02/24/2022 13:32:26 - INFO - codeparrot_training - Step 9247: {'lr': 0.0004724014427851631, 'samples': 4734976, 'steps': 9247, 'loss/train': 1.9890809059143066} +02/24/2022 13:32:32 - INFO - codeparrot_training - Step 9248: {'lr': 0.0004723939690945845, 'samples': 4735488, 'steps': 9248, 'loss/train': 2.149052858352661} +02/24/2022 13:32:35 - INFO - codeparrot_training - Step 9249: {'lr': 0.00047238649445134086, 'samples': 4736000, 'steps': 9249, 'loss/train': 2.1851701736450195} +02/24/2022 13:32:41 - INFO - codeparrot_training - Step 9250: {'lr': 0.00047237901885546405, 'samples': 4736512, 'steps': 9250, 'loss/train': 2.6466867923736572} +02/24/2022 13:32:44 - INFO - codeparrot_training - Step 9251: {'lr': 0.00047237154230698607, 'samples': 4737024, 'steps': 9251, 'loss/train': 1.60611891746521} +02/24/2022 13:32:50 - INFO - codeparrot_training - Step 9252: {'lr': 0.0004723640648059391, 'samples': 4737536, 'steps': 9252, 'loss/train': 2.268428325653076} +02/24/2022 13:32:54 - INFO - codeparrot_training - Step 9253: {'lr': 0.0004723565863523551, 'samples': 4738048, 'steps': 9253, 'loss/train': 2.487484931945801} +02/24/2022 13:32:59 - INFO - codeparrot_training - Step 9254: {'lr': 0.0004723491069462661, 'samples': 4738560, 'steps': 9254, 'loss/train': 2.7203564643859863} +02/24/2022 13:33:03 - INFO - codeparrot_training - Step 9255: {'lr': 0.00047234162658770407, 'samples': 4739072, 'steps': 9255, 'loss/train': 2.055199384689331} +02/24/2022 13:33:08 - INFO - codeparrot_training - Step 9256: {'lr': 0.00047233414527670113, 'samples': 4739584, 'steps': 9256, 'loss/train': 2.022843599319458} +02/24/2022 13:33:12 - INFO - codeparrot_training - Step 9257: {'lr': 0.0004723266630132893, 'samples': 4740096, 'steps': 9257, 'loss/train': 0.8023931980133057} +02/24/2022 13:33:17 - INFO - codeparrot_training - Step 9258: {'lr': 0.0004723191797975007, 'samples': 4740608, 'steps': 9258, 'loss/train': 2.3807342052459717} +02/24/2022 13:33:21 - INFO - codeparrot_training - Step 9259: {'lr': 0.00047231169562936726, 'samples': 4741120, 'steps': 9259, 'loss/train': 0.8737770915031433} +02/24/2022 13:33:26 - INFO - codeparrot_training - Step 9260: {'lr': 0.00047230421050892116, 'samples': 4741632, 'steps': 9260, 'loss/train': 3.2700276374816895} +02/24/2022 13:33:30 - INFO - codeparrot_training - Step 9261: {'lr': 0.00047229672443619433, 'samples': 4742144, 'steps': 9261, 'loss/train': 2.2991623878479004} +02/24/2022 13:33:36 - INFO - codeparrot_training - Step 9262: {'lr': 0.00047228923741121897, 'samples': 4742656, 'steps': 9262, 'loss/train': 2.6805050373077393} +02/24/2022 13:33:40 - INFO - codeparrot_training - Step 9263: {'lr': 0.0004722817494340271, 'samples': 4743168, 'steps': 9263, 'loss/train': 2.879227638244629} +02/24/2022 13:33:45 - INFO - codeparrot_training - Step 9264: {'lr': 0.00047227426050465085, 'samples': 4743680, 'steps': 9264, 'loss/train': 1.4752908945083618} +02/24/2022 13:33:49 - INFO - codeparrot_training - Step 9265: {'lr': 0.00047226677062312217, 'samples': 4744192, 'steps': 9265, 'loss/train': 2.207026243209839} +02/24/2022 13:33:54 - INFO - codeparrot_training - Step 9266: {'lr': 0.00047225927978947327, 'samples': 4744704, 'steps': 9266, 'loss/train': 1.518178939819336} +02/24/2022 13:33:58 - INFO - codeparrot_training - Step 9267: {'lr': 0.00047225178800373613, 'samples': 4745216, 'steps': 9267, 'loss/train': 1.1507806777954102} +02/24/2022 13:34:03 - INFO - codeparrot_training - Step 9268: {'lr': 0.00047224429526594296, 'samples': 4745728, 'steps': 9268, 'loss/train': 2.770920991897583} +02/24/2022 13:34:07 - INFO - codeparrot_training - Step 9269: {'lr': 0.0004722368015761258, 'samples': 4746240, 'steps': 9269, 'loss/train': 2.097870111465454} +02/24/2022 13:34:12 - INFO - codeparrot_training - Step 9270: {'lr': 0.0004722293069343168, 'samples': 4746752, 'steps': 9270, 'loss/train': 1.8893096446990967} +02/24/2022 13:34:16 - INFO - codeparrot_training - Step 9271: {'lr': 0.00047222181134054785, 'samples': 4747264, 'steps': 9271, 'loss/train': 2.4055800437927246} +02/24/2022 13:34:21 - INFO - codeparrot_training - Step 9272: {'lr': 0.0004722143147948513, 'samples': 4747776, 'steps': 9272, 'loss/train': 2.748605966567993} +02/24/2022 13:34:25 - INFO - codeparrot_training - Step 9273: {'lr': 0.0004722068172972593, 'samples': 4748288, 'steps': 9273, 'loss/train': 2.956089973449707} +02/24/2022 13:34:30 - INFO - codeparrot_training - Step 9274: {'lr': 0.00047219931884780376, 'samples': 4748800, 'steps': 9274, 'loss/train': 2.7256693840026855} +02/24/2022 13:34:34 - INFO - codeparrot_training - Step 9275: {'lr': 0.0004721918194465169, 'samples': 4749312, 'steps': 9275, 'loss/train': 2.108445167541504} +02/24/2022 13:34:39 - INFO - codeparrot_training - Step 9276: {'lr': 0.00047218431909343083, 'samples': 4749824, 'steps': 9276, 'loss/train': 2.1449239253997803} +02/24/2022 13:34:43 - INFO - codeparrot_training - Step 9277: {'lr': 0.0004721768177885777, 'samples': 4750336, 'steps': 9277, 'loss/train': 2.3894524574279785} +02/24/2022 13:34:49 - INFO - codeparrot_training - Step 9278: {'lr': 0.00047216931553198963, 'samples': 4750848, 'steps': 9278, 'loss/train': 1.8229236602783203} +02/24/2022 13:34:53 - INFO - codeparrot_training - Step 9279: {'lr': 0.0004721618123236987, 'samples': 4751360, 'steps': 9279, 'loss/train': 2.724698305130005} +02/24/2022 13:34:58 - INFO - codeparrot_training - Step 9280: {'lr': 0.0004721543081637372, 'samples': 4751872, 'steps': 9280, 'loss/train': 2.0684165954589844} +02/24/2022 13:35:02 - INFO - codeparrot_training - Step 9281: {'lr': 0.0004721468030521372, 'samples': 4752384, 'steps': 9281, 'loss/train': 2.5664479732513428} +02/24/2022 13:35:07 - INFO - codeparrot_training - Step 9282: {'lr': 0.0004721392969889308, 'samples': 4752896, 'steps': 9282, 'loss/train': 2.4262731075286865} +02/24/2022 13:35:11 - INFO - codeparrot_training - Step 9283: {'lr': 0.00047213178997415015, 'samples': 4753408, 'steps': 9283, 'loss/train': 2.0224199295043945} +02/24/2022 13:35:16 - INFO - codeparrot_training - Step 9284: {'lr': 0.00047212428200782744, 'samples': 4753920, 'steps': 9284, 'loss/train': 1.835253119468689} +02/24/2022 13:35:20 - INFO - codeparrot_training - Step 9285: {'lr': 0.0004721167730899949, 'samples': 4754432, 'steps': 9285, 'loss/train': 1.6648372411727905} +02/24/2022 13:35:25 - INFO - codeparrot_training - Step 9286: {'lr': 0.0004721092632206846, 'samples': 4754944, 'steps': 9286, 'loss/train': 2.646677017211914} +02/24/2022 13:35:29 - INFO - codeparrot_training - Step 9287: {'lr': 0.00047210175239992876, 'samples': 4755456, 'steps': 9287, 'loss/train': 2.926750659942627} +02/24/2022 13:35:35 - INFO - codeparrot_training - Step 9288: {'lr': 0.0004720942406277595, 'samples': 4755968, 'steps': 9288, 'loss/train': 1.5843933820724487} +02/24/2022 13:35:39 - INFO - codeparrot_training - Step 9289: {'lr': 0.0004720867279042091, 'samples': 4756480, 'steps': 9289, 'loss/train': 1.838757872581482} +02/24/2022 13:35:44 - INFO - codeparrot_training - Step 9290: {'lr': 0.00047207921422930967, 'samples': 4756992, 'steps': 9290, 'loss/train': 1.2665789127349854} +02/24/2022 13:35:48 - INFO - codeparrot_training - Step 9291: {'lr': 0.00047207169960309335, 'samples': 4757504, 'steps': 9291, 'loss/train': 1.840664267539978} +02/24/2022 13:35:53 - INFO - codeparrot_training - Step 9292: {'lr': 0.00047206418402559236, 'samples': 4758016, 'steps': 9292, 'loss/train': 2.8592965602874756} +02/24/2022 13:35:57 - INFO - codeparrot_training - Step 9293: {'lr': 0.000472056667496839, 'samples': 4758528, 'steps': 9293, 'loss/train': 2.743601083755493} +02/24/2022 13:36:02 - INFO - codeparrot_training - Step 9294: {'lr': 0.0004720491500168654, 'samples': 4759040, 'steps': 9294, 'loss/train': 1.9503521919250488} +02/24/2022 13:36:06 - INFO - codeparrot_training - Step 9295: {'lr': 0.0004720416315857037, 'samples': 4759552, 'steps': 9295, 'loss/train': 2.6600232124328613} +02/24/2022 13:36:11 - INFO - codeparrot_training - Step 9296: {'lr': 0.00047203411220338615, 'samples': 4760064, 'steps': 9296, 'loss/train': 2.670135498046875} +02/24/2022 13:36:17 - INFO - codeparrot_training - Step 9297: {'lr': 0.000472026591869945, 'samples': 4760576, 'steps': 9297, 'loss/train': 2.2512452602386475} +02/24/2022 13:36:21 - INFO - codeparrot_training - Step 9298: {'lr': 0.00047201907058541236, 'samples': 4761088, 'steps': 9298, 'loss/train': 2.2479238510131836} +02/24/2022 13:36:26 - INFO - codeparrot_training - Step 9299: {'lr': 0.0004720115483498206, 'samples': 4761600, 'steps': 9299, 'loss/train': 1.8936635255813599} +02/24/2022 13:36:30 - INFO - codeparrot_training - Step 9300: {'lr': 0.00047200402516320186, 'samples': 4762112, 'steps': 9300, 'loss/train': 1.2354745864868164} +02/24/2022 13:36:35 - INFO - codeparrot_training - Step 9301: {'lr': 0.00047199650102558834, 'samples': 4762624, 'steps': 9301, 'loss/train': 1.5507136583328247} +02/24/2022 13:36:39 - INFO - codeparrot_training - Step 9302: {'lr': 0.0004719889759370123, 'samples': 4763136, 'steps': 9302, 'loss/train': 2.3888351917266846} +02/24/2022 13:36:42 - INFO - codeparrot_training - Step 9303: {'lr': 0.00047198144989750603, 'samples': 4763648, 'steps': 9303, 'loss/train': 1.7949309349060059} +02/24/2022 13:36:48 - INFO - codeparrot_training - Step 9304: {'lr': 0.00047197392290710164, 'samples': 4764160, 'steps': 9304, 'loss/train': 2.4654555320739746} +02/24/2022 13:36:51 - INFO - codeparrot_training - Step 9305: {'lr': 0.0004719663949658315, 'samples': 4764672, 'steps': 9305, 'loss/train': 1.472735047340393} +02/24/2022 13:36:57 - INFO - codeparrot_training - Step 9306: {'lr': 0.00047195886607372773, 'samples': 4765184, 'steps': 9306, 'loss/train': 2.5258872509002686} +02/24/2022 13:37:00 - INFO - codeparrot_training - Step 9307: {'lr': 0.0004719513362308228, 'samples': 4765696, 'steps': 9307, 'loss/train': 1.6016918420791626} +02/24/2022 13:37:07 - INFO - codeparrot_training - Step 9308: {'lr': 0.0004719438054371487, 'samples': 4766208, 'steps': 9308, 'loss/train': 3.078833818435669} +02/24/2022 13:37:11 - INFO - codeparrot_training - Step 9309: {'lr': 0.00047193627369273786, 'samples': 4766720, 'steps': 9309, 'loss/train': 1.6228625774383545} +02/24/2022 13:37:16 - INFO - codeparrot_training - Step 9310: {'lr': 0.00047192874099762246, 'samples': 4767232, 'steps': 9310, 'loss/train': 2.5432825088500977} +02/24/2022 13:37:20 - INFO - codeparrot_training - Step 9311: {'lr': 0.00047192120735183485, 'samples': 4767744, 'steps': 9311, 'loss/train': 1.1503663063049316} +02/24/2022 13:37:25 - INFO - codeparrot_training - Step 9312: {'lr': 0.0004719136727554072, 'samples': 4768256, 'steps': 9312, 'loss/train': 2.543571949005127} +02/24/2022 13:37:28 - INFO - codeparrot_training - Step 9313: {'lr': 0.0004719061372083719, 'samples': 4768768, 'steps': 9313, 'loss/train': 2.7874090671539307} +02/24/2022 13:37:34 - INFO - codeparrot_training - Step 9314: {'lr': 0.00047189860071076114, 'samples': 4769280, 'steps': 9314, 'loss/train': 1.3268343210220337} +02/24/2022 13:37:40 - INFO - codeparrot_training - Step 9315: {'lr': 0.00047189106326260723, 'samples': 4769792, 'steps': 9315, 'loss/train': 3.752823829650879} +02/24/2022 13:37:43 - INFO - codeparrot_training - Step 9316: {'lr': 0.0004718835248639425, 'samples': 4770304, 'steps': 9316, 'loss/train': 2.344468116760254} +02/24/2022 13:37:46 - INFO - codeparrot_training - Step 9317: {'lr': 0.0004718759855147992, 'samples': 4770816, 'steps': 9317, 'loss/train': 2.220879554748535} +02/24/2022 13:37:52 - INFO - codeparrot_training - Step 9318: {'lr': 0.00047186844521520955, 'samples': 4771328, 'steps': 9318, 'loss/train': 2.433905839920044} +02/24/2022 13:37:58 - INFO - codeparrot_training - Step 9319: {'lr': 0.000471860903965206, 'samples': 4771840, 'steps': 9319, 'loss/train': 2.2181622982025146} +02/24/2022 13:38:01 - INFO - codeparrot_training - Step 9320: {'lr': 0.00047185336176482084, 'samples': 4772352, 'steps': 9320, 'loss/train': 1.553859829902649} +02/24/2022 13:38:04 - INFO - codeparrot_training - Step 9321: {'lr': 0.0004718458186140863, 'samples': 4772864, 'steps': 9321, 'loss/train': 1.4725899696350098} +02/24/2022 13:38:11 - INFO - codeparrot_training - Step 9322: {'lr': 0.0004718382745130346, 'samples': 4773376, 'steps': 9322, 'loss/train': 2.2867565155029297} +02/24/2022 13:38:14 - INFO - codeparrot_training - Step 9323: {'lr': 0.0004718307294616983, 'samples': 4773888, 'steps': 9323, 'loss/train': 1.739013433456421} +02/24/2022 13:38:20 - INFO - codeparrot_training - Step 9324: {'lr': 0.00047182318346010953, 'samples': 4774400, 'steps': 9324, 'loss/train': 1.8712568283081055} +02/24/2022 13:38:23 - INFO - codeparrot_training - Step 9325: {'lr': 0.0004718156365083007, 'samples': 4774912, 'steps': 9325, 'loss/train': 3.318185806274414} +02/24/2022 13:38:29 - INFO - codeparrot_training - Step 9326: {'lr': 0.0004718080886063041, 'samples': 4775424, 'steps': 9326, 'loss/train': 1.873559594154358} +02/24/2022 13:38:32 - INFO - codeparrot_training - Step 9327: {'lr': 0.00047180053975415216, 'samples': 4775936, 'steps': 9327, 'loss/train': 3.0185303688049316} +02/24/2022 13:38:38 - INFO - codeparrot_training - Step 9328: {'lr': 0.00047179298995187705, 'samples': 4776448, 'steps': 9328, 'loss/train': 1.2814826965332031} +02/24/2022 13:38:41 - INFO - codeparrot_training - Step 9329: {'lr': 0.00047178543919951124, 'samples': 4776960, 'steps': 9329, 'loss/train': 1.9439716339111328} +02/24/2022 13:38:47 - INFO - codeparrot_training - Step 9330: {'lr': 0.000471777887497087, 'samples': 4777472, 'steps': 9330, 'loss/train': 1.9582996368408203} +02/24/2022 13:38:50 - INFO - codeparrot_training - Step 9331: {'lr': 0.0004717703348446367, 'samples': 4777984, 'steps': 9331, 'loss/train': 1.719980239868164} +02/24/2022 13:38:56 - INFO - codeparrot_training - Step 9332: {'lr': 0.00047176278124219276, 'samples': 4778496, 'steps': 9332, 'loss/train': 2.2232604026794434} +02/24/2022 13:38:59 - INFO - codeparrot_training - Step 9333: {'lr': 0.0004717552266897874, 'samples': 4779008, 'steps': 9333, 'loss/train': 1.6723746061325073} +02/24/2022 13:39:05 - INFO - codeparrot_training - Step 9334: {'lr': 0.0004717476711874532, 'samples': 4779520, 'steps': 9334, 'loss/train': 3.317763328552246} +02/24/2022 13:39:09 - INFO - codeparrot_training - Step 9335: {'lr': 0.00047174011473522225, 'samples': 4780032, 'steps': 9335, 'loss/train': 2.113901376724243} +02/24/2022 13:39:14 - INFO - codeparrot_training - Step 9336: {'lr': 0.0004717325573331271, 'samples': 4780544, 'steps': 9336, 'loss/train': 4.578775405883789} +02/24/2022 13:39:18 - INFO - codeparrot_training - Step 9337: {'lr': 0.00047172499898120014, 'samples': 4781056, 'steps': 9337, 'loss/train': 1.9555407762527466} +02/24/2022 13:39:23 - INFO - codeparrot_training - Step 9338: {'lr': 0.0004717174396794737, 'samples': 4781568, 'steps': 9338, 'loss/train': 1.952053189277649} +02/24/2022 13:39:27 - INFO - codeparrot_training - Step 9339: {'lr': 0.00047170987942798004, 'samples': 4782080, 'steps': 9339, 'loss/train': 1.8725188970565796} +02/24/2022 13:39:32 - INFO - codeparrot_training - Step 9340: {'lr': 0.0004717023182267518, 'samples': 4782592, 'steps': 9340, 'loss/train': 1.8152172565460205} +02/24/2022 13:39:38 - INFO - codeparrot_training - Step 9341: {'lr': 0.00047169475607582113, 'samples': 4783104, 'steps': 9341, 'loss/train': 2.0683112144470215} +02/24/2022 13:39:41 - INFO - codeparrot_training - Step 9342: {'lr': 0.00047168719297522053, 'samples': 4783616, 'steps': 9342, 'loss/train': 1.3040565252304077} +02/24/2022 13:39:47 - INFO - codeparrot_training - Step 9343: {'lr': 0.0004716796289249824, 'samples': 4784128, 'steps': 9343, 'loss/train': 2.081977605819702} +02/24/2022 13:39:51 - INFO - codeparrot_training - Step 9344: {'lr': 0.0004716720639251392, 'samples': 4784640, 'steps': 9344, 'loss/train': 2.5326483249664307} +02/24/2022 13:39:56 - INFO - codeparrot_training - Step 9345: {'lr': 0.00047166449797572316, 'samples': 4785152, 'steps': 9345, 'loss/train': 2.550849676132202} +02/24/2022 13:40:00 - INFO - codeparrot_training - Step 9346: {'lr': 0.0004716569310767668, 'samples': 4785664, 'steps': 9346, 'loss/train': 1.447945237159729} +02/24/2022 13:40:06 - INFO - codeparrot_training - Step 9347: {'lr': 0.00047164936322830256, 'samples': 4786176, 'steps': 9347, 'loss/train': 2.047583818435669} +02/24/2022 13:40:09 - INFO - codeparrot_training - Step 9348: {'lr': 0.0004716417944303628, 'samples': 4786688, 'steps': 9348, 'loss/train': 2.749289035797119} +02/24/2022 13:40:14 - INFO - codeparrot_training - Step 9349: {'lr': 0.00047163422468298003, 'samples': 4787200, 'steps': 9349, 'loss/train': 2.25677227973938} +02/24/2022 13:40:18 - INFO - codeparrot_training - Step 9350: {'lr': 0.00047162665398618666, 'samples': 4787712, 'steps': 9350, 'loss/train': 1.3625988960266113} +02/24/2022 13:40:23 - INFO - codeparrot_training - Step 9351: {'lr': 0.00047161908234001496, 'samples': 4788224, 'steps': 9351, 'loss/train': 2.437257766723633} +02/24/2022 13:40:27 - INFO - codeparrot_training - Step 9352: {'lr': 0.0004716115097444975, 'samples': 4788736, 'steps': 9352, 'loss/train': 2.399075984954834} +02/24/2022 13:40:33 - INFO - codeparrot_training - Step 9353: {'lr': 0.0004716039361996668, 'samples': 4789248, 'steps': 9353, 'loss/train': 2.3500478267669678} +02/24/2022 13:40:37 - INFO - codeparrot_training - Step 9354: {'lr': 0.0004715963617055551, 'samples': 4789760, 'steps': 9354, 'loss/train': 2.6645054817199707} +02/24/2022 13:40:42 - INFO - codeparrot_training - Step 9355: {'lr': 0.00047158878626219505, 'samples': 4790272, 'steps': 9355, 'loss/train': 2.704805850982666} +02/24/2022 13:40:46 - INFO - codeparrot_training - Step 9356: {'lr': 0.00047158120986961897, 'samples': 4790784, 'steps': 9356, 'loss/train': 2.1819796562194824} +02/24/2022 13:40:51 - INFO - codeparrot_training - Step 9357: {'lr': 0.0004715736325278593, 'samples': 4791296, 'steps': 9357, 'loss/train': 1.9590816497802734} +02/24/2022 13:40:55 - INFO - codeparrot_training - Step 9358: {'lr': 0.0004715660542369485, 'samples': 4791808, 'steps': 9358, 'loss/train': 2.725348711013794} +02/24/2022 13:41:00 - INFO - codeparrot_training - Step 9359: {'lr': 0.0004715584749969192, 'samples': 4792320, 'steps': 9359, 'loss/train': 2.174389362335205} +02/24/2022 13:41:04 - INFO - codeparrot_training - Step 9360: {'lr': 0.00047155089480780364, 'samples': 4792832, 'steps': 9360, 'loss/train': 2.9488959312438965} +02/24/2022 13:41:09 - INFO - codeparrot_training - Step 9361: {'lr': 0.0004715433136696345, 'samples': 4793344, 'steps': 9361, 'loss/train': 1.9647334814071655} +02/24/2022 13:41:13 - INFO - codeparrot_training - Step 9362: {'lr': 0.0004715357315824441, 'samples': 4793856, 'steps': 9362, 'loss/train': 1.849334716796875} +02/24/2022 13:41:18 - INFO - codeparrot_training - Step 9363: {'lr': 0.00047152814854626494, 'samples': 4794368, 'steps': 9363, 'loss/train': 2.8237733840942383} +02/24/2022 13:41:22 - INFO - codeparrot_training - Step 9364: {'lr': 0.0004715205645611296, 'samples': 4794880, 'steps': 9364, 'loss/train': 2.231487274169922} +02/24/2022 13:41:27 - INFO - codeparrot_training - Step 9365: {'lr': 0.00047151297962707054, 'samples': 4795392, 'steps': 9365, 'loss/train': 2.5442285537719727} +02/24/2022 13:41:31 - INFO - codeparrot_training - Step 9366: {'lr': 0.00047150539374412004, 'samples': 4795904, 'steps': 9366, 'loss/train': 1.7987245321273804} +02/24/2022 13:41:36 - INFO - codeparrot_training - Step 9367: {'lr': 0.0004714978069123109, 'samples': 4796416, 'steps': 9367, 'loss/train': 1.5481021404266357} +02/24/2022 13:41:40 - INFO - codeparrot_training - Step 9368: {'lr': 0.00047149021913167545, 'samples': 4796928, 'steps': 9368, 'loss/train': 0.6987714767456055} +02/24/2022 13:41:46 - INFO - codeparrot_training - Step 9369: {'lr': 0.00047148263040224626, 'samples': 4797440, 'steps': 9369, 'loss/train': 2.306771755218506} +02/24/2022 13:41:49 - INFO - codeparrot_training - Step 9370: {'lr': 0.00047147504072405575, 'samples': 4797952, 'steps': 9370, 'loss/train': 2.630359649658203} +02/24/2022 13:41:55 - INFO - codeparrot_training - Step 9371: {'lr': 0.0004714674500971366, 'samples': 4798464, 'steps': 9371, 'loss/train': 1.6505128145217896} +02/24/2022 13:41:58 - INFO - codeparrot_training - Step 9372: {'lr': 0.00047145985852152115, 'samples': 4798976, 'steps': 9372, 'loss/train': 2.2431480884552} +02/24/2022 13:42:04 - INFO - codeparrot_training - Step 9373: {'lr': 0.000471452265997242, 'samples': 4799488, 'steps': 9373, 'loss/train': 2.020749092102051} +02/24/2022 13:42:08 - INFO - codeparrot_training - Step 9374: {'lr': 0.00047144467252433164, 'samples': 4800000, 'steps': 9374, 'loss/train': 2.1209564208984375} +02/24/2022 13:42:13 - INFO - codeparrot_training - Step 9375: {'lr': 0.00047143707810282266, 'samples': 4800512, 'steps': 9375, 'loss/train': 0.8257538080215454} +02/24/2022 13:42:16 - INFO - codeparrot_training - Step 9376: {'lr': 0.0004714294827327475, 'samples': 4801024, 'steps': 9376, 'loss/train': 1.7988255023956299} +02/24/2022 13:42:23 - INFO - codeparrot_training - Step 9377: {'lr': 0.00047142188641413873, 'samples': 4801536, 'steps': 9377, 'loss/train': 3.4448342323303223} +02/24/2022 13:42:26 - INFO - codeparrot_training - Step 9378: {'lr': 0.000471414289147029, 'samples': 4802048, 'steps': 9378, 'loss/train': 2.1589090824127197} +02/24/2022 13:42:30 - INFO - codeparrot_training - Step 9379: {'lr': 0.00047140669093145073, 'samples': 4802560, 'steps': 9379, 'loss/train': 2.4292073249816895} +02/24/2022 13:42:36 - INFO - codeparrot_training - Step 9380: {'lr': 0.00047139909176743643, 'samples': 4803072, 'steps': 9380, 'loss/train': 3.0854930877685547} +02/24/2022 13:42:40 - INFO - codeparrot_training - Step 9381: {'lr': 0.0004713914916550188, 'samples': 4803584, 'steps': 9381, 'loss/train': 1.1514456272125244} +02/24/2022 13:42:45 - INFO - codeparrot_training - Step 9382: {'lr': 0.00047138389059423033, 'samples': 4804096, 'steps': 9382, 'loss/train': 2.2677462100982666} +02/24/2022 13:42:49 - INFO - codeparrot_training - Step 9383: {'lr': 0.0004713762885851035, 'samples': 4804608, 'steps': 9383, 'loss/train': 2.2468223571777344} +02/24/2022 13:42:54 - INFO - codeparrot_training - Step 9384: {'lr': 0.000471368685627671, 'samples': 4805120, 'steps': 9384, 'loss/train': 2.3884036540985107} +02/24/2022 13:42:58 - INFO - codeparrot_training - Step 9385: {'lr': 0.00047136108172196535, 'samples': 4805632, 'steps': 9385, 'loss/train': 2.0233447551727295} +02/24/2022 13:43:04 - INFO - codeparrot_training - Step 9386: {'lr': 0.00047135347686801907, 'samples': 4806144, 'steps': 9386, 'loss/train': 2.1385862827301025} +02/24/2022 13:43:07 - INFO - codeparrot_training - Step 9387: {'lr': 0.0004713458710658648, 'samples': 4806656, 'steps': 9387, 'loss/train': 1.24008047580719} +02/24/2022 13:43:13 - INFO - codeparrot_training - Step 9388: {'lr': 0.0004713382643155351, 'samples': 4807168, 'steps': 9388, 'loss/train': 2.990281343460083} +02/24/2022 13:43:16 - INFO - codeparrot_training - Step 9389: {'lr': 0.00047133065661706254, 'samples': 4807680, 'steps': 9389, 'loss/train': 2.3468973636627197} +02/24/2022 13:43:22 - INFO - codeparrot_training - Step 9390: {'lr': 0.00047132304797047975, 'samples': 4808192, 'steps': 9390, 'loss/train': 2.8881564140319824} +02/24/2022 13:43:26 - INFO - codeparrot_training - Step 9391: {'lr': 0.00047131543837581935, 'samples': 4808704, 'steps': 9391, 'loss/train': 1.9530246257781982} +02/24/2022 13:43:31 - INFO - codeparrot_training - Step 9392: {'lr': 0.0004713078278331138, 'samples': 4809216, 'steps': 9392, 'loss/train': 2.6750190258026123} +02/24/2022 13:43:35 - INFO - codeparrot_training - Step 9393: {'lr': 0.00047130021634239584, 'samples': 4809728, 'steps': 9393, 'loss/train': 1.4577772617340088} +02/24/2022 13:43:40 - INFO - codeparrot_training - Step 9394: {'lr': 0.000471292603903698, 'samples': 4810240, 'steps': 9394, 'loss/train': 1.9654873609542847} +02/24/2022 13:43:44 - INFO - codeparrot_training - Step 9395: {'lr': 0.00047128499051705296, 'samples': 4810752, 'steps': 9395, 'loss/train': 3.8678324222564697} +02/24/2022 13:43:49 - INFO - codeparrot_training - Step 9396: {'lr': 0.00047127737618249323, 'samples': 4811264, 'steps': 9396, 'loss/train': 2.4186582565307617} +02/24/2022 13:43:53 - INFO - codeparrot_training - Step 9397: {'lr': 0.00047126976090005153, 'samples': 4811776, 'steps': 9397, 'loss/train': 3.3421342372894287} +02/24/2022 13:43:58 - INFO - codeparrot_training - Step 9398: {'lr': 0.00047126214466976034, 'samples': 4812288, 'steps': 9398, 'loss/train': 2.1782310009002686} +02/24/2022 13:44:02 - INFO - codeparrot_training - Step 9399: {'lr': 0.0004712545274916525, 'samples': 4812800, 'steps': 9399, 'loss/train': 2.024573802947998} +02/24/2022 13:44:08 - INFO - codeparrot_training - Step 9400: {'lr': 0.00047124690936576046, 'samples': 4813312, 'steps': 9400, 'loss/train': 2.6351754665374756} +02/24/2022 13:44:11 - INFO - codeparrot_training - Step 9401: {'lr': 0.000471239290292117, 'samples': 4813824, 'steps': 9401, 'loss/train': 2.5747451782226562} +02/24/2022 13:44:17 - INFO - codeparrot_training - Step 9402: {'lr': 0.00047123167027075455, 'samples': 4814336, 'steps': 9402, 'loss/train': 1.8039214611053467} +02/24/2022 13:44:20 - INFO - codeparrot_training - Step 9403: {'lr': 0.0004712240493017059, 'samples': 4814848, 'steps': 9403, 'loss/train': 2.802110433578491} +02/24/2022 13:44:26 - INFO - codeparrot_training - Step 9404: {'lr': 0.0004712164273850037, 'samples': 4815360, 'steps': 9404, 'loss/train': 2.291334629058838} +02/24/2022 13:44:29 - INFO - codeparrot_training - Step 9405: {'lr': 0.0004712088045206806, 'samples': 4815872, 'steps': 9405, 'loss/train': 2.223386764526367} +02/24/2022 13:44:35 - INFO - codeparrot_training - Step 9406: {'lr': 0.00047120118070876916, 'samples': 4816384, 'steps': 9406, 'loss/train': 2.235356330871582} +02/24/2022 13:44:38 - INFO - codeparrot_training - Step 9407: {'lr': 0.0004711935559493021, 'samples': 4816896, 'steps': 9407, 'loss/train': 1.396200180053711} +02/24/2022 13:44:44 - INFO - codeparrot_training - Step 9408: {'lr': 0.00047118593024231216, 'samples': 4817408, 'steps': 9408, 'loss/train': 2.0703699588775635} +02/24/2022 13:44:47 - INFO - codeparrot_training - Step 9409: {'lr': 0.00047117830358783184, 'samples': 4817920, 'steps': 9409, 'loss/train': 2.2124409675598145} +02/24/2022 13:44:53 - INFO - codeparrot_training - Step 9410: {'lr': 0.0004711706759858939, 'samples': 4818432, 'steps': 9410, 'loss/train': 2.479804039001465} +02/24/2022 13:44:56 - INFO - codeparrot_training - Step 9411: {'lr': 0.0004711630474365311, 'samples': 4818944, 'steps': 9411, 'loss/train': 2.5117905139923096} +02/24/2022 13:45:02 - INFO - codeparrot_training - Step 9412: {'lr': 0.000471155417939776, 'samples': 4819456, 'steps': 9412, 'loss/train': 1.632948875427246} +02/24/2022 13:45:05 - INFO - codeparrot_training - Step 9413: {'lr': 0.00047114778749566123, 'samples': 4819968, 'steps': 9413, 'loss/train': 1.818543553352356} +02/24/2022 13:45:11 - INFO - codeparrot_training - Step 9414: {'lr': 0.00047114015610421966, 'samples': 4820480, 'steps': 9414, 'loss/train': 2.543210744857788} +02/24/2022 13:45:14 - INFO - codeparrot_training - Step 9415: {'lr': 0.00047113252376548387, 'samples': 4820992, 'steps': 9415, 'loss/train': 2.46437931060791} +02/24/2022 13:45:21 - INFO - codeparrot_training - Step 9416: {'lr': 0.00047112489047948655, 'samples': 4821504, 'steps': 9416, 'loss/train': 2.1986892223358154} +02/24/2022 13:45:24 - INFO - codeparrot_training - Step 9417: {'lr': 0.0004711172562462604, 'samples': 4822016, 'steps': 9417, 'loss/train': 1.3839813470840454} +02/24/2022 13:45:30 - INFO - codeparrot_training - Step 9418: {'lr': 0.0004711096210658381, 'samples': 4822528, 'steps': 9418, 'loss/train': 1.9671071767807007} +02/24/2022 13:45:33 - INFO - codeparrot_training - Step 9419: {'lr': 0.0004711019849382525, 'samples': 4823040, 'steps': 9419, 'loss/train': 2.378391742706299} +02/24/2022 13:45:39 - INFO - codeparrot_training - Step 9420: {'lr': 0.0004710943478635361, 'samples': 4823552, 'steps': 9420, 'loss/train': 2.1312999725341797} +02/24/2022 13:45:42 - INFO - codeparrot_training - Step 9421: {'lr': 0.00047108670984172176, 'samples': 4824064, 'steps': 9421, 'loss/train': 0.6496623158454895} +02/24/2022 13:45:48 - INFO - codeparrot_training - Step 9422: {'lr': 0.00047107907087284216, 'samples': 4824576, 'steps': 9422, 'loss/train': 1.5175443887710571} +02/24/2022 13:45:51 - INFO - codeparrot_training - Step 9423: {'lr': 0.00047107143095693007, 'samples': 4825088, 'steps': 9423, 'loss/train': 1.9888180494308472} +02/24/2022 13:45:57 - INFO - codeparrot_training - Step 9424: {'lr': 0.0004710637900940181, 'samples': 4825600, 'steps': 9424, 'loss/train': 1.5517288446426392} +02/24/2022 13:46:00 - INFO - codeparrot_training - Step 9425: {'lr': 0.00047105614828413906, 'samples': 4826112, 'steps': 9425, 'loss/train': 2.545207977294922} +02/24/2022 13:46:07 - INFO - codeparrot_training - Step 9426: {'lr': 0.0004710485055273257, 'samples': 4826624, 'steps': 9426, 'loss/train': 2.6607866287231445} +02/24/2022 13:46:11 - INFO - codeparrot_training - Step 9427: {'lr': 0.00047104086182361073, 'samples': 4827136, 'steps': 9427, 'loss/train': 0.6701631546020508} +02/24/2022 13:46:16 - INFO - codeparrot_training - Step 9428: {'lr': 0.00047103321717302684, 'samples': 4827648, 'steps': 9428, 'loss/train': 1.7967125177383423} +02/24/2022 13:46:20 - INFO - codeparrot_training - Step 9429: {'lr': 0.00047102557157560686, 'samples': 4828160, 'steps': 9429, 'loss/train': 2.142204761505127} +02/24/2022 13:46:25 - INFO - codeparrot_training - Step 9430: {'lr': 0.00047101792503138353, 'samples': 4828672, 'steps': 9430, 'loss/train': 1.7491512298583984} +02/24/2022 13:46:29 - INFO - codeparrot_training - Step 9431: {'lr': 0.0004710102775403896, 'samples': 4829184, 'steps': 9431, 'loss/train': 2.050910472869873} +02/24/2022 13:46:34 - INFO - codeparrot_training - Step 9432: {'lr': 0.00047100262910265787, 'samples': 4829696, 'steps': 9432, 'loss/train': 1.6814892292022705} +02/24/2022 13:46:38 - INFO - codeparrot_training - Step 9433: {'lr': 0.00047099497971822096, 'samples': 4830208, 'steps': 9433, 'loss/train': 1.6766963005065918} +02/24/2022 13:46:43 - INFO - codeparrot_training - Step 9434: {'lr': 0.00047098732938711174, 'samples': 4830720, 'steps': 9434, 'loss/train': 3.0554845333099365} +02/24/2022 13:46:47 - INFO - codeparrot_training - Step 9435: {'lr': 0.00047097967810936305, 'samples': 4831232, 'steps': 9435, 'loss/train': 1.290590763092041} +02/24/2022 13:46:53 - INFO - codeparrot_training - Step 9436: {'lr': 0.00047097202588500747, 'samples': 4831744, 'steps': 9436, 'loss/train': 3.68501615524292} +02/24/2022 13:46:56 - INFO - codeparrot_training - Step 9437: {'lr': 0.000470964372714078, 'samples': 4832256, 'steps': 9437, 'loss/train': 2.2355496883392334} +02/24/2022 13:47:02 - INFO - codeparrot_training - Step 9438: {'lr': 0.00047095671859660726, 'samples': 4832768, 'steps': 9438, 'loss/train': 1.6161638498306274} +02/24/2022 13:47:05 - INFO - codeparrot_training - Step 9439: {'lr': 0.0004709490635326281, 'samples': 4833280, 'steps': 9439, 'loss/train': 1.6555230617523193} +02/24/2022 13:47:11 - INFO - codeparrot_training - Step 9440: {'lr': 0.0004709414075221734, 'samples': 4833792, 'steps': 9440, 'loss/train': 2.132615327835083} +02/24/2022 13:47:14 - INFO - codeparrot_training - Step 9441: {'lr': 0.00047093375056527577, 'samples': 4834304, 'steps': 9441, 'loss/train': 1.8542178869247437} +02/24/2022 13:47:20 - INFO - codeparrot_training - Step 9442: {'lr': 0.0004709260926619682, 'samples': 4834816, 'steps': 9442, 'loss/train': 2.3685977458953857} +02/24/2022 13:47:23 - INFO - codeparrot_training - Step 9443: {'lr': 0.00047091843381228326, 'samples': 4835328, 'steps': 9443, 'loss/train': 2.610386371612549} +02/24/2022 13:47:29 - INFO - codeparrot_training - Step 9444: {'lr': 0.000470910774016254, 'samples': 4835840, 'steps': 9444, 'loss/train': 3.0459628105163574} +02/24/2022 13:47:32 - INFO - codeparrot_training - Step 9445: {'lr': 0.0004709031132739131, 'samples': 4836352, 'steps': 9445, 'loss/train': 2.9118127822875977} +02/24/2022 13:47:38 - INFO - codeparrot_training - Step 9446: {'lr': 0.0004708954515852934, 'samples': 4836864, 'steps': 9446, 'loss/train': 2.26836895942688} +02/24/2022 13:47:42 - INFO - codeparrot_training - Step 9447: {'lr': 0.00047088778895042774, 'samples': 4837376, 'steps': 9447, 'loss/train': 2.414227247238159} +02/24/2022 13:47:48 - INFO - codeparrot_training - Step 9448: {'lr': 0.000470880125369349, 'samples': 4837888, 'steps': 9448, 'loss/train': 2.544114589691162} +02/24/2022 13:47:52 - INFO - codeparrot_training - Step 9449: {'lr': 0.0004708724608420898, 'samples': 4838400, 'steps': 9449, 'loss/train': 2.782829523086548} +02/24/2022 13:47:55 - INFO - codeparrot_training - Step 9450: {'lr': 0.0004708647953686832, 'samples': 4838912, 'steps': 9450, 'loss/train': 2.9846577644348145} +02/24/2022 13:48:01 - INFO - codeparrot_training - Step 9451: {'lr': 0.000470857128949162, 'samples': 4839424, 'steps': 9451, 'loss/train': 1.1818568706512451} +02/24/2022 13:48:04 - INFO - codeparrot_training - Step 9452: {'lr': 0.0004708494615835589, 'samples': 4839936, 'steps': 9452, 'loss/train': 2.4610939025878906} +02/24/2022 13:48:10 - INFO - codeparrot_training - Step 9453: {'lr': 0.0004708417932719068, 'samples': 4840448, 'steps': 9453, 'loss/train': 2.3494925498962402} +02/24/2022 13:48:13 - INFO - codeparrot_training - Step 9454: {'lr': 0.0004708341240142387, 'samples': 4840960, 'steps': 9454, 'loss/train': 2.653748035430908} +02/24/2022 13:48:19 - INFO - codeparrot_training - Step 9455: {'lr': 0.0004708264538105873, 'samples': 4841472, 'steps': 9455, 'loss/train': 2.767214059829712} +02/24/2022 13:48:22 - INFO - codeparrot_training - Step 9456: {'lr': 0.0004708187826609854, 'samples': 4841984, 'steps': 9456, 'loss/train': 2.1033360958099365} +02/24/2022 13:48:28 - INFO - codeparrot_training - Step 9457: {'lr': 0.0004708111105654661, 'samples': 4842496, 'steps': 9457, 'loss/train': 2.3968472480773926} +02/24/2022 13:48:31 - INFO - codeparrot_training - Step 9458: {'lr': 0.000470803437524062, 'samples': 4843008, 'steps': 9458, 'loss/train': 2.016186475753784} +02/24/2022 13:48:37 - INFO - codeparrot_training - Step 9459: {'lr': 0.00047079576353680614, 'samples': 4843520, 'steps': 9459, 'loss/train': 2.1598775386810303} +02/24/2022 13:48:40 - INFO - codeparrot_training - Step 9460: {'lr': 0.0004707880886037314, 'samples': 4844032, 'steps': 9460, 'loss/train': 2.0636279582977295} +02/24/2022 13:48:46 - INFO - codeparrot_training - Step 9461: {'lr': 0.00047078041272487046, 'samples': 4844544, 'steps': 9461, 'loss/train': 2.281644582748413} +02/24/2022 13:48:52 - INFO - codeparrot_training - Step 9462: {'lr': 0.00047077273590025637, 'samples': 4845056, 'steps': 9462, 'loss/train': 1.195705771446228} +02/24/2022 13:48:55 - INFO - codeparrot_training - Step 9463: {'lr': 0.00047076505812992204, 'samples': 4845568, 'steps': 9463, 'loss/train': 2.2360119819641113} +02/24/2022 13:48:59 - INFO - codeparrot_training - Step 9464: {'lr': 0.0004707573794139003, 'samples': 4846080, 'steps': 9464, 'loss/train': 2.893397331237793} +02/24/2022 13:49:05 - INFO - codeparrot_training - Step 9465: {'lr': 0.00047074969975222406, 'samples': 4846592, 'steps': 9465, 'loss/train': 2.2125468254089355} +02/24/2022 13:49:09 - INFO - codeparrot_training - Step 9466: {'lr': 0.0004707420191449261, 'samples': 4847104, 'steps': 9466, 'loss/train': 2.1745247840881348} +02/24/2022 13:49:14 - INFO - codeparrot_training - Step 9467: {'lr': 0.0004707343375920395, 'samples': 4847616, 'steps': 9467, 'loss/train': 2.440685272216797} +02/24/2022 13:49:18 - INFO - codeparrot_training - Step 9468: {'lr': 0.0004707266550935971, 'samples': 4848128, 'steps': 9468, 'loss/train': 2.183720827102661} +02/24/2022 13:49:23 - INFO - codeparrot_training - Step 9469: {'lr': 0.00047071897164963175, 'samples': 4848640, 'steps': 9469, 'loss/train': 0.6161959171295166} +02/24/2022 13:49:29 - INFO - codeparrot_training - Step 9470: {'lr': 0.00047071128726017643, 'samples': 4849152, 'steps': 9470, 'loss/train': 3.4612972736358643} +02/24/2022 13:49:32 - INFO - codeparrot_training - Step 9471: {'lr': 0.0004707036019252641, 'samples': 4849664, 'steps': 9471, 'loss/train': 2.284473419189453} +02/24/2022 13:49:35 - INFO - codeparrot_training - Step 9472: {'lr': 0.00047069591564492753, 'samples': 4850176, 'steps': 9472, 'loss/train': 2.2644498348236084} +02/24/2022 13:49:42 - INFO - codeparrot_training - Step 9473: {'lr': 0.00047068822841919976, 'samples': 4850688, 'steps': 9473, 'loss/train': 1.4850801229476929} +02/24/2022 13:49:46 - INFO - codeparrot_training - Step 9474: {'lr': 0.0004706805402481137, 'samples': 4851200, 'steps': 9474, 'loss/train': 2.9515137672424316} +02/24/2022 13:49:51 - INFO - codeparrot_training - Step 9475: {'lr': 0.00047067285113170233, 'samples': 4851712, 'steps': 9475, 'loss/train': 1.6336843967437744} +02/24/2022 13:49:55 - INFO - codeparrot_training - Step 9476: {'lr': 0.0004706651610699985, 'samples': 4852224, 'steps': 9476, 'loss/train': 2.651027202606201} +02/24/2022 13:50:00 - INFO - codeparrot_training - Step 9477: {'lr': 0.0004706574700630352, 'samples': 4852736, 'steps': 9477, 'loss/train': 2.1046602725982666} +02/24/2022 13:50:06 - INFO - codeparrot_training - Step 9478: {'lr': 0.0004706497781108453, 'samples': 4853248, 'steps': 9478, 'loss/train': 1.5995279550552368} +02/24/2022 13:50:09 - INFO - codeparrot_training - Step 9479: {'lr': 0.00047064208521346184, 'samples': 4853760, 'steps': 9479, 'loss/train': 3.694612979888916} +02/24/2022 13:50:15 - INFO - codeparrot_training - Step 9480: {'lr': 0.0004706343913709178, 'samples': 4854272, 'steps': 9480, 'loss/train': 2.999093532562256} +02/24/2022 13:50:18 - INFO - codeparrot_training - Step 9481: {'lr': 0.0004706266965832461, 'samples': 4854784, 'steps': 9481, 'loss/train': 2.6196208000183105} +02/24/2022 13:50:22 - INFO - codeparrot_training - Step 9482: {'lr': 0.0004706190008504796, 'samples': 4855296, 'steps': 9482, 'loss/train': 2.915841579437256} +02/24/2022 13:50:28 - INFO - codeparrot_training - Step 9483: {'lr': 0.00047061130417265143, 'samples': 4855808, 'steps': 9483, 'loss/train': 2.063542127609253} +02/24/2022 13:50:34 - INFO - codeparrot_training - Step 9484: {'lr': 0.0004706036065497944, 'samples': 4856320, 'steps': 9484, 'loss/train': 2.0744080543518066} +02/24/2022 13:50:37 - INFO - codeparrot_training - Step 9485: {'lr': 0.0004705959079819416, 'samples': 4856832, 'steps': 9485, 'loss/train': 0.7285880446434021} +02/24/2022 13:50:41 - INFO - codeparrot_training - Step 9486: {'lr': 0.0004705882084691261, 'samples': 4857344, 'steps': 9486, 'loss/train': 3.391324520111084} +02/24/2022 13:50:46 - INFO - codeparrot_training - Step 9487: {'lr': 0.00047058050801138064, 'samples': 4857856, 'steps': 9487, 'loss/train': 3.2877824306488037} +02/24/2022 13:50:50 - INFO - codeparrot_training - Step 9488: {'lr': 0.00047057280660873835, 'samples': 4858368, 'steps': 9488, 'loss/train': 2.6793205738067627} +02/24/2022 13:50:55 - INFO - codeparrot_training - Step 9489: {'lr': 0.0004705651042612322, 'samples': 4858880, 'steps': 9489, 'loss/train': 2.9948320388793945} +02/24/2022 13:50:59 - INFO - codeparrot_training - Step 9490: {'lr': 0.00047055740096889516, 'samples': 4859392, 'steps': 9490, 'loss/train': 2.651505470275879} +02/24/2022 13:51:04 - INFO - codeparrot_training - Step 9491: {'lr': 0.0004705496967317603, 'samples': 4859904, 'steps': 9491, 'loss/train': 1.615960717201233} +02/24/2022 13:51:08 - INFO - codeparrot_training - Step 9492: {'lr': 0.0004705419915498605, 'samples': 4860416, 'steps': 9492, 'loss/train': 2.577371835708618} +02/24/2022 13:51:14 - INFO - codeparrot_training - Step 9493: {'lr': 0.0004705342854232288, 'samples': 4860928, 'steps': 9493, 'loss/train': 2.549577474594116} +02/24/2022 13:51:17 - INFO - codeparrot_training - Step 9494: {'lr': 0.00047052657835189836, 'samples': 4861440, 'steps': 9494, 'loss/train': 0.5594264268875122} +02/24/2022 13:51:24 - INFO - codeparrot_training - Step 9495: {'lr': 0.00047051887033590205, 'samples': 4861952, 'steps': 9495, 'loss/train': 3.7060985565185547} +02/24/2022 13:51:27 - INFO - codeparrot_training - Step 9496: {'lr': 0.00047051116137527296, 'samples': 4862464, 'steps': 9496, 'loss/train': 2.390371561050415} +02/24/2022 13:51:33 - INFO - codeparrot_training - Step 9497: {'lr': 0.000470503451470044, 'samples': 4862976, 'steps': 9497, 'loss/train': 2.9564828872680664} +02/24/2022 13:51:36 - INFO - codeparrot_training - Step 9498: {'lr': 0.00047049574062024837, 'samples': 4863488, 'steps': 9498, 'loss/train': 2.4608426094055176} +02/24/2022 13:51:42 - INFO - codeparrot_training - Step 9499: {'lr': 0.0004704880288259189, 'samples': 4864000, 'steps': 9499, 'loss/train': 1.6270740032196045} +02/24/2022 13:51:45 - INFO - codeparrot_training - Step 9500: {'lr': 0.00047048031608708875, 'samples': 4864512, 'steps': 9500, 'loss/train': 0.9599283933639526} +02/24/2022 13:51:51 - INFO - codeparrot_training - Step 9501: {'lr': 0.00047047260240379096, 'samples': 4865024, 'steps': 9501, 'loss/train': 1.2787842750549316} +02/24/2022 13:51:54 - INFO - codeparrot_training - Step 9502: {'lr': 0.00047046488777605853, 'samples': 4865536, 'steps': 9502, 'loss/train': 1.7225559949874878} +02/24/2022 13:52:00 - INFO - codeparrot_training - Step 9503: {'lr': 0.0004704571722039246, 'samples': 4866048, 'steps': 9503, 'loss/train': 8.059077262878418} +02/24/2022 13:52:03 - INFO - codeparrot_training - Step 9504: {'lr': 0.00047044945568742205, 'samples': 4866560, 'steps': 9504, 'loss/train': 2.1754374504089355} +02/24/2022 13:52:09 - INFO - codeparrot_training - Step 9505: {'lr': 0.0004704417382265841, 'samples': 4867072, 'steps': 9505, 'loss/train': 2.2936434745788574} +02/24/2022 13:52:13 - INFO - codeparrot_training - Step 9506: {'lr': 0.0004704340198214437, 'samples': 4867584, 'steps': 9506, 'loss/train': 2.381317615509033} +02/24/2022 13:52:18 - INFO - codeparrot_training - Step 9507: {'lr': 0.00047042630047203394, 'samples': 4868096, 'steps': 9507, 'loss/train': 1.6862726211547852} +02/24/2022 13:52:22 - INFO - codeparrot_training - Step 9508: {'lr': 0.0004704185801783879, 'samples': 4868608, 'steps': 9508, 'loss/train': 1.9814400672912598} +02/24/2022 13:52:28 - INFO - codeparrot_training - Step 9509: {'lr': 0.0004704108589405387, 'samples': 4869120, 'steps': 9509, 'loss/train': 2.798516035079956} +02/24/2022 13:52:31 - INFO - codeparrot_training - Step 9510: {'lr': 0.0004704031367585193, 'samples': 4869632, 'steps': 9510, 'loss/train': 1.804969072341919} +02/24/2022 13:52:37 - INFO - codeparrot_training - Step 9511: {'lr': 0.0004703954136323629, 'samples': 4870144, 'steps': 9511, 'loss/train': 1.420964241027832} +02/24/2022 13:52:40 - INFO - codeparrot_training - Step 9512: {'lr': 0.0004703876895621025, 'samples': 4870656, 'steps': 9512, 'loss/train': 1.7509866952896118} +02/24/2022 13:52:46 - INFO - codeparrot_training - Step 9513: {'lr': 0.00047037996454777134, 'samples': 4871168, 'steps': 9513, 'loss/train': 2.491886854171753} +02/24/2022 13:52:49 - INFO - codeparrot_training - Step 9514: {'lr': 0.00047037223858940224, 'samples': 4871680, 'steps': 9514, 'loss/train': 1.3904900550842285} +02/24/2022 13:52:55 - INFO - codeparrot_training - Step 9515: {'lr': 0.00047036451168702855, 'samples': 4872192, 'steps': 9515, 'loss/train': 2.199233055114746} +02/24/2022 13:52:58 - INFO - codeparrot_training - Step 9516: {'lr': 0.0004703567838406832, 'samples': 4872704, 'steps': 9516, 'loss/train': 1.4684724807739258} +02/24/2022 13:53:04 - INFO - codeparrot_training - Step 9517: {'lr': 0.00047034905505039936, 'samples': 4873216, 'steps': 9517, 'loss/train': 2.099609375} +02/24/2022 13:53:07 - INFO - codeparrot_training - Step 9518: {'lr': 0.0004703413253162102, 'samples': 4873728, 'steps': 9518, 'loss/train': 2.147616386413574} +02/24/2022 13:53:13 - INFO - codeparrot_training - Step 9519: {'lr': 0.00047033359463814875, 'samples': 4874240, 'steps': 9519, 'loss/train': 2.0364086627960205} +02/24/2022 13:53:16 - INFO - codeparrot_training - Step 9520: {'lr': 0.00047032586301624804, 'samples': 4874752, 'steps': 9520, 'loss/train': 1.6985219717025757} +02/24/2022 13:53:22 - INFO - codeparrot_training - Step 9521: {'lr': 0.0004703181304505414, 'samples': 4875264, 'steps': 9521, 'loss/train': 1.8973575830459595} +02/24/2022 13:53:26 - INFO - codeparrot_training - Step 9522: {'lr': 0.0004703103969410618, 'samples': 4875776, 'steps': 9522, 'loss/train': 2.3894176483154297} +02/24/2022 13:53:31 - INFO - codeparrot_training - Step 9523: {'lr': 0.0004703026624878425, 'samples': 4876288, 'steps': 9523, 'loss/train': 2.59407639503479} +02/24/2022 13:53:34 - INFO - codeparrot_training - Step 9524: {'lr': 0.0004702949270909164, 'samples': 4876800, 'steps': 9524, 'loss/train': 3.0909056663513184} +02/24/2022 13:53:40 - INFO - codeparrot_training - Step 9525: {'lr': 0.0004702871907503169, 'samples': 4877312, 'steps': 9525, 'loss/train': 2.36826229095459} +02/24/2022 13:53:44 - INFO - codeparrot_training - Step 9526: {'lr': 0.000470279453466077, 'samples': 4877824, 'steps': 9526, 'loss/train': 2.572887897491455} +02/24/2022 13:53:49 - INFO - codeparrot_training - Step 9527: {'lr': 0.0004702717152382299, 'samples': 4878336, 'steps': 9527, 'loss/train': 2.394839286804199} +02/24/2022 13:53:53 - INFO - codeparrot_training - Step 9528: {'lr': 0.0004702639760668086, 'samples': 4878848, 'steps': 9528, 'loss/train': 2.151036500930786} +02/24/2022 13:53:58 - INFO - codeparrot_training - Step 9529: {'lr': 0.00047025623595184645, 'samples': 4879360, 'steps': 9529, 'loss/train': 2.4957728385925293} +02/24/2022 13:54:02 - INFO - codeparrot_training - Step 9530: {'lr': 0.0004702484948933765, 'samples': 4879872, 'steps': 9530, 'loss/train': 1.154759168624878} +02/24/2022 13:54:08 - INFO - codeparrot_training - Step 9531: {'lr': 0.000470240752891432, 'samples': 4880384, 'steps': 9531, 'loss/train': 2.056220769882202} +02/24/2022 13:54:11 - INFO - codeparrot_training - Step 9532: {'lr': 0.000470233009946046, 'samples': 4880896, 'steps': 9532, 'loss/train': 1.311429738998413} +02/24/2022 13:54:17 - INFO - codeparrot_training - Step 9533: {'lr': 0.0004702252660572517, 'samples': 4881408, 'steps': 9533, 'loss/train': 2.0231940746307373} +02/24/2022 13:54:20 - INFO - codeparrot_training - Step 9534: {'lr': 0.00047021752122508234, 'samples': 4881920, 'steps': 9534, 'loss/train': 2.327814817428589} +02/24/2022 13:54:26 - INFO - codeparrot_training - Step 9535: {'lr': 0.000470209775449571, 'samples': 4882432, 'steps': 9535, 'loss/train': 0.9399018883705139} +02/24/2022 13:54:29 - INFO - codeparrot_training - Step 9536: {'lr': 0.00047020202873075093, 'samples': 4882944, 'steps': 9536, 'loss/train': 1.3210853338241577} +02/24/2022 13:54:35 - INFO - codeparrot_training - Step 9537: {'lr': 0.0004701942810686552, 'samples': 4883456, 'steps': 9537, 'loss/train': 2.2215545177459717} +02/24/2022 13:54:40 - INFO - codeparrot_training - Step 9538: {'lr': 0.00047018653246331724, 'samples': 4883968, 'steps': 9538, 'loss/train': 2.9715187549591064} +02/24/2022 13:54:44 - INFO - codeparrot_training - Step 9539: {'lr': 0.00047017878291477, 'samples': 4884480, 'steps': 9539, 'loss/train': 2.44144344329834} +02/24/2022 13:54:50 - INFO - codeparrot_training - Step 9540: {'lr': 0.0004701710324230468, 'samples': 4884992, 'steps': 9540, 'loss/train': 2.4876208305358887} +02/24/2022 13:54:53 - INFO - codeparrot_training - Step 9541: {'lr': 0.00047016328098818086, 'samples': 4885504, 'steps': 9541, 'loss/train': 2.0679149627685547} +02/24/2022 13:54:59 - INFO - codeparrot_training - Step 9542: {'lr': 0.00047015552861020524, 'samples': 4886016, 'steps': 9542, 'loss/train': 1.988111138343811} +02/24/2022 13:55:02 - INFO - codeparrot_training - Step 9543: {'lr': 0.00047014777528915327, 'samples': 4886528, 'steps': 9543, 'loss/train': 1.9410239458084106} +02/24/2022 13:55:08 - INFO - codeparrot_training - Step 9544: {'lr': 0.0004701400210250581, 'samples': 4887040, 'steps': 9544, 'loss/train': 1.2016911506652832} +02/24/2022 13:55:11 - INFO - codeparrot_training - Step 9545: {'lr': 0.00047013226581795305, 'samples': 4887552, 'steps': 9545, 'loss/train': 2.673309087753296} +02/24/2022 13:55:17 - INFO - codeparrot_training - Step 9546: {'lr': 0.00047012450966787126, 'samples': 4888064, 'steps': 9546, 'loss/train': 2.13564133644104} +02/24/2022 13:55:20 - INFO - codeparrot_training - Step 9547: {'lr': 0.000470116752574846, 'samples': 4888576, 'steps': 9547, 'loss/train': 1.6354318857192993} +02/24/2022 13:55:26 - INFO - codeparrot_training - Step 9548: {'lr': 0.0004701089945389104, 'samples': 4889088, 'steps': 9548, 'loss/train': 2.253312349319458} +02/24/2022 13:55:29 - INFO - codeparrot_training - Step 9549: {'lr': 0.00047010123556009774, 'samples': 4889600, 'steps': 9549, 'loss/train': 2.0510005950927734} +02/24/2022 13:55:35 - INFO - codeparrot_training - Step 9550: {'lr': 0.0004700934756384413, 'samples': 4890112, 'steps': 9550, 'loss/train': 2.5258548259735107} +02/24/2022 13:55:39 - INFO - codeparrot_training - Step 9551: {'lr': 0.00047008571477397435, 'samples': 4890624, 'steps': 9551, 'loss/train': 2.133078098297119} +02/24/2022 13:55:44 - INFO - codeparrot_training - Step 9552: {'lr': 0.00047007795296673006, 'samples': 4891136, 'steps': 9552, 'loss/train': 1.5449477434158325} +02/24/2022 13:55:48 - INFO - codeparrot_training - Step 9553: {'lr': 0.00047007019021674167, 'samples': 4891648, 'steps': 9553, 'loss/train': 2.955099582672119} +02/24/2022 13:55:54 - INFO - codeparrot_training - Step 9554: {'lr': 0.0004700624265240425, 'samples': 4892160, 'steps': 9554, 'loss/train': 1.851338267326355} +02/24/2022 13:55:57 - INFO - codeparrot_training - Step 9555: {'lr': 0.00047005466188866575, 'samples': 4892672, 'steps': 9555, 'loss/train': 2.10430908203125} +02/24/2022 13:56:03 - INFO - codeparrot_training - Step 9556: {'lr': 0.00047004689631064474, 'samples': 4893184, 'steps': 9556, 'loss/train': 1.1847878694534302} +02/24/2022 13:56:06 - INFO - codeparrot_training - Step 9557: {'lr': 0.00047003912979001267, 'samples': 4893696, 'steps': 9557, 'loss/train': 0.5048022866249084} +02/24/2022 13:56:12 - INFO - codeparrot_training - Step 9558: {'lr': 0.0004700313623268028, 'samples': 4894208, 'steps': 9558, 'loss/train': 1.9121215343475342} +02/24/2022 13:56:15 - INFO - codeparrot_training - Step 9559: {'lr': 0.00047002359392104854, 'samples': 4894720, 'steps': 9559, 'loss/train': 1.4153485298156738} +02/24/2022 13:56:21 - INFO - codeparrot_training - Step 9560: {'lr': 0.000470015824572783, 'samples': 4895232, 'steps': 9560, 'loss/train': 2.3217670917510986} +02/24/2022 13:56:24 - INFO - codeparrot_training - Step 9561: {'lr': 0.00047000805428203953, 'samples': 4895744, 'steps': 9561, 'loss/train': 1.4297391176223755} +02/24/2022 13:56:30 - INFO - codeparrot_training - Step 9562: {'lr': 0.00047000028304885143, 'samples': 4896256, 'steps': 9562, 'loss/train': 2.0284175872802734} +02/24/2022 13:56:33 - INFO - codeparrot_training - Step 9563: {'lr': 0.00046999251087325204, 'samples': 4896768, 'steps': 9563, 'loss/train': 2.065173625946045} +02/24/2022 13:56:39 - INFO - codeparrot_training - Step 9564: {'lr': 0.0004699847377552745, 'samples': 4897280, 'steps': 9564, 'loss/train': 2.0511999130249023} +02/24/2022 13:56:42 - INFO - codeparrot_training - Step 9565: {'lr': 0.00046997696369495217, 'samples': 4897792, 'steps': 9565, 'loss/train': 2.5077998638153076} +02/24/2022 13:56:48 - INFO - codeparrot_training - Step 9566: {'lr': 0.00046996918869231843, 'samples': 4898304, 'steps': 9566, 'loss/train': 1.9838497638702393} +02/24/2022 13:56:52 - INFO - codeparrot_training - Step 9567: {'lr': 0.00046996141274740653, 'samples': 4898816, 'steps': 9567, 'loss/train': 2.8092308044433594} +02/24/2022 13:56:57 - INFO - codeparrot_training - Step 9568: {'lr': 0.00046995363586024977, 'samples': 4899328, 'steps': 9568, 'loss/train': 2.267569065093994} +02/24/2022 13:57:01 - INFO - codeparrot_training - Step 9569: {'lr': 0.0004699458580308815, 'samples': 4899840, 'steps': 9569, 'loss/train': 2.0510623455047607} +02/24/2022 13:57:06 - INFO - codeparrot_training - Step 9570: {'lr': 0.00046993807925933503, 'samples': 4900352, 'steps': 9570, 'loss/train': 0.7282016277313232} +02/24/2022 13:57:10 - INFO - codeparrot_training - Step 9571: {'lr': 0.00046993029954564363, 'samples': 4900864, 'steps': 9571, 'loss/train': 2.312633752822876} +02/24/2022 13:57:16 - INFO - codeparrot_training - Step 9572: {'lr': 0.0004699225188898407, 'samples': 4901376, 'steps': 9572, 'loss/train': 2.3100268840789795} +02/24/2022 13:57:19 - INFO - codeparrot_training - Step 9573: {'lr': 0.0004699147372919595, 'samples': 4901888, 'steps': 9573, 'loss/train': 1.6473491191864014} +02/24/2022 13:57:25 - INFO - codeparrot_training - Step 9574: {'lr': 0.00046990695475203337, 'samples': 4902400, 'steps': 9574, 'loss/train': 1.8104907274246216} +02/24/2022 13:57:28 - INFO - codeparrot_training - Step 9575: {'lr': 0.00046989917127009573, 'samples': 4902912, 'steps': 9575, 'loss/train': 2.1922624111175537} +02/24/2022 13:57:35 - INFO - codeparrot_training - Step 9576: {'lr': 0.0004698913868461798, 'samples': 4903424, 'steps': 9576, 'loss/train': 2.563850164413452} +02/24/2022 13:57:38 - INFO - codeparrot_training - Step 9577: {'lr': 0.00046988360148031904, 'samples': 4903936, 'steps': 9577, 'loss/train': 1.61285400390625} +02/24/2022 13:57:43 - INFO - codeparrot_training - Step 9578: {'lr': 0.0004698758151725468, 'samples': 4904448, 'steps': 9578, 'loss/train': 2.6799488067626953} +02/24/2022 13:57:47 - INFO - codeparrot_training - Step 9579: {'lr': 0.0004698680279228963, 'samples': 4904960, 'steps': 9579, 'loss/train': 1.6138412952423096} +02/24/2022 13:57:52 - INFO - codeparrot_training - Step 9580: {'lr': 0.000469860239731401, 'samples': 4905472, 'steps': 9580, 'loss/train': 3.2819690704345703} +02/24/2022 13:57:56 - INFO - codeparrot_training - Step 9581: {'lr': 0.00046985245059809436, 'samples': 4905984, 'steps': 9581, 'loss/train': 1.3251656293869019} +02/24/2022 13:58:01 - INFO - codeparrot_training - Step 9582: {'lr': 0.0004698446605230095, 'samples': 4906496, 'steps': 9582, 'loss/train': 1.789786696434021} +02/24/2022 13:58:05 - INFO - codeparrot_training - Step 9583: {'lr': 0.00046983686950618, 'samples': 4907008, 'steps': 9583, 'loss/train': 1.3839339017868042} +02/24/2022 13:58:10 - INFO - codeparrot_training - Step 9584: {'lr': 0.00046982907754763905, 'samples': 4907520, 'steps': 9584, 'loss/train': 1.4463045597076416} +02/24/2022 13:58:14 - INFO - codeparrot_training - Step 9585: {'lr': 0.00046982128464742026, 'samples': 4908032, 'steps': 9585, 'loss/train': 2.0827138423919678} +02/24/2022 13:58:20 - INFO - codeparrot_training - Step 9586: {'lr': 0.0004698134908055568, 'samples': 4908544, 'steps': 9586, 'loss/train': 0.997948169708252} +02/24/2022 13:58:23 - INFO - codeparrot_training - Step 9587: {'lr': 0.00046980569602208215, 'samples': 4909056, 'steps': 9587, 'loss/train': 2.402827501296997} +02/24/2022 13:58:29 - INFO - codeparrot_training - Step 9588: {'lr': 0.00046979790029702973, 'samples': 4909568, 'steps': 9588, 'loss/train': 1.7306206226348877} +02/24/2022 13:58:32 - INFO - codeparrot_training - Step 9589: {'lr': 0.0004697901036304329, 'samples': 4910080, 'steps': 9589, 'loss/train': 1.250905156135559} +02/24/2022 13:58:38 - INFO - codeparrot_training - Step 9590: {'lr': 0.00046978230602232507, 'samples': 4910592, 'steps': 9590, 'loss/train': 1.3780932426452637} +02/24/2022 13:58:41 - INFO - codeparrot_training - Step 9591: {'lr': 0.00046977450747273956, 'samples': 4911104, 'steps': 9591, 'loss/train': 1.7797541618347168} +02/24/2022 13:58:47 - INFO - codeparrot_training - Step 9592: {'lr': 0.00046976670798171, 'samples': 4911616, 'steps': 9592, 'loss/train': 2.5511999130249023} +02/24/2022 13:58:50 - INFO - codeparrot_training - Step 9593: {'lr': 0.00046975890754926943, 'samples': 4912128, 'steps': 9593, 'loss/train': 1.5138994455337524} +02/24/2022 13:58:56 - INFO - codeparrot_training - Step 9594: {'lr': 0.0004697511061754516, 'samples': 4912640, 'steps': 9594, 'loss/train': 2.4406418800354004} +02/24/2022 13:59:00 - INFO - codeparrot_training - Step 9595: {'lr': 0.00046974330386028985, 'samples': 4913152, 'steps': 9595, 'loss/train': 1.1683231592178345} +02/24/2022 13:59:05 - INFO - codeparrot_training - Step 9596: {'lr': 0.0004697355006038175, 'samples': 4913664, 'steps': 9596, 'loss/train': 2.3619003295898438} +02/24/2022 13:59:09 - INFO - codeparrot_training - Step 9597: {'lr': 0.00046972769640606804, 'samples': 4914176, 'steps': 9597, 'loss/train': 2.7334089279174805} +02/24/2022 13:59:14 - INFO - codeparrot_training - Step 9598: {'lr': 0.0004697198912670749, 'samples': 4914688, 'steps': 9598, 'loss/train': 2.702599048614502} +02/24/2022 13:59:18 - INFO - codeparrot_training - Step 9599: {'lr': 0.0004697120851868715, 'samples': 4915200, 'steps': 9599, 'loss/train': 0.9323081970214844} +02/24/2022 13:59:23 - INFO - codeparrot_training - Step 9600: {'lr': 0.00046970427816549133, 'samples': 4915712, 'steps': 9600, 'loss/train': 1.5623677968978882} +02/24/2022 13:59:27 - INFO - codeparrot_training - Step 9601: {'lr': 0.0004696964702029678, 'samples': 4916224, 'steps': 9601, 'loss/train': 1.9898372888565063} +02/24/2022 13:59:33 - INFO - codeparrot_training - Step 9602: {'lr': 0.00046968866129933436, 'samples': 4916736, 'steps': 9602, 'loss/train': 1.855385184288025} +02/24/2022 13:59:36 - INFO - codeparrot_training - Step 9603: {'lr': 0.0004696808514546244, 'samples': 4917248, 'steps': 9603, 'loss/train': 2.258364200592041} +02/24/2022 13:59:42 - INFO - codeparrot_training - Step 9604: {'lr': 0.0004696730406688715, 'samples': 4917760, 'steps': 9604, 'loss/train': 1.625893235206604} +02/24/2022 13:59:45 - INFO - codeparrot_training - Step 9605: {'lr': 0.000469665228942109, 'samples': 4918272, 'steps': 9605, 'loss/train': 8.917402267456055} +02/24/2022 13:59:51 - INFO - codeparrot_training - Step 9606: {'lr': 0.0004696574162743704, 'samples': 4918784, 'steps': 9606, 'loss/train': 2.3655173778533936} +02/24/2022 13:59:55 - INFO - codeparrot_training - Step 9607: {'lr': 0.00046964960266568926, 'samples': 4919296, 'steps': 9607, 'loss/train': 2.5318398475646973} +02/24/2022 14:00:00 - INFO - codeparrot_training - Step 9608: {'lr': 0.0004696417881160989, 'samples': 4919808, 'steps': 9608, 'loss/train': 1.6272956132888794} +02/24/2022 14:00:04 - INFO - codeparrot_training - Step 9609: {'lr': 0.0004696339726256328, 'samples': 4920320, 'steps': 9609, 'loss/train': 3.1039879322052} +02/24/2022 14:00:09 - INFO - codeparrot_training - Step 9610: {'lr': 0.00046962615619432457, 'samples': 4920832, 'steps': 9610, 'loss/train': 3.6105611324310303} +02/24/2022 14:00:13 - INFO - codeparrot_training - Step 9611: {'lr': 0.0004696183388222077, 'samples': 4921344, 'steps': 9611, 'loss/train': 2.0522360801696777} +02/24/2022 14:00:19 - INFO - codeparrot_training - Step 9612: {'lr': 0.0004696105205093155, 'samples': 4921856, 'steps': 9612, 'loss/train': 2.508798599243164} +02/24/2022 14:00:23 - INFO - codeparrot_training - Step 9613: {'lr': 0.0004696027012556816, 'samples': 4922368, 'steps': 9613, 'loss/train': 3.8638055324554443} +02/24/2022 14:00:28 - INFO - codeparrot_training - Step 9614: {'lr': 0.00046959488106133944, 'samples': 4922880, 'steps': 9614, 'loss/train': 2.9614624977111816} +02/24/2022 14:00:32 - INFO - codeparrot_training - Step 9615: {'lr': 0.0004695870599263226, 'samples': 4923392, 'steps': 9615, 'loss/train': 2.158031463623047} +02/24/2022 14:00:37 - INFO - codeparrot_training - Step 9616: {'lr': 0.0004695792378506645, 'samples': 4923904, 'steps': 9616, 'loss/train': 1.3664880990982056} +02/24/2022 14:00:41 - INFO - codeparrot_training - Step 9617: {'lr': 0.00046957141483439856, 'samples': 4924416, 'steps': 9617, 'loss/train': 1.960555911064148} +02/24/2022 14:00:46 - INFO - codeparrot_training - Step 9618: {'lr': 0.0004695635908775585, 'samples': 4924928, 'steps': 9618, 'loss/train': 1.426436424255371} +02/24/2022 14:00:50 - INFO - codeparrot_training - Step 9619: {'lr': 0.0004695557659801778, 'samples': 4925440, 'steps': 9619, 'loss/train': 1.223673939704895} +02/24/2022 14:00:55 - INFO - codeparrot_training - Step 9620: {'lr': 0.0004695479401422898, 'samples': 4925952, 'steps': 9620, 'loss/train': 2.2305169105529785} +02/24/2022 14:00:59 - INFO - codeparrot_training - Step 9621: {'lr': 0.0004695401133639282, 'samples': 4926464, 'steps': 9621, 'loss/train': 2.5820133686065674} +02/24/2022 14:01:05 - INFO - codeparrot_training - Step 9622: {'lr': 0.0004695322856451264, 'samples': 4926976, 'steps': 9622, 'loss/train': 2.4427130222320557} +02/24/2022 14:01:09 - INFO - codeparrot_training - Step 9623: {'lr': 0.00046952445698591805, 'samples': 4927488, 'steps': 9623, 'loss/train': 1.9843087196350098} +02/24/2022 14:01:14 - INFO - codeparrot_training - Step 9624: {'lr': 0.0004695166273863367, 'samples': 4928000, 'steps': 9624, 'loss/train': 1.5638004541397095} +02/24/2022 14:01:18 - INFO - codeparrot_training - Step 9625: {'lr': 0.00046950879684641567, 'samples': 4928512, 'steps': 9625, 'loss/train': 3.016756057739258} +02/24/2022 14:01:23 - INFO - codeparrot_training - Step 9626: {'lr': 0.00046950096536618876, 'samples': 4929024, 'steps': 9626, 'loss/train': 2.2594692707061768} +02/24/2022 14:01:27 - INFO - codeparrot_training - Step 9627: {'lr': 0.0004694931329456894, 'samples': 4929536, 'steps': 9627, 'loss/train': 3.36421537399292} +02/24/2022 14:01:32 - INFO - codeparrot_training - Step 9628: {'lr': 0.0004694852995849511, 'samples': 4930048, 'steps': 9628, 'loss/train': 0.8950254321098328} +02/24/2022 14:01:36 - INFO - codeparrot_training - Step 9629: {'lr': 0.00046947746528400755, 'samples': 4930560, 'steps': 9629, 'loss/train': 2.137103796005249} +02/24/2022 14:01:41 - INFO - codeparrot_training - Step 9630: {'lr': 0.00046946963004289223, 'samples': 4931072, 'steps': 9630, 'loss/train': 1.9981069564819336} +02/24/2022 14:01:44 - INFO - codeparrot_training - Step 9631: {'lr': 0.0004694617938616386, 'samples': 4931584, 'steps': 9631, 'loss/train': 1.5629870891571045} +02/24/2022 14:01:51 - INFO - codeparrot_training - Step 9632: {'lr': 0.00046945395674028047, 'samples': 4932096, 'steps': 9632, 'loss/train': 8.76471996307373} +02/24/2022 14:01:54 - INFO - codeparrot_training - Step 9633: {'lr': 0.0004694461186788512, 'samples': 4932608, 'steps': 9633, 'loss/train': 1.5739970207214355} +02/24/2022 14:02:00 - INFO - codeparrot_training - Step 9634: {'lr': 0.0004694382796773844, 'samples': 4933120, 'steps': 9634, 'loss/train': 1.3431178331375122} +02/24/2022 14:02:03 - INFO - codeparrot_training - Step 9635: {'lr': 0.0004694304397359137, 'samples': 4933632, 'steps': 9635, 'loss/train': 2.063695192337036} +02/24/2022 14:02:09 - INFO - codeparrot_training - Step 9636: {'lr': 0.00046942259885447273, 'samples': 4934144, 'steps': 9636, 'loss/train': 0.3628299832344055} +02/24/2022 14:02:12 - INFO - codeparrot_training - Step 9637: {'lr': 0.000469414757033095, 'samples': 4934656, 'steps': 9637, 'loss/train': 1.983220100402832} +02/24/2022 14:02:18 - INFO - codeparrot_training - Step 9638: {'lr': 0.00046940691427181414, 'samples': 4935168, 'steps': 9638, 'loss/train': 1.1638362407684326} +02/24/2022 14:02:21 - INFO - codeparrot_training - Step 9639: {'lr': 0.00046939907057066374, 'samples': 4935680, 'steps': 9639, 'loss/train': 2.3545024394989014} +02/24/2022 14:02:27 - INFO - codeparrot_training - Step 9640: {'lr': 0.0004693912259296773, 'samples': 4936192, 'steps': 9640, 'loss/train': 1.83735990524292} +02/24/2022 14:02:30 - INFO - codeparrot_training - Step 9641: {'lr': 0.0004693833803488886, 'samples': 4936704, 'steps': 9641, 'loss/train': 3.072589635848999} +02/24/2022 14:02:35 - INFO - codeparrot_training - Step 9642: {'lr': 0.00046937553382833116, 'samples': 4937216, 'steps': 9642, 'loss/train': 2.5270586013793945} +02/24/2022 14:02:39 - INFO - codeparrot_training - Step 9643: {'lr': 0.00046936768636803857, 'samples': 4937728, 'steps': 9643, 'loss/train': 2.449934959411621} +02/24/2022 14:02:44 - INFO - codeparrot_training - Step 9644: {'lr': 0.00046935983796804443, 'samples': 4938240, 'steps': 9644, 'loss/train': 1.4152569770812988} +02/24/2022 14:02:48 - INFO - codeparrot_training - Step 9645: {'lr': 0.00046935198862838246, 'samples': 4938752, 'steps': 9645, 'loss/train': 2.1745424270629883} +02/24/2022 14:02:53 - INFO - codeparrot_training - Step 9646: {'lr': 0.00046934413834908616, 'samples': 4939264, 'steps': 9646, 'loss/train': 2.4889042377471924} +02/24/2022 14:02:59 - INFO - codeparrot_training - Step 9647: {'lr': 0.0004693362871301893, 'samples': 4939776, 'steps': 9647, 'loss/train': 2.2839608192443848} +02/24/2022 14:03:02 - INFO - codeparrot_training - Step 9648: {'lr': 0.0004693284349717254, 'samples': 4940288, 'steps': 9648, 'loss/train': 3.5323565006256104} +02/24/2022 14:03:09 - INFO - codeparrot_training - Step 9649: {'lr': 0.00046932058187372803, 'samples': 4940800, 'steps': 9649, 'loss/train': 1.9513170719146729} +02/24/2022 14:03:12 - INFO - codeparrot_training - Step 9650: {'lr': 0.00046931272783623106, 'samples': 4941312, 'steps': 9650, 'loss/train': 2.5716538429260254} +02/24/2022 14:03:17 - INFO - codeparrot_training - Step 9651: {'lr': 0.00046930487285926797, 'samples': 4941824, 'steps': 9651, 'loss/train': 1.8524171113967896} +02/24/2022 14:03:21 - INFO - codeparrot_training - Step 9652: {'lr': 0.00046929701694287243, 'samples': 4942336, 'steps': 9652, 'loss/train': 2.0290989875793457} +02/24/2022 14:03:26 - INFO - codeparrot_training - Step 9653: {'lr': 0.0004692891600870781, 'samples': 4942848, 'steps': 9653, 'loss/train': 2.1385068893432617} +02/24/2022 14:03:30 - INFO - codeparrot_training - Step 9654: {'lr': 0.00046928130229191865, 'samples': 4943360, 'steps': 9654, 'loss/train': 1.0425946712493896} +02/24/2022 14:03:35 - INFO - codeparrot_training - Step 9655: {'lr': 0.00046927344355742774, 'samples': 4943872, 'steps': 9655, 'loss/train': 1.6587049961090088} +02/24/2022 14:03:39 - INFO - codeparrot_training - Step 9656: {'lr': 0.00046926558388363904, 'samples': 4944384, 'steps': 9656, 'loss/train': 2.826145648956299} +02/24/2022 14:03:44 - INFO - codeparrot_training - Step 9657: {'lr': 0.00046925772327058616, 'samples': 4944896, 'steps': 9657, 'loss/train': 2.489088296890259} +02/24/2022 14:03:48 - INFO - codeparrot_training - Step 9658: {'lr': 0.0004692498617183028, 'samples': 4945408, 'steps': 9658, 'loss/train': 2.4383544921875} +02/24/2022 14:03:54 - INFO - codeparrot_training - Step 9659: {'lr': 0.0004692419992268227, 'samples': 4945920, 'steps': 9659, 'loss/train': 2.3415846824645996} +02/24/2022 14:03:57 - INFO - codeparrot_training - Step 9660: {'lr': 0.00046923413579617944, 'samples': 4946432, 'steps': 9660, 'loss/train': 1.6319698095321655} +02/24/2022 14:04:03 - INFO - codeparrot_training - Step 9661: {'lr': 0.00046922627142640685, 'samples': 4946944, 'steps': 9661, 'loss/train': 2.659302234649658} +02/24/2022 14:04:06 - INFO - codeparrot_training - Step 9662: {'lr': 0.00046921840611753845, 'samples': 4947456, 'steps': 9662, 'loss/train': 1.9705803394317627} +02/24/2022 14:04:12 - INFO - codeparrot_training - Step 9663: {'lr': 0.000469210539869608, 'samples': 4947968, 'steps': 9663, 'loss/train': 3.4343323707580566} +02/24/2022 14:04:16 - INFO - codeparrot_training - Step 9664: {'lr': 0.0004692026726826493, 'samples': 4948480, 'steps': 9664, 'loss/train': 3.209012269973755} +02/24/2022 14:04:21 - INFO - codeparrot_training - Step 9665: {'lr': 0.0004691948045566958, 'samples': 4948992, 'steps': 9665, 'loss/train': 1.8484220504760742} +02/24/2022 14:04:24 - INFO - codeparrot_training - Step 9666: {'lr': 0.0004691869354917815, 'samples': 4949504, 'steps': 9666, 'loss/train': 2.241541624069214} +02/24/2022 14:04:30 - INFO - codeparrot_training - Step 9667: {'lr': 0.0004691790654879399, 'samples': 4950016, 'steps': 9667, 'loss/train': 1.3001562356948853} +02/24/2022 14:04:33 - INFO - codeparrot_training - Step 9668: {'lr': 0.00046917119454520487, 'samples': 4950528, 'steps': 9668, 'loss/train': 2.233103036880493} +02/24/2022 14:04:39 - INFO - codeparrot_training - Step 9669: {'lr': 0.0004691633226636099, 'samples': 4951040, 'steps': 9669, 'loss/train': 2.446730136871338} +02/24/2022 14:04:43 - INFO - codeparrot_training - Step 9670: {'lr': 0.0004691554498431889, 'samples': 4951552, 'steps': 9670, 'loss/train': 2.168839693069458} +02/24/2022 14:04:51 - INFO - codeparrot_training - Step 9671: {'lr': 0.00046914757608397555, 'samples': 4952064, 'steps': 9671, 'loss/train': 0.19441667199134827} +02/24/2022 14:04:54 - INFO - codeparrot_training - Step 9672: {'lr': 0.00046913970138600357, 'samples': 4952576, 'steps': 9672, 'loss/train': 2.23901104927063} +02/24/2022 14:05:00 - INFO - codeparrot_training - Step 9673: {'lr': 0.0004691318257493067, 'samples': 4953088, 'steps': 9673, 'loss/train': 2.6474897861480713} +02/24/2022 14:05:03 - INFO - codeparrot_training - Step 9674: {'lr': 0.00046912394917391866, 'samples': 4953600, 'steps': 9674, 'loss/train': 2.262817621231079} +02/24/2022 14:05:09 - INFO - codeparrot_training - Step 9675: {'lr': 0.00046911607165987324, 'samples': 4954112, 'steps': 9675, 'loss/train': 1.7118260860443115} +02/24/2022 14:05:12 - INFO - codeparrot_training - Step 9676: {'lr': 0.0004691081932072041, 'samples': 4954624, 'steps': 9676, 'loss/train': 2.1820528507232666} +02/24/2022 14:05:18 - INFO - codeparrot_training - Step 9677: {'lr': 0.0004691003138159451, 'samples': 4955136, 'steps': 9677, 'loss/train': 2.8769967555999756} +02/24/2022 14:05:21 - INFO - codeparrot_training - Step 9678: {'lr': 0.00046909243348612986, 'samples': 4955648, 'steps': 9678, 'loss/train': 1.680458903312683} +02/24/2022 14:05:27 - INFO - codeparrot_training - Step 9679: {'lr': 0.0004690845522177922, 'samples': 4956160, 'steps': 9679, 'loss/train': 1.559897780418396} +02/24/2022 14:05:30 - INFO - codeparrot_training - Step 9680: {'lr': 0.0004690766700109659, 'samples': 4956672, 'steps': 9680, 'loss/train': 1.483007550239563} +02/24/2022 14:05:37 - INFO - codeparrot_training - Step 9681: {'lr': 0.0004690687868656847, 'samples': 4957184, 'steps': 9681, 'loss/train': 1.9457534551620483} +02/24/2022 14:05:41 - INFO - codeparrot_training - Step 9682: {'lr': 0.00046906090278198246, 'samples': 4957696, 'steps': 9682, 'loss/train': 1.7990607023239136} +02/24/2022 14:05:46 - INFO - codeparrot_training - Step 9683: {'lr': 0.00046905301775989277, 'samples': 4958208, 'steps': 9683, 'loss/train': 1.5773755311965942} +02/24/2022 14:05:50 - INFO - codeparrot_training - Step 9684: {'lr': 0.0004690451317994495, 'samples': 4958720, 'steps': 9684, 'loss/train': 2.0951998233795166} +02/24/2022 14:05:55 - INFO - codeparrot_training - Step 9685: {'lr': 0.00046903724490068654, 'samples': 4959232, 'steps': 9685, 'loss/train': 1.9504728317260742} +02/24/2022 14:05:59 - INFO - codeparrot_training - Step 9686: {'lr': 0.00046902935706363754, 'samples': 4959744, 'steps': 9686, 'loss/train': 1.9299465417861938} +02/24/2022 14:06:05 - INFO - codeparrot_training - Step 9687: {'lr': 0.0004690214682883363, 'samples': 4960256, 'steps': 9687, 'loss/train': 2.704273223876953} +02/24/2022 14:06:08 - INFO - codeparrot_training - Step 9688: {'lr': 0.00046901357857481664, 'samples': 4960768, 'steps': 9688, 'loss/train': 2.1331183910369873} +02/24/2022 14:06:14 - INFO - codeparrot_training - Step 9689: {'lr': 0.0004690056879231124, 'samples': 4961280, 'steps': 9689, 'loss/train': 1.3145971298217773} +02/24/2022 14:06:17 - INFO - codeparrot_training - Step 9690: {'lr': 0.0004689977963332572, 'samples': 4961792, 'steps': 9690, 'loss/train': 2.0762250423431396} +02/24/2022 14:06:23 - INFO - codeparrot_training - Step 9691: {'lr': 0.0004689899038052852, 'samples': 4962304, 'steps': 9691, 'loss/train': 1.8556798696517944} +02/24/2022 14:06:26 - INFO - codeparrot_training - Step 9692: {'lr': 0.0004689820103392298, 'samples': 4962816, 'steps': 9692, 'loss/train': 2.8912041187286377} +02/24/2022 14:06:33 - INFO - codeparrot_training - Step 9693: {'lr': 0.0004689741159351251, 'samples': 4963328, 'steps': 9693, 'loss/train': 1.6728296279907227} +02/24/2022 14:06:37 - INFO - codeparrot_training - Step 9694: {'lr': 0.00046896622059300477, 'samples': 4963840, 'steps': 9694, 'loss/train': 1.652101993560791} +02/24/2022 14:06:42 - INFO - codeparrot_training - Step 9695: {'lr': 0.00046895832431290266, 'samples': 4964352, 'steps': 9695, 'loss/train': 3.0453104972839355} +02/24/2022 14:06:46 - INFO - codeparrot_training - Step 9696: {'lr': 0.0004689504270948527, 'samples': 4964864, 'steps': 9696, 'loss/train': 2.844454050064087} +02/24/2022 14:06:51 - INFO - codeparrot_training - Step 9697: {'lr': 0.00046894252893888854, 'samples': 4965376, 'steps': 9697, 'loss/train': 1.9529099464416504} +02/24/2022 14:06:55 - INFO - codeparrot_training - Step 9698: {'lr': 0.0004689346298450442, 'samples': 4965888, 'steps': 9698, 'loss/train': 1.6889833211898804} +02/24/2022 14:07:00 - INFO - codeparrot_training - Step 9699: {'lr': 0.0004689267298133534, 'samples': 4966400, 'steps': 9699, 'loss/train': 1.4936554431915283} +02/24/2022 14:07:04 - INFO - codeparrot_training - Step 9700: {'lr': 0.00046891882884384997, 'samples': 4966912, 'steps': 9700, 'loss/train': 1.9704091548919678} +02/24/2022 14:07:09 - INFO - codeparrot_training - Step 9701: {'lr': 0.00046891092693656777, 'samples': 4967424, 'steps': 9701, 'loss/train': 2.710467576980591} +02/24/2022 14:07:13 - INFO - codeparrot_training - Step 9702: {'lr': 0.0004689030240915407, 'samples': 4967936, 'steps': 9702, 'loss/train': 2.1302390098571777} +02/24/2022 14:07:20 - INFO - codeparrot_training - Step 9703: {'lr': 0.0004688951203088026, 'samples': 4968448, 'steps': 9703, 'loss/train': 2.377704381942749} +02/24/2022 14:07:24 - INFO - codeparrot_training - Step 9704: {'lr': 0.00046888721558838734, 'samples': 4968960, 'steps': 9704, 'loss/train': 0.8488210439682007} +02/24/2022 14:07:29 - INFO - codeparrot_training - Step 9705: {'lr': 0.0004688793099303287, 'samples': 4969472, 'steps': 9705, 'loss/train': 1.5677474737167358} +02/24/2022 14:07:33 - INFO - codeparrot_training - Step 9706: {'lr': 0.0004688714033346606, 'samples': 4969984, 'steps': 9706, 'loss/train': 1.6840240955352783} +02/24/2022 14:07:38 - INFO - codeparrot_training - Step 9707: {'lr': 0.000468863495801417, 'samples': 4970496, 'steps': 9707, 'loss/train': 1.791087031364441} +02/24/2022 14:07:42 - INFO - codeparrot_training - Step 9708: {'lr': 0.00046885558733063157, 'samples': 4971008, 'steps': 9708, 'loss/train': 0.8697268962860107} +02/24/2022 14:07:47 - INFO - codeparrot_training - Step 9709: {'lr': 0.00046884767792233827, 'samples': 4971520, 'steps': 9709, 'loss/train': 3.0986404418945312} +02/24/2022 14:07:53 - INFO - codeparrot_training - Step 9710: {'lr': 0.00046883976757657107, 'samples': 4972032, 'steps': 9710, 'loss/train': 2.8138427734375} +02/24/2022 14:07:56 - INFO - codeparrot_training - Step 9711: {'lr': 0.00046883185629336386, 'samples': 4972544, 'steps': 9711, 'loss/train': 1.4844030141830444} +02/24/2022 14:08:02 - INFO - codeparrot_training - Step 9712: {'lr': 0.0004688239440727504, 'samples': 4973056, 'steps': 9712, 'loss/train': 1.3667101860046387} +02/24/2022 14:08:05 - INFO - codeparrot_training - Step 9713: {'lr': 0.00046881603091476466, 'samples': 4973568, 'steps': 9713, 'loss/train': 1.4271619319915771} +02/24/2022 14:08:11 - INFO - codeparrot_training - Step 9714: {'lr': 0.0004688081168194405, 'samples': 4974080, 'steps': 9714, 'loss/train': 1.250153660774231} +02/24/2022 14:08:14 - INFO - codeparrot_training - Step 9715: {'lr': 0.0004688002017868119, 'samples': 4974592, 'steps': 9715, 'loss/train': 0.762831449508667} +02/24/2022 14:08:22 - INFO - codeparrot_training - Step 9716: {'lr': 0.0004687922858169126, 'samples': 4975104, 'steps': 9716, 'loss/train': 1.4025824069976807} +02/24/2022 14:08:25 - INFO - codeparrot_training - Step 9717: {'lr': 0.0004687843689097767, 'samples': 4975616, 'steps': 9717, 'loss/train': 2.28603196144104} +02/24/2022 14:08:31 - INFO - codeparrot_training - Step 9718: {'lr': 0.0004687764510654381, 'samples': 4976128, 'steps': 9718, 'loss/train': 2.9806973934173584} +02/24/2022 14:08:35 - INFO - codeparrot_training - Step 9719: {'lr': 0.0004687685322839306, 'samples': 4976640, 'steps': 9719, 'loss/train': 2.082766056060791} +02/24/2022 14:08:38 - INFO - codeparrot_training - Step 9720: {'lr': 0.00046876061256528813, 'samples': 4977152, 'steps': 9720, 'loss/train': 1.6067500114440918} +02/24/2022 14:08:44 - INFO - codeparrot_training - Step 9721: {'lr': 0.00046875269190954465, 'samples': 4977664, 'steps': 9721, 'loss/train': 2.112417697906494} +02/24/2022 14:08:47 - INFO - codeparrot_training - Step 9722: {'lr': 0.00046874477031673417, 'samples': 4978176, 'steps': 9722, 'loss/train': 2.8875627517700195} +02/24/2022 14:08:53 - INFO - codeparrot_training - Step 9723: {'lr': 0.00046873684778689053, 'samples': 4978688, 'steps': 9723, 'loss/train': 1.9508256912231445} +02/24/2022 14:08:56 - INFO - codeparrot_training - Step 9724: {'lr': 0.00046872892432004765, 'samples': 4979200, 'steps': 9724, 'loss/train': 2.1635444164276123} +02/24/2022 14:09:02 - INFO - codeparrot_training - Step 9725: {'lr': 0.00046872099991623954, 'samples': 4979712, 'steps': 9725, 'loss/train': 2.386948347091675} +02/24/2022 14:09:05 - INFO - codeparrot_training - Step 9726: {'lr': 0.0004687130745755002, 'samples': 4980224, 'steps': 9726, 'loss/train': 1.574517846107483} +02/24/2022 14:09:13 - INFO - codeparrot_training - Step 9727: {'lr': 0.0004687051482978634, 'samples': 4980736, 'steps': 9727, 'loss/train': 2.435608148574829} +02/24/2022 14:09:16 - INFO - codeparrot_training - Step 9728: {'lr': 0.0004686972210833632, 'samples': 4981248, 'steps': 9728, 'loss/train': 2.3228836059570312} +02/24/2022 14:09:22 - INFO - codeparrot_training - Step 9729: {'lr': 0.00046868929293203355, 'samples': 4981760, 'steps': 9729, 'loss/train': 1.6026933193206787} +02/24/2022 14:09:25 - INFO - codeparrot_training - Step 9730: {'lr': 0.0004686813638439085, 'samples': 4982272, 'steps': 9730, 'loss/train': 2.3634378910064697} +02/24/2022 14:09:30 - INFO - codeparrot_training - Step 9731: {'lr': 0.00046867343381902185, 'samples': 4982784, 'steps': 9731, 'loss/train': 1.9287230968475342} +02/24/2022 14:09:34 - INFO - codeparrot_training - Step 9732: {'lr': 0.0004686655028574076, 'samples': 4983296, 'steps': 9732, 'loss/train': 2.705789804458618} +02/24/2022 14:09:39 - INFO - codeparrot_training - Step 9733: {'lr': 0.0004686575709590998, 'samples': 4983808, 'steps': 9733, 'loss/train': 2.2005279064178467} +02/24/2022 14:09:43 - INFO - codeparrot_training - Step 9734: {'lr': 0.00046864963812413244, 'samples': 4984320, 'steps': 9734, 'loss/train': 0.14529983699321747} +02/24/2022 14:09:48 - INFO - codeparrot_training - Step 9735: {'lr': 0.00046864170435253946, 'samples': 4984832, 'steps': 9735, 'loss/train': 1.6733207702636719} +02/24/2022 14:09:52 - INFO - codeparrot_training - Step 9736: {'lr': 0.0004686337696443548, 'samples': 4985344, 'steps': 9736, 'loss/train': 2.8610265254974365} +02/24/2022 14:09:57 - INFO - codeparrot_training - Step 9737: {'lr': 0.0004686258339996125, 'samples': 4985856, 'steps': 9737, 'loss/train': 1.4688588380813599} +02/24/2022 14:10:01 - INFO - codeparrot_training - Step 9738: {'lr': 0.0004686178974183466, 'samples': 4986368, 'steps': 9738, 'loss/train': 2.0714304447174072} +02/24/2022 14:10:08 - INFO - codeparrot_training - Step 9739: {'lr': 0.00046860995990059096, 'samples': 4986880, 'steps': 9739, 'loss/train': 2.5236568450927734} +02/24/2022 14:10:12 - INFO - codeparrot_training - Step 9740: {'lr': 0.00046860202144637976, 'samples': 4987392, 'steps': 9740, 'loss/train': 2.6240153312683105} +02/24/2022 14:10:17 - INFO - codeparrot_training - Step 9741: {'lr': 0.0004685940820557468, 'samples': 4987904, 'steps': 9741, 'loss/train': 1.732853651046753} +02/24/2022 14:10:21 - INFO - codeparrot_training - Step 9742: {'lr': 0.0004685861417287263, 'samples': 4988416, 'steps': 9742, 'loss/train': 2.13775897026062} +02/24/2022 14:10:26 - INFO - codeparrot_training - Step 9743: {'lr': 0.00046857820046535215, 'samples': 4988928, 'steps': 9743, 'loss/train': 1.5943260192871094} +02/24/2022 14:10:30 - INFO - codeparrot_training - Step 9744: {'lr': 0.0004685702582656584, 'samples': 4989440, 'steps': 9744, 'loss/train': 1.6716082096099854} +02/24/2022 14:10:35 - INFO - codeparrot_training - Step 9745: {'lr': 0.0004685623151296791, 'samples': 4989952, 'steps': 9745, 'loss/train': 2.218304395675659} +02/24/2022 14:10:39 - INFO - codeparrot_training - Step 9746: {'lr': 0.0004685543710574482, 'samples': 4990464, 'steps': 9746, 'loss/train': 2.802156448364258} +02/24/2022 14:10:44 - INFO - codeparrot_training - Step 9747: {'lr': 0.00046854642604899976, 'samples': 4990976, 'steps': 9747, 'loss/train': 2.275113344192505} +02/24/2022 14:10:48 - INFO - codeparrot_training - Step 9748: {'lr': 0.00046853848010436783, 'samples': 4991488, 'steps': 9748, 'loss/train': 1.5402599573135376} +02/24/2022 14:10:55 - INFO - codeparrot_training - Step 9749: {'lr': 0.00046853053322358653, 'samples': 4992000, 'steps': 9749, 'loss/train': 2.3010404109954834} +02/24/2022 14:10:58 - INFO - codeparrot_training - Step 9750: {'lr': 0.00046852258540668973, 'samples': 4992512, 'steps': 9750, 'loss/train': 2.3874311447143555} +02/24/2022 14:11:04 - INFO - codeparrot_training - Step 9751: {'lr': 0.0004685146366537116, 'samples': 4993024, 'steps': 9751, 'loss/train': 2.006460189819336} +02/24/2022 14:11:08 - INFO - codeparrot_training - Step 9752: {'lr': 0.00046850668696468614, 'samples': 4993536, 'steps': 9752, 'loss/train': 2.3719584941864014} +02/24/2022 14:11:13 - INFO - codeparrot_training - Step 9753: {'lr': 0.0004684987363396474, 'samples': 4994048, 'steps': 9753, 'loss/train': 0.9575660824775696} +02/24/2022 14:11:16 - INFO - codeparrot_training - Step 9754: {'lr': 0.0004684907847786295, 'samples': 4994560, 'steps': 9754, 'loss/train': 2.432325839996338} +02/24/2022 14:11:22 - INFO - codeparrot_training - Step 9755: {'lr': 0.0004684828322816664, 'samples': 4995072, 'steps': 9755, 'loss/train': 0.5235753655433655} +02/24/2022 14:11:26 - INFO - codeparrot_training - Step 9756: {'lr': 0.00046847487884879227, 'samples': 4995584, 'steps': 9756, 'loss/train': 1.5608141422271729} +02/24/2022 14:11:31 - INFO - codeparrot_training - Step 9757: {'lr': 0.0004684669244800411, 'samples': 4996096, 'steps': 9757, 'loss/train': 2.2094027996063232} +02/24/2022 14:11:35 - INFO - codeparrot_training - Step 9758: {'lr': 0.00046845896917544703, 'samples': 4996608, 'steps': 9758, 'loss/train': 2.599088668823242} +02/24/2022 14:11:40 - INFO - codeparrot_training - Step 9759: {'lr': 0.00046845101293504403, 'samples': 4997120, 'steps': 9759, 'loss/train': 0.33489975333213806} +02/24/2022 14:11:44 - INFO - codeparrot_training - Step 9760: {'lr': 0.00046844305575886636, 'samples': 4997632, 'steps': 9760, 'loss/train': 2.0347495079040527} +02/24/2022 14:11:49 - INFO - codeparrot_training - Step 9761: {'lr': 0.00046843509764694794, 'samples': 4998144, 'steps': 9761, 'loss/train': 2.290970802307129} +02/24/2022 14:11:53 - INFO - codeparrot_training - Step 9762: {'lr': 0.0004684271385993229, 'samples': 4998656, 'steps': 9762, 'loss/train': 2.3579905033111572} +02/24/2022 14:11:58 - INFO - codeparrot_training - Step 9763: {'lr': 0.0004684191786160254, 'samples': 4999168, 'steps': 9763, 'loss/train': 2.1428382396698} +02/24/2022 14:12:02 - INFO - codeparrot_training - Step 9764: {'lr': 0.0004684112176970895, 'samples': 4999680, 'steps': 9764, 'loss/train': 2.238593816757202} +02/24/2022 14:12:09 - INFO - codeparrot_training - Step 9765: {'lr': 0.0004684032558425493, 'samples': 5000192, 'steps': 9765, 'loss/train': 1.7709112167358398} +02/24/2022 14:12:13 - INFO - codeparrot_training - Step 9766: {'lr': 0.00046839529305243885, 'samples': 5000704, 'steps': 9766, 'loss/train': 2.64963960647583} +02/24/2022 14:12:18 - INFO - codeparrot_training - Step 9767: {'lr': 0.00046838732932679236, 'samples': 5001216, 'steps': 9767, 'loss/train': 2.3232438564300537} +02/24/2022 14:12:22 - INFO - codeparrot_training - Step 9768: {'lr': 0.0004683793646656439, 'samples': 5001728, 'steps': 9768, 'loss/train': 1.7110859155654907} +02/24/2022 14:12:27 - INFO - codeparrot_training - Step 9769: {'lr': 0.00046837139906902753, 'samples': 5002240, 'steps': 9769, 'loss/train': 1.7164878845214844} +02/24/2022 14:12:31 - INFO - codeparrot_training - Step 9770: {'lr': 0.00046836343253697744, 'samples': 5002752, 'steps': 9770, 'loss/train': 2.112417697906494} +02/24/2022 14:12:36 - INFO - codeparrot_training - Step 9771: {'lr': 0.0004683554650695278, 'samples': 5003264, 'steps': 9771, 'loss/train': 2.0658204555511475} +02/24/2022 14:12:40 - INFO - codeparrot_training - Step 9772: {'lr': 0.0004683474966667127, 'samples': 5003776, 'steps': 9772, 'loss/train': 2.604013204574585} +02/24/2022 14:12:45 - INFO - codeparrot_training - Step 9773: {'lr': 0.00046833952732856614, 'samples': 5004288, 'steps': 9773, 'loss/train': 2.144122362136841} +02/24/2022 14:12:49 - INFO - codeparrot_training - Step 9774: {'lr': 0.00046833155705512246, 'samples': 5004800, 'steps': 9774, 'loss/train': 1.988416075706482} +02/24/2022 14:12:56 - INFO - codeparrot_training - Step 9775: {'lr': 0.0004683235858464157, 'samples': 5005312, 'steps': 9775, 'loss/train': 2.118572950363159} +02/24/2022 14:13:00 - INFO - codeparrot_training - Step 9776: {'lr': 0.0004683156137024801, 'samples': 5005824, 'steps': 9776, 'loss/train': 3.1579926013946533} +02/24/2022 14:13:05 - INFO - codeparrot_training - Step 9777: {'lr': 0.0004683076406233496, 'samples': 5006336, 'steps': 9777, 'loss/train': 2.162621259689331} +02/24/2022 14:13:08 - INFO - codeparrot_training - Step 9778: {'lr': 0.0004682996666090585, 'samples': 5006848, 'steps': 9778, 'loss/train': 2.5025382041931152} +02/24/2022 14:13:14 - INFO - codeparrot_training - Step 9779: {'lr': 0.00046829169165964104, 'samples': 5007360, 'steps': 9779, 'loss/train': 3.010511636734009} +02/24/2022 14:13:18 - INFO - codeparrot_training - Step 9780: {'lr': 0.0004682837157751313, 'samples': 5007872, 'steps': 9780, 'loss/train': 0.49823588132858276} +02/24/2022 14:13:23 - INFO - codeparrot_training - Step 9781: {'lr': 0.00046827573895556334, 'samples': 5008384, 'steps': 9781, 'loss/train': 2.351402997970581} +02/24/2022 14:13:27 - INFO - codeparrot_training - Step 9782: {'lr': 0.00046826776120097147, 'samples': 5008896, 'steps': 9782, 'loss/train': 2.7306134700775146} +02/24/2022 14:13:32 - INFO - codeparrot_training - Step 9783: {'lr': 0.0004682597825113898, 'samples': 5009408, 'steps': 9783, 'loss/train': 2.374734878540039} +02/24/2022 14:13:36 - INFO - codeparrot_training - Step 9784: {'lr': 0.00046825180288685253, 'samples': 5009920, 'steps': 9784, 'loss/train': 2.564307451248169} +02/24/2022 14:13:43 - INFO - codeparrot_training - Step 9785: {'lr': 0.00046824382232739386, 'samples': 5010432, 'steps': 9785, 'loss/train': 2.068251371383667} +02/24/2022 14:13:47 - INFO - codeparrot_training - Step 9786: {'lr': 0.00046823584083304794, 'samples': 5010944, 'steps': 9786, 'loss/train': 1.333812952041626} +02/24/2022 14:13:52 - INFO - codeparrot_training - Step 9787: {'lr': 0.00046822785840384897, 'samples': 5011456, 'steps': 9787, 'loss/train': 2.36928653717041} +02/24/2022 14:13:56 - INFO - codeparrot_training - Step 9788: {'lr': 0.0004682198750398312, 'samples': 5011968, 'steps': 9788, 'loss/train': 0.7014451026916504} +02/24/2022 14:14:01 - INFO - codeparrot_training - Step 9789: {'lr': 0.0004682118907410287, 'samples': 5012480, 'steps': 9789, 'loss/train': 2.3775744438171387} +02/24/2022 14:14:05 - INFO - codeparrot_training - Step 9790: {'lr': 0.00046820390550747585, 'samples': 5012992, 'steps': 9790, 'loss/train': 2.2849907875061035} +02/24/2022 14:14:10 - INFO - codeparrot_training - Step 9791: {'lr': 0.0004681959193392067, 'samples': 5013504, 'steps': 9791, 'loss/train': 2.189612627029419} +02/24/2022 14:14:14 - INFO - codeparrot_training - Step 9792: {'lr': 0.00046818793223625543, 'samples': 5014016, 'steps': 9792, 'loss/train': 1.6522659063339233} +02/24/2022 14:14:20 - INFO - codeparrot_training - Step 9793: {'lr': 0.0004681799441986564, 'samples': 5014528, 'steps': 9793, 'loss/train': 2.472900390625} +02/24/2022 14:14:23 - INFO - codeparrot_training - Step 9794: {'lr': 0.00046817195522644387, 'samples': 5015040, 'steps': 9794, 'loss/train': 3.4724793434143066} +02/24/2022 14:14:29 - INFO - codeparrot_training - Step 9795: {'lr': 0.00046816396531965186, 'samples': 5015552, 'steps': 9795, 'loss/train': 1.3836543560028076} +02/24/2022 14:14:32 - INFO - codeparrot_training - Step 9796: {'lr': 0.0004681559744783147, 'samples': 5016064, 'steps': 9796, 'loss/train': 1.037685751914978} +02/24/2022 14:14:38 - INFO - codeparrot_training - Step 9797: {'lr': 0.00046814798270246663, 'samples': 5016576, 'steps': 9797, 'loss/train': 2.375507116317749} +02/24/2022 14:14:45 - INFO - codeparrot_training - Step 9798: {'lr': 0.00046813998999214193, 'samples': 5017088, 'steps': 9798, 'loss/train': 1.9001340866088867} +02/24/2022 14:14:48 - INFO - codeparrot_training - Step 9799: {'lr': 0.0004681319963473747, 'samples': 5017600, 'steps': 9799, 'loss/train': 2.62568998336792} +02/24/2022 14:14:54 - INFO - codeparrot_training - Step 9800: {'lr': 0.0004681240017681993, 'samples': 5018112, 'steps': 9800, 'loss/train': 2.387746572494507} +02/24/2022 14:14:57 - INFO - codeparrot_training - Step 9801: {'lr': 0.0004681160062546499, 'samples': 5018624, 'steps': 9801, 'loss/train': 2.5254976749420166} +02/24/2022 14:15:03 - INFO - codeparrot_training - Step 9802: {'lr': 0.00046810800980676083, 'samples': 5019136, 'steps': 9802, 'loss/train': 1.4963001012802124} +02/24/2022 14:15:06 - INFO - codeparrot_training - Step 9803: {'lr': 0.0004681000124245663, 'samples': 5019648, 'steps': 9803, 'loss/train': 3.0191409587860107} +02/24/2022 14:15:12 - INFO - codeparrot_training - Step 9804: {'lr': 0.0004680920141081005, 'samples': 5020160, 'steps': 9804, 'loss/train': 1.0616954565048218} +02/24/2022 14:15:15 - INFO - codeparrot_training - Step 9805: {'lr': 0.00046808401485739793, 'samples': 5020672, 'steps': 9805, 'loss/train': 1.6365487575531006} +02/24/2022 14:15:21 - INFO - codeparrot_training - Step 9806: {'lr': 0.00046807601467249255, 'samples': 5021184, 'steps': 9806, 'loss/train': 1.661150574684143} +02/24/2022 14:15:24 - INFO - codeparrot_training - Step 9807: {'lr': 0.0004680680135534188, 'samples': 5021696, 'steps': 9807, 'loss/train': 1.475576400756836} +02/24/2022 14:15:30 - INFO - codeparrot_training - Step 9808: {'lr': 0.00046806001150021095, 'samples': 5022208, 'steps': 9808, 'loss/train': 3.4492859840393066} +02/24/2022 14:15:34 - INFO - codeparrot_training - Step 9809: {'lr': 0.0004680520085129032, 'samples': 5022720, 'steps': 9809, 'loss/train': 1.5156227350234985} +02/24/2022 14:15:37 - INFO - codeparrot_training - Step 9810: {'lr': 0.00046804400459152994, 'samples': 5023232, 'steps': 9810, 'loss/train': 3.0102250576019287} +02/24/2022 14:15:44 - INFO - codeparrot_training - Step 9811: {'lr': 0.0004680359997361254, 'samples': 5023744, 'steps': 9811, 'loss/train': 0.6598783731460571} +02/24/2022 14:15:48 - INFO - codeparrot_training - Step 9812: {'lr': 0.0004680279939467238, 'samples': 5024256, 'steps': 9812, 'loss/train': 1.3877547979354858} +02/24/2022 14:15:53 - INFO - codeparrot_training - Step 9813: {'lr': 0.0004680199872233596, 'samples': 5024768, 'steps': 9813, 'loss/train': 1.8924779891967773} +02/24/2022 14:15:57 - INFO - codeparrot_training - Step 9814: {'lr': 0.00046801197956606693, 'samples': 5025280, 'steps': 9814, 'loss/train': 2.622920036315918} +02/24/2022 14:16:02 - INFO - codeparrot_training - Step 9815: {'lr': 0.00046800397097488024, 'samples': 5025792, 'steps': 9815, 'loss/train': 1.2758949995040894} +02/24/2022 14:16:06 - INFO - codeparrot_training - Step 9816: {'lr': 0.0004679959614498337, 'samples': 5026304, 'steps': 9816, 'loss/train': 1.6469138860702515} +02/24/2022 14:16:11 - INFO - codeparrot_training - Step 9817: {'lr': 0.0004679879509909617, 'samples': 5026816, 'steps': 9817, 'loss/train': 1.9287909269332886} +02/24/2022 14:16:15 - INFO - codeparrot_training - Step 9818: {'lr': 0.00046797993959829857, 'samples': 5027328, 'steps': 9818, 'loss/train': 2.843106985092163} +02/24/2022 14:16:20 - INFO - codeparrot_training - Step 9819: {'lr': 0.00046797192727187855, 'samples': 5027840, 'steps': 9819, 'loss/train': 1.385992169380188} +02/24/2022 14:16:24 - INFO - codeparrot_training - Step 9820: {'lr': 0.000467963914011736, 'samples': 5028352, 'steps': 9820, 'loss/train': 2.058119297027588} +02/24/2022 14:16:31 - INFO - codeparrot_training - Step 9821: {'lr': 0.0004679558998179053, 'samples': 5028864, 'steps': 9821, 'loss/train': 1.1119884252548218} +02/24/2022 14:16:34 - INFO - codeparrot_training - Step 9822: {'lr': 0.0004679478846904207, 'samples': 5029376, 'steps': 9822, 'loss/train': 1.6504911184310913} +02/24/2022 14:16:40 - INFO - codeparrot_training - Step 9823: {'lr': 0.00046793986862931654, 'samples': 5029888, 'steps': 9823, 'loss/train': 2.2548770904541016} +02/24/2022 14:16:45 - INFO - codeparrot_training - Step 9824: {'lr': 0.0004679318516346273, 'samples': 5030400, 'steps': 9824, 'loss/train': 2.2708287239074707} +02/24/2022 14:16:49 - INFO - codeparrot_training - Step 9825: {'lr': 0.00046792383370638705, 'samples': 5030912, 'steps': 9825, 'loss/train': 3.161379814147949} +02/24/2022 14:16:54 - INFO - codeparrot_training - Step 9826: {'lr': 0.0004679158148446304, 'samples': 5031424, 'steps': 9826, 'loss/train': 1.0474443435668945} +02/24/2022 14:16:58 - INFO - codeparrot_training - Step 9827: {'lr': 0.00046790779504939155, 'samples': 5031936, 'steps': 9827, 'loss/train': 2.720407485961914} +02/24/2022 14:17:03 - INFO - codeparrot_training - Step 9828: {'lr': 0.00046789977432070497, 'samples': 5032448, 'steps': 9828, 'loss/train': 2.0093250274658203} +02/24/2022 14:17:07 - INFO - codeparrot_training - Step 9829: {'lr': 0.00046789175265860483, 'samples': 5032960, 'steps': 9829, 'loss/train': 1.6026208400726318} +02/24/2022 14:17:14 - INFO - codeparrot_training - Step 9830: {'lr': 0.00046788373006312567, 'samples': 5033472, 'steps': 9830, 'loss/train': 2.3966925144195557} +02/24/2022 14:17:17 - INFO - codeparrot_training - Step 9831: {'lr': 0.0004678757065343019, 'samples': 5033984, 'steps': 9831, 'loss/train': 2.5034494400024414} +02/24/2022 14:17:23 - INFO - codeparrot_training - Step 9832: {'lr': 0.0004678676820721677, 'samples': 5034496, 'steps': 9832, 'loss/train': 2.020383358001709} +02/24/2022 14:17:26 - INFO - codeparrot_training - Step 9833: {'lr': 0.00046785965667675745, 'samples': 5035008, 'steps': 9833, 'loss/train': 1.6288156509399414} +02/24/2022 14:17:32 - INFO - codeparrot_training - Step 9834: {'lr': 0.00046785163034810567, 'samples': 5035520, 'steps': 9834, 'loss/train': 2.30257248878479} +02/24/2022 14:17:35 - INFO - codeparrot_training - Step 9835: {'lr': 0.00046784360308624675, 'samples': 5036032, 'steps': 9835, 'loss/train': 1.4314764738082886} +02/24/2022 14:17:41 - INFO - codeparrot_training - Step 9836: {'lr': 0.0004678355748912149, 'samples': 5036544, 'steps': 9836, 'loss/train': 2.7425527572631836} +02/24/2022 14:17:44 - INFO - codeparrot_training - Step 9837: {'lr': 0.0004678275457630447, 'samples': 5037056, 'steps': 9837, 'loss/train': 1.969691514968872} +02/24/2022 14:17:50 - INFO - codeparrot_training - Step 9838: {'lr': 0.0004678195157017704, 'samples': 5037568, 'steps': 9838, 'loss/train': 2.5779781341552734} +02/24/2022 14:17:53 - INFO - codeparrot_training - Step 9839: {'lr': 0.00046781148470742654, 'samples': 5038080, 'steps': 9839, 'loss/train': 2.4346694946289062} +02/24/2022 14:17:59 - INFO - codeparrot_training - Step 9840: {'lr': 0.0004678034527800474, 'samples': 5038592, 'steps': 9840, 'loss/train': 1.6345288753509521} +02/24/2022 14:18:02 - INFO - codeparrot_training - Step 9841: {'lr': 0.0004677954199196674, 'samples': 5039104, 'steps': 9841, 'loss/train': 2.04012393951416} +02/24/2022 14:18:08 - INFO - codeparrot_training - Step 9842: {'lr': 0.00046778738612632097, 'samples': 5039616, 'steps': 9842, 'loss/train': 0.6263843774795532} +02/24/2022 14:18:11 - INFO - codeparrot_training - Step 9843: {'lr': 0.00046777935140004256, 'samples': 5040128, 'steps': 9843, 'loss/train': 1.6836732625961304} +02/24/2022 14:18:17 - INFO - codeparrot_training - Step 9844: {'lr': 0.00046777131574086663, 'samples': 5040640, 'steps': 9844, 'loss/train': 2.8924083709716797} +02/24/2022 14:18:20 - INFO - codeparrot_training - Step 9845: {'lr': 0.0004677632791488274, 'samples': 5041152, 'steps': 9845, 'loss/train': 1.0286898612976074} +02/24/2022 14:18:28 - INFO - codeparrot_training - Step 9846: {'lr': 0.00046775524162395954, 'samples': 5041664, 'steps': 9846, 'loss/train': 1.0133346319198608} +02/24/2022 14:18:31 - INFO - codeparrot_training - Step 9847: {'lr': 0.00046774720316629734, 'samples': 5042176, 'steps': 9847, 'loss/train': 1.944876790046692} +02/24/2022 14:18:37 - INFO - codeparrot_training - Step 9848: {'lr': 0.00046773916377587524, 'samples': 5042688, 'steps': 9848, 'loss/train': 1.5478363037109375} +02/24/2022 14:18:40 - INFO - codeparrot_training - Step 9849: {'lr': 0.00046773112345272773, 'samples': 5043200, 'steps': 9849, 'loss/train': 2.0946617126464844} +02/24/2022 14:18:46 - INFO - codeparrot_training - Step 9850: {'lr': 0.0004677230821968892, 'samples': 5043712, 'steps': 9850, 'loss/train': 1.566332459449768} +02/24/2022 14:18:49 - INFO - codeparrot_training - Step 9851: {'lr': 0.00046771504000839417, 'samples': 5044224, 'steps': 9851, 'loss/train': 1.9542189836502075} +02/24/2022 14:18:55 - INFO - codeparrot_training - Step 9852: {'lr': 0.0004677069968872769, 'samples': 5044736, 'steps': 9852, 'loss/train': 2.1629180908203125} +02/24/2022 14:18:58 - INFO - codeparrot_training - Step 9853: {'lr': 0.0004676989528335721, 'samples': 5045248, 'steps': 9853, 'loss/train': 2.5073561668395996} +02/24/2022 14:19:04 - INFO - codeparrot_training - Step 9854: {'lr': 0.0004676909078473142, 'samples': 5045760, 'steps': 9854, 'loss/train': 2.185811758041382} +02/24/2022 14:19:07 - INFO - codeparrot_training - Step 9855: {'lr': 0.00046768286192853736, 'samples': 5046272, 'steps': 9855, 'loss/train': 1.9570590257644653} +02/24/2022 14:19:14 - INFO - codeparrot_training - Step 9856: {'lr': 0.00046767481507727646, 'samples': 5046784, 'steps': 9856, 'loss/train': 2.4177072048187256} +02/24/2022 14:19:18 - INFO - codeparrot_training - Step 9857: {'lr': 0.00046766676729356564, 'samples': 5047296, 'steps': 9857, 'loss/train': 3.0085244178771973} +02/24/2022 14:19:23 - INFO - codeparrot_training - Step 9858: {'lr': 0.0004676587185774396, 'samples': 5047808, 'steps': 9858, 'loss/train': 2.3172130584716797} +02/24/2022 14:19:27 - INFO - codeparrot_training - Step 9859: {'lr': 0.00046765066892893266, 'samples': 5048320, 'steps': 9859, 'loss/train': 2.5215938091278076} +02/24/2022 14:19:32 - INFO - codeparrot_training - Step 9860: {'lr': 0.00046764261834807944, 'samples': 5048832, 'steps': 9860, 'loss/train': 1.548329472541809} +02/24/2022 14:19:36 - INFO - codeparrot_training - Step 9861: {'lr': 0.0004676345668349142, 'samples': 5049344, 'steps': 9861, 'loss/train': 2.180375337600708} +02/24/2022 14:19:42 - INFO - codeparrot_training - Step 9862: {'lr': 0.0004676265143894717, 'samples': 5049856, 'steps': 9862, 'loss/train': 1.731307864189148} +02/24/2022 14:19:45 - INFO - codeparrot_training - Step 9863: {'lr': 0.0004676184610117863, 'samples': 5050368, 'steps': 9863, 'loss/train': 3.3570234775543213} +02/24/2022 14:19:51 - INFO - codeparrot_training - Step 9864: {'lr': 0.0004676104067018925, 'samples': 5050880, 'steps': 9864, 'loss/train': 2.9153382778167725} +02/24/2022 14:19:54 - INFO - codeparrot_training - Step 9865: {'lr': 0.0004676023514598249, 'samples': 5051392, 'steps': 9865, 'loss/train': 2.4567854404449463} +02/24/2022 14:20:01 - INFO - codeparrot_training - Step 9866: {'lr': 0.0004675942952856178, 'samples': 5051904, 'steps': 9866, 'loss/train': 1.0560890436172485} +02/24/2022 14:20:05 - INFO - codeparrot_training - Step 9867: {'lr': 0.0004675862381793059, 'samples': 5052416, 'steps': 9867, 'loss/train': 3.4936273097991943} +02/24/2022 14:20:10 - INFO - codeparrot_training - Step 9868: {'lr': 0.0004675781801409236, 'samples': 5052928, 'steps': 9868, 'loss/train': 0.6611115336418152} +02/24/2022 14:20:14 - INFO - codeparrot_training - Step 9869: {'lr': 0.00046757012117050554, 'samples': 5053440, 'steps': 9869, 'loss/train': 2.094862699508667} +02/24/2022 14:20:19 - INFO - codeparrot_training - Step 9870: {'lr': 0.00046756206126808607, 'samples': 5053952, 'steps': 9870, 'loss/train': 1.258583664894104} +02/24/2022 14:20:23 - INFO - codeparrot_training - Step 9871: {'lr': 0.0004675540004336999, 'samples': 5054464, 'steps': 9871, 'loss/train': 1.1366286277770996} +02/24/2022 14:20:28 - INFO - codeparrot_training - Step 9872: {'lr': 0.00046754593866738144, 'samples': 5054976, 'steps': 9872, 'loss/train': 2.1207761764526367} +02/24/2022 14:20:32 - INFO - codeparrot_training - Step 9873: {'lr': 0.0004675378759691652, 'samples': 5055488, 'steps': 9873, 'loss/train': 2.7102530002593994} +02/24/2022 14:20:37 - INFO - codeparrot_training - Step 9874: {'lr': 0.00046752981233908587, 'samples': 5056000, 'steps': 9874, 'loss/train': 5.332662582397461} +02/24/2022 14:20:41 - INFO - codeparrot_training - Step 9875: {'lr': 0.0004675217477771779, 'samples': 5056512, 'steps': 9875, 'loss/train': 1.0341824293136597} +02/24/2022 14:20:46 - INFO - codeparrot_training - Step 9876: {'lr': 0.0004675136822834758, 'samples': 5057024, 'steps': 9876, 'loss/train': 2.6010963916778564} +02/24/2022 14:20:50 - INFO - codeparrot_training - Step 9877: {'lr': 0.0004675056158580141, 'samples': 5057536, 'steps': 9877, 'loss/train': 2.239691972732544} +02/24/2022 14:20:57 - INFO - codeparrot_training - Step 9878: {'lr': 0.0004674975485008275, 'samples': 5058048, 'steps': 9878, 'loss/train': 1.7435237169265747} +02/24/2022 14:21:01 - INFO - codeparrot_training - Step 9879: {'lr': 0.00046748948021195036, 'samples': 5058560, 'steps': 9879, 'loss/train': 2.204639434814453} +02/24/2022 14:21:06 - INFO - codeparrot_training - Step 9880: {'lr': 0.0004674814109914174, 'samples': 5059072, 'steps': 9880, 'loss/train': 1.5364415645599365} +02/24/2022 14:21:10 - INFO - codeparrot_training - Step 9881: {'lr': 0.00046747334083926316, 'samples': 5059584, 'steps': 9881, 'loss/train': 2.10906720161438} +02/24/2022 14:21:15 - INFO - codeparrot_training - Step 9882: {'lr': 0.0004674652697555222, 'samples': 5060096, 'steps': 9882, 'loss/train': 1.8647351264953613} +02/24/2022 14:21:19 - INFO - codeparrot_training - Step 9883: {'lr': 0.000467457197740229, 'samples': 5060608, 'steps': 9883, 'loss/train': 2.3197133541107178} +02/24/2022 14:21:24 - INFO - codeparrot_training - Step 9884: {'lr': 0.00046744912479341826, 'samples': 5061120, 'steps': 9884, 'loss/train': 2.5268044471740723} +02/24/2022 14:21:28 - INFO - codeparrot_training - Step 9885: {'lr': 0.0004674410509151246, 'samples': 5061632, 'steps': 9885, 'loss/train': 2.8732500076293945} +02/24/2022 14:21:33 - INFO - codeparrot_training - Step 9886: {'lr': 0.0004674329761053824, 'samples': 5062144, 'steps': 9886, 'loss/train': 2.8711466789245605} +02/24/2022 14:21:37 - INFO - codeparrot_training - Step 9887: {'lr': 0.00046742490036422635, 'samples': 5062656, 'steps': 9887, 'loss/train': 2.5218918323516846} +02/24/2022 14:21:42 - INFO - codeparrot_training - Step 9888: {'lr': 0.00046741682369169115, 'samples': 5063168, 'steps': 9888, 'loss/train': 2.0408434867858887} +02/24/2022 14:21:46 - INFO - codeparrot_training - Step 9889: {'lr': 0.00046740874608781126, 'samples': 5063680, 'steps': 9889, 'loss/train': 1.8912031650543213} +02/24/2022 14:21:51 - INFO - codeparrot_training - Step 9890: {'lr': 0.0004674006675526214, 'samples': 5064192, 'steps': 9890, 'loss/train': 2.1615066528320312} +02/24/2022 14:21:55 - INFO - codeparrot_training - Step 9891: {'lr': 0.00046739258808615607, 'samples': 5064704, 'steps': 9891, 'loss/train': 1.8317413330078125} +02/24/2022 14:22:02 - INFO - codeparrot_training - Step 9892: {'lr': 0.00046738450768845, 'samples': 5065216, 'steps': 9892, 'loss/train': 0.7311815023422241} +02/24/2022 14:22:06 - INFO - codeparrot_training - Step 9893: {'lr': 0.0004673764263595376, 'samples': 5065728, 'steps': 9893, 'loss/train': 0.9409798383712769} +02/24/2022 14:22:11 - INFO - codeparrot_training - Step 9894: {'lr': 0.00046736834409945364, 'samples': 5066240, 'steps': 9894, 'loss/train': 0.8589231371879578} +02/24/2022 14:22:15 - INFO - codeparrot_training - Step 9895: {'lr': 0.0004673602609082328, 'samples': 5066752, 'steps': 9895, 'loss/train': 0.2276872843503952} +02/24/2022 14:22:20 - INFO - codeparrot_training - Step 9896: {'lr': 0.00046735217678590957, 'samples': 5067264, 'steps': 9896, 'loss/train': 1.5629442930221558} +02/24/2022 14:22:24 - INFO - codeparrot_training - Step 9897: {'lr': 0.0004673440917325186, 'samples': 5067776, 'steps': 9897, 'loss/train': 2.7554752826690674} +02/24/2022 14:22:29 - INFO - codeparrot_training - Step 9898: {'lr': 0.00046733600574809465, 'samples': 5068288, 'steps': 9898, 'loss/train': 1.9369906187057495} +02/24/2022 14:22:33 - INFO - codeparrot_training - Step 9899: {'lr': 0.0004673279188326722, 'samples': 5068800, 'steps': 9899, 'loss/train': 2.524409055709839} +02/24/2022 14:22:38 - INFO - codeparrot_training - Step 9900: {'lr': 0.00046731983098628597, 'samples': 5069312, 'steps': 9900, 'loss/train': 1.9200384616851807} +02/24/2022 14:22:42 - INFO - codeparrot_training - Step 9901: {'lr': 0.00046731174220897054, 'samples': 5069824, 'steps': 9901, 'loss/train': 8.869290351867676} +02/24/2022 14:22:50 - INFO - codeparrot_training - Step 9902: {'lr': 0.0004673036525007607, 'samples': 5070336, 'steps': 9902, 'loss/train': 2.715398073196411} +02/24/2022 14:22:53 - INFO - codeparrot_training - Step 9903: {'lr': 0.0004672955618616909, 'samples': 5070848, 'steps': 9903, 'loss/train': 1.3328404426574707} +02/24/2022 14:22:59 - INFO - codeparrot_training - Step 9904: {'lr': 0.00046728747029179594, 'samples': 5071360, 'steps': 9904, 'loss/train': 2.451209783554077} +02/24/2022 14:23:02 - INFO - codeparrot_training - Step 9905: {'lr': 0.00046727937779111054, 'samples': 5071872, 'steps': 9905, 'loss/train': 1.2708052396774292} +02/24/2022 14:23:08 - INFO - codeparrot_training - Step 9906: {'lr': 0.0004672712843596693, 'samples': 5072384, 'steps': 9906, 'loss/train': 2.001110076904297} +02/24/2022 14:23:11 - INFO - codeparrot_training - Step 9907: {'lr': 0.0004672631899975067, 'samples': 5072896, 'steps': 9907, 'loss/train': 1.4222224950790405} +02/24/2022 14:23:17 - INFO - codeparrot_training - Step 9908: {'lr': 0.0004672550947046577, 'samples': 5073408, 'steps': 9908, 'loss/train': 2.0502870082855225} +02/24/2022 14:23:20 - INFO - codeparrot_training - Step 9909: {'lr': 0.0004672469984811568, 'samples': 5073920, 'steps': 9909, 'loss/train': 0.7946783304214478} +02/24/2022 14:23:26 - INFO - codeparrot_training - Step 9910: {'lr': 0.00046723890132703886, 'samples': 5074432, 'steps': 9910, 'loss/train': 3.2919411659240723} +02/24/2022 14:23:29 - INFO - codeparrot_training - Step 9911: {'lr': 0.0004672308032423384, 'samples': 5074944, 'steps': 9911, 'loss/train': 0.11130419373512268} +02/24/2022 14:23:35 - INFO - codeparrot_training - Step 9912: {'lr': 0.0004672227042270901, 'samples': 5075456, 'steps': 9912, 'loss/train': 1.9338780641555786} +02/24/2022 14:23:38 - INFO - codeparrot_training - Step 9913: {'lr': 0.00046721460428132873, 'samples': 5075968, 'steps': 9913, 'loss/train': 0.8318203091621399} +02/24/2022 14:23:46 - INFO - codeparrot_training - Step 9914: {'lr': 0.00046720650340508895, 'samples': 5076480, 'steps': 9914, 'loss/train': 3.330626964569092} +02/24/2022 14:23:49 - INFO - codeparrot_training - Step 9915: {'lr': 0.00046719840159840557, 'samples': 5076992, 'steps': 9915, 'loss/train': 2.098567485809326} +02/24/2022 14:23:55 - INFO - codeparrot_training - Step 9916: {'lr': 0.00046719029886131317, 'samples': 5077504, 'steps': 9916, 'loss/train': 1.9815986156463623} +02/24/2022 14:23:58 - INFO - codeparrot_training - Step 9917: {'lr': 0.0004671821951938464, 'samples': 5078016, 'steps': 9917, 'loss/train': 0.5972822904586792} +02/24/2022 14:24:04 - INFO - codeparrot_training - Step 9918: {'lr': 0.0004671740905960401, 'samples': 5078528, 'steps': 9918, 'loss/train': 0.6264111399650574} +02/24/2022 14:24:07 - INFO - codeparrot_training - Step 9919: {'lr': 0.00046716598506792905, 'samples': 5079040, 'steps': 9919, 'loss/train': 1.2439059019088745} +02/24/2022 14:24:13 - INFO - codeparrot_training - Step 9920: {'lr': 0.00046715787860954785, 'samples': 5079552, 'steps': 9920, 'loss/train': 1.1996192932128906} +02/24/2022 14:24:16 - INFO - codeparrot_training - Step 9921: {'lr': 0.0004671497712209312, 'samples': 5080064, 'steps': 9921, 'loss/train': 3.2870521545410156} +02/24/2022 14:24:22 - INFO - codeparrot_training - Step 9922: {'lr': 0.0004671416629021139, 'samples': 5080576, 'steps': 9922, 'loss/train': 1.2741830348968506} +02/24/2022 14:24:25 - INFO - codeparrot_training - Step 9923: {'lr': 0.0004671335536531307, 'samples': 5081088, 'steps': 9923, 'loss/train': 2.3450915813446045} +02/24/2022 14:24:32 - INFO - codeparrot_training - Step 9924: {'lr': 0.00046712544347401623, 'samples': 5081600, 'steps': 9924, 'loss/train': 2.386737108230591} +02/24/2022 14:24:36 - INFO - codeparrot_training - Step 9925: {'lr': 0.0004671173323648054, 'samples': 5082112, 'steps': 9925, 'loss/train': 2.5172245502471924} +02/24/2022 14:24:41 - INFO - codeparrot_training - Step 9926: {'lr': 0.00046710922032553283, 'samples': 5082624, 'steps': 9926, 'loss/train': 2.627974510192871} +02/24/2022 14:24:45 - INFO - codeparrot_training - Step 9927: {'lr': 0.00046710110735623326, 'samples': 5083136, 'steps': 9927, 'loss/train': 2.4732723236083984} +02/24/2022 14:24:50 - INFO - codeparrot_training - Step 9928: {'lr': 0.00046709299345694156, 'samples': 5083648, 'steps': 9928, 'loss/train': 2.4833905696868896} +02/24/2022 14:24:54 - INFO - codeparrot_training - Step 9929: {'lr': 0.00046708487862769235, 'samples': 5084160, 'steps': 9929, 'loss/train': 1.388100028038025} +02/24/2022 14:24:59 - INFO - codeparrot_training - Step 9930: {'lr': 0.0004670767628685204, 'samples': 5084672, 'steps': 9930, 'loss/train': 2.0667126178741455} +02/24/2022 14:25:03 - INFO - codeparrot_training - Step 9931: {'lr': 0.00046706864617946064, 'samples': 5085184, 'steps': 9931, 'loss/train': 2.4349706172943115} +02/24/2022 14:25:08 - INFO - codeparrot_training - Step 9932: {'lr': 0.0004670605285605477, 'samples': 5085696, 'steps': 9932, 'loss/train': 3.1051206588745117} +02/24/2022 14:25:12 - INFO - codeparrot_training - Step 9933: {'lr': 0.0004670524100118163, 'samples': 5086208, 'steps': 9933, 'loss/train': 1.7971051931381226} +02/24/2022 14:25:17 - INFO - codeparrot_training - Step 9934: {'lr': 0.00046704429053330137, 'samples': 5086720, 'steps': 9934, 'loss/train': 2.1329686641693115} +02/24/2022 14:25:21 - INFO - codeparrot_training - Step 9935: {'lr': 0.00046703617012503764, 'samples': 5087232, 'steps': 9935, 'loss/train': 1.5983407497406006} +02/24/2022 14:25:26 - INFO - codeparrot_training - Step 9936: {'lr': 0.00046702804878705987, 'samples': 5087744, 'steps': 9936, 'loss/train': 2.7194325923919678} +02/24/2022 14:25:29 - INFO - codeparrot_training - Step 9937: {'lr': 0.00046701992651940275, 'samples': 5088256, 'steps': 9937, 'loss/train': 2.697232723236084} +02/24/2022 14:25:37 - INFO - codeparrot_training - Step 9938: {'lr': 0.00046701180332210125, 'samples': 5088768, 'steps': 9938, 'loss/train': 2.7391722202301025} +02/24/2022 14:25:40 - INFO - codeparrot_training - Step 9939: {'lr': 0.0004670036791951901, 'samples': 5089280, 'steps': 9939, 'loss/train': 2.522125005722046} +02/24/2022 14:25:46 - INFO - codeparrot_training - Step 9940: {'lr': 0.0004669955541387041, 'samples': 5089792, 'steps': 9940, 'loss/train': 2.5859646797180176} +02/24/2022 14:25:49 - INFO - codeparrot_training - Step 9941: {'lr': 0.000466987428152678, 'samples': 5090304, 'steps': 9941, 'loss/train': 3.9463722705841064} +02/24/2022 14:25:55 - INFO - codeparrot_training - Step 9942: {'lr': 0.00046697930123714673, 'samples': 5090816, 'steps': 9942, 'loss/train': 0.7443364858627319} +02/24/2022 14:25:58 - INFO - codeparrot_training - Step 9943: {'lr': 0.000466971173392145, 'samples': 5091328, 'steps': 9943, 'loss/train': 1.603397011756897} +02/24/2022 14:26:04 - INFO - codeparrot_training - Step 9944: {'lr': 0.0004669630446177077, 'samples': 5091840, 'steps': 9944, 'loss/train': 1.6332961320877075} +02/24/2022 14:26:07 - INFO - codeparrot_training - Step 9945: {'lr': 0.00046695491491386955, 'samples': 5092352, 'steps': 9945, 'loss/train': 1.6669552326202393} +02/24/2022 14:26:13 - INFO - codeparrot_training - Step 9946: {'lr': 0.0004669467842806654, 'samples': 5092864, 'steps': 9946, 'loss/train': 2.0874228477478027} +02/24/2022 14:26:16 - INFO - codeparrot_training - Step 9947: {'lr': 0.00046693865271813016, 'samples': 5093376, 'steps': 9947, 'loss/train': 2.525726079940796} +02/24/2022 14:26:23 - INFO - codeparrot_training - Step 9948: {'lr': 0.0004669305202262987, 'samples': 5093888, 'steps': 9948, 'loss/train': 1.0922309160232544} +02/24/2022 14:26:27 - INFO - codeparrot_training - Step 9949: {'lr': 0.00046692238680520564, 'samples': 5094400, 'steps': 9949, 'loss/train': 1.541135311126709} +02/24/2022 14:26:32 - INFO - codeparrot_training - Step 9950: {'lr': 0.00046691425245488607, 'samples': 5094912, 'steps': 9950, 'loss/train': 2.081545114517212} +02/24/2022 14:26:36 - INFO - codeparrot_training - Step 9951: {'lr': 0.0004669061171753746, 'samples': 5095424, 'steps': 9951, 'loss/train': 2.0083301067352295} +02/24/2022 14:26:42 - INFO - codeparrot_training - Step 9952: {'lr': 0.0004668979809667063, 'samples': 5095936, 'steps': 9952, 'loss/train': 2.4489872455596924} +02/24/2022 14:26:45 - INFO - codeparrot_training - Step 9953: {'lr': 0.0004668898438289159, 'samples': 5096448, 'steps': 9953, 'loss/train': 0.8532716035842896} +02/24/2022 14:26:51 - INFO - codeparrot_training - Step 9954: {'lr': 0.00046688170576203827, 'samples': 5096960, 'steps': 9954, 'loss/train': 4.200775623321533} +02/24/2022 14:26:54 - INFO - codeparrot_training - Step 9955: {'lr': 0.00046687356676610825, 'samples': 5097472, 'steps': 9955, 'loss/train': 2.063951015472412} +02/24/2022 14:26:59 - INFO - codeparrot_training - Step 9956: {'lr': 0.00046686542684116073, 'samples': 5097984, 'steps': 9956, 'loss/train': 2.317816972732544} +02/24/2022 14:27:05 - INFO - codeparrot_training - Step 9957: {'lr': 0.00046685728598723063, 'samples': 5098496, 'steps': 9957, 'loss/train': 2.47550368309021} +02/24/2022 14:27:08 - INFO - codeparrot_training - Step 9958: {'lr': 0.00046684914420435275, 'samples': 5099008, 'steps': 9958, 'loss/train': 1.1509557962417603} +02/24/2022 14:27:16 - INFO - codeparrot_training - Step 9959: {'lr': 0.00046684100149256205, 'samples': 5099520, 'steps': 9959, 'loss/train': 2.005819320678711} +02/24/2022 14:27:19 - INFO - codeparrot_training - Step 9960: {'lr': 0.0004668328578518933, 'samples': 5100032, 'steps': 9960, 'loss/train': 2.0511999130249023} +02/24/2022 14:27:24 - INFO - codeparrot_training - Step 9961: {'lr': 0.0004668247132823814, 'samples': 5100544, 'steps': 9961, 'loss/train': 1.570127010345459} +02/24/2022 14:27:28 - INFO - codeparrot_training - Step 9962: {'lr': 0.00046681656778406136, 'samples': 5101056, 'steps': 9962, 'loss/train': 2.2478654384613037} +02/24/2022 14:27:34 - INFO - codeparrot_training - Step 9963: {'lr': 0.000466808421356968, 'samples': 5101568, 'steps': 9963, 'loss/train': 1.4018045663833618} +02/24/2022 14:27:37 - INFO - codeparrot_training - Step 9964: {'lr': 0.00046680027400113614, 'samples': 5102080, 'steps': 9964, 'loss/train': 2.3996355533599854} +02/24/2022 14:27:43 - INFO - codeparrot_training - Step 9965: {'lr': 0.0004667921257166008, 'samples': 5102592, 'steps': 9965, 'loss/train': 2.374163866043091} +02/24/2022 14:27:46 - INFO - codeparrot_training - Step 9966: {'lr': 0.00046678397650339677, 'samples': 5103104, 'steps': 9966, 'loss/train': 1.4455193281173706} +02/24/2022 14:27:51 - INFO - codeparrot_training - Step 9967: {'lr': 0.00046677582636155904, 'samples': 5103616, 'steps': 9967, 'loss/train': 2.928968667984009} +02/24/2022 14:27:55 - INFO - codeparrot_training - Step 9968: {'lr': 0.00046676767529112254, 'samples': 5104128, 'steps': 9968, 'loss/train': 2.4172563552856445} +02/24/2022 14:28:02 - INFO - codeparrot_training - Step 9969: {'lr': 0.0004667595232921221, 'samples': 5104640, 'steps': 9969, 'loss/train': 2.744659185409546} +02/24/2022 14:28:06 - INFO - codeparrot_training - Step 9970: {'lr': 0.00046675137036459273, 'samples': 5105152, 'steps': 9970, 'loss/train': 2.0865478515625} +02/24/2022 14:28:11 - INFO - codeparrot_training - Step 9971: {'lr': 0.0004667432165085693, 'samples': 5105664, 'steps': 9971, 'loss/train': 1.994526743888855} +02/24/2022 14:28:15 - INFO - codeparrot_training - Step 9972: {'lr': 0.00046673506172408675, 'samples': 5106176, 'steps': 9972, 'loss/train': 1.5064557790756226} +02/24/2022 14:28:20 - INFO - codeparrot_training - Step 9973: {'lr': 0.0004667269060111801, 'samples': 5106688, 'steps': 9973, 'loss/train': 1.2326725721359253} +02/24/2022 14:28:24 - INFO - codeparrot_training - Step 9974: {'lr': 0.0004667187493698841, 'samples': 5107200, 'steps': 9974, 'loss/train': 2.0082168579101562} +02/24/2022 14:28:30 - INFO - codeparrot_training - Step 9975: {'lr': 0.00046671059180023377, 'samples': 5107712, 'steps': 9975, 'loss/train': 2.4531643390655518} +02/24/2022 14:28:33 - INFO - codeparrot_training - Step 9976: {'lr': 0.0004667024333022642, 'samples': 5108224, 'steps': 9976, 'loss/train': 2.482114553451538} +02/24/2022 14:28:39 - INFO - codeparrot_training - Step 9977: {'lr': 0.00046669427387601017, 'samples': 5108736, 'steps': 9977, 'loss/train': 2.3870275020599365} +02/24/2022 14:28:42 - INFO - codeparrot_training - Step 9978: {'lr': 0.0004666861135215066, 'samples': 5109248, 'steps': 9978, 'loss/train': 2.2954249382019043} +02/24/2022 14:28:48 - INFO - codeparrot_training - Step 9979: {'lr': 0.0004666779522387886, 'samples': 5109760, 'steps': 9979, 'loss/train': 2.2171740531921387} +02/24/2022 14:28:51 - INFO - codeparrot_training - Step 9980: {'lr': 0.000466669790027891, 'samples': 5110272, 'steps': 9980, 'loss/train': 1.9995105266571045} +02/24/2022 14:28:57 - INFO - codeparrot_training - Step 9981: {'lr': 0.00046666162688884893, 'samples': 5110784, 'steps': 9981, 'loss/train': 2.267690420150757} +02/24/2022 14:29:00 - INFO - codeparrot_training - Step 9982: {'lr': 0.0004666534628216972, 'samples': 5111296, 'steps': 9982, 'loss/train': 2.74352765083313} +02/24/2022 14:29:06 - INFO - codeparrot_training - Step 9983: {'lr': 0.0004666452978264708, 'samples': 5111808, 'steps': 9983, 'loss/train': 2.284552812576294} +02/24/2022 14:29:09 - INFO - codeparrot_training - Step 9984: {'lr': 0.0004666371319032047, 'samples': 5112320, 'steps': 9984, 'loss/train': 2.207453489303589} +02/24/2022 14:29:17 - INFO - codeparrot_training - Step 9985: {'lr': 0.00046662896505193395, 'samples': 5112832, 'steps': 9985, 'loss/train': 2.010751962661743} +02/24/2022 14:29:20 - INFO - codeparrot_training - Step 9986: {'lr': 0.00046662079727269356, 'samples': 5113344, 'steps': 9986, 'loss/train': 1.626746654510498} +02/24/2022 14:29:26 - INFO - codeparrot_training - Step 9987: {'lr': 0.0004666126285655184, 'samples': 5113856, 'steps': 9987, 'loss/train': 2.2685327529907227} +02/24/2022 14:29:29 - INFO - codeparrot_training - Step 9988: {'lr': 0.0004666044589304436, 'samples': 5114368, 'steps': 9988, 'loss/train': 1.9239364862442017} +02/24/2022 14:29:33 - INFO - codeparrot_training - Step 9989: {'lr': 0.000466596288367504, 'samples': 5114880, 'steps': 9989, 'loss/train': 2.3868231773376465} +02/24/2022 14:29:38 - INFO - codeparrot_training - Step 9990: {'lr': 0.0004665881168767346, 'samples': 5115392, 'steps': 9990, 'loss/train': 1.8015762567520142} +02/24/2022 14:29:42 - INFO - codeparrot_training - Step 9991: {'lr': 0.00046657994445817064, 'samples': 5115904, 'steps': 9991, 'loss/train': 1.8796157836914062} +02/24/2022 14:29:48 - INFO - codeparrot_training - Step 9992: {'lr': 0.0004665717711118469, 'samples': 5116416, 'steps': 9992, 'loss/train': 2.834174633026123} +02/24/2022 14:29:51 - INFO - codeparrot_training - Step 9993: {'lr': 0.00046656359683779845, 'samples': 5116928, 'steps': 9993, 'loss/train': 2.1030397415161133} +02/24/2022 14:29:57 - INFO - codeparrot_training - Step 9994: {'lr': 0.00046655542163606033, 'samples': 5117440, 'steps': 9994, 'loss/train': 1.9597599506378174} +02/24/2022 14:30:04 - INFO - codeparrot_training - Step 9995: {'lr': 0.0004665472455066675, 'samples': 5117952, 'steps': 9995, 'loss/train': 2.9539687633514404} +02/24/2022 14:30:08 - INFO - codeparrot_training - Step 9996: {'lr': 0.0004665390684496551, 'samples': 5118464, 'steps': 9996, 'loss/train': 2.8192200660705566} +02/24/2022 14:30:13 - INFO - codeparrot_training - Step 9997: {'lr': 0.0004665308904650581, 'samples': 5118976, 'steps': 9997, 'loss/train': 1.3336280584335327} +02/24/2022 14:30:17 - INFO - codeparrot_training - Step 9998: {'lr': 0.00046652271155291146, 'samples': 5119488, 'steps': 9998, 'loss/train': 2.2988455295562744} +02/24/2022 14:30:22 - INFO - codeparrot_training - Step 9999: {'lr': 0.0004665145317132503, 'samples': 5120000, 'steps': 9999, 'loss/train': 1.8172866106033325} +02/24/2022 14:30:22 - INFO - codeparrot_training - Evaluating and saving model checkpoint