diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -12373,3 +12373,1009 @@ Use FP16 precision: False 02/24/2022 17:05:56 - INFO - codeparrot_training - Step 11998: {'lr': 0.0004483582550446938, 'samples': 6143488, 'steps': 11998, 'loss/train': 1.6544023752212524} 02/24/2022 17:06:00 - INFO - codeparrot_training - Step 11999: {'lr': 0.0004483482954835819, 'samples': 6144000, 'steps': 11999, 'loss/train': 1.3312416076660156} 02/24/2022 17:06:00 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 17:06:17 - WARNING - huggingface_hub.repository - Several commits (12) will be pushed upstream. +02/24/2022 17:06:17 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 17:06:51 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 3d9f7da..e455b12 floral-grass-11 -> floral-grass-11 + +02/24/2022 17:06:57 - INFO - codeparrot_training - Step 12000: {'lr': 0.0004483383350728088, 'samples': 6144512, 'steps': 12000, 'loss/train': 1.8612650632858276} +02/24/2022 17:07:00 - INFO - codeparrot_training - Step 12001: {'lr': 0.00044832837381241733, 'samples': 6145024, 'steps': 12001, 'loss/train': 2.081125020980835} +02/24/2022 17:07:06 - INFO - codeparrot_training - Step 12002: {'lr': 0.00044831841170245003, 'samples': 6145536, 'steps': 12002, 'loss/train': 3.2302181720733643} +02/24/2022 17:07:09 - INFO - codeparrot_training - Step 12003: {'lr': 0.0004483084487429496, 'samples': 6146048, 'steps': 12003, 'loss/train': 0.415947288274765} +02/24/2022 17:07:16 - INFO - codeparrot_training - Step 12004: {'lr': 0.00044829848493395884, 'samples': 6146560, 'steps': 12004, 'loss/train': 1.6646977663040161} +02/24/2022 17:07:19 - INFO - codeparrot_training - Step 12005: {'lr': 0.00044828852027552023, 'samples': 6147072, 'steps': 12005, 'loss/train': 2.7473230361938477} +02/24/2022 17:07:25 - INFO - codeparrot_training - Step 12006: {'lr': 0.00044827855476767665, 'samples': 6147584, 'steps': 12006, 'loss/train': 2.021182060241699} +02/24/2022 17:07:28 - INFO - codeparrot_training - Step 12007: {'lr': 0.00044826858841047067, 'samples': 6148096, 'steps': 12007, 'loss/train': 2.6072206497192383} +02/24/2022 17:07:34 - INFO - codeparrot_training - Step 12008: {'lr': 0.00044825862120394504, 'samples': 6148608, 'steps': 12008, 'loss/train': 1.318609356880188} +02/24/2022 17:07:37 - INFO - codeparrot_training - Step 12009: {'lr': 0.00044824865314814245, 'samples': 6149120, 'steps': 12009, 'loss/train': 1.8420289754867554} +02/24/2022 17:07:43 - INFO - codeparrot_training - Step 12010: {'lr': 0.00044823868424310553, 'samples': 6149632, 'steps': 12010, 'loss/train': 2.6879255771636963} +02/24/2022 17:07:46 - INFO - codeparrot_training - Step 12011: {'lr': 0.00044822871448887703, 'samples': 6150144, 'steps': 12011, 'loss/train': 1.9901670217514038} +02/24/2022 17:07:52 - INFO - codeparrot_training - Step 12012: {'lr': 0.0004482187438854997, 'samples': 6150656, 'steps': 12012, 'loss/train': 2.8354454040527344} +02/24/2022 17:07:55 - INFO - codeparrot_training - Step 12013: {'lr': 0.00044820877243301617, 'samples': 6151168, 'steps': 12013, 'loss/train': 2.2613120079040527} +02/24/2022 17:07:59 - INFO - codeparrot_training - Step 12014: {'lr': 0.00044819880013146924, 'samples': 6151680, 'steps': 12014, 'loss/train': 1.8379782438278198} +02/24/2022 17:08:05 - INFO - codeparrot_training - Step 12015: {'lr': 0.0004481888269809016, 'samples': 6152192, 'steps': 12015, 'loss/train': 2.1393845081329346} +02/24/2022 17:08:08 - INFO - codeparrot_training - Step 12016: {'lr': 0.0004481788529813559, 'samples': 6152704, 'steps': 12016, 'loss/train': 2.1670098304748535} +02/24/2022 17:08:14 - INFO - codeparrot_training - Step 12017: {'lr': 0.00044816887813287494, 'samples': 6153216, 'steps': 12017, 'loss/train': 2.503826856613159} +02/24/2022 17:08:19 - INFO - codeparrot_training - Step 12018: {'lr': 0.0004481589024355014, 'samples': 6153728, 'steps': 12018, 'loss/train': 1.9746654033660889} +02/24/2022 17:08:23 - INFO - codeparrot_training - Step 12019: {'lr': 0.00044814892588927816, 'samples': 6154240, 'steps': 12019, 'loss/train': 3.7525477409362793} +02/24/2022 17:08:29 - INFO - codeparrot_training - Step 12020: {'lr': 0.00044813894849424777, 'samples': 6154752, 'steps': 12020, 'loss/train': 2.1124234199523926} +02/24/2022 17:08:32 - INFO - codeparrot_training - Step 12021: {'lr': 0.00044812897025045295, 'samples': 6155264, 'steps': 12021, 'loss/train': 3.585268974304199} +02/24/2022 17:08:38 - INFO - codeparrot_training - Step 12022: {'lr': 0.00044811899115793666, 'samples': 6155776, 'steps': 12022, 'loss/train': 1.7901548147201538} +02/24/2022 17:08:41 - INFO - codeparrot_training - Step 12023: {'lr': 0.0004481090112167415, 'samples': 6156288, 'steps': 12023, 'loss/train': 1.735459566116333} +02/24/2022 17:08:47 - INFO - codeparrot_training - Step 12024: {'lr': 0.0004480990304269102, 'samples': 6156800, 'steps': 12024, 'loss/train': 1.8389549255371094} +02/24/2022 17:08:50 - INFO - codeparrot_training - Step 12025: {'lr': 0.00044808904878848555, 'samples': 6157312, 'steps': 12025, 'loss/train': 2.4541585445404053} +02/24/2022 17:08:56 - INFO - codeparrot_training - Step 12026: {'lr': 0.00044807906630151033, 'samples': 6157824, 'steps': 12026, 'loss/train': 1.9353373050689697} +02/24/2022 17:08:59 - INFO - codeparrot_training - Step 12027: {'lr': 0.00044806908296602733, 'samples': 6158336, 'steps': 12027, 'loss/train': 1.9195120334625244} +02/24/2022 17:09:05 - INFO - codeparrot_training - Step 12028: {'lr': 0.0004480590987820793, 'samples': 6158848, 'steps': 12028, 'loss/train': 2.1406238079071045} +02/24/2022 17:09:08 - INFO - codeparrot_training - Step 12029: {'lr': 0.00044804911374970893, 'samples': 6159360, 'steps': 12029, 'loss/train': 2.5416066646575928} +02/24/2022 17:09:15 - INFO - codeparrot_training - Step 12030: {'lr': 0.000448039127868959, 'samples': 6159872, 'steps': 12030, 'loss/train': 6.636228561401367} +02/24/2022 17:09:18 - INFO - codeparrot_training - Step 12031: {'lr': 0.0004480291411398724, 'samples': 6160384, 'steps': 12031, 'loss/train': 2.637648344039917} +02/24/2022 17:09:24 - INFO - codeparrot_training - Step 12032: {'lr': 0.0004480191535624918, 'samples': 6160896, 'steps': 12032, 'loss/train': 1.6071592569351196} +02/24/2022 17:09:27 - INFO - codeparrot_training - Step 12033: {'lr': 0.00044800916513686, 'samples': 6161408, 'steps': 12033, 'loss/train': 1.6605761051177979} +02/24/2022 17:09:33 - INFO - codeparrot_training - Step 12034: {'lr': 0.00044799917586301987, 'samples': 6161920, 'steps': 12034, 'loss/train': 2.0364322662353516} +02/24/2022 17:09:36 - INFO - codeparrot_training - Step 12035: {'lr': 0.00044798918574101413, 'samples': 6162432, 'steps': 12035, 'loss/train': 2.449662446975708} +02/24/2022 17:09:42 - INFO - codeparrot_training - Step 12036: {'lr': 0.00044797919477088555, 'samples': 6162944, 'steps': 12036, 'loss/train': 2.2326977252960205} +02/24/2022 17:09:45 - INFO - codeparrot_training - Step 12037: {'lr': 0.00044796920295267696, 'samples': 6163456, 'steps': 12037, 'loss/train': 1.6525148153305054} +02/24/2022 17:09:50 - INFO - codeparrot_training - Step 12038: {'lr': 0.0004479592102864313, 'samples': 6163968, 'steps': 12038, 'loss/train': 2.1086108684539795} +02/24/2022 17:09:54 - INFO - codeparrot_training - Step 12039: {'lr': 0.0004479492167721911, 'samples': 6164480, 'steps': 12039, 'loss/train': 1.9792639017105103} +02/24/2022 17:10:00 - INFO - codeparrot_training - Step 12040: {'lr': 0.0004479392224099993, 'samples': 6164992, 'steps': 12040, 'loss/train': 2.3902857303619385} +02/24/2022 17:10:04 - INFO - codeparrot_training - Step 12041: {'lr': 0.00044792922719989883, 'samples': 6165504, 'steps': 12041, 'loss/train': 2.0644795894622803} +02/24/2022 17:10:09 - INFO - codeparrot_training - Step 12042: {'lr': 0.00044791923114193233, 'samples': 6166016, 'steps': 12042, 'loss/train': 2.6241061687469482} +02/24/2022 17:10:13 - INFO - codeparrot_training - Step 12043: {'lr': 0.0004479092342361427, 'samples': 6166528, 'steps': 12043, 'loss/train': 1.2237296104431152} +02/24/2022 17:10:18 - INFO - codeparrot_training - Step 12044: {'lr': 0.0004478992364825728, 'samples': 6167040, 'steps': 12044, 'loss/train': 2.6538925170898438} +02/24/2022 17:10:22 - INFO - codeparrot_training - Step 12045: {'lr': 0.00044788923788126534, 'samples': 6167552, 'steps': 12045, 'loss/train': 2.507875442504883} +02/24/2022 17:10:27 - INFO - codeparrot_training - Step 12046: {'lr': 0.00044787923843226323, 'samples': 6168064, 'steps': 12046, 'loss/train': 2.967284917831421} +02/24/2022 17:10:31 - INFO - codeparrot_training - Step 12047: {'lr': 0.0004478692381356093, 'samples': 6168576, 'steps': 12047, 'loss/train': 2.0878546237945557} +02/24/2022 17:10:36 - INFO - codeparrot_training - Step 12048: {'lr': 0.00044785923699134646, 'samples': 6169088, 'steps': 12048, 'loss/train': 1.0714280605316162} +02/24/2022 17:10:40 - INFO - codeparrot_training - Step 12049: {'lr': 0.0004478492349995174, 'samples': 6169600, 'steps': 12049, 'loss/train': 3.110184907913208} +02/24/2022 17:10:45 - INFO - codeparrot_training - Step 12050: {'lr': 0.00044783923216016507, 'samples': 6170112, 'steps': 12050, 'loss/train': 2.5074799060821533} +02/24/2022 17:10:49 - INFO - codeparrot_training - Step 12051: {'lr': 0.0004478292284733323, 'samples': 6170624, 'steps': 12051, 'loss/train': 1.9328501224517822} +02/24/2022 17:10:54 - INFO - codeparrot_training - Step 12052: {'lr': 0.00044781922393906186, 'samples': 6171136, 'steps': 12052, 'loss/train': 0.18410548567771912} +02/24/2022 17:10:58 - INFO - codeparrot_training - Step 12053: {'lr': 0.00044780921855739676, 'samples': 6171648, 'steps': 12053, 'loss/train': 2.585103988647461} +02/24/2022 17:11:04 - INFO - codeparrot_training - Step 12054: {'lr': 0.00044779921232837973, 'samples': 6172160, 'steps': 12054, 'loss/train': 2.492974281311035} +02/24/2022 17:11:07 - INFO - codeparrot_training - Step 12055: {'lr': 0.0004477892052520537, 'samples': 6172672, 'steps': 12055, 'loss/train': 1.8947025537490845} +02/24/2022 17:11:13 - INFO - codeparrot_training - Step 12056: {'lr': 0.0004477791973284616, 'samples': 6173184, 'steps': 12056, 'loss/train': 2.6554815769195557} +02/24/2022 17:11:17 - INFO - codeparrot_training - Step 12057: {'lr': 0.00044776918855764616, 'samples': 6173696, 'steps': 12057, 'loss/train': 0.9762059450149536} +02/24/2022 17:11:22 - INFO - codeparrot_training - Step 12058: {'lr': 0.00044775917893965025, 'samples': 6174208, 'steps': 12058, 'loss/train': 1.3967198133468628} +02/24/2022 17:11:26 - INFO - codeparrot_training - Step 12059: {'lr': 0.00044774916847451683, 'samples': 6174720, 'steps': 12059, 'loss/train': 2.5747334957122803} +02/24/2022 17:11:31 - INFO - codeparrot_training - Step 12060: {'lr': 0.0004477391571622889, 'samples': 6175232, 'steps': 12060, 'loss/train': 2.2955400943756104} +02/24/2022 17:11:35 - INFO - codeparrot_training - Step 12061: {'lr': 0.00044772914500300907, 'samples': 6175744, 'steps': 12061, 'loss/train': 1.6977120637893677} +02/24/2022 17:11:41 - INFO - codeparrot_training - Step 12062: {'lr': 0.0004477191319967204, 'samples': 6176256, 'steps': 12062, 'loss/train': 1.8222932815551758} +02/24/2022 17:11:44 - INFO - codeparrot_training - Step 12063: {'lr': 0.0004477091181434658, 'samples': 6176768, 'steps': 12063, 'loss/train': 1.5017566680908203} +02/24/2022 17:11:50 - INFO - codeparrot_training - Step 12064: {'lr': 0.00044769910344328803, 'samples': 6177280, 'steps': 12064, 'loss/train': 2.077258825302124} +02/24/2022 17:11:53 - INFO - codeparrot_training - Step 12065: {'lr': 0.00044768908789623015, 'samples': 6177792, 'steps': 12065, 'loss/train': 2.035762071609497} +02/24/2022 17:12:00 - INFO - codeparrot_training - Step 12066: {'lr': 0.00044767907150233496, 'samples': 6178304, 'steps': 12066, 'loss/train': 1.5400667190551758} +02/24/2022 17:12:03 - INFO - codeparrot_training - Step 12067: {'lr': 0.0004476690542616454, 'samples': 6178816, 'steps': 12067, 'loss/train': 3.015357255935669} +02/24/2022 17:12:09 - INFO - codeparrot_training - Step 12068: {'lr': 0.00044765903617420436, 'samples': 6179328, 'steps': 12068, 'loss/train': 1.8078761100769043} +02/24/2022 17:12:12 - INFO - codeparrot_training - Step 12069: {'lr': 0.0004476490172400548, 'samples': 6179840, 'steps': 12069, 'loss/train': 2.3099827766418457} +02/24/2022 17:12:17 - INFO - codeparrot_training - Step 12070: {'lr': 0.00044763899745923965, 'samples': 6180352, 'steps': 12070, 'loss/train': 2.4852898120880127} +02/24/2022 17:12:21 - INFO - codeparrot_training - Step 12071: {'lr': 0.0004476289768318017, 'samples': 6180864, 'steps': 12071, 'loss/train': 3.00781512260437} +02/24/2022 17:12:27 - INFO - codeparrot_training - Step 12072: {'lr': 0.00044761895535778404, 'samples': 6181376, 'steps': 12072, 'loss/train': 1.6103401184082031} +02/24/2022 17:12:30 - INFO - codeparrot_training - Step 12073: {'lr': 0.0004476089330372295, 'samples': 6181888, 'steps': 12073, 'loss/train': 2.1514294147491455} +02/24/2022 17:12:35 - INFO - codeparrot_training - Step 12074: {'lr': 0.00044759890987018105, 'samples': 6182400, 'steps': 12074, 'loss/train': 3.306278705596924} +02/24/2022 17:12:39 - INFO - codeparrot_training - Step 12075: {'lr': 0.0004475888858566816, 'samples': 6182912, 'steps': 12075, 'loss/train': 1.6765960454940796} +02/24/2022 17:12:45 - INFO - codeparrot_training - Step 12076: {'lr': 0.00044757886099677416, 'samples': 6183424, 'steps': 12076, 'loss/train': 1.4008985757827759} +02/24/2022 17:12:49 - INFO - codeparrot_training - Step 12077: {'lr': 0.0004475688352905015, 'samples': 6183936, 'steps': 12077, 'loss/train': 2.1884684562683105} +02/24/2022 17:12:54 - INFO - codeparrot_training - Step 12078: {'lr': 0.00044755880873790675, 'samples': 6184448, 'steps': 12078, 'loss/train': 1.8656758069992065} +02/24/2022 17:12:58 - INFO - codeparrot_training - Step 12079: {'lr': 0.00044754878133903284, 'samples': 6184960, 'steps': 12079, 'loss/train': 1.9137187004089355} +02/24/2022 17:13:03 - INFO - codeparrot_training - Step 12080: {'lr': 0.0004475387530939226, 'samples': 6185472, 'steps': 12080, 'loss/train': 2.4631569385528564} +02/24/2022 17:13:07 - INFO - codeparrot_training - Step 12081: {'lr': 0.00044752872400261913, 'samples': 6185984, 'steps': 12081, 'loss/train': 1.814349889755249} +02/24/2022 17:13:12 - INFO - codeparrot_training - Step 12082: {'lr': 0.0004475186940651653, 'samples': 6186496, 'steps': 12082, 'loss/train': 1.066323161125183} +02/24/2022 17:13:16 - INFO - codeparrot_training - Step 12083: {'lr': 0.0004475086632816041, 'samples': 6187008, 'steps': 12083, 'loss/train': 1.6021504402160645} +02/24/2022 17:13:21 - INFO - codeparrot_training - Step 12084: {'lr': 0.00044749863165197845, 'samples': 6187520, 'steps': 12084, 'loss/train': 1.9285125732421875} +02/24/2022 17:13:25 - INFO - codeparrot_training - Step 12085: {'lr': 0.00044748859917633144, 'samples': 6188032, 'steps': 12085, 'loss/train': 2.1447486877441406} +02/24/2022 17:13:31 - INFO - codeparrot_training - Step 12086: {'lr': 0.00044747856585470604, 'samples': 6188544, 'steps': 12086, 'loss/train': 1.8635483980178833} +02/24/2022 17:13:34 - INFO - codeparrot_training - Step 12087: {'lr': 0.00044746853168714507, 'samples': 6189056, 'steps': 12087, 'loss/train': 2.179267168045044} +02/24/2022 17:13:40 - INFO - codeparrot_training - Step 12088: {'lr': 0.0004474584966736917, 'samples': 6189568, 'steps': 12088, 'loss/train': 2.1392414569854736} +02/24/2022 17:13:43 - INFO - codeparrot_training - Step 12089: {'lr': 0.00044744846081438874, 'samples': 6190080, 'steps': 12089, 'loss/train': 1.8742955923080444} +02/24/2022 17:13:49 - INFO - codeparrot_training - Step 12090: {'lr': 0.0004474384241092793, 'samples': 6190592, 'steps': 12090, 'loss/train': 2.2031991481781006} +02/24/2022 17:13:52 - INFO - codeparrot_training - Step 12091: {'lr': 0.00044742838655840636, 'samples': 6191104, 'steps': 12091, 'loss/train': 2.4598612785339355} +02/24/2022 17:13:58 - INFO - codeparrot_training - Step 12092: {'lr': 0.0004474183481618129, 'samples': 6191616, 'steps': 12092, 'loss/train': 2.594998359680176} +02/24/2022 17:14:02 - INFO - codeparrot_training - Step 12093: {'lr': 0.00044740830891954196, 'samples': 6192128, 'steps': 12093, 'loss/train': 2.505722761154175} +02/24/2022 17:14:07 - INFO - codeparrot_training - Step 12094: {'lr': 0.0004473982688316365, 'samples': 6192640, 'steps': 12094, 'loss/train': 0.6227912902832031} +02/24/2022 17:14:11 - INFO - codeparrot_training - Step 12095: {'lr': 0.0004473882278981395, 'samples': 6193152, 'steps': 12095, 'loss/train': 2.19868540763855} +02/24/2022 17:14:17 - INFO - codeparrot_training - Step 12096: {'lr': 0.000447378186119094, 'samples': 6193664, 'steps': 12096, 'loss/train': 0.2705650329589844} +02/24/2022 17:14:21 - INFO - codeparrot_training - Step 12097: {'lr': 0.00044736814349454303, 'samples': 6194176, 'steps': 12097, 'loss/train': 1.6578989028930664} +02/24/2022 17:14:26 - INFO - codeparrot_training - Step 12098: {'lr': 0.0004473581000245296, 'samples': 6194688, 'steps': 12098, 'loss/train': 1.0228785276412964} +02/24/2022 17:14:30 - INFO - codeparrot_training - Step 12099: {'lr': 0.00044734805570909676, 'samples': 6195200, 'steps': 12099, 'loss/train': 2.7728471755981445} +02/24/2022 17:14:35 - INFO - codeparrot_training - Step 12100: {'lr': 0.0004473380105482875, 'samples': 6195712, 'steps': 12100, 'loss/train': 0.7916803956031799} +02/24/2022 17:14:39 - INFO - codeparrot_training - Step 12101: {'lr': 0.0004473279645421449, 'samples': 6196224, 'steps': 12101, 'loss/train': 3.0350024700164795} +02/24/2022 17:14:44 - INFO - codeparrot_training - Step 12102: {'lr': 0.00044731791769071197, 'samples': 6196736, 'steps': 12102, 'loss/train': 1.2722065448760986} +02/24/2022 17:14:48 - INFO - codeparrot_training - Step 12103: {'lr': 0.00044730786999403166, 'samples': 6197248, 'steps': 12103, 'loss/train': 0.8307028412818909} +02/24/2022 17:14:53 - INFO - codeparrot_training - Step 12104: {'lr': 0.00044729782145214717, 'samples': 6197760, 'steps': 12104, 'loss/train': 2.607999324798584} +02/24/2022 17:14:56 - INFO - codeparrot_training - Step 12105: {'lr': 0.0004472877720651014, 'samples': 6198272, 'steps': 12105, 'loss/train': 2.4205119609832764} +02/24/2022 17:15:02 - INFO - codeparrot_training - Step 12106: {'lr': 0.0004472777218329375, 'samples': 6198784, 'steps': 12106, 'loss/train': 2.9563732147216797} +02/24/2022 17:15:06 - INFO - codeparrot_training - Step 12107: {'lr': 0.00044726767075569843, 'samples': 6199296, 'steps': 12107, 'loss/train': 0.15740205347537994} +02/24/2022 17:15:11 - INFO - codeparrot_training - Step 12108: {'lr': 0.0004472576188334273, 'samples': 6199808, 'steps': 12108, 'loss/train': 2.1738076210021973} +02/24/2022 17:15:15 - INFO - codeparrot_training - Step 12109: {'lr': 0.00044724756606616726, 'samples': 6200320, 'steps': 12109, 'loss/train': 2.113445520401001} +02/24/2022 17:15:20 - INFO - codeparrot_training - Step 12110: {'lr': 0.00044723751245396117, 'samples': 6200832, 'steps': 12110, 'loss/train': 1.9405531883239746} +02/24/2022 17:15:24 - INFO - codeparrot_training - Step 12111: {'lr': 0.00044722745799685227, 'samples': 6201344, 'steps': 12111, 'loss/train': 1.2801152467727661} +02/24/2022 17:15:30 - INFO - codeparrot_training - Step 12112: {'lr': 0.00044721740269488354, 'samples': 6201856, 'steps': 12112, 'loss/train': 2.6905722618103027} +02/24/2022 17:15:33 - INFO - codeparrot_training - Step 12113: {'lr': 0.0004472073465480981, 'samples': 6202368, 'steps': 12113, 'loss/train': 1.1298489570617676} +02/24/2022 17:15:39 - INFO - codeparrot_training - Step 12114: {'lr': 0.000447197289556539, 'samples': 6202880, 'steps': 12114, 'loss/train': 2.1920905113220215} +02/24/2022 17:15:42 - INFO - codeparrot_training - Step 12115: {'lr': 0.0004471872317202493, 'samples': 6203392, 'steps': 12115, 'loss/train': 1.6994199752807617} +02/24/2022 17:15:48 - INFO - codeparrot_training - Step 12116: {'lr': 0.0004471771730392722, 'samples': 6203904, 'steps': 12116, 'loss/train': 1.4567934274673462} +02/24/2022 17:15:51 - INFO - codeparrot_training - Step 12117: {'lr': 0.00044716711351365057, 'samples': 6204416, 'steps': 12117, 'loss/train': 2.0766303539276123} +02/24/2022 17:15:57 - INFO - codeparrot_training - Step 12118: {'lr': 0.00044715705314342776, 'samples': 6204928, 'steps': 12118, 'loss/train': 1.7800449132919312} +02/24/2022 17:16:00 - INFO - codeparrot_training - Step 12119: {'lr': 0.0004471469919286467, 'samples': 6205440, 'steps': 12119, 'loss/train': 2.729935646057129} +02/24/2022 17:16:06 - INFO - codeparrot_training - Step 12120: {'lr': 0.0004471369298693505, 'samples': 6205952, 'steps': 12120, 'loss/train': 2.7959799766540527} +02/24/2022 17:16:09 - INFO - codeparrot_training - Step 12121: {'lr': 0.0004471268669655822, 'samples': 6206464, 'steps': 12121, 'loss/train': 1.8632409572601318} +02/24/2022 17:16:16 - INFO - codeparrot_training - Step 12122: {'lr': 0.0004471168032173852, 'samples': 6206976, 'steps': 12122, 'loss/train': 1.7287883758544922} +02/24/2022 17:16:19 - INFO - codeparrot_training - Step 12123: {'lr': 0.0004471067386248023, 'samples': 6207488, 'steps': 12123, 'loss/train': 1.9525649547576904} +02/24/2022 17:16:25 - INFO - codeparrot_training - Step 12124: {'lr': 0.0004470966731878767, 'samples': 6208000, 'steps': 12124, 'loss/train': 2.1880342960357666} +02/24/2022 17:16:28 - INFO - codeparrot_training - Step 12125: {'lr': 0.0004470866069066516, 'samples': 6208512, 'steps': 12125, 'loss/train': 1.3273388147354126} +02/24/2022 17:16:34 - INFO - codeparrot_training - Step 12126: {'lr': 0.00044707653978117004, 'samples': 6209024, 'steps': 12126, 'loss/train': 2.819064140319824} +02/24/2022 17:16:37 - INFO - codeparrot_training - Step 12127: {'lr': 0.00044706647181147507, 'samples': 6209536, 'steps': 12127, 'loss/train': 3.3440170288085938} +02/24/2022 17:16:43 - INFO - codeparrot_training - Step 12128: {'lr': 0.00044705640299761004, 'samples': 6210048, 'steps': 12128, 'loss/train': 2.1373674869537354} +02/24/2022 17:16:46 - INFO - codeparrot_training - Step 12129: {'lr': 0.0004470463333396179, 'samples': 6210560, 'steps': 12129, 'loss/train': 1.064770221710205} +02/24/2022 17:16:52 - INFO - codeparrot_training - Step 12130: {'lr': 0.0004470362628375418, 'samples': 6211072, 'steps': 12130, 'loss/train': 1.7912788391113281} +02/24/2022 17:16:55 - INFO - codeparrot_training - Step 12131: {'lr': 0.000447026191491425, 'samples': 6211584, 'steps': 12131, 'loss/train': 2.478670597076416} +02/24/2022 17:17:01 - INFO - codeparrot_training - Step 12132: {'lr': 0.0004470161193013105, 'samples': 6212096, 'steps': 12132, 'loss/train': 2.443627119064331} +02/24/2022 17:17:04 - INFO - codeparrot_training - Step 12133: {'lr': 0.0004470060462672415, 'samples': 6212608, 'steps': 12133, 'loss/train': 2.4356517791748047} +02/24/2022 17:17:10 - INFO - codeparrot_training - Step 12134: {'lr': 0.0004469959723892612, 'samples': 6213120, 'steps': 12134, 'loss/train': 2.313127279281616} +02/24/2022 17:17:16 - INFO - codeparrot_training - Step 12135: {'lr': 0.0004469858976674126, 'samples': 6213632, 'steps': 12135, 'loss/train': 1.4179664850234985} +02/24/2022 17:17:19 - INFO - codeparrot_training - Step 12136: {'lr': 0.000446975822101739, 'samples': 6214144, 'steps': 12136, 'loss/train': 2.2050106525421143} +02/24/2022 17:17:25 - INFO - codeparrot_training - Step 12137: {'lr': 0.00044696574569228365, 'samples': 6214656, 'steps': 12137, 'loss/train': 2.052475690841675} +02/24/2022 17:17:28 - INFO - codeparrot_training - Step 12138: {'lr': 0.00044695566843908947, 'samples': 6215168, 'steps': 12138, 'loss/train': 2.0426907539367676} +02/24/2022 17:17:34 - INFO - codeparrot_training - Step 12139: {'lr': 0.0004469455903421998, 'samples': 6215680, 'steps': 12139, 'loss/train': 1.3605375289916992} +02/24/2022 17:17:37 - INFO - codeparrot_training - Step 12140: {'lr': 0.0004469355114016577, 'samples': 6216192, 'steps': 12140, 'loss/train': 0.4752911329269409} +02/24/2022 17:17:43 - INFO - codeparrot_training - Step 12141: {'lr': 0.0004469254316175065, 'samples': 6216704, 'steps': 12141, 'loss/train': 1.0319311618804932} +02/24/2022 17:17:46 - INFO - codeparrot_training - Step 12142: {'lr': 0.0004469153509897892, 'samples': 6217216, 'steps': 12142, 'loss/train': 2.084182024002075} +02/24/2022 17:17:52 - INFO - codeparrot_training - Step 12143: {'lr': 0.00044690526951854907, 'samples': 6217728, 'steps': 12143, 'loss/train': 1.1665127277374268} +02/24/2022 17:17:56 - INFO - codeparrot_training - Step 12144: {'lr': 0.0004468951872038293, 'samples': 6218240, 'steps': 12144, 'loss/train': 2.1122260093688965} +02/24/2022 17:18:01 - INFO - codeparrot_training - Step 12145: {'lr': 0.00044688510404567307, 'samples': 6218752, 'steps': 12145, 'loss/train': 2.1666266918182373} +02/24/2022 17:18:05 - INFO - codeparrot_training - Step 12146: {'lr': 0.0004468750200441236, 'samples': 6219264, 'steps': 12146, 'loss/train': 0.9853890538215637} +02/24/2022 17:18:10 - INFO - codeparrot_training - Step 12147: {'lr': 0.00044686493519922405, 'samples': 6219776, 'steps': 12147, 'loss/train': 2.296105146408081} +02/24/2022 17:18:14 - INFO - codeparrot_training - Step 12148: {'lr': 0.00044685484951101763, 'samples': 6220288, 'steps': 12148, 'loss/train': 1.2640005350112915} +02/24/2022 17:18:19 - INFO - codeparrot_training - Step 12149: {'lr': 0.0004468447629795475, 'samples': 6220800, 'steps': 12149, 'loss/train': 1.3010812997817993} +02/24/2022 17:18:23 - INFO - codeparrot_training - Step 12150: {'lr': 0.00044683467560485696, 'samples': 6221312, 'steps': 12150, 'loss/train': 0.910775363445282} +02/24/2022 17:18:28 - INFO - codeparrot_training - Step 12151: {'lr': 0.00044682458738698916, 'samples': 6221824, 'steps': 12151, 'loss/train': 2.668936252593994} +02/24/2022 17:18:32 - INFO - codeparrot_training - Step 12152: {'lr': 0.0004468144983259873, 'samples': 6222336, 'steps': 12152, 'loss/train': 2.7772552967071533} +02/24/2022 17:18:38 - INFO - codeparrot_training - Step 12153: {'lr': 0.00044680440842189464, 'samples': 6222848, 'steps': 12153, 'loss/train': 1.2003204822540283} +02/24/2022 17:18:41 - INFO - codeparrot_training - Step 12154: {'lr': 0.0004467943176747544, 'samples': 6223360, 'steps': 12154, 'loss/train': 2.720294713973999} +02/24/2022 17:18:47 - INFO - codeparrot_training - Step 12155: {'lr': 0.0004467842260846098, 'samples': 6223872, 'steps': 12155, 'loss/train': 1.2617138624191284} +02/24/2022 17:18:50 - INFO - codeparrot_training - Step 12156: {'lr': 0.00044677413365150397, 'samples': 6224384, 'steps': 12156, 'loss/train': 6.09367036819458} +02/24/2022 17:18:56 - INFO - codeparrot_training - Step 12157: {'lr': 0.00044676404037548035, 'samples': 6224896, 'steps': 12157, 'loss/train': 0.2070409655570984} +02/24/2022 17:18:59 - INFO - codeparrot_training - Step 12158: {'lr': 0.0004467539462565821, 'samples': 6225408, 'steps': 12158, 'loss/train': 2.41174578666687} +02/24/2022 17:19:05 - INFO - codeparrot_training - Step 12159: {'lr': 0.0004467438512948523, 'samples': 6225920, 'steps': 12159, 'loss/train': 2.365654230117798} +02/24/2022 17:19:09 - INFO - codeparrot_training - Step 12160: {'lr': 0.00044673375549033435, 'samples': 6226432, 'steps': 12160, 'loss/train': 2.6146440505981445} +02/24/2022 17:19:14 - INFO - codeparrot_training - Step 12161: {'lr': 0.0004467236588430714, 'samples': 6226944, 'steps': 12161, 'loss/train': 2.173468828201294} +02/24/2022 17:19:18 - INFO - codeparrot_training - Step 12162: {'lr': 0.00044671356135310685, 'samples': 6227456, 'steps': 12162, 'loss/train': 2.3044090270996094} +02/24/2022 17:19:23 - INFO - codeparrot_training - Step 12163: {'lr': 0.0004467034630204839, 'samples': 6227968, 'steps': 12163, 'loss/train': 2.7269012928009033} +02/24/2022 17:19:27 - INFO - codeparrot_training - Step 12164: {'lr': 0.0004466933638452457, 'samples': 6228480, 'steps': 12164, 'loss/train': 1.8663148880004883} +02/24/2022 17:19:33 - INFO - codeparrot_training - Step 12165: {'lr': 0.0004466832638274356, 'samples': 6228992, 'steps': 12165, 'loss/train': 2.520116090774536} +02/24/2022 17:19:36 - INFO - codeparrot_training - Step 12166: {'lr': 0.0004466731629670969, 'samples': 6229504, 'steps': 12166, 'loss/train': 0.5261411070823669} +02/24/2022 17:19:42 - INFO - codeparrot_training - Step 12167: {'lr': 0.00044666306126427276, 'samples': 6230016, 'steps': 12167, 'loss/train': 1.8091787099838257} +02/24/2022 17:19:45 - INFO - codeparrot_training - Step 12168: {'lr': 0.00044665295871900655, 'samples': 6230528, 'steps': 12168, 'loss/train': 1.778935432434082} +02/24/2022 17:19:51 - INFO - codeparrot_training - Step 12169: {'lr': 0.0004466428553313415, 'samples': 6231040, 'steps': 12169, 'loss/train': 1.3250904083251953} +02/24/2022 17:19:55 - INFO - codeparrot_training - Step 12170: {'lr': 0.0004466327511013208, 'samples': 6231552, 'steps': 12170, 'loss/train': 2.5258848667144775} +02/24/2022 17:20:00 - INFO - codeparrot_training - Step 12171: {'lr': 0.00044662264602898794, 'samples': 6232064, 'steps': 12171, 'loss/train': 2.155921220779419} +02/24/2022 17:20:04 - INFO - codeparrot_training - Step 12172: {'lr': 0.00044661254011438614, 'samples': 6232576, 'steps': 12172, 'loss/train': 2.612653970718384} +02/24/2022 17:20:09 - INFO - codeparrot_training - Step 12173: {'lr': 0.00044660243335755854, 'samples': 6233088, 'steps': 12173, 'loss/train': 2.268120765686035} +02/24/2022 17:20:13 - INFO - codeparrot_training - Step 12174: {'lr': 0.00044659232575854866, 'samples': 6233600, 'steps': 12174, 'loss/train': 1.769982933998108} +02/24/2022 17:20:18 - INFO - codeparrot_training - Step 12175: {'lr': 0.00044658221731739954, 'samples': 6234112, 'steps': 12175, 'loss/train': 2.595690965652466} +02/24/2022 17:20:22 - INFO - codeparrot_training - Step 12176: {'lr': 0.0004465721080341547, 'samples': 6234624, 'steps': 12176, 'loss/train': 1.7044028043746948} +02/24/2022 17:20:27 - INFO - codeparrot_training - Step 12177: {'lr': 0.00044656199790885743, 'samples': 6235136, 'steps': 12177, 'loss/train': 2.585681915283203} +02/24/2022 17:20:31 - INFO - codeparrot_training - Step 12178: {'lr': 0.0004465518869415509, 'samples': 6235648, 'steps': 12178, 'loss/train': 2.7242603302001953} +02/24/2022 17:20:37 - INFO - codeparrot_training - Step 12179: {'lr': 0.0004465417751322785, 'samples': 6236160, 'steps': 12179, 'loss/train': 2.0500900745391846} +02/24/2022 17:20:41 - INFO - codeparrot_training - Step 12180: {'lr': 0.00044653166248108357, 'samples': 6236672, 'steps': 12180, 'loss/train': 2.794024705886841} +02/24/2022 17:20:46 - INFO - codeparrot_training - Step 12181: {'lr': 0.00044652154898800937, 'samples': 6237184, 'steps': 12181, 'loss/train': 1.8298736810684204} +02/24/2022 17:20:49 - INFO - codeparrot_training - Step 12182: {'lr': 0.0004465114346530993, 'samples': 6237696, 'steps': 12182, 'loss/train': 2.546334981918335} +02/24/2022 17:20:55 - INFO - codeparrot_training - Step 12183: {'lr': 0.0004465013194763966, 'samples': 6238208, 'steps': 12183, 'loss/train': 2.531102418899536} +02/24/2022 17:20:58 - INFO - codeparrot_training - Step 12184: {'lr': 0.0004464912034579447, 'samples': 6238720, 'steps': 12184, 'loss/train': 2.207943916320801} +02/24/2022 17:21:04 - INFO - codeparrot_training - Step 12185: {'lr': 0.00044648108659778687, 'samples': 6239232, 'steps': 12185, 'loss/train': 1.4557642936706543} +02/24/2022 17:21:07 - INFO - codeparrot_training - Step 12186: {'lr': 0.0004464709688959664, 'samples': 6239744, 'steps': 12186, 'loss/train': 2.5606086254119873} +02/24/2022 17:21:13 - INFO - codeparrot_training - Step 12187: {'lr': 0.0004464608503525267, 'samples': 6240256, 'steps': 12187, 'loss/train': 2.242934226989746} +02/24/2022 17:21:16 - INFO - codeparrot_training - Step 12188: {'lr': 0.0004464507309675111, 'samples': 6240768, 'steps': 12188, 'loss/train': 0.28196999430656433} +02/24/2022 17:21:23 - INFO - codeparrot_training - Step 12189: {'lr': 0.000446440610740963, 'samples': 6241280, 'steps': 12189, 'loss/train': 2.3940367698669434} +02/24/2022 17:21:26 - INFO - codeparrot_training - Step 12190: {'lr': 0.0004464304896729257, 'samples': 6241792, 'steps': 12190, 'loss/train': 2.5269486904144287} +02/24/2022 17:21:32 - INFO - codeparrot_training - Step 12191: {'lr': 0.0004464203677634424, 'samples': 6242304, 'steps': 12191, 'loss/train': 2.1737985610961914} +02/24/2022 17:21:35 - INFO - codeparrot_training - Step 12192: {'lr': 0.0004464102450125568, 'samples': 6242816, 'steps': 12192, 'loss/train': 1.9607230424880981} +02/24/2022 17:21:41 - INFO - codeparrot_training - Step 12193: {'lr': 0.00044640012142031196, 'samples': 6243328, 'steps': 12193, 'loss/train': 2.8272314071655273} +02/24/2022 17:21:44 - INFO - codeparrot_training - Step 12194: {'lr': 0.0004463899969867514, 'samples': 6243840, 'steps': 12194, 'loss/train': 1.5703818798065186} +02/24/2022 17:21:50 - INFO - codeparrot_training - Step 12195: {'lr': 0.0004463798717119185, 'samples': 6244352, 'steps': 12195, 'loss/train': 2.894139289855957} +02/24/2022 17:21:53 - INFO - codeparrot_training - Step 12196: {'lr': 0.00044636974559585655, 'samples': 6244864, 'steps': 12196, 'loss/train': 2.4204652309417725} +02/24/2022 17:21:59 - INFO - codeparrot_training - Step 12197: {'lr': 0.00044635961863860894, 'samples': 6245376, 'steps': 12197, 'loss/train': 0.8477448225021362} +02/24/2022 17:22:02 - INFO - codeparrot_training - Step 12198: {'lr': 0.00044634949084021913, 'samples': 6245888, 'steps': 12198, 'loss/train': 1.9968920946121216} +02/24/2022 17:22:09 - INFO - codeparrot_training - Step 12199: {'lr': 0.0004463393622007305, 'samples': 6246400, 'steps': 12199, 'loss/train': 1.155855655670166} +02/24/2022 17:22:13 - INFO - codeparrot_training - Step 12200: {'lr': 0.0004463292327201862, 'samples': 6246912, 'steps': 12200, 'loss/train': 2.209712505340576} +02/24/2022 17:22:18 - INFO - codeparrot_training - Step 12201: {'lr': 0.0004463191023986299, 'samples': 6247424, 'steps': 12201, 'loss/train': 1.8642038106918335} +02/24/2022 17:22:21 - INFO - codeparrot_training - Step 12202: {'lr': 0.00044630897123610497, 'samples': 6247936, 'steps': 12202, 'loss/train': 2.290565013885498} +02/24/2022 17:22:27 - INFO - codeparrot_training - Step 12203: {'lr': 0.0004462988392326547, 'samples': 6248448, 'steps': 12203, 'loss/train': 1.4512784481048584} +02/24/2022 17:22:30 - INFO - codeparrot_training - Step 12204: {'lr': 0.00044628870638832254, 'samples': 6248960, 'steps': 12204, 'loss/train': 0.11816435307264328} +02/24/2022 17:22:36 - INFO - codeparrot_training - Step 12205: {'lr': 0.00044627857270315187, 'samples': 6249472, 'steps': 12205, 'loss/train': 1.8517296314239502} +02/24/2022 17:22:40 - INFO - codeparrot_training - Step 12206: {'lr': 0.00044626843817718615, 'samples': 6249984, 'steps': 12206, 'loss/train': 2.1940414905548096} +02/24/2022 17:22:45 - INFO - codeparrot_training - Step 12207: {'lr': 0.00044625830281046875, 'samples': 6250496, 'steps': 12207, 'loss/train': 2.197786808013916} +02/24/2022 17:22:48 - INFO - codeparrot_training - Step 12208: {'lr': 0.0004462481666030431, 'samples': 6251008, 'steps': 12208, 'loss/train': 1.139675259590149} +02/24/2022 17:22:54 - INFO - codeparrot_training - Step 12209: {'lr': 0.0004462380295549526, 'samples': 6251520, 'steps': 12209, 'loss/train': 0.5278382897377014} +02/24/2022 17:22:58 - INFO - codeparrot_training - Step 12210: {'lr': 0.0004462278916662407, 'samples': 6252032, 'steps': 12210, 'loss/train': 0.5970871448516846} +02/24/2022 17:23:03 - INFO - codeparrot_training - Step 12211: {'lr': 0.00044621775293695085, 'samples': 6252544, 'steps': 12211, 'loss/train': 2.36564040184021} +02/24/2022 17:23:06 - INFO - codeparrot_training - Step 12212: {'lr': 0.00044620761336712646, 'samples': 6253056, 'steps': 12212, 'loss/train': 1.9601918458938599} +02/24/2022 17:23:12 - INFO - codeparrot_training - Step 12213: {'lr': 0.0004461974729568109, 'samples': 6253568, 'steps': 12213, 'loss/train': 2.555851459503174} +02/24/2022 17:23:15 - INFO - codeparrot_training - Step 12214: {'lr': 0.0004461873317060477, 'samples': 6254080, 'steps': 12214, 'loss/train': 2.2025561332702637} +02/24/2022 17:23:22 - INFO - codeparrot_training - Step 12215: {'lr': 0.00044617718961488024, 'samples': 6254592, 'steps': 12215, 'loss/train': 1.5442249774932861} +02/24/2022 17:23:25 - INFO - codeparrot_training - Step 12216: {'lr': 0.000446167046683352, 'samples': 6255104, 'steps': 12216, 'loss/train': 2.2014803886413574} +02/24/2022 17:23:31 - INFO - codeparrot_training - Step 12217: {'lr': 0.0004461569029115065, 'samples': 6255616, 'steps': 12217, 'loss/train': 2.4430060386657715} +02/24/2022 17:23:34 - INFO - codeparrot_training - Step 12218: {'lr': 0.000446146758299387, 'samples': 6256128, 'steps': 12218, 'loss/train': 2.3343770503997803} +02/24/2022 17:23:40 - INFO - codeparrot_training - Step 12219: {'lr': 0.0004461366128470371, 'samples': 6256640, 'steps': 12219, 'loss/train': 1.514944314956665} +02/24/2022 17:23:43 - INFO - codeparrot_training - Step 12220: {'lr': 0.0004461264665545003, 'samples': 6257152, 'steps': 12220, 'loss/train': 2.2627553939819336} +02/24/2022 17:23:49 - INFO - codeparrot_training - Step 12221: {'lr': 0.00044611631942182, 'samples': 6257664, 'steps': 12221, 'loss/train': 2.365278482437134} +02/24/2022 17:23:52 - INFO - codeparrot_training - Step 12222: {'lr': 0.0004461061714490395, 'samples': 6258176, 'steps': 12222, 'loss/train': 1.005839467048645} +02/24/2022 17:23:58 - INFO - codeparrot_training - Step 12223: {'lr': 0.0004460960226362026, 'samples': 6258688, 'steps': 12223, 'loss/train': 2.1605939865112305} +02/24/2022 17:24:01 - INFO - codeparrot_training - Step 12224: {'lr': 0.0004460858729833525, 'samples': 6259200, 'steps': 12224, 'loss/train': 1.6568334102630615} +02/24/2022 17:24:08 - INFO - codeparrot_training - Step 12225: {'lr': 0.00044607572249053283, 'samples': 6259712, 'steps': 12225, 'loss/train': 0.4331347346305847} +02/24/2022 17:24:11 - INFO - codeparrot_training - Step 12226: {'lr': 0.0004460655711577871, 'samples': 6260224, 'steps': 12226, 'loss/train': 1.968904972076416} +02/24/2022 17:24:17 - INFO - codeparrot_training - Step 12227: {'lr': 0.00044605541898515863, 'samples': 6260736, 'steps': 12227, 'loss/train': 1.7360906600952148} +02/24/2022 17:24:20 - INFO - codeparrot_training - Step 12228: {'lr': 0.00044604526597269103, 'samples': 6261248, 'steps': 12228, 'loss/train': 1.6190905570983887} +02/24/2022 17:24:25 - INFO - codeparrot_training - Step 12229: {'lr': 0.0004460351121204277, 'samples': 6261760, 'steps': 12229, 'loss/train': 1.6429935693740845} +02/24/2022 17:24:29 - INFO - codeparrot_training - Step 12230: {'lr': 0.00044602495742841226, 'samples': 6262272, 'steps': 12230, 'loss/train': 2.3980512619018555} +02/24/2022 17:24:35 - INFO - codeparrot_training - Step 12231: {'lr': 0.00044601480189668816, 'samples': 6262784, 'steps': 12231, 'loss/train': 1.9587669372558594} +02/24/2022 17:24:38 - INFO - codeparrot_training - Step 12232: {'lr': 0.00044600464552529886, 'samples': 6263296, 'steps': 12232, 'loss/train': 2.0523691177368164} +02/24/2022 17:24:44 - INFO - codeparrot_training - Step 12233: {'lr': 0.0004459944883142879, 'samples': 6263808, 'steps': 12233, 'loss/train': 2.524299383163452} +02/24/2022 17:24:49 - INFO - codeparrot_training - Step 12234: {'lr': 0.0004459843302636988, 'samples': 6264320, 'steps': 12234, 'loss/train': 2.87491774559021} +02/24/2022 17:24:53 - INFO - codeparrot_training - Step 12235: {'lr': 0.000445974171373575, 'samples': 6264832, 'steps': 12235, 'loss/train': 1.8807936906814575} +02/24/2022 17:24:59 - INFO - codeparrot_training - Step 12236: {'lr': 0.0004459640116439602, 'samples': 6265344, 'steps': 12236, 'loss/train': 2.117929458618164} +02/24/2022 17:25:03 - INFO - codeparrot_training - Step 12237: {'lr': 0.0004459538510748977, 'samples': 6265856, 'steps': 12237, 'loss/train': 2.6591475009918213} +02/24/2022 17:25:08 - INFO - codeparrot_training - Step 12238: {'lr': 0.0004459436896664312, 'samples': 6266368, 'steps': 12238, 'loss/train': 2.887000799179077} +02/24/2022 17:25:11 - INFO - codeparrot_training - Step 12239: {'lr': 0.00044593352741860404, 'samples': 6266880, 'steps': 12239, 'loss/train': 2.488243579864502} +02/24/2022 17:25:17 - INFO - codeparrot_training - Step 12240: {'lr': 0.00044592336433145995, 'samples': 6267392, 'steps': 12240, 'loss/train': 2.0779874324798584} +02/24/2022 17:25:21 - INFO - codeparrot_training - Step 12241: {'lr': 0.00044591320040504237, 'samples': 6267904, 'steps': 12241, 'loss/train': 2.8562235832214355} +02/24/2022 17:25:26 - INFO - codeparrot_training - Step 12242: {'lr': 0.00044590303563939485, 'samples': 6268416, 'steps': 12242, 'loss/train': 1.9997836351394653} +02/24/2022 17:25:30 - INFO - codeparrot_training - Step 12243: {'lr': 0.0004458928700345609, 'samples': 6268928, 'steps': 12243, 'loss/train': 2.0879430770874023} +02/24/2022 17:25:35 - INFO - codeparrot_training - Step 12244: {'lr': 0.00044588270359058416, 'samples': 6269440, 'steps': 12244, 'loss/train': 1.1297696828842163} +02/24/2022 17:25:39 - INFO - codeparrot_training - Step 12245: {'lr': 0.000445872536307508, 'samples': 6269952, 'steps': 12245, 'loss/train': 1.9869401454925537} +02/24/2022 17:25:45 - INFO - codeparrot_training - Step 12246: {'lr': 0.0004458623681853762, 'samples': 6270464, 'steps': 12246, 'loss/train': 0.7562997937202454} +02/24/2022 17:25:48 - INFO - codeparrot_training - Step 12247: {'lr': 0.0004458521992242322, 'samples': 6270976, 'steps': 12247, 'loss/train': 1.6579252481460571} +02/24/2022 17:25:54 - INFO - codeparrot_training - Step 12248: {'lr': 0.00044584202942411956, 'samples': 6271488, 'steps': 12248, 'loss/train': 1.962679147720337} +02/24/2022 17:25:57 - INFO - codeparrot_training - Step 12249: {'lr': 0.00044583185878508183, 'samples': 6272000, 'steps': 12249, 'loss/train': 1.5631684064865112} +02/24/2022 17:26:03 - INFO - codeparrot_training - Step 12250: {'lr': 0.0004458216873071626, 'samples': 6272512, 'steps': 12250, 'loss/train': 2.2190117835998535} +02/24/2022 17:26:06 - INFO - codeparrot_training - Step 12251: {'lr': 0.00044581151499040547, 'samples': 6273024, 'steps': 12251, 'loss/train': 0.9030312895774841} +02/24/2022 17:26:12 - INFO - codeparrot_training - Step 12252: {'lr': 0.000445801341834854, 'samples': 6273536, 'steps': 12252, 'loss/train': 2.4146976470947266} +02/24/2022 17:26:15 - INFO - codeparrot_training - Step 12253: {'lr': 0.0004457911678405517, 'samples': 6274048, 'steps': 12253, 'loss/train': 0.5449259281158447} +02/24/2022 17:26:21 - INFO - codeparrot_training - Step 12254: {'lr': 0.0004457809930075422, 'samples': 6274560, 'steps': 12254, 'loss/train': 1.6004161834716797} +02/24/2022 17:26:24 - INFO - codeparrot_training - Step 12255: {'lr': 0.0004457708173358691, 'samples': 6275072, 'steps': 12255, 'loss/train': 2.691077947616577} +02/24/2022 17:26:30 - INFO - codeparrot_training - Step 12256: {'lr': 0.00044576064082557605, 'samples': 6275584, 'steps': 12256, 'loss/train': 2.0691773891448975} +02/24/2022 17:26:33 - INFO - codeparrot_training - Step 12257: {'lr': 0.0004457504634767066, 'samples': 6276096, 'steps': 12257, 'loss/train': 2.9726624488830566} +02/24/2022 17:26:39 - INFO - codeparrot_training - Step 12258: {'lr': 0.0004457402852893042, 'samples': 6276608, 'steps': 12258, 'loss/train': 2.6471009254455566} +02/24/2022 17:26:43 - INFO - codeparrot_training - Step 12259: {'lr': 0.0004457301062634126, 'samples': 6277120, 'steps': 12259, 'loss/train': 2.3239684104919434} +02/24/2022 17:26:48 - INFO - codeparrot_training - Step 12260: {'lr': 0.0004457199263990754, 'samples': 6277632, 'steps': 12260, 'loss/train': 2.2256107330322266} +02/24/2022 17:26:52 - INFO - codeparrot_training - Step 12261: {'lr': 0.0004457097456963362, 'samples': 6278144, 'steps': 12261, 'loss/train': 1.5917807817459106} +02/24/2022 17:26:58 - INFO - codeparrot_training - Step 12262: {'lr': 0.0004456995641552386, 'samples': 6278656, 'steps': 12262, 'loss/train': 1.9505926370620728} +02/24/2022 17:27:01 - INFO - codeparrot_training - Step 12263: {'lr': 0.0004456893817758262, 'samples': 6279168, 'steps': 12263, 'loss/train': 1.3096234798431396} +02/24/2022 17:27:07 - INFO - codeparrot_training - Step 12264: {'lr': 0.00044567919855814257, 'samples': 6279680, 'steps': 12264, 'loss/train': 2.4166879653930664} +02/24/2022 17:27:10 - INFO - codeparrot_training - Step 12265: {'lr': 0.0004456690145022314, 'samples': 6280192, 'steps': 12265, 'loss/train': 2.4174020290374756} +02/24/2022 17:27:16 - INFO - codeparrot_training - Step 12266: {'lr': 0.0004456588296081364, 'samples': 6280704, 'steps': 12266, 'loss/train': 2.4440877437591553} +02/24/2022 17:27:19 - INFO - codeparrot_training - Step 12267: {'lr': 0.000445648643875901, 'samples': 6281216, 'steps': 12267, 'loss/train': 2.1894125938415527} +02/24/2022 17:27:25 - INFO - codeparrot_training - Step 12268: {'lr': 0.000445638457305569, 'samples': 6281728, 'steps': 12268, 'loss/train': 2.184979200363159} +02/24/2022 17:27:28 - INFO - codeparrot_training - Step 12269: {'lr': 0.00044562826989718397, 'samples': 6282240, 'steps': 12269, 'loss/train': 2.278891086578369} +02/24/2022 17:27:34 - INFO - codeparrot_training - Step 12270: {'lr': 0.00044561808165078954, 'samples': 6282752, 'steps': 12270, 'loss/train': 1.8342580795288086} +02/24/2022 17:27:37 - INFO - codeparrot_training - Step 12271: {'lr': 0.0004456078925664293, 'samples': 6283264, 'steps': 12271, 'loss/train': 3.0874226093292236} +02/24/2022 17:27:44 - INFO - codeparrot_training - Step 12272: {'lr': 0.000445597702644147, 'samples': 6283776, 'steps': 12272, 'loss/train': 2.4969401359558105} +02/24/2022 17:27:47 - INFO - codeparrot_training - Step 12273: {'lr': 0.0004455875118839863, 'samples': 6284288, 'steps': 12273, 'loss/train': 2.95229172706604} +02/24/2022 17:27:53 - INFO - codeparrot_training - Step 12274: {'lr': 0.00044557732028599077, 'samples': 6284800, 'steps': 12274, 'loss/train': 1.8821544647216797} +02/24/2022 17:27:56 - INFO - codeparrot_training - Step 12275: {'lr': 0.0004455671278502041, 'samples': 6285312, 'steps': 12275, 'loss/train': 1.5584900379180908} +02/24/2022 17:28:02 - INFO - codeparrot_training - Step 12276: {'lr': 0.00044555693457667, 'samples': 6285824, 'steps': 12276, 'loss/train': 2.4532015323638916} +02/24/2022 17:28:05 - INFO - codeparrot_training - Step 12277: {'lr': 0.000445546740465432, 'samples': 6286336, 'steps': 12277, 'loss/train': 1.792319893836975} +02/24/2022 17:28:11 - INFO - codeparrot_training - Step 12278: {'lr': 0.00044553654551653387, 'samples': 6286848, 'steps': 12278, 'loss/train': 4.381943702697754} +02/24/2022 17:28:14 - INFO - codeparrot_training - Step 12279: {'lr': 0.0004455263497300194, 'samples': 6287360, 'steps': 12279, 'loss/train': 0.990138053894043} +02/24/2022 17:28:20 - INFO - codeparrot_training - Step 12280: {'lr': 0.000445516153105932, 'samples': 6287872, 'steps': 12280, 'loss/train': 1.7454262971878052} +02/24/2022 17:28:24 - INFO - codeparrot_training - Step 12281: {'lr': 0.0004455059556443155, 'samples': 6288384, 'steps': 12281, 'loss/train': 2.2448575496673584} +02/24/2022 17:28:30 - INFO - codeparrot_training - Step 12282: {'lr': 0.0004454957573452136, 'samples': 6288896, 'steps': 12282, 'loss/train': 1.0142549276351929} +02/24/2022 17:28:33 - INFO - codeparrot_training - Step 12283: {'lr': 0.0004454855582086699, 'samples': 6289408, 'steps': 12283, 'loss/train': 2.5627574920654297} +02/24/2022 17:28:39 - INFO - codeparrot_training - Step 12284: {'lr': 0.0004454753582347282, 'samples': 6289920, 'steps': 12284, 'loss/train': 2.0157546997070312} +02/24/2022 17:28:42 - INFO - codeparrot_training - Step 12285: {'lr': 0.00044546515742343207, 'samples': 6290432, 'steps': 12285, 'loss/train': 1.9214407205581665} +02/24/2022 17:28:48 - INFO - codeparrot_training - Step 12286: {'lr': 0.00044545495577482535, 'samples': 6290944, 'steps': 12286, 'loss/train': 1.8595911264419556} +02/24/2022 17:28:51 - INFO - codeparrot_training - Step 12287: {'lr': 0.00044544475328895164, 'samples': 6291456, 'steps': 12287, 'loss/train': 1.9937409162521362} +02/24/2022 17:28:57 - INFO - codeparrot_training - Step 12288: {'lr': 0.00044543454996585463, 'samples': 6291968, 'steps': 12288, 'loss/train': 1.5022523403167725} +02/24/2022 17:29:00 - INFO - codeparrot_training - Step 12289: {'lr': 0.0004454243458055781, 'samples': 6292480, 'steps': 12289, 'loss/train': 2.310431718826294} +02/24/2022 17:29:06 - INFO - codeparrot_training - Step 12290: {'lr': 0.00044541414080816573, 'samples': 6292992, 'steps': 12290, 'loss/train': 1.9270498752593994} +02/24/2022 17:29:09 - INFO - codeparrot_training - Step 12291: {'lr': 0.00044540393497366124, 'samples': 6293504, 'steps': 12291, 'loss/train': 2.380450963973999} +02/24/2022 17:29:16 - INFO - codeparrot_training - Step 12292: {'lr': 0.00044539372830210833, 'samples': 6294016, 'steps': 12292, 'loss/train': 1.314091682434082} +02/24/2022 17:29:19 - INFO - codeparrot_training - Step 12293: {'lr': 0.0004453835207935507, 'samples': 6294528, 'steps': 12293, 'loss/train': 1.027348518371582} +02/24/2022 17:29:25 - INFO - codeparrot_training - Step 12294: {'lr': 0.0004453733124480321, 'samples': 6295040, 'steps': 12294, 'loss/train': 2.0489554405212402} +02/24/2022 17:29:28 - INFO - codeparrot_training - Step 12295: {'lr': 0.0004453631032655964, 'samples': 6295552, 'steps': 12295, 'loss/train': 1.8813971281051636} +02/24/2022 17:29:34 - INFO - codeparrot_training - Step 12296: {'lr': 0.00044535289324628704, 'samples': 6296064, 'steps': 12296, 'loss/train': 2.194880485534668} +02/24/2022 17:29:37 - INFO - codeparrot_training - Step 12297: {'lr': 0.00044534268239014796, 'samples': 6296576, 'steps': 12297, 'loss/train': 2.218430995941162} +02/24/2022 17:29:43 - INFO - codeparrot_training - Step 12298: {'lr': 0.00044533247069722295, 'samples': 6297088, 'steps': 12298, 'loss/train': 1.4462721347808838} +02/24/2022 17:29:46 - INFO - codeparrot_training - Step 12299: {'lr': 0.0004453222581675556, 'samples': 6297600, 'steps': 12299, 'loss/train': 1.7703735828399658} +02/24/2022 17:29:52 - INFO - codeparrot_training - Step 12300: {'lr': 0.0004453120448011897, 'samples': 6298112, 'steps': 12300, 'loss/train': 1.7118444442749023} +02/24/2022 17:29:55 - INFO - codeparrot_training - Step 12301: {'lr': 0.00044530183059816896, 'samples': 6298624, 'steps': 12301, 'loss/train': 1.4803307056427002} +02/24/2022 17:30:01 - INFO - codeparrot_training - Step 12302: {'lr': 0.00044529161555853725, 'samples': 6299136, 'steps': 12302, 'loss/train': 1.002977728843689} +02/24/2022 17:30:04 - INFO - codeparrot_training - Step 12303: {'lr': 0.0004452813996823383, 'samples': 6299648, 'steps': 12303, 'loss/train': 1.6941735744476318} +02/24/2022 17:30:10 - INFO - codeparrot_training - Step 12304: {'lr': 0.00044527118296961576, 'samples': 6300160, 'steps': 12304, 'loss/train': 0.8799401521682739} +02/24/2022 17:30:13 - INFO - codeparrot_training - Step 12305: {'lr': 0.0004452609654204136, 'samples': 6300672, 'steps': 12305, 'loss/train': 2.210076093673706} +02/24/2022 17:30:20 - INFO - codeparrot_training - Step 12306: {'lr': 0.0004452507470347754, 'samples': 6301184, 'steps': 12306, 'loss/train': 2.1028568744659424} +02/24/2022 17:30:24 - INFO - codeparrot_training - Step 12307: {'lr': 0.00044524052781274497, 'samples': 6301696, 'steps': 12307, 'loss/train': 2.4408137798309326} +02/24/2022 17:30:29 - INFO - codeparrot_training - Step 12308: {'lr': 0.00044523030775436617, 'samples': 6302208, 'steps': 12308, 'loss/train': 1.1121882200241089} +02/24/2022 17:30:33 - INFO - codeparrot_training - Step 12309: {'lr': 0.0004452200868596827, 'samples': 6302720, 'steps': 12309, 'loss/train': 2.3073625564575195} +02/24/2022 17:30:38 - INFO - codeparrot_training - Step 12310: {'lr': 0.0004452098651287384, 'samples': 6303232, 'steps': 12310, 'loss/train': 1.9814388751983643} +02/24/2022 17:30:42 - INFO - codeparrot_training - Step 12311: {'lr': 0.000445199642561577, 'samples': 6303744, 'steps': 12311, 'loss/train': 1.386033535003662} +02/24/2022 17:30:47 - INFO - codeparrot_training - Step 12312: {'lr': 0.0004451894191582423, 'samples': 6304256, 'steps': 12312, 'loss/train': 2.078249216079712} +02/24/2022 17:30:51 - INFO - codeparrot_training - Step 12313: {'lr': 0.0004451791949187781, 'samples': 6304768, 'steps': 12313, 'loss/train': 2.4705617427825928} +02/24/2022 17:30:56 - INFO - codeparrot_training - Step 12314: {'lr': 0.0004451689698432282, 'samples': 6305280, 'steps': 12314, 'loss/train': 1.6156132221221924} +02/24/2022 17:31:00 - INFO - codeparrot_training - Step 12315: {'lr': 0.0004451587439316365, 'samples': 6305792, 'steps': 12315, 'loss/train': 0.9699227809906006} +02/24/2022 17:31:06 - INFO - codeparrot_training - Step 12316: {'lr': 0.0004451485171840466, 'samples': 6306304, 'steps': 12316, 'loss/train': 1.938400387763977} +02/24/2022 17:31:12 - INFO - codeparrot_training - Step 12317: {'lr': 0.0004451382896005024, 'samples': 6306816, 'steps': 12317, 'loss/train': 2.1531031131744385} +02/24/2022 17:31:15 - INFO - codeparrot_training - Step 12318: {'lr': 0.00044512806118104784, 'samples': 6307328, 'steps': 12318, 'loss/train': 1.8844478130340576} +02/24/2022 17:31:21 - INFO - codeparrot_training - Step 12319: {'lr': 0.0004451178319257265, 'samples': 6307840, 'steps': 12319, 'loss/train': 2.1367862224578857} +02/24/2022 17:31:24 - INFO - codeparrot_training - Step 12320: {'lr': 0.0004451076018345824, 'samples': 6308352, 'steps': 12320, 'loss/train': 1.9447153806686401} +02/24/2022 17:31:28 - INFO - codeparrot_training - Step 12321: {'lr': 0.00044509737090765933, 'samples': 6308864, 'steps': 12321, 'loss/train': 2.543639659881592} +02/24/2022 17:31:34 - INFO - codeparrot_training - Step 12322: {'lr': 0.00044508713914500107, 'samples': 6309376, 'steps': 12322, 'loss/train': 2.049834966659546} +02/24/2022 17:31:37 - INFO - codeparrot_training - Step 12323: {'lr': 0.0004450769065466514, 'samples': 6309888, 'steps': 12323, 'loss/train': 2.6177823543548584} +02/24/2022 17:31:43 - INFO - codeparrot_training - Step 12324: {'lr': 0.0004450666731126542, 'samples': 6310400, 'steps': 12324, 'loss/train': 2.3640940189361572} +02/24/2022 17:31:46 - INFO - codeparrot_training - Step 12325: {'lr': 0.0004450564388430533, 'samples': 6310912, 'steps': 12325, 'loss/train': 1.936601996421814} +02/24/2022 17:31:53 - INFO - codeparrot_training - Step 12326: {'lr': 0.0004450462037378926, 'samples': 6311424, 'steps': 12326, 'loss/train': 2.0119669437408447} +02/24/2022 17:31:56 - INFO - codeparrot_training - Step 12327: {'lr': 0.0004450359677972159, 'samples': 6311936, 'steps': 12327, 'loss/train': 1.828921914100647} +02/24/2022 17:32:02 - INFO - codeparrot_training - Step 12328: {'lr': 0.000445025731021067, 'samples': 6312448, 'steps': 12328, 'loss/train': 1.2264736890792847} +02/24/2022 17:32:05 - INFO - codeparrot_training - Step 12329: {'lr': 0.0004450154934094898, 'samples': 6312960, 'steps': 12329, 'loss/train': 1.1868711709976196} +02/24/2022 17:32:11 - INFO - codeparrot_training - Step 12330: {'lr': 0.0004450052549625282, 'samples': 6313472, 'steps': 12330, 'loss/train': 1.7256996631622314} +02/24/2022 17:32:14 - INFO - codeparrot_training - Step 12331: {'lr': 0.000444995015680226, 'samples': 6313984, 'steps': 12331, 'loss/train': 5.192365646362305} +02/24/2022 17:32:20 - INFO - codeparrot_training - Step 12332: {'lr': 0.0004449847755626271, 'samples': 6314496, 'steps': 12332, 'loss/train': 2.1573946475982666} +02/24/2022 17:32:23 - INFO - codeparrot_training - Step 12333: {'lr': 0.00044497453460977523, 'samples': 6315008, 'steps': 12333, 'loss/train': 2.4214375019073486} +02/24/2022 17:32:29 - INFO - codeparrot_training - Step 12334: {'lr': 0.0004449642928217144, 'samples': 6315520, 'steps': 12334, 'loss/train': 2.800600051879883} +02/24/2022 17:32:32 - INFO - codeparrot_training - Step 12335: {'lr': 0.0004449540501984885, 'samples': 6316032, 'steps': 12335, 'loss/train': 2.9670016765594482} +02/24/2022 17:32:38 - INFO - codeparrot_training - Step 12336: {'lr': 0.0004449438067401413, 'samples': 6316544, 'steps': 12336, 'loss/train': 2.426023483276367} +02/24/2022 17:32:42 - INFO - codeparrot_training - Step 12337: {'lr': 0.0004449335624467168, 'samples': 6317056, 'steps': 12337, 'loss/train': 1.6832247972488403} +02/24/2022 17:32:48 - INFO - codeparrot_training - Step 12338: {'lr': 0.00044492331731825875, 'samples': 6317568, 'steps': 12338, 'loss/train': 1.774661898612976} +02/24/2022 17:32:52 - INFO - codeparrot_training - Step 12339: {'lr': 0.0004449130713548111, 'samples': 6318080, 'steps': 12339, 'loss/train': 2.1241750717163086} +02/24/2022 17:32:57 - INFO - codeparrot_training - Step 12340: {'lr': 0.00044490282455641783, 'samples': 6318592, 'steps': 12340, 'loss/train': 2.314002275466919} +02/24/2022 17:33:01 - INFO - codeparrot_training - Step 12341: {'lr': 0.0004448925769231227, 'samples': 6319104, 'steps': 12341, 'loss/train': 1.9071742296218872} +02/24/2022 17:33:06 - INFO - codeparrot_training - Step 12342: {'lr': 0.0004448823284549696, 'samples': 6319616, 'steps': 12342, 'loss/train': 2.9264605045318604} +02/24/2022 17:33:10 - INFO - codeparrot_training - Step 12343: {'lr': 0.00044487207915200257, 'samples': 6320128, 'steps': 12343, 'loss/train': 0.30424219369888306} +02/24/2022 17:33:15 - INFO - codeparrot_training - Step 12344: {'lr': 0.0004448618290142654, 'samples': 6320640, 'steps': 12344, 'loss/train': 0.4632541537284851} +02/24/2022 17:33:19 - INFO - codeparrot_training - Step 12345: {'lr': 0.000444851578041802, 'samples': 6321152, 'steps': 12345, 'loss/train': 1.989852786064148} +02/24/2022 17:33:24 - INFO - codeparrot_training - Step 12346: {'lr': 0.00044484132623465633, 'samples': 6321664, 'steps': 12346, 'loss/train': 2.248664379119873} +02/24/2022 17:33:28 - INFO - codeparrot_training - Step 12347: {'lr': 0.0004448310735928723, 'samples': 6322176, 'steps': 12347, 'loss/train': 1.5307692289352417} +02/24/2022 17:33:33 - INFO - codeparrot_training - Step 12348: {'lr': 0.0004448208201164938, 'samples': 6322688, 'steps': 12348, 'loss/train': 3.1500887870788574} +02/24/2022 17:33:37 - INFO - codeparrot_training - Step 12349: {'lr': 0.0004448105658055648, 'samples': 6323200, 'steps': 12349, 'loss/train': 0.2151261866092682} +02/24/2022 17:33:42 - INFO - codeparrot_training - Step 12350: {'lr': 0.00044480031066012916, 'samples': 6323712, 'steps': 12350, 'loss/train': 1.7735192775726318} +02/24/2022 17:33:46 - INFO - codeparrot_training - Step 12351: {'lr': 0.00044479005468023086, 'samples': 6324224, 'steps': 12351, 'loss/train': 3.1518568992614746} +02/24/2022 17:33:52 - INFO - codeparrot_training - Step 12352: {'lr': 0.0004447797978659138, 'samples': 6324736, 'steps': 12352, 'loss/train': 2.223313093185425} +02/24/2022 17:33:55 - INFO - codeparrot_training - Step 12353: {'lr': 0.000444769540217222, 'samples': 6325248, 'steps': 12353, 'loss/train': 1.8045709133148193} +02/24/2022 17:34:01 - INFO - codeparrot_training - Step 12354: {'lr': 0.0004447592817341993, 'samples': 6325760, 'steps': 12354, 'loss/train': 0.15722961723804474} +02/24/2022 17:34:04 - INFO - codeparrot_training - Step 12355: {'lr': 0.0004447490224168896, 'samples': 6326272, 'steps': 12355, 'loss/train': 2.2276923656463623} +02/24/2022 17:34:10 - INFO - codeparrot_training - Step 12356: {'lr': 0.00044473876226533703, 'samples': 6326784, 'steps': 12356, 'loss/train': 1.558146595954895} +02/24/2022 17:34:13 - INFO - codeparrot_training - Step 12357: {'lr': 0.0004447285012795854, 'samples': 6327296, 'steps': 12357, 'loss/train': 1.7817211151123047} +02/24/2022 17:34:19 - INFO - codeparrot_training - Step 12358: {'lr': 0.0004447182394596788, 'samples': 6327808, 'steps': 12358, 'loss/train': 1.5735222101211548} +02/24/2022 17:34:22 - INFO - codeparrot_training - Step 12359: {'lr': 0.000444707976805661, 'samples': 6328320, 'steps': 12359, 'loss/train': 2.140296220779419} +02/24/2022 17:34:28 - INFO - codeparrot_training - Step 12360: {'lr': 0.00044469771331757604, 'samples': 6328832, 'steps': 12360, 'loss/train': 1.716566801071167} +02/24/2022 17:34:31 - INFO - codeparrot_training - Step 12361: {'lr': 0.00044468744899546785, 'samples': 6329344, 'steps': 12361, 'loss/train': 1.831556797027588} +02/24/2022 17:34:38 - INFO - codeparrot_training - Step 12362: {'lr': 0.0004446771838393806, 'samples': 6329856, 'steps': 12362, 'loss/train': 2.313117742538452} +02/24/2022 17:34:42 - INFO - codeparrot_training - Step 12363: {'lr': 0.00044466691784935796, 'samples': 6330368, 'steps': 12363, 'loss/train': 2.639930486679077} +02/24/2022 17:34:47 - INFO - codeparrot_training - Step 12364: {'lr': 0.00044465665102544415, 'samples': 6330880, 'steps': 12364, 'loss/train': 1.7952097654342651} +02/24/2022 17:34:53 - INFO - codeparrot_training - Step 12365: {'lr': 0.000444646383367683, 'samples': 6331392, 'steps': 12365, 'loss/train': 1.296103835105896} +02/24/2022 17:34:56 - INFO - codeparrot_training - Step 12366: {'lr': 0.00044463611487611864, 'samples': 6331904, 'steps': 12366, 'loss/train': 0.6917550563812256} +02/24/2022 17:35:02 - INFO - codeparrot_training - Step 12367: {'lr': 0.0004446258455507949, 'samples': 6332416, 'steps': 12367, 'loss/train': 2.458496570587158} +02/24/2022 17:35:05 - INFO - codeparrot_training - Step 12368: {'lr': 0.00044461557539175587, 'samples': 6332928, 'steps': 12368, 'loss/train': 2.906895875930786} +02/24/2022 17:35:11 - INFO - codeparrot_training - Step 12369: {'lr': 0.0004446053043990455, 'samples': 6333440, 'steps': 12369, 'loss/train': 2.5737407207489014} +02/24/2022 17:35:14 - INFO - codeparrot_training - Step 12370: {'lr': 0.00044459503257270776, 'samples': 6333952, 'steps': 12370, 'loss/train': 2.17844557762146} +02/24/2022 17:35:21 - INFO - codeparrot_training - Step 12371: {'lr': 0.0004445847599127868, 'samples': 6334464, 'steps': 12371, 'loss/train': 2.2148101329803467} +02/24/2022 17:35:24 - INFO - codeparrot_training - Step 12372: {'lr': 0.0004445744864193264, 'samples': 6334976, 'steps': 12372, 'loss/train': 3.0531060695648193} +02/24/2022 17:35:30 - INFO - codeparrot_training - Step 12373: {'lr': 0.00044456421209237073, 'samples': 6335488, 'steps': 12373, 'loss/train': 1.7490960359573364} +02/24/2022 17:35:33 - INFO - codeparrot_training - Step 12374: {'lr': 0.00044455393693196375, 'samples': 6336000, 'steps': 12374, 'loss/train': 2.6144533157348633} +02/24/2022 17:35:39 - INFO - codeparrot_training - Step 12375: {'lr': 0.00044454366093814947, 'samples': 6336512, 'steps': 12375, 'loss/train': 2.1560230255126953} +02/24/2022 17:35:43 - INFO - codeparrot_training - Step 12376: {'lr': 0.0004445333841109719, 'samples': 6337024, 'steps': 12376, 'loss/train': 4.896445274353027} +02/24/2022 17:35:48 - INFO - codeparrot_training - Step 12377: {'lr': 0.0004445231064504751, 'samples': 6337536, 'steps': 12377, 'loss/train': 1.9981356859207153} +02/24/2022 17:35:52 - INFO - codeparrot_training - Step 12378: {'lr': 0.00044451282795670313, 'samples': 6338048, 'steps': 12378, 'loss/train': 1.5984537601470947} +02/24/2022 17:35:57 - INFO - codeparrot_training - Step 12379: {'lr': 0.0004445025486297, 'samples': 6338560, 'steps': 12379, 'loss/train': 2.0769050121307373} +02/24/2022 17:36:01 - INFO - codeparrot_training - Step 12380: {'lr': 0.00044449226846950964, 'samples': 6339072, 'steps': 12380, 'loss/train': 2.7042148113250732} +02/24/2022 17:36:07 - INFO - codeparrot_training - Step 12381: {'lr': 0.0004444819874761762, 'samples': 6339584, 'steps': 12381, 'loss/train': 2.5061631202697754} +02/24/2022 17:36:10 - INFO - codeparrot_training - Step 12382: {'lr': 0.0004444717056497436, 'samples': 6340096, 'steps': 12382, 'loss/train': 2.3966879844665527} +02/24/2022 17:36:16 - INFO - codeparrot_training - Step 12383: {'lr': 0.00044446142299025605, 'samples': 6340608, 'steps': 12383, 'loss/train': 1.4159033298492432} +02/24/2022 17:36:19 - INFO - codeparrot_training - Step 12384: {'lr': 0.0004444511394977575, 'samples': 6341120, 'steps': 12384, 'loss/train': 1.7560759782791138} +02/24/2022 17:36:25 - INFO - codeparrot_training - Step 12385: {'lr': 0.0004444408551722919, 'samples': 6341632, 'steps': 12385, 'loss/train': 3.5217506885528564} +02/24/2022 17:36:28 - INFO - codeparrot_training - Step 12386: {'lr': 0.00044443057001390354, 'samples': 6342144, 'steps': 12386, 'loss/train': 1.8106368780136108} +02/24/2022 17:36:34 - INFO - codeparrot_training - Step 12387: {'lr': 0.00044442028402263636, 'samples': 6342656, 'steps': 12387, 'loss/train': 1.9505242109298706} +02/24/2022 17:36:37 - INFO - codeparrot_training - Step 12388: {'lr': 0.00044440999719853435, 'samples': 6343168, 'steps': 12388, 'loss/train': 2.5981204509735107} +02/24/2022 17:36:43 - INFO - codeparrot_training - Step 12389: {'lr': 0.0004443997095416417, 'samples': 6343680, 'steps': 12389, 'loss/train': 2.685946464538574} +02/24/2022 17:36:46 - INFO - codeparrot_training - Step 12390: {'lr': 0.0004443894210520024, 'samples': 6344192, 'steps': 12390, 'loss/train': 0.8968327641487122} +02/24/2022 17:36:52 - INFO - codeparrot_training - Step 12391: {'lr': 0.0004443791317296606, 'samples': 6344704, 'steps': 12391, 'loss/train': 2.7141690254211426} +02/24/2022 17:36:55 - INFO - codeparrot_training - Step 12392: {'lr': 0.0004443688415746602, 'samples': 6345216, 'steps': 12392, 'loss/train': 1.8027390241622925} +02/24/2022 17:37:01 - INFO - codeparrot_training - Step 12393: {'lr': 0.0004443585505870456, 'samples': 6345728, 'steps': 12393, 'loss/train': 2.7816147804260254} +02/24/2022 17:37:04 - INFO - codeparrot_training - Step 12394: {'lr': 0.0004443482587668605, 'samples': 6346240, 'steps': 12394, 'loss/train': 2.1532182693481445} +02/24/2022 17:37:10 - INFO - codeparrot_training - Step 12395: {'lr': 0.00044433796611414924, 'samples': 6346752, 'steps': 12395, 'loss/train': 2.3728513717651367} +02/24/2022 17:37:16 - INFO - codeparrot_training - Step 12396: {'lr': 0.0004443276726289558, 'samples': 6347264, 'steps': 12396, 'loss/train': 2.032423734664917} +02/24/2022 17:37:19 - INFO - codeparrot_training - Step 12397: {'lr': 0.00044431737831132433, 'samples': 6347776, 'steps': 12397, 'loss/train': 2.170013189315796} +02/24/2022 17:37:25 - INFO - codeparrot_training - Step 12398: {'lr': 0.000444307083161299, 'samples': 6348288, 'steps': 12398, 'loss/train': 2.83723521232605} +02/24/2022 17:37:28 - INFO - codeparrot_training - Step 12399: {'lr': 0.00044429678717892366, 'samples': 6348800, 'steps': 12399, 'loss/train': 2.6319732666015625} +02/24/2022 17:37:34 - INFO - codeparrot_training - Step 12400: {'lr': 0.0004442864903642427, 'samples': 6349312, 'steps': 12400, 'loss/train': 2.7992143630981445} +02/24/2022 17:37:37 - INFO - codeparrot_training - Step 12401: {'lr': 0.00044427619271730014, 'samples': 6349824, 'steps': 12401, 'loss/train': 2.6513051986694336} +02/24/2022 17:37:43 - INFO - codeparrot_training - Step 12402: {'lr': 0.00044426589423814003, 'samples': 6350336, 'steps': 12402, 'loss/train': 1.4257817268371582} +02/24/2022 17:37:46 - INFO - codeparrot_training - Step 12403: {'lr': 0.00044425559492680645, 'samples': 6350848, 'steps': 12403, 'loss/train': 2.921034574508667} +02/24/2022 17:37:52 - INFO - codeparrot_training - Step 12404: {'lr': 0.00044424529478334364, 'samples': 6351360, 'steps': 12404, 'loss/train': 2.418518543243408} +02/24/2022 17:37:55 - INFO - codeparrot_training - Step 12405: {'lr': 0.00044423499380779566, 'samples': 6351872, 'steps': 12405, 'loss/train': 1.8839558362960815} +02/24/2022 17:38:01 - INFO - codeparrot_training - Step 12406: {'lr': 0.00044422469200020666, 'samples': 6352384, 'steps': 12406, 'loss/train': 1.413009762763977} +02/24/2022 17:38:04 - INFO - codeparrot_training - Step 12407: {'lr': 0.0004442143893606207, 'samples': 6352896, 'steps': 12407, 'loss/train': 2.4514241218566895} +02/24/2022 17:38:11 - INFO - codeparrot_training - Step 12408: {'lr': 0.000444204085889082, 'samples': 6353408, 'steps': 12408, 'loss/train': 2.5671350955963135} +02/24/2022 17:38:14 - INFO - codeparrot_training - Step 12409: {'lr': 0.00044419378158563465, 'samples': 6353920, 'steps': 12409, 'loss/train': 2.8062143325805664} +02/24/2022 17:38:20 - INFO - codeparrot_training - Step 12410: {'lr': 0.0004441834764503228, 'samples': 6354432, 'steps': 12410, 'loss/train': 2.195612907409668} +02/24/2022 17:38:23 - INFO - codeparrot_training - Step 12411: {'lr': 0.0004441731704831906, 'samples': 6354944, 'steps': 12411, 'loss/train': 2.5319154262542725} +02/24/2022 17:38:29 - INFO - codeparrot_training - Step 12412: {'lr': 0.0004441628636842822, 'samples': 6355456, 'steps': 12412, 'loss/train': 2.2401556968688965} +02/24/2022 17:38:32 - INFO - codeparrot_training - Step 12413: {'lr': 0.0004441525560536418, 'samples': 6355968, 'steps': 12413, 'loss/train': 1.3314924240112305} +02/24/2022 17:38:37 - INFO - codeparrot_training - Step 12414: {'lr': 0.0004441422475913134, 'samples': 6356480, 'steps': 12414, 'loss/train': 2.3003504276275635} +02/24/2022 17:38:41 - INFO - codeparrot_training - Step 12415: {'lr': 0.0004441319382973413, 'samples': 6356992, 'steps': 12415, 'loss/train': 2.1423094272613525} +02/24/2022 17:38:46 - INFO - codeparrot_training - Step 12416: {'lr': 0.00044412162817176966, 'samples': 6357504, 'steps': 12416, 'loss/train': 2.11195707321167} +02/24/2022 17:38:50 - INFO - codeparrot_training - Step 12417: {'lr': 0.0004441113172146426, 'samples': 6358016, 'steps': 12417, 'loss/train': 1.632970929145813} +02/24/2022 17:38:56 - INFO - codeparrot_training - Step 12418: {'lr': 0.00044410100542600423, 'samples': 6358528, 'steps': 12418, 'loss/train': 2.1068115234375} +02/24/2022 17:39:00 - INFO - codeparrot_training - Step 12419: {'lr': 0.00044409069280589887, 'samples': 6359040, 'steps': 12419, 'loss/train': 2.087383985519409} +02/24/2022 17:39:05 - INFO - codeparrot_training - Step 12420: {'lr': 0.0004440803793543705, 'samples': 6359552, 'steps': 12420, 'loss/train': 2.1730427742004395} +02/24/2022 17:39:09 - INFO - codeparrot_training - Step 12421: {'lr': 0.00044407006507146354, 'samples': 6360064, 'steps': 12421, 'loss/train': 1.5413535833358765} +02/24/2022 17:39:15 - INFO - codeparrot_training - Step 12422: {'lr': 0.000444059749957222, 'samples': 6360576, 'steps': 12422, 'loss/train': 5.75645637512207} +02/24/2022 17:39:18 - INFO - codeparrot_training - Step 12423: {'lr': 0.00044404943401169005, 'samples': 6361088, 'steps': 12423, 'loss/train': 2.319354295730591} +02/24/2022 17:39:21 - INFO - codeparrot_training - Step 12424: {'lr': 0.00044403911723491196, 'samples': 6361600, 'steps': 12424, 'loss/train': 2.6930625438690186} +02/24/2022 17:39:27 - INFO - codeparrot_training - Step 12425: {'lr': 0.000444028799626932, 'samples': 6362112, 'steps': 12425, 'loss/train': 2.0183658599853516} +02/24/2022 17:39:30 - INFO - codeparrot_training - Step 12426: {'lr': 0.0004440184811877942, 'samples': 6362624, 'steps': 12426, 'loss/train': 1.2730671167373657} +02/24/2022 17:39:36 - INFO - codeparrot_training - Step 12427: {'lr': 0.0004440081619175428, 'samples': 6363136, 'steps': 12427, 'loss/train': 2.4213807582855225} +02/24/2022 17:39:39 - INFO - codeparrot_training - Step 12428: {'lr': 0.00044399784181622216, 'samples': 6363648, 'steps': 12428, 'loss/train': 2.496400833129883} +02/24/2022 17:39:46 - INFO - codeparrot_training - Step 12429: {'lr': 0.0004439875208838763, 'samples': 6364160, 'steps': 12429, 'loss/train': 2.9761877059936523} +02/24/2022 17:39:49 - INFO - codeparrot_training - Step 12430: {'lr': 0.00044397719912054944, 'samples': 6364672, 'steps': 12430, 'loss/train': 2.6443240642547607} +02/24/2022 17:39:55 - INFO - codeparrot_training - Step 12431: {'lr': 0.00044396687652628586, 'samples': 6365184, 'steps': 12431, 'loss/train': 1.5773134231567383} +02/24/2022 17:39:58 - INFO - codeparrot_training - Step 12432: {'lr': 0.00044395655310112985, 'samples': 6365696, 'steps': 12432, 'loss/train': 0.9131450653076172} +02/24/2022 17:40:04 - INFO - codeparrot_training - Step 12433: {'lr': 0.00044394622884512554, 'samples': 6366208, 'steps': 12433, 'loss/train': 2.3642284870147705} +02/24/2022 17:40:09 - INFO - codeparrot_training - Step 12434: {'lr': 0.00044393590375831716, 'samples': 6366720, 'steps': 12434, 'loss/train': 2.6101467609405518} +02/24/2022 17:40:12 - INFO - codeparrot_training - Step 12435: {'lr': 0.00044392557784074895, 'samples': 6367232, 'steps': 12435, 'loss/train': 2.382425308227539} +02/24/2022 17:40:18 - INFO - codeparrot_training - Step 12436: {'lr': 0.0004439152510924651, 'samples': 6367744, 'steps': 12436, 'loss/train': 2.946174144744873} +02/24/2022 17:40:22 - INFO - codeparrot_training - Step 12437: {'lr': 0.0004439049235135099, 'samples': 6368256, 'steps': 12437, 'loss/train': 1.6053688526153564} +02/24/2022 17:40:28 - INFO - codeparrot_training - Step 12438: {'lr': 0.0004438945951039276, 'samples': 6368768, 'steps': 12438, 'loss/train': 2.4451630115509033} +02/24/2022 17:40:31 - INFO - codeparrot_training - Step 12439: {'lr': 0.0004438842658637624, 'samples': 6369280, 'steps': 12439, 'loss/train': 2.1276416778564453} +02/24/2022 17:40:37 - INFO - codeparrot_training - Step 12440: {'lr': 0.0004438739357930586, 'samples': 6369792, 'steps': 12440, 'loss/train': 2.3103365898132324} +02/24/2022 17:40:40 - INFO - codeparrot_training - Step 12441: {'lr': 0.00044386360489186047, 'samples': 6370304, 'steps': 12441, 'loss/train': 2.244352340698242} +02/24/2022 17:40:46 - INFO - codeparrot_training - Step 12442: {'lr': 0.00044385327316021214, 'samples': 6370816, 'steps': 12442, 'loss/train': 3.0721912384033203} +02/24/2022 17:40:49 - INFO - codeparrot_training - Step 12443: {'lr': 0.000443842940598158, 'samples': 6371328, 'steps': 12443, 'loss/train': 1.8053581714630127} +02/24/2022 17:40:55 - INFO - codeparrot_training - Step 12444: {'lr': 0.00044383260720574214, 'samples': 6371840, 'steps': 12444, 'loss/train': 1.652478575706482} +02/24/2022 17:40:58 - INFO - codeparrot_training - Step 12445: {'lr': 0.00044382227298300905, 'samples': 6372352, 'steps': 12445, 'loss/train': 2.2976057529449463} +02/24/2022 17:41:04 - INFO - codeparrot_training - Step 12446: {'lr': 0.0004438119379300028, 'samples': 6372864, 'steps': 12446, 'loss/train': 1.7779102325439453} +02/24/2022 17:41:07 - INFO - codeparrot_training - Step 12447: {'lr': 0.00044380160204676787, 'samples': 6373376, 'steps': 12447, 'loss/train': 2.2807915210723877} +02/24/2022 17:41:13 - INFO - codeparrot_training - Step 12448: {'lr': 0.00044379126533334836, 'samples': 6373888, 'steps': 12448, 'loss/train': 2.2795181274414062} +02/24/2022 17:41:16 - INFO - codeparrot_training - Step 12449: {'lr': 0.00044378092778978864, 'samples': 6374400, 'steps': 12449, 'loss/train': 1.6085028648376465} +02/24/2022 17:41:22 - INFO - codeparrot_training - Step 12450: {'lr': 0.00044377058941613283, 'samples': 6374912, 'steps': 12450, 'loss/train': 1.0612393617630005} +02/24/2022 17:41:25 - INFO - codeparrot_training - Step 12451: {'lr': 0.0004437602502124255, 'samples': 6375424, 'steps': 12451, 'loss/train': 1.4914125204086304} +02/24/2022 17:41:31 - INFO - codeparrot_training - Step 12452: {'lr': 0.0004437499101787107, 'samples': 6375936, 'steps': 12452, 'loss/train': 0.9224465489387512} +02/24/2022 17:41:34 - INFO - codeparrot_training - Step 12453: {'lr': 0.0004437395693150328, 'samples': 6376448, 'steps': 12453, 'loss/train': 2.0059292316436768} +02/24/2022 17:41:41 - INFO - codeparrot_training - Step 12454: {'lr': 0.0004437292276214361, 'samples': 6376960, 'steps': 12454, 'loss/train': 3.6045403480529785} +02/24/2022 17:41:44 - INFO - codeparrot_training - Step 12455: {'lr': 0.000443718885097965, 'samples': 6377472, 'steps': 12455, 'loss/train': 1.1657453775405884} +02/24/2022 17:41:50 - INFO - codeparrot_training - Step 12456: {'lr': 0.0004437085417446636, 'samples': 6377984, 'steps': 12456, 'loss/train': 3.50653338432312} +02/24/2022 17:41:53 - INFO - codeparrot_training - Step 12457: {'lr': 0.0004436981975615764, 'samples': 6378496, 'steps': 12457, 'loss/train': 1.7108455896377563} +02/24/2022 17:41:59 - INFO - codeparrot_training - Step 12458: {'lr': 0.00044368785254874754, 'samples': 6379008, 'steps': 12458, 'loss/train': 3.026381492614746} +02/24/2022 17:42:02 - INFO - codeparrot_training - Step 12459: {'lr': 0.00044367750670622143, 'samples': 6379520, 'steps': 12459, 'loss/train': 1.752224326133728} +02/24/2022 17:42:08 - INFO - codeparrot_training - Step 12460: {'lr': 0.0004436671600340424, 'samples': 6380032, 'steps': 12460, 'loss/train': 2.599832773208618} +02/24/2022 17:42:11 - INFO - codeparrot_training - Step 12461: {'lr': 0.00044365681253225476, 'samples': 6380544, 'steps': 12461, 'loss/train': 2.228529930114746} +02/24/2022 17:42:17 - INFO - codeparrot_training - Step 12462: {'lr': 0.0004436464642009029, 'samples': 6381056, 'steps': 12462, 'loss/train': 2.433157444000244} +02/24/2022 17:42:20 - INFO - codeparrot_training - Step 12463: {'lr': 0.00044363611504003096, 'samples': 6381568, 'steps': 12463, 'loss/train': 1.9093401432037354} +02/24/2022 17:42:26 - INFO - codeparrot_training - Step 12464: {'lr': 0.00044362576504968344, 'samples': 6382080, 'steps': 12464, 'loss/train': 0.8747289180755615} +02/24/2022 17:42:30 - INFO - codeparrot_training - Step 12465: {'lr': 0.0004436154142299046, 'samples': 6382592, 'steps': 12465, 'loss/train': 1.8154183626174927} +02/24/2022 17:42:35 - INFO - codeparrot_training - Step 12466: {'lr': 0.00044360506258073884, 'samples': 6383104, 'steps': 12466, 'loss/train': 2.9957594871520996} +02/24/2022 17:42:39 - INFO - codeparrot_training - Step 12467: {'lr': 0.0004435947101022305, 'samples': 6383616, 'steps': 12467, 'loss/train': 1.6825523376464844} +02/24/2022 17:42:44 - INFO - codeparrot_training - Step 12468: {'lr': 0.0004435843567944239, 'samples': 6384128, 'steps': 12468, 'loss/train': 1.962235450744629} +02/24/2022 17:42:48 - INFO - codeparrot_training - Step 12469: {'lr': 0.0004435740026573633, 'samples': 6384640, 'steps': 12469, 'loss/train': 2.2906713485717773} +02/24/2022 17:42:53 - INFO - codeparrot_training - Step 12470: {'lr': 0.0004435636476910932, 'samples': 6385152, 'steps': 12470, 'loss/train': 1.7940757274627686} +02/24/2022 17:42:57 - INFO - codeparrot_training - Step 12471: {'lr': 0.00044355329189565783, 'samples': 6385664, 'steps': 12471, 'loss/train': 2.1791868209838867} +02/24/2022 17:43:02 - INFO - codeparrot_training - Step 12472: {'lr': 0.00044354293527110167, 'samples': 6386176, 'steps': 12472, 'loss/train': 1.994907021522522} +02/24/2022 17:43:06 - INFO - codeparrot_training - Step 12473: {'lr': 0.000443532577817469, 'samples': 6386688, 'steps': 12473, 'loss/train': 2.4598143100738525} +02/24/2022 17:43:12 - INFO - codeparrot_training - Step 12474: {'lr': 0.0004435222195348043, 'samples': 6387200, 'steps': 12474, 'loss/train': 2.6731014251708984} +02/24/2022 17:43:15 - INFO - codeparrot_training - Step 12475: {'lr': 0.00044351186042315184, 'samples': 6387712, 'steps': 12475, 'loss/train': 2.357680082321167} +02/24/2022 17:43:21 - INFO - codeparrot_training - Step 12476: {'lr': 0.000443501500482556, 'samples': 6388224, 'steps': 12476, 'loss/train': 2.2906453609466553} +02/24/2022 17:43:24 - INFO - codeparrot_training - Step 12477: {'lr': 0.0004434911397130612, 'samples': 6388736, 'steps': 12477, 'loss/train': 2.5177316665649414} +02/24/2022 17:43:30 - INFO - codeparrot_training - Step 12478: {'lr': 0.0004434807781147117, 'samples': 6389248, 'steps': 12478, 'loss/train': 1.243457555770874} +02/24/2022 17:43:33 - INFO - codeparrot_training - Step 12479: {'lr': 0.0004434704156875521, 'samples': 6389760, 'steps': 12479, 'loss/train': 2.449394941329956} +02/24/2022 17:43:39 - INFO - codeparrot_training - Step 12480: {'lr': 0.00044346005243162654, 'samples': 6390272, 'steps': 12480, 'loss/train': 1.990755558013916} +02/24/2022 17:43:43 - INFO - codeparrot_training - Step 12481: {'lr': 0.0004434496883469796, 'samples': 6390784, 'steps': 12481, 'loss/train': 2.4250757694244385} +02/24/2022 17:43:49 - INFO - codeparrot_training - Step 12482: {'lr': 0.0004434393234336557, 'samples': 6391296, 'steps': 12482, 'loss/train': 2.508273124694824} +02/24/2022 17:43:52 - INFO - codeparrot_training - Step 12483: {'lr': 0.0004434289576916991, 'samples': 6391808, 'steps': 12483, 'loss/train': 2.3634986877441406} +02/24/2022 17:43:56 - INFO - codeparrot_training - Step 12484: {'lr': 0.00044341859112115425, 'samples': 6392320, 'steps': 12484, 'loss/train': 4.695265769958496} +02/24/2022 17:44:01 - INFO - codeparrot_training - Step 12485: {'lr': 0.00044340822372206557, 'samples': 6392832, 'steps': 12485, 'loss/train': 0.9202659130096436} +02/24/2022 17:44:05 - INFO - codeparrot_training - Step 12486: {'lr': 0.00044339785549447756, 'samples': 6393344, 'steps': 12486, 'loss/train': 2.216762065887451} +02/24/2022 17:44:11 - INFO - codeparrot_training - Step 12487: {'lr': 0.00044338748643843446, 'samples': 6393856, 'steps': 12487, 'loss/train': 3.6331098079681396} +02/24/2022 17:44:14 - INFO - codeparrot_training - Step 12488: {'lr': 0.00044337711655398083, 'samples': 6394368, 'steps': 12488, 'loss/train': 1.5643867254257202} +02/24/2022 17:44:20 - INFO - codeparrot_training - Step 12489: {'lr': 0.00044336674584116096, 'samples': 6394880, 'steps': 12489, 'loss/train': 2.4457035064697266} +02/24/2022 17:44:23 - INFO - codeparrot_training - Step 12490: {'lr': 0.0004433563743000195, 'samples': 6395392, 'steps': 12490, 'loss/train': 2.6636414527893066} +02/24/2022 17:44:29 - INFO - codeparrot_training - Step 12491: {'lr': 0.0004433460019306006, 'samples': 6395904, 'steps': 12491, 'loss/train': 0.632786750793457} +02/24/2022 17:44:32 - INFO - codeparrot_training - Step 12492: {'lr': 0.00044333562873294884, 'samples': 6396416, 'steps': 12492, 'loss/train': 1.246692180633545} +02/24/2022 17:44:38 - INFO - codeparrot_training - Step 12493: {'lr': 0.00044332525470710865, 'samples': 6396928, 'steps': 12493, 'loss/train': 2.4409217834472656} +02/24/2022 17:44:41 - INFO - codeparrot_training - Step 12494: {'lr': 0.0004433148798531245, 'samples': 6397440, 'steps': 12494, 'loss/train': 2.7363319396972656} +02/24/2022 17:44:47 - INFO - codeparrot_training - Step 12495: {'lr': 0.0004433045041710407, 'samples': 6397952, 'steps': 12495, 'loss/train': 1.8529976606369019} +02/24/2022 17:44:50 - INFO - codeparrot_training - Step 12496: {'lr': 0.0004432941276609018, 'samples': 6398464, 'steps': 12496, 'loss/train': 2.633256196975708} +02/24/2022 17:44:56 - INFO - codeparrot_training - Step 12497: {'lr': 0.00044328375032275227, 'samples': 6398976, 'steps': 12497, 'loss/train': 2.59234619140625} +02/24/2022 17:44:59 - INFO - codeparrot_training - Step 12498: {'lr': 0.00044327337215663656, 'samples': 6399488, 'steps': 12498, 'loss/train': 1.4717844724655151} +02/24/2022 17:45:05 - INFO - codeparrot_training - Step 12499: {'lr': 0.000443262993162599, 'samples': 6400000, 'steps': 12499, 'loss/train': 1.216798186302185} +02/24/2022 17:45:08 - INFO - codeparrot_training - Step 12500: {'lr': 0.0004432526133406842, 'samples': 6400512, 'steps': 12500, 'loss/train': 2.088089942932129} +02/24/2022 17:45:14 - INFO - codeparrot_training - Step 12501: {'lr': 0.00044324223269093666, 'samples': 6401024, 'steps': 12501, 'loss/train': 1.7118074893951416} +02/24/2022 17:45:18 - INFO - codeparrot_training - Step 12502: {'lr': 0.00044323185121340064, 'samples': 6401536, 'steps': 12502, 'loss/train': 2.1711461544036865} +02/24/2022 17:45:23 - INFO - codeparrot_training - Step 12503: {'lr': 0.00044322146890812076, 'samples': 6402048, 'steps': 12503, 'loss/train': 1.2165560722351074} +02/24/2022 17:45:27 - INFO - codeparrot_training - Step 12504: {'lr': 0.0004432110857751415, 'samples': 6402560, 'steps': 12504, 'loss/train': 1.8111298084259033} +02/24/2022 17:45:32 - INFO - codeparrot_training - Step 12505: {'lr': 0.0004432007018145072, 'samples': 6403072, 'steps': 12505, 'loss/train': 2.0570878982543945} +02/24/2022 17:45:36 - INFO - codeparrot_training - Step 12506: {'lr': 0.00044319031702626255, 'samples': 6403584, 'steps': 12506, 'loss/train': 1.601704478263855} +02/24/2022 17:45:41 - INFO - codeparrot_training - Step 12507: {'lr': 0.0004431799314104519, 'samples': 6404096, 'steps': 12507, 'loss/train': 1.5467791557312012} +02/24/2022 17:45:45 - INFO - codeparrot_training - Step 12508: {'lr': 0.0004431695449671197, 'samples': 6404608, 'steps': 12508, 'loss/train': 1.7713934183120728} +02/24/2022 17:45:50 - INFO - codeparrot_training - Step 12509: {'lr': 0.00044315915769631054, 'samples': 6405120, 'steps': 12509, 'loss/train': 1.8101553916931152} +02/24/2022 17:45:54 - INFO - codeparrot_training - Step 12510: {'lr': 0.0004431487695980689, 'samples': 6405632, 'steps': 12510, 'loss/train': 2.5609304904937744} +02/24/2022 17:45:59 - INFO - codeparrot_training - Step 12511: {'lr': 0.0004431383806724393, 'samples': 6406144, 'steps': 12511, 'loss/train': 1.944899082183838} +02/24/2022 17:46:05 - INFO - codeparrot_training - Step 12512: {'lr': 0.0004431279909194661, 'samples': 6406656, 'steps': 12512, 'loss/train': 1.7914735078811646} +02/24/2022 17:46:09 - INFO - codeparrot_training - Step 12513: {'lr': 0.000443117600339194, 'samples': 6407168, 'steps': 12513, 'loss/train': 1.561868667602539} +02/24/2022 17:46:14 - INFO - codeparrot_training - Step 12514: {'lr': 0.0004431072089316674, 'samples': 6407680, 'steps': 12514, 'loss/train': 2.2434191703796387} +02/24/2022 17:46:18 - INFO - codeparrot_training - Step 12515: {'lr': 0.0004430968166969308, 'samples': 6408192, 'steps': 12515, 'loss/train': 1.392062783241272} +02/24/2022 17:46:23 - INFO - codeparrot_training - Step 12516: {'lr': 0.00044308642363502884, 'samples': 6408704, 'steps': 12516, 'loss/train': 1.9453023672103882} +02/24/2022 17:46:27 - INFO - codeparrot_training - Step 12517: {'lr': 0.00044307602974600594, 'samples': 6409216, 'steps': 12517, 'loss/train': 2.2774412631988525} +02/24/2022 17:46:32 - INFO - codeparrot_training - Step 12518: {'lr': 0.00044306563502990656, 'samples': 6409728, 'steps': 12518, 'loss/train': 1.5544239282608032} +02/24/2022 17:46:36 - INFO - codeparrot_training - Step 12519: {'lr': 0.0004430552394867753, 'samples': 6410240, 'steps': 12519, 'loss/train': 0.8071713447570801} +02/24/2022 17:46:41 - INFO - codeparrot_training - Step 12520: {'lr': 0.0004430448431166567, 'samples': 6410752, 'steps': 12520, 'loss/train': 2.0709426403045654} +02/24/2022 17:46:45 - INFO - codeparrot_training - Step 12521: {'lr': 0.00044303444591959533, 'samples': 6411264, 'steps': 12521, 'loss/train': 2.409284830093384} +02/24/2022 17:46:51 - INFO - codeparrot_training - Step 12522: {'lr': 0.00044302404789563573, 'samples': 6411776, 'steps': 12522, 'loss/train': 1.6684871912002563} +02/24/2022 17:46:54 - INFO - codeparrot_training - Step 12523: {'lr': 0.0004430136490448223, 'samples': 6412288, 'steps': 12523, 'loss/train': 1.6257758140563965} +02/24/2022 17:47:00 - INFO - codeparrot_training - Step 12524: {'lr': 0.0004430032493671998, 'samples': 6412800, 'steps': 12524, 'loss/train': 2.116621255874634} +02/24/2022 17:47:03 - INFO - codeparrot_training - Step 12525: {'lr': 0.0004429928488628126, 'samples': 6413312, 'steps': 12525, 'loss/train': 2.120816707611084} +02/24/2022 17:47:09 - INFO - codeparrot_training - Step 12526: {'lr': 0.00044298244753170535, 'samples': 6413824, 'steps': 12526, 'loss/train': 2.6973717212677} +02/24/2022 17:47:12 - INFO - codeparrot_training - Step 12527: {'lr': 0.00044297204537392253, 'samples': 6414336, 'steps': 12527, 'loss/train': 2.123155117034912} +02/24/2022 17:47:18 - INFO - codeparrot_training - Step 12528: {'lr': 0.00044296164238950874, 'samples': 6414848, 'steps': 12528, 'loss/train': 2.4255053997039795} +02/24/2022 17:47:21 - INFO - codeparrot_training - Step 12529: {'lr': 0.0004429512385785086, 'samples': 6415360, 'steps': 12529, 'loss/train': 2.0674798488616943} +02/24/2022 17:47:27 - INFO - codeparrot_training - Step 12530: {'lr': 0.0004429408339409666, 'samples': 6415872, 'steps': 12530, 'loss/train': 2.0020055770874023} +02/24/2022 17:47:31 - INFO - codeparrot_training - Step 12531: {'lr': 0.00044293042847692735, 'samples': 6416384, 'steps': 12531, 'loss/train': 2.289703845977783} +02/24/2022 17:47:37 - INFO - codeparrot_training - Step 12532: {'lr': 0.00044292002218643533, 'samples': 6416896, 'steps': 12532, 'loss/train': 2.27740216255188} +02/24/2022 17:47:40 - INFO - codeparrot_training - Step 12533: {'lr': 0.00044290961506953525, 'samples': 6417408, 'steps': 12533, 'loss/train': 1.8238201141357422} +02/24/2022 17:47:46 - INFO - codeparrot_training - Step 12534: {'lr': 0.0004428992071262716, 'samples': 6417920, 'steps': 12534, 'loss/train': 1.3410807847976685} +02/24/2022 17:47:49 - INFO - codeparrot_training - Step 12535: {'lr': 0.00044288879835668903, 'samples': 6418432, 'steps': 12535, 'loss/train': 2.37658429145813} +02/24/2022 17:47:55 - INFO - codeparrot_training - Step 12536: {'lr': 0.0004428783887608321, 'samples': 6418944, 'steps': 12536, 'loss/train': 2.2396929264068604} +02/24/2022 17:47:58 - INFO - codeparrot_training - Step 12537: {'lr': 0.0004428679783387454, 'samples': 6419456, 'steps': 12537, 'loss/train': 2.3694121837615967} +02/24/2022 17:48:04 - INFO - codeparrot_training - Step 12538: {'lr': 0.00044285756709047354, 'samples': 6419968, 'steps': 12538, 'loss/train': 2.369859218597412} +02/24/2022 17:48:08 - INFO - codeparrot_training - Step 12539: {'lr': 0.0004428471550160611, 'samples': 6420480, 'steps': 12539, 'loss/train': 0.912290096282959} +02/24/2022 17:48:11 - INFO - codeparrot_training - Step 12540: {'lr': 0.00044283674211555266, 'samples': 6420992, 'steps': 12540, 'loss/train': 0.6784152984619141} +02/24/2022 17:48:17 - INFO - codeparrot_training - Step 12541: {'lr': 0.0004428263283889928, 'samples': 6421504, 'steps': 12541, 'loss/train': 1.8253849744796753} +02/24/2022 17:48:20 - INFO - codeparrot_training - Step 12542: {'lr': 0.0004428159138364263, 'samples': 6422016, 'steps': 12542, 'loss/train': 2.7968780994415283} +02/24/2022 17:48:26 - INFO - codeparrot_training - Step 12543: {'lr': 0.0004428054984578975, 'samples': 6422528, 'steps': 12543, 'loss/train': 1.9343206882476807} +02/24/2022 17:48:29 - INFO - codeparrot_training - Step 12544: {'lr': 0.0004427950822534513, 'samples': 6423040, 'steps': 12544, 'loss/train': 2.3636536598205566} +02/24/2022 17:48:35 - INFO - codeparrot_training - Step 12545: {'lr': 0.0004427846652231321, 'samples': 6423552, 'steps': 12545, 'loss/train': 0.32711923122406006} +02/24/2022 17:48:38 - INFO - codeparrot_training - Step 12546: {'lr': 0.0004427742473669847, 'samples': 6424064, 'steps': 12546, 'loss/train': 1.8101640939712524} +02/24/2022 17:48:45 - INFO - codeparrot_training - Step 12547: {'lr': 0.00044276382868505356, 'samples': 6424576, 'steps': 12547, 'loss/train': 2.392697334289551} +02/24/2022 17:48:48 - INFO - codeparrot_training - Step 12548: {'lr': 0.0004427534091773834, 'samples': 6425088, 'steps': 12548, 'loss/train': 1.978995680809021} +02/24/2022 17:48:54 - INFO - codeparrot_training - Step 12549: {'lr': 0.00044274298884401886, 'samples': 6425600, 'steps': 12549, 'loss/train': 2.957902431488037} +02/24/2022 17:48:57 - INFO - codeparrot_training - Step 12550: {'lr': 0.0004427325676850045, 'samples': 6426112, 'steps': 12550, 'loss/train': 1.2002750635147095} +02/24/2022 17:49:03 - INFO - codeparrot_training - Step 12551: {'lr': 0.00044272214570038513, 'samples': 6426624, 'steps': 12551, 'loss/train': 2.8818886280059814} +02/24/2022 17:49:06 - INFO - codeparrot_training - Step 12552: {'lr': 0.00044271172289020525, 'samples': 6427136, 'steps': 12552, 'loss/train': 2.1332151889801025} +02/24/2022 17:49:12 - INFO - codeparrot_training - Step 12553: {'lr': 0.00044270129925450945, 'samples': 6427648, 'steps': 12553, 'loss/train': 1.5723611116409302} +02/24/2022 17:49:15 - INFO - codeparrot_training - Step 12554: {'lr': 0.00044269087479334256, 'samples': 6428160, 'steps': 12554, 'loss/train': 2.017009735107422} +02/24/2022 17:49:21 - INFO - codeparrot_training - Step 12555: {'lr': 0.00044268044950674913, 'samples': 6428672, 'steps': 12555, 'loss/train': 1.7089089155197144} +02/24/2022 17:49:24 - INFO - codeparrot_training - Step 12556: {'lr': 0.0004426700233947738, 'samples': 6429184, 'steps': 12556, 'loss/train': 2.3912200927734375} +02/24/2022 17:49:31 - INFO - codeparrot_training - Step 12557: {'lr': 0.00044265959645746136, 'samples': 6429696, 'steps': 12557, 'loss/train': 1.775288701057434} +02/24/2022 17:49:34 - INFO - codeparrot_training - Step 12558: {'lr': 0.0004426491686948563, 'samples': 6430208, 'steps': 12558, 'loss/train': 2.274704933166504} +02/24/2022 17:49:40 - INFO - codeparrot_training - Step 12559: {'lr': 0.00044263874010700343, 'samples': 6430720, 'steps': 12559, 'loss/train': 1.691240906715393} +02/24/2022 17:49:43 - INFO - codeparrot_training - Step 12560: {'lr': 0.0004426283106939473, 'samples': 6431232, 'steps': 12560, 'loss/train': 2.5085697174072266} +02/24/2022 17:49:48 - INFO - codeparrot_training - Step 12561: {'lr': 0.0004426178804557327, 'samples': 6431744, 'steps': 12561, 'loss/train': 1.1207683086395264} +02/24/2022 17:49:52 - INFO - codeparrot_training - Step 12562: {'lr': 0.0004426074493924043, 'samples': 6432256, 'steps': 12562, 'loss/train': 2.3670997619628906} +02/24/2022 17:49:57 - INFO - codeparrot_training - Step 12563: {'lr': 0.00044259701750400674, 'samples': 6432768, 'steps': 12563, 'loss/train': 2.062629461288452} +02/24/2022 17:50:01 - INFO - codeparrot_training - Step 12564: {'lr': 0.00044258658479058463, 'samples': 6433280, 'steps': 12564, 'loss/train': 2.1066765785217285} +02/24/2022 17:50:06 - INFO - codeparrot_training - Step 12565: {'lr': 0.00044257615125218273, 'samples': 6433792, 'steps': 12565, 'loss/train': 1.4604438543319702} +02/24/2022 17:50:10 - INFO - codeparrot_training - Step 12566: {'lr': 0.00044256571688884583, 'samples': 6434304, 'steps': 12566, 'loss/train': 1.3201088905334473} +02/24/2022 17:50:15 - INFO - codeparrot_training - Step 12567: {'lr': 0.00044255528170061853, 'samples': 6434816, 'steps': 12567, 'loss/train': 2.093223810195923} +02/24/2022 17:50:19 - INFO - codeparrot_training - Step 12568: {'lr': 0.00044254484568754556, 'samples': 6435328, 'steps': 12568, 'loss/train': 1.8225075006484985} +02/24/2022 17:50:25 - INFO - codeparrot_training - Step 12569: {'lr': 0.0004425344088496716, 'samples': 6435840, 'steps': 12569, 'loss/train': 1.8812451362609863} +02/24/2022 17:50:29 - INFO - codeparrot_training - Step 12570: {'lr': 0.00044252397118704133, 'samples': 6436352, 'steps': 12570, 'loss/train': 1.8809235095977783} +02/24/2022 17:50:34 - INFO - codeparrot_training - Step 12571: {'lr': 0.0004425135326996995, 'samples': 6436864, 'steps': 12571, 'loss/train': 2.5634331703186035} +02/24/2022 17:50:38 - INFO - codeparrot_training - Step 12572: {'lr': 0.0004425030933876909, 'samples': 6437376, 'steps': 12572, 'loss/train': 7.52122163772583} +02/24/2022 17:50:43 - INFO - codeparrot_training - Step 12573: {'lr': 0.00044249265325106013, 'samples': 6437888, 'steps': 12573, 'loss/train': 2.3944826126098633} +02/24/2022 17:50:47 - INFO - codeparrot_training - Step 12574: {'lr': 0.000442482212289852, 'samples': 6438400, 'steps': 12574, 'loss/train': 1.7205818891525269} +02/24/2022 17:50:52 - INFO - codeparrot_training - Step 12575: {'lr': 0.00044247177050411114, 'samples': 6438912, 'steps': 12575, 'loss/train': 2.094712018966675} +02/24/2022 17:50:56 - INFO - codeparrot_training - Step 12576: {'lr': 0.00044246132789388235, 'samples': 6439424, 'steps': 12576, 'loss/train': 2.1907360553741455} +02/24/2022 17:51:01 - INFO - codeparrot_training - Step 12577: {'lr': 0.00044245088445921035, 'samples': 6439936, 'steps': 12577, 'loss/train': 1.7764767408370972} +02/24/2022 17:51:05 - INFO - codeparrot_training - Step 12578: {'lr': 0.00044244044020013985, 'samples': 6440448, 'steps': 12578, 'loss/train': 1.8151516914367676} +02/24/2022 17:51:11 - INFO - codeparrot_training - Step 12579: {'lr': 0.0004424299951167156, 'samples': 6440960, 'steps': 12579, 'loss/train': 2.9270567893981934} +02/24/2022 17:51:14 - INFO - codeparrot_training - Step 12580: {'lr': 0.0004424195492089824, 'samples': 6441472, 'steps': 12580, 'loss/train': 2.571474552154541} +02/24/2022 17:51:20 - INFO - codeparrot_training - Step 12581: {'lr': 0.0004424091024769849, 'samples': 6441984, 'steps': 12581, 'loss/train': 1.6912142038345337} +02/24/2022 17:51:23 - INFO - codeparrot_training - Step 12582: {'lr': 0.00044239865492076794, 'samples': 6442496, 'steps': 12582, 'loss/train': 1.6894973516464233} +02/24/2022 17:51:29 - INFO - codeparrot_training - Step 12583: {'lr': 0.0004423882065403762, 'samples': 6443008, 'steps': 12583, 'loss/train': 2.1041579246520996} +02/24/2022 17:51:32 - INFO - codeparrot_training - Step 12584: {'lr': 0.0004423777573358545, 'samples': 6443520, 'steps': 12584, 'loss/train': 1.6610794067382812} +02/24/2022 17:51:38 - INFO - codeparrot_training - Step 12585: {'lr': 0.0004423673073072476, 'samples': 6444032, 'steps': 12585, 'loss/train': 1.4454855918884277} +02/24/2022 17:51:41 - INFO - codeparrot_training - Step 12586: {'lr': 0.0004423568564546002, 'samples': 6444544, 'steps': 12586, 'loss/train': 2.52445650100708} +02/24/2022 17:51:47 - INFO - codeparrot_training - Step 12587: {'lr': 0.00044234640477795707, 'samples': 6445056, 'steps': 12587, 'loss/train': 2.1523823738098145} +02/24/2022 17:51:50 - INFO - codeparrot_training - Step 12588: {'lr': 0.0004423359522773631, 'samples': 6445568, 'steps': 12588, 'loss/train': 2.0760316848754883} +02/24/2022 17:51:56 - INFO - codeparrot_training - Step 12589: {'lr': 0.00044232549895286294, 'samples': 6446080, 'steps': 12589, 'loss/train': 1.85196852684021} +02/24/2022 17:51:59 - INFO - codeparrot_training - Step 12590: {'lr': 0.00044231504480450145, 'samples': 6446592, 'steps': 12590, 'loss/train': 2.3095033168792725} +02/24/2022 17:52:06 - INFO - codeparrot_training - Step 12591: {'lr': 0.0004423045898323233, 'samples': 6447104, 'steps': 12591, 'loss/train': 2.6988797187805176} +02/24/2022 17:52:09 - INFO - codeparrot_training - Step 12592: {'lr': 0.0004422941340363734, 'samples': 6447616, 'steps': 12592, 'loss/train': 3.069305896759033} +02/24/2022 17:52:14 - INFO - codeparrot_training - Step 12593: {'lr': 0.0004422836774166965, 'samples': 6448128, 'steps': 12593, 'loss/train': 2.4666504859924316} +02/24/2022 17:52:18 - INFO - codeparrot_training - Step 12594: {'lr': 0.00044227321997333737, 'samples': 6448640, 'steps': 12594, 'loss/train': 2.4254045486450195} +02/24/2022 17:52:24 - INFO - codeparrot_training - Step 12595: {'lr': 0.0004422627617063408, 'samples': 6449152, 'steps': 12595, 'loss/train': 2.7385494709014893} +02/24/2022 17:52:27 - INFO - codeparrot_training - Step 12596: {'lr': 0.00044225230261575165, 'samples': 6449664, 'steps': 12596, 'loss/train': 0.8888677358627319} +02/24/2022 17:52:33 - INFO - codeparrot_training - Step 12597: {'lr': 0.00044224184270161466, 'samples': 6450176, 'steps': 12597, 'loss/train': 2.4945390224456787} +02/24/2022 17:52:38 - INFO - codeparrot_training - Step 12598: {'lr': 0.0004422313819639747, 'samples': 6450688, 'steps': 12598, 'loss/train': 1.8744642734527588} +02/24/2022 17:52:41 - INFO - codeparrot_training - Step 12599: {'lr': 0.0004422209204028765, 'samples': 6451200, 'steps': 12599, 'loss/train': 1.5629653930664062} +02/24/2022 17:52:47 - INFO - codeparrot_training - Step 12600: {'lr': 0.0004422104580183649, 'samples': 6451712, 'steps': 12600, 'loss/train': 2.2322680950164795} +02/24/2022 17:52:50 - INFO - codeparrot_training - Step 12601: {'lr': 0.0004421999948104848, 'samples': 6452224, 'steps': 12601, 'loss/train': 2.2347562313079834} +02/24/2022 17:52:56 - INFO - codeparrot_training - Step 12602: {'lr': 0.00044218953077928083, 'samples': 6452736, 'steps': 12602, 'loss/train': 2.259594440460205} +02/24/2022 17:52:59 - INFO - codeparrot_training - Step 12603: {'lr': 0.000442179065924798, 'samples': 6453248, 'steps': 12603, 'loss/train': 2.3209760189056396} +02/24/2022 17:53:06 - INFO - codeparrot_training - Step 12604: {'lr': 0.0004421686002470811, 'samples': 6453760, 'steps': 12604, 'loss/train': 2.8240911960601807} +02/24/2022 17:53:10 - INFO - codeparrot_training - Step 12605: {'lr': 0.0004421581337461749, 'samples': 6454272, 'steps': 12605, 'loss/train': 2.103368043899536} +02/24/2022 17:53:15 - INFO - codeparrot_training - Step 12606: {'lr': 0.00044214766642212435, 'samples': 6454784, 'steps': 12606, 'loss/train': 2.179652690887451} +02/24/2022 17:53:19 - INFO - codeparrot_training - Step 12607: {'lr': 0.00044213719827497413, 'samples': 6455296, 'steps': 12607, 'loss/train': 2.760122537612915} +02/24/2022 17:53:24 - INFO - codeparrot_training - Step 12608: {'lr': 0.0004421267293047692, 'samples': 6455808, 'steps': 12608, 'loss/train': 2.891826629638672} +02/24/2022 17:53:28 - INFO - codeparrot_training - Step 12609: {'lr': 0.00044211625951155433, 'samples': 6456320, 'steps': 12609, 'loss/train': 2.0851290225982666} +02/24/2022 17:53:33 - INFO - codeparrot_training - Step 12610: {'lr': 0.00044210578889537446, 'samples': 6456832, 'steps': 12610, 'loss/train': 2.0992162227630615} +02/24/2022 17:53:37 - INFO - codeparrot_training - Step 12611: {'lr': 0.0004420953174562743, 'samples': 6457344, 'steps': 12611, 'loss/train': 1.9473077058792114} +02/24/2022 17:53:42 - INFO - codeparrot_training - Step 12612: {'lr': 0.0004420848451942989, 'samples': 6457856, 'steps': 12612, 'loss/train': 2.036018133163452} +02/24/2022 17:53:46 - INFO - codeparrot_training - Step 12613: {'lr': 0.000442074372109493, 'samples': 6458368, 'steps': 12613, 'loss/train': 1.6547542810440063} +02/24/2022 17:53:52 - INFO - codeparrot_training - Step 12614: {'lr': 0.0004420638982019014, 'samples': 6458880, 'steps': 12614, 'loss/train': 3.098187208175659} +02/24/2022 17:53:56 - INFO - codeparrot_training - Step 12615: {'lr': 0.0004420534234715691, 'samples': 6459392, 'steps': 12615, 'loss/train': 2.1233577728271484} +02/24/2022 17:54:01 - INFO - codeparrot_training - Step 12616: {'lr': 0.00044204294791854094, 'samples': 6459904, 'steps': 12616, 'loss/train': 0.32390645146369934} +02/24/2022 17:54:05 - INFO - codeparrot_training - Step 12617: {'lr': 0.00044203247154286175, 'samples': 6460416, 'steps': 12617, 'loss/train': 0.4583117961883545} +02/24/2022 17:54:10 - INFO - codeparrot_training - Step 12618: {'lr': 0.0004420219943445765, 'samples': 6460928, 'steps': 12618, 'loss/train': 1.7064653635025024} +02/24/2022 17:54:14 - INFO - codeparrot_training - Step 12619: {'lr': 0.0004420115163237299, 'samples': 6461440, 'steps': 12619, 'loss/train': 3.1020309925079346} +02/24/2022 17:54:19 - INFO - codeparrot_training - Step 12620: {'lr': 0.000442001037480367, 'samples': 6461952, 'steps': 12620, 'loss/train': 1.4251264333724976} +02/24/2022 17:54:23 - INFO - codeparrot_training - Step 12621: {'lr': 0.0004419905578145326, 'samples': 6462464, 'steps': 12621, 'loss/train': 2.4654786586761475} +02/24/2022 17:54:28 - INFO - codeparrot_training - Step 12622: {'lr': 0.00044198007732627155, 'samples': 6462976, 'steps': 12622, 'loss/train': 1.6898380517959595} +02/24/2022 17:54:32 - INFO - codeparrot_training - Step 12623: {'lr': 0.00044196959601562884, 'samples': 6463488, 'steps': 12623, 'loss/train': 2.1188395023345947} +02/24/2022 17:54:37 - INFO - codeparrot_training - Step 12624: {'lr': 0.0004419591138826494, 'samples': 6464000, 'steps': 12624, 'loss/train': 2.0805227756500244} +02/24/2022 17:54:41 - INFO - codeparrot_training - Step 12625: {'lr': 0.000441948630927378, 'samples': 6464512, 'steps': 12625, 'loss/train': 1.6058732271194458} +02/24/2022 17:54:47 - INFO - codeparrot_training - Step 12626: {'lr': 0.0004419381471498597, 'samples': 6465024, 'steps': 12626, 'loss/train': 2.070448160171509} +02/24/2022 17:54:51 - INFO - codeparrot_training - Step 12627: {'lr': 0.00044192766255013926, 'samples': 6465536, 'steps': 12627, 'loss/train': 1.9055109024047852} +02/24/2022 17:54:56 - INFO - codeparrot_training - Step 12628: {'lr': 0.0004419171771282616, 'samples': 6466048, 'steps': 12628, 'loss/train': 1.4694446325302124} +02/24/2022 17:55:00 - INFO - codeparrot_training - Step 12629: {'lr': 0.0004419066908842718, 'samples': 6466560, 'steps': 12629, 'loss/train': 1.4293460845947266} +02/24/2022 17:55:05 - INFO - codeparrot_training - Step 12630: {'lr': 0.0004418962038182146, 'samples': 6467072, 'steps': 12630, 'loss/train': 1.2345119714736938} +02/24/2022 17:55:09 - INFO - codeparrot_training - Step 12631: {'lr': 0.00044188571593013504, 'samples': 6467584, 'steps': 12631, 'loss/train': 1.4546709060668945} +02/24/2022 17:55:14 - INFO - codeparrot_training - Step 12632: {'lr': 0.000441875227220078, 'samples': 6468096, 'steps': 12632, 'loss/train': 3.092456340789795} +02/24/2022 17:55:18 - INFO - codeparrot_training - Step 12633: {'lr': 0.00044186473768808844, 'samples': 6468608, 'steps': 12633, 'loss/train': 1.1801881790161133} +02/24/2022 17:55:23 - INFO - codeparrot_training - Step 12634: {'lr': 0.0004418542473342112, 'samples': 6469120, 'steps': 12634, 'loss/train': 2.223172903060913} +02/24/2022 17:55:27 - INFO - codeparrot_training - Step 12635: {'lr': 0.0004418437561584914, 'samples': 6469632, 'steps': 12635, 'loss/train': 2.7130913734436035} +02/24/2022 17:55:33 - INFO - codeparrot_training - Step 12636: {'lr': 0.00044183326416097373, 'samples': 6470144, 'steps': 12636, 'loss/train': 1.7788987159729004} +02/24/2022 17:55:36 - INFO - codeparrot_training - Step 12637: {'lr': 0.0004418227713417033, 'samples': 6470656, 'steps': 12637, 'loss/train': 2.1338772773742676} +02/24/2022 17:55:42 - INFO - codeparrot_training - Step 12638: {'lr': 0.0004418122777007251, 'samples': 6471168, 'steps': 12638, 'loss/train': 1.1886552572250366} +02/24/2022 17:55:45 - INFO - codeparrot_training - Step 12639: {'lr': 0.00044180178323808395, 'samples': 6471680, 'steps': 12639, 'loss/train': 2.2855968475341797} +02/24/2022 17:55:51 - INFO - codeparrot_training - Step 12640: {'lr': 0.00044179128795382493, 'samples': 6472192, 'steps': 12640, 'loss/train': 2.012110710144043} +02/24/2022 17:55:54 - INFO - codeparrot_training - Step 12641: {'lr': 0.00044178079184799284, 'samples': 6472704, 'steps': 12641, 'loss/train': 3.346752882003784} +02/24/2022 17:56:00 - INFO - codeparrot_training - Step 12642: {'lr': 0.0004417702949206328, 'samples': 6473216, 'steps': 12642, 'loss/train': 2.072218418121338} +02/24/2022 17:56:03 - INFO - codeparrot_training - Step 12643: {'lr': 0.0004417597971717897, 'samples': 6473728, 'steps': 12643, 'loss/train': 2.219122886657715} +02/24/2022 17:56:09 - INFO - codeparrot_training - Step 12644: {'lr': 0.0004417492986015085, 'samples': 6474240, 'steps': 12644, 'loss/train': 1.8840543031692505} +02/24/2022 17:56:12 - INFO - codeparrot_training - Step 12645: {'lr': 0.00044173879920983417, 'samples': 6474752, 'steps': 12645, 'loss/train': 2.312716484069824} +02/24/2022 17:56:18 - INFO - codeparrot_training - Step 12646: {'lr': 0.00044172829899681175, 'samples': 6475264, 'steps': 12646, 'loss/train': 1.9317117929458618} +02/24/2022 17:56:21 - INFO - codeparrot_training - Step 12647: {'lr': 0.00044171779796248623, 'samples': 6475776, 'steps': 12647, 'loss/train': 1.3755879402160645} +02/24/2022 17:56:27 - INFO - codeparrot_training - Step 12648: {'lr': 0.0004417072961069024, 'samples': 6476288, 'steps': 12648, 'loss/train': 2.609785318374634} +02/24/2022 17:56:30 - INFO - codeparrot_training - Step 12649: {'lr': 0.0004416967934301055, 'samples': 6476800, 'steps': 12649, 'loss/train': 1.818744421005249} +02/24/2022 17:56:36 - INFO - codeparrot_training - Step 12650: {'lr': 0.00044168628993214036, 'samples': 6477312, 'steps': 12650, 'loss/train': 2.233635663986206} +02/24/2022 17:56:40 - INFO - codeparrot_training - Step 12651: {'lr': 0.0004416757856130521, 'samples': 6477824, 'steps': 12651, 'loss/train': 0.1283031702041626} +02/24/2022 17:56:45 - INFO - codeparrot_training - Step 12652: {'lr': 0.0004416652804728855, 'samples': 6478336, 'steps': 12652, 'loss/train': 1.9103279113769531} +02/24/2022 17:56:49 - INFO - codeparrot_training - Step 12653: {'lr': 0.0004416547745116858, 'samples': 6478848, 'steps': 12653, 'loss/train': 0.5882845520973206} +02/24/2022 17:56:55 - INFO - codeparrot_training - Step 12654: {'lr': 0.00044164426772949785, 'samples': 6479360, 'steps': 12654, 'loss/train': 0.4565441310405731} +02/24/2022 17:56:58 - INFO - codeparrot_training - Step 12655: {'lr': 0.0004416337601263667, 'samples': 6479872, 'steps': 12655, 'loss/train': 1.2241612672805786} +02/24/2022 17:57:04 - INFO - codeparrot_training - Step 12656: {'lr': 0.00044162325170233745, 'samples': 6480384, 'steps': 12656, 'loss/train': 1.6748687028884888} +02/24/2022 17:57:07 - INFO - codeparrot_training - Step 12657: {'lr': 0.00044161274245745497, 'samples': 6480896, 'steps': 12657, 'loss/train': 0.9990090727806091} +02/24/2022 17:57:13 - INFO - codeparrot_training - Step 12658: {'lr': 0.00044160223239176445, 'samples': 6481408, 'steps': 12658, 'loss/train': 2.565922498703003} +02/24/2022 17:57:16 - INFO - codeparrot_training - Step 12659: {'lr': 0.0004415917215053107, 'samples': 6481920, 'steps': 12659, 'loss/train': 1.7146501541137695} +02/24/2022 17:57:22 - INFO - codeparrot_training - Step 12660: {'lr': 0.00044158120979813885, 'samples': 6482432, 'steps': 12660, 'loss/train': 1.6578271389007568} +02/24/2022 17:57:25 - INFO - codeparrot_training - Step 12661: {'lr': 0.000441570697270294, 'samples': 6482944, 'steps': 12661, 'loss/train': 2.0979325771331787} +02/24/2022 17:57:31 - INFO - codeparrot_training - Step 12662: {'lr': 0.00044156018392182105, 'samples': 6483456, 'steps': 12662, 'loss/train': 1.3775895833969116} +02/24/2022 17:57:35 - INFO - codeparrot_training - Step 12663: {'lr': 0.00044154966975276514, 'samples': 6483968, 'steps': 12663, 'loss/train': 1.668207049369812} +02/24/2022 17:57:40 - INFO - codeparrot_training - Step 12664: {'lr': 0.00044153915476317126, 'samples': 6484480, 'steps': 12664, 'loss/train': 2.0565552711486816} +02/24/2022 17:57:44 - INFO - codeparrot_training - Step 12665: {'lr': 0.00044152863895308446, 'samples': 6484992, 'steps': 12665, 'loss/train': 1.4628971815109253} +02/24/2022 17:57:50 - INFO - codeparrot_training - Step 12666: {'lr': 0.0004415181223225497, 'samples': 6485504, 'steps': 12666, 'loss/train': 2.9463400840759277} +02/24/2022 17:57:53 - INFO - codeparrot_training - Step 12667: {'lr': 0.0004415076048716122, 'samples': 6486016, 'steps': 12667, 'loss/train': 1.8107576370239258} +02/24/2022 17:57:59 - INFO - codeparrot_training - Step 12668: {'lr': 0.00044149708660031704, 'samples': 6486528, 'steps': 12668, 'loss/train': 0.6945056915283203} +02/24/2022 17:58:02 - INFO - codeparrot_training - Step 12669: {'lr': 0.000441486567508709, 'samples': 6487040, 'steps': 12669, 'loss/train': 2.2073943614959717} +02/24/2022 17:58:08 - INFO - codeparrot_training - Step 12670: {'lr': 0.0004414760475968334, 'samples': 6487552, 'steps': 12670, 'loss/train': 2.9699811935424805} +02/24/2022 17:58:11 - INFO - codeparrot_training - Step 12671: {'lr': 0.0004414655268647352, 'samples': 6488064, 'steps': 12671, 'loss/train': 1.8050999641418457} +02/24/2022 17:58:17 - INFO - codeparrot_training - Step 12672: {'lr': 0.0004414550053124594, 'samples': 6488576, 'steps': 12672, 'loss/train': 1.6099040508270264} +02/24/2022 17:58:21 - INFO - codeparrot_training - Step 12673: {'lr': 0.0004414444829400512, 'samples': 6489088, 'steps': 12673, 'loss/train': 2.013878107070923} +02/24/2022 17:58:26 - INFO - codeparrot_training - Step 12674: {'lr': 0.00044143395974755565, 'samples': 6489600, 'steps': 12674, 'loss/train': 2.288484811782837} +02/24/2022 17:58:30 - INFO - codeparrot_training - Step 12675: {'lr': 0.00044142343573501787, 'samples': 6490112, 'steps': 12675, 'loss/train': 1.8778387308120728} +02/24/2022 17:58:35 - INFO - codeparrot_training - Step 12676: {'lr': 0.0004414129109024827, 'samples': 6490624, 'steps': 12676, 'loss/train': 1.9495888948440552} +02/24/2022 17:58:39 - INFO - codeparrot_training - Step 12677: {'lr': 0.00044140238524999556, 'samples': 6491136, 'steps': 12677, 'loss/train': 2.473463535308838} +02/24/2022 17:58:44 - INFO - codeparrot_training - Step 12678: {'lr': 0.0004413918587776013, 'samples': 6491648, 'steps': 12678, 'loss/train': 2.351705551147461} +02/24/2022 17:58:48 - INFO - codeparrot_training - Step 12679: {'lr': 0.0004413813314853451, 'samples': 6492160, 'steps': 12679, 'loss/train': 1.420594334602356} +02/24/2022 17:58:53 - INFO - codeparrot_training - Step 12680: {'lr': 0.00044137080337327205, 'samples': 6492672, 'steps': 12680, 'loss/train': 2.236908435821533} +02/24/2022 17:58:57 - INFO - codeparrot_training - Step 12681: {'lr': 0.00044136027444142723, 'samples': 6493184, 'steps': 12681, 'loss/train': 0.9558871984481812} +02/24/2022 17:59:02 - INFO - codeparrot_training - Step 12682: {'lr': 0.0004413497446898558, 'samples': 6493696, 'steps': 12682, 'loss/train': 1.4634085893630981} +02/24/2022 17:59:06 - INFO - codeparrot_training - Step 12683: {'lr': 0.0004413392141186028, 'samples': 6494208, 'steps': 12683, 'loss/train': 2.386815309524536} +02/24/2022 17:59:11 - INFO - codeparrot_training - Step 12684: {'lr': 0.00044132868272771334, 'samples': 6494720, 'steps': 12684, 'loss/train': 2.317451000213623} +02/24/2022 17:59:15 - INFO - codeparrot_training - Step 12685: {'lr': 0.0004413181505172326, 'samples': 6495232, 'steps': 12685, 'loss/train': 2.307893753051758} +02/24/2022 17:59:20 - INFO - codeparrot_training - Step 12686: {'lr': 0.0004413076174872056, 'samples': 6495744, 'steps': 12686, 'loss/train': 2.906240701675415} +02/24/2022 17:59:24 - INFO - codeparrot_training - Step 12687: {'lr': 0.0004412970836376776, 'samples': 6496256, 'steps': 12687, 'loss/train': 1.8739978075027466} +02/24/2022 17:59:30 - INFO - codeparrot_training - Step 12688: {'lr': 0.00044128654896869357, 'samples': 6496768, 'steps': 12688, 'loss/train': 2.8445639610290527} +02/24/2022 17:59:34 - INFO - codeparrot_training - Step 12689: {'lr': 0.00044127601348029874, 'samples': 6497280, 'steps': 12689, 'loss/train': 1.6321231126785278} +02/24/2022 17:59:39 - INFO - codeparrot_training - Step 12690: {'lr': 0.0004412654771725382, 'samples': 6497792, 'steps': 12690, 'loss/train': 2.0426223278045654} +02/24/2022 17:59:43 - INFO - codeparrot_training - Step 12691: {'lr': 0.00044125494004545703, 'samples': 6498304, 'steps': 12691, 'loss/train': 2.3062593936920166} +02/24/2022 17:59:48 - INFO - codeparrot_training - Step 12692: {'lr': 0.0004412444020991004, 'samples': 6498816, 'steps': 12692, 'loss/train': 1.3057618141174316} +02/24/2022 17:59:52 - INFO - codeparrot_training - Step 12693: {'lr': 0.00044123386333351364, 'samples': 6499328, 'steps': 12693, 'loss/train': 2.1326727867126465} +02/24/2022 17:59:57 - INFO - codeparrot_training - Step 12694: {'lr': 0.00044122332374874166, 'samples': 6499840, 'steps': 12694, 'loss/train': 0.8666550517082214} +02/24/2022 18:00:01 - INFO - codeparrot_training - Step 12695: {'lr': 0.0004412127833448296, 'samples': 6500352, 'steps': 12695, 'loss/train': 1.8085170984268188} +02/24/2022 18:00:06 - INFO - codeparrot_training - Step 12696: {'lr': 0.00044120224212182283, 'samples': 6500864, 'steps': 12696, 'loss/train': 2.3644838333129883} +02/24/2022 18:00:10 - INFO - codeparrot_training - Step 12697: {'lr': 0.0004411917000797663, 'samples': 6501376, 'steps': 12697, 'loss/train': 1.6678123474121094} +02/24/2022 18:00:16 - INFO - codeparrot_training - Step 12698: {'lr': 0.0004411811572187052, 'samples': 6501888, 'steps': 12698, 'loss/train': 2.0448853969573975} +02/24/2022 18:00:20 - INFO - codeparrot_training - Step 12699: {'lr': 0.0004411706135386847, 'samples': 6502400, 'steps': 12699, 'loss/train': 1.9106106758117676} +02/24/2022 18:00:25 - INFO - codeparrot_training - Step 12700: {'lr': 0.0004411600690397501, 'samples': 6502912, 'steps': 12700, 'loss/train': 1.6755871772766113} +02/24/2022 18:00:29 - INFO - codeparrot_training - Step 12701: {'lr': 0.0004411495237219464, 'samples': 6503424, 'steps': 12701, 'loss/train': 1.4629929065704346} +02/24/2022 18:00:34 - INFO - codeparrot_training - Step 12702: {'lr': 0.00044113897758531884, 'samples': 6503936, 'steps': 12702, 'loss/train': 2.561931610107422} +02/24/2022 18:00:38 - INFO - codeparrot_training - Step 12703: {'lr': 0.00044112843062991264, 'samples': 6504448, 'steps': 12703, 'loss/train': 1.908326268196106} +02/24/2022 18:00:43 - INFO - codeparrot_training - Step 12704: {'lr': 0.0004411178828557729, 'samples': 6504960, 'steps': 12704, 'loss/train': 2.5652146339416504} +02/24/2022 18:00:47 - INFO - codeparrot_training - Step 12705: {'lr': 0.00044110733426294484, 'samples': 6505472, 'steps': 12705, 'loss/train': 3.0432803630828857} +02/24/2022 18:00:52 - INFO - codeparrot_training - Step 12706: {'lr': 0.00044109678485147367, 'samples': 6505984, 'steps': 12706, 'loss/train': 2.6836273670196533} +02/24/2022 18:00:56 - INFO - codeparrot_training - Step 12707: {'lr': 0.00044108623462140454, 'samples': 6506496, 'steps': 12707, 'loss/train': 2.0609307289123535} +02/24/2022 18:01:02 - INFO - codeparrot_training - Step 12708: {'lr': 0.0004410756835727826, 'samples': 6507008, 'steps': 12708, 'loss/train': 1.7371406555175781} +02/24/2022 18:01:06 - INFO - codeparrot_training - Step 12709: {'lr': 0.0004410651317056532, 'samples': 6507520, 'steps': 12709, 'loss/train': 4.048654079437256} +02/24/2022 18:01:11 - INFO - codeparrot_training - Step 12710: {'lr': 0.0004410545790200614, 'samples': 6508032, 'steps': 12710, 'loss/train': 2.5537118911743164} +02/24/2022 18:01:15 - INFO - codeparrot_training - Step 12711: {'lr': 0.00044104402551605246, 'samples': 6508544, 'steps': 12711, 'loss/train': 1.905525803565979} +02/24/2022 18:01:20 - INFO - codeparrot_training - Step 12712: {'lr': 0.00044103347119367155, 'samples': 6509056, 'steps': 12712, 'loss/train': 2.7290377616882324} +02/24/2022 18:01:23 - INFO - codeparrot_training - Step 12713: {'lr': 0.0004410229160529639, 'samples': 6509568, 'steps': 12713, 'loss/train': 1.5874825716018677} +02/24/2022 18:01:29 - INFO - codeparrot_training - Step 12714: {'lr': 0.0004410123600939747, 'samples': 6510080, 'steps': 12714, 'loss/train': 2.000169277191162} +02/24/2022 18:01:32 - INFO - codeparrot_training - Step 12715: {'lr': 0.00044100180331674933, 'samples': 6510592, 'steps': 12715, 'loss/train': 2.084585666656494} +02/24/2022 18:01:38 - INFO - codeparrot_training - Step 12716: {'lr': 0.00044099124572133283, 'samples': 6511104, 'steps': 12716, 'loss/train': 2.405771255493164} +02/24/2022 18:01:41 - INFO - codeparrot_training - Step 12717: {'lr': 0.0004409806873077704, 'samples': 6511616, 'steps': 12717, 'loss/train': 1.4571832418441772} +02/24/2022 18:01:48 - INFO - codeparrot_training - Step 12718: {'lr': 0.0004409701280761075, 'samples': 6512128, 'steps': 12718, 'loss/train': 1.939368486404419} +02/24/2022 18:01:51 - INFO - codeparrot_training - Step 12719: {'lr': 0.0004409595680263891, 'samples': 6512640, 'steps': 12719, 'loss/train': 1.5244413614273071} +02/24/2022 18:01:57 - INFO - codeparrot_training - Step 12720: {'lr': 0.0004409490071586606, 'samples': 6513152, 'steps': 12720, 'loss/train': 2.952091932296753} +02/24/2022 18:02:01 - INFO - codeparrot_training - Step 12721: {'lr': 0.00044093844547296715, 'samples': 6513664, 'steps': 12721, 'loss/train': 1.806796908378601} +02/24/2022 18:02:06 - INFO - codeparrot_training - Step 12722: {'lr': 0.000440927882969354, 'samples': 6514176, 'steps': 12722, 'loss/train': 2.065204620361328} +02/24/2022 18:02:10 - INFO - codeparrot_training - Step 12723: {'lr': 0.0004409173196478665, 'samples': 6514688, 'steps': 12723, 'loss/train': 1.4060415029525757} +02/24/2022 18:02:15 - INFO - codeparrot_training - Step 12724: {'lr': 0.00044090675550854973, 'samples': 6515200, 'steps': 12724, 'loss/train': 1.4661648273468018} +02/24/2022 18:02:19 - INFO - codeparrot_training - Step 12725: {'lr': 0.00044089619055144916, 'samples': 6515712, 'steps': 12725, 'loss/train': 2.989802122116089} +02/24/2022 18:02:25 - INFO - codeparrot_training - Step 12726: {'lr': 0.0004408856247766098, 'samples': 6516224, 'steps': 12726, 'loss/train': 2.071134567260742} +02/24/2022 18:02:28 - INFO - codeparrot_training - Step 12727: {'lr': 0.00044087505818407715, 'samples': 6516736, 'steps': 12727, 'loss/train': 1.6357592344284058} +02/24/2022 18:02:34 - INFO - codeparrot_training - Step 12728: {'lr': 0.00044086449077389636, 'samples': 6517248, 'steps': 12728, 'loss/train': 2.09582781791687} +02/24/2022 18:02:37 - INFO - codeparrot_training - Step 12729: {'lr': 0.0004408539225461126, 'samples': 6517760, 'steps': 12729, 'loss/train': 2.021911382675171} +02/24/2022 18:02:43 - INFO - codeparrot_training - Step 12730: {'lr': 0.0004408433535007713, 'samples': 6518272, 'steps': 12730, 'loss/train': 1.679516315460205} +02/24/2022 18:02:46 - INFO - codeparrot_training - Step 12731: {'lr': 0.0004408327836379177, 'samples': 6518784, 'steps': 12731, 'loss/train': 2.283358097076416} +02/24/2022 18:02:52 - INFO - codeparrot_training - Step 12732: {'lr': 0.0004408222129575969, 'samples': 6519296, 'steps': 12732, 'loss/train': 2.89127516746521} +02/24/2022 18:02:55 - INFO - codeparrot_training - Step 12733: {'lr': 0.0004408116414598545, 'samples': 6519808, 'steps': 12733, 'loss/train': 2.235253095626831} +02/24/2022 18:03:01 - INFO - codeparrot_training - Step 12734: {'lr': 0.0004408010691447356, 'samples': 6520320, 'steps': 12734, 'loss/train': 3.0562398433685303} +02/24/2022 18:03:05 - INFO - codeparrot_training - Step 12735: {'lr': 0.00044079049601228543, 'samples': 6520832, 'steps': 12735, 'loss/train': 1.7523605823516846} +02/24/2022 18:03:10 - INFO - codeparrot_training - Step 12736: {'lr': 0.00044077992206254934, 'samples': 6521344, 'steps': 12736, 'loss/train': 1.1580567359924316} +02/24/2022 18:03:14 - INFO - codeparrot_training - Step 12737: {'lr': 0.0004407693472955727, 'samples': 6521856, 'steps': 12737, 'loss/train': 2.4555981159210205} +02/24/2022 18:03:19 - INFO - codeparrot_training - Step 12738: {'lr': 0.00044075877171140075, 'samples': 6522368, 'steps': 12738, 'loss/train': 2.8675730228424072} +02/24/2022 18:03:23 - INFO - codeparrot_training - Step 12739: {'lr': 0.00044074819531007885, 'samples': 6522880, 'steps': 12739, 'loss/train': 0.9625195264816284} +02/24/2022 18:03:28 - INFO - codeparrot_training - Step 12740: {'lr': 0.0004407376180916522, 'samples': 6523392, 'steps': 12740, 'loss/train': 2.4256608486175537} +02/24/2022 18:03:32 - INFO - codeparrot_training - Step 12741: {'lr': 0.00044072704005616614, 'samples': 6523904, 'steps': 12741, 'loss/train': 2.437060832977295} +02/24/2022 18:03:37 - INFO - codeparrot_training - Step 12742: {'lr': 0.00044071646120366604, 'samples': 6524416, 'steps': 12742, 'loss/train': 0.27724093198776245} +02/24/2022 18:03:41 - INFO - codeparrot_training - Step 12743: {'lr': 0.00044070588153419715, 'samples': 6524928, 'steps': 12743, 'loss/train': 2.35150408744812} +02/24/2022 18:03:47 - INFO - codeparrot_training - Step 12744: {'lr': 0.00044069530104780486, 'samples': 6525440, 'steps': 12744, 'loss/train': 1.5001403093338013} +02/24/2022 18:03:51 - INFO - codeparrot_training - Step 12745: {'lr': 0.00044068471974453437, 'samples': 6525952, 'steps': 12745, 'loss/train': 2.5214011669158936} +02/24/2022 18:03:56 - INFO - codeparrot_training - Step 12746: {'lr': 0.0004406741376244312, 'samples': 6526464, 'steps': 12746, 'loss/train': 1.9468610286712646} +02/24/2022 18:04:00 - INFO - codeparrot_training - Step 12747: {'lr': 0.00044066355468754047, 'samples': 6526976, 'steps': 12747, 'loss/train': 2.306725263595581} +02/24/2022 18:04:05 - INFO - codeparrot_training - Step 12748: {'lr': 0.00044065297093390764, 'samples': 6527488, 'steps': 12748, 'loss/train': 2.6221346855163574} +02/24/2022 18:04:09 - INFO - codeparrot_training - Step 12749: {'lr': 0.0004406423863635781, 'samples': 6528000, 'steps': 12749, 'loss/train': 2.135566234588623} +02/24/2022 18:04:14 - INFO - codeparrot_training - Step 12750: {'lr': 0.00044063180097659704, 'samples': 6528512, 'steps': 12750, 'loss/train': 1.3207799196243286} +02/24/2022 18:04:18 - INFO - codeparrot_training - Step 12751: {'lr': 0.00044062121477300985, 'samples': 6529024, 'steps': 12751, 'loss/train': 1.6424869298934937} +02/24/2022 18:04:23 - INFO - codeparrot_training - Step 12752: {'lr': 0.000440610627752862, 'samples': 6529536, 'steps': 12752, 'loss/train': 2.7339706420898438} +02/24/2022 18:04:27 - INFO - codeparrot_training - Step 12753: {'lr': 0.0004406000399161987, 'samples': 6530048, 'steps': 12753, 'loss/train': 2.766383647918701} +02/24/2022 18:04:33 - INFO - codeparrot_training - Step 12754: {'lr': 0.00044058945126306535, 'samples': 6530560, 'steps': 12754, 'loss/train': 2.0135092735290527} +02/24/2022 18:04:36 - INFO - codeparrot_training - Step 12755: {'lr': 0.0004405788617935073, 'samples': 6531072, 'steps': 12755, 'loss/train': 1.3996175527572632} +02/24/2022 18:04:42 - INFO - codeparrot_training - Step 12756: {'lr': 0.0004405682715075699, 'samples': 6531584, 'steps': 12756, 'loss/train': 2.252906084060669} +02/24/2022 18:04:45 - INFO - codeparrot_training - Step 12757: {'lr': 0.0004405576804052985, 'samples': 6532096, 'steps': 12757, 'loss/train': 1.8308268785476685} +02/24/2022 18:04:51 - INFO - codeparrot_training - Step 12758: {'lr': 0.0004405470884867386, 'samples': 6532608, 'steps': 12758, 'loss/train': 1.2382327318191528} +02/24/2022 18:04:55 - INFO - codeparrot_training - Step 12759: {'lr': 0.00044053649575193543, 'samples': 6533120, 'steps': 12759, 'loss/train': 3.1187937259674072} +02/24/2022 18:05:00 - INFO - codeparrot_training - Step 12760: {'lr': 0.00044052590220093445, 'samples': 6533632, 'steps': 12760, 'loss/train': 0.9883297681808472} +02/24/2022 18:05:04 - INFO - codeparrot_training - Step 12761: {'lr': 0.00044051530783378103, 'samples': 6534144, 'steps': 12761, 'loss/train': 1.8920166492462158} +02/24/2022 18:05:09 - INFO - codeparrot_training - Step 12762: {'lr': 0.0004405047126505204, 'samples': 6534656, 'steps': 12762, 'loss/train': 1.4813631772994995} +02/24/2022 18:05:13 - INFO - codeparrot_training - Step 12763: {'lr': 0.0004404941166511982, 'samples': 6535168, 'steps': 12763, 'loss/train': 2.164565086364746} +02/24/2022 18:05:19 - INFO - codeparrot_training - Step 12764: {'lr': 0.00044048351983585966, 'samples': 6535680, 'steps': 12764, 'loss/train': 0.6382386088371277} +02/24/2022 18:05:25 - INFO - codeparrot_training - Step 12765: {'lr': 0.00044047292220455016, 'samples': 6536192, 'steps': 12765, 'loss/train': 2.824276924133301} +02/24/2022 18:05:28 - INFO - codeparrot_training - Step 12766: {'lr': 0.0004404623237573152, 'samples': 6536704, 'steps': 12766, 'loss/train': 2.3192460536956787} +02/24/2022 18:05:31 - INFO - codeparrot_training - Step 12767: {'lr': 0.00044045172449420005, 'samples': 6537216, 'steps': 12767, 'loss/train': 1.1514025926589966} +02/24/2022 18:05:37 - INFO - codeparrot_training - Step 12768: {'lr': 0.00044044112441525026, 'samples': 6537728, 'steps': 12768, 'loss/train': 1.3377079963684082} +02/24/2022 18:05:42 - INFO - codeparrot_training - Step 12769: {'lr': 0.0004404305235205112, 'samples': 6538240, 'steps': 12769, 'loss/train': 2.5847256183624268} +02/24/2022 18:05:46 - INFO - codeparrot_training - Step 12770: {'lr': 0.0004404199218100281, 'samples': 6538752, 'steps': 12770, 'loss/train': 2.267228841781616} +02/24/2022 18:05:51 - INFO - codeparrot_training - Step 12771: {'lr': 0.00044040931928384665, 'samples': 6539264, 'steps': 12771, 'loss/train': 1.6417549848556519} +02/24/2022 18:05:55 - INFO - codeparrot_training - Step 12772: {'lr': 0.0004403987159420121, 'samples': 6539776, 'steps': 12772, 'loss/train': 1.6507021188735962} +02/24/2022 18:06:01 - INFO - codeparrot_training - Step 12773: {'lr': 0.0004403881117845699, 'samples': 6540288, 'steps': 12773, 'loss/train': 1.3791691064834595} +02/24/2022 18:06:04 - INFO - codeparrot_training - Step 12774: {'lr': 0.00044037750681156547, 'samples': 6540800, 'steps': 12774, 'loss/train': 2.366238832473755} +02/24/2022 18:06:10 - INFO - codeparrot_training - Step 12775: {'lr': 0.0004403669010230443, 'samples': 6541312, 'steps': 12775, 'loss/train': 1.6738210916519165} +02/24/2022 18:06:13 - INFO - codeparrot_training - Step 12776: {'lr': 0.00044035629441905173, 'samples': 6541824, 'steps': 12776, 'loss/train': 2.1039857864379883} +02/24/2022 18:06:19 - INFO - codeparrot_training - Step 12777: {'lr': 0.0004403456869996333, 'samples': 6542336, 'steps': 12777, 'loss/train': 1.8654409646987915} +02/24/2022 18:06:22 - INFO - codeparrot_training - Step 12778: {'lr': 0.0004403350787648343, 'samples': 6542848, 'steps': 12778, 'loss/train': 1.9655073881149292} +02/24/2022 18:06:29 - INFO - codeparrot_training - Step 12779: {'lr': 0.0004403244697147003, 'samples': 6543360, 'steps': 12779, 'loss/train': 2.61484956741333} +02/24/2022 18:06:32 - INFO - codeparrot_training - Step 12780: {'lr': 0.00044031385984927675, 'samples': 6543872, 'steps': 12780, 'loss/train': 5.149417877197266} +02/24/2022 18:06:38 - INFO - codeparrot_training - Step 12781: {'lr': 0.000440303249168609, 'samples': 6544384, 'steps': 12781, 'loss/train': 1.9456908702850342} +02/24/2022 18:06:41 - INFO - codeparrot_training - Step 12782: {'lr': 0.0004402926376727425, 'samples': 6544896, 'steps': 12782, 'loss/train': 1.6736698150634766} +02/24/2022 18:06:45 - INFO - codeparrot_training - Step 12783: {'lr': 0.0004402820253617229, 'samples': 6545408, 'steps': 12783, 'loss/train': 1.9663290977478027} +02/24/2022 18:06:51 - INFO - codeparrot_training - Step 12784: {'lr': 0.0004402714122355955, 'samples': 6545920, 'steps': 12784, 'loss/train': 1.8992382287979126} +02/24/2022 18:06:54 - INFO - codeparrot_training - Step 12785: {'lr': 0.00044026079829440567, 'samples': 6546432, 'steps': 12785, 'loss/train': 2.6155500411987305} +02/24/2022 18:07:00 - INFO - codeparrot_training - Step 12786: {'lr': 0.0004402501835381991, 'samples': 6546944, 'steps': 12786, 'loss/train': 3.9356064796447754} +02/24/2022 18:07:03 - INFO - codeparrot_training - Step 12787: {'lr': 0.00044023956796702116, 'samples': 6547456, 'steps': 12787, 'loss/train': 1.4840117692947388} +02/24/2022 18:07:09 - INFO - codeparrot_training - Step 12788: {'lr': 0.0004402289515809172, 'samples': 6547968, 'steps': 12788, 'loss/train': 1.8973579406738281} +02/24/2022 18:07:12 - INFO - codeparrot_training - Step 12789: {'lr': 0.00044021833437993296, 'samples': 6548480, 'steps': 12789, 'loss/train': 1.8810702562332153} +02/24/2022 18:07:19 - INFO - codeparrot_training - Step 12790: {'lr': 0.0004402077163641137, 'samples': 6548992, 'steps': 12790, 'loss/train': 2.2041003704071045} +02/24/2022 18:07:22 - INFO - codeparrot_training - Step 12791: {'lr': 0.000440197097533505, 'samples': 6549504, 'steps': 12791, 'loss/train': 0.6744171977043152} +02/24/2022 18:07:27 - INFO - codeparrot_training - Step 12792: {'lr': 0.00044018647788815235, 'samples': 6550016, 'steps': 12792, 'loss/train': 1.9453179836273193} +02/24/2022 18:07:33 - INFO - codeparrot_training - Step 12793: {'lr': 0.00044017585742810124, 'samples': 6550528, 'steps': 12793, 'loss/train': 2.635545015335083} +02/24/2022 18:07:36 - INFO - codeparrot_training - Step 12794: {'lr': 0.0004401652361533971, 'samples': 6551040, 'steps': 12794, 'loss/train': 1.8903262615203857} +02/24/2022 18:07:42 - INFO - codeparrot_training - Step 12795: {'lr': 0.00044015461406408544, 'samples': 6551552, 'steps': 12795, 'loss/train': 2.7911055088043213} +02/24/2022 18:07:45 - INFO - codeparrot_training - Step 12796: {'lr': 0.00044014399116021184, 'samples': 6552064, 'steps': 12796, 'loss/train': 1.8545019626617432} +02/24/2022 18:07:51 - INFO - codeparrot_training - Step 12797: {'lr': 0.00044013336744182176, 'samples': 6552576, 'steps': 12797, 'loss/train': 2.298102855682373} +02/24/2022 18:07:55 - INFO - codeparrot_training - Step 12798: {'lr': 0.0004401227429089607, 'samples': 6553088, 'steps': 12798, 'loss/train': 1.5750998258590698} +02/24/2022 18:08:01 - INFO - codeparrot_training - Step 12799: {'lr': 0.00044011211756167425, 'samples': 6553600, 'steps': 12799, 'loss/train': 2.0372374057769775} +02/24/2022 18:08:04 - INFO - codeparrot_training - Step 12800: {'lr': 0.0004401014914000078, 'samples': 6554112, 'steps': 12800, 'loss/train': 2.5791444778442383} +02/24/2022 18:08:10 - INFO - codeparrot_training - Step 12801: {'lr': 0.00044009086442400684, 'samples': 6554624, 'steps': 12801, 'loss/train': 2.1451714038848877} +02/24/2022 18:08:13 - INFO - codeparrot_training - Step 12802: {'lr': 0.0004400802366337171, 'samples': 6555136, 'steps': 12802, 'loss/train': 1.864613652229309} +02/24/2022 18:08:19 - INFO - codeparrot_training - Step 12803: {'lr': 0.00044006960802918393, 'samples': 6555648, 'steps': 12803, 'loss/train': 1.5845685005187988} +02/24/2022 18:08:22 - INFO - codeparrot_training - Step 12804: {'lr': 0.0004400589786104529, 'samples': 6556160, 'steps': 12804, 'loss/train': 2.4343771934509277} +02/24/2022 18:08:28 - INFO - codeparrot_training - Step 12805: {'lr': 0.0004400483483775696, 'samples': 6556672, 'steps': 12805, 'loss/train': 2.7292592525482178} +02/24/2022 18:08:31 - INFO - codeparrot_training - Step 12806: {'lr': 0.00044003771733057943, 'samples': 6557184, 'steps': 12806, 'loss/train': 2.767972230911255} +02/24/2022 18:08:37 - INFO - codeparrot_training - Step 12807: {'lr': 0.0004400270854695281, 'samples': 6557696, 'steps': 12807, 'loss/train': 1.6104705333709717} +02/24/2022 18:08:40 - INFO - codeparrot_training - Step 12808: {'lr': 0.0004400164527944611, 'samples': 6558208, 'steps': 12808, 'loss/train': 2.1192734241485596} +02/24/2022 18:08:44 - INFO - codeparrot_training - Step 12809: {'lr': 0.0004400058193054239, 'samples': 6558720, 'steps': 12809, 'loss/train': 2.5320072174072266} +02/24/2022 18:08:50 - INFO - codeparrot_training - Step 12810: {'lr': 0.0004399951850024621, 'samples': 6559232, 'steps': 12810, 'loss/train': 2.262166738510132} +02/24/2022 18:08:54 - INFO - codeparrot_training - Step 12811: {'lr': 0.0004399845498856213, 'samples': 6559744, 'steps': 12811, 'loss/train': 0.6867892742156982} +02/24/2022 18:08:59 - INFO - codeparrot_training - Step 12812: {'lr': 0.000439973913954947, 'samples': 6560256, 'steps': 12812, 'loss/train': 2.066507577896118} +02/24/2022 18:09:03 - INFO - codeparrot_training - Step 12813: {'lr': 0.0004399632772104848, 'samples': 6560768, 'steps': 12813, 'loss/train': 1.0127909183502197} +02/24/2022 18:09:08 - INFO - codeparrot_training - Step 12814: {'lr': 0.00043995263965228016, 'samples': 6561280, 'steps': 12814, 'loss/train': 1.0926337242126465} +02/24/2022 18:09:12 - INFO - codeparrot_training - Step 12815: {'lr': 0.00043994200128037877, 'samples': 6561792, 'steps': 12815, 'loss/train': 2.4272892475128174} +02/24/2022 18:09:17 - INFO - codeparrot_training - Step 12816: {'lr': 0.0004399313620948262, 'samples': 6562304, 'steps': 12816, 'loss/train': 2.5267903804779053} +02/24/2022 18:09:23 - INFO - codeparrot_training - Step 12817: {'lr': 0.00043992072209566793, 'samples': 6562816, 'steps': 12817, 'loss/train': 2.595418691635132} +02/24/2022 18:09:26 - INFO - codeparrot_training - Step 12818: {'lr': 0.0004399100812829496, 'samples': 6563328, 'steps': 12818, 'loss/train': 2.586636781692505} +02/24/2022 18:09:30 - INFO - codeparrot_training - Step 12819: {'lr': 0.00043989943965671685, 'samples': 6563840, 'steps': 12819, 'loss/train': 2.1191513538360596} +02/24/2022 18:09:35 - INFO - codeparrot_training - Step 12820: {'lr': 0.00043988879721701515, 'samples': 6564352, 'steps': 12820, 'loss/train': 1.8866201639175415} +02/24/2022 18:09:41 - INFO - codeparrot_training - Step 12821: {'lr': 0.0004398781539638901, 'samples': 6564864, 'steps': 12821, 'loss/train': 3.1851587295532227} +02/24/2022 18:09:44 - INFO - codeparrot_training - Step 12822: {'lr': 0.00043986750989738737, 'samples': 6565376, 'steps': 12822, 'loss/train': 2.0842721462249756} +02/24/2022 18:09:50 - INFO - codeparrot_training - Step 12823: {'lr': 0.0004398568650175525, 'samples': 6565888, 'steps': 12823, 'loss/train': 2.483586549758911} +02/24/2022 18:09:53 - INFO - codeparrot_training - Step 12824: {'lr': 0.00043984621932443115, 'samples': 6566400, 'steps': 12824, 'loss/train': 2.3778891563415527} +02/24/2022 18:09:59 - INFO - codeparrot_training - Step 12825: {'lr': 0.0004398355728180689, 'samples': 6566912, 'steps': 12825, 'loss/train': 2.1336655616760254} +02/24/2022 18:10:03 - INFO - codeparrot_training - Step 12826: {'lr': 0.0004398249254985113, 'samples': 6567424, 'steps': 12826, 'loss/train': 2.381819725036621} +02/24/2022 18:10:09 - INFO - codeparrot_training - Step 12827: {'lr': 0.00043981427736580395, 'samples': 6567936, 'steps': 12827, 'loss/train': 2.261563539505005} +02/24/2022 18:10:12 - INFO - codeparrot_training - Step 12828: {'lr': 0.00043980362841999253, 'samples': 6568448, 'steps': 12828, 'loss/train': 2.7353408336639404} +02/24/2022 18:10:16 - INFO - codeparrot_training - Step 12829: {'lr': 0.0004397929786611227, 'samples': 6568960, 'steps': 12829, 'loss/train': 1.0050386190414429} +02/24/2022 18:10:23 - INFO - codeparrot_training - Step 12830: {'lr': 0.00043978232808923996, 'samples': 6569472, 'steps': 12830, 'loss/train': 0.19775129854679108} +02/24/2022 18:10:26 - INFO - codeparrot_training - Step 12831: {'lr': 0.00043977167670439, 'samples': 6569984, 'steps': 12831, 'loss/train': 1.5964388847351074} +02/24/2022 18:10:32 - INFO - codeparrot_training - Step 12832: {'lr': 0.0004397610245066184, 'samples': 6570496, 'steps': 12832, 'loss/train': 1.4163870811462402} +02/24/2022 18:10:35 - INFO - codeparrot_training - Step 12833: {'lr': 0.00043975037149597085, 'samples': 6571008, 'steps': 12833, 'loss/train': 1.9741743803024292} +02/24/2022 18:10:41 - INFO - codeparrot_training - Step 12834: {'lr': 0.00043973971767249297, 'samples': 6571520, 'steps': 12834, 'loss/train': 2.2019591331481934} +02/24/2022 18:10:44 - INFO - codeparrot_training - Step 12835: {'lr': 0.0004397290630362304, 'samples': 6572032, 'steps': 12835, 'loss/train': 2.1687161922454834} +02/24/2022 18:10:50 - INFO - codeparrot_training - Step 12836: {'lr': 0.0004397184075872288, 'samples': 6572544, 'steps': 12836, 'loss/train': 2.214421510696411} +02/24/2022 18:10:53 - INFO - codeparrot_training - Step 12837: {'lr': 0.00043970775132553375, 'samples': 6573056, 'steps': 12837, 'loss/train': 0.9080241322517395} +02/24/2022 18:10:59 - INFO - codeparrot_training - Step 12838: {'lr': 0.00043969709425119085, 'samples': 6573568, 'steps': 12838, 'loss/train': 1.4517842531204224} +02/24/2022 18:11:02 - INFO - codeparrot_training - Step 12839: {'lr': 0.000439686436364246, 'samples': 6574080, 'steps': 12839, 'loss/train': 1.7576817274093628} +02/24/2022 18:11:09 - INFO - codeparrot_training - Step 12840: {'lr': 0.00043967577766474455, 'samples': 6574592, 'steps': 12840, 'loss/train': 1.9305963516235352} +02/24/2022 18:11:13 - INFO - codeparrot_training - Step 12841: {'lr': 0.00043966511815273233, 'samples': 6575104, 'steps': 12841, 'loss/train': 1.845685601234436} +02/24/2022 18:11:19 - INFO - codeparrot_training - Step 12842: {'lr': 0.00043965445782825495, 'samples': 6575616, 'steps': 12842, 'loss/train': 2.229151964187622} +02/24/2022 18:11:22 - INFO - codeparrot_training - Step 12843: {'lr': 0.00043964379669135815, 'samples': 6576128, 'steps': 12843, 'loss/train': 1.5379745960235596} +02/24/2022 18:11:28 - INFO - codeparrot_training - Step 12844: {'lr': 0.00043963313474208753, 'samples': 6576640, 'steps': 12844, 'loss/train': 0.8637090921401978} +02/24/2022 18:11:31 - INFO - codeparrot_training - Step 12845: {'lr': 0.0004396224719804888, 'samples': 6577152, 'steps': 12845, 'loss/train': 2.0980193614959717} +02/24/2022 18:11:37 - INFO - codeparrot_training - Step 12846: {'lr': 0.0004396118084066075, 'samples': 6577664, 'steps': 12846, 'loss/train': 1.9521963596343994} +02/24/2022 18:11:40 - INFO - codeparrot_training - Step 12847: {'lr': 0.00043960114402048957, 'samples': 6578176, 'steps': 12847, 'loss/train': 2.294322967529297} +02/24/2022 18:11:46 - INFO - codeparrot_training - Step 12848: {'lr': 0.0004395904788221805, 'samples': 6578688, 'steps': 12848, 'loss/train': 2.5258712768554688} +02/24/2022 18:11:49 - INFO - codeparrot_training - Step 12849: {'lr': 0.00043957981281172597, 'samples': 6579200, 'steps': 12849, 'loss/train': 1.0141184329986572} +02/24/2022 18:11:56 - INFO - codeparrot_training - Step 12850: {'lr': 0.00043956914598917177, 'samples': 6579712, 'steps': 12850, 'loss/train': 1.8903825283050537} +02/24/2022 18:11:59 - INFO - codeparrot_training - Step 12851: {'lr': 0.00043955847835456353, 'samples': 6580224, 'steps': 12851, 'loss/train': 2.1995418071746826} +02/24/2022 18:12:05 - INFO - codeparrot_training - Step 12852: {'lr': 0.00043954780990794695, 'samples': 6580736, 'steps': 12852, 'loss/train': 0.5545242428779602} +02/24/2022 18:12:08 - INFO - codeparrot_training - Step 12853: {'lr': 0.0004395371406493677, 'samples': 6581248, 'steps': 12853, 'loss/train': 1.7464067935943604} +02/24/2022 18:12:14 - INFO - codeparrot_training - Step 12854: {'lr': 0.0004395264705788716, 'samples': 6581760, 'steps': 12854, 'loss/train': 2.0774736404418945} +02/24/2022 18:12:17 - INFO - codeparrot_training - Step 12855: {'lr': 0.00043951579969650424, 'samples': 6582272, 'steps': 12855, 'loss/train': 3.024357795715332} +02/24/2022 18:12:23 - INFO - codeparrot_training - Step 12856: {'lr': 0.00043950512800231136, 'samples': 6582784, 'steps': 12856, 'loss/train': 1.100299596786499} +02/24/2022 18:12:26 - INFO - codeparrot_training - Step 12857: {'lr': 0.0004394944554963387, 'samples': 6583296, 'steps': 12857, 'loss/train': 1.196905255317688} +02/24/2022 18:12:32 - INFO - codeparrot_training - Step 12858: {'lr': 0.000439483782178632, 'samples': 6583808, 'steps': 12858, 'loss/train': 2.6736104488372803} +02/24/2022 18:12:35 - INFO - codeparrot_training - Step 12859: {'lr': 0.0004394731080492369, 'samples': 6584320, 'steps': 12859, 'loss/train': 1.8345617055892944} +02/24/2022 18:12:41 - INFO - codeparrot_training - Step 12860: {'lr': 0.0004394624331081992, 'samples': 6584832, 'steps': 12860, 'loss/train': 2.343151569366455} +02/24/2022 18:12:45 - INFO - codeparrot_training - Step 12861: {'lr': 0.00043945175735556454, 'samples': 6585344, 'steps': 12861, 'loss/train': 0.6751885414123535} +02/24/2022 18:12:50 - INFO - codeparrot_training - Step 12862: {'lr': 0.0004394410807913788, 'samples': 6585856, 'steps': 12862, 'loss/train': 1.2433993816375732} +02/24/2022 18:12:54 - INFO - codeparrot_training - Step 12863: {'lr': 0.0004394304034156875, 'samples': 6586368, 'steps': 12863, 'loss/train': 0.16667544841766357} +02/24/2022 18:13:00 - INFO - codeparrot_training - Step 12864: {'lr': 0.00043941972522853665, 'samples': 6586880, 'steps': 12864, 'loss/train': 2.7264668941497803} +02/24/2022 18:13:03 - INFO - codeparrot_training - Step 12865: {'lr': 0.00043940904622997176, 'samples': 6587392, 'steps': 12865, 'loss/train': 1.565804123878479} +02/24/2022 18:13:08 - INFO - codeparrot_training - Step 12866: {'lr': 0.00043939836642003865, 'samples': 6587904, 'steps': 12866, 'loss/train': 3.104874610900879} +02/24/2022 18:13:12 - INFO - codeparrot_training - Step 12867: {'lr': 0.0004393876857987831, 'samples': 6588416, 'steps': 12867, 'loss/train': 1.7425435781478882} +02/24/2022 18:13:18 - INFO - codeparrot_training - Step 12868: {'lr': 0.0004393770043662508, 'samples': 6588928, 'steps': 12868, 'loss/train': 0.7137417197227478} +02/24/2022 18:13:21 - INFO - codeparrot_training - Step 12869: {'lr': 0.0004393663221224876, 'samples': 6589440, 'steps': 12869, 'loss/train': 2.7801997661590576} +02/24/2022 18:13:27 - INFO - codeparrot_training - Step 12870: {'lr': 0.00043935563906753923, 'samples': 6589952, 'steps': 12870, 'loss/train': 1.2647124528884888} +02/24/2022 18:13:31 - INFO - codeparrot_training - Step 12871: {'lr': 0.0004393449552014514, 'samples': 6590464, 'steps': 12871, 'loss/train': 2.162167549133301} +02/24/2022 18:13:36 - INFO - codeparrot_training - Step 12872: {'lr': 0.00043933427052426986, 'samples': 6590976, 'steps': 12872, 'loss/train': 2.060293436050415} +02/24/2022 18:13:40 - INFO - codeparrot_training - Step 12873: {'lr': 0.00043932358503604054, 'samples': 6591488, 'steps': 12873, 'loss/train': 1.4157055616378784} +02/24/2022 18:13:45 - INFO - codeparrot_training - Step 12874: {'lr': 0.000439312898736809, 'samples': 6592000, 'steps': 12874, 'loss/train': 2.2592687606811523} +02/24/2022 18:13:49 - INFO - codeparrot_training - Step 12875: {'lr': 0.00043930221162662115, 'samples': 6592512, 'steps': 12875, 'loss/train': 1.7817293405532837} +02/24/2022 18:13:54 - INFO - codeparrot_training - Step 12876: {'lr': 0.0004392915237055227, 'samples': 6593024, 'steps': 12876, 'loss/train': 2.011043071746826} +02/24/2022 18:13:58 - INFO - codeparrot_training - Step 12877: {'lr': 0.00043928083497355954, 'samples': 6593536, 'steps': 12877, 'loss/train': 1.628848910331726} +02/24/2022 18:14:03 - INFO - codeparrot_training - Step 12878: {'lr': 0.0004392701454307773, 'samples': 6594048, 'steps': 12878, 'loss/train': 2.68397855758667} +02/24/2022 18:14:06 - INFO - codeparrot_training - Step 12879: {'lr': 0.00043925945507722195, 'samples': 6594560, 'steps': 12879, 'loss/train': 1.3266445398330688} +02/24/2022 18:14:12 - INFO - codeparrot_training - Step 12880: {'lr': 0.0004392487639129391, 'samples': 6595072, 'steps': 12880, 'loss/train': 1.838348388671875} +02/24/2022 18:14:16 - INFO - codeparrot_training - Step 12881: {'lr': 0.0004392380719379747, 'samples': 6595584, 'steps': 12881, 'loss/train': 1.6737326383590698} +02/24/2022 18:14:21 - INFO - codeparrot_training - Step 12882: {'lr': 0.0004392273791523744, 'samples': 6596096, 'steps': 12882, 'loss/train': 2.3976011276245117} +02/24/2022 18:14:25 - INFO - codeparrot_training - Step 12883: {'lr': 0.0004392166855561842, 'samples': 6596608, 'steps': 12883, 'loss/train': 1.4859952926635742} +02/24/2022 18:14:30 - INFO - codeparrot_training - Step 12884: {'lr': 0.0004392059911494498, 'samples': 6597120, 'steps': 12884, 'loss/train': 1.7109577655792236} +02/24/2022 18:14:34 - INFO - codeparrot_training - Step 12885: {'lr': 0.00043919529593221696, 'samples': 6597632, 'steps': 12885, 'loss/train': 1.3060704469680786} +02/24/2022 18:14:41 - INFO - codeparrot_training - Step 12886: {'lr': 0.00043918459990453156, 'samples': 6598144, 'steps': 12886, 'loss/train': 2.6402320861816406} +02/24/2022 18:14:44 - INFO - codeparrot_training - Step 12887: {'lr': 0.00043917390306643945, 'samples': 6598656, 'steps': 12887, 'loss/train': 3.1464781761169434} +02/24/2022 18:14:50 - INFO - codeparrot_training - Step 12888: {'lr': 0.0004391632054179864, 'samples': 6599168, 'steps': 12888, 'loss/train': 1.0135523080825806} +02/24/2022 18:14:53 - INFO - codeparrot_training - Step 12889: {'lr': 0.00043915250695921815, 'samples': 6599680, 'steps': 12889, 'loss/train': 1.9060496091842651} +02/24/2022 18:14:59 - INFO - codeparrot_training - Step 12890: {'lr': 0.00043914180769018073, 'samples': 6600192, 'steps': 12890, 'loss/train': 0.6762253046035767} +02/24/2022 18:15:02 - INFO - codeparrot_training - Step 12891: {'lr': 0.0004391311076109198, 'samples': 6600704, 'steps': 12891, 'loss/train': 1.163953423500061} +02/24/2022 18:15:08 - INFO - codeparrot_training - Step 12892: {'lr': 0.00043912040672148135, 'samples': 6601216, 'steps': 12892, 'loss/train': 1.510191798210144} +02/24/2022 18:15:11 - INFO - codeparrot_training - Step 12893: {'lr': 0.00043910970502191105, 'samples': 6601728, 'steps': 12893, 'loss/train': 2.106886148452759} +02/24/2022 18:15:17 - INFO - codeparrot_training - Step 12894: {'lr': 0.00043909900251225476, 'samples': 6602240, 'steps': 12894, 'loss/train': 1.1448532342910767} +02/24/2022 18:15:20 - INFO - codeparrot_training - Step 12895: {'lr': 0.00043908829919255855, 'samples': 6602752, 'steps': 12895, 'loss/train': 1.8956762552261353} +02/24/2022 18:15:27 - INFO - codeparrot_training - Step 12896: {'lr': 0.00043907759506286797, 'samples': 6603264, 'steps': 12896, 'loss/train': 1.6714473962783813} +02/24/2022 18:15:31 - INFO - codeparrot_training - Step 12897: {'lr': 0.0004390668901232291, 'samples': 6603776, 'steps': 12897, 'loss/train': 1.9626426696777344} +02/24/2022 18:15:36 - INFO - codeparrot_training - Step 12898: {'lr': 0.00043905618437368766, 'samples': 6604288, 'steps': 12898, 'loss/train': 2.3590214252471924} +02/24/2022 18:15:40 - INFO - codeparrot_training - Step 12899: {'lr': 0.0004390454778142896, 'samples': 6604800, 'steps': 12899, 'loss/train': 2.0362513065338135} +02/24/2022 18:15:45 - INFO - codeparrot_training - Step 12900: {'lr': 0.00043903477044508066, 'samples': 6605312, 'steps': 12900, 'loss/train': 1.9012597799301147} +02/24/2022 18:15:49 - INFO - codeparrot_training - Step 12901: {'lr': 0.0004390240622661069, 'samples': 6605824, 'steps': 12901, 'loss/train': 0.5600261688232422} +02/24/2022 18:15:54 - INFO - codeparrot_training - Step 12902: {'lr': 0.000439013353277414, 'samples': 6606336, 'steps': 12902, 'loss/train': 1.669532299041748} +02/24/2022 18:15:58 - INFO - codeparrot_training - Step 12903: {'lr': 0.00043900264347904796, 'samples': 6606848, 'steps': 12903, 'loss/train': 2.456749439239502} +02/24/2022 18:16:03 - INFO - codeparrot_training - Step 12904: {'lr': 0.00043899193287105456, 'samples': 6607360, 'steps': 12904, 'loss/train': 1.5611181259155273} +02/24/2022 18:16:07 - INFO - codeparrot_training - Step 12905: {'lr': 0.0004389812214534798, 'samples': 6607872, 'steps': 12905, 'loss/train': 1.9544636011123657} +02/24/2022 18:16:12 - INFO - codeparrot_training - Step 12906: {'lr': 0.00043897050922636947, 'samples': 6608384, 'steps': 12906, 'loss/train': 2.197460412979126} +02/24/2022 18:16:16 - INFO - codeparrot_training - Step 12907: {'lr': 0.00043895979618976944, 'samples': 6608896, 'steps': 12907, 'loss/train': 2.3713438510894775} +02/24/2022 18:16:22 - INFO - codeparrot_training - Step 12908: {'lr': 0.00043894908234372564, 'samples': 6609408, 'steps': 12908, 'loss/train': 2.0503082275390625} +02/24/2022 18:16:26 - INFO - codeparrot_training - Step 12909: {'lr': 0.00043893836768828405, 'samples': 6609920, 'steps': 12909, 'loss/train': 2.2581403255462646} +02/24/2022 18:16:31 - INFO - codeparrot_training - Step 12910: {'lr': 0.0004389276522234904, 'samples': 6610432, 'steps': 12910, 'loss/train': 2.018444538116455} +02/24/2022 18:16:35 - INFO - codeparrot_training - Step 12911: {'lr': 0.00043891693594939077, 'samples': 6610944, 'steps': 12911, 'loss/train': 1.256234884262085} +02/24/2022 18:16:40 - INFO - codeparrot_training - Step 12912: {'lr': 0.0004389062188660309, 'samples': 6611456, 'steps': 12912, 'loss/train': 2.297994613647461} +02/24/2022 18:16:44 - INFO - codeparrot_training - Step 12913: {'lr': 0.00043889550097345675, 'samples': 6611968, 'steps': 12913, 'loss/train': 3.1912038326263428} +02/24/2022 18:16:49 - INFO - codeparrot_training - Step 12914: {'lr': 0.0004388847822717144, 'samples': 6612480, 'steps': 12914, 'loss/train': 0.8842859268188477} +02/24/2022 18:16:53 - INFO - codeparrot_training - Step 12915: {'lr': 0.0004388740627608495, 'samples': 6612992, 'steps': 12915, 'loss/train': 2.4785799980163574} +02/24/2022 18:16:58 - INFO - codeparrot_training - Step 12916: {'lr': 0.0004388633424409081, 'samples': 6613504, 'steps': 12916, 'loss/train': 0.728019654750824} +02/24/2022 18:17:02 - INFO - codeparrot_training - Step 12917: {'lr': 0.0004388526213119361, 'samples': 6614016, 'steps': 12917, 'loss/train': 1.5862822532653809} +02/24/2022 18:17:07 - INFO - codeparrot_training - Step 12918: {'lr': 0.00043884189937397946, 'samples': 6614528, 'steps': 12918, 'loss/train': 1.834671974182129} +02/24/2022 18:17:11 - INFO - codeparrot_training - Step 12919: {'lr': 0.00043883117662708404, 'samples': 6615040, 'steps': 12919, 'loss/train': 1.723826289176941} +02/24/2022 18:17:16 - INFO - codeparrot_training - Step 12920: {'lr': 0.0004388204530712959, 'samples': 6615552, 'steps': 12920, 'loss/train': 1.9067957401275635} +02/24/2022 18:17:20 - INFO - codeparrot_training - Step 12921: {'lr': 0.00043880972870666084, 'samples': 6616064, 'steps': 12921, 'loss/train': 1.8042864799499512} +02/24/2022 18:17:26 - INFO - codeparrot_training - Step 12922: {'lr': 0.0004387990035332249, 'samples': 6616576, 'steps': 12922, 'loss/train': 3.0477044582366943} +02/24/2022 18:17:30 - INFO - codeparrot_training - Step 12923: {'lr': 0.00043878827755103404, 'samples': 6617088, 'steps': 12923, 'loss/train': 2.090799331665039} +02/24/2022 18:17:35 - INFO - codeparrot_training - Step 12924: {'lr': 0.00043877755076013406, 'samples': 6617600, 'steps': 12924, 'loss/train': 1.9760048389434814} +02/24/2022 18:17:39 - INFO - codeparrot_training - Step 12925: {'lr': 0.00043876682316057095, 'samples': 6618112, 'steps': 12925, 'loss/train': 2.9608991146087646} +02/24/2022 18:17:44 - INFO - codeparrot_training - Step 12926: {'lr': 0.0004387560947523908, 'samples': 6618624, 'steps': 12926, 'loss/train': 2.5437731742858887} +02/24/2022 18:17:48 - INFO - codeparrot_training - Step 12927: {'lr': 0.0004387453655356394, 'samples': 6619136, 'steps': 12927, 'loss/train': 2.234454870223999} +02/24/2022 18:17:53 - INFO - codeparrot_training - Step 12928: {'lr': 0.00043873463551036284, 'samples': 6619648, 'steps': 12928, 'loss/train': 0.7627431750297546} +02/24/2022 18:17:57 - INFO - codeparrot_training - Step 12929: {'lr': 0.000438723904676607, 'samples': 6620160, 'steps': 12929, 'loss/train': 1.6176753044128418} +02/24/2022 18:18:02 - INFO - codeparrot_training - Step 12930: {'lr': 0.0004387131730344179, 'samples': 6620672, 'steps': 12930, 'loss/train': 2.3379111289978027} +02/24/2022 18:18:06 - INFO - codeparrot_training - Step 12931: {'lr': 0.00043870244058384145, 'samples': 6621184, 'steps': 12931, 'loss/train': 1.8111900091171265} +02/24/2022 18:18:12 - INFO - codeparrot_training - Step 12932: {'lr': 0.0004386917073249237, 'samples': 6621696, 'steps': 12932, 'loss/train': 1.5553728342056274} +02/24/2022 18:18:15 - INFO - codeparrot_training - Step 12933: {'lr': 0.00043868097325771064, 'samples': 6622208, 'steps': 12933, 'loss/train': 2.6121296882629395} +02/24/2022 18:18:21 - INFO - codeparrot_training - Step 12934: {'lr': 0.0004386702383822482, 'samples': 6622720, 'steps': 12934, 'loss/train': 2.1837029457092285} +02/24/2022 18:18:24 - INFO - codeparrot_training - Step 12935: {'lr': 0.00043865950269858224, 'samples': 6623232, 'steps': 12935, 'loss/train': 0.8351127505302429} +02/24/2022 18:18:30 - INFO - codeparrot_training - Step 12936: {'lr': 0.000438648766206759, 'samples': 6623744, 'steps': 12936, 'loss/train': 2.213078022003174} +02/24/2022 18:18:33 - INFO - codeparrot_training - Step 12937: {'lr': 0.0004386380289068243, 'samples': 6624256, 'steps': 12937, 'loss/train': 1.1169012784957886} +02/24/2022 18:18:39 - INFO - codeparrot_training - Step 12938: {'lr': 0.0004386272907988242, 'samples': 6624768, 'steps': 12938, 'loss/train': 1.2164520025253296} +02/24/2022 18:18:42 - INFO - codeparrot_training - Step 12939: {'lr': 0.0004386165518828047, 'samples': 6625280, 'steps': 12939, 'loss/train': 1.025045394897461} +02/24/2022 18:18:48 - INFO - codeparrot_training - Step 12940: {'lr': 0.0004386058121588117, 'samples': 6625792, 'steps': 12940, 'loss/train': 1.1823631525039673} +02/24/2022 18:18:51 - INFO - codeparrot_training - Step 12941: {'lr': 0.0004385950716268914, 'samples': 6626304, 'steps': 12941, 'loss/train': 1.8581124544143677} +02/24/2022 18:18:58 - INFO - codeparrot_training - Step 12942: {'lr': 0.0004385843302870896, 'samples': 6626816, 'steps': 12942, 'loss/train': 1.457020878791809} +02/24/2022 18:19:01 - INFO - codeparrot_training - Step 12943: {'lr': 0.0004385735881394525, 'samples': 6627328, 'steps': 12943, 'loss/train': 1.3479233980178833} +02/24/2022 18:19:07 - INFO - codeparrot_training - Step 12944: {'lr': 0.00043856284518402594, 'samples': 6627840, 'steps': 12944, 'loss/train': 2.614276885986328} +02/24/2022 18:19:10 - INFO - codeparrot_training - Step 12945: {'lr': 0.00043855210142085613, 'samples': 6628352, 'steps': 12945, 'loss/train': 1.8227146863937378} +02/24/2022 18:19:16 - INFO - codeparrot_training - Step 12946: {'lr': 0.00043854135684998893, 'samples': 6628864, 'steps': 12946, 'loss/train': 2.0301523208618164} +02/24/2022 18:19:21 - INFO - codeparrot_training - Step 12947: {'lr': 0.0004385306114714704, 'samples': 6629376, 'steps': 12947, 'loss/train': 1.6931921243667603} +02/24/2022 18:19:25 - INFO - codeparrot_training - Step 12948: {'lr': 0.0004385198652853466, 'samples': 6629888, 'steps': 12948, 'loss/train': 1.820383071899414} +02/24/2022 18:19:28 - INFO - codeparrot_training - Step 12949: {'lr': 0.00043850911829166364, 'samples': 6630400, 'steps': 12949, 'loss/train': 1.475953459739685} +02/24/2022 18:19:34 - INFO - codeparrot_training - Step 12950: {'lr': 0.00043849837049046735, 'samples': 6630912, 'steps': 12950, 'loss/train': 2.13820743560791} +02/24/2022 18:19:37 - INFO - codeparrot_training - Step 12951: {'lr': 0.000438487621881804, 'samples': 6631424, 'steps': 12951, 'loss/train': 2.1887857913970947} +02/24/2022 18:19:44 - INFO - codeparrot_training - Step 12952: {'lr': 0.00043847687246571955, 'samples': 6631936, 'steps': 12952, 'loss/train': 1.4776455163955688} +02/24/2022 18:19:48 - INFO - codeparrot_training - Step 12953: {'lr': 0.0004384661222422599, 'samples': 6632448, 'steps': 12953, 'loss/train': 0.7721335291862488} +02/24/2022 18:19:53 - INFO - codeparrot_training - Step 12954: {'lr': 0.00043845537121147126, 'samples': 6632960, 'steps': 12954, 'loss/train': 0.9670889973640442} +02/24/2022 18:19:59 - INFO - codeparrot_training - Step 12955: {'lr': 0.00043844461937339976, 'samples': 6633472, 'steps': 12955, 'loss/train': 2.7996771335601807} +02/24/2022 18:20:02 - INFO - codeparrot_training - Step 12956: {'lr': 0.00043843386672809127, 'samples': 6633984, 'steps': 12956, 'loss/train': 0.17765621840953827} +02/24/2022 18:20:08 - INFO - codeparrot_training - Step 12957: {'lr': 0.00043842311327559194, 'samples': 6634496, 'steps': 12957, 'loss/train': 2.5870721340179443} +02/24/2022 18:20:11 - INFO - codeparrot_training - Step 12958: {'lr': 0.0004384123590159478, 'samples': 6635008, 'steps': 12958, 'loss/train': 2.4279298782348633} +02/24/2022 18:20:15 - INFO - codeparrot_training - Step 12959: {'lr': 0.000438401603949205, 'samples': 6635520, 'steps': 12959, 'loss/train': 1.7032833099365234} +02/24/2022 18:20:20 - INFO - codeparrot_training - Step 12960: {'lr': 0.0004383908480754095, 'samples': 6636032, 'steps': 12960, 'loss/train': 2.8009588718414307} +02/24/2022 18:20:24 - INFO - codeparrot_training - Step 12961: {'lr': 0.0004383800913946074, 'samples': 6636544, 'steps': 12961, 'loss/train': 2.1121156215667725} +02/24/2022 18:20:29 - INFO - codeparrot_training - Step 12962: {'lr': 0.00043836933390684486, 'samples': 6637056, 'steps': 12962, 'loss/train': 1.0790863037109375} +02/24/2022 18:20:33 - INFO - codeparrot_training - Step 12963: {'lr': 0.0004383585756121679, 'samples': 6637568, 'steps': 12963, 'loss/train': 2.1460609436035156} +02/24/2022 18:20:38 - INFO - codeparrot_training - Step 12964: {'lr': 0.00043834781651062263, 'samples': 6638080, 'steps': 12964, 'loss/train': 2.0029067993164062} +02/24/2022 18:20:42 - INFO - codeparrot_training - Step 12965: {'lr': 0.00043833705660225507, 'samples': 6638592, 'steps': 12965, 'loss/train': 1.537346363067627} +02/24/2022 18:20:47 - INFO - codeparrot_training - Step 12966: {'lr': 0.0004383262958871114, 'samples': 6639104, 'steps': 12966, 'loss/train': 1.7548096179962158} +02/24/2022 18:20:53 - INFO - codeparrot_training - Step 12967: {'lr': 0.0004383155343652377, 'samples': 6639616, 'steps': 12967, 'loss/train': 2.1672027111053467} +02/24/2022 18:20:56 - INFO - codeparrot_training - Step 12968: {'lr': 0.00043830477203668, 'samples': 6640128, 'steps': 12968, 'loss/train': 1.9574977159500122} +02/24/2022 18:21:03 - INFO - codeparrot_training - Step 12969: {'lr': 0.00043829400890148446, 'samples': 6640640, 'steps': 12969, 'loss/train': 1.378501296043396} +02/24/2022 18:21:06 - INFO - codeparrot_training - Step 12970: {'lr': 0.0004382832449596972, 'samples': 6641152, 'steps': 12970, 'loss/train': 1.8503460884094238} +02/24/2022 18:21:12 - INFO - codeparrot_training - Step 12971: {'lr': 0.0004382724802113643, 'samples': 6641664, 'steps': 12971, 'loss/train': 1.3277899026870728} +02/24/2022 18:21:15 - INFO - codeparrot_training - Step 12972: {'lr': 0.0004382617146565319, 'samples': 6642176, 'steps': 12972, 'loss/train': 2.1431236267089844} +02/24/2022 18:21:21 - INFO - codeparrot_training - Step 12973: {'lr': 0.00043825094829524604, 'samples': 6642688, 'steps': 12973, 'loss/train': 2.597724676132202} +02/24/2022 18:21:24 - INFO - codeparrot_training - Step 12974: {'lr': 0.0004382401811275529, 'samples': 6643200, 'steps': 12974, 'loss/train': 0.6234491467475891} +02/24/2022 18:21:29 - INFO - codeparrot_training - Step 12975: {'lr': 0.0004382294131534986, 'samples': 6643712, 'steps': 12975, 'loss/train': 1.8240588903427124} +02/24/2022 18:21:33 - INFO - codeparrot_training - Step 12976: {'lr': 0.00043821864437312933, 'samples': 6644224, 'steps': 12976, 'loss/train': 2.4910523891448975} +02/24/2022 18:21:38 - INFO - codeparrot_training - Step 12977: {'lr': 0.00043820787478649105, 'samples': 6644736, 'steps': 12977, 'loss/train': 1.7990282773971558} +02/24/2022 18:21:42 - INFO - codeparrot_training - Step 12978: {'lr': 0.00043819710439363, 'samples': 6645248, 'steps': 12978, 'loss/train': 2.0579190254211426} +02/24/2022 18:21:48 - INFO - codeparrot_training - Step 12979: {'lr': 0.00043818633319459244, 'samples': 6645760, 'steps': 12979, 'loss/train': 2.8401296138763428} +02/24/2022 18:21:51 - INFO - codeparrot_training - Step 12980: {'lr': 0.00043817556118942426, 'samples': 6646272, 'steps': 12980, 'loss/train': 2.048617124557495} +02/24/2022 18:21:57 - INFO - codeparrot_training - Step 12981: {'lr': 0.00043816478837817183, 'samples': 6646784, 'steps': 12981, 'loss/train': 1.5831965208053589} +02/24/2022 18:22:00 - INFO - codeparrot_training - Step 12982: {'lr': 0.0004381540147608811, 'samples': 6647296, 'steps': 12982, 'loss/train': 1.0544679164886475} +02/24/2022 18:22:06 - INFO - codeparrot_training - Step 12983: {'lr': 0.00043814324033759834, 'samples': 6647808, 'steps': 12983, 'loss/train': 1.7173771858215332} +02/24/2022 18:22:09 - INFO - codeparrot_training - Step 12984: {'lr': 0.0004381324651083697, 'samples': 6648320, 'steps': 12984, 'loss/train': 1.8546584844589233} +02/24/2022 18:22:15 - INFO - codeparrot_training - Step 12985: {'lr': 0.00043812168907324137, 'samples': 6648832, 'steps': 12985, 'loss/train': 2.1783199310302734} +02/24/2022 18:22:18 - INFO - codeparrot_training - Step 12986: {'lr': 0.0004381109122322594, 'samples': 6649344, 'steps': 12986, 'loss/train': 0.3237724006175995} +02/24/2022 18:22:25 - INFO - codeparrot_training - Step 12987: {'lr': 0.00043810013458547007, 'samples': 6649856, 'steps': 12987, 'loss/train': 2.743412971496582} +02/24/2022 18:22:28 - INFO - codeparrot_training - Step 12988: {'lr': 0.00043808935613291934, 'samples': 6650368, 'steps': 12988, 'loss/train': 1.7829055786132812} +02/24/2022 18:22:34 - INFO - codeparrot_training - Step 12989: {'lr': 0.0004380785768746537, 'samples': 6650880, 'steps': 12989, 'loss/train': 1.761879563331604} +02/24/2022 18:22:37 - INFO - codeparrot_training - Step 12990: {'lr': 0.00043806779681071907, 'samples': 6651392, 'steps': 12990, 'loss/train': 1.6081982851028442} +02/24/2022 18:22:43 - INFO - codeparrot_training - Step 12991: {'lr': 0.00043805701594116175, 'samples': 6651904, 'steps': 12991, 'loss/train': 1.6534076929092407} +02/24/2022 18:22:46 - INFO - codeparrot_training - Step 12992: {'lr': 0.00043804623426602784, 'samples': 6652416, 'steps': 12992, 'loss/train': 1.8382694721221924} +02/24/2022 18:22:52 - INFO - codeparrot_training - Step 12993: {'lr': 0.00043803545178536365, 'samples': 6652928, 'steps': 12993, 'loss/train': 1.7728208303451538} +02/24/2022 18:22:55 - INFO - codeparrot_training - Step 12994: {'lr': 0.00043802466849921526, 'samples': 6653440, 'steps': 12994, 'loss/train': 1.5112247467041016} +02/24/2022 18:23:01 - INFO - codeparrot_training - Step 12995: {'lr': 0.0004380138844076289, 'samples': 6653952, 'steps': 12995, 'loss/train': 2.430936098098755} +02/24/2022 18:23:04 - INFO - codeparrot_training - Step 12996: {'lr': 0.00043800309951065076, 'samples': 6654464, 'steps': 12996, 'loss/train': 1.249114751815796} +02/24/2022 18:23:10 - INFO - codeparrot_training - Step 12997: {'lr': 0.000437992313808327, 'samples': 6654976, 'steps': 12997, 'loss/train': 0.6308334469795227} +02/24/2022 18:23:13 - INFO - codeparrot_training - Step 12998: {'lr': 0.0004379815273007039, 'samples': 6655488, 'steps': 12998, 'loss/train': 2.557718276977539} +02/24/2022 18:23:19 - INFO - codeparrot_training - Step 12999: {'lr': 0.0004379707399878276, 'samples': 6656000, 'steps': 12999, 'loss/train': 1.6013245582580566} +02/24/2022 18:23:19 - INFO - codeparrot_training - Evaluating and saving model checkpoint