diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -10361,3 +10361,1009 @@ Use FP16 precision: False 02/24/2022 14:30:17 - INFO - codeparrot_training - Step 9998: {'lr': 0.00046652271155291146, 'samples': 5119488, 'steps': 9998, 'loss/train': 2.2988455295562744} 02/24/2022 14:30:22 - INFO - codeparrot_training - Step 9999: {'lr': 0.0004665145317132503, 'samples': 5120000, 'steps': 9999, 'loss/train': 1.8172866106033325} 02/24/2022 14:30:22 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 14:30:39 - WARNING - huggingface_hub.repository - Several commits (10) will be pushed upstream. +02/24/2022 14:30:39 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 14:31:13 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 8cd23e2..c2659cd floral-grass-11 -> floral-grass-11 + +02/24/2022 14:31:18 - INFO - codeparrot_training - Step 10000: {'lr': 0.00046650635094610973, 'samples': 5120512, 'steps': 10000, 'loss/train': 1.3753174543380737} +02/24/2022 14:31:23 - INFO - codeparrot_training - Step 10001: {'lr': 0.00046649816925152456, 'samples': 5121024, 'steps': 10001, 'loss/train': 2.441896438598633} +02/24/2022 14:31:27 - INFO - codeparrot_training - Step 10002: {'lr': 0.00046648998662953003, 'samples': 5121536, 'steps': 10002, 'loss/train': 1.613208293914795} +02/24/2022 14:31:30 - INFO - codeparrot_training - Step 10003: {'lr': 0.00046648180308016116, 'samples': 5122048, 'steps': 10003, 'loss/train': 3.103625774383545} +02/24/2022 14:31:36 - INFO - codeparrot_training - Step 10004: {'lr': 0.00046647361860345293, 'samples': 5122560, 'steps': 10004, 'loss/train': 2.8003487586975098} +02/24/2022 14:31:39 - INFO - codeparrot_training - Step 10005: {'lr': 0.00046646543319944057, 'samples': 5123072, 'steps': 10005, 'loss/train': 3.8220736980438232} +02/24/2022 14:31:46 - INFO - codeparrot_training - Step 10006: {'lr': 0.00046645724686815893, 'samples': 5123584, 'steps': 10006, 'loss/train': 2.9445724487304688} +02/24/2022 14:31:50 - INFO - codeparrot_training - Step 10007: {'lr': 0.00046644905960964325, 'samples': 5124096, 'steps': 10007, 'loss/train': 2.2180960178375244} +02/24/2022 14:31:55 - INFO - codeparrot_training - Step 10008: {'lr': 0.00046644087142392845, 'samples': 5124608, 'steps': 10008, 'loss/train': 2.4489901065826416} +02/24/2022 14:32:01 - INFO - codeparrot_training - Step 10009: {'lr': 0.00046643268231104975, 'samples': 5125120, 'steps': 10009, 'loss/train': 1.7591034173965454} +02/24/2022 14:32:04 - INFO - codeparrot_training - Step 10010: {'lr': 0.00046642449227104213, 'samples': 5125632, 'steps': 10010, 'loss/train': 2.5898399353027344} +02/24/2022 14:32:10 - INFO - codeparrot_training - Step 10011: {'lr': 0.00046641630130394066, 'samples': 5126144, 'steps': 10011, 'loss/train': 1.4844536781311035} +02/24/2022 14:32:13 - INFO - codeparrot_training - Step 10012: {'lr': 0.0004664081094097805, 'samples': 5126656, 'steps': 10012, 'loss/train': 2.326171398162842} +02/24/2022 14:32:19 - INFO - codeparrot_training - Step 10013: {'lr': 0.00046639991658859684, 'samples': 5127168, 'steps': 10013, 'loss/train': 1.8968192338943481} +02/24/2022 14:32:22 - INFO - codeparrot_training - Step 10014: {'lr': 0.00046639172284042453, 'samples': 5127680, 'steps': 10014, 'loss/train': 1.6029521226882935} +02/24/2022 14:32:29 - INFO - codeparrot_training - Step 10015: {'lr': 0.00046638352816529883, 'samples': 5128192, 'steps': 10015, 'loss/train': 1.8007721900939941} +02/24/2022 14:32:33 - INFO - codeparrot_training - Step 10016: {'lr': 0.00046637533256325476, 'samples': 5128704, 'steps': 10016, 'loss/train': 2.874091625213623} +02/24/2022 14:32:38 - INFO - codeparrot_training - Step 10017: {'lr': 0.0004663671360343275, 'samples': 5129216, 'steps': 10017, 'loss/train': 1.657929539680481} +02/24/2022 14:32:42 - INFO - codeparrot_training - Step 10018: {'lr': 0.00046635893857855217, 'samples': 5129728, 'steps': 10018, 'loss/train': 1.4173426628112793} +02/24/2022 14:32:47 - INFO - codeparrot_training - Step 10019: {'lr': 0.0004663507401959638, 'samples': 5130240, 'steps': 10019, 'loss/train': 2.41587233543396} +02/24/2022 14:32:51 - INFO - codeparrot_training - Step 10020: {'lr': 0.00046634254088659757, 'samples': 5130752, 'steps': 10020, 'loss/train': 2.928043842315674} +02/24/2022 14:32:56 - INFO - codeparrot_training - Step 10021: {'lr': 0.00046633434065048855, 'samples': 5131264, 'steps': 10021, 'loss/train': 2.5403943061828613} +02/24/2022 14:33:00 - INFO - codeparrot_training - Step 10022: {'lr': 0.000466326139487672, 'samples': 5131776, 'steps': 10022, 'loss/train': 2.6487104892730713} +02/24/2022 14:33:05 - INFO - codeparrot_training - Step 10023: {'lr': 0.0004663179373981829, 'samples': 5132288, 'steps': 10023, 'loss/train': 1.5769028663635254} +02/24/2022 14:33:09 - INFO - codeparrot_training - Step 10024: {'lr': 0.0004663097343820565, 'samples': 5132800, 'steps': 10024, 'loss/train': 1.0461530685424805} +02/24/2022 14:33:14 - INFO - codeparrot_training - Step 10025: {'lr': 0.00046630153043932784, 'samples': 5133312, 'steps': 10025, 'loss/train': 2.093100070953369} +02/24/2022 14:33:18 - INFO - codeparrot_training - Step 10026: {'lr': 0.00046629332557003215, 'samples': 5133824, 'steps': 10026, 'loss/train': 1.5797640085220337} +02/24/2022 14:33:23 - INFO - codeparrot_training - Step 10027: {'lr': 0.00046628511977420443, 'samples': 5134336, 'steps': 10027, 'loss/train': 2.1969242095947266} +02/24/2022 14:33:27 - INFO - codeparrot_training - Step 10028: {'lr': 0.00046627691305188004, 'samples': 5134848, 'steps': 10028, 'loss/train': 2.601287841796875} +02/24/2022 14:33:34 - INFO - codeparrot_training - Step 10029: {'lr': 0.00046626870540309394, 'samples': 5135360, 'steps': 10029, 'loss/train': 2.6741883754730225} +02/24/2022 14:33:38 - INFO - codeparrot_training - Step 10030: {'lr': 0.00046626049682788143, 'samples': 5135872, 'steps': 10030, 'loss/train': 1.5147557258605957} +02/24/2022 14:33:44 - INFO - codeparrot_training - Step 10031: {'lr': 0.00046625228732627763, 'samples': 5136384, 'steps': 10031, 'loss/train': 2.425189733505249} +02/24/2022 14:33:47 - INFO - codeparrot_training - Step 10032: {'lr': 0.00046624407689831773, 'samples': 5136896, 'steps': 10032, 'loss/train': 2.1255462169647217} +02/24/2022 14:33:53 - INFO - codeparrot_training - Step 10033: {'lr': 0.0004662358655440368, 'samples': 5137408, 'steps': 10033, 'loss/train': 2.444613456726074} +02/24/2022 14:33:56 - INFO - codeparrot_training - Step 10034: {'lr': 0.0004662276532634701, 'samples': 5137920, 'steps': 10034, 'loss/train': 2.3182966709136963} +02/24/2022 14:34:02 - INFO - codeparrot_training - Step 10035: {'lr': 0.0004662194400566528, 'samples': 5138432, 'steps': 10035, 'loss/train': 2.421915054321289} +02/24/2022 14:34:05 - INFO - codeparrot_training - Step 10036: {'lr': 0.0004662112259236201, 'samples': 5138944, 'steps': 10036, 'loss/train': 1.6606255769729614} +02/24/2022 14:34:11 - INFO - codeparrot_training - Step 10037: {'lr': 0.00046620301086440713, 'samples': 5139456, 'steps': 10037, 'loss/train': 2.000070810317993} +02/24/2022 14:34:14 - INFO - codeparrot_training - Step 10038: {'lr': 0.00046619479487904915, 'samples': 5139968, 'steps': 10038, 'loss/train': 1.8782694339752197} +02/24/2022 14:34:20 - INFO - codeparrot_training - Step 10039: {'lr': 0.0004661865779675813, 'samples': 5140480, 'steps': 10039, 'loss/train': 1.970636010169983} +02/24/2022 14:34:24 - INFO - codeparrot_training - Step 10040: {'lr': 0.0004661783601300388, 'samples': 5140992, 'steps': 10040, 'loss/train': 1.6922369003295898} +02/24/2022 14:34:31 - INFO - codeparrot_training - Step 10041: {'lr': 0.00046617014136645686, 'samples': 5141504, 'steps': 10041, 'loss/train': 2.4848549365997314} +02/24/2022 14:34:34 - INFO - codeparrot_training - Step 10042: {'lr': 0.00046616192167687066, 'samples': 5142016, 'steps': 10042, 'loss/train': 2.2349853515625} +02/24/2022 14:34:40 - INFO - codeparrot_training - Step 10043: {'lr': 0.00046615370106131536, 'samples': 5142528, 'steps': 10043, 'loss/train': 2.6010119915008545} +02/24/2022 14:34:43 - INFO - codeparrot_training - Step 10044: {'lr': 0.00046614547951982636, 'samples': 5143040, 'steps': 10044, 'loss/train': 2.7375640869140625} +02/24/2022 14:34:49 - INFO - codeparrot_training - Step 10045: {'lr': 0.00046613725705243873, 'samples': 5143552, 'steps': 10045, 'loss/train': 2.314051628112793} +02/24/2022 14:34:52 - INFO - codeparrot_training - Step 10046: {'lr': 0.0004661290336591877, 'samples': 5144064, 'steps': 10046, 'loss/train': 1.0697977542877197} +02/24/2022 14:34:58 - INFO - codeparrot_training - Step 10047: {'lr': 0.0004661208093401085, 'samples': 5144576, 'steps': 10047, 'loss/train': 2.632368564605713} +02/24/2022 14:35:01 - INFO - codeparrot_training - Step 10048: {'lr': 0.0004661125840952364, 'samples': 5145088, 'steps': 10048, 'loss/train': 3.0041842460632324} +02/24/2022 14:35:07 - INFO - codeparrot_training - Step 10049: {'lr': 0.0004661043579246066, 'samples': 5145600, 'steps': 10049, 'loss/train': 1.8701436519622803} +02/24/2022 14:35:10 - INFO - codeparrot_training - Step 10050: {'lr': 0.00046609613082825436, 'samples': 5146112, 'steps': 10050, 'loss/train': 3.2705018520355225} +02/24/2022 14:35:18 - INFO - codeparrot_training - Step 10051: {'lr': 0.00046608790280621494, 'samples': 5146624, 'steps': 10051, 'loss/train': 2.601288318634033} +02/24/2022 14:35:21 - INFO - codeparrot_training - Step 10052: {'lr': 0.0004660796738585235, 'samples': 5147136, 'steps': 10052, 'loss/train': 0.8518520593643188} +02/24/2022 14:35:26 - INFO - codeparrot_training - Step 10053: {'lr': 0.0004660714439852154, 'samples': 5147648, 'steps': 10053, 'loss/train': 1.528947114944458} +02/24/2022 14:35:30 - INFO - codeparrot_training - Step 10054: {'lr': 0.0004660632131863258, 'samples': 5148160, 'steps': 10054, 'loss/train': 1.620219111442566} +02/24/2022 14:35:35 - INFO - codeparrot_training - Step 10055: {'lr': 0.0004660549814618901, 'samples': 5148672, 'steps': 10055, 'loss/train': 2.8213744163513184} +02/24/2022 14:35:39 - INFO - codeparrot_training - Step 10056: {'lr': 0.00046604674881194335, 'samples': 5149184, 'steps': 10056, 'loss/train': 2.5623490810394287} +02/24/2022 14:35:44 - INFO - codeparrot_training - Step 10057: {'lr': 0.000466038515236521, 'samples': 5149696, 'steps': 10057, 'loss/train': 2.445793390274048} +02/24/2022 14:35:48 - INFO - codeparrot_training - Step 10058: {'lr': 0.0004660302807356582, 'samples': 5150208, 'steps': 10058, 'loss/train': 1.1359987258911133} +02/24/2022 14:35:53 - INFO - codeparrot_training - Step 10059: {'lr': 0.0004660220453093903, 'samples': 5150720, 'steps': 10059, 'loss/train': 0.461545467376709} +02/24/2022 14:35:57 - INFO - codeparrot_training - Step 10060: {'lr': 0.0004660138089577526, 'samples': 5151232, 'steps': 10060, 'loss/train': 2.7554819583892822} +02/24/2022 14:36:04 - INFO - codeparrot_training - Step 10061: {'lr': 0.00046600557168078026, 'samples': 5151744, 'steps': 10061, 'loss/train': 1.467819333076477} +02/24/2022 14:36:08 - INFO - codeparrot_training - Step 10062: {'lr': 0.0004659973334785087, 'samples': 5152256, 'steps': 10062, 'loss/train': 2.1805968284606934} +02/24/2022 14:36:13 - INFO - codeparrot_training - Step 10063: {'lr': 0.00046598909435097315, 'samples': 5152768, 'steps': 10063, 'loss/train': 2.4827444553375244} +02/24/2022 14:36:17 - INFO - codeparrot_training - Step 10064: {'lr': 0.0004659808542982088, 'samples': 5153280, 'steps': 10064, 'loss/train': 2.6353976726531982} +02/24/2022 14:36:22 - INFO - codeparrot_training - Step 10065: {'lr': 0.0004659726133202512, 'samples': 5153792, 'steps': 10065, 'loss/train': 1.5538828372955322} +02/24/2022 14:36:26 - INFO - codeparrot_training - Step 10066: {'lr': 0.0004659643714171354, 'samples': 5154304, 'steps': 10066, 'loss/train': 3.414149045944214} +02/24/2022 14:36:31 - INFO - codeparrot_training - Step 10067: {'lr': 0.00046595612858889686, 'samples': 5154816, 'steps': 10067, 'loss/train': 2.5281076431274414} +02/24/2022 14:36:35 - INFO - codeparrot_training - Step 10068: {'lr': 0.00046594788483557084, 'samples': 5155328, 'steps': 10068, 'loss/train': 2.7684803009033203} +02/24/2022 14:36:40 - INFO - codeparrot_training - Step 10069: {'lr': 0.00046593964015719257, 'samples': 5155840, 'steps': 10069, 'loss/train': 2.6864817142486572} +02/24/2022 14:36:44 - INFO - codeparrot_training - Step 10070: {'lr': 0.0004659313945537975, 'samples': 5156352, 'steps': 10070, 'loss/train': 2.4148175716400146} +02/24/2022 14:36:49 - INFO - codeparrot_training - Step 10071: {'lr': 0.00046592314802542095, 'samples': 5156864, 'steps': 10071, 'loss/train': 1.5033036470413208} +02/24/2022 14:36:53 - INFO - codeparrot_training - Step 10072: {'lr': 0.0004659149005720982, 'samples': 5157376, 'steps': 10072, 'loss/train': 1.4138095378875732} +02/24/2022 14:36:58 - INFO - codeparrot_training - Step 10073: {'lr': 0.00046590665219386454, 'samples': 5157888, 'steps': 10073, 'loss/train': 1.4330164194107056} +02/24/2022 14:37:02 - INFO - codeparrot_training - Step 10074: {'lr': 0.0004658984028907553, 'samples': 5158400, 'steps': 10074, 'loss/train': 1.749715805053711} +02/24/2022 14:37:09 - INFO - codeparrot_training - Step 10075: {'lr': 0.0004658901526628059, 'samples': 5158912, 'steps': 10075, 'loss/train': 2.8505806922912598} +02/24/2022 14:37:13 - INFO - codeparrot_training - Step 10076: {'lr': 0.00046588190151005163, 'samples': 5159424, 'steps': 10076, 'loss/train': 2.787666082382202} +02/24/2022 14:37:18 - INFO - codeparrot_training - Step 10077: {'lr': 0.00046587364943252783, 'samples': 5159936, 'steps': 10077, 'loss/train': 1.2507398128509521} +02/24/2022 14:37:22 - INFO - codeparrot_training - Step 10078: {'lr': 0.00046586539643026994, 'samples': 5160448, 'steps': 10078, 'loss/train': 3.095327854156494} +02/24/2022 14:37:27 - INFO - codeparrot_training - Step 10079: {'lr': 0.0004658571425033131, 'samples': 5160960, 'steps': 10079, 'loss/train': 2.252150774002075} +02/24/2022 14:37:31 - INFO - codeparrot_training - Step 10080: {'lr': 0.0004658488876516929, 'samples': 5161472, 'steps': 10080, 'loss/train': 0.7183483242988586} +02/24/2022 14:37:36 - INFO - codeparrot_training - Step 10081: {'lr': 0.0004658406318754446, 'samples': 5161984, 'steps': 10081, 'loss/train': 1.6840614080429077} +02/24/2022 14:37:40 - INFO - codeparrot_training - Step 10082: {'lr': 0.0004658323751746036, 'samples': 5162496, 'steps': 10082, 'loss/train': 1.894370675086975} +02/24/2022 14:37:45 - INFO - codeparrot_training - Step 10083: {'lr': 0.00046582411754920517, 'samples': 5163008, 'steps': 10083, 'loss/train': 2.1872527599334717} +02/24/2022 14:37:49 - INFO - codeparrot_training - Step 10084: {'lr': 0.0004658158589992848, 'samples': 5163520, 'steps': 10084, 'loss/train': 3.0928308963775635} +02/24/2022 14:37:54 - INFO - codeparrot_training - Step 10085: {'lr': 0.00046580759952487776, 'samples': 5164032, 'steps': 10085, 'loss/train': 1.4812055826187134} +02/24/2022 14:37:58 - INFO - codeparrot_training - Step 10086: {'lr': 0.00046579933912601956, 'samples': 5164544, 'steps': 10086, 'loss/train': 1.1863903999328613} +02/24/2022 14:38:05 - INFO - codeparrot_training - Step 10087: {'lr': 0.00046579107780274543, 'samples': 5165056, 'steps': 10087, 'loss/train': 1.3037912845611572} +02/24/2022 14:38:09 - INFO - codeparrot_training - Step 10088: {'lr': 0.00046578281555509094, 'samples': 5165568, 'steps': 10088, 'loss/train': 2.437044143676758} +02/24/2022 14:38:14 - INFO - codeparrot_training - Step 10089: {'lr': 0.0004657745523830914, 'samples': 5166080, 'steps': 10089, 'loss/train': 1.872549295425415} +02/24/2022 14:38:18 - INFO - codeparrot_training - Step 10090: {'lr': 0.0004657662882867821, 'samples': 5166592, 'steps': 10090, 'loss/train': 0.8082488775253296} +02/24/2022 14:38:23 - INFO - codeparrot_training - Step 10091: {'lr': 0.0004657580232661985, 'samples': 5167104, 'steps': 10091, 'loss/train': 1.8585642576217651} +02/24/2022 14:38:27 - INFO - codeparrot_training - Step 10092: {'lr': 0.00046574975732137613, 'samples': 5167616, 'steps': 10092, 'loss/train': 2.1034302711486816} +02/24/2022 14:38:32 - INFO - codeparrot_training - Step 10093: {'lr': 0.0004657414904523504, 'samples': 5168128, 'steps': 10093, 'loss/train': 2.5881094932556152} +02/24/2022 14:38:36 - INFO - codeparrot_training - Step 10094: {'lr': 0.0004657332226591565, 'samples': 5168640, 'steps': 10094, 'loss/train': 2.7554502487182617} +02/24/2022 14:38:41 - INFO - codeparrot_training - Step 10095: {'lr': 0.00046572495394183, 'samples': 5169152, 'steps': 10095, 'loss/train': 2.156113386154175} +02/24/2022 14:38:45 - INFO - codeparrot_training - Step 10096: {'lr': 0.00046571668430040624, 'samples': 5169664, 'steps': 10096, 'loss/train': 0.9854620099067688} +02/24/2022 14:38:52 - INFO - codeparrot_training - Step 10097: {'lr': 0.0004657084137349208, 'samples': 5170176, 'steps': 10097, 'loss/train': 1.3659499883651733} +02/24/2022 14:38:56 - INFO - codeparrot_training - Step 10098: {'lr': 0.0004657001422454089, 'samples': 5170688, 'steps': 10098, 'loss/train': 3.0307724475860596} +02/24/2022 14:39:01 - INFO - codeparrot_training - Step 10099: {'lr': 0.0004656918698319062, 'samples': 5171200, 'steps': 10099, 'loss/train': 1.0281710624694824} +02/24/2022 14:39:05 - INFO - codeparrot_training - Step 10100: {'lr': 0.00046568359649444796, 'samples': 5171712, 'steps': 10100, 'loss/train': 2.967120885848999} +02/24/2022 14:39:10 - INFO - codeparrot_training - Step 10101: {'lr': 0.0004656753222330697, 'samples': 5172224, 'steps': 10101, 'loss/train': 1.1926450729370117} +02/24/2022 14:39:14 - INFO - codeparrot_training - Step 10102: {'lr': 0.0004656670470478068, 'samples': 5172736, 'steps': 10102, 'loss/train': 2.2363133430480957} +02/24/2022 14:39:19 - INFO - codeparrot_training - Step 10103: {'lr': 0.0004656587709386948, 'samples': 5173248, 'steps': 10103, 'loss/train': 2.533914566040039} +02/24/2022 14:39:22 - INFO - codeparrot_training - Step 10104: {'lr': 0.00046565049390576906, 'samples': 5173760, 'steps': 10104, 'loss/train': 1.83980131149292} +02/24/2022 14:39:28 - INFO - codeparrot_training - Step 10105: {'lr': 0.0004656422159490652, 'samples': 5174272, 'steps': 10105, 'loss/train': 2.584303140640259} +02/24/2022 14:39:31 - INFO - codeparrot_training - Step 10106: {'lr': 0.00046563393706861847, 'samples': 5174784, 'steps': 10106, 'loss/train': 1.3872103691101074} +02/24/2022 14:39:37 - INFO - codeparrot_training - Step 10107: {'lr': 0.00046562565726446437, 'samples': 5175296, 'steps': 10107, 'loss/train': 2.090421676635742} +02/24/2022 14:39:40 - INFO - codeparrot_training - Step 10108: {'lr': 0.0004656173765366385, 'samples': 5175808, 'steps': 10108, 'loss/train': 2.775364637374878} +02/24/2022 14:39:46 - INFO - codeparrot_training - Step 10109: {'lr': 0.00046560909488517623, 'samples': 5176320, 'steps': 10109, 'loss/train': 3.0249388217926025} +02/24/2022 14:39:50 - INFO - codeparrot_training - Step 10110: {'lr': 0.0004656008123101131, 'samples': 5176832, 'steps': 10110, 'loss/train': 0.6997179985046387} +02/24/2022 14:39:56 - INFO - codeparrot_training - Step 10111: {'lr': 0.0004655925288114845, 'samples': 5177344, 'steps': 10111, 'loss/train': 1.3380553722381592} +02/24/2022 14:39:59 - INFO - codeparrot_training - Step 10112: {'lr': 0.000465584244389326, 'samples': 5177856, 'steps': 10112, 'loss/train': 2.2695019245147705} +02/24/2022 14:40:06 - INFO - codeparrot_training - Step 10113: {'lr': 0.000465575959043673, 'samples': 5178368, 'steps': 10113, 'loss/train': 0.9244392514228821} +02/24/2022 14:40:10 - INFO - codeparrot_training - Step 10114: {'lr': 0.0004655676727745611, 'samples': 5178880, 'steps': 10114, 'loss/train': 1.605212926864624} +02/24/2022 14:40:15 - INFO - codeparrot_training - Step 10115: {'lr': 0.0004655593855820257, 'samples': 5179392, 'steps': 10115, 'loss/train': 1.8432780504226685} +02/24/2022 14:40:19 - INFO - codeparrot_training - Step 10116: {'lr': 0.00046555109746610244, 'samples': 5179904, 'steps': 10116, 'loss/train': 1.3577485084533691} +02/24/2022 14:40:24 - INFO - codeparrot_training - Step 10117: {'lr': 0.0004655428084268266, 'samples': 5180416, 'steps': 10117, 'loss/train': 1.7021634578704834} +02/24/2022 14:40:28 - INFO - codeparrot_training - Step 10118: {'lr': 0.00046553451846423387, 'samples': 5180928, 'steps': 10118, 'loss/train': 2.499723434448242} +02/24/2022 14:40:33 - INFO - codeparrot_training - Step 10119: {'lr': 0.0004655262275783597, 'samples': 5181440, 'steps': 10119, 'loss/train': 0.8837375640869141} +02/24/2022 14:40:37 - INFO - codeparrot_training - Step 10120: {'lr': 0.00046551793576923964, 'samples': 5181952, 'steps': 10120, 'loss/train': 1.524060606956482} +02/24/2022 14:40:42 - INFO - codeparrot_training - Step 10121: {'lr': 0.0004655096430369091, 'samples': 5182464, 'steps': 10121, 'loss/train': 1.3317874670028687} +02/24/2022 14:40:46 - INFO - codeparrot_training - Step 10122: {'lr': 0.00046550134938140375, 'samples': 5182976, 'steps': 10122, 'loss/train': 1.8425887823104858} +02/24/2022 14:40:53 - INFO - codeparrot_training - Step 10123: {'lr': 0.00046549305480275894, 'samples': 5183488, 'steps': 10123, 'loss/train': 1.166218876838684} +02/24/2022 14:40:56 - INFO - codeparrot_training - Step 10124: {'lr': 0.0004654847593010104, 'samples': 5184000, 'steps': 10124, 'loss/train': 1.6765146255493164} +02/24/2022 14:41:02 - INFO - codeparrot_training - Step 10125: {'lr': 0.00046547646287619363, 'samples': 5184512, 'steps': 10125, 'loss/train': 4.011613368988037} +02/24/2022 14:41:05 - INFO - codeparrot_training - Step 10126: {'lr': 0.00046546816552834404, 'samples': 5185024, 'steps': 10126, 'loss/train': 2.2532076835632324} +02/24/2022 14:41:11 - INFO - codeparrot_training - Step 10127: {'lr': 0.00046545986725749725, 'samples': 5185536, 'steps': 10127, 'loss/train': 2.7270307540893555} +02/24/2022 14:41:14 - INFO - codeparrot_training - Step 10128: {'lr': 0.0004654515680636888, 'samples': 5186048, 'steps': 10128, 'loss/train': 2.2106196880340576} +02/24/2022 14:41:20 - INFO - codeparrot_training - Step 10129: {'lr': 0.00046544326794695424, 'samples': 5186560, 'steps': 10129, 'loss/train': 3.012795925140381} +02/24/2022 14:41:24 - INFO - codeparrot_training - Step 10130: {'lr': 0.00046543496690732914, 'samples': 5187072, 'steps': 10130, 'loss/train': 2.5087478160858154} +02/24/2022 14:41:29 - INFO - codeparrot_training - Step 10131: {'lr': 0.0004654266649448491, 'samples': 5187584, 'steps': 10131, 'loss/train': 2.468700885772705} +02/24/2022 14:41:33 - INFO - codeparrot_training - Step 10132: {'lr': 0.00046541836205954955, 'samples': 5188096, 'steps': 10132, 'loss/train': 1.8703206777572632} +02/24/2022 14:41:40 - INFO - codeparrot_training - Step 10133: {'lr': 0.0004654100582514662, 'samples': 5188608, 'steps': 10133, 'loss/train': 2.7259156703948975} +02/24/2022 14:41:43 - INFO - codeparrot_training - Step 10134: {'lr': 0.0004654017535206345, 'samples': 5189120, 'steps': 10134, 'loss/train': 1.6057307720184326} +02/24/2022 14:41:49 - INFO - codeparrot_training - Step 10135: {'lr': 0.00046539344786709013, 'samples': 5189632, 'steps': 10135, 'loss/train': 2.871105670928955} +02/24/2022 14:41:52 - INFO - codeparrot_training - Step 10136: {'lr': 0.0004653851412908686, 'samples': 5190144, 'steps': 10136, 'loss/train': 1.8268897533416748} +02/24/2022 14:41:58 - INFO - codeparrot_training - Step 10137: {'lr': 0.0004653768337920056, 'samples': 5190656, 'steps': 10137, 'loss/train': 2.5554840564727783} +02/24/2022 14:42:02 - INFO - codeparrot_training - Step 10138: {'lr': 0.00046536852537053654, 'samples': 5191168, 'steps': 10138, 'loss/train': 1.1800227165222168} +02/24/2022 14:42:07 - INFO - codeparrot_training - Step 10139: {'lr': 0.00046536021602649715, 'samples': 5191680, 'steps': 10139, 'loss/train': 2.837959051132202} +02/24/2022 14:42:10 - INFO - codeparrot_training - Step 10140: {'lr': 0.0004653519057599229, 'samples': 5192192, 'steps': 10140, 'loss/train': 2.6552610397338867} +02/24/2022 14:42:16 - INFO - codeparrot_training - Step 10141: {'lr': 0.0004653435945708496, 'samples': 5192704, 'steps': 10141, 'loss/train': 2.022085189819336} +02/24/2022 14:42:19 - INFO - codeparrot_training - Step 10142: {'lr': 0.00046533528245931266, 'samples': 5193216, 'steps': 10142, 'loss/train': 1.8562556505203247} +02/24/2022 14:42:25 - INFO - codeparrot_training - Step 10143: {'lr': 0.0004653269694253477, 'samples': 5193728, 'steps': 10143, 'loss/train': 0.3280055522918701} +02/24/2022 14:42:28 - INFO - codeparrot_training - Step 10144: {'lr': 0.00046531865546899044, 'samples': 5194240, 'steps': 10144, 'loss/train': 1.831819772720337} +02/24/2022 14:42:36 - INFO - codeparrot_training - Step 10145: {'lr': 0.00046531034059027644, 'samples': 5194752, 'steps': 10145, 'loss/train': 1.441911220550537} +02/24/2022 14:42:39 - INFO - codeparrot_training - Step 10146: {'lr': 0.0004653020247892412, 'samples': 5195264, 'steps': 10146, 'loss/train': 2.196563243865967} +02/24/2022 14:42:45 - INFO - codeparrot_training - Step 10147: {'lr': 0.0004652937080659206, 'samples': 5195776, 'steps': 10147, 'loss/train': 2.6439316272735596} +02/24/2022 14:42:48 - INFO - codeparrot_training - Step 10148: {'lr': 0.00046528539042035, 'samples': 5196288, 'steps': 10148, 'loss/train': 2.147430658340454} +02/24/2022 14:42:54 - INFO - codeparrot_training - Step 10149: {'lr': 0.0004652770718525652, 'samples': 5196800, 'steps': 10149, 'loss/train': 1.8699212074279785} +02/24/2022 14:42:57 - INFO - codeparrot_training - Step 10150: {'lr': 0.0004652687523626018, 'samples': 5197312, 'steps': 10150, 'loss/train': 2.5532984733581543} +02/24/2022 14:43:03 - INFO - codeparrot_training - Step 10151: {'lr': 0.0004652604319504954, 'samples': 5197824, 'steps': 10151, 'loss/train': 2.511273145675659} +02/24/2022 14:43:06 - INFO - codeparrot_training - Step 10152: {'lr': 0.00046525211061628163, 'samples': 5198336, 'steps': 10152, 'loss/train': 1.8393397331237793} +02/24/2022 14:43:12 - INFO - codeparrot_training - Step 10153: {'lr': 0.0004652437883599962, 'samples': 5198848, 'steps': 10153, 'loss/train': 1.9094717502593994} +02/24/2022 14:43:15 - INFO - codeparrot_training - Step 10154: {'lr': 0.0004652354651816747, 'samples': 5199360, 'steps': 10154, 'loss/train': 1.2962555885314941} +02/24/2022 14:43:21 - INFO - codeparrot_training - Step 10155: {'lr': 0.0004652271410813529, 'samples': 5199872, 'steps': 10155, 'loss/train': 1.813214898109436} +02/24/2022 14:43:24 - INFO - codeparrot_training - Step 10156: {'lr': 0.0004652188160590663, 'samples': 5200384, 'steps': 10156, 'loss/train': 1.9466413259506226} +02/24/2022 14:43:30 - INFO - codeparrot_training - Step 10157: {'lr': 0.00046521049011485064, 'samples': 5200896, 'steps': 10157, 'loss/train': 2.275838851928711} +02/24/2022 14:43:33 - INFO - codeparrot_training - Step 10158: {'lr': 0.0004652021632487415, 'samples': 5201408, 'steps': 10158, 'loss/train': 1.5771344900131226} +02/24/2022 14:43:40 - INFO - codeparrot_training - Step 10159: {'lr': 0.00046519383546077476, 'samples': 5201920, 'steps': 10159, 'loss/train': 0.6537622213363647} +02/24/2022 14:43:44 - INFO - codeparrot_training - Step 10160: {'lr': 0.0004651855067509859, 'samples': 5202432, 'steps': 10160, 'loss/train': 2.155850410461426} +02/24/2022 14:43:49 - INFO - codeparrot_training - Step 10161: {'lr': 0.00046517717711941066, 'samples': 5202944, 'steps': 10161, 'loss/train': 1.7206408977508545} +02/24/2022 14:43:53 - INFO - codeparrot_training - Step 10162: {'lr': 0.0004651688465660847, 'samples': 5203456, 'steps': 10162, 'loss/train': 2.2713823318481445} +02/24/2022 14:43:58 - INFO - codeparrot_training - Step 10163: {'lr': 0.00046516051509104376, 'samples': 5203968, 'steps': 10163, 'loss/train': 0.8373558521270752} +02/24/2022 14:44:02 - INFO - codeparrot_training - Step 10164: {'lr': 0.0004651521826943235, 'samples': 5204480, 'steps': 10164, 'loss/train': 2.706667423248291} +02/24/2022 14:44:07 - INFO - codeparrot_training - Step 10165: {'lr': 0.00046514384937595965, 'samples': 5204992, 'steps': 10165, 'loss/train': 2.2613797187805176} +02/24/2022 14:44:11 - INFO - codeparrot_training - Step 10166: {'lr': 0.0004651355151359878, 'samples': 5205504, 'steps': 10166, 'loss/train': 3.5909008979797363} +02/24/2022 14:44:16 - INFO - codeparrot_training - Step 10167: {'lr': 0.0004651271799744437, 'samples': 5206016, 'steps': 10167, 'loss/train': 2.2181339263916016} +02/24/2022 14:44:20 - INFO - codeparrot_training - Step 10168: {'lr': 0.0004651188438913631, 'samples': 5206528, 'steps': 10168, 'loss/train': 2.4214959144592285} +02/24/2022 14:44:25 - INFO - codeparrot_training - Step 10169: {'lr': 0.0004651105068867817, 'samples': 5207040, 'steps': 10169, 'loss/train': 2.7240946292877197} +02/24/2022 14:44:29 - INFO - codeparrot_training - Step 10170: {'lr': 0.00046510216896073517, 'samples': 5207552, 'steps': 10170, 'loss/train': 1.931358814239502} +02/24/2022 14:44:36 - INFO - codeparrot_training - Step 10171: {'lr': 0.00046509383011325925, 'samples': 5208064, 'steps': 10171, 'loss/train': 2.3203563690185547} +02/24/2022 14:44:40 - INFO - codeparrot_training - Step 10172: {'lr': 0.0004650854903443896, 'samples': 5208576, 'steps': 10172, 'loss/train': 2.6517884731292725} +02/24/2022 14:44:45 - INFO - codeparrot_training - Step 10173: {'lr': 0.0004650771496541621, 'samples': 5209088, 'steps': 10173, 'loss/train': 2.381789445877075} +02/24/2022 14:44:49 - INFO - codeparrot_training - Step 10174: {'lr': 0.0004650688080426123, 'samples': 5209600, 'steps': 10174, 'loss/train': 9.420103073120117} +02/24/2022 14:44:54 - INFO - codeparrot_training - Step 10175: {'lr': 0.0004650604655097761, 'samples': 5210112, 'steps': 10175, 'loss/train': 1.1089231967926025} +02/24/2022 14:45:00 - INFO - codeparrot_training - Step 10176: {'lr': 0.00046505212205568916, 'samples': 5210624, 'steps': 10176, 'loss/train': 1.0034704208374023} +02/24/2022 14:45:03 - INFO - codeparrot_training - Step 10177: {'lr': 0.0004650437776803872, 'samples': 5211136, 'steps': 10177, 'loss/train': 2.005133867263794} +02/24/2022 14:45:09 - INFO - codeparrot_training - Step 10178: {'lr': 0.00046503543238390595, 'samples': 5211648, 'steps': 10178, 'loss/train': 2.20976185798645} +02/24/2022 14:45:12 - INFO - codeparrot_training - Step 10179: {'lr': 0.0004650270861662812, 'samples': 5212160, 'steps': 10179, 'loss/train': 2.0330116748809814} +02/24/2022 14:45:20 - INFO - codeparrot_training - Step 10180: {'lr': 0.00046501873902754867, 'samples': 5212672, 'steps': 10180, 'loss/train': 1.8069636821746826} +02/24/2022 14:45:23 - INFO - codeparrot_training - Step 10181: {'lr': 0.00046501039096774415, 'samples': 5213184, 'steps': 10181, 'loss/train': 2.5174460411071777} +02/24/2022 14:45:29 - INFO - codeparrot_training - Step 10182: {'lr': 0.00046500204198690343, 'samples': 5213696, 'steps': 10182, 'loss/train': 2.5565450191497803} +02/24/2022 14:45:32 - INFO - codeparrot_training - Step 10183: {'lr': 0.0004649936920850622, 'samples': 5214208, 'steps': 10183, 'loss/train': 3.108539581298828} +02/24/2022 14:45:38 - INFO - codeparrot_training - Step 10184: {'lr': 0.00046498534126225625, 'samples': 5214720, 'steps': 10184, 'loss/train': 1.167891263961792} +02/24/2022 14:45:41 - INFO - codeparrot_training - Step 10185: {'lr': 0.0004649769895185214, 'samples': 5215232, 'steps': 10185, 'loss/train': 2.0358080863952637} +02/24/2022 14:45:46 - INFO - codeparrot_training - Step 10186: {'lr': 0.00046496863685389336, 'samples': 5215744, 'steps': 10186, 'loss/train': 3.45823335647583} +02/24/2022 14:45:50 - INFO - codeparrot_training - Step 10187: {'lr': 0.00046496028326840796, 'samples': 5216256, 'steps': 10187, 'loss/train': 2.4293813705444336} +02/24/2022 14:45:55 - INFO - codeparrot_training - Step 10188: {'lr': 0.000464951928762101, 'samples': 5216768, 'steps': 10188, 'loss/train': 1.1065857410430908} +02/24/2022 14:45:59 - INFO - codeparrot_training - Step 10189: {'lr': 0.00046494357333500816, 'samples': 5217280, 'steps': 10189, 'loss/train': 1.3676937818527222} +02/24/2022 14:46:07 - INFO - codeparrot_training - Step 10190: {'lr': 0.00046493521698716536, 'samples': 5217792, 'steps': 10190, 'loss/train': 1.7447195053100586} +02/24/2022 14:46:10 - INFO - codeparrot_training - Step 10191: {'lr': 0.00046492685971860826, 'samples': 5218304, 'steps': 10191, 'loss/train': 2.390455722808838} +02/24/2022 14:46:16 - INFO - codeparrot_training - Step 10192: {'lr': 0.00046491850152937276, 'samples': 5218816, 'steps': 10192, 'loss/train': 2.055619478225708} +02/24/2022 14:46:19 - INFO - codeparrot_training - Step 10193: {'lr': 0.0004649101424194947, 'samples': 5219328, 'steps': 10193, 'loss/train': 2.118504524230957} +02/24/2022 14:46:26 - INFO - codeparrot_training - Step 10194: {'lr': 0.0004649017823890098, 'samples': 5219840, 'steps': 10194, 'loss/train': 2.7681474685668945} +02/24/2022 14:46:29 - INFO - codeparrot_training - Step 10195: {'lr': 0.0004648934214379539, 'samples': 5220352, 'steps': 10195, 'loss/train': 2.295779228210449} +02/24/2022 14:46:33 - INFO - codeparrot_training - Step 10196: {'lr': 0.00046488505956636286, 'samples': 5220864, 'steps': 10196, 'loss/train': 2.5109503269195557} +02/24/2022 14:46:38 - INFO - codeparrot_training - Step 10197: {'lr': 0.00046487669677427237, 'samples': 5221376, 'steps': 10197, 'loss/train': 0.6117112040519714} +02/24/2022 14:46:42 - INFO - codeparrot_training - Step 10198: {'lr': 0.0004648683330617184, 'samples': 5221888, 'steps': 10198, 'loss/train': 2.5975136756896973} +02/24/2022 14:46:47 - INFO - codeparrot_training - Step 10199: {'lr': 0.00046485996842873676, 'samples': 5222400, 'steps': 10199, 'loss/train': 1.9540332555770874} +02/24/2022 14:46:51 - INFO - codeparrot_training - Step 10200: {'lr': 0.0004648516028753632, 'samples': 5222912, 'steps': 10200, 'loss/train': 2.2206778526306152} +02/24/2022 14:46:57 - INFO - codeparrot_training - Step 10201: {'lr': 0.00046484323640163356, 'samples': 5223424, 'steps': 10201, 'loss/train': 2.2316691875457764} +02/24/2022 14:47:00 - INFO - codeparrot_training - Step 10202: {'lr': 0.00046483486900758374, 'samples': 5223936, 'steps': 10202, 'loss/train': 1.1625144481658936} +02/24/2022 14:47:06 - INFO - codeparrot_training - Step 10203: {'lr': 0.0004648265006932496, 'samples': 5224448, 'steps': 10203, 'loss/train': 2.249019145965576} +02/24/2022 14:47:09 - INFO - codeparrot_training - Step 10204: {'lr': 0.0004648181314586669, 'samples': 5224960, 'steps': 10204, 'loss/train': 2.6625969409942627} +02/24/2022 14:47:17 - INFO - codeparrot_training - Step 10205: {'lr': 0.00046480976130387156, 'samples': 5225472, 'steps': 10205, 'loss/train': 2.237865686416626} +02/24/2022 14:47:20 - INFO - codeparrot_training - Step 10206: {'lr': 0.0004648013902288994, 'samples': 5225984, 'steps': 10206, 'loss/train': 3.1730339527130127} +02/24/2022 14:47:26 - INFO - codeparrot_training - Step 10207: {'lr': 0.0004647930182337863, 'samples': 5226496, 'steps': 10207, 'loss/train': 2.2114531993865967} +02/24/2022 14:47:29 - INFO - codeparrot_training - Step 10208: {'lr': 0.0004647846453185681, 'samples': 5227008, 'steps': 10208, 'loss/train': 3.4303669929504395} +02/24/2022 14:47:35 - INFO - codeparrot_training - Step 10209: {'lr': 0.0004647762714832807, 'samples': 5227520, 'steps': 10209, 'loss/train': 2.6461150646209717} +02/24/2022 14:47:38 - INFO - codeparrot_training - Step 10210: {'lr': 0.00046476789672795994, 'samples': 5228032, 'steps': 10210, 'loss/train': 1.9035426378250122} +02/24/2022 14:47:44 - INFO - codeparrot_training - Step 10211: {'lr': 0.00046475952105264176, 'samples': 5228544, 'steps': 10211, 'loss/train': 1.794641137123108} +02/24/2022 14:47:47 - INFO - codeparrot_training - Step 10212: {'lr': 0.0004647511444573619, 'samples': 5229056, 'steps': 10212, 'loss/train': 3.3274474143981934} +02/24/2022 14:47:53 - INFO - codeparrot_training - Step 10213: {'lr': 0.00046474276694215635, 'samples': 5229568, 'steps': 10213, 'loss/train': 1.993664026260376} +02/24/2022 14:47:56 - INFO - codeparrot_training - Step 10214: {'lr': 0.000464734388507061, 'samples': 5230080, 'steps': 10214, 'loss/train': 2.331465005874634} +02/24/2022 14:48:03 - INFO - codeparrot_training - Step 10215: {'lr': 0.00046472600915211174, 'samples': 5230592, 'steps': 10215, 'loss/train': 2.0613880157470703} +02/24/2022 14:48:06 - INFO - codeparrot_training - Step 10216: {'lr': 0.00046471762887734437, 'samples': 5231104, 'steps': 10216, 'loss/train': 3.046858072280884} +02/24/2022 14:48:12 - INFO - codeparrot_training - Step 10217: {'lr': 0.0004647092476827949, 'samples': 5231616, 'steps': 10217, 'loss/train': 1.8909368515014648} +02/24/2022 14:48:15 - INFO - codeparrot_training - Step 10218: {'lr': 0.0004647008655684992, 'samples': 5232128, 'steps': 10218, 'loss/train': 2.1916615962982178} +02/24/2022 14:48:21 - INFO - codeparrot_training - Step 10219: {'lr': 0.00046469248253449316, 'samples': 5232640, 'steps': 10219, 'loss/train': 2.1193439960479736} +02/24/2022 14:48:24 - INFO - codeparrot_training - Step 10220: {'lr': 0.0004646840985808126, 'samples': 5233152, 'steps': 10220, 'loss/train': 1.7376508712768555} +02/24/2022 14:48:30 - INFO - codeparrot_training - Step 10221: {'lr': 0.00046467571370749366, 'samples': 5233664, 'steps': 10221, 'loss/train': 1.3292498588562012} +02/24/2022 14:48:33 - INFO - codeparrot_training - Step 10222: {'lr': 0.0004646673279145721, 'samples': 5234176, 'steps': 10222, 'loss/train': 2.7708213329315186} +02/24/2022 14:48:39 - INFO - codeparrot_training - Step 10223: {'lr': 0.00046465894120208384, 'samples': 5234688, 'steps': 10223, 'loss/train': 1.8738151788711548} +02/24/2022 14:48:42 - INFO - codeparrot_training - Step 10224: {'lr': 0.00046465055357006494, 'samples': 5235200, 'steps': 10224, 'loss/train': 1.7299456596374512} +02/24/2022 14:48:48 - INFO - codeparrot_training - Step 10225: {'lr': 0.00046464216501855104, 'samples': 5235712, 'steps': 10225, 'loss/train': 3.709348440170288} +02/24/2022 14:48:52 - INFO - codeparrot_training - Step 10226: {'lr': 0.0004646337755475784, 'samples': 5236224, 'steps': 10226, 'loss/train': 1.841853380203247} +02/24/2022 14:48:58 - INFO - codeparrot_training - Step 10227: {'lr': 0.00046462538515718276, 'samples': 5236736, 'steps': 10227, 'loss/train': 2.1630375385284424} +02/24/2022 14:49:01 - INFO - codeparrot_training - Step 10228: {'lr': 0.0004646169938474002, 'samples': 5237248, 'steps': 10228, 'loss/train': 6.674635887145996} +02/24/2022 14:49:07 - INFO - codeparrot_training - Step 10229: {'lr': 0.0004646086016182666, 'samples': 5237760, 'steps': 10229, 'loss/train': 2.4645204544067383} +02/24/2022 14:49:10 - INFO - codeparrot_training - Step 10230: {'lr': 0.00046460020846981776, 'samples': 5238272, 'steps': 10230, 'loss/train': 2.987922191619873} +02/24/2022 14:49:16 - INFO - codeparrot_training - Step 10231: {'lr': 0.00046459181440208986, 'samples': 5238784, 'steps': 10231, 'loss/train': 5.388568878173828} +02/24/2022 14:49:19 - INFO - codeparrot_training - Step 10232: {'lr': 0.0004645834194151187, 'samples': 5239296, 'steps': 10232, 'loss/train': 1.9100579023361206} +02/24/2022 14:49:25 - INFO - codeparrot_training - Step 10233: {'lr': 0.00046457502350894046, 'samples': 5239808, 'steps': 10233, 'loss/train': 2.3590855598449707} +02/24/2022 14:49:28 - INFO - codeparrot_training - Step 10234: {'lr': 0.0004645666266835908, 'samples': 5240320, 'steps': 10234, 'loss/train': 1.1606158018112183} +02/24/2022 14:49:34 - INFO - codeparrot_training - Step 10235: {'lr': 0.0004645582289391059, 'samples': 5240832, 'steps': 10235, 'loss/train': 2.213454484939575} +02/24/2022 14:49:37 - INFO - codeparrot_training - Step 10236: {'lr': 0.00046454983027552165, 'samples': 5241344, 'steps': 10236, 'loss/train': 0.4401305317878723} +02/24/2022 14:49:44 - INFO - codeparrot_training - Step 10237: {'lr': 0.0004645414306928741, 'samples': 5241856, 'steps': 10237, 'loss/train': 1.0115946531295776} +02/24/2022 14:49:47 - INFO - codeparrot_training - Step 10238: {'lr': 0.0004645330301911992, 'samples': 5242368, 'steps': 10238, 'loss/train': 1.8077300786972046} +02/24/2022 14:49:53 - INFO - codeparrot_training - Step 10239: {'lr': 0.0004645246287705329, 'samples': 5242880, 'steps': 10239, 'loss/train': 1.8756779432296753} +02/24/2022 14:49:56 - INFO - codeparrot_training - Step 10240: {'lr': 0.0004645162264309112, 'samples': 5243392, 'steps': 10240, 'loss/train': 1.9027059078216553} +02/24/2022 14:50:02 - INFO - codeparrot_training - Step 10241: {'lr': 0.0004645078231723701, 'samples': 5243904, 'steps': 10241, 'loss/train': 1.5016604661941528} +02/24/2022 14:50:05 - INFO - codeparrot_training - Step 10242: {'lr': 0.0004644994189949455, 'samples': 5244416, 'steps': 10242, 'loss/train': 1.804394006729126} +02/24/2022 14:50:11 - INFO - codeparrot_training - Step 10243: {'lr': 0.00046449101389867364, 'samples': 5244928, 'steps': 10243, 'loss/train': 2.33488392829895} +02/24/2022 14:50:14 - INFO - codeparrot_training - Step 10244: {'lr': 0.0004644826078835903, 'samples': 5245440, 'steps': 10244, 'loss/train': 1.6047513484954834} +02/24/2022 14:50:20 - INFO - codeparrot_training - Step 10245: {'lr': 0.00046447420094973167, 'samples': 5245952, 'steps': 10245, 'loss/train': 1.8958096504211426} +02/24/2022 14:50:23 - INFO - codeparrot_training - Step 10246: {'lr': 0.0004644657930971336, 'samples': 5246464, 'steps': 10246, 'loss/train': 2.2004952430725098} +02/24/2022 14:50:29 - INFO - codeparrot_training - Step 10247: {'lr': 0.00046445738432583216, 'samples': 5246976, 'steps': 10247, 'loss/train': 1.1674318313598633} +02/24/2022 14:50:32 - INFO - codeparrot_training - Step 10248: {'lr': 0.00046444897463586345, 'samples': 5247488, 'steps': 10248, 'loss/train': 2.0538482666015625} +02/24/2022 14:50:38 - INFO - codeparrot_training - Step 10249: {'lr': 0.00046444056402726336, 'samples': 5248000, 'steps': 10249, 'loss/train': 1.1609246730804443} +02/24/2022 14:50:41 - INFO - codeparrot_training - Step 10250: {'lr': 0.00046443215250006805, 'samples': 5248512, 'steps': 10250, 'loss/train': 2.9854023456573486} +02/24/2022 14:50:48 - INFO - codeparrot_training - Step 10251: {'lr': 0.00046442374005431345, 'samples': 5249024, 'steps': 10251, 'loss/train': 2.560157060623169} +02/24/2022 14:50:51 - INFO - codeparrot_training - Step 10252: {'lr': 0.0004644153266900356, 'samples': 5249536, 'steps': 10252, 'loss/train': 2.1460869312286377} +02/24/2022 14:50:55 - INFO - codeparrot_training - Step 10253: {'lr': 0.0004644069124072706, 'samples': 5250048, 'steps': 10253, 'loss/train': 2.3790555000305176} +02/24/2022 14:51:00 - INFO - codeparrot_training - Step 10254: {'lr': 0.0004643984972060545, 'samples': 5250560, 'steps': 10254, 'loss/train': 1.6774470806121826} +02/24/2022 14:51:04 - INFO - codeparrot_training - Step 10255: {'lr': 0.00046439008108642335, 'samples': 5251072, 'steps': 10255, 'loss/train': 1.579734444618225} +02/24/2022 14:51:09 - INFO - codeparrot_training - Step 10256: {'lr': 0.0004643816640484131, 'samples': 5251584, 'steps': 10256, 'loss/train': 2.8604841232299805} +02/24/2022 14:51:13 - INFO - codeparrot_training - Step 10257: {'lr': 0.0004643732460920599, 'samples': 5252096, 'steps': 10257, 'loss/train': 1.7653487920761108} +02/24/2022 14:51:18 - INFO - codeparrot_training - Step 10258: {'lr': 0.00046436482721739976, 'samples': 5252608, 'steps': 10258, 'loss/train': 2.517033815383911} +02/24/2022 14:51:24 - INFO - codeparrot_training - Step 10259: {'lr': 0.00046435640742446875, 'samples': 5253120, 'steps': 10259, 'loss/train': 3.2435221672058105} +02/24/2022 14:51:27 - INFO - codeparrot_training - Step 10260: {'lr': 0.000464347986713303, 'samples': 5253632, 'steps': 10260, 'loss/train': 1.3066688776016235} +02/24/2022 14:51:32 - INFO - codeparrot_training - Step 10261: {'lr': 0.00046433956508393855, 'samples': 5254144, 'steps': 10261, 'loss/train': 2.3456404209136963} +02/24/2022 14:51:36 - INFO - codeparrot_training - Step 10262: {'lr': 0.0004643311425364114, 'samples': 5254656, 'steps': 10262, 'loss/train': 2.8051798343658447} +02/24/2022 14:51:42 - INFO - codeparrot_training - Step 10263: {'lr': 0.0004643227190707577, 'samples': 5255168, 'steps': 10263, 'loss/train': 3.029179334640503} +02/24/2022 14:51:46 - INFO - codeparrot_training - Step 10264: {'lr': 0.00046431429468701363, 'samples': 5255680, 'steps': 10264, 'loss/train': 2.406796932220459} +02/24/2022 14:51:51 - INFO - codeparrot_training - Step 10265: {'lr': 0.0004643058693852151, 'samples': 5256192, 'steps': 10265, 'loss/train': 0.2629365921020508} +02/24/2022 14:51:55 - INFO - codeparrot_training - Step 10266: {'lr': 0.0004642974431653983, 'samples': 5256704, 'steps': 10266, 'loss/train': 1.8382028341293335} +02/24/2022 14:52:00 - INFO - codeparrot_training - Step 10267: {'lr': 0.00046428901602759933, 'samples': 5257216, 'steps': 10267, 'loss/train': 2.460885763168335} +02/24/2022 14:52:04 - INFO - codeparrot_training - Step 10268: {'lr': 0.00046428058797185417, 'samples': 5257728, 'steps': 10268, 'loss/train': 2.5617311000823975} +02/24/2022 14:52:09 - INFO - codeparrot_training - Step 10269: {'lr': 0.0004642721589981991, 'samples': 5258240, 'steps': 10269, 'loss/train': 4.107093334197998} +02/24/2022 14:52:13 - INFO - codeparrot_training - Step 10270: {'lr': 0.00046426372910667003, 'samples': 5258752, 'steps': 10270, 'loss/train': 0.7126080393791199} +02/24/2022 14:52:18 - INFO - codeparrot_training - Step 10271: {'lr': 0.00046425529829730326, 'samples': 5259264, 'steps': 10271, 'loss/train': 2.419494152069092} +02/24/2022 14:52:22 - INFO - codeparrot_training - Step 10272: {'lr': 0.0004642468665701348, 'samples': 5259776, 'steps': 10272, 'loss/train': 2.052345037460327} +02/24/2022 14:52:28 - INFO - codeparrot_training - Step 10273: {'lr': 0.0004642384339252008, 'samples': 5260288, 'steps': 10273, 'loss/train': 4.275566577911377} +02/24/2022 14:52:32 - INFO - codeparrot_training - Step 10274: {'lr': 0.0004642300003625374, 'samples': 5260800, 'steps': 10274, 'loss/train': 2.1266286373138428} +02/24/2022 14:52:38 - INFO - codeparrot_training - Step 10275: {'lr': 0.0004642215658821807, 'samples': 5261312, 'steps': 10275, 'loss/train': 2.6137502193450928} +02/24/2022 14:52:41 - INFO - codeparrot_training - Step 10276: {'lr': 0.0004642131304841668, 'samples': 5261824, 'steps': 10276, 'loss/train': 3.646739959716797} +02/24/2022 14:52:45 - INFO - codeparrot_training - Step 10277: {'lr': 0.00046420469416853197, 'samples': 5262336, 'steps': 10277, 'loss/train': 1.6540005207061768} +02/24/2022 14:52:50 - INFO - codeparrot_training - Step 10278: {'lr': 0.0004641962569353121, 'samples': 5262848, 'steps': 10278, 'loss/train': 1.4199060201644897} +02/24/2022 14:52:54 - INFO - codeparrot_training - Step 10279: {'lr': 0.0004641878187845436, 'samples': 5263360, 'steps': 10279, 'loss/train': 2.1314585208892822} +02/24/2022 14:52:59 - INFO - codeparrot_training - Step 10280: {'lr': 0.00046417937971626245, 'samples': 5263872, 'steps': 10280, 'loss/train': 1.3049476146697998} +02/24/2022 14:53:03 - INFO - codeparrot_training - Step 10281: {'lr': 0.00046417093973050486, 'samples': 5264384, 'steps': 10281, 'loss/train': 3.138810157775879} +02/24/2022 14:53:08 - INFO - codeparrot_training - Step 10282: {'lr': 0.0004641624988273069, 'samples': 5264896, 'steps': 10282, 'loss/train': 2.1349916458129883} +02/24/2022 14:53:12 - INFO - codeparrot_training - Step 10283: {'lr': 0.0004641540570067049, 'samples': 5265408, 'steps': 10283, 'loss/train': 1.3850854635238647} +02/24/2022 14:53:18 - INFO - codeparrot_training - Step 10284: {'lr': 0.0004641456142687348, 'samples': 5265920, 'steps': 10284, 'loss/train': 2.4753170013427734} +02/24/2022 14:53:22 - INFO - codeparrot_training - Step 10285: {'lr': 0.000464137170613433, 'samples': 5266432, 'steps': 10285, 'loss/train': 1.1113992929458618} +02/24/2022 14:53:27 - INFO - codeparrot_training - Step 10286: {'lr': 0.00046412872604083554, 'samples': 5266944, 'steps': 10286, 'loss/train': 3.43532395362854} +02/24/2022 14:53:31 - INFO - codeparrot_training - Step 10287: {'lr': 0.00046412028055097855, 'samples': 5267456, 'steps': 10287, 'loss/train': 2.749605417251587} +02/24/2022 14:53:36 - INFO - codeparrot_training - Step 10288: {'lr': 0.00046411183414389834, 'samples': 5267968, 'steps': 10288, 'loss/train': 1.774010419845581} +02/24/2022 14:53:40 - INFO - codeparrot_training - Step 10289: {'lr': 0.000464103386819631, 'samples': 5268480, 'steps': 10289, 'loss/train': 2.439481019973755} +02/24/2022 14:53:45 - INFO - codeparrot_training - Step 10290: {'lr': 0.00046409493857821273, 'samples': 5268992, 'steps': 10290, 'loss/train': 1.5167802572250366} +02/24/2022 14:53:49 - INFO - codeparrot_training - Step 10291: {'lr': 0.00046408648941967975, 'samples': 5269504, 'steps': 10291, 'loss/train': 0.35078924894332886} +02/24/2022 14:53:54 - INFO - codeparrot_training - Step 10292: {'lr': 0.0004640780393440682, 'samples': 5270016, 'steps': 10292, 'loss/train': 1.8131710290908813} +02/24/2022 14:53:58 - INFO - codeparrot_training - Step 10293: {'lr': 0.0004640695883514143, 'samples': 5270528, 'steps': 10293, 'loss/train': 2.1447877883911133} +02/24/2022 14:54:04 - INFO - codeparrot_training - Step 10294: {'lr': 0.0004640611364417543, 'samples': 5271040, 'steps': 10294, 'loss/train': 2.1572015285491943} +02/24/2022 14:54:07 - INFO - codeparrot_training - Step 10295: {'lr': 0.0004640526836151243, 'samples': 5271552, 'steps': 10295, 'loss/train': 2.3479580879211426} +02/24/2022 14:54:13 - INFO - codeparrot_training - Step 10296: {'lr': 0.0004640442298715606, 'samples': 5272064, 'steps': 10296, 'loss/train': 2.0927531719207764} +02/24/2022 14:54:17 - INFO - codeparrot_training - Step 10297: {'lr': 0.0004640357752110994, 'samples': 5272576, 'steps': 10297, 'loss/train': 2.046898365020752} +02/24/2022 14:54:22 - INFO - codeparrot_training - Step 10298: {'lr': 0.00046402731963377685, 'samples': 5273088, 'steps': 10298, 'loss/train': 1.562732458114624} +02/24/2022 14:54:26 - INFO - codeparrot_training - Step 10299: {'lr': 0.0004640188631396293, 'samples': 5273600, 'steps': 10299, 'loss/train': 2.679598569869995} +02/24/2022 14:54:31 - INFO - codeparrot_training - Step 10300: {'lr': 0.0004640104057286929, 'samples': 5274112, 'steps': 10300, 'loss/train': 1.7256455421447754} +02/24/2022 14:54:35 - INFO - codeparrot_training - Step 10301: {'lr': 0.0004640019474010038, 'samples': 5274624, 'steps': 10301, 'loss/train': 1.2026939392089844} +02/24/2022 14:54:40 - INFO - codeparrot_training - Step 10302: {'lr': 0.00046399348815659837, 'samples': 5275136, 'steps': 10302, 'loss/train': 2.291311264038086} +02/24/2022 14:54:44 - INFO - codeparrot_training - Step 10303: {'lr': 0.0004639850279955128, 'samples': 5275648, 'steps': 10303, 'loss/train': 1.6144028902053833} +02/24/2022 14:54:49 - INFO - codeparrot_training - Step 10304: {'lr': 0.0004639765669177833, 'samples': 5276160, 'steps': 10304, 'loss/train': 2.092270851135254} +02/24/2022 14:54:53 - INFO - codeparrot_training - Step 10305: {'lr': 0.0004639681049234461, 'samples': 5276672, 'steps': 10305, 'loss/train': 1.8585162162780762} +02/24/2022 14:54:58 - INFO - codeparrot_training - Step 10306: {'lr': 0.0004639596420125375, 'samples': 5277184, 'steps': 10306, 'loss/train': 2.8110735416412354} +02/24/2022 14:55:02 - INFO - codeparrot_training - Step 10307: {'lr': 0.0004639511781850937, 'samples': 5277696, 'steps': 10307, 'loss/train': 2.093008518218994} +02/24/2022 14:55:07 - INFO - codeparrot_training - Step 10308: {'lr': 0.000463942713441151, 'samples': 5278208, 'steps': 10308, 'loss/train': 1.4693944454193115} +02/24/2022 14:55:11 - INFO - codeparrot_training - Step 10309: {'lr': 0.00046393424778074573, 'samples': 5278720, 'steps': 10309, 'loss/train': 2.2482521533966064} +02/24/2022 14:55:17 - INFO - codeparrot_training - Step 10310: {'lr': 0.000463925781203914, 'samples': 5279232, 'steps': 10310, 'loss/train': 1.3743386268615723} +02/24/2022 14:55:20 - INFO - codeparrot_training - Step 10311: {'lr': 0.00046391731371069224, 'samples': 5279744, 'steps': 10311, 'loss/train': 2.4796366691589355} +02/24/2022 14:55:26 - INFO - codeparrot_training - Step 10312: {'lr': 0.00046390884530111656, 'samples': 5280256, 'steps': 10312, 'loss/train': 2.6968576908111572} +02/24/2022 14:55:29 - INFO - codeparrot_training - Step 10313: {'lr': 0.0004639003759752233, 'samples': 5280768, 'steps': 10313, 'loss/train': 1.3039103746414185} +02/24/2022 14:55:35 - INFO - codeparrot_training - Step 10314: {'lr': 0.00046389190573304875, 'samples': 5281280, 'steps': 10314, 'loss/train': 2.093705654144287} +02/24/2022 14:55:38 - INFO - codeparrot_training - Step 10315: {'lr': 0.0004638834345746292, 'samples': 5281792, 'steps': 10315, 'loss/train': 0.8147538900375366} +02/24/2022 14:55:44 - INFO - codeparrot_training - Step 10316: {'lr': 0.00046387496250000095, 'samples': 5282304, 'steps': 10316, 'loss/train': 2.4193246364593506} +02/24/2022 14:55:49 - INFO - codeparrot_training - Step 10317: {'lr': 0.00046386648950920027, 'samples': 5282816, 'steps': 10317, 'loss/train': 1.7856088876724243} +02/24/2022 14:55:53 - INFO - codeparrot_training - Step 10318: {'lr': 0.0004638580156022635, 'samples': 5283328, 'steps': 10318, 'loss/train': 3.212846279144287} +02/24/2022 14:55:59 - INFO - codeparrot_training - Step 10319: {'lr': 0.0004638495407792268, 'samples': 5283840, 'steps': 10319, 'loss/train': 1.845513939857483} +02/24/2022 14:56:02 - INFO - codeparrot_training - Step 10320: {'lr': 0.0004638410650401267, 'samples': 5284352, 'steps': 10320, 'loss/train': 1.9201545715332031} +02/24/2022 14:56:08 - INFO - codeparrot_training - Step 10321: {'lr': 0.0004638325883849993, 'samples': 5284864, 'steps': 10321, 'loss/train': 3.156074047088623} +02/24/2022 14:56:11 - INFO - codeparrot_training - Step 10322: {'lr': 0.00046382411081388096, 'samples': 5285376, 'steps': 10322, 'loss/train': 0.6073839068412781} +02/24/2022 14:56:17 - INFO - codeparrot_training - Step 10323: {'lr': 0.0004638156323268081, 'samples': 5285888, 'steps': 10323, 'loss/train': 2.121464252471924} +02/24/2022 14:56:20 - INFO - codeparrot_training - Step 10324: {'lr': 0.00046380715292381695, 'samples': 5286400, 'steps': 10324, 'loss/train': 1.2324066162109375} +02/24/2022 14:56:26 - INFO - codeparrot_training - Step 10325: {'lr': 0.0004637986726049438, 'samples': 5286912, 'steps': 10325, 'loss/train': 3.2524149417877197} +02/24/2022 14:56:29 - INFO - codeparrot_training - Step 10326: {'lr': 0.00046379019137022506, 'samples': 5287424, 'steps': 10326, 'loss/train': 2.1994616985321045} +02/24/2022 14:56:35 - INFO - codeparrot_training - Step 10327: {'lr': 0.000463781709219697, 'samples': 5287936, 'steps': 10327, 'loss/train': 2.3758130073547363} +02/24/2022 14:56:38 - INFO - codeparrot_training - Step 10328: {'lr': 0.000463773226153396, 'samples': 5288448, 'steps': 10328, 'loss/train': 0.9522534012794495} +02/24/2022 14:56:44 - INFO - codeparrot_training - Step 10329: {'lr': 0.0004637647421713584, 'samples': 5288960, 'steps': 10329, 'loss/train': 2.19032883644104} +02/24/2022 14:56:48 - INFO - codeparrot_training - Step 10330: {'lr': 0.0004637562572736205, 'samples': 5289472, 'steps': 10330, 'loss/train': 1.8007209300994873} +02/24/2022 14:56:53 - INFO - codeparrot_training - Step 10331: {'lr': 0.00046374777146021865, 'samples': 5289984, 'steps': 10331, 'loss/train': 2.995356798171997} +02/24/2022 14:56:57 - INFO - codeparrot_training - Step 10332: {'lr': 0.00046373928473118927, 'samples': 5290496, 'steps': 10332, 'loss/train': 1.9197447299957275} +02/24/2022 14:57:02 - INFO - codeparrot_training - Step 10333: {'lr': 0.0004637307970865686, 'samples': 5291008, 'steps': 10333, 'loss/train': 3.180255174636841} +02/24/2022 14:57:06 - INFO - codeparrot_training - Step 10334: {'lr': 0.00046372230852639314, 'samples': 5291520, 'steps': 10334, 'loss/train': 2.269125461578369} +02/24/2022 14:57:11 - INFO - codeparrot_training - Step 10335: {'lr': 0.0004637138190506991, 'samples': 5292032, 'steps': 10335, 'loss/train': 2.0725109577178955} +02/24/2022 14:57:15 - INFO - codeparrot_training - Step 10336: {'lr': 0.00046370532865952296, 'samples': 5292544, 'steps': 10336, 'loss/train': 2.2971909046173096} +02/24/2022 14:57:20 - INFO - codeparrot_training - Step 10337: {'lr': 0.0004636968373529011, 'samples': 5293056, 'steps': 10337, 'loss/train': 2.3532931804656982} +02/24/2022 14:57:24 - INFO - codeparrot_training - Step 10338: {'lr': 0.00046368834513086976, 'samples': 5293568, 'steps': 10338, 'loss/train': 2.426110029220581} +02/24/2022 14:57:30 - INFO - codeparrot_training - Step 10339: {'lr': 0.00046367985199346546, 'samples': 5294080, 'steps': 10339, 'loss/train': 1.3799611330032349} +02/24/2022 14:57:33 - INFO - codeparrot_training - Step 10340: {'lr': 0.00046367135794072445, 'samples': 5294592, 'steps': 10340, 'loss/train': 1.8972915410995483} +02/24/2022 14:57:39 - INFO - codeparrot_training - Step 10341: {'lr': 0.00046366286297268327, 'samples': 5295104, 'steps': 10341, 'loss/train': 2.365474224090576} +02/24/2022 14:57:42 - INFO - codeparrot_training - Step 10342: {'lr': 0.0004636543670893782, 'samples': 5295616, 'steps': 10342, 'loss/train': 1.4184718132019043} +02/24/2022 14:57:48 - INFO - codeparrot_training - Step 10343: {'lr': 0.0004636458702908457, 'samples': 5296128, 'steps': 10343, 'loss/train': 1.5894575119018555} +02/24/2022 14:57:51 - INFO - codeparrot_training - Step 10344: {'lr': 0.0004636373725771221, 'samples': 5296640, 'steps': 10344, 'loss/train': 1.9423401355743408} +02/24/2022 14:57:57 - INFO - codeparrot_training - Step 10345: {'lr': 0.0004636288739482438, 'samples': 5297152, 'steps': 10345, 'loss/train': 1.7704288959503174} +02/24/2022 14:58:00 - INFO - codeparrot_training - Step 10346: {'lr': 0.0004636203744042473, 'samples': 5297664, 'steps': 10346, 'loss/train': 2.4374425411224365} +02/24/2022 14:58:06 - INFO - codeparrot_training - Step 10347: {'lr': 0.0004636118739451689, 'samples': 5298176, 'steps': 10347, 'loss/train': 3.2590057849884033} +02/24/2022 14:58:09 - INFO - codeparrot_training - Step 10348: {'lr': 0.0004636033725710451, 'samples': 5298688, 'steps': 10348, 'loss/train': 2.1918511390686035} +02/24/2022 14:58:15 - INFO - codeparrot_training - Step 10349: {'lr': 0.00046359487028191224, 'samples': 5299200, 'steps': 10349, 'loss/train': 3.533186912536621} +02/24/2022 14:58:18 - INFO - codeparrot_training - Step 10350: {'lr': 0.0004635863670778068, 'samples': 5299712, 'steps': 10350, 'loss/train': 1.1902270317077637} +02/24/2022 14:58:24 - INFO - codeparrot_training - Step 10351: {'lr': 0.00046357786295876517, 'samples': 5300224, 'steps': 10351, 'loss/train': 1.4413707256317139} +02/24/2022 14:58:28 - INFO - codeparrot_training - Step 10352: {'lr': 0.0004635693579248238, 'samples': 5300736, 'steps': 10352, 'loss/train': 2.7832536697387695} +02/24/2022 14:58:33 - INFO - codeparrot_training - Step 10353: {'lr': 0.0004635608519760191, 'samples': 5301248, 'steps': 10353, 'loss/train': 1.747622013092041} +02/24/2022 14:58:37 - INFO - codeparrot_training - Step 10354: {'lr': 0.00046355234511238756, 'samples': 5301760, 'steps': 10354, 'loss/train': 2.0732836723327637} +02/24/2022 14:58:42 - INFO - codeparrot_training - Step 10355: {'lr': 0.00046354383733396553, 'samples': 5302272, 'steps': 10355, 'loss/train': 2.2942445278167725} +02/24/2022 14:58:45 - INFO - codeparrot_training - Step 10356: {'lr': 0.0004635353286407896, 'samples': 5302784, 'steps': 10356, 'loss/train': 2.1034181118011475} +02/24/2022 14:58:52 - INFO - codeparrot_training - Step 10357: {'lr': 0.00046352681903289605, 'samples': 5303296, 'steps': 10357, 'loss/train': 1.9662588834762573} +02/24/2022 14:58:55 - INFO - codeparrot_training - Step 10358: {'lr': 0.00046351830851032146, 'samples': 5303808, 'steps': 10358, 'loss/train': 1.8031994104385376} +02/24/2022 14:59:01 - INFO - codeparrot_training - Step 10359: {'lr': 0.00046350979707310226, 'samples': 5304320, 'steps': 10359, 'loss/train': 2.544552803039551} +02/24/2022 14:59:04 - INFO - codeparrot_training - Step 10360: {'lr': 0.00046350128472127483, 'samples': 5304832, 'steps': 10360, 'loss/train': 1.9541394710540771} +02/24/2022 14:59:10 - INFO - codeparrot_training - Step 10361: {'lr': 0.00046349277145487565, 'samples': 5305344, 'steps': 10361, 'loss/train': 1.484208106994629} +02/24/2022 14:59:13 - INFO - codeparrot_training - Step 10362: {'lr': 0.00046348425727394126, 'samples': 5305856, 'steps': 10362, 'loss/train': 0.7156141400337219} +02/24/2022 14:59:19 - INFO - codeparrot_training - Step 10363: {'lr': 0.0004634757421785082, 'samples': 5306368, 'steps': 10363, 'loss/train': 2.1460771560668945} +02/24/2022 14:59:22 - INFO - codeparrot_training - Step 10364: {'lr': 0.0004634672261686127, 'samples': 5306880, 'steps': 10364, 'loss/train': 3.2442281246185303} +02/24/2022 14:59:27 - INFO - codeparrot_training - Step 10365: {'lr': 0.0004634587092442915, 'samples': 5307392, 'steps': 10365, 'loss/train': 1.8552302122116089} +02/24/2022 14:59:34 - INFO - codeparrot_training - Step 10366: {'lr': 0.00046345019140558085, 'samples': 5307904, 'steps': 10366, 'loss/train': 2.884028434753418} +02/24/2022 14:59:37 - INFO - codeparrot_training - Step 10367: {'lr': 0.0004634416726525175, 'samples': 5308416, 'steps': 10367, 'loss/train': 2.494704484939575} +02/24/2022 14:59:43 - INFO - codeparrot_training - Step 10368: {'lr': 0.00046343315298513765, 'samples': 5308928, 'steps': 10368, 'loss/train': 4.402544021606445} +02/24/2022 14:59:46 - INFO - codeparrot_training - Step 10369: {'lr': 0.0004634246324034781, 'samples': 5309440, 'steps': 10369, 'loss/train': 2.1542961597442627} +02/24/2022 14:59:52 - INFO - codeparrot_training - Step 10370: {'lr': 0.0004634161109075751, 'samples': 5309952, 'steps': 10370, 'loss/train': 2.0879409313201904} +02/24/2022 14:59:55 - INFO - codeparrot_training - Step 10371: {'lr': 0.0004634075884974652, 'samples': 5310464, 'steps': 10371, 'loss/train': 1.6788160800933838} +02/24/2022 14:59:59 - INFO - codeparrot_training - Step 10372: {'lr': 0.00046339906517318507, 'samples': 5310976, 'steps': 10372, 'loss/train': 1.8942443132400513} +02/24/2022 15:00:04 - INFO - codeparrot_training - Step 10373: {'lr': 0.0004633905409347711, 'samples': 5311488, 'steps': 10373, 'loss/train': 2.1707465648651123} +02/24/2022 15:00:11 - INFO - codeparrot_training - Step 10374: {'lr': 0.00046338201578225975, 'samples': 5312000, 'steps': 10374, 'loss/train': 2.4812490940093994} +02/24/2022 15:00:15 - INFO - codeparrot_training - Step 10375: {'lr': 0.0004633734897156876, 'samples': 5312512, 'steps': 10375, 'loss/train': 2.2935268878936768} +02/24/2022 15:00:20 - INFO - codeparrot_training - Step 10376: {'lr': 0.0004633649627350912, 'samples': 5313024, 'steps': 10376, 'loss/train': 1.2265043258666992} +02/24/2022 15:00:24 - INFO - codeparrot_training - Step 10377: {'lr': 0.000463356434840507, 'samples': 5313536, 'steps': 10377, 'loss/train': 2.8925282955169678} +02/24/2022 15:00:29 - INFO - codeparrot_training - Step 10378: {'lr': 0.0004633479060319717, 'samples': 5314048, 'steps': 10378, 'loss/train': 2.223686695098877} +02/24/2022 15:00:33 - INFO - codeparrot_training - Step 10379: {'lr': 0.00046333937630952163, 'samples': 5314560, 'steps': 10379, 'loss/train': 2.1045138835906982} +02/24/2022 15:00:38 - INFO - codeparrot_training - Step 10380: {'lr': 0.00046333084567319344, 'samples': 5315072, 'steps': 10380, 'loss/train': 1.5444692373275757} +02/24/2022 15:00:42 - INFO - codeparrot_training - Step 10381: {'lr': 0.0004633223141230236, 'samples': 5315584, 'steps': 10381, 'loss/train': 2.1285784244537354} +02/24/2022 15:00:47 - INFO - codeparrot_training - Step 10382: {'lr': 0.0004633137816590488, 'samples': 5316096, 'steps': 10382, 'loss/train': 1.839762568473816} +02/24/2022 15:00:51 - INFO - codeparrot_training - Step 10383: {'lr': 0.00046330524828130536, 'samples': 5316608, 'steps': 10383, 'loss/train': 1.6819720268249512} +02/24/2022 15:00:56 - INFO - codeparrot_training - Step 10384: {'lr': 0.00046329671398983007, 'samples': 5317120, 'steps': 10384, 'loss/train': 2.060786485671997} +02/24/2022 15:01:00 - INFO - codeparrot_training - Step 10385: {'lr': 0.0004632881787846594, 'samples': 5317632, 'steps': 10385, 'loss/train': 0.47012028098106384} +02/24/2022 15:01:06 - INFO - codeparrot_training - Step 10386: {'lr': 0.0004632796426658298, 'samples': 5318144, 'steps': 10386, 'loss/train': 2.015021800994873} +02/24/2022 15:01:09 - INFO - codeparrot_training - Step 10387: {'lr': 0.00046327110563337804, 'samples': 5318656, 'steps': 10387, 'loss/train': 2.4878122806549072} +02/24/2022 15:01:15 - INFO - codeparrot_training - Step 10388: {'lr': 0.00046326256768734053, 'samples': 5319168, 'steps': 10388, 'loss/train': 1.9941998720169067} +02/24/2022 15:01:18 - INFO - codeparrot_training - Step 10389: {'lr': 0.0004632540288277539, 'samples': 5319680, 'steps': 10389, 'loss/train': 0.9611613154411316} +02/24/2022 15:01:24 - INFO - codeparrot_training - Step 10390: {'lr': 0.0004632454890546547, 'samples': 5320192, 'steps': 10390, 'loss/train': 2.2257487773895264} +02/24/2022 15:01:28 - INFO - codeparrot_training - Step 10391: {'lr': 0.0004632369483680796, 'samples': 5320704, 'steps': 10391, 'loss/train': 3.8595004081726074} +02/24/2022 15:01:33 - INFO - codeparrot_training - Step 10392: {'lr': 0.0004632284067680651, 'samples': 5321216, 'steps': 10392, 'loss/train': 1.0521191358566284} +02/24/2022 15:01:37 - INFO - codeparrot_training - Step 10393: {'lr': 0.0004632198642546478, 'samples': 5321728, 'steps': 10393, 'loss/train': 0.564673662185669} +02/24/2022 15:01:42 - INFO - codeparrot_training - Step 10394: {'lr': 0.0004632113208278643, 'samples': 5322240, 'steps': 10394, 'loss/train': 2.7666842937469482} +02/24/2022 15:01:46 - INFO - codeparrot_training - Step 10395: {'lr': 0.00046320277648775123, 'samples': 5322752, 'steps': 10395, 'loss/train': 3.366778612136841} +02/24/2022 15:01:52 - INFO - codeparrot_training - Step 10396: {'lr': 0.0004631942312343452, 'samples': 5323264, 'steps': 10396, 'loss/train': 1.51768159866333} +02/24/2022 15:01:55 - INFO - codeparrot_training - Step 10397: {'lr': 0.00046318568506768267, 'samples': 5323776, 'steps': 10397, 'loss/train': 3.0890817642211914} +02/24/2022 15:02:01 - INFO - codeparrot_training - Step 10398: {'lr': 0.0004631771379878005, 'samples': 5324288, 'steps': 10398, 'loss/train': 1.256666660308838} +02/24/2022 15:02:04 - INFO - codeparrot_training - Step 10399: {'lr': 0.00046316858999473506, 'samples': 5324800, 'steps': 10399, 'loss/train': 1.6612883806228638} +02/24/2022 15:02:10 - INFO - codeparrot_training - Step 10400: {'lr': 0.00046316004108852305, 'samples': 5325312, 'steps': 10400, 'loss/train': 2.061941385269165} +02/24/2022 15:02:13 - INFO - codeparrot_training - Step 10401: {'lr': 0.0004631514912692012, 'samples': 5325824, 'steps': 10401, 'loss/train': 3.0965874195098877} +02/24/2022 15:02:20 - INFO - codeparrot_training - Step 10402: {'lr': 0.00046314294053680593, 'samples': 5326336, 'steps': 10402, 'loss/train': 2.3951609134674072} +02/24/2022 15:02:23 - INFO - codeparrot_training - Step 10403: {'lr': 0.0004631343888913741, 'samples': 5326848, 'steps': 10403, 'loss/train': 1.980438232421875} +02/24/2022 15:02:27 - INFO - codeparrot_training - Step 10404: {'lr': 0.00046312583633294213, 'samples': 5327360, 'steps': 10404, 'loss/train': 1.0517843961715698} +02/24/2022 15:02:32 - INFO - codeparrot_training - Step 10405: {'lr': 0.0004631172828615469, 'samples': 5327872, 'steps': 10405, 'loss/train': 2.7647383213043213} +02/24/2022 15:02:36 - INFO - codeparrot_training - Step 10406: {'lr': 0.0004631087284772247, 'samples': 5328384, 'steps': 10406, 'loss/train': 2.3980698585510254} +02/24/2022 15:02:41 - INFO - codeparrot_training - Step 10407: {'lr': 0.0004631001731800125, 'samples': 5328896, 'steps': 10407, 'loss/train': 3.1227638721466064} +02/24/2022 15:02:45 - INFO - codeparrot_training - Step 10408: {'lr': 0.0004630916169699468, 'samples': 5329408, 'steps': 10408, 'loss/train': 1.1033917665481567} +02/24/2022 15:02:50 - INFO - codeparrot_training - Step 10409: {'lr': 0.00046308305984706435, 'samples': 5329920, 'steps': 10409, 'loss/train': 1.6527396440505981} +02/24/2022 15:02:54 - INFO - codeparrot_training - Step 10410: {'lr': 0.00046307450181140163, 'samples': 5330432, 'steps': 10410, 'loss/train': 2.8946332931518555} +02/24/2022 15:02:59 - INFO - codeparrot_training - Step 10411: {'lr': 0.00046306594286299544, 'samples': 5330944, 'steps': 10411, 'loss/train': 1.771353006362915} +02/24/2022 15:03:03 - INFO - codeparrot_training - Step 10412: {'lr': 0.0004630573830018824, 'samples': 5331456, 'steps': 10412, 'loss/train': 2.3282008171081543} +02/24/2022 15:03:09 - INFO - codeparrot_training - Step 10413: {'lr': 0.00046304882222809917, 'samples': 5331968, 'steps': 10413, 'loss/train': 1.580085039138794} +02/24/2022 15:03:12 - INFO - codeparrot_training - Step 10414: {'lr': 0.0004630402605416825, 'samples': 5332480, 'steps': 10414, 'loss/train': 3.142756700515747} +02/24/2022 15:03:18 - INFO - codeparrot_training - Step 10415: {'lr': 0.0004630316979426689, 'samples': 5332992, 'steps': 10415, 'loss/train': 2.4721357822418213} +02/24/2022 15:03:21 - INFO - codeparrot_training - Step 10416: {'lr': 0.00046302313443109523, 'samples': 5333504, 'steps': 10416, 'loss/train': 2.2335314750671387} +02/24/2022 15:03:27 - INFO - codeparrot_training - Step 10417: {'lr': 0.00046301457000699807, 'samples': 5334016, 'steps': 10417, 'loss/train': 3.5762007236480713} +02/24/2022 15:03:30 - INFO - codeparrot_training - Step 10418: {'lr': 0.0004630060046704141, 'samples': 5334528, 'steps': 10418, 'loss/train': 2.231334924697876} +02/24/2022 15:03:36 - INFO - codeparrot_training - Step 10419: {'lr': 0.0004629974384213801, 'samples': 5335040, 'steps': 10419, 'loss/train': 2.758619546890259} +02/24/2022 15:03:39 - INFO - codeparrot_training - Step 10420: {'lr': 0.0004629888712599327, 'samples': 5335552, 'steps': 10420, 'loss/train': 1.9766424894332886} +02/24/2022 15:03:45 - INFO - codeparrot_training - Step 10421: {'lr': 0.0004629803031861086, 'samples': 5336064, 'steps': 10421, 'loss/train': 3.0400912761688232} +02/24/2022 15:03:48 - INFO - codeparrot_training - Step 10422: {'lr': 0.0004629717341999445, 'samples': 5336576, 'steps': 10422, 'loss/train': 2.448423385620117} +02/24/2022 15:03:55 - INFO - codeparrot_training - Step 10423: {'lr': 0.0004629631643014771, 'samples': 5337088, 'steps': 10423, 'loss/train': 1.903863787651062} +02/24/2022 15:03:58 - INFO - codeparrot_training - Step 10424: {'lr': 0.00046295459349074316, 'samples': 5337600, 'steps': 10424, 'loss/train': 2.0889322757720947} +02/24/2022 15:04:04 - INFO - codeparrot_training - Step 10425: {'lr': 0.00046294602176777936, 'samples': 5338112, 'steps': 10425, 'loss/train': 1.5387037992477417} +02/24/2022 15:04:07 - INFO - codeparrot_training - Step 10426: {'lr': 0.0004629374491326224, 'samples': 5338624, 'steps': 10426, 'loss/train': 1.9643384218215942} +02/24/2022 15:04:13 - INFO - codeparrot_training - Step 10427: {'lr': 0.00046292887558530905, 'samples': 5339136, 'steps': 10427, 'loss/train': 1.6737027168273926} +02/24/2022 15:04:16 - INFO - codeparrot_training - Step 10428: {'lr': 0.000462920301125876, 'samples': 5339648, 'steps': 10428, 'loss/train': 2.879610776901245} +02/24/2022 15:04:22 - INFO - codeparrot_training - Step 10429: {'lr': 0.0004629117257543599, 'samples': 5340160, 'steps': 10429, 'loss/train': 2.397786855697632} +02/24/2022 15:04:25 - INFO - codeparrot_training - Step 10430: {'lr': 0.0004629031494707977, 'samples': 5340672, 'steps': 10430, 'loss/train': 2.504424810409546} +02/24/2022 15:04:31 - INFO - codeparrot_training - Step 10431: {'lr': 0.00046289457227522595, 'samples': 5341184, 'steps': 10431, 'loss/train': 2.152984142303467} +02/24/2022 15:04:34 - INFO - codeparrot_training - Step 10432: {'lr': 0.0004628859941676815, 'samples': 5341696, 'steps': 10432, 'loss/train': 2.405923843383789} +02/24/2022 15:04:40 - INFO - codeparrot_training - Step 10433: {'lr': 0.000462877415148201, 'samples': 5342208, 'steps': 10433, 'loss/train': 1.4465056657791138} +02/24/2022 15:04:44 - INFO - codeparrot_training - Step 10434: {'lr': 0.0004628688352168213, 'samples': 5342720, 'steps': 10434, 'loss/train': 1.8572214841842651} +02/24/2022 15:04:49 - INFO - codeparrot_training - Step 10435: {'lr': 0.00046286025437357905, 'samples': 5343232, 'steps': 10435, 'loss/train': 0.44998013973236084} +02/24/2022 15:04:53 - INFO - codeparrot_training - Step 10436: {'lr': 0.00046285167261851114, 'samples': 5343744, 'steps': 10436, 'loss/train': 0.5047389268875122} +02/24/2022 15:04:58 - INFO - codeparrot_training - Step 10437: {'lr': 0.00046284308995165414, 'samples': 5344256, 'steps': 10437, 'loss/train': 1.4467048645019531} +02/24/2022 15:05:02 - INFO - codeparrot_training - Step 10438: {'lr': 0.00046283450637304497, 'samples': 5344768, 'steps': 10438, 'loss/train': 2.112070083618164} +02/24/2022 15:05:07 - INFO - codeparrot_training - Step 10439: {'lr': 0.0004628259218827204, 'samples': 5345280, 'steps': 10439, 'loss/train': 1.5674123764038086} +02/24/2022 15:05:11 - INFO - codeparrot_training - Step 10440: {'lr': 0.0004628173364807171, 'samples': 5345792, 'steps': 10440, 'loss/train': 2.8122034072875977} +02/24/2022 15:05:16 - INFO - codeparrot_training - Step 10441: {'lr': 0.00046280875016707195, 'samples': 5346304, 'steps': 10441, 'loss/train': 1.816400170326233} +02/24/2022 15:05:20 - INFO - codeparrot_training - Step 10442: {'lr': 0.0004628001629418217, 'samples': 5346816, 'steps': 10442, 'loss/train': 2.770390033721924} +02/24/2022 15:05:26 - INFO - codeparrot_training - Step 10443: {'lr': 0.0004627915748050031, 'samples': 5347328, 'steps': 10443, 'loss/train': 2.79180908203125} +02/24/2022 15:05:29 - INFO - codeparrot_training - Step 10444: {'lr': 0.000462782985756653, 'samples': 5347840, 'steps': 10444, 'loss/train': 1.995596170425415} +02/24/2022 15:05:35 - INFO - codeparrot_training - Step 10445: {'lr': 0.0004627743957968081, 'samples': 5348352, 'steps': 10445, 'loss/train': 1.672791838645935} +02/24/2022 15:05:39 - INFO - codeparrot_training - Step 10446: {'lr': 0.00046276580492550523, 'samples': 5348864, 'steps': 10446, 'loss/train': 2.561729907989502} +02/24/2022 15:05:44 - INFO - codeparrot_training - Step 10447: {'lr': 0.0004627572131427813, 'samples': 5349376, 'steps': 10447, 'loss/train': 1.3663554191589355} +02/24/2022 15:05:50 - INFO - codeparrot_training - Step 10448: {'lr': 0.000462748620448673, 'samples': 5349888, 'steps': 10448, 'loss/train': 2.5932722091674805} +02/24/2022 15:05:53 - INFO - codeparrot_training - Step 10449: {'lr': 0.00046274002684321716, 'samples': 5350400, 'steps': 10449, 'loss/train': 2.050666570663452} +02/24/2022 15:05:59 - INFO - codeparrot_training - Step 10450: {'lr': 0.00046273143232645054, 'samples': 5350912, 'steps': 10450, 'loss/train': 2.084097146987915} +02/24/2022 15:06:02 - INFO - codeparrot_training - Step 10451: {'lr': 0.0004627228368984101, 'samples': 5351424, 'steps': 10451, 'loss/train': 1.6187797784805298} +02/24/2022 15:06:08 - INFO - codeparrot_training - Step 10452: {'lr': 0.0004627142405591325, 'samples': 5351936, 'steps': 10452, 'loss/train': 1.9603959321975708} +02/24/2022 15:06:11 - INFO - codeparrot_training - Step 10453: {'lr': 0.00046270564330865466, 'samples': 5352448, 'steps': 10453, 'loss/train': 0.7993656992912292} +02/24/2022 15:06:17 - INFO - codeparrot_training - Step 10454: {'lr': 0.0004626970451470134, 'samples': 5352960, 'steps': 10454, 'loss/train': 1.8133491277694702} +02/24/2022 15:06:21 - INFO - codeparrot_training - Step 10455: {'lr': 0.0004626884460742455, 'samples': 5353472, 'steps': 10455, 'loss/train': 1.907637596130371} +02/24/2022 15:06:24 - INFO - codeparrot_training - Step 10456: {'lr': 0.00046267984609038793, 'samples': 5353984, 'steps': 10456, 'loss/train': 8.732415199279785} +02/24/2022 15:06:30 - INFO - codeparrot_training - Step 10457: {'lr': 0.0004626712451954773, 'samples': 5354496, 'steps': 10457, 'loss/train': 2.1762301921844482} +02/24/2022 15:06:33 - INFO - codeparrot_training - Step 10458: {'lr': 0.0004626626433895507, 'samples': 5355008, 'steps': 10458, 'loss/train': 0.9044533967971802} +02/24/2022 15:06:40 - INFO - codeparrot_training - Step 10459: {'lr': 0.00046265404067264484, 'samples': 5355520, 'steps': 10459, 'loss/train': 2.3630051612854004} +02/24/2022 15:06:43 - INFO - codeparrot_training - Step 10460: {'lr': 0.00046264543704479654, 'samples': 5356032, 'steps': 10460, 'loss/train': 2.51969051361084} +02/24/2022 15:06:49 - INFO - codeparrot_training - Step 10461: {'lr': 0.0004626368325060428, 'samples': 5356544, 'steps': 10461, 'loss/train': 1.684999704360962} +02/24/2022 15:06:52 - INFO - codeparrot_training - Step 10462: {'lr': 0.00046262822705642025, 'samples': 5357056, 'steps': 10462, 'loss/train': 2.6834254264831543} +02/24/2022 15:06:58 - INFO - codeparrot_training - Step 10463: {'lr': 0.00046261962069596603, 'samples': 5357568, 'steps': 10463, 'loss/train': 2.5085251331329346} +02/24/2022 15:07:01 - INFO - codeparrot_training - Step 10464: {'lr': 0.0004626110134247168, 'samples': 5358080, 'steps': 10464, 'loss/train': 2.391413688659668} +02/24/2022 15:07:07 - INFO - codeparrot_training - Step 10465: {'lr': 0.0004626024052427095, 'samples': 5358592, 'steps': 10465, 'loss/train': 1.5142011642456055} +02/24/2022 15:07:10 - INFO - codeparrot_training - Step 10466: {'lr': 0.00046259379614998103, 'samples': 5359104, 'steps': 10466, 'loss/train': 2.719632387161255} +02/24/2022 15:07:16 - INFO - codeparrot_training - Step 10467: {'lr': 0.00046258518614656827, 'samples': 5359616, 'steps': 10467, 'loss/train': 1.3169454336166382} +02/24/2022 15:07:19 - INFO - codeparrot_training - Step 10468: {'lr': 0.0004625765752325081, 'samples': 5360128, 'steps': 10468, 'loss/train': 2.7741124629974365} +02/24/2022 15:07:25 - INFO - codeparrot_training - Step 10469: {'lr': 0.0004625679634078372, 'samples': 5360640, 'steps': 10469, 'loss/train': 3.4212605953216553} +02/24/2022 15:07:29 - INFO - codeparrot_training - Step 10470: {'lr': 0.0004625593506725928, 'samples': 5361152, 'steps': 10470, 'loss/train': 2.8670907020568848} +02/24/2022 15:07:34 - INFO - codeparrot_training - Step 10471: {'lr': 0.0004625507370268116, 'samples': 5361664, 'steps': 10471, 'loss/train': 2.548942804336548} +02/24/2022 15:07:38 - INFO - codeparrot_training - Step 10472: {'lr': 0.00046254212247053055, 'samples': 5362176, 'steps': 10472, 'loss/train': 2.207308053970337} +02/24/2022 15:07:43 - INFO - codeparrot_training - Step 10473: {'lr': 0.00046253350700378655, 'samples': 5362688, 'steps': 10473, 'loss/train': 1.3895708322525024} +02/24/2022 15:07:47 - INFO - codeparrot_training - Step 10474: {'lr': 0.0004625248906266165, 'samples': 5363200, 'steps': 10474, 'loss/train': 2.06227970123291} +02/24/2022 15:07:52 - INFO - codeparrot_training - Step 10475: {'lr': 0.00046251627333905723, 'samples': 5363712, 'steps': 10475, 'loss/train': 2.119565486907959} +02/24/2022 15:07:56 - INFO - codeparrot_training - Step 10476: {'lr': 0.0004625076551411458, 'samples': 5364224, 'steps': 10476, 'loss/train': 1.8095057010650635} +02/24/2022 15:08:01 - INFO - codeparrot_training - Step 10477: {'lr': 0.000462499036032919, 'samples': 5364736, 'steps': 10477, 'loss/train': 2.1605141162872314} +02/24/2022 15:08:05 - INFO - codeparrot_training - Step 10478: {'lr': 0.0004624904160144138, 'samples': 5365248, 'steps': 10478, 'loss/train': 2.337312936782837} +02/24/2022 15:08:11 - INFO - codeparrot_training - Step 10479: {'lr': 0.00046248179508566716, 'samples': 5365760, 'steps': 10479, 'loss/train': 1.1478919982910156} +02/24/2022 15:08:15 - INFO - codeparrot_training - Step 10480: {'lr': 0.000462473173246716, 'samples': 5366272, 'steps': 10480, 'loss/train': 2.8255481719970703} +02/24/2022 15:08:20 - INFO - codeparrot_training - Step 10481: {'lr': 0.00046246455049759716, 'samples': 5366784, 'steps': 10481, 'loss/train': 1.9251611232757568} +02/24/2022 15:08:24 - INFO - codeparrot_training - Step 10482: {'lr': 0.00046245592683834773, 'samples': 5367296, 'steps': 10482, 'loss/train': 1.8433847427368164} +02/24/2022 15:08:29 - INFO - codeparrot_training - Step 10483: {'lr': 0.00046244730226900453, 'samples': 5367808, 'steps': 10483, 'loss/train': 1.8261759281158447} +02/24/2022 15:08:33 - INFO - codeparrot_training - Step 10484: {'lr': 0.00046243867678960463, 'samples': 5368320, 'steps': 10484, 'loss/train': 0.6374279260635376} +02/24/2022 15:08:38 - INFO - codeparrot_training - Step 10485: {'lr': 0.00046243005040018484, 'samples': 5368832, 'steps': 10485, 'loss/train': 2.717811107635498} +02/24/2022 15:08:42 - INFO - codeparrot_training - Step 10486: {'lr': 0.0004624214231007821, 'samples': 5369344, 'steps': 10486, 'loss/train': 1.7226450443267822} +02/24/2022 15:08:47 - INFO - codeparrot_training - Step 10487: {'lr': 0.0004624127948914335, 'samples': 5369856, 'steps': 10487, 'loss/train': 1.8952337503433228} +02/24/2022 15:08:51 - INFO - codeparrot_training - Step 10488: {'lr': 0.0004624041657721759, 'samples': 5370368, 'steps': 10488, 'loss/train': 3.1727347373962402} +02/24/2022 15:08:56 - INFO - codeparrot_training - Step 10489: {'lr': 0.0004623955357430464, 'samples': 5370880, 'steps': 10489, 'loss/train': 1.7089414596557617} +02/24/2022 15:09:00 - INFO - codeparrot_training - Step 10490: {'lr': 0.0004623869048040817, 'samples': 5371392, 'steps': 10490, 'loss/train': 2.485283374786377} +02/24/2022 15:09:06 - INFO - codeparrot_training - Step 10491: {'lr': 0.0004623782729553191, 'samples': 5371904, 'steps': 10491, 'loss/train': 3.259850263595581} +02/24/2022 15:09:09 - INFO - codeparrot_training - Step 10492: {'lr': 0.00046236964019679533, 'samples': 5372416, 'steps': 10492, 'loss/train': 2.32462739944458} +02/24/2022 15:09:15 - INFO - codeparrot_training - Step 10493: {'lr': 0.0004623610065285475, 'samples': 5372928, 'steps': 10493, 'loss/train': 1.2415096759796143} +02/24/2022 15:09:18 - INFO - codeparrot_training - Step 10494: {'lr': 0.00046235237195061253, 'samples': 5373440, 'steps': 10494, 'loss/train': 2.2489330768585205} +02/24/2022 15:09:24 - INFO - codeparrot_training - Step 10495: {'lr': 0.00046234373646302743, 'samples': 5373952, 'steps': 10495, 'loss/train': 1.502928614616394} +02/24/2022 15:09:28 - INFO - codeparrot_training - Step 10496: {'lr': 0.00046233510006582913, 'samples': 5374464, 'steps': 10496, 'loss/train': 2.110473871231079} +02/24/2022 15:09:33 - INFO - codeparrot_training - Step 10497: {'lr': 0.00046232646275905475, 'samples': 5374976, 'steps': 10497, 'loss/train': 2.732248306274414} +02/24/2022 15:09:36 - INFO - codeparrot_training - Step 10498: {'lr': 0.00046231782454274117, 'samples': 5375488, 'steps': 10498, 'loss/train': 0.805681049823761} +02/24/2022 15:09:42 - INFO - codeparrot_training - Step 10499: {'lr': 0.00046230918541692557, 'samples': 5376000, 'steps': 10499, 'loss/train': 1.8752543926239014} +02/24/2022 15:09:45 - INFO - codeparrot_training - Step 10500: {'lr': 0.00046230054538164475, 'samples': 5376512, 'steps': 10500, 'loss/train': 2.2577922344207764} +02/24/2022 15:09:52 - INFO - codeparrot_training - Step 10501: {'lr': 0.0004622919044369358, 'samples': 5377024, 'steps': 10501, 'loss/train': 1.9790390729904175} +02/24/2022 15:09:55 - INFO - codeparrot_training - Step 10502: {'lr': 0.00046228326258283576, 'samples': 5377536, 'steps': 10502, 'loss/train': 2.1342992782592773} +02/24/2022 15:10:01 - INFO - codeparrot_training - Step 10503: {'lr': 0.0004622746198193816, 'samples': 5378048, 'steps': 10503, 'loss/train': 1.1480612754821777} +02/24/2022 15:10:04 - INFO - codeparrot_training - Step 10504: {'lr': 0.00046226597614661044, 'samples': 5378560, 'steps': 10504, 'loss/train': 1.5505146980285645} +02/24/2022 15:10:10 - INFO - codeparrot_training - Step 10505: {'lr': 0.00046225733156455916, 'samples': 5379072, 'steps': 10505, 'loss/train': 1.1708292961120605} +02/24/2022 15:10:13 - INFO - codeparrot_training - Step 10506: {'lr': 0.00046224868607326494, 'samples': 5379584, 'steps': 10506, 'loss/train': 1.649767279624939} +02/24/2022 15:10:19 - INFO - codeparrot_training - Step 10507: {'lr': 0.00046224003967276474, 'samples': 5380096, 'steps': 10507, 'loss/train': 2.3073229789733887} +02/24/2022 15:10:22 - INFO - codeparrot_training - Step 10508: {'lr': 0.00046223139236309553, 'samples': 5380608, 'steps': 10508, 'loss/train': 1.979972004890442} +02/24/2022 15:10:28 - INFO - codeparrot_training - Step 10509: {'lr': 0.0004622227441442945, 'samples': 5381120, 'steps': 10509, 'loss/train': 1.5092341899871826} +02/24/2022 15:10:31 - INFO - codeparrot_training - Step 10510: {'lr': 0.00046221409501639863, 'samples': 5381632, 'steps': 10510, 'loss/train': 2.424701452255249} +02/24/2022 15:10:37 - INFO - codeparrot_training - Step 10511: {'lr': 0.0004622054449794449, 'samples': 5382144, 'steps': 10511, 'loss/train': 2.3061470985412598} +02/24/2022 15:10:40 - INFO - codeparrot_training - Step 10512: {'lr': 0.0004621967940334705, 'samples': 5382656, 'steps': 10512, 'loss/train': 2.4230778217315674} +02/24/2022 15:10:46 - INFO - codeparrot_training - Step 10513: {'lr': 0.00046218814217851233, 'samples': 5383168, 'steps': 10513, 'loss/train': 1.9350486993789673} +02/24/2022 15:10:49 - INFO - codeparrot_training - Step 10514: {'lr': 0.0004621794894146076, 'samples': 5383680, 'steps': 10514, 'loss/train': 1.7866779565811157} +02/24/2022 15:10:55 - INFO - codeparrot_training - Step 10515: {'lr': 0.0004621708357417933, 'samples': 5384192, 'steps': 10515, 'loss/train': 3.268122673034668} +02/24/2022 15:11:01 - INFO - codeparrot_training - Step 10516: {'lr': 0.00046216218116010646, 'samples': 5384704, 'steps': 10516, 'loss/train': 1.798248052597046} +02/24/2022 15:11:04 - INFO - codeparrot_training - Step 10517: {'lr': 0.00046215352566958423, 'samples': 5385216, 'steps': 10517, 'loss/train': 2.2035131454467773} +02/24/2022 15:11:10 - INFO - codeparrot_training - Step 10518: {'lr': 0.00046214486927026373, 'samples': 5385728, 'steps': 10518, 'loss/train': 1.996370792388916} +02/24/2022 15:11:13 - INFO - codeparrot_training - Step 10519: {'lr': 0.0004621362119621819, 'samples': 5386240, 'steps': 10519, 'loss/train': 2.58803653717041} +02/24/2022 15:11:19 - INFO - codeparrot_training - Step 10520: {'lr': 0.00046212755374537594, 'samples': 5386752, 'steps': 10520, 'loss/train': 0.9014103412628174} +02/24/2022 15:11:22 - INFO - codeparrot_training - Step 10521: {'lr': 0.00046211889461988286, 'samples': 5387264, 'steps': 10521, 'loss/train': 2.401191473007202} +02/24/2022 15:11:28 - INFO - codeparrot_training - Step 10522: {'lr': 0.0004621102345857399, 'samples': 5387776, 'steps': 10522, 'loss/train': 2.2674074172973633} +02/24/2022 15:11:31 - INFO - codeparrot_training - Step 10523: {'lr': 0.0004621015736429839, 'samples': 5388288, 'steps': 10523, 'loss/train': 1.804938554763794} +02/24/2022 15:11:36 - INFO - codeparrot_training - Step 10524: {'lr': 0.00046209291179165216, 'samples': 5388800, 'steps': 10524, 'loss/train': 2.3942768573760986} +02/24/2022 15:11:40 - INFO - codeparrot_training - Step 10525: {'lr': 0.0004620842490317817, 'samples': 5389312, 'steps': 10525, 'loss/train': 1.725327730178833} +02/24/2022 15:11:46 - INFO - codeparrot_training - Step 10526: {'lr': 0.0004620755853634097, 'samples': 5389824, 'steps': 10526, 'loss/train': 2.086566686630249} +02/24/2022 15:11:50 - INFO - codeparrot_training - Step 10527: {'lr': 0.00046206692078657325, 'samples': 5390336, 'steps': 10527, 'loss/train': 2.959632158279419} +02/24/2022 15:11:55 - INFO - codeparrot_training - Step 10528: {'lr': 0.0004620582553013094, 'samples': 5390848, 'steps': 10528, 'loss/train': 1.8163808584213257} +02/24/2022 15:11:59 - INFO - codeparrot_training - Step 10529: {'lr': 0.00046204958890765536, 'samples': 5391360, 'steps': 10529, 'loss/train': 1.567827582359314} +02/24/2022 15:12:04 - INFO - codeparrot_training - Step 10530: {'lr': 0.0004620409216056483, 'samples': 5391872, 'steps': 10530, 'loss/train': 1.542049765586853} +02/24/2022 15:12:08 - INFO - codeparrot_training - Step 10531: {'lr': 0.00046203225339532515, 'samples': 5392384, 'steps': 10531, 'loss/train': 2.0172841548919678} +02/24/2022 15:12:13 - INFO - codeparrot_training - Step 10532: {'lr': 0.00046202358427672313, 'samples': 5392896, 'steps': 10532, 'loss/train': 2.221881151199341} +02/24/2022 15:12:17 - INFO - codeparrot_training - Step 10533: {'lr': 0.0004620149142498795, 'samples': 5393408, 'steps': 10533, 'loss/train': 2.393617630004883} +02/24/2022 15:12:22 - INFO - codeparrot_training - Step 10534: {'lr': 0.0004620062433148312, 'samples': 5393920, 'steps': 10534, 'loss/train': 2.6493568420410156} +02/24/2022 15:12:26 - INFO - codeparrot_training - Step 10535: {'lr': 0.00046199757147161554, 'samples': 5394432, 'steps': 10535, 'loss/train': 0.931510865688324} +02/24/2022 15:12:32 - INFO - codeparrot_training - Step 10536: {'lr': 0.00046198889872026963, 'samples': 5394944, 'steps': 10536, 'loss/train': 2.0688412189483643} +02/24/2022 15:12:36 - INFO - codeparrot_training - Step 10537: {'lr': 0.0004619802250608305, 'samples': 5395456, 'steps': 10537, 'loss/train': 1.9365615844726562} +02/24/2022 15:12:41 - INFO - codeparrot_training - Step 10538: {'lr': 0.0004619715504933354, 'samples': 5395968, 'steps': 10538, 'loss/train': 1.5142697095870972} +02/24/2022 15:12:45 - INFO - codeparrot_training - Step 10539: {'lr': 0.00046196287501782155, 'samples': 5396480, 'steps': 10539, 'loss/train': 2.4453554153442383} +02/24/2022 15:12:50 - INFO - codeparrot_training - Step 10540: {'lr': 0.00046195419863432604, 'samples': 5396992, 'steps': 10540, 'loss/train': 1.559774398803711} +02/24/2022 15:12:54 - INFO - codeparrot_training - Step 10541: {'lr': 0.000461945521342886, 'samples': 5397504, 'steps': 10541, 'loss/train': 1.8514355421066284} +02/24/2022 15:12:59 - INFO - codeparrot_training - Step 10542: {'lr': 0.0004619368431435387, 'samples': 5398016, 'steps': 10542, 'loss/train': 0.19608449935913086} +02/24/2022 15:13:03 - INFO - codeparrot_training - Step 10543: {'lr': 0.0004619281640363212, 'samples': 5398528, 'steps': 10543, 'loss/train': 2.3130812644958496} +02/24/2022 15:13:08 - INFO - codeparrot_training - Step 10544: {'lr': 0.0004619194840212708, 'samples': 5399040, 'steps': 10544, 'loss/train': 1.5492312908172607} +02/24/2022 15:13:12 - INFO - codeparrot_training - Step 10545: {'lr': 0.00046191080309842457, 'samples': 5399552, 'steps': 10545, 'loss/train': 2.719177722930908} +02/24/2022 15:13:18 - INFO - codeparrot_training - Step 10546: {'lr': 0.0004619021212678198, 'samples': 5400064, 'steps': 10546, 'loss/train': 1.6152968406677246} +02/24/2022 15:13:22 - INFO - codeparrot_training - Step 10547: {'lr': 0.0004618934385294936, 'samples': 5400576, 'steps': 10547, 'loss/train': 2.703350782394409} +02/24/2022 15:13:27 - INFO - codeparrot_training - Step 10548: {'lr': 0.0004618847548834833, 'samples': 5401088, 'steps': 10548, 'loss/train': 2.941051721572876} +02/24/2022 15:13:30 - INFO - codeparrot_training - Step 10549: {'lr': 0.0004618760703298258, 'samples': 5401600, 'steps': 10549, 'loss/train': 2.6009654998779297} +02/24/2022 15:13:36 - INFO - codeparrot_training - Step 10550: {'lr': 0.0004618673848685586, 'samples': 5402112, 'steps': 10550, 'loss/train': 0.8980075716972351} +02/24/2022 15:13:40 - INFO - codeparrot_training - Step 10551: {'lr': 0.00046185869849971884, 'samples': 5402624, 'steps': 10551, 'loss/train': 2.0634307861328125} +02/24/2022 15:13:45 - INFO - codeparrot_training - Step 10552: {'lr': 0.0004618500112233436, 'samples': 5403136, 'steps': 10552, 'loss/train': 0.6996252536773682} +02/24/2022 15:13:49 - INFO - codeparrot_training - Step 10553: {'lr': 0.0004618413230394702, 'samples': 5403648, 'steps': 10553, 'loss/train': 2.558077573776245} +02/24/2022 15:13:54 - INFO - codeparrot_training - Step 10554: {'lr': 0.0004618326339481359, 'samples': 5404160, 'steps': 10554, 'loss/train': 2.5209367275238037} +02/24/2022 15:13:58 - INFO - codeparrot_training - Step 10555: {'lr': 0.00046182394394937774, 'samples': 5404672, 'steps': 10555, 'loss/train': 1.4326109886169434} +02/24/2022 15:14:03 - INFO - codeparrot_training - Step 10556: {'lr': 0.00046181525304323325, 'samples': 5405184, 'steps': 10556, 'loss/train': 2.0825324058532715} +02/24/2022 15:14:07 - INFO - codeparrot_training - Step 10557: {'lr': 0.0004618065612297393, 'samples': 5405696, 'steps': 10557, 'loss/train': 1.4221198558807373} +02/24/2022 15:14:12 - INFO - codeparrot_training - Step 10558: {'lr': 0.00046179786850893335, 'samples': 5406208, 'steps': 10558, 'loss/train': 2.5532338619232178} +02/24/2022 15:14:16 - INFO - codeparrot_training - Step 10559: {'lr': 0.0004617891748808526, 'samples': 5406720, 'steps': 10559, 'loss/train': 1.6183686256408691} +02/24/2022 15:14:22 - INFO - codeparrot_training - Step 10560: {'lr': 0.0004617804803455343, 'samples': 5407232, 'steps': 10560, 'loss/train': 2.1259236335754395} +02/24/2022 15:14:25 - INFO - codeparrot_training - Step 10561: {'lr': 0.0004617717849030156, 'samples': 5407744, 'steps': 10561, 'loss/train': 1.8071929216384888} +02/24/2022 15:14:31 - INFO - codeparrot_training - Step 10562: {'lr': 0.00046176308855333395, 'samples': 5408256, 'steps': 10562, 'loss/train': 2.0819756984710693} +02/24/2022 15:14:35 - INFO - codeparrot_training - Step 10563: {'lr': 0.00046175439129652636, 'samples': 5408768, 'steps': 10563, 'loss/train': 2.60795259475708} +02/24/2022 15:14:38 - INFO - codeparrot_training - Step 10564: {'lr': 0.0004617456931326302, 'samples': 5409280, 'steps': 10564, 'loss/train': 1.0081745386123657} +02/24/2022 15:14:44 - INFO - codeparrot_training - Step 10565: {'lr': 0.00046173699406168277, 'samples': 5409792, 'steps': 10565, 'loss/train': 2.4007821083068848} +02/24/2022 15:14:47 - INFO - codeparrot_training - Step 10566: {'lr': 0.00046172829408372125, 'samples': 5410304, 'steps': 10566, 'loss/train': 3.254162549972534} +02/24/2022 15:14:52 - INFO - codeparrot_training - Step 10567: {'lr': 0.000461719593198783, 'samples': 5410816, 'steps': 10567, 'loss/train': 1.541430115699768} +02/24/2022 15:14:58 - INFO - codeparrot_training - Step 10568: {'lr': 0.0004617108914069052, 'samples': 5411328, 'steps': 10568, 'loss/train': 1.8166934251785278} +02/24/2022 15:15:01 - INFO - codeparrot_training - Step 10569: {'lr': 0.00046170218870812517, 'samples': 5411840, 'steps': 10569, 'loss/train': 2.7922027111053467} +02/24/2022 15:15:07 - INFO - codeparrot_training - Step 10570: {'lr': 0.0004616934851024802, 'samples': 5412352, 'steps': 10570, 'loss/train': 1.125774621963501} +02/24/2022 15:15:10 - INFO - codeparrot_training - Step 10571: {'lr': 0.00046168478059000753, 'samples': 5412864, 'steps': 10571, 'loss/train': 1.5489860773086548} +02/24/2022 15:15:17 - INFO - codeparrot_training - Step 10572: {'lr': 0.0004616760751707445, 'samples': 5413376, 'steps': 10572, 'loss/train': 1.6841152906417847} +02/24/2022 15:15:20 - INFO - codeparrot_training - Step 10573: {'lr': 0.0004616673688447284, 'samples': 5413888, 'steps': 10573, 'loss/train': 2.14851450920105} +02/24/2022 15:15:26 - INFO - codeparrot_training - Step 10574: {'lr': 0.0004616586616119964, 'samples': 5414400, 'steps': 10574, 'loss/train': 1.840282917022705} +02/24/2022 15:15:29 - INFO - codeparrot_training - Step 10575: {'lr': 0.0004616499534725861, 'samples': 5414912, 'steps': 10575, 'loss/train': 1.9462885856628418} +02/24/2022 15:15:35 - INFO - codeparrot_training - Step 10576: {'lr': 0.00046164124442653445, 'samples': 5415424, 'steps': 10576, 'loss/train': 1.85312020778656} +02/24/2022 15:15:38 - INFO - codeparrot_training - Step 10577: {'lr': 0.00046163253447387896, 'samples': 5415936, 'steps': 10577, 'loss/train': 2.4246721267700195} +02/24/2022 15:15:44 - INFO - codeparrot_training - Step 10578: {'lr': 0.0004616238236146569, 'samples': 5416448, 'steps': 10578, 'loss/train': 2.5775511264801025} +02/24/2022 15:15:47 - INFO - codeparrot_training - Step 10579: {'lr': 0.0004616151118489056, 'samples': 5416960, 'steps': 10579, 'loss/train': 2.937816858291626} +02/24/2022 15:15:53 - INFO - codeparrot_training - Step 10580: {'lr': 0.0004616063991766623, 'samples': 5417472, 'steps': 10580, 'loss/train': 0.9874318242073059} +02/24/2022 15:15:56 - INFO - codeparrot_training - Step 10581: {'lr': 0.00046159768559796437, 'samples': 5417984, 'steps': 10581, 'loss/train': 1.7239164113998413} +02/24/2022 15:16:02 - INFO - codeparrot_training - Step 10582: {'lr': 0.0004615889711128492, 'samples': 5418496, 'steps': 10582, 'loss/train': 2.2855613231658936} +02/24/2022 15:16:06 - INFO - codeparrot_training - Step 10583: {'lr': 0.00046158025572135404, 'samples': 5419008, 'steps': 10583, 'loss/train': 3.1169204711914062} +02/24/2022 15:16:11 - INFO - codeparrot_training - Step 10584: {'lr': 0.00046157153942351625, 'samples': 5419520, 'steps': 10584, 'loss/train': 1.6316964626312256} +02/24/2022 15:16:15 - INFO - codeparrot_training - Step 10585: {'lr': 0.0004615628222193732, 'samples': 5420032, 'steps': 10585, 'loss/train': 4.91056489944458} +02/24/2022 15:16:20 - INFO - codeparrot_training - Step 10586: {'lr': 0.00046155410410896215, 'samples': 5420544, 'steps': 10586, 'loss/train': 2.4259636402130127} +02/24/2022 15:16:24 - INFO - codeparrot_training - Step 10587: {'lr': 0.00046154538509232044, 'samples': 5421056, 'steps': 10587, 'loss/train': 2.300079107284546} +02/24/2022 15:16:29 - INFO - codeparrot_training - Step 10588: {'lr': 0.00046153666516948554, 'samples': 5421568, 'steps': 10588, 'loss/train': 1.9895038604736328} +02/24/2022 15:16:33 - INFO - codeparrot_training - Step 10589: {'lr': 0.0004615279443404948, 'samples': 5422080, 'steps': 10589, 'loss/train': 1.9219489097595215} +02/24/2022 15:16:38 - INFO - codeparrot_training - Step 10590: {'lr': 0.0004615192226053855, 'samples': 5422592, 'steps': 10590, 'loss/train': 1.7846968173980713} +02/24/2022 15:16:42 - INFO - codeparrot_training - Step 10591: {'lr': 0.0004615104999641949, 'samples': 5423104, 'steps': 10591, 'loss/train': 2.1698429584503174} +02/24/2022 15:16:48 - INFO - codeparrot_training - Step 10592: {'lr': 0.0004615017764169606, 'samples': 5423616, 'steps': 10592, 'loss/train': 0.9332539439201355} +02/24/2022 15:16:52 - INFO - codeparrot_training - Step 10593: {'lr': 0.0004614930519637198, 'samples': 5424128, 'steps': 10593, 'loss/train': 2.5743725299835205} +02/24/2022 15:16:57 - INFO - codeparrot_training - Step 10594: {'lr': 0.0004614843266045099, 'samples': 5424640, 'steps': 10594, 'loss/train': 2.1349785327911377} +02/24/2022 15:17:01 - INFO - codeparrot_training - Step 10595: {'lr': 0.0004614756003393683, 'samples': 5425152, 'steps': 10595, 'loss/train': 1.0838006734848022} +02/24/2022 15:17:06 - INFO - codeparrot_training - Step 10596: {'lr': 0.00046146687316833235, 'samples': 5425664, 'steps': 10596, 'loss/train': 3.6415281295776367} +02/24/2022 15:17:10 - INFO - codeparrot_training - Step 10597: {'lr': 0.00046145814509143955, 'samples': 5426176, 'steps': 10597, 'loss/train': 1.4740712642669678} +02/24/2022 15:17:15 - INFO - codeparrot_training - Step 10598: {'lr': 0.0004614494161087271, 'samples': 5426688, 'steps': 10598, 'loss/train': 2.9835093021392822} +02/24/2022 15:17:19 - INFO - codeparrot_training - Step 10599: {'lr': 0.00046144068622023263, 'samples': 5427200, 'steps': 10599, 'loss/train': 1.950249195098877} +02/24/2022 15:17:24 - INFO - codeparrot_training - Step 10600: {'lr': 0.00046143195542599336, 'samples': 5427712, 'steps': 10600, 'loss/train': 1.9784170389175415} +02/24/2022 15:17:28 - INFO - codeparrot_training - Step 10601: {'lr': 0.00046142322372604667, 'samples': 5428224, 'steps': 10601, 'loss/train': 2.5392589569091797} +02/24/2022 15:17:33 - INFO - codeparrot_training - Step 10602: {'lr': 0.00046141449112043, 'samples': 5428736, 'steps': 10602, 'loss/train': 3.438002586364746} +02/24/2022 15:17:37 - INFO - codeparrot_training - Step 10603: {'lr': 0.0004614057576091809, 'samples': 5429248, 'steps': 10603, 'loss/train': 1.6824004650115967} +02/24/2022 15:17:42 - INFO - codeparrot_training - Step 10604: {'lr': 0.00046139702319233656, 'samples': 5429760, 'steps': 10604, 'loss/train': 3.164616823196411} +02/24/2022 15:17:46 - INFO - codeparrot_training - Step 10605: {'lr': 0.00046138828786993456, 'samples': 5430272, 'steps': 10605, 'loss/train': 2.3131275177001953} +02/24/2022 15:17:51 - INFO - codeparrot_training - Step 10606: {'lr': 0.0004613795516420122, 'samples': 5430784, 'steps': 10606, 'loss/train': 1.7368401288986206} +02/24/2022 15:17:55 - INFO - codeparrot_training - Step 10607: {'lr': 0.000461370814508607, 'samples': 5431296, 'steps': 10607, 'loss/train': 2.6561665534973145} +02/24/2022 15:18:01 - INFO - codeparrot_training - Step 10608: {'lr': 0.00046136207646975635, 'samples': 5431808, 'steps': 10608, 'loss/train': 2.068272352218628} +02/24/2022 15:18:05 - INFO - codeparrot_training - Step 10609: {'lr': 0.0004613533375254977, 'samples': 5432320, 'steps': 10609, 'loss/train': 2.232407331466675} +02/24/2022 15:18:10 - INFO - codeparrot_training - Step 10610: {'lr': 0.00046134459767586847, 'samples': 5432832, 'steps': 10610, 'loss/train': 2.3803439140319824} +02/24/2022 15:18:14 - INFO - codeparrot_training - Step 10611: {'lr': 0.00046133585692090603, 'samples': 5433344, 'steps': 10611, 'loss/train': 1.705320119857788} +02/24/2022 15:18:19 - INFO - codeparrot_training - Step 10612: {'lr': 0.0004613271152606479, 'samples': 5433856, 'steps': 10612, 'loss/train': 1.8641812801361084} +02/24/2022 15:18:23 - INFO - codeparrot_training - Step 10613: {'lr': 0.00046131837269513154, 'samples': 5434368, 'steps': 10613, 'loss/train': 1.3345494270324707} +02/24/2022 15:18:28 - INFO - codeparrot_training - Step 10614: {'lr': 0.00046130962922439435, 'samples': 5434880, 'steps': 10614, 'loss/train': 3.0980565547943115} +02/24/2022 15:18:32 - INFO - codeparrot_training - Step 10615: {'lr': 0.00046130088484847383, 'samples': 5435392, 'steps': 10615, 'loss/train': 1.9705994129180908} +02/24/2022 15:18:37 - INFO - codeparrot_training - Step 10616: {'lr': 0.0004612921395674074, 'samples': 5435904, 'steps': 10616, 'loss/train': 2.215280055999756} +02/24/2022 15:18:41 - INFO - codeparrot_training - Step 10617: {'lr': 0.00046128339338123253, 'samples': 5436416, 'steps': 10617, 'loss/train': 2.13520884513855} +02/24/2022 15:18:47 - INFO - codeparrot_training - Step 10618: {'lr': 0.0004612746462899867, 'samples': 5436928, 'steps': 10618, 'loss/train': 2.596813440322876} +02/24/2022 15:18:50 - INFO - codeparrot_training - Step 10619: {'lr': 0.00046126589829370736, 'samples': 5437440, 'steps': 10619, 'loss/train': 2.2310431003570557} +02/24/2022 15:18:56 - INFO - codeparrot_training - Step 10620: {'lr': 0.00046125714939243204, 'samples': 5437952, 'steps': 10620, 'loss/train': 2.1403725147247314} +02/24/2022 15:18:59 - INFO - codeparrot_training - Step 10621: {'lr': 0.00046124839958619815, 'samples': 5438464, 'steps': 10621, 'loss/train': 2.138474464416504} +02/24/2022 15:19:05 - INFO - codeparrot_training - Step 10622: {'lr': 0.0004612396488750432, 'samples': 5438976, 'steps': 10622, 'loss/train': 2.5780928134918213} +02/24/2022 15:19:08 - INFO - codeparrot_training - Step 10623: {'lr': 0.00046123089725900464, 'samples': 5439488, 'steps': 10623, 'loss/train': 2.0884387493133545} +02/24/2022 15:19:14 - INFO - codeparrot_training - Step 10624: {'lr': 0.00046122214473812005, 'samples': 5440000, 'steps': 10624, 'loss/train': 1.0501275062561035} +02/24/2022 15:19:17 - INFO - codeparrot_training - Step 10625: {'lr': 0.0004612133913124268, 'samples': 5440512, 'steps': 10625, 'loss/train': 2.3679120540618896} +02/24/2022 15:19:23 - INFO - codeparrot_training - Step 10626: {'lr': 0.00046120463698196245, 'samples': 5441024, 'steps': 10626, 'loss/train': 2.4974405765533447} +02/24/2022 15:19:26 - INFO - codeparrot_training - Step 10627: {'lr': 0.00046119588174676454, 'samples': 5441536, 'steps': 10627, 'loss/train': 2.1923253536224365} +02/24/2022 15:19:32 - INFO - codeparrot_training - Step 10628: {'lr': 0.0004611871256068705, 'samples': 5442048, 'steps': 10628, 'loss/train': 2.741480827331543} +02/24/2022 15:19:36 - INFO - codeparrot_training - Step 10629: {'lr': 0.0004611783685623179, 'samples': 5442560, 'steps': 10629, 'loss/train': 3.049236536026001} +02/24/2022 15:19:41 - INFO - codeparrot_training - Step 10630: {'lr': 0.00046116961061314424, 'samples': 5443072, 'steps': 10630, 'loss/train': 2.523798704147339} +02/24/2022 15:19:45 - INFO - codeparrot_training - Step 10631: {'lr': 0.00046116085175938694, 'samples': 5443584, 'steps': 10631, 'loss/train': 4.917596340179443} +02/24/2022 15:19:50 - INFO - codeparrot_training - Step 10632: {'lr': 0.00046115209200108366, 'samples': 5444096, 'steps': 10632, 'loss/train': 1.781620740890503} +02/24/2022 15:19:54 - INFO - codeparrot_training - Step 10633: {'lr': 0.00046114333133827194, 'samples': 5444608, 'steps': 10633, 'loss/train': 2.1847450733184814} +02/24/2022 15:19:59 - INFO - codeparrot_training - Step 10634: {'lr': 0.0004611345697709891, 'samples': 5445120, 'steps': 10634, 'loss/train': 2.132807731628418} +02/24/2022 15:20:03 - INFO - codeparrot_training - Step 10635: {'lr': 0.0004611258072992729, 'samples': 5445632, 'steps': 10635, 'loss/train': 1.9459401369094849} +02/24/2022 15:20:08 - INFO - codeparrot_training - Step 10636: {'lr': 0.0004611170439231607, 'samples': 5446144, 'steps': 10636, 'loss/train': 2.5081591606140137} +02/24/2022 15:20:12 - INFO - codeparrot_training - Step 10637: {'lr': 0.0004611082796426902, 'samples': 5446656, 'steps': 10637, 'loss/train': 2.3935770988464355} +02/24/2022 15:20:18 - INFO - codeparrot_training - Step 10638: {'lr': 0.00046109951445789883, 'samples': 5447168, 'steps': 10638, 'loss/train': 1.6926934719085693} +02/24/2022 15:20:22 - INFO - codeparrot_training - Step 10639: {'lr': 0.00046109074836882415, 'samples': 5447680, 'steps': 10639, 'loss/train': 1.7920057773590088} +02/24/2022 15:20:27 - INFO - codeparrot_training - Step 10640: {'lr': 0.00046108198137550377, 'samples': 5448192, 'steps': 10640, 'loss/train': 1.946357011795044} +02/24/2022 15:20:33 - INFO - codeparrot_training - Step 10641: {'lr': 0.0004610732134779752, 'samples': 5448704, 'steps': 10641, 'loss/train': 2.303535223007202} +02/24/2022 15:20:36 - INFO - codeparrot_training - Step 10642: {'lr': 0.000461064444676276, 'samples': 5449216, 'steps': 10642, 'loss/train': 2.0571703910827637} +02/24/2022 15:20:40 - INFO - codeparrot_training - Step 10643: {'lr': 0.0004610556749704438, 'samples': 5449728, 'steps': 10643, 'loss/train': 1.1858084201812744} +02/24/2022 15:20:45 - INFO - codeparrot_training - Step 10644: {'lr': 0.000461046904360516, 'samples': 5450240, 'steps': 10644, 'loss/train': 3.731553316116333} +02/24/2022 15:20:51 - INFO - codeparrot_training - Step 10645: {'lr': 0.0004610381328465303, 'samples': 5450752, 'steps': 10645, 'loss/train': 2.4287173748016357} +02/24/2022 15:20:54 - INFO - codeparrot_training - Step 10646: {'lr': 0.0004610293604285243, 'samples': 5451264, 'steps': 10646, 'loss/train': 1.9319217205047607} +02/24/2022 15:21:00 - INFO - codeparrot_training - Step 10647: {'lr': 0.0004610205871065355, 'samples': 5451776, 'steps': 10647, 'loss/train': 0.9761449098587036} +02/24/2022 15:21:03 - INFO - codeparrot_training - Step 10648: {'lr': 0.0004610118128806016, 'samples': 5452288, 'steps': 10648, 'loss/train': 2.363436698913574} +02/24/2022 15:21:09 - INFO - codeparrot_training - Step 10649: {'lr': 0.0004610030377507599, 'samples': 5452800, 'steps': 10649, 'loss/train': 2.598527669906616} +02/24/2022 15:21:12 - INFO - codeparrot_training - Step 10650: {'lr': 0.0004609942617170483, 'samples': 5453312, 'steps': 10650, 'loss/train': 1.288580060005188} +02/24/2022 15:21:18 - INFO - codeparrot_training - Step 10651: {'lr': 0.0004609854847795043, 'samples': 5453824, 'steps': 10651, 'loss/train': 1.7000689506530762} +02/24/2022 15:21:21 - INFO - codeparrot_training - Step 10652: {'lr': 0.0004609767069381655, 'samples': 5454336, 'steps': 10652, 'loss/train': 2.576692819595337} +02/24/2022 15:21:27 - INFO - codeparrot_training - Step 10653: {'lr': 0.00046096792819306945, 'samples': 5454848, 'steps': 10653, 'loss/train': 1.9039483070373535} +02/24/2022 15:21:31 - INFO - codeparrot_training - Step 10654: {'lr': 0.00046095914854425376, 'samples': 5455360, 'steps': 10654, 'loss/train': 2.774906635284424} +02/24/2022 15:21:36 - INFO - codeparrot_training - Step 10655: {'lr': 0.00046095036799175606, 'samples': 5455872, 'steps': 10655, 'loss/train': 1.4152904748916626} +02/24/2022 15:21:40 - INFO - codeparrot_training - Step 10656: {'lr': 0.000460941586535614, 'samples': 5456384, 'steps': 10656, 'loss/train': 3.429565668106079} +02/24/2022 15:21:45 - INFO - codeparrot_training - Step 10657: {'lr': 0.00046093280417586517, 'samples': 5456896, 'steps': 10657, 'loss/train': 0.16530273854732513} +02/24/2022 15:21:49 - INFO - codeparrot_training - Step 10658: {'lr': 0.0004609240209125472, 'samples': 5457408, 'steps': 10658, 'loss/train': 1.9924647808074951} +02/24/2022 15:21:54 - INFO - codeparrot_training - Step 10659: {'lr': 0.00046091523674569765, 'samples': 5457920, 'steps': 10659, 'loss/train': 1.7584319114685059} +02/24/2022 15:21:58 - INFO - codeparrot_training - Step 10660: {'lr': 0.00046090645167535415, 'samples': 5458432, 'steps': 10660, 'loss/train': 2.420118570327759} +02/24/2022 15:22:03 - INFO - codeparrot_training - Step 10661: {'lr': 0.00046089766570155447, 'samples': 5458944, 'steps': 10661, 'loss/train': 0.7556182146072388} +02/24/2022 15:22:07 - INFO - codeparrot_training - Step 10662: {'lr': 0.0004608888788243362, 'samples': 5459456, 'steps': 10662, 'loss/train': 2.5232532024383545} +02/24/2022 15:22:13 - INFO - codeparrot_training - Step 10663: {'lr': 0.00046088009104373683, 'samples': 5459968, 'steps': 10663, 'loss/train': 1.2358235120773315} +02/24/2022 15:22:17 - INFO - codeparrot_training - Step 10664: {'lr': 0.0004608713023597941, 'samples': 5460480, 'steps': 10664, 'loss/train': 2.8571650981903076} +02/24/2022 15:22:22 - INFO - codeparrot_training - Step 10665: {'lr': 0.0004608625127725458, 'samples': 5460992, 'steps': 10665, 'loss/train': 2.98763108253479} +02/24/2022 15:22:26 - INFO - codeparrot_training - Step 10666: {'lr': 0.0004608537222820294, 'samples': 5461504, 'steps': 10666, 'loss/train': 2.2872185707092285} +02/24/2022 15:22:31 - INFO - codeparrot_training - Step 10667: {'lr': 0.0004608449308882826, 'samples': 5462016, 'steps': 10667, 'loss/train': 2.736595869064331} +02/24/2022 15:22:35 - INFO - codeparrot_training - Step 10668: {'lr': 0.000460836138591343, 'samples': 5462528, 'steps': 10668, 'loss/train': 1.7485947608947754} +02/24/2022 15:22:40 - INFO - codeparrot_training - Step 10669: {'lr': 0.0004608273453912484, 'samples': 5463040, 'steps': 10669, 'loss/train': 1.9313406944274902} +02/24/2022 15:22:44 - INFO - codeparrot_training - Step 10670: {'lr': 0.0004608185512880364, 'samples': 5463552, 'steps': 10670, 'loss/train': 3.0228350162506104} +02/24/2022 15:22:49 - INFO - codeparrot_training - Step 10671: {'lr': 0.0004608097562817446, 'samples': 5464064, 'steps': 10671, 'loss/train': 3.2250454425811768} +02/24/2022 15:22:52 - INFO - codeparrot_training - Step 10672: {'lr': 0.0004608009603724108, 'samples': 5464576, 'steps': 10672, 'loss/train': 2.2080538272857666} +02/24/2022 15:22:58 - INFO - codeparrot_training - Step 10673: {'lr': 0.0004607921635600726, 'samples': 5465088, 'steps': 10673, 'loss/train': 1.8597644567489624} +02/24/2022 15:23:01 - INFO - codeparrot_training - Step 10674: {'lr': 0.00046078336584476777, 'samples': 5465600, 'steps': 10674, 'loss/train': 1.579274296760559} +02/24/2022 15:23:08 - INFO - codeparrot_training - Step 10675: {'lr': 0.00046077456722653387, 'samples': 5466112, 'steps': 10675, 'loss/train': 1.0815410614013672} +02/24/2022 15:23:11 - INFO - codeparrot_training - Step 10676: {'lr': 0.00046076576770540865, 'samples': 5466624, 'steps': 10676, 'loss/train': 2.622030019760132} +02/24/2022 15:23:17 - INFO - codeparrot_training - Step 10677: {'lr': 0.00046075696728142986, 'samples': 5467136, 'steps': 10677, 'loss/train': 1.6888563632965088} +02/24/2022 15:23:20 - INFO - codeparrot_training - Step 10678: {'lr': 0.0004607481659546351, 'samples': 5467648, 'steps': 10678, 'loss/train': 1.7208749055862427} +02/24/2022 15:23:26 - INFO - codeparrot_training - Step 10679: {'lr': 0.0004607393637250621, 'samples': 5468160, 'steps': 10679, 'loss/train': 3.6741042137145996} +02/24/2022 15:23:29 - INFO - codeparrot_training - Step 10680: {'lr': 0.00046073056059274867, 'samples': 5468672, 'steps': 10680, 'loss/train': 1.9011914730072021} +02/24/2022 15:23:35 - INFO - codeparrot_training - Step 10681: {'lr': 0.0004607217565577323, 'samples': 5469184, 'steps': 10681, 'loss/train': 2.0433027744293213} +02/24/2022 15:23:38 - INFO - codeparrot_training - Step 10682: {'lr': 0.0004607129516200509, 'samples': 5469696, 'steps': 10682, 'loss/train': 3.231966495513916} +02/24/2022 15:23:44 - INFO - codeparrot_training - Step 10683: {'lr': 0.00046070414577974216, 'samples': 5470208, 'steps': 10683, 'loss/train': 1.6576157808303833} +02/24/2022 15:23:47 - INFO - codeparrot_training - Step 10684: {'lr': 0.00046069533903684374, 'samples': 5470720, 'steps': 10684, 'loss/train': 1.943198800086975} +02/24/2022 15:23:53 - INFO - codeparrot_training - Step 10685: {'lr': 0.00046068653139139337, 'samples': 5471232, 'steps': 10685, 'loss/train': 2.388160467147827} +02/24/2022 15:23:57 - INFO - codeparrot_training - Step 10686: {'lr': 0.0004606777228434288, 'samples': 5471744, 'steps': 10686, 'loss/train': 2.79186749458313} +02/24/2022 15:24:02 - INFO - codeparrot_training - Step 10687: {'lr': 0.00046066891339298783, 'samples': 5472256, 'steps': 10687, 'loss/train': 2.1405656337738037} +02/24/2022 15:24:06 - INFO - codeparrot_training - Step 10688: {'lr': 0.0004606601030401081, 'samples': 5472768, 'steps': 10688, 'loss/train': 2.591496706008911} +02/24/2022 15:24:12 - INFO - codeparrot_training - Step 10689: {'lr': 0.00046065129178482733, 'samples': 5473280, 'steps': 10689, 'loss/train': 1.8618096113204956} +02/24/2022 15:24:15 - INFO - codeparrot_training - Step 10690: {'lr': 0.0004606424796271834, 'samples': 5473792, 'steps': 10690, 'loss/train': 3.25329852104187} +02/24/2022 15:24:21 - INFO - codeparrot_training - Step 10691: {'lr': 0.0004606336665672139, 'samples': 5474304, 'steps': 10691, 'loss/train': 1.3476084470748901} +02/24/2022 15:24:24 - INFO - codeparrot_training - Step 10692: {'lr': 0.00046062485260495666, 'samples': 5474816, 'steps': 10692, 'loss/train': 2.275230646133423} +02/24/2022 15:24:30 - INFO - codeparrot_training - Step 10693: {'lr': 0.00046061603774044945, 'samples': 5475328, 'steps': 10693, 'loss/train': 1.5449998378753662} +02/24/2022 15:24:33 - INFO - codeparrot_training - Step 10694: {'lr': 0.00046060722197373, 'samples': 5475840, 'steps': 10694, 'loss/train': 1.3837188482284546} +02/24/2022 15:24:39 - INFO - codeparrot_training - Step 10695: {'lr': 0.0004605984053048361, 'samples': 5476352, 'steps': 10695, 'loss/train': 0.9562422037124634} +02/24/2022 15:24:42 - INFO - codeparrot_training - Step 10696: {'lr': 0.0004605895877338055, 'samples': 5476864, 'steps': 10696, 'loss/train': 3.118607521057129} +02/24/2022 15:24:48 - INFO - codeparrot_training - Step 10697: {'lr': 0.000460580769260676, 'samples': 5477376, 'steps': 10697, 'loss/train': 2.567796230316162} +02/24/2022 15:24:51 - INFO - codeparrot_training - Step 10698: {'lr': 0.0004605719498854853, 'samples': 5477888, 'steps': 10698, 'loss/train': 1.7771592140197754} +02/24/2022 15:24:57 - INFO - codeparrot_training - Step 10699: {'lr': 0.0004605631296082713, 'samples': 5478400, 'steps': 10699, 'loss/train': 2.086648941040039} +02/24/2022 15:25:00 - INFO - codeparrot_training - Step 10700: {'lr': 0.0004605543084290716, 'samples': 5478912, 'steps': 10700, 'loss/train': 2.4179673194885254} +02/24/2022 15:25:06 - INFO - codeparrot_training - Step 10701: {'lr': 0.00046054548634792426, 'samples': 5479424, 'steps': 10701, 'loss/train': 3.4130523204803467} +02/24/2022 15:25:10 - INFO - codeparrot_training - Step 10702: {'lr': 0.0004605366633648668, 'samples': 5479936, 'steps': 10702, 'loss/train': 2.9978597164154053} +02/24/2022 15:25:15 - INFO - codeparrot_training - Step 10703: {'lr': 0.00046052783947993713, 'samples': 5480448, 'steps': 10703, 'loss/train': 1.5903500318527222} +02/24/2022 15:25:19 - INFO - codeparrot_training - Step 10704: {'lr': 0.0004605190146931731, 'samples': 5480960, 'steps': 10704, 'loss/train': 1.268998384475708} +02/24/2022 15:25:24 - INFO - codeparrot_training - Step 10705: {'lr': 0.0004605101890046124, 'samples': 5481472, 'steps': 10705, 'loss/train': 2.714695930480957} +02/24/2022 15:25:28 - INFO - codeparrot_training - Step 10706: {'lr': 0.00046050136241429295, 'samples': 5481984, 'steps': 10706, 'loss/train': 2.2369601726531982} +02/24/2022 15:25:33 - INFO - codeparrot_training - Step 10707: {'lr': 0.0004604925349222525, 'samples': 5482496, 'steps': 10707, 'loss/train': 2.965559959411621} +02/24/2022 15:25:37 - INFO - codeparrot_training - Step 10708: {'lr': 0.00046048370652852885, 'samples': 5483008, 'steps': 10708, 'loss/train': 0.43953466415405273} +02/24/2022 15:25:42 - INFO - codeparrot_training - Step 10709: {'lr': 0.00046047487723315986, 'samples': 5483520, 'steps': 10709, 'loss/train': 0.8210500478744507} +02/24/2022 15:25:46 - INFO - codeparrot_training - Step 10710: {'lr': 0.0004604660470361832, 'samples': 5484032, 'steps': 10710, 'loss/train': 2.3636443614959717} +02/24/2022 15:25:52 - INFO - codeparrot_training - Step 10711: {'lr': 0.000460457215937637, 'samples': 5484544, 'steps': 10711, 'loss/train': 1.7563261985778809} +02/24/2022 15:25:56 - INFO - codeparrot_training - Step 10712: {'lr': 0.00046044838393755885, 'samples': 5485056, 'steps': 10712, 'loss/train': 1.3773894309997559} +02/24/2022 15:26:01 - INFO - codeparrot_training - Step 10713: {'lr': 0.0004604395510359867, 'samples': 5485568, 'steps': 10713, 'loss/train': 1.2420666217803955} +02/24/2022 15:26:05 - INFO - codeparrot_training - Step 10714: {'lr': 0.0004604307172329582, 'samples': 5486080, 'steps': 10714, 'loss/train': 1.2597712278366089} +02/24/2022 15:26:10 - INFO - codeparrot_training - Step 10715: {'lr': 0.0004604218825285114, 'samples': 5486592, 'steps': 10715, 'loss/train': 2.457947015762329} +02/24/2022 15:26:14 - INFO - codeparrot_training - Step 10716: {'lr': 0.00046041304692268407, 'samples': 5487104, 'steps': 10716, 'loss/train': 1.957581877708435} +02/24/2022 15:26:19 - INFO - codeparrot_training - Step 10717: {'lr': 0.00046040421041551404, 'samples': 5487616, 'steps': 10717, 'loss/train': 1.2665934562683105} +02/24/2022 15:26:23 - INFO - codeparrot_training - Step 10718: {'lr': 0.00046039537300703926, 'samples': 5488128, 'steps': 10718, 'loss/train': 2.0186431407928467} +02/24/2022 15:26:28 - INFO - codeparrot_training - Step 10719: {'lr': 0.00046038653469729747, 'samples': 5488640, 'steps': 10719, 'loss/train': 1.9662803411483765} +02/24/2022 15:26:32 - INFO - codeparrot_training - Step 10720: {'lr': 0.00046037769548632656, 'samples': 5489152, 'steps': 10720, 'loss/train': 0.5897735357284546} +02/24/2022 15:26:38 - INFO - codeparrot_training - Step 10721: {'lr': 0.0004603688553741644, 'samples': 5489664, 'steps': 10721, 'loss/train': 2.4701669216156006} +02/24/2022 15:26:42 - INFO - codeparrot_training - Step 10722: {'lr': 0.0004603600143608488, 'samples': 5490176, 'steps': 10722, 'loss/train': 2.212117910385132} +02/24/2022 15:26:47 - INFO - codeparrot_training - Step 10723: {'lr': 0.00046035117244641783, 'samples': 5490688, 'steps': 10723, 'loss/train': 2.6752448081970215} +02/24/2022 15:26:51 - INFO - codeparrot_training - Step 10724: {'lr': 0.0004603423296309092, 'samples': 5491200, 'steps': 10724, 'loss/train': 1.9107458591461182} +02/24/2022 15:26:56 - INFO - codeparrot_training - Step 10725: {'lr': 0.0004603334859143608, 'samples': 5491712, 'steps': 10725, 'loss/train': 1.4822959899902344} +02/24/2022 15:27:00 - INFO - codeparrot_training - Step 10726: {'lr': 0.0004603246412968105, 'samples': 5492224, 'steps': 10726, 'loss/train': 2.253957748413086} +02/24/2022 15:27:05 - INFO - codeparrot_training - Step 10727: {'lr': 0.00046031579577829616, 'samples': 5492736, 'steps': 10727, 'loss/train': 1.159075379371643} +02/24/2022 15:27:09 - INFO - codeparrot_training - Step 10728: {'lr': 0.00046030694935885586, 'samples': 5493248, 'steps': 10728, 'loss/train': 1.7162890434265137} +02/24/2022 15:27:14 - INFO - codeparrot_training - Step 10729: {'lr': 0.00046029810203852736, 'samples': 5493760, 'steps': 10729, 'loss/train': 1.3259321451187134} +02/24/2022 15:27:18 - INFO - codeparrot_training - Step 10730: {'lr': 0.00046028925381734855, 'samples': 5494272, 'steps': 10730, 'loss/train': 2.19993257522583} +02/24/2022 15:27:24 - INFO - codeparrot_training - Step 10731: {'lr': 0.00046028040469535734, 'samples': 5494784, 'steps': 10731, 'loss/train': 2.3265724182128906} +02/24/2022 15:27:27 - INFO - codeparrot_training - Step 10732: {'lr': 0.00046027155467259166, 'samples': 5495296, 'steps': 10732, 'loss/train': 1.720679759979248} +02/24/2022 15:27:33 - INFO - codeparrot_training - Step 10733: {'lr': 0.00046026270374908935, 'samples': 5495808, 'steps': 10733, 'loss/train': 2.1227848529815674} +02/24/2022 15:27:36 - INFO - codeparrot_training - Step 10734: {'lr': 0.0004602538519248884, 'samples': 5496320, 'steps': 10734, 'loss/train': 2.029430389404297} +02/24/2022 15:27:42 - INFO - codeparrot_training - Step 10735: {'lr': 0.00046024499920002676, 'samples': 5496832, 'steps': 10735, 'loss/train': 2.8200113773345947} +02/24/2022 15:27:45 - INFO - codeparrot_training - Step 10736: {'lr': 0.0004602361455745423, 'samples': 5497344, 'steps': 10736, 'loss/train': 2.5483901500701904} +02/24/2022 15:27:52 - INFO - codeparrot_training - Step 10737: {'lr': 0.00046022729104847293, 'samples': 5497856, 'steps': 10737, 'loss/train': 2.080850124359131} +02/24/2022 15:27:56 - INFO - codeparrot_training - Step 10738: {'lr': 0.0004602184356218566, 'samples': 5498368, 'steps': 10738, 'loss/train': 4.888075351715088} +02/24/2022 15:28:01 - INFO - codeparrot_training - Step 10739: {'lr': 0.0004602095792947312, 'samples': 5498880, 'steps': 10739, 'loss/train': 2.311472177505493} +02/24/2022 15:28:05 - INFO - codeparrot_training - Step 10740: {'lr': 0.00046020072206713484, 'samples': 5499392, 'steps': 10740, 'loss/train': 2.3736765384674072} +02/24/2022 15:28:10 - INFO - codeparrot_training - Step 10741: {'lr': 0.0004601918639391052, 'samples': 5499904, 'steps': 10741, 'loss/train': 1.785421371459961} +02/24/2022 15:28:14 - INFO - codeparrot_training - Step 10742: {'lr': 0.0004601830049106804, 'samples': 5500416, 'steps': 10742, 'loss/train': 2.111750841140747} +02/24/2022 15:28:19 - INFO - codeparrot_training - Step 10743: {'lr': 0.0004601741449818984, 'samples': 5500928, 'steps': 10743, 'loss/train': 2.075619697570801} +02/24/2022 15:28:23 - INFO - codeparrot_training - Step 10744: {'lr': 0.000460165284152797, 'samples': 5501440, 'steps': 10744, 'loss/train': 2.629167318344116} +02/24/2022 15:28:28 - INFO - codeparrot_training - Step 10745: {'lr': 0.0004601564224234143, 'samples': 5501952, 'steps': 10745, 'loss/train': 2.1144609451293945} +02/24/2022 15:28:32 - INFO - codeparrot_training - Step 10746: {'lr': 0.00046014755979378825, 'samples': 5502464, 'steps': 10746, 'loss/train': 2.298802614212036} +02/24/2022 15:28:38 - INFO - codeparrot_training - Step 10747: {'lr': 0.0004601386962639568, 'samples': 5502976, 'steps': 10747, 'loss/train': 1.6870520114898682} +02/24/2022 15:28:42 - INFO - codeparrot_training - Step 10748: {'lr': 0.0004601298318339578, 'samples': 5503488, 'steps': 10748, 'loss/train': 0.5674211382865906} +02/24/2022 15:28:47 - INFO - codeparrot_training - Step 10749: {'lr': 0.0004601209665038294, 'samples': 5504000, 'steps': 10749, 'loss/train': 1.2293298244476318} +02/24/2022 15:28:51 - INFO - codeparrot_training - Step 10750: {'lr': 0.0004601121002736095, 'samples': 5504512, 'steps': 10750, 'loss/train': 3.513444662094116} +02/24/2022 15:28:56 - INFO - codeparrot_training - Step 10751: {'lr': 0.0004601032331433361, 'samples': 5505024, 'steps': 10751, 'loss/train': 2.7403571605682373} +02/24/2022 15:29:00 - INFO - codeparrot_training - Step 10752: {'lr': 0.00046009436511304714, 'samples': 5505536, 'steps': 10752, 'loss/train': 2.4913318157196045} +02/24/2022 15:29:05 - INFO - codeparrot_training - Step 10753: {'lr': 0.0004600854961827806, 'samples': 5506048, 'steps': 10753, 'loss/train': 2.5256471633911133} +02/24/2022 15:29:09 - INFO - codeparrot_training - Step 10754: {'lr': 0.00046007662635257453, 'samples': 5506560, 'steps': 10754, 'loss/train': 2.621028423309326} +02/24/2022 15:29:14 - INFO - codeparrot_training - Step 10755: {'lr': 0.0004600677556224669, 'samples': 5507072, 'steps': 10755, 'loss/train': 2.0305933952331543} +02/24/2022 15:29:21 - INFO - codeparrot_training - Step 10756: {'lr': 0.00046005888399249575, 'samples': 5507584, 'steps': 10756, 'loss/train': 2.801264762878418} +02/24/2022 15:29:24 - INFO - codeparrot_training - Step 10757: {'lr': 0.000460050011462699, 'samples': 5508096, 'steps': 10757, 'loss/train': 2.604865074157715} +02/24/2022 15:29:30 - INFO - codeparrot_training - Step 10758: {'lr': 0.0004600411380331146, 'samples': 5508608, 'steps': 10758, 'loss/train': 1.9681214094161987} +02/24/2022 15:29:33 - INFO - codeparrot_training - Step 10759: {'lr': 0.0004600322637037808, 'samples': 5509120, 'steps': 10759, 'loss/train': 2.2678351402282715} +02/24/2022 15:29:37 - INFO - codeparrot_training - Step 10760: {'lr': 0.00046002338847473545, 'samples': 5509632, 'steps': 10760, 'loss/train': 1.1324044466018677} +02/24/2022 15:29:43 - INFO - codeparrot_training - Step 10761: {'lr': 0.00046001451234601665, 'samples': 5510144, 'steps': 10761, 'loss/train': 5.01030158996582} +02/24/2022 15:29:46 - INFO - codeparrot_training - Step 10762: {'lr': 0.0004600056353176623, 'samples': 5510656, 'steps': 10762, 'loss/train': 3.141014337539673} +02/24/2022 15:29:52 - INFO - codeparrot_training - Step 10763: {'lr': 0.00045999675738971047, 'samples': 5511168, 'steps': 10763, 'loss/train': 1.300363302230835} +02/24/2022 15:29:55 - INFO - codeparrot_training - Step 10764: {'lr': 0.00045998787856219925, 'samples': 5511680, 'steps': 10764, 'loss/train': 2.2624716758728027} +02/24/2022 15:30:01 - INFO - codeparrot_training - Step 10765: {'lr': 0.0004599789988351666, 'samples': 5512192, 'steps': 10765, 'loss/train': 3.016848564147949} +02/24/2022 15:30:04 - INFO - codeparrot_training - Step 10766: {'lr': 0.0004599701182086506, 'samples': 5512704, 'steps': 10766, 'loss/train': 2.2997207641601562} +02/24/2022 15:30:10 - INFO - codeparrot_training - Step 10767: {'lr': 0.0004599612366826893, 'samples': 5513216, 'steps': 10767, 'loss/train': 1.757472276687622} +02/24/2022 15:30:16 - INFO - codeparrot_training - Step 10768: {'lr': 0.00045995235425732076, 'samples': 5513728, 'steps': 10768, 'loss/train': 1.4633433818817139} +02/24/2022 15:30:19 - INFO - codeparrot_training - Step 10769: {'lr': 0.00045994347093258295, 'samples': 5514240, 'steps': 10769, 'loss/train': 2.670070171356201} +02/24/2022 15:30:25 - INFO - codeparrot_training - Step 10770: {'lr': 0.00045993458670851397, 'samples': 5514752, 'steps': 10770, 'loss/train': 2.204164743423462} +02/24/2022 15:30:28 - INFO - codeparrot_training - Step 10771: {'lr': 0.0004599257015851519, 'samples': 5515264, 'steps': 10771, 'loss/train': 2.4114902019500732} +02/24/2022 15:30:34 - INFO - codeparrot_training - Step 10772: {'lr': 0.0004599168155625348, 'samples': 5515776, 'steps': 10772, 'loss/train': 1.9435793161392212} +02/24/2022 15:30:37 - INFO - codeparrot_training - Step 10773: {'lr': 0.00045990792864070075, 'samples': 5516288, 'steps': 10773, 'loss/train': 3.077359199523926} +02/24/2022 15:30:43 - INFO - codeparrot_training - Step 10774: {'lr': 0.0004598990408196878, 'samples': 5516800, 'steps': 10774, 'loss/train': 3.066190481185913} +02/24/2022 15:30:46 - INFO - codeparrot_training - Step 10775: {'lr': 0.00045989015209953394, 'samples': 5517312, 'steps': 10775, 'loss/train': 0.8637327551841736} +02/24/2022 15:30:52 - INFO - codeparrot_training - Step 10776: {'lr': 0.00045988126248027735, 'samples': 5517824, 'steps': 10776, 'loss/train': 0.5748113393783569} +02/24/2022 15:30:55 - INFO - codeparrot_training - Step 10777: {'lr': 0.00045987237196195603, 'samples': 5518336, 'steps': 10777, 'loss/train': 2.196979522705078} +02/24/2022 15:31:01 - INFO - codeparrot_training - Step 10778: {'lr': 0.00045986348054460815, 'samples': 5518848, 'steps': 10778, 'loss/train': 2.402722120285034} +02/24/2022 15:31:04 - INFO - codeparrot_training - Step 10779: {'lr': 0.00045985458822827175, 'samples': 5519360, 'steps': 10779, 'loss/train': 1.4091347455978394} +02/24/2022 15:31:10 - INFO - codeparrot_training - Step 10780: {'lr': 0.0004598456950129849, 'samples': 5519872, 'steps': 10780, 'loss/train': 2.7950053215026855} +02/24/2022 15:31:13 - INFO - codeparrot_training - Step 10781: {'lr': 0.00045983680089878575, 'samples': 5520384, 'steps': 10781, 'loss/train': 2.7543787956237793} +02/24/2022 15:31:20 - INFO - codeparrot_training - Step 10782: {'lr': 0.0004598279058857124, 'samples': 5520896, 'steps': 10782, 'loss/train': 1.9277764558792114} +02/24/2022 15:31:23 - INFO - codeparrot_training - Step 10783: {'lr': 0.00045981900997380296, 'samples': 5521408, 'steps': 10783, 'loss/train': 2.1256773471832275} +02/24/2022 15:31:29 - INFO - codeparrot_training - Step 10784: {'lr': 0.0004598101131630954, 'samples': 5521920, 'steps': 10784, 'loss/train': 1.8175573348999023} +02/24/2022 15:31:32 - INFO - codeparrot_training - Step 10785: {'lr': 0.00045980121545362805, 'samples': 5522432, 'steps': 10785, 'loss/train': 2.7589738368988037} +02/24/2022 15:31:38 - INFO - codeparrot_training - Step 10786: {'lr': 0.0004597923168454389, 'samples': 5522944, 'steps': 10786, 'loss/train': 2.586043119430542} +02/24/2022 15:31:41 - INFO - codeparrot_training - Step 10787: {'lr': 0.000459783417338566, 'samples': 5523456, 'steps': 10787, 'loss/train': 1.959873914718628} +02/24/2022 15:31:47 - INFO - codeparrot_training - Step 10788: {'lr': 0.0004597745169330476, 'samples': 5523968, 'steps': 10788, 'loss/train': 2.5134518146514893} +02/24/2022 15:31:50 - INFO - codeparrot_training - Step 10789: {'lr': 0.0004597656156289217, 'samples': 5524480, 'steps': 10789, 'loss/train': 2.8788723945617676} +02/24/2022 15:31:56 - INFO - codeparrot_training - Step 10790: {'lr': 0.0004597567134262266, 'samples': 5524992, 'steps': 10790, 'loss/train': 1.55586576461792} +02/24/2022 15:31:59 - INFO - codeparrot_training - Step 10791: {'lr': 0.00045974781032500034, 'samples': 5525504, 'steps': 10791, 'loss/train': 1.7677415609359741} +02/24/2022 15:32:06 - INFO - codeparrot_training - Step 10792: {'lr': 0.00045973890632528106, 'samples': 5526016, 'steps': 10792, 'loss/train': 1.8218841552734375} +02/24/2022 15:32:09 - INFO - codeparrot_training - Step 10793: {'lr': 0.00045973000142710696, 'samples': 5526528, 'steps': 10793, 'loss/train': 2.461500644683838} +02/24/2022 15:32:13 - INFO - codeparrot_training - Step 10794: {'lr': 0.000459721095630516, 'samples': 5527040, 'steps': 10794, 'loss/train': 3.791186809539795} +02/24/2022 15:32:18 - INFO - codeparrot_training - Step 10795: {'lr': 0.00045971218893554655, 'samples': 5527552, 'steps': 10795, 'loss/train': 1.9880272150039673} +02/24/2022 15:32:22 - INFO - codeparrot_training - Step 10796: {'lr': 0.0004597032813422367, 'samples': 5528064, 'steps': 10796, 'loss/train': 2.9160258769989014} +02/24/2022 15:32:27 - INFO - codeparrot_training - Step 10797: {'lr': 0.00045969437285062453, 'samples': 5528576, 'steps': 10797, 'loss/train': 2.4157333374023438} +02/24/2022 15:32:31 - INFO - codeparrot_training - Step 10798: {'lr': 0.00045968546346074823, 'samples': 5529088, 'steps': 10798, 'loss/train': 3.254408359527588} +02/24/2022 15:32:37 - INFO - codeparrot_training - Step 10799: {'lr': 0.000459676553172646, 'samples': 5529600, 'steps': 10799, 'loss/train': 2.1163313388824463} +02/24/2022 15:32:40 - INFO - codeparrot_training - Step 10800: {'lr': 0.00045966764198635603, 'samples': 5530112, 'steps': 10800, 'loss/train': 2.4462223052978516} +02/24/2022 15:32:46 - INFO - codeparrot_training - Step 10801: {'lr': 0.0004596587299019164, 'samples': 5530624, 'steps': 10801, 'loss/train': 2.0351014137268066} +02/24/2022 15:32:49 - INFO - codeparrot_training - Step 10802: {'lr': 0.0004596498169193654, 'samples': 5531136, 'steps': 10802, 'loss/train': 1.1944851875305176} +02/24/2022 15:32:55 - INFO - codeparrot_training - Step 10803: {'lr': 0.00045964090303874115, 'samples': 5531648, 'steps': 10803, 'loss/train': 2.605156421661377} +02/24/2022 15:32:59 - INFO - codeparrot_training - Step 10804: {'lr': 0.0004596319882600818, 'samples': 5532160, 'steps': 10804, 'loss/train': 2.180009603500366} +02/24/2022 15:33:04 - INFO - codeparrot_training - Step 10805: {'lr': 0.00045962307258342564, 'samples': 5532672, 'steps': 10805, 'loss/train': 2.020578622817993} +02/24/2022 15:33:08 - INFO - codeparrot_training - Step 10806: {'lr': 0.00045961415600881075, 'samples': 5533184, 'steps': 10806, 'loss/train': 2.613784074783325} +02/24/2022 15:33:13 - INFO - codeparrot_training - Step 10807: {'lr': 0.0004596052385362754, 'samples': 5533696, 'steps': 10807, 'loss/train': 2.621772050857544} +02/24/2022 15:33:17 - INFO - codeparrot_training - Step 10808: {'lr': 0.00045959632016585774, 'samples': 5534208, 'steps': 10808, 'loss/train': 2.413472890853882} +02/24/2022 15:33:22 - INFO - codeparrot_training - Step 10809: {'lr': 0.00045958740089759606, 'samples': 5534720, 'steps': 10809, 'loss/train': 1.3405696153640747} +02/24/2022 15:33:26 - INFO - codeparrot_training - Step 10810: {'lr': 0.0004595784807315284, 'samples': 5535232, 'steps': 10810, 'loss/train': 2.480562210083008} +02/24/2022 15:33:31 - INFO - codeparrot_training - Step 10811: {'lr': 0.0004595695596676932, 'samples': 5535744, 'steps': 10811, 'loss/train': 1.643795371055603} +02/24/2022 15:33:35 - INFO - codeparrot_training - Step 10812: {'lr': 0.00045956063770612843, 'samples': 5536256, 'steps': 10812, 'loss/train': 2.5044960975646973} +02/24/2022 15:33:41 - INFO - codeparrot_training - Step 10813: {'lr': 0.00045955171484687255, 'samples': 5536768, 'steps': 10813, 'loss/train': 3.2657511234283447} +02/24/2022 15:33:44 - INFO - codeparrot_training - Step 10814: {'lr': 0.0004595427910899636, 'samples': 5537280, 'steps': 10814, 'loss/train': 1.3925988674163818} +02/24/2022 15:33:50 - INFO - codeparrot_training - Step 10815: {'lr': 0.00045953386643543987, 'samples': 5537792, 'steps': 10815, 'loss/train': 1.8427921533584595} +02/24/2022 15:33:53 - INFO - codeparrot_training - Step 10816: {'lr': 0.0004595249408833396, 'samples': 5538304, 'steps': 10816, 'loss/train': 2.8641726970672607} +02/24/2022 15:33:59 - INFO - codeparrot_training - Step 10817: {'lr': 0.00045951601443370107, 'samples': 5538816, 'steps': 10817, 'loss/train': 2.1586177349090576} +02/24/2022 15:34:02 - INFO - codeparrot_training - Step 10818: {'lr': 0.00045950708708656236, 'samples': 5539328, 'steps': 10818, 'loss/train': 2.1059703826904297} +02/24/2022 15:34:08 - INFO - codeparrot_training - Step 10819: {'lr': 0.0004594981588419619, 'samples': 5539840, 'steps': 10819, 'loss/train': 2.009880781173706} +02/24/2022 15:34:11 - INFO - codeparrot_training - Step 10820: {'lr': 0.00045948922969993777, 'samples': 5540352, 'steps': 10820, 'loss/train': 3.571247100830078} +02/24/2022 15:34:17 - INFO - codeparrot_training - Step 10821: {'lr': 0.00045948029966052834, 'samples': 5540864, 'steps': 10821, 'loss/train': 0.4738519489765167} +02/24/2022 15:34:20 - INFO - codeparrot_training - Step 10822: {'lr': 0.0004594713687237718, 'samples': 5541376, 'steps': 10822, 'loss/train': 2.604982614517212} +02/24/2022 15:34:26 - INFO - codeparrot_training - Step 10823: {'lr': 0.00045946243688970643, 'samples': 5541888, 'steps': 10823, 'loss/train': 2.6622674465179443} +02/24/2022 15:34:29 - INFO - codeparrot_training - Step 10824: {'lr': 0.00045945350415837056, 'samples': 5542400, 'steps': 10824, 'loss/train': 2.3063488006591797} +02/24/2022 15:34:36 - INFO - codeparrot_training - Step 10825: {'lr': 0.00045944457052980237, 'samples': 5542912, 'steps': 10825, 'loss/train': 1.1940544843673706} +02/24/2022 15:34:39 - INFO - codeparrot_training - Step 10826: {'lr': 0.0004594356360040401, 'samples': 5543424, 'steps': 10826, 'loss/train': 1.5733952522277832} +02/24/2022 15:34:44 - INFO - codeparrot_training - Step 10827: {'lr': 0.0004594267005811221, 'samples': 5543936, 'steps': 10827, 'loss/train': 1.4370841979980469} +02/24/2022 15:34:48 - INFO - codeparrot_training - Step 10828: {'lr': 0.0004594177642610866, 'samples': 5544448, 'steps': 10828, 'loss/train': 2.525839328765869} +02/24/2022 15:34:54 - INFO - codeparrot_training - Step 10829: {'lr': 0.0004594088270439719, 'samples': 5544960, 'steps': 10829, 'loss/train': 1.8740156888961792} +02/24/2022 15:34:57 - INFO - codeparrot_training - Step 10830: {'lr': 0.00045939988892981624, 'samples': 5545472, 'steps': 10830, 'loss/train': 1.3015533685684204} +02/24/2022 15:35:03 - INFO - codeparrot_training - Step 10831: {'lr': 0.00045939094991865806, 'samples': 5545984, 'steps': 10831, 'loss/train': 2.91900634765625} +02/24/2022 15:35:06 - INFO - codeparrot_training - Step 10832: {'lr': 0.00045938201001053546, 'samples': 5546496, 'steps': 10832, 'loss/train': 2.7854232788085938} +02/24/2022 15:35:12 - INFO - codeparrot_training - Step 10833: {'lr': 0.00045937306920548684, 'samples': 5547008, 'steps': 10833, 'loss/train': 2.633469581604004} +02/24/2022 15:35:15 - INFO - codeparrot_training - Step 10834: {'lr': 0.0004593641275035504, 'samples': 5547520, 'steps': 10834, 'loss/train': 2.394116163253784} +02/24/2022 15:35:21 - INFO - codeparrot_training - Step 10835: {'lr': 0.00045935518490476456, 'samples': 5548032, 'steps': 10835, 'loss/train': 2.002525806427002} +02/24/2022 15:35:24 - INFO - codeparrot_training - Step 10836: {'lr': 0.00045934624140916763, 'samples': 5548544, 'steps': 10836, 'loss/train': 2.4536209106445312} +02/24/2022 15:35:30 - INFO - codeparrot_training - Step 10837: {'lr': 0.0004593372970167978, 'samples': 5549056, 'steps': 10837, 'loss/train': 1.2019845247268677} +02/24/2022 15:35:36 - INFO - codeparrot_training - Step 10838: {'lr': 0.0004593283517276936, 'samples': 5549568, 'steps': 10838, 'loss/train': 2.4683220386505127} +02/24/2022 15:35:39 - INFO - codeparrot_training - Step 10839: {'lr': 0.0004593194055418931, 'samples': 5550080, 'steps': 10839, 'loss/train': 1.516682744026184} +02/24/2022 15:35:45 - INFO - codeparrot_training - Step 10840: {'lr': 0.00045931045845943474, 'samples': 5550592, 'steps': 10840, 'loss/train': 2.4613759517669678} +02/24/2022 15:35:48 - INFO - codeparrot_training - Step 10841: {'lr': 0.00045930151048035684, 'samples': 5551104, 'steps': 10841, 'loss/train': 1.887755036354065} +02/24/2022 15:35:54 - INFO - codeparrot_training - Step 10842: {'lr': 0.0004592925616046978, 'samples': 5551616, 'steps': 10842, 'loss/train': 1.085592269897461} +02/24/2022 15:35:57 - INFO - codeparrot_training - Step 10843: {'lr': 0.0004592836118324958, 'samples': 5552128, 'steps': 10843, 'loss/train': 1.5014142990112305} +02/24/2022 15:36:01 - INFO - codeparrot_training - Step 10844: {'lr': 0.0004592746611637893, 'samples': 5552640, 'steps': 10844, 'loss/train': 2.393275022506714} +02/24/2022 15:36:07 - INFO - codeparrot_training - Step 10845: {'lr': 0.00045926570959861656, 'samples': 5553152, 'steps': 10845, 'loss/train': 1.8171045780181885} +02/24/2022 15:36:10 - INFO - codeparrot_training - Step 10846: {'lr': 0.000459256757137016, 'samples': 5553664, 'steps': 10846, 'loss/train': 1.567799687385559} +02/24/2022 15:36:16 - INFO - codeparrot_training - Step 10847: {'lr': 0.00045924780377902595, 'samples': 5554176, 'steps': 10847, 'loss/train': 1.839835286140442} +02/24/2022 15:36:19 - INFO - codeparrot_training - Step 10848: {'lr': 0.00045923884952468475, 'samples': 5554688, 'steps': 10848, 'loss/train': 3.0737197399139404} +02/24/2022 15:36:25 - INFO - codeparrot_training - Step 10849: {'lr': 0.00045922989437403074, 'samples': 5555200, 'steps': 10849, 'loss/train': 2.6786131858825684} +02/24/2022 15:36:29 - INFO - codeparrot_training - Step 10850: {'lr': 0.0004592209383271023, 'samples': 5555712, 'steps': 10850, 'loss/train': 2.7745351791381836} +02/24/2022 15:36:34 - INFO - codeparrot_training - Step 10851: {'lr': 0.0004592119813839378, 'samples': 5556224, 'steps': 10851, 'loss/train': 1.8272593021392822} +02/24/2022 15:36:38 - INFO - codeparrot_training - Step 10852: {'lr': 0.0004592030235445757, 'samples': 5556736, 'steps': 10852, 'loss/train': 2.176370859146118} +02/24/2022 15:36:43 - INFO - codeparrot_training - Step 10853: {'lr': 0.00045919406480905413, 'samples': 5557248, 'steps': 10853, 'loss/train': 9.989912033081055} +02/24/2022 15:36:47 - INFO - codeparrot_training - Step 10854: {'lr': 0.0004591851051774117, 'samples': 5557760, 'steps': 10854, 'loss/train': 2.11321759223938} +02/24/2022 15:36:52 - INFO - codeparrot_training - Step 10855: {'lr': 0.00045917614464968665, 'samples': 5558272, 'steps': 10855, 'loss/train': 2.581662893295288} +02/24/2022 15:36:56 - INFO - codeparrot_training - Step 10856: {'lr': 0.0004591671832259174, 'samples': 5558784, 'steps': 10856, 'loss/train': 2.2340810298919678} +02/24/2022 15:37:01 - INFO - codeparrot_training - Step 10857: {'lr': 0.00045915822090614243, 'samples': 5559296, 'steps': 10857, 'loss/train': 1.391809105873108} +02/24/2022 15:37:05 - INFO - codeparrot_training - Step 10858: {'lr': 0.00045914925769040006, 'samples': 5559808, 'steps': 10858, 'loss/train': 1.831588625907898} +02/24/2022 15:37:11 - INFO - codeparrot_training - Step 10859: {'lr': 0.0004591402935787287, 'samples': 5560320, 'steps': 10859, 'loss/train': 2.4328787326812744} +02/24/2022 15:37:15 - INFO - codeparrot_training - Step 10860: {'lr': 0.00045913132857116663, 'samples': 5560832, 'steps': 10860, 'loss/train': 1.9180657863616943} +02/24/2022 15:37:20 - INFO - codeparrot_training - Step 10861: {'lr': 0.00045912236266775245, 'samples': 5561344, 'steps': 10861, 'loss/train': 2.2141122817993164} +02/24/2022 15:37:24 - INFO - codeparrot_training - Step 10862: {'lr': 0.0004591133958685244, 'samples': 5561856, 'steps': 10862, 'loss/train': 2.738919734954834} +02/24/2022 15:37:29 - INFO - codeparrot_training - Step 10863: {'lr': 0.00045910442817352095, 'samples': 5562368, 'steps': 10863, 'loss/train': 2.1409969329833984} +02/24/2022 15:37:33 - INFO - codeparrot_training - Step 10864: {'lr': 0.0004590954595827806, 'samples': 5562880, 'steps': 10864, 'loss/train': 1.8137991428375244} +02/24/2022 15:37:38 - INFO - codeparrot_training - Step 10865: {'lr': 0.00045908649009634165, 'samples': 5563392, 'steps': 10865, 'loss/train': 1.4388374090194702} +02/24/2022 15:37:42 - INFO - codeparrot_training - Step 10866: {'lr': 0.0004590775197142426, 'samples': 5563904, 'steps': 10866, 'loss/train': 1.8825113773345947} +02/24/2022 15:37:47 - INFO - codeparrot_training - Step 10867: {'lr': 0.0004590685484365218, 'samples': 5564416, 'steps': 10867, 'loss/train': 2.259903907775879} +02/24/2022 15:37:51 - INFO - codeparrot_training - Step 10868: {'lr': 0.00045905957626321775, 'samples': 5564928, 'steps': 10868, 'loss/train': 1.8798149824142456} +02/24/2022 15:37:56 - INFO - codeparrot_training - Step 10869: {'lr': 0.0004590506031943689, 'samples': 5565440, 'steps': 10869, 'loss/train': 1.683966875076294} +02/24/2022 15:38:00 - INFO - codeparrot_training - Step 10870: {'lr': 0.00045904162923001356, 'samples': 5565952, 'steps': 10870, 'loss/train': 2.2319300174713135} +02/24/2022 15:38:06 - INFO - codeparrot_training - Step 10871: {'lr': 0.00045903265437019036, 'samples': 5566464, 'steps': 10871, 'loss/train': 1.6371709108352661} +02/24/2022 15:38:09 - INFO - codeparrot_training - Step 10872: {'lr': 0.00045902367861493754, 'samples': 5566976, 'steps': 10872, 'loss/train': 2.279087781906128} +02/24/2022 15:38:15 - INFO - codeparrot_training - Step 10873: {'lr': 0.00045901470196429376, 'samples': 5567488, 'steps': 10873, 'loss/train': 1.701271891593933} +02/24/2022 15:38:18 - INFO - codeparrot_training - Step 10874: {'lr': 0.0004590057244182972, 'samples': 5568000, 'steps': 10874, 'loss/train': 1.8044042587280273} +02/24/2022 15:38:24 - INFO - codeparrot_training - Step 10875: {'lr': 0.0004589967459769867, 'samples': 5568512, 'steps': 10875, 'loss/train': 1.7072850465774536} +02/24/2022 15:38:27 - INFO - codeparrot_training - Step 10876: {'lr': 0.00045898776664040036, 'samples': 5569024, 'steps': 10876, 'loss/train': 1.484846830368042} +02/24/2022 15:38:33 - INFO - codeparrot_training - Step 10877: {'lr': 0.00045897878640857684, 'samples': 5569536, 'steps': 10877, 'loss/train': 2.2672948837280273} +02/24/2022 15:38:36 - INFO - codeparrot_training - Step 10878: {'lr': 0.00045896980528155454, 'samples': 5570048, 'steps': 10878, 'loss/train': 1.7351562976837158} +02/24/2022 15:38:42 - INFO - codeparrot_training - Step 10879: {'lr': 0.0004589608232593719, 'samples': 5570560, 'steps': 10879, 'loss/train': 1.8515613079071045} +02/24/2022 15:38:45 - INFO - codeparrot_training - Step 10880: {'lr': 0.0004589518403420676, 'samples': 5571072, 'steps': 10880, 'loss/train': 2.7688052654266357} +02/24/2022 15:38:52 - INFO - codeparrot_training - Step 10881: {'lr': 0.0004589428565296798, 'samples': 5571584, 'steps': 10881, 'loss/train': 1.9160237312316895} +02/24/2022 15:38:55 - INFO - codeparrot_training - Step 10882: {'lr': 0.0004589338718222473, 'samples': 5572096, 'steps': 10882, 'loss/train': 2.819997549057007} +02/24/2022 15:39:01 - INFO - codeparrot_training - Step 10883: {'lr': 0.0004589248862198083, 'samples': 5572608, 'steps': 10883, 'loss/train': 1.2545146942138672} +02/24/2022 15:39:04 - INFO - codeparrot_training - Step 10884: {'lr': 0.0004589158997224015, 'samples': 5573120, 'steps': 10884, 'loss/train': 2.1836278438568115} +02/24/2022 15:39:10 - INFO - codeparrot_training - Step 10885: {'lr': 0.0004589069123300653, 'samples': 5573632, 'steps': 10885, 'loss/train': 2.161229372024536} +02/24/2022 15:39:13 - INFO - codeparrot_training - Step 10886: {'lr': 0.0004588979240428383, 'samples': 5574144, 'steps': 10886, 'loss/train': 1.6314976215362549} +02/24/2022 15:39:19 - INFO - codeparrot_training - Step 10887: {'lr': 0.00045888893486075875, 'samples': 5574656, 'steps': 10887, 'loss/train': 1.9694976806640625} +02/24/2022 15:39:22 - INFO - codeparrot_training - Step 10888: {'lr': 0.0004588799447838655, 'samples': 5575168, 'steps': 10888, 'loss/train': 2.6182644367218018} +02/24/2022 15:39:28 - INFO - codeparrot_training - Step 10889: {'lr': 0.0004588709538121968, 'samples': 5575680, 'steps': 10889, 'loss/train': 3.0257925987243652} +02/24/2022 15:39:31 - INFO - codeparrot_training - Step 10890: {'lr': 0.00045886196194579133, 'samples': 5576192, 'steps': 10890, 'loss/train': 2.9008426666259766} +02/24/2022 15:39:37 - INFO - codeparrot_training - Step 10891: {'lr': 0.00045885296918468746, 'samples': 5576704, 'steps': 10891, 'loss/train': 2.428581714630127} +02/24/2022 15:39:40 - INFO - codeparrot_training - Step 10892: {'lr': 0.0004588439755289238, 'samples': 5577216, 'steps': 10892, 'loss/train': 2.0430517196655273} +02/24/2022 15:39:46 - INFO - codeparrot_training - Step 10893: {'lr': 0.00045883498097853894, 'samples': 5577728, 'steps': 10893, 'loss/train': 1.7443327903747559} +02/24/2022 15:39:49 - INFO - codeparrot_training - Step 10894: {'lr': 0.00045882598553357125, 'samples': 5578240, 'steps': 10894, 'loss/train': 2.746281862258911} +02/24/2022 15:39:55 - INFO - codeparrot_training - Step 10895: {'lr': 0.00045881698919405937, 'samples': 5578752, 'steps': 10895, 'loss/train': 2.3394124507904053} +02/24/2022 15:39:58 - INFO - codeparrot_training - Step 10896: {'lr': 0.00045880799196004187, 'samples': 5579264, 'steps': 10896, 'loss/train': 1.704870581626892} +02/24/2022 15:40:05 - INFO - codeparrot_training - Step 10897: {'lr': 0.00045879899383155715, 'samples': 5579776, 'steps': 10897, 'loss/train': 1.8913606405258179} +02/24/2022 15:40:08 - INFO - codeparrot_training - Step 10898: {'lr': 0.00045878999480864386, 'samples': 5580288, 'steps': 10898, 'loss/train': 2.157862663269043} +02/24/2022 15:40:14 - INFO - codeparrot_training - Step 10899: {'lr': 0.0004587809948913406, 'samples': 5580800, 'steps': 10899, 'loss/train': 1.7100781202316284} +02/24/2022 15:40:17 - INFO - codeparrot_training - Step 10900: {'lr': 0.00045877199407968577, 'samples': 5581312, 'steps': 10900, 'loss/train': 4.836899757385254} +02/24/2022 15:40:23 - INFO - codeparrot_training - Step 10901: {'lr': 0.00045876299237371807, 'samples': 5581824, 'steps': 10901, 'loss/train': 1.926213264465332} +02/24/2022 15:40:26 - INFO - codeparrot_training - Step 10902: {'lr': 0.00045875398977347596, 'samples': 5582336, 'steps': 10902, 'loss/train': 2.1030848026275635} +02/24/2022 15:40:32 - INFO - codeparrot_training - Step 10903: {'lr': 0.00045874498627899806, 'samples': 5582848, 'steps': 10903, 'loss/train': 2.2585361003875732} +02/24/2022 15:40:36 - INFO - codeparrot_training - Step 10904: {'lr': 0.00045873598189032295, 'samples': 5583360, 'steps': 10904, 'loss/train': 2.3475728034973145} +02/24/2022 15:40:41 - INFO - codeparrot_training - Step 10905: {'lr': 0.0004587269766074891, 'samples': 5583872, 'steps': 10905, 'loss/train': 1.4917577505111694} +02/24/2022 15:40:45 - INFO - codeparrot_training - Step 10906: {'lr': 0.0004587179704305353, 'samples': 5584384, 'steps': 10906, 'loss/train': 2.9434027671813965} +02/24/2022 15:40:52 - INFO - codeparrot_training - Step 10907: {'lr': 0.00045870896335949987, 'samples': 5584896, 'steps': 10907, 'loss/train': 1.5502668619155884} +02/24/2022 15:40:55 - INFO - codeparrot_training - Step 10908: {'lr': 0.00045869995539442153, 'samples': 5585408, 'steps': 10908, 'loss/train': 1.712061882019043} +02/24/2022 15:41:00 - INFO - codeparrot_training - Step 10909: {'lr': 0.0004586909465353388, 'samples': 5585920, 'steps': 10909, 'loss/train': 2.5004844665527344} +02/24/2022 15:41:06 - INFO - codeparrot_training - Step 10910: {'lr': 0.0004586819367822904, 'samples': 5586432, 'steps': 10910, 'loss/train': 2.4756505489349365} +02/24/2022 15:41:09 - INFO - codeparrot_training - Step 10911: {'lr': 0.00045867292613531484, 'samples': 5586944, 'steps': 10911, 'loss/train': 2.9871819019317627} +02/24/2022 15:41:15 - INFO - codeparrot_training - Step 10912: {'lr': 0.0004586639145944508, 'samples': 5587456, 'steps': 10912, 'loss/train': 1.3270901441574097} +02/24/2022 15:41:18 - INFO - codeparrot_training - Step 10913: {'lr': 0.0004586549021597367, 'samples': 5587968, 'steps': 10913, 'loss/train': 1.992263674736023} +02/24/2022 15:41:24 - INFO - codeparrot_training - Step 10914: {'lr': 0.00045864588883121125, 'samples': 5588480, 'steps': 10914, 'loss/train': 1.609466791152954} +02/24/2022 15:41:27 - INFO - codeparrot_training - Step 10915: {'lr': 0.00045863687460891313, 'samples': 5588992, 'steps': 10915, 'loss/train': 2.2666447162628174} +02/24/2022 15:41:34 - INFO - codeparrot_training - Step 10916: {'lr': 0.0004586278594928808, 'samples': 5589504, 'steps': 10916, 'loss/train': 2.4565460681915283} +02/24/2022 15:41:37 - INFO - codeparrot_training - Step 10917: {'lr': 0.0004586188434831531, 'samples': 5590016, 'steps': 10917, 'loss/train': 2.018866539001465} +02/24/2022 15:41:43 - INFO - codeparrot_training - Step 10918: {'lr': 0.00045860982657976835, 'samples': 5590528, 'steps': 10918, 'loss/train': 1.8322100639343262} +02/24/2022 15:41:46 - INFO - codeparrot_training - Step 10919: {'lr': 0.00045860080878276546, 'samples': 5591040, 'steps': 10919, 'loss/train': 2.1766133308410645} +02/24/2022 15:41:52 - INFO - codeparrot_training - Step 10920: {'lr': 0.0004585917900921829, 'samples': 5591552, 'steps': 10920, 'loss/train': 1.6431580781936646} +02/24/2022 15:41:55 - INFO - codeparrot_training - Step 10921: {'lr': 0.0004585827705080594, 'samples': 5592064, 'steps': 10921, 'loss/train': 0.9396345615386963} +02/24/2022 15:42:01 - INFO - codeparrot_training - Step 10922: {'lr': 0.0004585737500304335, 'samples': 5592576, 'steps': 10922, 'loss/train': 1.7167716026306152} +02/24/2022 15:42:04 - INFO - codeparrot_training - Step 10923: {'lr': 0.0004585647286593439, 'samples': 5593088, 'steps': 10923, 'loss/train': 1.7322036027908325} +02/24/2022 15:42:10 - INFO - codeparrot_training - Step 10924: {'lr': 0.0004585557063948292, 'samples': 5593600, 'steps': 10924, 'loss/train': 1.560473084449768} +02/24/2022 15:42:13 - INFO - codeparrot_training - Step 10925: {'lr': 0.00045854668323692813, 'samples': 5594112, 'steps': 10925, 'loss/train': 3.1254656314849854} +02/24/2022 15:42:19 - INFO - codeparrot_training - Step 10926: {'lr': 0.00045853765918567926, 'samples': 5594624, 'steps': 10926, 'loss/train': 2.171631336212158} +02/24/2022 15:42:22 - INFO - codeparrot_training - Step 10927: {'lr': 0.00045852863424112125, 'samples': 5595136, 'steps': 10927, 'loss/train': 2.027039051055908} +02/24/2022 15:42:28 - INFO - codeparrot_training - Step 10928: {'lr': 0.0004585196084032928, 'samples': 5595648, 'steps': 10928, 'loss/train': 2.348907470703125} +02/24/2022 15:42:31 - INFO - codeparrot_training - Step 10929: {'lr': 0.0004585105816722326, 'samples': 5596160, 'steps': 10929, 'loss/train': 1.5306440591812134} +02/24/2022 15:42:37 - INFO - codeparrot_training - Step 10930: {'lr': 0.0004585015540479792, 'samples': 5596672, 'steps': 10930, 'loss/train': 2.6781058311462402} +02/24/2022 15:42:40 - INFO - codeparrot_training - Step 10931: {'lr': 0.00045849252553057144, 'samples': 5597184, 'steps': 10931, 'loss/train': 2.7256364822387695} +02/24/2022 15:42:46 - INFO - codeparrot_training - Step 10932: {'lr': 0.00045848349612004786, 'samples': 5597696, 'steps': 10932, 'loss/train': 2.1196982860565186} +02/24/2022 15:42:50 - INFO - codeparrot_training - Step 10933: {'lr': 0.0004584744658164472, 'samples': 5598208, 'steps': 10933, 'loss/train': 1.3694515228271484} +02/24/2022 15:42:55 - INFO - codeparrot_training - Step 10934: {'lr': 0.00045846543461980805, 'samples': 5598720, 'steps': 10934, 'loss/train': 1.7228940725326538} +02/24/2022 15:42:59 - INFO - codeparrot_training - Step 10935: {'lr': 0.0004584564025301693, 'samples': 5599232, 'steps': 10935, 'loss/train': 2.2873504161834717} +02/24/2022 15:43:04 - INFO - codeparrot_training - Step 10936: {'lr': 0.00045844736954756937, 'samples': 5599744, 'steps': 10936, 'loss/train': 1.5120030641555786} +02/24/2022 15:43:08 - INFO - codeparrot_training - Step 10937: {'lr': 0.0004584383356720472, 'samples': 5600256, 'steps': 10937, 'loss/train': 1.1111712455749512} +02/24/2022 15:43:13 - INFO - codeparrot_training - Step 10938: {'lr': 0.0004584293009036414, 'samples': 5600768, 'steps': 10938, 'loss/train': 1.3232792615890503} +02/24/2022 15:43:17 - INFO - codeparrot_training - Step 10939: {'lr': 0.0004584202652423906, 'samples': 5601280, 'steps': 10939, 'loss/train': 1.0158010721206665} +02/24/2022 15:43:22 - INFO - codeparrot_training - Step 10940: {'lr': 0.0004584112286883336, 'samples': 5601792, 'steps': 10940, 'loss/train': 0.9630935788154602} +02/24/2022 15:43:26 - INFO - codeparrot_training - Step 10941: {'lr': 0.00045840219124150907, 'samples': 5602304, 'steps': 10941, 'loss/train': 2.6655266284942627} +02/24/2022 15:43:32 - INFO - codeparrot_training - Step 10942: {'lr': 0.0004583931529019557, 'samples': 5602816, 'steps': 10942, 'loss/train': 4.958943843841553} +02/24/2022 15:43:36 - INFO - codeparrot_training - Step 10943: {'lr': 0.00045838411366971225, 'samples': 5603328, 'steps': 10943, 'loss/train': 1.0271648168563843} +02/24/2022 15:43:41 - INFO - codeparrot_training - Step 10944: {'lr': 0.00045837507354481744, 'samples': 5603840, 'steps': 10944, 'loss/train': 1.8632652759552002} +02/24/2022 15:43:44 - INFO - codeparrot_training - Step 10945: {'lr': 0.00045836603252731004, 'samples': 5604352, 'steps': 10945, 'loss/train': 1.5784664154052734} +02/24/2022 15:43:50 - INFO - codeparrot_training - Step 10946: {'lr': 0.0004583569906172286, 'samples': 5604864, 'steps': 10946, 'loss/train': 2.515704393386841} +02/24/2022 15:43:53 - INFO - codeparrot_training - Step 10947: {'lr': 0.000458347947814612, 'samples': 5605376, 'steps': 10947, 'loss/train': 1.323952317237854} +02/24/2022 15:43:59 - INFO - codeparrot_training - Step 10948: {'lr': 0.00045833890411949897, 'samples': 5605888, 'steps': 10948, 'loss/train': 1.5894198417663574} +02/24/2022 15:44:03 - INFO - codeparrot_training - Step 10949: {'lr': 0.0004583298595319283, 'samples': 5606400, 'steps': 10949, 'loss/train': 1.3341354131698608} +02/24/2022 15:44:08 - INFO - codeparrot_training - Step 10950: {'lr': 0.0004583208140519386, 'samples': 5606912, 'steps': 10950, 'loss/train': 2.3943071365356445} +02/24/2022 15:44:12 - INFO - codeparrot_training - Step 10951: {'lr': 0.00045831176767956866, 'samples': 5607424, 'steps': 10951, 'loss/train': 1.355905294418335} +02/24/2022 15:44:18 - INFO - codeparrot_training - Step 10952: {'lr': 0.0004583027204148573, 'samples': 5607936, 'steps': 10952, 'loss/train': 2.6468935012817383} +02/24/2022 15:44:21 - INFO - codeparrot_training - Step 10953: {'lr': 0.00045829367225784317, 'samples': 5608448, 'steps': 10953, 'loss/train': 0.3694375455379486} +02/24/2022 15:44:27 - INFO - codeparrot_training - Step 10954: {'lr': 0.0004582846232085651, 'samples': 5608960, 'steps': 10954, 'loss/train': 3.0442774295806885} +02/24/2022 15:44:30 - INFO - codeparrot_training - Step 10955: {'lr': 0.0004582755732670619, 'samples': 5609472, 'steps': 10955, 'loss/train': 1.7182607650756836} +02/24/2022 15:44:36 - INFO - codeparrot_training - Step 10956: {'lr': 0.00045826652243337226, 'samples': 5609984, 'steps': 10956, 'loss/train': 1.9290764331817627} +02/24/2022 15:44:39 - INFO - codeparrot_training - Step 10957: {'lr': 0.0004582574707075349, 'samples': 5610496, 'steps': 10957, 'loss/train': 3.0623626708984375} +02/24/2022 15:44:45 - INFO - codeparrot_training - Step 10958: {'lr': 0.00045824841808958874, 'samples': 5611008, 'steps': 10958, 'loss/train': 2.4898617267608643} +02/24/2022 15:44:50 - INFO - codeparrot_training - Step 10959: {'lr': 0.0004582393645795725, 'samples': 5611520, 'steps': 10959, 'loss/train': 1.9221982955932617} +02/24/2022 15:44:54 - INFO - codeparrot_training - Step 10960: {'lr': 0.00045823031017752484, 'samples': 5612032, 'steps': 10960, 'loss/train': 2.3630614280700684} +02/24/2022 15:45:00 - INFO - codeparrot_training - Step 10961: {'lr': 0.00045822125488348474, 'samples': 5612544, 'steps': 10961, 'loss/train': 1.5384422540664673} +02/24/2022 15:45:03 - INFO - codeparrot_training - Step 10962: {'lr': 0.00045821219869749086, 'samples': 5613056, 'steps': 10962, 'loss/train': 2.370610237121582} +02/24/2022 15:45:09 - INFO - codeparrot_training - Step 10963: {'lr': 0.00045820314161958207, 'samples': 5613568, 'steps': 10963, 'loss/train': 1.781028389930725} +02/24/2022 15:45:12 - INFO - codeparrot_training - Step 10964: {'lr': 0.00045819408364979714, 'samples': 5614080, 'steps': 10964, 'loss/train': 0.5244473814964294} +02/24/2022 15:45:18 - INFO - codeparrot_training - Step 10965: {'lr': 0.0004581850247881749, 'samples': 5614592, 'steps': 10965, 'loss/train': 2.107774496078491} +02/24/2022 15:45:21 - INFO - codeparrot_training - Step 10966: {'lr': 0.000458175965034754, 'samples': 5615104, 'steps': 10966, 'loss/train': 3.912019729614258} +02/24/2022 15:45:27 - INFO - codeparrot_training - Step 10967: {'lr': 0.0004581669043895734, 'samples': 5615616, 'steps': 10967, 'loss/train': 2.6622848510742188} +02/24/2022 15:45:30 - INFO - codeparrot_training - Step 10968: {'lr': 0.000458157842852672, 'samples': 5616128, 'steps': 10968, 'loss/train': 2.020294427871704} +02/24/2022 15:45:36 - INFO - codeparrot_training - Step 10969: {'lr': 0.0004581487804240884, 'samples': 5616640, 'steps': 10969, 'loss/train': 1.6713902950286865} +02/24/2022 15:45:39 - INFO - codeparrot_training - Step 10970: {'lr': 0.00045813971710386147, 'samples': 5617152, 'steps': 10970, 'loss/train': 1.7464865446090698} +02/24/2022 15:45:45 - INFO - codeparrot_training - Step 10971: {'lr': 0.0004581306528920302, 'samples': 5617664, 'steps': 10971, 'loss/train': 2.3608994483947754} +02/24/2022 15:45:48 - INFO - codeparrot_training - Step 10972: {'lr': 0.0004581215877886332, 'samples': 5618176, 'steps': 10972, 'loss/train': 3.615652561187744} +02/24/2022 15:45:54 - INFO - codeparrot_training - Step 10973: {'lr': 0.0004581125217937095, 'samples': 5618688, 'steps': 10973, 'loss/train': 2.6661667823791504} +02/24/2022 15:45:57 - INFO - codeparrot_training - Step 10974: {'lr': 0.00045810345490729777, 'samples': 5619200, 'steps': 10974, 'loss/train': 1.650611400604248} +02/24/2022 15:46:03 - INFO - codeparrot_training - Step 10975: {'lr': 0.00045809438712943694, 'samples': 5619712, 'steps': 10975, 'loss/train': 1.4975214004516602} +02/24/2022 15:46:06 - INFO - codeparrot_training - Step 10976: {'lr': 0.0004580853184601659, 'samples': 5620224, 'steps': 10976, 'loss/train': 1.5695899724960327} +02/24/2022 15:46:13 - INFO - codeparrot_training - Step 10977: {'lr': 0.00045807624889952336, 'samples': 5620736, 'steps': 10977, 'loss/train': 0.6024316549301147} +02/24/2022 15:46:16 - INFO - codeparrot_training - Step 10978: {'lr': 0.0004580671784475482, 'samples': 5621248, 'steps': 10978, 'loss/train': 1.947343349456787} +02/24/2022 15:46:21 - INFO - codeparrot_training - Step 10979: {'lr': 0.0004580581071042794, 'samples': 5621760, 'steps': 10979, 'loss/train': 2.1722631454467773} +02/24/2022 15:46:25 - INFO - codeparrot_training - Step 10980: {'lr': 0.00045804903486975566, 'samples': 5622272, 'steps': 10980, 'loss/train': 2.6500258445739746} +02/24/2022 15:46:31 - INFO - codeparrot_training - Step 10981: {'lr': 0.00045803996174401595, 'samples': 5622784, 'steps': 10981, 'loss/train': 2.291271209716797} +02/24/2022 15:46:34 - INFO - codeparrot_training - Step 10982: {'lr': 0.00045803088772709914, 'samples': 5623296, 'steps': 10982, 'loss/train': 3.9501991271972656} +02/24/2022 15:46:40 - INFO - codeparrot_training - Step 10983: {'lr': 0.00045802181281904403, 'samples': 5623808, 'steps': 10983, 'loss/train': 2.3780431747436523} +02/24/2022 15:46:43 - INFO - codeparrot_training - Step 10984: {'lr': 0.00045801273701988955, 'samples': 5624320, 'steps': 10984, 'loss/train': 2.2774314880371094} +02/24/2022 15:46:49 - INFO - codeparrot_training - Step 10985: {'lr': 0.0004580036603296746, 'samples': 5624832, 'steps': 10985, 'loss/train': 2.2266483306884766} +02/24/2022 15:46:52 - INFO - codeparrot_training - Step 10986: {'lr': 0.00045799458274843786, 'samples': 5625344, 'steps': 10986, 'loss/train': 6.575760364532471} +02/24/2022 15:46:58 - INFO - codeparrot_training - Step 10987: {'lr': 0.0004579855042762185, 'samples': 5625856, 'steps': 10987, 'loss/train': 2.204773187637329} +02/24/2022 15:47:02 - INFO - codeparrot_training - Step 10988: {'lr': 0.00045797642491305523, 'samples': 5626368, 'steps': 10988, 'loss/train': 1.8324697017669678} +02/24/2022 15:47:07 - INFO - codeparrot_training - Step 10989: {'lr': 0.00045796734465898705, 'samples': 5626880, 'steps': 10989, 'loss/train': 2.0985260009765625} +02/24/2022 15:47:11 - INFO - codeparrot_training - Step 10990: {'lr': 0.00045795826351405276, 'samples': 5627392, 'steps': 10990, 'loss/train': 2.278046131134033} +02/24/2022 15:47:16 - INFO - codeparrot_training - Step 10991: {'lr': 0.00045794918147829135, 'samples': 5627904, 'steps': 10991, 'loss/train': 2.584219217300415} +02/24/2022 15:47:20 - INFO - codeparrot_training - Step 10992: {'lr': 0.00045794009855174163, 'samples': 5628416, 'steps': 10992, 'loss/train': 2.37514591217041} +02/24/2022 15:47:25 - INFO - codeparrot_training - Step 10993: {'lr': 0.0004579310147344425, 'samples': 5628928, 'steps': 10993, 'loss/train': 2.830026626586914} +02/24/2022 15:47:29 - INFO - codeparrot_training - Step 10994: {'lr': 0.000457921930026433, 'samples': 5629440, 'steps': 10994, 'loss/train': 1.923923373222351} +02/24/2022 15:47:34 - INFO - codeparrot_training - Step 10995: {'lr': 0.00045791284442775205, 'samples': 5629952, 'steps': 10995, 'loss/train': 2.3469104766845703} +02/24/2022 15:47:38 - INFO - codeparrot_training - Step 10996: {'lr': 0.0004579037579384384, 'samples': 5630464, 'steps': 10996, 'loss/train': 1.6095459461212158} +02/24/2022 15:47:45 - INFO - codeparrot_training - Step 10997: {'lr': 0.00045789467055853104, 'samples': 5630976, 'steps': 10997, 'loss/train': 2.2742464542388916} +02/24/2022 15:47:48 - INFO - codeparrot_training - Step 10998: {'lr': 0.000457885582288069, 'samples': 5631488, 'steps': 10998, 'loss/train': 1.8267676830291748} +02/24/2022 15:47:54 - INFO - codeparrot_training - Step 10999: {'lr': 0.0004578764931270911, 'samples': 5632000, 'steps': 10999, 'loss/train': 1.6772180795669556} +02/24/2022 15:47:54 - INFO - codeparrot_training - Evaluating and saving model checkpoint