diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -23439,3 +23439,1009 @@ Use FP16 precision: False 02/25/2022 07:21:51 - INFO - codeparrot_training - Step 22998: {'lr': 0.0002988046762093344, 'samples': 11775488, 'steps': 22998, 'loss/train': 1.0141639709472656} 02/25/2022 07:21:55 - INFO - codeparrot_training - Step 22999: {'lr': 0.00029878862846118075, 'samples': 11776000, 'steps': 22999, 'loss/train': 0.8634439706802368} 02/25/2022 07:21:55 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 07:22:12 - WARNING - huggingface_hub.repository - Several commits (23) will be pushed upstream. +02/25/2022 07:22:12 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 07:22:46 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + de93bae..04e6e2d floral-grass-11 -> floral-grass-11 + +02/25/2022 07:22:53 - INFO - codeparrot_training - Step 23000: {'lr': 0.0002987725805040321, 'samples': 11776512, 'steps': 23000, 'loss/train': 2.382962703704834} +02/25/2022 07:22:56 - INFO - codeparrot_training - Step 23001: {'lr': 0.00029875653233795715, 'samples': 11777024, 'steps': 23001, 'loss/train': 1.9941284656524658} +02/25/2022 07:23:02 - INFO - codeparrot_training - Step 23002: {'lr': 0.0002987404839630248, 'samples': 11777536, 'steps': 23002, 'loss/train': 2.2633652687072754} +02/25/2022 07:23:05 - INFO - codeparrot_training - Step 23003: {'lr': 0.00029872443537930357, 'samples': 11778048, 'steps': 23003, 'loss/train': 2.39821457862854} +02/25/2022 07:23:09 - INFO - codeparrot_training - Step 23004: {'lr': 0.0002987083865868624, 'samples': 11778560, 'steps': 23004, 'loss/train': 2.1304678916931152} +02/25/2022 07:23:14 - INFO - codeparrot_training - Step 23005: {'lr': 0.0002986923375857699, 'samples': 11779072, 'steps': 23005, 'loss/train': 0.8211341500282288} +02/25/2022 07:23:18 - INFO - codeparrot_training - Step 23006: {'lr': 0.00029867628837609503, 'samples': 11779584, 'steps': 23006, 'loss/train': 2.1896445751190186} +02/25/2022 07:23:23 - INFO - codeparrot_training - Step 23007: {'lr': 0.0002986602389579062, 'samples': 11780096, 'steps': 23007, 'loss/train': 1.629866123199463} +02/25/2022 07:23:29 - INFO - codeparrot_training - Step 23008: {'lr': 0.0002986441893312726, 'samples': 11780608, 'steps': 23008, 'loss/train': 1.587494969367981} +02/25/2022 07:23:32 - INFO - codeparrot_training - Step 23009: {'lr': 0.0002986281394962626, 'samples': 11781120, 'steps': 23009, 'loss/train': 1.5454816818237305} +02/25/2022 07:23:39 - INFO - codeparrot_training - Step 23010: {'lr': 0.00029861208945294507, 'samples': 11781632, 'steps': 23010, 'loss/train': 2.3994152545928955} +02/25/2022 07:23:42 - INFO - codeparrot_training - Step 23011: {'lr': 0.00029859603920138876, 'samples': 11782144, 'steps': 23011, 'loss/train': 1.8347333669662476} +02/25/2022 07:23:48 - INFO - codeparrot_training - Step 23012: {'lr': 0.00029857998874166253, 'samples': 11782656, 'steps': 23012, 'loss/train': 1.088434100151062} +02/25/2022 07:23:51 - INFO - codeparrot_training - Step 23013: {'lr': 0.00029856393807383504, 'samples': 11783168, 'steps': 23013, 'loss/train': 1.871364712715149} +02/25/2022 07:23:57 - INFO - codeparrot_training - Step 23014: {'lr': 0.000298547887197975, 'samples': 11783680, 'steps': 23014, 'loss/train': 1.372658133506775} +02/25/2022 07:24:00 - INFO - codeparrot_training - Step 23015: {'lr': 0.0002985318361141513, 'samples': 11784192, 'steps': 23015, 'loss/train': 1.7402955293655396} +02/25/2022 07:24:06 - INFO - codeparrot_training - Step 23016: {'lr': 0.0002985157848224326, 'samples': 11784704, 'steps': 23016, 'loss/train': 1.1968350410461426} +02/25/2022 07:24:09 - INFO - codeparrot_training - Step 23017: {'lr': 0.00029849973332288763, 'samples': 11785216, 'steps': 23017, 'loss/train': 2.6905784606933594} +02/25/2022 07:24:15 - INFO - codeparrot_training - Step 23018: {'lr': 0.00029848368161558526, 'samples': 11785728, 'steps': 23018, 'loss/train': 1.9995077848434448} +02/25/2022 07:24:18 - INFO - codeparrot_training - Step 23019: {'lr': 0.0002984676297005942, 'samples': 11786240, 'steps': 23019, 'loss/train': 1.9210776090621948} +02/25/2022 07:24:24 - INFO - codeparrot_training - Step 23020: {'lr': 0.0002984515775779832, 'samples': 11786752, 'steps': 23020, 'loss/train': 2.007884979248047} +02/25/2022 07:24:28 - INFO - codeparrot_training - Step 23021: {'lr': 0.00029843552524782104, 'samples': 11787264, 'steps': 23021, 'loss/train': 3.0704941749572754} +02/25/2022 07:24:33 - INFO - codeparrot_training - Step 23022: {'lr': 0.00029841947271017647, 'samples': 11787776, 'steps': 23022, 'loss/train': 1.3323493003845215} +02/25/2022 07:24:37 - INFO - codeparrot_training - Step 23023: {'lr': 0.0002984034199651182, 'samples': 11788288, 'steps': 23023, 'loss/train': 1.605269193649292} +02/25/2022 07:24:42 - INFO - codeparrot_training - Step 23024: {'lr': 0.00029838736701271514, 'samples': 11788800, 'steps': 23024, 'loss/train': 2.2570393085479736} +02/25/2022 07:24:46 - INFO - codeparrot_training - Step 23025: {'lr': 0.00029837131385303587, 'samples': 11789312, 'steps': 23025, 'loss/train': 1.5309020280838013} +02/25/2022 07:24:52 - INFO - codeparrot_training - Step 23026: {'lr': 0.0002983552604861493, 'samples': 11789824, 'steps': 23026, 'loss/train': 2.29774808883667} +02/25/2022 07:24:55 - INFO - codeparrot_training - Step 23027: {'lr': 0.0002983392069121241, 'samples': 11790336, 'steps': 23027, 'loss/train': 1.4504623413085938} +02/25/2022 07:25:00 - INFO - codeparrot_training - Step 23028: {'lr': 0.00029832315313102915, 'samples': 11790848, 'steps': 23028, 'loss/train': 1.5331591367721558} +02/25/2022 07:25:04 - INFO - codeparrot_training - Step 23029: {'lr': 0.00029830709914293306, 'samples': 11791360, 'steps': 23029, 'loss/train': 2.652798652648926} +02/25/2022 07:25:09 - INFO - codeparrot_training - Step 23030: {'lr': 0.00029829104494790483, 'samples': 11791872, 'steps': 23030, 'loss/train': 2.51054310798645} +02/25/2022 07:25:13 - INFO - codeparrot_training - Step 23031: {'lr': 0.00029827499054601306, 'samples': 11792384, 'steps': 23031, 'loss/train': 1.9142118692398071} +02/25/2022 07:25:19 - INFO - codeparrot_training - Step 23032: {'lr': 0.0002982589359373265, 'samples': 11792896, 'steps': 23032, 'loss/train': 2.2765953540802} +02/25/2022 07:25:23 - INFO - codeparrot_training - Step 23033: {'lr': 0.000298242881121914, 'samples': 11793408, 'steps': 23033, 'loss/train': 2.8762152194976807} +02/25/2022 07:25:28 - INFO - codeparrot_training - Step 23034: {'lr': 0.00029822682609984436, 'samples': 11793920, 'steps': 23034, 'loss/train': 1.7421038150787354} +02/25/2022 07:25:31 - INFO - codeparrot_training - Step 23035: {'lr': 0.00029821077087118625, 'samples': 11794432, 'steps': 23035, 'loss/train': 1.9507066011428833} +02/25/2022 07:25:37 - INFO - codeparrot_training - Step 23036: {'lr': 0.00029819471543600856, 'samples': 11794944, 'steps': 23036, 'loss/train': 2.204113245010376} +02/25/2022 07:25:41 - INFO - codeparrot_training - Step 23037: {'lr': 0.00029817865979437996, 'samples': 11795456, 'steps': 23037, 'loss/train': 2.5255625247955322} +02/25/2022 07:25:46 - INFO - codeparrot_training - Step 23038: {'lr': 0.0002981626039463693, 'samples': 11795968, 'steps': 23038, 'loss/train': 1.2746597528457642} +02/25/2022 07:25:50 - INFO - codeparrot_training - Step 23039: {'lr': 0.0002981465478920454, 'samples': 11796480, 'steps': 23039, 'loss/train': 1.636218547821045} +02/25/2022 07:25:55 - INFO - codeparrot_training - Step 23040: {'lr': 0.00029813049163147687, 'samples': 11796992, 'steps': 23040, 'loss/train': 2.1896657943725586} +02/25/2022 07:25:59 - INFO - codeparrot_training - Step 23041: {'lr': 0.0002981144351647327, 'samples': 11797504, 'steps': 23041, 'loss/train': 1.9750356674194336} +02/25/2022 07:26:04 - INFO - codeparrot_training - Step 23042: {'lr': 0.0002980983784918815, 'samples': 11798016, 'steps': 23042, 'loss/train': 3.3246102333068848} +02/25/2022 07:26:08 - INFO - codeparrot_training - Step 23043: {'lr': 0.0002980823216129921, 'samples': 11798528, 'steps': 23043, 'loss/train': 1.5700490474700928} +02/25/2022 07:26:13 - INFO - codeparrot_training - Step 23044: {'lr': 0.00029806626452813333, 'samples': 11799040, 'steps': 23044, 'loss/train': 1.8791347742080688} +02/25/2022 07:26:17 - INFO - codeparrot_training - Step 23045: {'lr': 0.000298050207237374, 'samples': 11799552, 'steps': 23045, 'loss/train': 1.9777239561080933} +02/25/2022 07:26:23 - INFO - codeparrot_training - Step 23046: {'lr': 0.0002980341497407828, 'samples': 11800064, 'steps': 23046, 'loss/train': 1.7885113954544067} +02/25/2022 07:26:27 - INFO - codeparrot_training - Step 23047: {'lr': 0.0002980180920384286, 'samples': 11800576, 'steps': 23047, 'loss/train': 1.5340200662612915} +02/25/2022 07:26:32 - INFO - codeparrot_training - Step 23048: {'lr': 0.00029800203413038, 'samples': 11801088, 'steps': 23048, 'loss/train': 1.889414668083191} +02/25/2022 07:26:36 - INFO - codeparrot_training - Step 23049: {'lr': 0.000297985976016706, 'samples': 11801600, 'steps': 23049, 'loss/train': 1.3475145101547241} +02/25/2022 07:26:41 - INFO - codeparrot_training - Step 23050: {'lr': 0.0002979699176974754, 'samples': 11802112, 'steps': 23050, 'loss/train': 1.8376847505569458} +02/25/2022 07:26:45 - INFO - codeparrot_training - Step 23051: {'lr': 0.0002979538591727568, 'samples': 11802624, 'steps': 23051, 'loss/train': 1.7431894540786743} +02/25/2022 07:26:50 - INFO - codeparrot_training - Step 23052: {'lr': 0.00029793780044261916, 'samples': 11803136, 'steps': 23052, 'loss/train': 2.2214648723602295} +02/25/2022 07:26:54 - INFO - codeparrot_training - Step 23053: {'lr': 0.0002979217415071311, 'samples': 11803648, 'steps': 23053, 'loss/train': 2.104339122772217} +02/25/2022 07:26:59 - INFO - codeparrot_training - Step 23054: {'lr': 0.00029790568236636166, 'samples': 11804160, 'steps': 23054, 'loss/train': 1.9334293603897095} +02/25/2022 07:27:03 - INFO - codeparrot_training - Step 23055: {'lr': 0.0002978896230203794, 'samples': 11804672, 'steps': 23055, 'loss/train': 1.538697600364685} +02/25/2022 07:27:09 - INFO - codeparrot_training - Step 23056: {'lr': 0.0002978735634692533, 'samples': 11805184, 'steps': 23056, 'loss/train': 2.4216134548187256} +02/25/2022 07:27:12 - INFO - codeparrot_training - Step 23057: {'lr': 0.0002978575037130519, 'samples': 11805696, 'steps': 23057, 'loss/train': 1.93329656124115} +02/25/2022 07:27:18 - INFO - codeparrot_training - Step 23058: {'lr': 0.0002978414437518443, 'samples': 11806208, 'steps': 23058, 'loss/train': 0.9649325013160706} +02/25/2022 07:27:21 - INFO - codeparrot_training - Step 23059: {'lr': 0.00029782538358569905, 'samples': 11806720, 'steps': 23059, 'loss/train': 0.5590830445289612} +02/25/2022 07:27:27 - INFO - codeparrot_training - Step 23060: {'lr': 0.0002978093232146851, 'samples': 11807232, 'steps': 23060, 'loss/train': 2.119703769683838} +02/25/2022 07:27:30 - INFO - codeparrot_training - Step 23061: {'lr': 0.00029779326263887113, 'samples': 11807744, 'steps': 23061, 'loss/train': 1.141127586364746} +02/25/2022 07:27:36 - INFO - codeparrot_training - Step 23062: {'lr': 0.00029777720185832605, 'samples': 11808256, 'steps': 23062, 'loss/train': 2.6110951900482178} +02/25/2022 07:27:40 - INFO - codeparrot_training - Step 23063: {'lr': 0.0002977611408731186, 'samples': 11808768, 'steps': 23063, 'loss/train': 2.351412534713745} +02/25/2022 07:27:43 - INFO - codeparrot_training - Step 23064: {'lr': 0.0002977450796833176, 'samples': 11809280, 'steps': 23064, 'loss/train': 0.686683177947998} +02/25/2022 07:27:49 - INFO - codeparrot_training - Step 23065: {'lr': 0.0002977290182889918, 'samples': 11809792, 'steps': 23065, 'loss/train': 1.3830899000167847} +02/25/2022 07:27:52 - INFO - codeparrot_training - Step 23066: {'lr': 0.00029771295669021, 'samples': 11810304, 'steps': 23066, 'loss/train': 1.66257643699646} +02/25/2022 07:27:59 - INFO - codeparrot_training - Step 23067: {'lr': 0.00029769689488704117, 'samples': 11810816, 'steps': 23067, 'loss/train': 1.7433782815933228} +02/25/2022 07:28:02 - INFO - codeparrot_training - Step 23068: {'lr': 0.00029768083287955394, 'samples': 11811328, 'steps': 23068, 'loss/train': 0.8610262274742126} +02/25/2022 07:28:07 - INFO - codeparrot_training - Step 23069: {'lr': 0.00029766477066781716, 'samples': 11811840, 'steps': 23069, 'loss/train': 2.1658577919006348} +02/25/2022 07:28:11 - INFO - codeparrot_training - Step 23070: {'lr': 0.0002976487082518996, 'samples': 11812352, 'steps': 23070, 'loss/train': 1.1951266527175903} +02/25/2022 07:28:17 - INFO - codeparrot_training - Step 23071: {'lr': 0.00029763264563187016, 'samples': 11812864, 'steps': 23071, 'loss/train': 1.8942818641662598} +02/25/2022 07:28:20 - INFO - codeparrot_training - Step 23072: {'lr': 0.0002976165828077975, 'samples': 11813376, 'steps': 23072, 'loss/train': 1.3101755380630493} +02/25/2022 07:28:26 - INFO - codeparrot_training - Step 23073: {'lr': 0.00029760051977975057, 'samples': 11813888, 'steps': 23073, 'loss/train': 1.9660675525665283} +02/25/2022 07:28:29 - INFO - codeparrot_training - Step 23074: {'lr': 0.00029758445654779814, 'samples': 11814400, 'steps': 23074, 'loss/train': 1.7070472240447998} +02/25/2022 07:28:35 - INFO - codeparrot_training - Step 23075: {'lr': 0.000297568393112009, 'samples': 11814912, 'steps': 23075, 'loss/train': 1.3628267049789429} +02/25/2022 07:28:38 - INFO - codeparrot_training - Step 23076: {'lr': 0.00029755232947245195, 'samples': 11815424, 'steps': 23076, 'loss/train': 1.8507574796676636} +02/25/2022 07:28:44 - INFO - codeparrot_training - Step 23077: {'lr': 0.0002975362656291958, 'samples': 11815936, 'steps': 23077, 'loss/train': 2.1007096767425537} +02/25/2022 07:28:47 - INFO - codeparrot_training - Step 23078: {'lr': 0.0002975202015823095, 'samples': 11816448, 'steps': 23078, 'loss/train': 0.45156094431877136} +02/25/2022 07:28:53 - INFO - codeparrot_training - Step 23079: {'lr': 0.0002975041373318617, 'samples': 11816960, 'steps': 23079, 'loss/train': 1.8716130256652832} +02/25/2022 07:28:57 - INFO - codeparrot_training - Step 23080: {'lr': 0.0002974880728779212, 'samples': 11817472, 'steps': 23080, 'loss/train': 1.1723854541778564} +02/25/2022 07:29:02 - INFO - codeparrot_training - Step 23081: {'lr': 0.00029747200822055684, 'samples': 11817984, 'steps': 23081, 'loss/train': 0.7784999012947083} +02/25/2022 07:29:05 - INFO - codeparrot_training - Step 23082: {'lr': 0.0002974559433598376, 'samples': 11818496, 'steps': 23082, 'loss/train': 1.902015209197998} +02/25/2022 07:29:11 - INFO - codeparrot_training - Step 23083: {'lr': 0.0002974398782958321, 'samples': 11819008, 'steps': 23083, 'loss/train': 1.523607611656189} +02/25/2022 07:29:15 - INFO - codeparrot_training - Step 23084: {'lr': 0.00029742381302860923, 'samples': 11819520, 'steps': 23084, 'loss/train': 2.626331329345703} +02/25/2022 07:29:20 - INFO - codeparrot_training - Step 23085: {'lr': 0.00029740774755823777, 'samples': 11820032, 'steps': 23085, 'loss/train': 2.2582669258117676} +02/25/2022 07:29:24 - INFO - codeparrot_training - Step 23086: {'lr': 0.0002973916818847866, 'samples': 11820544, 'steps': 23086, 'loss/train': 3.8212239742279053} +02/25/2022 07:29:29 - INFO - codeparrot_training - Step 23087: {'lr': 0.00029737561600832454, 'samples': 11821056, 'steps': 23087, 'loss/train': 2.4119222164154053} +02/25/2022 07:29:33 - INFO - codeparrot_training - Step 23088: {'lr': 0.00029735954992892035, 'samples': 11821568, 'steps': 23088, 'loss/train': 2.844414234161377} +02/25/2022 07:29:38 - INFO - codeparrot_training - Step 23089: {'lr': 0.00029734348364664285, 'samples': 11822080, 'steps': 23089, 'loss/train': 1.7163355350494385} +02/25/2022 07:29:42 - INFO - codeparrot_training - Step 23090: {'lr': 0.000297327417161561, 'samples': 11822592, 'steps': 23090, 'loss/train': 1.9642480611801147} +02/25/2022 07:29:47 - INFO - codeparrot_training - Step 23091: {'lr': 0.0002973113504737435, 'samples': 11823104, 'steps': 23091, 'loss/train': 2.682631492614746} +02/25/2022 07:29:51 - INFO - codeparrot_training - Step 23092: {'lr': 0.00029729528358325914, 'samples': 11823616, 'steps': 23092, 'loss/train': 0.8743570446968079} +02/25/2022 07:29:57 - INFO - codeparrot_training - Step 23093: {'lr': 0.00029727921649017687, 'samples': 11824128, 'steps': 23093, 'loss/train': 0.8451046943664551} +02/25/2022 07:30:00 - INFO - codeparrot_training - Step 23094: {'lr': 0.0002972631491945653, 'samples': 11824640, 'steps': 23094, 'loss/train': 1.8207341432571411} +02/25/2022 07:30:06 - INFO - codeparrot_training - Step 23095: {'lr': 0.00029724708169649364, 'samples': 11825152, 'steps': 23095, 'loss/train': 2.172386646270752} +02/25/2022 07:30:10 - INFO - codeparrot_training - Step 23096: {'lr': 0.0002972310139960303, 'samples': 11825664, 'steps': 23096, 'loss/train': 1.44020676612854} +02/25/2022 07:30:15 - INFO - codeparrot_training - Step 23097: {'lr': 0.00029721494609324435, 'samples': 11826176, 'steps': 23097, 'loss/train': 1.4003266096115112} +02/25/2022 07:30:19 - INFO - codeparrot_training - Step 23098: {'lr': 0.00029719887798820453, 'samples': 11826688, 'steps': 23098, 'loss/train': 1.6491516828536987} +02/25/2022 07:30:24 - INFO - codeparrot_training - Step 23099: {'lr': 0.00029718280968097976, 'samples': 11827200, 'steps': 23099, 'loss/train': 1.5048080682754517} +02/25/2022 07:30:28 - INFO - codeparrot_training - Step 23100: {'lr': 0.00029716674117163884, 'samples': 11827712, 'steps': 23100, 'loss/train': 1.9512022733688354} +02/25/2022 07:30:33 - INFO - codeparrot_training - Step 23101: {'lr': 0.0002971506724602505, 'samples': 11828224, 'steps': 23101, 'loss/train': 1.9504514932632446} +02/25/2022 07:30:37 - INFO - codeparrot_training - Step 23102: {'lr': 0.0002971346035468837, 'samples': 11828736, 'steps': 23102, 'loss/train': 1.9367523193359375} +02/25/2022 07:30:43 - INFO - codeparrot_training - Step 23103: {'lr': 0.0002971185344316072, 'samples': 11829248, 'steps': 23103, 'loss/train': 1.373284101486206} +02/25/2022 07:30:46 - INFO - codeparrot_training - Step 23104: {'lr': 0.0002971024651144899, 'samples': 11829760, 'steps': 23104, 'loss/train': 1.8134773969650269} +02/25/2022 07:30:52 - INFO - codeparrot_training - Step 23105: {'lr': 0.0002970863955956005, 'samples': 11830272, 'steps': 23105, 'loss/train': 2.2017300128936768} +02/25/2022 07:30:55 - INFO - codeparrot_training - Step 23106: {'lr': 0.00029707032587500805, 'samples': 11830784, 'steps': 23106, 'loss/train': 1.5983376502990723} +02/25/2022 07:31:01 - INFO - codeparrot_training - Step 23107: {'lr': 0.00029705425595278126, 'samples': 11831296, 'steps': 23107, 'loss/train': 1.9444851875305176} +02/25/2022 07:31:04 - INFO - codeparrot_training - Step 23108: {'lr': 0.0002970381858289889, 'samples': 11831808, 'steps': 23108, 'loss/train': 0.9139845967292786} +02/25/2022 07:31:10 - INFO - codeparrot_training - Step 23109: {'lr': 0.0002970221155037, 'samples': 11832320, 'steps': 23109, 'loss/train': 1.834768295288086} +02/25/2022 07:31:13 - INFO - codeparrot_training - Step 23110: {'lr': 0.0002970060449769832, 'samples': 11832832, 'steps': 23110, 'loss/train': 1.7454090118408203} +02/25/2022 07:31:19 - INFO - codeparrot_training - Step 23111: {'lr': 0.00029698997424890746, 'samples': 11833344, 'steps': 23111, 'loss/train': 1.0150907039642334} +02/25/2022 07:31:22 - INFO - codeparrot_training - Step 23112: {'lr': 0.0002969739033195415, 'samples': 11833856, 'steps': 23112, 'loss/train': 2.812715768814087} +02/25/2022 07:31:29 - INFO - codeparrot_training - Step 23113: {'lr': 0.0002969578321889544, 'samples': 11834368, 'steps': 23113, 'loss/train': 1.3748753070831299} +02/25/2022 07:31:32 - INFO - codeparrot_training - Step 23114: {'lr': 0.00029694176085721474, 'samples': 11834880, 'steps': 23114, 'loss/train': 1.3746641874313354} +02/25/2022 07:31:38 - INFO - codeparrot_training - Step 23115: {'lr': 0.0002969256893243916, 'samples': 11835392, 'steps': 23115, 'loss/train': 2.402064800262451} +02/25/2022 07:31:41 - INFO - codeparrot_training - Step 23116: {'lr': 0.0002969096175905536, 'samples': 11835904, 'steps': 23116, 'loss/train': 0.9432001113891602} +02/25/2022 07:31:47 - INFO - codeparrot_training - Step 23117: {'lr': 0.00029689354565576976, 'samples': 11836416, 'steps': 23117, 'loss/train': 1.957139492034912} +02/25/2022 07:31:50 - INFO - codeparrot_training - Step 23118: {'lr': 0.0002968774735201088, 'samples': 11836928, 'steps': 23118, 'loss/train': 1.8836572170257568} +02/25/2022 07:31:56 - INFO - codeparrot_training - Step 23119: {'lr': 0.0002968614011836397, 'samples': 11837440, 'steps': 23119, 'loss/train': 1.0959608554840088} +02/25/2022 07:31:59 - INFO - codeparrot_training - Step 23120: {'lr': 0.0002968453286464312, 'samples': 11837952, 'steps': 23120, 'loss/train': 2.70177960395813} +02/25/2022 07:32:05 - INFO - codeparrot_training - Step 23121: {'lr': 0.00029682925590855213, 'samples': 11838464, 'steps': 23121, 'loss/train': 2.2461211681365967} +02/25/2022 07:32:08 - INFO - codeparrot_training - Step 23122: {'lr': 0.0002968131829700715, 'samples': 11838976, 'steps': 23122, 'loss/train': 1.3319445848464966} +02/25/2022 07:32:14 - INFO - codeparrot_training - Step 23123: {'lr': 0.000296797109831058, 'samples': 11839488, 'steps': 23123, 'loss/train': 1.2728564739227295} +02/25/2022 07:32:17 - INFO - codeparrot_training - Step 23124: {'lr': 0.00029678103649158057, 'samples': 11840000, 'steps': 23124, 'loss/train': 1.7950807809829712} +02/25/2022 07:32:24 - INFO - codeparrot_training - Step 23125: {'lr': 0.00029676496295170804, 'samples': 11840512, 'steps': 23125, 'loss/train': 1.8831136226654053} +02/25/2022 07:32:27 - INFO - codeparrot_training - Step 23126: {'lr': 0.0002967488892115092, 'samples': 11841024, 'steps': 23126, 'loss/train': 1.8715931177139282} +02/25/2022 07:32:33 - INFO - codeparrot_training - Step 23127: {'lr': 0.000296732815271053, 'samples': 11841536, 'steps': 23127, 'loss/train': 2.4496424198150635} +02/25/2022 07:32:36 - INFO - codeparrot_training - Step 23128: {'lr': 0.00029671674113040833, 'samples': 11842048, 'steps': 23128, 'loss/train': 1.496080994606018} +02/25/2022 07:32:42 - INFO - codeparrot_training - Step 23129: {'lr': 0.00029670066678964385, 'samples': 11842560, 'steps': 23129, 'loss/train': 2.152468204498291} +02/25/2022 07:32:45 - INFO - codeparrot_training - Step 23130: {'lr': 0.0002966845922488286, 'samples': 11843072, 'steps': 23130, 'loss/train': 2.4569578170776367} +02/25/2022 07:32:51 - INFO - codeparrot_training - Step 23131: {'lr': 0.00029666851750803137, 'samples': 11843584, 'steps': 23131, 'loss/train': 2.5395867824554443} +02/25/2022 07:32:55 - INFO - codeparrot_training - Step 23132: {'lr': 0.00029665244256732107, 'samples': 11844096, 'steps': 23132, 'loss/train': 1.6447722911834717} +02/25/2022 07:33:00 - INFO - codeparrot_training - Step 23133: {'lr': 0.0002966363674267665, 'samples': 11844608, 'steps': 23133, 'loss/train': 1.27051842212677} +02/25/2022 07:33:04 - INFO - codeparrot_training - Step 23134: {'lr': 0.00029662029208643646, 'samples': 11845120, 'steps': 23134, 'loss/train': 1.4961507320404053} +02/25/2022 07:33:09 - INFO - codeparrot_training - Step 23135: {'lr': 0.0002966042165464, 'samples': 11845632, 'steps': 23135, 'loss/train': 1.6537866592407227} +02/25/2022 07:33:13 - INFO - codeparrot_training - Step 23136: {'lr': 0.0002965881408067258, 'samples': 11846144, 'steps': 23136, 'loss/train': 1.022843599319458} +02/25/2022 07:33:18 - INFO - codeparrot_training - Step 23137: {'lr': 0.0002965720648674829, 'samples': 11846656, 'steps': 23137, 'loss/train': 1.7009072303771973} +02/25/2022 07:33:22 - INFO - codeparrot_training - Step 23138: {'lr': 0.00029655598872873994, 'samples': 11847168, 'steps': 23138, 'loss/train': 0.5573568344116211} +02/25/2022 07:33:27 - INFO - codeparrot_training - Step 23139: {'lr': 0.000296539912390566, 'samples': 11847680, 'steps': 23139, 'loss/train': 2.214735746383667} +02/25/2022 07:33:31 - INFO - codeparrot_training - Step 23140: {'lr': 0.0002965238358530298, 'samples': 11848192, 'steps': 23140, 'loss/train': 1.9246439933776855} +02/25/2022 07:33:37 - INFO - codeparrot_training - Step 23141: {'lr': 0.0002965077591162003, 'samples': 11848704, 'steps': 23141, 'loss/train': 1.6096656322479248} +02/25/2022 07:33:40 - INFO - codeparrot_training - Step 23142: {'lr': 0.00029649168218014627, 'samples': 11849216, 'steps': 23142, 'loss/train': 2.5338127613067627} +02/25/2022 07:33:46 - INFO - codeparrot_training - Step 23143: {'lr': 0.0002964756050449367, 'samples': 11849728, 'steps': 23143, 'loss/train': 1.336146593093872} +02/25/2022 07:33:49 - INFO - codeparrot_training - Step 23144: {'lr': 0.0002964595277106403, 'samples': 11850240, 'steps': 23144, 'loss/train': 0.9666948318481445} +02/25/2022 07:33:55 - INFO - codeparrot_training - Step 23145: {'lr': 0.0002964434501773262, 'samples': 11850752, 'steps': 23145, 'loss/train': 1.3791836500167847} +02/25/2022 07:33:58 - INFO - codeparrot_training - Step 23146: {'lr': 0.00029642737244506295, 'samples': 11851264, 'steps': 23146, 'loss/train': 1.6816203594207764} +02/25/2022 07:34:04 - INFO - codeparrot_training - Step 23147: {'lr': 0.0002964112945139196, 'samples': 11851776, 'steps': 23147, 'loss/train': 2.0669057369232178} +02/25/2022 07:34:07 - INFO - codeparrot_training - Step 23148: {'lr': 0.0002963952163839651, 'samples': 11852288, 'steps': 23148, 'loss/train': 2.180706262588501} +02/25/2022 07:34:13 - INFO - codeparrot_training - Step 23149: {'lr': 0.00029637913805526816, 'samples': 11852800, 'steps': 23149, 'loss/train': 2.2974071502685547} +02/25/2022 07:34:16 - INFO - codeparrot_training - Step 23150: {'lr': 0.0002963630595278977, 'samples': 11853312, 'steps': 23150, 'loss/train': 1.8673449754714966} +02/25/2022 07:34:23 - INFO - codeparrot_training - Step 23151: {'lr': 0.0002963469808019227, 'samples': 11853824, 'steps': 23151, 'loss/train': 1.8603227138519287} +02/25/2022 07:34:26 - INFO - codeparrot_training - Step 23152: {'lr': 0.00029633090187741187, 'samples': 11854336, 'steps': 23152, 'loss/train': 2.259190320968628} +02/25/2022 07:34:32 - INFO - codeparrot_training - Step 23153: {'lr': 0.00029631482275443424, 'samples': 11854848, 'steps': 23153, 'loss/train': 3.105224370956421} +02/25/2022 07:34:35 - INFO - codeparrot_training - Step 23154: {'lr': 0.0002962987434330585, 'samples': 11855360, 'steps': 23154, 'loss/train': 2.0582025051116943} +02/25/2022 07:34:41 - INFO - codeparrot_training - Step 23155: {'lr': 0.0002962826639133538, 'samples': 11855872, 'steps': 23155, 'loss/train': 0.9695888161659241} +02/25/2022 07:34:44 - INFO - codeparrot_training - Step 23156: {'lr': 0.00029626658419538873, 'samples': 11856384, 'steps': 23156, 'loss/train': 2.500014543533325} +02/25/2022 07:34:50 - INFO - codeparrot_training - Step 23157: {'lr': 0.0002962505042792324, 'samples': 11856896, 'steps': 23157, 'loss/train': 1.32759428024292} +02/25/2022 07:34:53 - INFO - codeparrot_training - Step 23158: {'lr': 0.0002962344241649535, 'samples': 11857408, 'steps': 23158, 'loss/train': 1.75245201587677} +02/25/2022 07:34:59 - INFO - codeparrot_training - Step 23159: {'lr': 0.000296218343852621, 'samples': 11857920, 'steps': 23159, 'loss/train': 2.4808712005615234} +02/25/2022 07:35:02 - INFO - codeparrot_training - Step 23160: {'lr': 0.0002962022633423039, 'samples': 11858432, 'steps': 23160, 'loss/train': 2.1747500896453857} +02/25/2022 07:35:09 - INFO - codeparrot_training - Step 23161: {'lr': 0.00029618618263407094, 'samples': 11858944, 'steps': 23161, 'loss/train': 2.921461820602417} +02/25/2022 07:35:12 - INFO - codeparrot_training - Step 23162: {'lr': 0.00029617010172799095, 'samples': 11859456, 'steps': 23162, 'loss/train': 1.1186691522598267} +02/25/2022 07:35:18 - INFO - codeparrot_training - Step 23163: {'lr': 0.00029615402062413307, 'samples': 11859968, 'steps': 23163, 'loss/train': 1.9791722297668457} +02/25/2022 07:35:21 - INFO - codeparrot_training - Step 23164: {'lr': 0.00029613793932256583, 'samples': 11860480, 'steps': 23164, 'loss/train': 1.3354239463806152} +02/25/2022 07:35:27 - INFO - codeparrot_training - Step 23165: {'lr': 0.00029612185782335845, 'samples': 11860992, 'steps': 23165, 'loss/train': 2.0327491760253906} +02/25/2022 07:35:30 - INFO - codeparrot_training - Step 23166: {'lr': 0.00029610577612657963, 'samples': 11861504, 'steps': 23166, 'loss/train': 1.3488519191741943} +02/25/2022 07:35:36 - INFO - codeparrot_training - Step 23167: {'lr': 0.0002960896942322983, 'samples': 11862016, 'steps': 23167, 'loss/train': 1.3818823099136353} +02/25/2022 07:35:39 - INFO - codeparrot_training - Step 23168: {'lr': 0.0002960736121405834, 'samples': 11862528, 'steps': 23168, 'loss/train': 2.542564868927002} +02/25/2022 07:35:44 - INFO - codeparrot_training - Step 23169: {'lr': 0.00029605752985150367, 'samples': 11863040, 'steps': 23169, 'loss/train': 1.3945231437683105} +02/25/2022 07:35:48 - INFO - codeparrot_training - Step 23170: {'lr': 0.00029604144736512816, 'samples': 11863552, 'steps': 23170, 'loss/train': 1.9836618900299072} +02/25/2022 07:35:55 - INFO - codeparrot_training - Step 23171: {'lr': 0.00029602536468152575, 'samples': 11864064, 'steps': 23171, 'loss/train': 1.4883031845092773} +02/25/2022 07:36:00 - INFO - codeparrot_training - Step 23172: {'lr': 0.00029600928180076525, 'samples': 11864576, 'steps': 23172, 'loss/train': 0.9137559533119202} +02/25/2022 07:36:04 - INFO - codeparrot_training - Step 23173: {'lr': 0.0002959931987229156, 'samples': 11865088, 'steps': 23173, 'loss/train': 1.8890084028244019} +02/25/2022 07:36:07 - INFO - codeparrot_training - Step 23174: {'lr': 0.0002959771154480457, 'samples': 11865600, 'steps': 23174, 'loss/train': 1.723131775856018} +02/25/2022 07:36:13 - INFO - codeparrot_training - Step 23175: {'lr': 0.0002959610319762244, 'samples': 11866112, 'steps': 23175, 'loss/train': 0.6872315406799316} +02/25/2022 07:36:18 - INFO - codeparrot_training - Step 23176: {'lr': 0.0002959449483075207, 'samples': 11866624, 'steps': 23176, 'loss/train': 1.8419396877288818} +02/25/2022 07:36:22 - INFO - codeparrot_training - Step 23177: {'lr': 0.0002959288644420034, 'samples': 11867136, 'steps': 23177, 'loss/train': 0.9334335327148438} +02/25/2022 07:36:27 - INFO - codeparrot_training - Step 23178: {'lr': 0.00029591278037974147, 'samples': 11867648, 'steps': 23178, 'loss/train': 2.110797882080078} +02/25/2022 07:36:31 - INFO - codeparrot_training - Step 23179: {'lr': 0.0002958966961208037, 'samples': 11868160, 'steps': 23179, 'loss/train': 1.0112143754959106} +02/25/2022 07:36:36 - INFO - codeparrot_training - Step 23180: {'lr': 0.0002958806116652591, 'samples': 11868672, 'steps': 23180, 'loss/train': 1.5760059356689453} +02/25/2022 07:36:40 - INFO - codeparrot_training - Step 23181: {'lr': 0.0002958645270131765, 'samples': 11869184, 'steps': 23181, 'loss/train': 1.6010059118270874} +02/25/2022 07:36:45 - INFO - codeparrot_training - Step 23182: {'lr': 0.0002958484421646248, 'samples': 11869696, 'steps': 23182, 'loss/train': 2.161012649536133} +02/25/2022 07:36:49 - INFO - codeparrot_training - Step 23183: {'lr': 0.000295832357119673, 'samples': 11870208, 'steps': 23183, 'loss/train': 3.2022106647491455} +02/25/2022 07:36:55 - INFO - codeparrot_training - Step 23184: {'lr': 0.00029581627187838993, 'samples': 11870720, 'steps': 23184, 'loss/train': 2.231543779373169} +02/25/2022 07:36:58 - INFO - codeparrot_training - Step 23185: {'lr': 0.0002958001864408445, 'samples': 11871232, 'steps': 23185, 'loss/train': 1.2037922143936157} +02/25/2022 07:37:04 - INFO - codeparrot_training - Step 23186: {'lr': 0.0002957841008071056, 'samples': 11871744, 'steps': 23186, 'loss/train': 2.15334153175354} +02/25/2022 07:37:08 - INFO - codeparrot_training - Step 23187: {'lr': 0.0002957680149772422, 'samples': 11872256, 'steps': 23187, 'loss/train': 2.383380651473999} +02/25/2022 07:37:13 - INFO - codeparrot_training - Step 23188: {'lr': 0.000295751928951323, 'samples': 11872768, 'steps': 23188, 'loss/train': 2.431396961212158} +02/25/2022 07:37:17 - INFO - codeparrot_training - Step 23189: {'lr': 0.0002957358427294172, 'samples': 11873280, 'steps': 23189, 'loss/train': 2.157102584838867} +02/25/2022 07:37:22 - INFO - codeparrot_training - Step 23190: {'lr': 0.00029571975631159355, 'samples': 11873792, 'steps': 23190, 'loss/train': 1.9784764051437378} +02/25/2022 07:37:26 - INFO - codeparrot_training - Step 23191: {'lr': 0.000295703669697921, 'samples': 11874304, 'steps': 23191, 'loss/train': 1.3329405784606934} +02/25/2022 07:37:31 - INFO - codeparrot_training - Step 23192: {'lr': 0.0002956875828884684, 'samples': 11874816, 'steps': 23192, 'loss/train': 1.9540085792541504} +02/25/2022 07:37:35 - INFO - codeparrot_training - Step 23193: {'lr': 0.0002956714958833047, 'samples': 11875328, 'steps': 23193, 'loss/train': 1.9511375427246094} +02/25/2022 07:37:40 - INFO - codeparrot_training - Step 23194: {'lr': 0.00029565540868249884, 'samples': 11875840, 'steps': 23194, 'loss/train': 2.8634321689605713} +02/25/2022 07:37:44 - INFO - codeparrot_training - Step 23195: {'lr': 0.0002956393212861197, 'samples': 11876352, 'steps': 23195, 'loss/train': 1.680410385131836} +02/25/2022 07:37:50 - INFO - codeparrot_training - Step 23196: {'lr': 0.0002956232336942362, 'samples': 11876864, 'steps': 23196, 'loss/train': 2.1952054500579834} +02/25/2022 07:37:53 - INFO - codeparrot_training - Step 23197: {'lr': 0.0002956071459069173, 'samples': 11877376, 'steps': 23197, 'loss/train': 2.2575438022613525} +02/25/2022 07:37:59 - INFO - codeparrot_training - Step 23198: {'lr': 0.0002955910579242318, 'samples': 11877888, 'steps': 23198, 'loss/train': 1.839053988456726} +02/25/2022 07:38:03 - INFO - codeparrot_training - Step 23199: {'lr': 0.0002955749697462487, 'samples': 11878400, 'steps': 23199, 'loss/train': 2.897972822189331} +02/25/2022 07:38:08 - INFO - codeparrot_training - Step 23200: {'lr': 0.0002955588813730369, 'samples': 11878912, 'steps': 23200, 'loss/train': 2.0402991771698} +02/25/2022 07:38:12 - INFO - codeparrot_training - Step 23201: {'lr': 0.0002955427928046653, 'samples': 11879424, 'steps': 23201, 'loss/train': 2.769561767578125} +02/25/2022 07:38:17 - INFO - codeparrot_training - Step 23202: {'lr': 0.0002955267040412029, 'samples': 11879936, 'steps': 23202, 'loss/train': 1.3466453552246094} +02/25/2022 07:38:21 - INFO - codeparrot_training - Step 23203: {'lr': 0.0002955106150827185, 'samples': 11880448, 'steps': 23203, 'loss/train': 2.1680798530578613} +02/25/2022 07:38:26 - INFO - codeparrot_training - Step 23204: {'lr': 0.0002954945259292811, 'samples': 11880960, 'steps': 23204, 'loss/train': 2.1050631999969482} +02/25/2022 07:38:30 - INFO - codeparrot_training - Step 23205: {'lr': 0.0002954784365809596, 'samples': 11881472, 'steps': 23205, 'loss/train': 2.2999377250671387} +02/25/2022 07:38:36 - INFO - codeparrot_training - Step 23206: {'lr': 0.0002954623470378229, 'samples': 11881984, 'steps': 23206, 'loss/train': 1.374948501586914} +02/25/2022 07:38:39 - INFO - codeparrot_training - Step 23207: {'lr': 0.00029544625729993997, 'samples': 11882496, 'steps': 23207, 'loss/train': 1.9059501886367798} +02/25/2022 07:38:45 - INFO - codeparrot_training - Step 23208: {'lr': 0.0002954301673673797, 'samples': 11883008, 'steps': 23208, 'loss/train': 1.356425404548645} +02/25/2022 07:38:48 - INFO - codeparrot_training - Step 23209: {'lr': 0.00029541407724021095, 'samples': 11883520, 'steps': 23209, 'loss/train': 1.6585997343063354} +02/25/2022 07:38:54 - INFO - codeparrot_training - Step 23210: {'lr': 0.00029539798691850274, 'samples': 11884032, 'steps': 23210, 'loss/train': 1.3325096368789673} +02/25/2022 07:38:58 - INFO - codeparrot_training - Step 23211: {'lr': 0.00029538189640232406, 'samples': 11884544, 'steps': 23211, 'loss/train': 2.0629372596740723} +02/25/2022 07:39:03 - INFO - codeparrot_training - Step 23212: {'lr': 0.00029536580569174364, 'samples': 11885056, 'steps': 23212, 'loss/train': 2.1043946743011475} +02/25/2022 07:39:07 - INFO - codeparrot_training - Step 23213: {'lr': 0.0002953497147868306, 'samples': 11885568, 'steps': 23213, 'loss/train': 1.8474342823028564} +02/25/2022 07:39:12 - INFO - codeparrot_training - Step 23214: {'lr': 0.0002953336236876537, 'samples': 11886080, 'steps': 23214, 'loss/train': 2.839002847671509} +02/25/2022 07:39:16 - INFO - codeparrot_training - Step 23215: {'lr': 0.000295317532394282, 'samples': 11886592, 'steps': 23215, 'loss/train': 0.8753924369812012} +02/25/2022 07:39:21 - INFO - codeparrot_training - Step 23216: {'lr': 0.0002953014409067844, 'samples': 11887104, 'steps': 23216, 'loss/train': 1.5727424621582031} +02/25/2022 07:39:25 - INFO - codeparrot_training - Step 23217: {'lr': 0.00029528534922522974, 'samples': 11887616, 'steps': 23217, 'loss/train': 1.8975706100463867} +02/25/2022 07:39:30 - INFO - codeparrot_training - Step 23218: {'lr': 0.00029526925734968705, 'samples': 11888128, 'steps': 23218, 'loss/train': 1.2779752016067505} +02/25/2022 07:39:34 - INFO - codeparrot_training - Step 23219: {'lr': 0.0002952531652802252, 'samples': 11888640, 'steps': 23219, 'loss/train': 2.38446044921875} +02/25/2022 07:39:39 - INFO - codeparrot_training - Step 23220: {'lr': 0.00029523707301691327, 'samples': 11889152, 'steps': 23220, 'loss/train': 2.2417595386505127} +02/25/2022 07:39:43 - INFO - codeparrot_training - Step 23221: {'lr': 0.00029522098055982, 'samples': 11889664, 'steps': 23221, 'loss/train': 1.8170905113220215} +02/25/2022 07:39:49 - INFO - codeparrot_training - Step 23222: {'lr': 0.00029520488790901446, 'samples': 11890176, 'steps': 23222, 'loss/train': 0.8516761064529419} +02/25/2022 07:39:53 - INFO - codeparrot_training - Step 23223: {'lr': 0.00029518879506456556, 'samples': 11890688, 'steps': 23223, 'loss/train': 0.8684076070785522} +02/25/2022 07:39:58 - INFO - codeparrot_training - Step 23224: {'lr': 0.0002951727020265421, 'samples': 11891200, 'steps': 23224, 'loss/train': 2.3690929412841797} +02/25/2022 07:40:02 - INFO - codeparrot_training - Step 23225: {'lr': 0.0002951566087950132, 'samples': 11891712, 'steps': 23225, 'loss/train': 1.27492356300354} +02/25/2022 07:40:07 - INFO - codeparrot_training - Step 23226: {'lr': 0.0002951405153700477, 'samples': 11892224, 'steps': 23226, 'loss/train': 2.8572022914886475} +02/25/2022 07:40:11 - INFO - codeparrot_training - Step 23227: {'lr': 0.0002951244217517146, 'samples': 11892736, 'steps': 23227, 'loss/train': 2.1676816940307617} +02/25/2022 07:40:16 - INFO - codeparrot_training - Step 23228: {'lr': 0.0002951083279400828, 'samples': 11893248, 'steps': 23228, 'loss/train': 1.5893467664718628} +02/25/2022 07:40:20 - INFO - codeparrot_training - Step 23229: {'lr': 0.00029509223393522124, 'samples': 11893760, 'steps': 23229, 'loss/train': 1.9131907224655151} +02/25/2022 07:40:25 - INFO - codeparrot_training - Step 23230: {'lr': 0.00029507613973719883, 'samples': 11894272, 'steps': 23230, 'loss/train': 1.2191270589828491} +02/25/2022 07:40:29 - INFO - codeparrot_training - Step 23231: {'lr': 0.0002950600453460846, 'samples': 11894784, 'steps': 23231, 'loss/train': 1.5991896390914917} +02/25/2022 07:40:34 - INFO - codeparrot_training - Step 23232: {'lr': 0.0002950439507619474, 'samples': 11895296, 'steps': 23232, 'loss/train': 2.732994318008423} +02/25/2022 07:40:38 - INFO - codeparrot_training - Step 23233: {'lr': 0.00029502785598485624, 'samples': 11895808, 'steps': 23233, 'loss/train': 3.0105879306793213} +02/25/2022 07:40:44 - INFO - codeparrot_training - Step 23234: {'lr': 0.00029501176101488004, 'samples': 11896320, 'steps': 23234, 'loss/train': 0.6744531393051147} +02/25/2022 07:40:47 - INFO - codeparrot_training - Step 23235: {'lr': 0.00029499566585208776, 'samples': 11896832, 'steps': 23235, 'loss/train': 2.4094879627227783} +02/25/2022 07:40:53 - INFO - codeparrot_training - Step 23236: {'lr': 0.00029497957049654834, 'samples': 11897344, 'steps': 23236, 'loss/train': 0.689185619354248} +02/25/2022 07:40:56 - INFO - codeparrot_training - Step 23237: {'lr': 0.0002949634749483307, 'samples': 11897856, 'steps': 23237, 'loss/train': 1.8878679275512695} +02/25/2022 07:41:02 - INFO - codeparrot_training - Step 23238: {'lr': 0.0002949473792075039, 'samples': 11898368, 'steps': 23238, 'loss/train': 1.487168312072754} +02/25/2022 07:41:05 - INFO - codeparrot_training - Step 23239: {'lr': 0.00029493128327413664, 'samples': 11898880, 'steps': 23239, 'loss/train': 0.883429229259491} +02/25/2022 07:41:11 - INFO - codeparrot_training - Step 23240: {'lr': 0.0002949151871482982, 'samples': 11899392, 'steps': 23240, 'loss/train': 2.5060760974884033} +02/25/2022 07:41:14 - INFO - codeparrot_training - Step 23241: {'lr': 0.00029489909083005723, 'samples': 11899904, 'steps': 23241, 'loss/train': 0.9486697316169739} +02/25/2022 07:41:20 - INFO - codeparrot_training - Step 23242: {'lr': 0.0002948829943194829, 'samples': 11900416, 'steps': 23242, 'loss/train': 1.5521860122680664} +02/25/2022 07:41:23 - INFO - codeparrot_training - Step 23243: {'lr': 0.00029486689761664396, 'samples': 11900928, 'steps': 23243, 'loss/train': 1.8223611116409302} +02/25/2022 07:41:30 - INFO - codeparrot_training - Step 23244: {'lr': 0.0002948508007216096, 'samples': 11901440, 'steps': 23244, 'loss/train': 0.6826063394546509} +02/25/2022 07:41:34 - INFO - codeparrot_training - Step 23245: {'lr': 0.00029483470363444856, 'samples': 11901952, 'steps': 23245, 'loss/train': 2.939300298690796} +02/25/2022 07:41:39 - INFO - codeparrot_training - Step 23246: {'lr': 0.00029481860635522994, 'samples': 11902464, 'steps': 23246, 'loss/train': 1.946687936782837} +02/25/2022 07:41:43 - INFO - codeparrot_training - Step 23247: {'lr': 0.0002948025088840226, 'samples': 11902976, 'steps': 23247, 'loss/train': 1.5869731903076172} +02/25/2022 07:41:48 - INFO - codeparrot_training - Step 23248: {'lr': 0.0002947864112208956, 'samples': 11903488, 'steps': 23248, 'loss/train': 2.951584577560425} +02/25/2022 07:41:52 - INFO - codeparrot_training - Step 23249: {'lr': 0.0002947703133659178, 'samples': 11904000, 'steps': 23249, 'loss/train': 1.869722843170166} +02/25/2022 07:41:57 - INFO - codeparrot_training - Step 23250: {'lr': 0.00029475421531915827, 'samples': 11904512, 'steps': 23250, 'loss/train': 3.3536550998687744} +02/25/2022 07:42:01 - INFO - codeparrot_training - Step 23251: {'lr': 0.00029473811708068576, 'samples': 11905024, 'steps': 23251, 'loss/train': 1.612632393836975} +02/25/2022 07:42:06 - INFO - codeparrot_training - Step 23252: {'lr': 0.0002947220186505694, 'samples': 11905536, 'steps': 23252, 'loss/train': 1.134292721748352} +02/25/2022 07:42:10 - INFO - codeparrot_training - Step 23253: {'lr': 0.00029470592002887815, 'samples': 11906048, 'steps': 23253, 'loss/train': 1.8171945810317993} +02/25/2022 07:42:16 - INFO - codeparrot_training - Step 23254: {'lr': 0.00029468982121568096, 'samples': 11906560, 'steps': 23254, 'loss/train': 2.15289044380188} +02/25/2022 07:42:20 - INFO - codeparrot_training - Step 23255: {'lr': 0.0002946737222110467, 'samples': 11907072, 'steps': 23255, 'loss/train': 1.3837889432907104} +02/25/2022 07:42:25 - INFO - codeparrot_training - Step 23256: {'lr': 0.0002946576230150444, 'samples': 11907584, 'steps': 23256, 'loss/train': 1.6993913650512695} +02/25/2022 07:42:29 - INFO - codeparrot_training - Step 23257: {'lr': 0.00029464152362774305, 'samples': 11908096, 'steps': 23257, 'loss/train': 1.5446391105651855} +02/25/2022 07:42:34 - INFO - codeparrot_training - Step 23258: {'lr': 0.00029462542404921156, 'samples': 11908608, 'steps': 23258, 'loss/train': 2.648787498474121} +02/25/2022 07:42:38 - INFO - codeparrot_training - Step 23259: {'lr': 0.00029460932427951897, 'samples': 11909120, 'steps': 23259, 'loss/train': 0.8352705836296082} +02/25/2022 07:42:44 - INFO - codeparrot_training - Step 23260: {'lr': 0.00029459322431873416, 'samples': 11909632, 'steps': 23260, 'loss/train': 1.9974157810211182} +02/25/2022 07:42:47 - INFO - codeparrot_training - Step 23261: {'lr': 0.00029457712416692617, 'samples': 11910144, 'steps': 23261, 'loss/train': 1.5597301721572876} +02/25/2022 07:42:53 - INFO - codeparrot_training - Step 23262: {'lr': 0.0002945610238241639, 'samples': 11910656, 'steps': 23262, 'loss/train': 4.257477760314941} +02/25/2022 07:42:56 - INFO - codeparrot_training - Step 23263: {'lr': 0.0002945449232905164, 'samples': 11911168, 'steps': 23263, 'loss/train': 1.993708848953247} +02/25/2022 07:43:02 - INFO - codeparrot_training - Step 23264: {'lr': 0.0002945288225660525, 'samples': 11911680, 'steps': 23264, 'loss/train': 2.6475870609283447} +02/25/2022 07:43:05 - INFO - codeparrot_training - Step 23265: {'lr': 0.00029451272165084144, 'samples': 11912192, 'steps': 23265, 'loss/train': 2.3297805786132812} +02/25/2022 07:43:11 - INFO - codeparrot_training - Step 23266: {'lr': 0.00029449662054495184, 'samples': 11912704, 'steps': 23266, 'loss/train': 2.599456787109375} +02/25/2022 07:43:14 - INFO - codeparrot_training - Step 23267: {'lr': 0.0002944805192484529, 'samples': 11913216, 'steps': 23267, 'loss/train': 1.3924566507339478} +02/25/2022 07:43:20 - INFO - codeparrot_training - Step 23268: {'lr': 0.0002944644177614136, 'samples': 11913728, 'steps': 23268, 'loss/train': 1.5712859630584717} +02/25/2022 07:43:24 - INFO - codeparrot_training - Step 23269: {'lr': 0.00029444831608390276, 'samples': 11914240, 'steps': 23269, 'loss/train': 2.215167284011841} +02/25/2022 07:43:29 - INFO - codeparrot_training - Step 23270: {'lr': 0.0002944322142159895, 'samples': 11914752, 'steps': 23270, 'loss/train': 2.082902431488037} +02/25/2022 07:43:33 - INFO - codeparrot_training - Step 23271: {'lr': 0.0002944161121577427, 'samples': 11915264, 'steps': 23271, 'loss/train': 1.5175353288650513} +02/25/2022 07:43:38 - INFO - codeparrot_training - Step 23272: {'lr': 0.0002944000099092314, 'samples': 11915776, 'steps': 23272, 'loss/train': 1.6339221000671387} +02/25/2022 07:43:42 - INFO - codeparrot_training - Step 23273: {'lr': 0.0002943839074705246, 'samples': 11916288, 'steps': 23273, 'loss/train': 1.7660932540893555} +02/25/2022 07:43:47 - INFO - codeparrot_training - Step 23274: {'lr': 0.00029436780484169125, 'samples': 11916800, 'steps': 23274, 'loss/train': 1.6506413221359253} +02/25/2022 07:43:51 - INFO - codeparrot_training - Step 23275: {'lr': 0.0002943517020228003, 'samples': 11917312, 'steps': 23275, 'loss/train': 1.7423412799835205} +02/25/2022 07:43:58 - INFO - codeparrot_training - Step 23276: {'lr': 0.00029433559901392067, 'samples': 11917824, 'steps': 23276, 'loss/train': 1.5406180620193481} +02/25/2022 07:44:02 - INFO - codeparrot_training - Step 23277: {'lr': 0.0002943194958151214, 'samples': 11918336, 'steps': 23277, 'loss/train': 1.117648720741272} +02/25/2022 07:44:07 - INFO - codeparrot_training - Step 23278: {'lr': 0.00029430339242647157, 'samples': 11918848, 'steps': 23278, 'loss/train': 2.628079414367676} +02/25/2022 07:44:11 - INFO - codeparrot_training - Step 23279: {'lr': 0.00029428728884804, 'samples': 11919360, 'steps': 23279, 'loss/train': 2.8892574310302734} +02/25/2022 07:44:16 - INFO - codeparrot_training - Step 23280: {'lr': 0.0002942711850798959, 'samples': 11919872, 'steps': 23280, 'loss/train': 1.594959020614624} +02/25/2022 07:44:20 - INFO - codeparrot_training - Step 23281: {'lr': 0.00029425508112210794, 'samples': 11920384, 'steps': 23281, 'loss/train': 1.6724756956100464} +02/25/2022 07:44:25 - INFO - codeparrot_training - Step 23282: {'lr': 0.00029423897697474535, 'samples': 11920896, 'steps': 23282, 'loss/train': 1.522385597229004} +02/25/2022 07:44:29 - INFO - codeparrot_training - Step 23283: {'lr': 0.00029422287263787696, 'samples': 11921408, 'steps': 23283, 'loss/train': 3.1397314071655273} +02/25/2022 07:44:34 - INFO - codeparrot_training - Step 23284: {'lr': 0.0002942067681115719, 'samples': 11921920, 'steps': 23284, 'loss/train': 1.8952866792678833} +02/25/2022 07:44:38 - INFO - codeparrot_training - Step 23285: {'lr': 0.0002941906633958989, 'samples': 11922432, 'steps': 23285, 'loss/train': 2.027068853378296} +02/25/2022 07:44:45 - INFO - codeparrot_training - Step 23286: {'lr': 0.0002941745584909274, 'samples': 11922944, 'steps': 23286, 'loss/train': 2.7364048957824707} +02/25/2022 07:44:49 - INFO - codeparrot_training - Step 23287: {'lr': 0.00029415845339672596, 'samples': 11923456, 'steps': 23287, 'loss/train': 1.1098403930664062} +02/25/2022 07:44:54 - INFO - codeparrot_training - Step 23288: {'lr': 0.00029414234811336377, 'samples': 11923968, 'steps': 23288, 'loss/train': 1.479647159576416} +02/25/2022 07:44:57 - INFO - codeparrot_training - Step 23289: {'lr': 0.00029412624264090983, 'samples': 11924480, 'steps': 23289, 'loss/train': 0.11885195225477219} +02/25/2022 07:45:03 - INFO - codeparrot_training - Step 23290: {'lr': 0.00029411013697943294, 'samples': 11924992, 'steps': 23290, 'loss/train': 1.6844847202301025} +02/25/2022 07:45:07 - INFO - codeparrot_training - Step 23291: {'lr': 0.00029409403112900227, 'samples': 11925504, 'steps': 23291, 'loss/train': 0.6062667369842529} +02/25/2022 07:45:12 - INFO - codeparrot_training - Step 23292: {'lr': 0.00029407792508968683, 'samples': 11926016, 'steps': 23292, 'loss/train': 1.4477728605270386} +02/25/2022 07:45:16 - INFO - codeparrot_training - Step 23293: {'lr': 0.0002940618188615555, 'samples': 11926528, 'steps': 23293, 'loss/train': 0.6699808239936829} +02/25/2022 07:45:21 - INFO - codeparrot_training - Step 23294: {'lr': 0.0002940457124446774, 'samples': 11927040, 'steps': 23294, 'loss/train': 2.055760145187378} +02/25/2022 07:45:25 - INFO - codeparrot_training - Step 23295: {'lr': 0.0002940296058391214, 'samples': 11927552, 'steps': 23295, 'loss/train': 1.6895854473114014} +02/25/2022 07:45:32 - INFO - codeparrot_training - Step 23296: {'lr': 0.00029401349904495654, 'samples': 11928064, 'steps': 23296, 'loss/train': 0.45486074686050415} +02/25/2022 07:45:35 - INFO - codeparrot_training - Step 23297: {'lr': 0.00029399739206225186, 'samples': 11928576, 'steps': 23297, 'loss/train': 1.7504163980484009} +02/25/2022 07:45:41 - INFO - codeparrot_training - Step 23298: {'lr': 0.00029398128489107635, 'samples': 11929088, 'steps': 23298, 'loss/train': 1.576931118965149} +02/25/2022 07:45:44 - INFO - codeparrot_training - Step 23299: {'lr': 0.0002939651775314989, 'samples': 11929600, 'steps': 23299, 'loss/train': 2.00386905670166} +02/25/2022 07:45:50 - INFO - codeparrot_training - Step 23300: {'lr': 0.0002939490699835887, 'samples': 11930112, 'steps': 23300, 'loss/train': 1.555681586265564} +02/25/2022 07:45:53 - INFO - codeparrot_training - Step 23301: {'lr': 0.0002939329622474145, 'samples': 11930624, 'steps': 23301, 'loss/train': 2.8473918437957764} +02/25/2022 07:46:00 - INFO - codeparrot_training - Step 23302: {'lr': 0.00029391685432304554, 'samples': 11931136, 'steps': 23302, 'loss/train': 1.720230221748352} +02/25/2022 07:46:03 - INFO - codeparrot_training - Step 23303: {'lr': 0.0002939007462105507, 'samples': 11931648, 'steps': 23303, 'loss/train': 2.24920654296875} +02/25/2022 07:46:09 - INFO - codeparrot_training - Step 23304: {'lr': 0.000293884637909999, 'samples': 11932160, 'steps': 23304, 'loss/train': 1.97749924659729} +02/25/2022 07:46:12 - INFO - codeparrot_training - Step 23305: {'lr': 0.00029386852942145944, 'samples': 11932672, 'steps': 23305, 'loss/train': 1.7793421745300293} +02/25/2022 07:46:19 - INFO - codeparrot_training - Step 23306: {'lr': 0.000293852420745001, 'samples': 11933184, 'steps': 23306, 'loss/train': 2.5778260231018066} +02/25/2022 07:46:23 - INFO - codeparrot_training - Step 23307: {'lr': 0.0002938363118806928, 'samples': 11933696, 'steps': 23307, 'loss/train': 1.3569884300231934} +02/25/2022 07:46:28 - INFO - codeparrot_training - Step 23308: {'lr': 0.00029382020282860366, 'samples': 11934208, 'steps': 23308, 'loss/train': 2.33257794380188} +02/25/2022 07:46:32 - INFO - codeparrot_training - Step 23309: {'lr': 0.00029380409358880277, 'samples': 11934720, 'steps': 23309, 'loss/train': 2.3145346641540527} +02/25/2022 07:46:37 - INFO - codeparrot_training - Step 23310: {'lr': 0.000293787984161359, 'samples': 11935232, 'steps': 23310, 'loss/train': 1.8709235191345215} +02/25/2022 07:46:41 - INFO - codeparrot_training - Step 23311: {'lr': 0.0002937718745463414, 'samples': 11935744, 'steps': 23311, 'loss/train': 1.5701366662979126} +02/25/2022 07:46:46 - INFO - codeparrot_training - Step 23312: {'lr': 0.00029375576474381903, 'samples': 11936256, 'steps': 23312, 'loss/train': 2.1795248985290527} +02/25/2022 07:46:50 - INFO - codeparrot_training - Step 23313: {'lr': 0.0002937396547538609, 'samples': 11936768, 'steps': 23313, 'loss/train': 2.784252643585205} +02/25/2022 07:46:55 - INFO - codeparrot_training - Step 23314: {'lr': 0.00029372354457653585, 'samples': 11937280, 'steps': 23314, 'loss/train': 2.8848493099212646} +02/25/2022 07:46:59 - INFO - codeparrot_training - Step 23315: {'lr': 0.00029370743421191305, 'samples': 11937792, 'steps': 23315, 'loss/train': 1.4853767156600952} +02/25/2022 07:47:04 - INFO - codeparrot_training - Step 23316: {'lr': 0.0002936913236600616, 'samples': 11938304, 'steps': 23316, 'loss/train': 1.9631294012069702} +02/25/2022 07:47:08 - INFO - codeparrot_training - Step 23317: {'lr': 0.0002936752129210503, 'samples': 11938816, 'steps': 23317, 'loss/train': 2.023798704147339} +02/25/2022 07:47:13 - INFO - codeparrot_training - Step 23318: {'lr': 0.00029365910199494823, 'samples': 11939328, 'steps': 23318, 'loss/train': 2.9043664932250977} +02/25/2022 07:47:17 - INFO - codeparrot_training - Step 23319: {'lr': 0.00029364299088182445, 'samples': 11939840, 'steps': 23319, 'loss/train': 0.6101219058036804} +02/25/2022 07:47:22 - INFO - codeparrot_training - Step 23320: {'lr': 0.000293626879581748, 'samples': 11940352, 'steps': 23320, 'loss/train': 1.6024255752563477} +02/25/2022 07:47:26 - INFO - codeparrot_training - Step 23321: {'lr': 0.00029361076809478784, 'samples': 11940864, 'steps': 23321, 'loss/train': 1.73313307762146} +02/25/2022 07:47:33 - INFO - codeparrot_training - Step 23322: {'lr': 0.00029359465642101297, 'samples': 11941376, 'steps': 23322, 'loss/train': 1.649749517440796} +02/25/2022 07:47:37 - INFO - codeparrot_training - Step 23323: {'lr': 0.0002935785445604924, 'samples': 11941888, 'steps': 23323, 'loss/train': 2.128927230834961} +02/25/2022 07:47:42 - INFO - codeparrot_training - Step 23324: {'lr': 0.0002935624325132953, 'samples': 11942400, 'steps': 23324, 'loss/train': 1.4314079284667969} +02/25/2022 07:47:46 - INFO - codeparrot_training - Step 23325: {'lr': 0.0002935463202794905, 'samples': 11942912, 'steps': 23325, 'loss/train': 2.885136365890503} +02/25/2022 07:47:52 - INFO - codeparrot_training - Step 23326: {'lr': 0.00029353020785914707, 'samples': 11943424, 'steps': 23326, 'loss/train': 1.5936180353164673} +02/25/2022 07:47:55 - INFO - codeparrot_training - Step 23327: {'lr': 0.00029351409525233416, 'samples': 11943936, 'steps': 23327, 'loss/train': 1.7138837575912476} +02/25/2022 07:48:00 - INFO - codeparrot_training - Step 23328: {'lr': 0.0002934979824591205, 'samples': 11944448, 'steps': 23328, 'loss/train': 0.6495726704597473} +02/25/2022 07:48:04 - INFO - codeparrot_training - Step 23329: {'lr': 0.0002934818694795755, 'samples': 11944960, 'steps': 23329, 'loss/train': 1.9211796522140503} +02/25/2022 07:48:10 - INFO - codeparrot_training - Step 23330: {'lr': 0.0002934657563137678, 'samples': 11945472, 'steps': 23330, 'loss/train': 1.0002098083496094} +02/25/2022 07:48:13 - INFO - codeparrot_training - Step 23331: {'lr': 0.00029344964296176666, 'samples': 11945984, 'steps': 23331, 'loss/train': 0.7236199378967285} +02/25/2022 07:48:20 - INFO - codeparrot_training - Step 23332: {'lr': 0.00029343352942364106, 'samples': 11946496, 'steps': 23332, 'loss/train': 2.111624240875244} +02/25/2022 07:48:24 - INFO - codeparrot_training - Step 23333: {'lr': 0.00029341741569946007, 'samples': 11947008, 'steps': 23333, 'loss/train': 1.6773176193237305} +02/25/2022 07:48:29 - INFO - codeparrot_training - Step 23334: {'lr': 0.00029340130178929255, 'samples': 11947520, 'steps': 23334, 'loss/train': 2.680154323577881} +02/25/2022 07:48:33 - INFO - codeparrot_training - Step 23335: {'lr': 0.0002933851876932077, 'samples': 11948032, 'steps': 23335, 'loss/train': 2.562551975250244} +02/25/2022 07:48:38 - INFO - codeparrot_training - Step 23336: {'lr': 0.00029336907341127443, 'samples': 11948544, 'steps': 23336, 'loss/train': 1.8527193069458008} +02/25/2022 07:48:42 - INFO - codeparrot_training - Step 23337: {'lr': 0.0002933529589435619, 'samples': 11949056, 'steps': 23337, 'loss/train': 2.284768581390381} +02/25/2022 07:48:45 - INFO - codeparrot_training - Step 23338: {'lr': 0.000293336844290139, 'samples': 11949568, 'steps': 23338, 'loss/train': 1.9783297777175903} +02/25/2022 07:48:51 - INFO - codeparrot_training - Step 23339: {'lr': 0.0002933207294510748, 'samples': 11950080, 'steps': 23339, 'loss/train': 2.1956281661987305} +02/25/2022 07:48:56 - INFO - codeparrot_training - Step 23340: {'lr': 0.0002933046144264384, 'samples': 11950592, 'steps': 23340, 'loss/train': 1.364457368850708} +02/25/2022 07:49:00 - INFO - codeparrot_training - Step 23341: {'lr': 0.0002932884992162987, 'samples': 11951104, 'steps': 23341, 'loss/train': 1.0895816087722778} +02/25/2022 07:49:03 - INFO - codeparrot_training - Step 23342: {'lr': 0.00029327238382072495, 'samples': 11951616, 'steps': 23342, 'loss/train': 1.349489450454712} +02/25/2022 07:49:11 - INFO - codeparrot_training - Step 23343: {'lr': 0.000293256268239786, 'samples': 11952128, 'steps': 23343, 'loss/train': 2.8808159828186035} +02/25/2022 07:49:14 - INFO - codeparrot_training - Step 23344: {'lr': 0.00029324015247355093, 'samples': 11952640, 'steps': 23344, 'loss/train': 2.297421932220459} +02/25/2022 07:49:20 - INFO - codeparrot_training - Step 23345: {'lr': 0.0002932240365220887, 'samples': 11953152, 'steps': 23345, 'loss/train': 1.592259407043457} +02/25/2022 07:49:23 - INFO - codeparrot_training - Step 23346: {'lr': 0.00029320792038546855, 'samples': 11953664, 'steps': 23346, 'loss/train': 2.0885281562805176} +02/25/2022 07:49:29 - INFO - codeparrot_training - Step 23347: {'lr': 0.0002931918040637593, 'samples': 11954176, 'steps': 23347, 'loss/train': 2.349640369415283} +02/25/2022 07:49:34 - INFO - codeparrot_training - Step 23348: {'lr': 0.0002931756875570301, 'samples': 11954688, 'steps': 23348, 'loss/train': 1.9957149028778076} +02/25/2022 07:49:38 - INFO - codeparrot_training - Step 23349: {'lr': 0.0002931595708653499, 'samples': 11955200, 'steps': 23349, 'loss/train': 1.07041597366333} +02/25/2022 07:49:43 - INFO - codeparrot_training - Step 23350: {'lr': 0.00029314345398878796, 'samples': 11955712, 'steps': 23350, 'loss/train': 2.185713291168213} +02/25/2022 07:49:47 - INFO - codeparrot_training - Step 23351: {'lr': 0.00029312733692741307, 'samples': 11956224, 'steps': 23351, 'loss/train': 1.176406979560852} +02/25/2022 07:49:54 - INFO - codeparrot_training - Step 23352: {'lr': 0.00029311121968129435, 'samples': 11956736, 'steps': 23352, 'loss/train': 1.394729495048523} +02/25/2022 07:49:57 - INFO - codeparrot_training - Step 23353: {'lr': 0.00029309510225050087, 'samples': 11957248, 'steps': 23353, 'loss/train': 1.989072322845459} +02/25/2022 07:50:03 - INFO - codeparrot_training - Step 23354: {'lr': 0.00029307898463510164, 'samples': 11957760, 'steps': 23354, 'loss/train': 1.8369868993759155} +02/25/2022 07:50:06 - INFO - codeparrot_training - Step 23355: {'lr': 0.00029306286683516573, 'samples': 11958272, 'steps': 23355, 'loss/train': 1.9826176166534424} +02/25/2022 07:50:12 - INFO - codeparrot_training - Step 23356: {'lr': 0.00029304674885076215, 'samples': 11958784, 'steps': 23356, 'loss/train': 3.8771159648895264} +02/25/2022 07:50:15 - INFO - codeparrot_training - Step 23357: {'lr': 0.00029303063068196006, 'samples': 11959296, 'steps': 23357, 'loss/train': 1.3828481435775757} +02/25/2022 07:50:21 - INFO - codeparrot_training - Step 23358: {'lr': 0.0002930145123288283, 'samples': 11959808, 'steps': 23358, 'loss/train': 1.342498779296875} +02/25/2022 07:50:24 - INFO - codeparrot_training - Step 23359: {'lr': 0.00029299839379143613, 'samples': 11960320, 'steps': 23359, 'loss/train': 2.260441541671753} +02/25/2022 07:50:30 - INFO - codeparrot_training - Step 23360: {'lr': 0.0002929822750698524, 'samples': 11960832, 'steps': 23360, 'loss/train': 2.9931976795196533} +02/25/2022 07:50:33 - INFO - codeparrot_training - Step 23361: {'lr': 0.0002929661561641463, 'samples': 11961344, 'steps': 23361, 'loss/train': 1.3829420804977417} +02/25/2022 07:50:39 - INFO - codeparrot_training - Step 23362: {'lr': 0.0002929500370743868, 'samples': 11961856, 'steps': 23362, 'loss/train': 1.6675212383270264} +02/25/2022 07:50:43 - INFO - codeparrot_training - Step 23363: {'lr': 0.000292933917800643, 'samples': 11962368, 'steps': 23363, 'loss/train': 1.8965108394622803} +02/25/2022 07:50:48 - INFO - codeparrot_training - Step 23364: {'lr': 0.0002929177983429839, 'samples': 11962880, 'steps': 23364, 'loss/train': 2.617595911026001} +02/25/2022 07:50:52 - INFO - codeparrot_training - Step 23365: {'lr': 0.0002929016787014786, 'samples': 11963392, 'steps': 23365, 'loss/train': 1.4088730812072754} +02/25/2022 07:50:57 - INFO - codeparrot_training - Step 23366: {'lr': 0.0002928855588761962, 'samples': 11963904, 'steps': 23366, 'loss/train': 2.2544336318969727} +02/25/2022 07:51:01 - INFO - codeparrot_training - Step 23367: {'lr': 0.0002928694388672056, 'samples': 11964416, 'steps': 23367, 'loss/train': 1.7133792638778687} +02/25/2022 07:51:08 - INFO - codeparrot_training - Step 23368: {'lr': 0.00029285331867457597, 'samples': 11964928, 'steps': 23368, 'loss/train': 1.6071662902832031} +02/25/2022 07:51:11 - INFO - codeparrot_training - Step 23369: {'lr': 0.00029283719829837636, 'samples': 11965440, 'steps': 23369, 'loss/train': 1.1860008239746094} +02/25/2022 07:51:17 - INFO - codeparrot_training - Step 23370: {'lr': 0.0002928210777386757, 'samples': 11965952, 'steps': 23370, 'loss/train': 1.4075809717178345} +02/25/2022 07:51:21 - INFO - codeparrot_training - Step 23371: {'lr': 0.0002928049569955432, 'samples': 11966464, 'steps': 23371, 'loss/train': 2.2653560638427734} +02/25/2022 07:51:24 - INFO - codeparrot_training - Step 23372: {'lr': 0.0002927888360690478, 'samples': 11966976, 'steps': 23372, 'loss/train': 1.7175580263137817} +02/25/2022 07:51:30 - INFO - codeparrot_training - Step 23373: {'lr': 0.00029277271495925874, 'samples': 11967488, 'steps': 23373, 'loss/train': 1.8998998403549194} +02/25/2022 07:51:33 - INFO - codeparrot_training - Step 23374: {'lr': 0.00029275659366624493, 'samples': 11968000, 'steps': 23374, 'loss/train': 2.1208808422088623} +02/25/2022 07:51:39 - INFO - codeparrot_training - Step 23375: {'lr': 0.00029274047219007534, 'samples': 11968512, 'steps': 23375, 'loss/train': 2.0657408237457275} +02/25/2022 07:51:42 - INFO - codeparrot_training - Step 23376: {'lr': 0.00029272435053081917, 'samples': 11969024, 'steps': 23376, 'loss/train': 1.7744271755218506} +02/25/2022 07:51:48 - INFO - codeparrot_training - Step 23377: {'lr': 0.0002927082286885455, 'samples': 11969536, 'steps': 23377, 'loss/train': 2.682598352432251} +02/25/2022 07:51:51 - INFO - codeparrot_training - Step 23378: {'lr': 0.0002926921066633233, 'samples': 11970048, 'steps': 23378, 'loss/train': 2.908313751220703} +02/25/2022 07:52:00 - INFO - codeparrot_training - Step 23379: {'lr': 0.0002926759844552217, 'samples': 11970560, 'steps': 23379, 'loss/train': 2.297487735748291} +02/25/2022 07:52:04 - INFO - codeparrot_training - Step 23380: {'lr': 0.0002926598620643097, 'samples': 11971072, 'steps': 23380, 'loss/train': 2.234861373901367} +02/25/2022 07:52:09 - INFO - codeparrot_training - Step 23381: {'lr': 0.0002926437394906564, 'samples': 11971584, 'steps': 23381, 'loss/train': 2.625631809234619} +02/25/2022 07:52:12 - INFO - codeparrot_training - Step 23382: {'lr': 0.00029262761673433087, 'samples': 11972096, 'steps': 23382, 'loss/train': 2.7523982524871826} +02/25/2022 07:52:18 - INFO - codeparrot_training - Step 23383: {'lr': 0.0002926114937954022, 'samples': 11972608, 'steps': 23383, 'loss/train': 1.088431477546692} +02/25/2022 07:52:21 - INFO - codeparrot_training - Step 23384: {'lr': 0.0002925953706739394, 'samples': 11973120, 'steps': 23384, 'loss/train': 1.984533667564392} +02/25/2022 07:52:27 - INFO - codeparrot_training - Step 23385: {'lr': 0.0002925792473700116, 'samples': 11973632, 'steps': 23385, 'loss/train': 1.199676275253296} +02/25/2022 07:52:30 - INFO - codeparrot_training - Step 23386: {'lr': 0.00029256312388368773, 'samples': 11974144, 'steps': 23386, 'loss/train': 2.3697826862335205} +02/25/2022 07:52:36 - INFO - codeparrot_training - Step 23387: {'lr': 0.000292547000215037, 'samples': 11974656, 'steps': 23387, 'loss/train': 3.095698118209839} +02/25/2022 07:52:39 - INFO - codeparrot_training - Step 23388: {'lr': 0.0002925308763641284, 'samples': 11975168, 'steps': 23388, 'loss/train': 2.0039196014404297} +02/25/2022 07:52:45 - INFO - codeparrot_training - Step 23389: {'lr': 0.00029251475233103104, 'samples': 11975680, 'steps': 23389, 'loss/train': 2.377756357192993} +02/25/2022 07:52:48 - INFO - codeparrot_training - Step 23390: {'lr': 0.000292498628115814, 'samples': 11976192, 'steps': 23390, 'loss/train': 2.5976409912109375} +02/25/2022 07:52:56 - INFO - codeparrot_training - Step 23391: {'lr': 0.00029248250371854636, 'samples': 11976704, 'steps': 23391, 'loss/train': 2.7413089275360107} +02/25/2022 07:53:01 - INFO - codeparrot_training - Step 23392: {'lr': 0.0002924663791392971, 'samples': 11977216, 'steps': 23392, 'loss/train': 1.9561271667480469} +02/25/2022 07:53:05 - INFO - codeparrot_training - Step 23393: {'lr': 0.0002924502543781354, 'samples': 11977728, 'steps': 23393, 'loss/train': 1.8099284172058105} +02/25/2022 07:53:10 - INFO - codeparrot_training - Step 23394: {'lr': 0.0002924341294351303, 'samples': 11978240, 'steps': 23394, 'loss/train': 1.7902275323867798} +02/25/2022 07:53:14 - INFO - codeparrot_training - Step 23395: {'lr': 0.0002924180043103508, 'samples': 11978752, 'steps': 23395, 'loss/train': 2.017207622528076} +02/25/2022 07:53:19 - INFO - codeparrot_training - Step 23396: {'lr': 0.0002924018790038662, 'samples': 11979264, 'steps': 23396, 'loss/train': 2.2842447757720947} +02/25/2022 07:53:23 - INFO - codeparrot_training - Step 23397: {'lr': 0.0002923857535157452, 'samples': 11979776, 'steps': 23397, 'loss/train': 1.391904592514038} +02/25/2022 07:53:26 - INFO - codeparrot_training - Step 23398: {'lr': 0.00029236962784605727, 'samples': 11980288, 'steps': 23398, 'loss/train': 1.9955264329910278} +02/25/2022 07:53:32 - INFO - codeparrot_training - Step 23399: {'lr': 0.00029235350199487115, 'samples': 11980800, 'steps': 23399, 'loss/train': 2.3995463848114014} +02/25/2022 07:53:39 - INFO - codeparrot_training - Step 23400: {'lr': 0.0002923373759622561, 'samples': 11981312, 'steps': 23400, 'loss/train': 1.9457592964172363} +02/25/2022 07:53:43 - INFO - codeparrot_training - Step 23401: {'lr': 0.00029232124974828126, 'samples': 11981824, 'steps': 23401, 'loss/train': 2.4070663452148438} +02/25/2022 07:53:46 - INFO - codeparrot_training - Step 23402: {'lr': 0.0002923051233530156, 'samples': 11982336, 'steps': 23402, 'loss/train': 1.2458875179290771} +02/25/2022 07:53:52 - INFO - codeparrot_training - Step 23403: {'lr': 0.00029228899677652826, 'samples': 11982848, 'steps': 23403, 'loss/train': 2.039367914199829} +02/25/2022 07:53:55 - INFO - codeparrot_training - Step 23404: {'lr': 0.0002922728700188882, 'samples': 11983360, 'steps': 23404, 'loss/train': 2.762005090713501} +02/25/2022 07:54:01 - INFO - codeparrot_training - Step 23405: {'lr': 0.00029225674308016464, 'samples': 11983872, 'steps': 23405, 'loss/train': 1.8602744340896606} +02/25/2022 07:54:06 - INFO - codeparrot_training - Step 23406: {'lr': 0.00029224061596042663, 'samples': 11984384, 'steps': 23406, 'loss/train': 1.7666420936584473} +02/25/2022 07:54:10 - INFO - codeparrot_training - Step 23407: {'lr': 0.0002922244886597432, 'samples': 11984896, 'steps': 23407, 'loss/train': 2.2174315452575684} +02/25/2022 07:54:15 - INFO - codeparrot_training - Step 23408: {'lr': 0.00029220836117818346, 'samples': 11985408, 'steps': 23408, 'loss/train': 2.378347635269165} +02/25/2022 07:54:19 - INFO - codeparrot_training - Step 23409: {'lr': 0.00029219223351581653, 'samples': 11985920, 'steps': 23409, 'loss/train': 2.434917449951172} +02/25/2022 07:54:24 - INFO - codeparrot_training - Step 23410: {'lr': 0.00029217610567271147, 'samples': 11986432, 'steps': 23410, 'loss/train': 5.452525615692139} +02/25/2022 07:54:28 - INFO - codeparrot_training - Step 23411: {'lr': 0.00029215997764893734, 'samples': 11986944, 'steps': 23411, 'loss/train': 0.46564772725105286} +02/25/2022 07:54:33 - INFO - codeparrot_training - Step 23412: {'lr': 0.00029214384944456327, 'samples': 11987456, 'steps': 23412, 'loss/train': 1.8892968893051147} +02/25/2022 07:54:37 - INFO - codeparrot_training - Step 23413: {'lr': 0.00029212772105965837, 'samples': 11987968, 'steps': 23413, 'loss/train': 2.130406379699707} +02/25/2022 07:54:44 - INFO - codeparrot_training - Step 23414: {'lr': 0.0002921115924942916, 'samples': 11988480, 'steps': 23414, 'loss/train': 0.36277735233306885} +02/25/2022 07:54:48 - INFO - codeparrot_training - Step 23415: {'lr': 0.0002920954637485322, 'samples': 11988992, 'steps': 23415, 'loss/train': 1.677271842956543} +02/25/2022 07:54:53 - INFO - codeparrot_training - Step 23416: {'lr': 0.0002920793348224493, 'samples': 11989504, 'steps': 23416, 'loss/train': 0.4123888611793518} +02/25/2022 07:54:56 - INFO - codeparrot_training - Step 23417: {'lr': 0.0002920632057161117, 'samples': 11990016, 'steps': 23417, 'loss/train': 1.8849787712097168} +02/25/2022 07:55:02 - INFO - codeparrot_training - Step 23418: {'lr': 0.00029204707642958876, 'samples': 11990528, 'steps': 23418, 'loss/train': 1.4175535440444946} +02/25/2022 07:55:06 - INFO - codeparrot_training - Step 23419: {'lr': 0.0002920309469629495, 'samples': 11991040, 'steps': 23419, 'loss/train': 0.1740378588438034} +02/25/2022 07:55:11 - INFO - codeparrot_training - Step 23420: {'lr': 0.000292014817316263, 'samples': 11991552, 'steps': 23420, 'loss/train': 1.7785595655441284} +02/25/2022 07:55:15 - INFO - codeparrot_training - Step 23421: {'lr': 0.00029199868748959836, 'samples': 11992064, 'steps': 23421, 'loss/train': 1.7453330755233765} +02/25/2022 07:55:20 - INFO - codeparrot_training - Step 23422: {'lr': 0.00029198255748302473, 'samples': 11992576, 'steps': 23422, 'loss/train': 1.1835572719573975} +02/25/2022 07:55:24 - INFO - codeparrot_training - Step 23423: {'lr': 0.00029196642729661113, 'samples': 11993088, 'steps': 23423, 'loss/train': 2.376190662384033} +02/25/2022 07:55:29 - INFO - codeparrot_training - Step 23424: {'lr': 0.0002919502969304266, 'samples': 11993600, 'steps': 23424, 'loss/train': 1.0783319473266602} +02/25/2022 07:55:33 - INFO - codeparrot_training - Step 23425: {'lr': 0.0002919341663845404, 'samples': 11994112, 'steps': 23425, 'loss/train': 2.0160794258117676} +02/25/2022 07:55:40 - INFO - codeparrot_training - Step 23426: {'lr': 0.00029191803565902153, 'samples': 11994624, 'steps': 23426, 'loss/train': 2.896087646484375} +02/25/2022 07:55:43 - INFO - codeparrot_training - Step 23427: {'lr': 0.00029190190475393915, 'samples': 11995136, 'steps': 23427, 'loss/train': 1.8752520084381104} +02/25/2022 07:55:49 - INFO - codeparrot_training - Step 23428: {'lr': 0.0002918857736693622, 'samples': 11995648, 'steps': 23428, 'loss/train': 1.5570167303085327} +02/25/2022 07:55:52 - INFO - codeparrot_training - Step 23429: {'lr': 0.00029186964240536, 'samples': 11996160, 'steps': 23429, 'loss/train': 1.6154569387435913} +02/25/2022 07:55:58 - INFO - codeparrot_training - Step 23430: {'lr': 0.0002918535109620015, 'samples': 11996672, 'steps': 23430, 'loss/train': 2.3734962940216064} +02/25/2022 07:56:01 - INFO - codeparrot_training - Step 23431: {'lr': 0.0002918373793393559, 'samples': 11997184, 'steps': 23431, 'loss/train': 2.393853187561035} +02/25/2022 07:56:07 - INFO - codeparrot_training - Step 23432: {'lr': 0.0002918212475374922, 'samples': 11997696, 'steps': 23432, 'loss/train': 1.748063087463379} +02/25/2022 07:56:10 - INFO - codeparrot_training - Step 23433: {'lr': 0.0002918051155564796, 'samples': 11998208, 'steps': 23433, 'loss/train': 1.5458824634552002} +02/25/2022 07:56:16 - INFO - codeparrot_training - Step 23434: {'lr': 0.0002917889833963871, 'samples': 11998720, 'steps': 23434, 'loss/train': 2.2787227630615234} +02/25/2022 07:56:19 - INFO - codeparrot_training - Step 23435: {'lr': 0.00029177285105728393, 'samples': 11999232, 'steps': 23435, 'loss/train': 2.189514636993408} +02/25/2022 07:56:26 - INFO - codeparrot_training - Step 23436: {'lr': 0.00029175671853923907, 'samples': 11999744, 'steps': 23436, 'loss/train': 1.7956748008728027} +02/25/2022 07:56:30 - INFO - codeparrot_training - Step 23437: {'lr': 0.00029174058584232167, 'samples': 12000256, 'steps': 23437, 'loss/train': 1.8041551113128662} +02/25/2022 07:56:35 - INFO - codeparrot_training - Step 23438: {'lr': 0.00029172445296660096, 'samples': 12000768, 'steps': 23438, 'loss/train': 2.3593952655792236} +02/25/2022 07:56:39 - INFO - codeparrot_training - Step 23439: {'lr': 0.0002917083199121459, 'samples': 12001280, 'steps': 23439, 'loss/train': 0.9599772095680237} +02/25/2022 07:56:44 - INFO - codeparrot_training - Step 23440: {'lr': 0.00029169218667902556, 'samples': 12001792, 'steps': 23440, 'loss/train': 0.8404916524887085} +02/25/2022 07:56:48 - INFO - codeparrot_training - Step 23441: {'lr': 0.0002916760532673092, 'samples': 12002304, 'steps': 23441, 'loss/train': 2.7631075382232666} +02/25/2022 07:56:53 - INFO - codeparrot_training - Step 23442: {'lr': 0.0002916599196770659, 'samples': 12002816, 'steps': 23442, 'loss/train': 2.1840200424194336} +02/25/2022 07:56:57 - INFO - codeparrot_training - Step 23443: {'lr': 0.00029164378590836466, 'samples': 12003328, 'steps': 23443, 'loss/train': 1.6124993562698364} +02/25/2022 07:57:02 - INFO - codeparrot_training - Step 23444: {'lr': 0.0002916276519612747, 'samples': 12003840, 'steps': 23444, 'loss/train': 0.8980755805969238} +02/25/2022 07:57:06 - INFO - codeparrot_training - Step 23445: {'lr': 0.00029161151783586506, 'samples': 12004352, 'steps': 23445, 'loss/train': 1.6769890785217285} +02/25/2022 07:57:12 - INFO - codeparrot_training - Step 23446: {'lr': 0.0002915953835322049, 'samples': 12004864, 'steps': 23446, 'loss/train': 1.6724718809127808} +02/25/2022 07:57:15 - INFO - codeparrot_training - Step 23447: {'lr': 0.0002915792490503634, 'samples': 12005376, 'steps': 23447, 'loss/train': 2.1814796924591064} +02/25/2022 07:57:21 - INFO - codeparrot_training - Step 23448: {'lr': 0.0002915631143904095, 'samples': 12005888, 'steps': 23448, 'loss/train': 0.5531330704689026} +02/25/2022 07:57:24 - INFO - codeparrot_training - Step 23449: {'lr': 0.0002915469795524125, 'samples': 12006400, 'steps': 23449, 'loss/train': 2.236222505569458} +02/25/2022 07:57:30 - INFO - codeparrot_training - Step 23450: {'lr': 0.00029153084453644135, 'samples': 12006912, 'steps': 23450, 'loss/train': 1.8865327835083008} +02/25/2022 07:57:33 - INFO - codeparrot_training - Step 23451: {'lr': 0.0002915147093425653, 'samples': 12007424, 'steps': 23451, 'loss/train': 1.2760186195373535} +02/25/2022 07:57:39 - INFO - codeparrot_training - Step 23452: {'lr': 0.0002914985739708534, 'samples': 12007936, 'steps': 23452, 'loss/train': 1.7267462015151978} +02/25/2022 07:57:42 - INFO - codeparrot_training - Step 23453: {'lr': 0.00029148243842137486, 'samples': 12008448, 'steps': 23453, 'loss/train': 1.568948745727539} +02/25/2022 07:57:48 - INFO - codeparrot_training - Step 23454: {'lr': 0.0002914663026941986, 'samples': 12008960, 'steps': 23454, 'loss/train': 2.4859275817871094} +02/25/2022 07:57:51 - INFO - codeparrot_training - Step 23455: {'lr': 0.00029145016678939394, 'samples': 12009472, 'steps': 23455, 'loss/train': 2.7000420093536377} +02/25/2022 07:57:57 - INFO - codeparrot_training - Step 23456: {'lr': 0.00029143403070702994, 'samples': 12009984, 'steps': 23456, 'loss/train': 1.298871636390686} +02/25/2022 07:58:01 - INFO - codeparrot_training - Step 23457: {'lr': 0.0002914178944471757, 'samples': 12010496, 'steps': 23457, 'loss/train': 1.8028844594955444} +02/25/2022 07:58:06 - INFO - codeparrot_training - Step 23458: {'lr': 0.00029140175800990034, 'samples': 12011008, 'steps': 23458, 'loss/train': 2.457233428955078} +02/25/2022 07:58:10 - INFO - codeparrot_training - Step 23459: {'lr': 0.000291385621395273, 'samples': 12011520, 'steps': 23459, 'loss/train': 1.549008846282959} +02/25/2022 07:58:15 - INFO - codeparrot_training - Step 23460: {'lr': 0.0002913694846033628, 'samples': 12012032, 'steps': 23460, 'loss/train': 0.4102102518081665} +02/25/2022 07:58:19 - INFO - codeparrot_training - Step 23461: {'lr': 0.00029135334763423883, 'samples': 12012544, 'steps': 23461, 'loss/train': 1.9378471374511719} +02/25/2022 07:58:25 - INFO - codeparrot_training - Step 23462: {'lr': 0.0002913372104879703, 'samples': 12013056, 'steps': 23462, 'loss/train': 2.656374454498291} +02/25/2022 07:58:28 - INFO - codeparrot_training - Step 23463: {'lr': 0.00029132107316462625, 'samples': 12013568, 'steps': 23463, 'loss/train': 1.1116570234298706} +02/25/2022 07:58:34 - INFO - codeparrot_training - Step 23464: {'lr': 0.00029130493566427586, 'samples': 12014080, 'steps': 23464, 'loss/train': 2.106980562210083} +02/25/2022 07:58:37 - INFO - codeparrot_training - Step 23465: {'lr': 0.0002912887979869882, 'samples': 12014592, 'steps': 23465, 'loss/train': 1.8519765138626099} +02/25/2022 07:58:43 - INFO - codeparrot_training - Step 23466: {'lr': 0.00029127266013283254, 'samples': 12015104, 'steps': 23466, 'loss/train': 2.661858320236206} +02/25/2022 07:58:47 - INFO - codeparrot_training - Step 23467: {'lr': 0.0002912565221018778, 'samples': 12015616, 'steps': 23467, 'loss/train': 1.2358543872833252} +02/25/2022 07:58:52 - INFO - codeparrot_training - Step 23468: {'lr': 0.00029124038389419325, 'samples': 12016128, 'steps': 23468, 'loss/train': 1.6122462749481201} +02/25/2022 07:58:56 - INFO - codeparrot_training - Step 23469: {'lr': 0.00029122424550984805, 'samples': 12016640, 'steps': 23469, 'loss/train': 1.77985680103302} +02/25/2022 07:59:01 - INFO - codeparrot_training - Step 23470: {'lr': 0.00029120810694891126, 'samples': 12017152, 'steps': 23470, 'loss/train': 1.2456860542297363} +02/25/2022 07:59:05 - INFO - codeparrot_training - Step 23471: {'lr': 0.00029119196821145193, 'samples': 12017664, 'steps': 23471, 'loss/train': 1.7298152446746826} +02/25/2022 07:59:11 - INFO - codeparrot_training - Step 23472: {'lr': 0.00029117582929753935, 'samples': 12018176, 'steps': 23472, 'loss/train': 2.3893356323242188} +02/25/2022 07:59:14 - INFO - codeparrot_training - Step 23473: {'lr': 0.00029115969020724263, 'samples': 12018688, 'steps': 23473, 'loss/train': 1.2058300971984863} +02/25/2022 07:59:20 - INFO - codeparrot_training - Step 23474: {'lr': 0.0002911435509406308, 'samples': 12019200, 'steps': 23474, 'loss/train': 2.3196771144866943} +02/25/2022 07:59:23 - INFO - codeparrot_training - Step 23475: {'lr': 0.0002911274114977731, 'samples': 12019712, 'steps': 23475, 'loss/train': 2.146773338317871} +02/25/2022 07:59:29 - INFO - codeparrot_training - Step 23476: {'lr': 0.0002911112718787386, 'samples': 12020224, 'steps': 23476, 'loss/train': 1.6535544395446777} +02/25/2022 07:59:32 - INFO - codeparrot_training - Step 23477: {'lr': 0.0002910951320835965, 'samples': 12020736, 'steps': 23477, 'loss/train': 2.479684829711914} +02/25/2022 07:59:38 - INFO - codeparrot_training - Step 23478: {'lr': 0.0002910789921124159, 'samples': 12021248, 'steps': 23478, 'loss/train': 1.8687198162078857} +02/25/2022 07:59:43 - INFO - codeparrot_training - Step 23479: {'lr': 0.00029106285196526594, 'samples': 12021760, 'steps': 23479, 'loss/train': 2.0104100704193115} +02/25/2022 07:59:47 - INFO - codeparrot_training - Step 23480: {'lr': 0.00029104671164221574, 'samples': 12022272, 'steps': 23480, 'loss/train': 2.8713185787200928} +02/25/2022 07:59:53 - INFO - codeparrot_training - Step 23481: {'lr': 0.0002910305711433345, 'samples': 12022784, 'steps': 23481, 'loss/train': 2.610586166381836} +02/25/2022 07:59:57 - INFO - codeparrot_training - Step 23482: {'lr': 0.00029101443046869127, 'samples': 12023296, 'steps': 23482, 'loss/train': 2.129425287246704} +02/25/2022 08:00:02 - INFO - codeparrot_training - Step 23483: {'lr': 0.0002909982896183553, 'samples': 12023808, 'steps': 23483, 'loss/train': 0.7625100612640381} +02/25/2022 08:00:06 - INFO - codeparrot_training - Step 23484: {'lr': 0.0002909821485923956, 'samples': 12024320, 'steps': 23484, 'loss/train': 2.345798969268799} +02/25/2022 08:00:11 - INFO - codeparrot_training - Step 23485: {'lr': 0.0002909660073908814, 'samples': 12024832, 'steps': 23485, 'loss/train': 2.2357516288757324} +02/25/2022 08:00:15 - INFO - codeparrot_training - Step 23486: {'lr': 0.00029094986601388195, 'samples': 12025344, 'steps': 23486, 'loss/train': 2.2574121952056885} +02/25/2022 08:00:21 - INFO - codeparrot_training - Step 23487: {'lr': 0.00029093372446146613, 'samples': 12025856, 'steps': 23487, 'loss/train': 1.0914568901062012} +02/25/2022 08:00:24 - INFO - codeparrot_training - Step 23488: {'lr': 0.0002909175827337033, 'samples': 12026368, 'steps': 23488, 'loss/train': 2.441570997238159} +02/25/2022 08:00:30 - INFO - codeparrot_training - Step 23489: {'lr': 0.00029090144083066254, 'samples': 12026880, 'steps': 23489, 'loss/train': 1.7172644138336182} +02/25/2022 08:00:33 - INFO - codeparrot_training - Step 23490: {'lr': 0.00029088529875241306, 'samples': 12027392, 'steps': 23490, 'loss/train': 1.4030110836029053} +02/25/2022 08:00:39 - INFO - codeparrot_training - Step 23491: {'lr': 0.00029086915649902383, 'samples': 12027904, 'steps': 23491, 'loss/train': 2.096893548965454} +02/25/2022 08:00:43 - INFO - codeparrot_training - Step 23492: {'lr': 0.0002908530140705642, 'samples': 12028416, 'steps': 23492, 'loss/train': 2.14057993888855} +02/25/2022 08:00:48 - INFO - codeparrot_training - Step 23493: {'lr': 0.0002908368714671031, 'samples': 12028928, 'steps': 23493, 'loss/train': 1.6170334815979004} +02/25/2022 08:00:52 - INFO - codeparrot_training - Step 23494: {'lr': 0.00029082072868870997, 'samples': 12029440, 'steps': 23494, 'loss/train': 1.5164546966552734} +02/25/2022 08:00:57 - INFO - codeparrot_training - Step 23495: {'lr': 0.00029080458573545366, 'samples': 12029952, 'steps': 23495, 'loss/train': 1.3293206691741943} +02/25/2022 08:01:01 - INFO - codeparrot_training - Step 23496: {'lr': 0.0002907884426074036, 'samples': 12030464, 'steps': 23496, 'loss/train': 1.4230456352233887} +02/25/2022 08:01:07 - INFO - codeparrot_training - Step 23497: {'lr': 0.0002907722993046287, 'samples': 12030976, 'steps': 23497, 'loss/train': 1.4491885900497437} +02/25/2022 08:01:10 - INFO - codeparrot_training - Step 23498: {'lr': 0.0002907561558271983, 'samples': 12031488, 'steps': 23498, 'loss/train': 1.9440513849258423} +02/25/2022 08:01:15 - INFO - codeparrot_training - Step 23499: {'lr': 0.0002907400121751814, 'samples': 12032000, 'steps': 23499, 'loss/train': 1.219433307647705} +02/25/2022 08:01:19 - INFO - codeparrot_training - Step 23500: {'lr': 0.0002907238683486472, 'samples': 12032512, 'steps': 23500, 'loss/train': 1.6441736221313477} +02/25/2022 08:01:24 - INFO - codeparrot_training - Step 23501: {'lr': 0.0002907077243476649, 'samples': 12033024, 'steps': 23501, 'loss/train': 1.0706543922424316} +02/25/2022 08:01:28 - INFO - codeparrot_training - Step 23502: {'lr': 0.0002906915801723037, 'samples': 12033536, 'steps': 23502, 'loss/train': 1.668981909751892} +02/25/2022 08:01:33 - INFO - codeparrot_training - Step 23503: {'lr': 0.00029067543582263265, 'samples': 12034048, 'steps': 23503, 'loss/train': 1.8567200899124146} +02/25/2022 08:01:37 - INFO - codeparrot_training - Step 23504: {'lr': 0.00029065929129872095, 'samples': 12034560, 'steps': 23504, 'loss/train': 2.8706367015838623} +02/25/2022 08:01:42 - INFO - codeparrot_training - Step 23505: {'lr': 0.0002906431466006378, 'samples': 12035072, 'steps': 23505, 'loss/train': 2.2313928604125977} +02/25/2022 08:01:46 - INFO - codeparrot_training - Step 23506: {'lr': 0.0002906270017284522, 'samples': 12035584, 'steps': 23506, 'loss/train': 1.8907411098480225} +02/25/2022 08:01:53 - INFO - codeparrot_training - Step 23507: {'lr': 0.0002906108566822335, 'samples': 12036096, 'steps': 23507, 'loss/train': 0.9114428758621216} +02/25/2022 08:01:56 - INFO - codeparrot_training - Step 23508: {'lr': 0.0002905947114620508, 'samples': 12036608, 'steps': 23508, 'loss/train': 2.088773727416992} +02/25/2022 08:02:02 - INFO - codeparrot_training - Step 23509: {'lr': 0.0002905785660679732, 'samples': 12037120, 'steps': 23509, 'loss/train': 1.829337477684021} +02/25/2022 08:02:05 - INFO - codeparrot_training - Step 23510: {'lr': 0.00029056242050006995, 'samples': 12037632, 'steps': 23510, 'loss/train': 1.8250982761383057} +02/25/2022 08:02:11 - INFO - codeparrot_training - Step 23511: {'lr': 0.0002905462747584101, 'samples': 12038144, 'steps': 23511, 'loss/train': 2.116312026977539} +02/25/2022 08:02:14 - INFO - codeparrot_training - Step 23512: {'lr': 0.00029053012884306295, 'samples': 12038656, 'steps': 23512, 'loss/train': 2.298887252807617} +02/25/2022 08:02:20 - INFO - codeparrot_training - Step 23513: {'lr': 0.00029051398275409756, 'samples': 12039168, 'steps': 23513, 'loss/train': 2.1299140453338623} +02/25/2022 08:02:23 - INFO - codeparrot_training - Step 23514: {'lr': 0.0002904978364915832, 'samples': 12039680, 'steps': 23514, 'loss/train': 1.426316499710083} +02/25/2022 08:02:29 - INFO - codeparrot_training - Step 23515: {'lr': 0.0002904816900555889, 'samples': 12040192, 'steps': 23515, 'loss/train': 2.437614917755127} +02/25/2022 08:02:32 - INFO - codeparrot_training - Step 23516: {'lr': 0.000290465543446184, 'samples': 12040704, 'steps': 23516, 'loss/train': 2.7329211235046387} +02/25/2022 08:02:39 - INFO - codeparrot_training - Step 23517: {'lr': 0.0002904493966634374, 'samples': 12041216, 'steps': 23517, 'loss/train': 1.8629857301712036} +02/25/2022 08:02:42 - INFO - codeparrot_training - Step 23518: {'lr': 0.0002904332497074186, 'samples': 12041728, 'steps': 23518, 'loss/train': 2.3979287147521973} +02/25/2022 08:02:48 - INFO - codeparrot_training - Step 23519: {'lr': 0.00029041710257819643, 'samples': 12042240, 'steps': 23519, 'loss/train': 1.4235047101974487} +02/25/2022 08:02:51 - INFO - codeparrot_training - Step 23520: {'lr': 0.0002904009552758403, 'samples': 12042752, 'steps': 23520, 'loss/train': 2.374789237976074} +02/25/2022 08:02:57 - INFO - codeparrot_training - Step 23521: {'lr': 0.0002903848078004193, 'samples': 12043264, 'steps': 23521, 'loss/train': 2.884300708770752} +02/25/2022 08:03:00 - INFO - codeparrot_training - Step 23522: {'lr': 0.00029036866015200256, 'samples': 12043776, 'steps': 23522, 'loss/train': 1.8297744989395142} +02/25/2022 08:03:06 - INFO - codeparrot_training - Step 23523: {'lr': 0.0002903525123306594, 'samples': 12044288, 'steps': 23523, 'loss/train': 2.233595609664917} +02/25/2022 08:03:09 - INFO - codeparrot_training - Step 23524: {'lr': 0.0002903363643364588, 'samples': 12044800, 'steps': 23524, 'loss/train': 1.9957176446914673} +02/25/2022 08:03:15 - INFO - codeparrot_training - Step 23525: {'lr': 0.0002903202161694701, 'samples': 12045312, 'steps': 23525, 'loss/train': 2.4707109928131104} +02/25/2022 08:03:18 - INFO - codeparrot_training - Step 23526: {'lr': 0.0002903040678297624, 'samples': 12045824, 'steps': 23526, 'loss/train': 2.564110279083252} +02/25/2022 08:03:25 - INFO - codeparrot_training - Step 23527: {'lr': 0.00029028791931740483, 'samples': 12046336, 'steps': 23527, 'loss/train': 2.197964668273926} +02/25/2022 08:03:28 - INFO - codeparrot_training - Step 23528: {'lr': 0.0002902717706324666, 'samples': 12046848, 'steps': 23528, 'loss/train': 2.2101938724517822} +02/25/2022 08:03:33 - INFO - codeparrot_training - Step 23529: {'lr': 0.00029025562177501695, 'samples': 12047360, 'steps': 23529, 'loss/train': 2.7704222202301025} +02/25/2022 08:03:37 - INFO - codeparrot_training - Step 23530: {'lr': 0.0002902394727451249, 'samples': 12047872, 'steps': 23530, 'loss/train': 2.554912567138672} +02/25/2022 08:03:42 - INFO - codeparrot_training - Step 23531: {'lr': 0.00029022332354285986, 'samples': 12048384, 'steps': 23531, 'loss/train': 1.7678285837173462} +02/25/2022 08:03:46 - INFO - codeparrot_training - Step 23532: {'lr': 0.00029020717416829075, 'samples': 12048896, 'steps': 23532, 'loss/train': 2.1888411045074463} +02/25/2022 08:03:51 - INFO - codeparrot_training - Step 23533: {'lr': 0.0002901910246214869, 'samples': 12049408, 'steps': 23533, 'loss/train': 1.2653511762619019} +02/25/2022 08:03:55 - INFO - codeparrot_training - Step 23534: {'lr': 0.0002901748749025175, 'samples': 12049920, 'steps': 23534, 'loss/train': 2.6030540466308594} +02/25/2022 08:04:00 - INFO - codeparrot_training - Step 23535: {'lr': 0.00029015872501145164, 'samples': 12050432, 'steps': 23535, 'loss/train': 1.6123956441879272} +02/25/2022 08:04:04 - INFO - codeparrot_training - Step 23536: {'lr': 0.00029014257494835863, 'samples': 12050944, 'steps': 23536, 'loss/train': 1.9868042469024658} +02/25/2022 08:04:10 - INFO - codeparrot_training - Step 23537: {'lr': 0.0002901264247133075, 'samples': 12051456, 'steps': 23537, 'loss/train': 2.39188814163208} +02/25/2022 08:04:14 - INFO - codeparrot_training - Step 23538: {'lr': 0.00029011027430636755, 'samples': 12051968, 'steps': 23538, 'loss/train': 1.4962033033370972} +02/25/2022 08:04:19 - INFO - codeparrot_training - Step 23539: {'lr': 0.00029009412372760793, 'samples': 12052480, 'steps': 23539, 'loss/train': 0.9243882894515991} +02/25/2022 08:04:23 - INFO - codeparrot_training - Step 23540: {'lr': 0.00029007797297709784, 'samples': 12052992, 'steps': 23540, 'loss/train': 2.113966226577759} +02/25/2022 08:04:28 - INFO - codeparrot_training - Step 23541: {'lr': 0.00029006182205490634, 'samples': 12053504, 'steps': 23541, 'loss/train': 2.3709867000579834} +02/25/2022 08:04:31 - INFO - codeparrot_training - Step 23542: {'lr': 0.0002900456709611028, 'samples': 12054016, 'steps': 23542, 'loss/train': 1.5875611305236816} +02/25/2022 08:04:37 - INFO - codeparrot_training - Step 23543: {'lr': 0.0002900295196957563, 'samples': 12054528, 'steps': 23543, 'loss/train': 1.8569351434707642} +02/25/2022 08:04:41 - INFO - codeparrot_training - Step 23544: {'lr': 0.00029001336825893603, 'samples': 12055040, 'steps': 23544, 'loss/train': 2.1762990951538086} +02/25/2022 08:04:46 - INFO - codeparrot_training - Step 23545: {'lr': 0.0002899972166507112, 'samples': 12055552, 'steps': 23545, 'loss/train': 2.8551700115203857} +02/25/2022 08:04:50 - INFO - codeparrot_training - Step 23546: {'lr': 0.000289981064871151, 'samples': 12056064, 'steps': 23546, 'loss/train': 0.300271213054657} +02/25/2022 08:04:55 - INFO - codeparrot_training - Step 23547: {'lr': 0.00028996491292032465, 'samples': 12056576, 'steps': 23547, 'loss/train': 2.8087878227233887} +02/25/2022 08:04:58 - INFO - codeparrot_training - Step 23548: {'lr': 0.00028994876079830125, 'samples': 12057088, 'steps': 23548, 'loss/train': 2.1102256774902344} +02/25/2022 08:05:04 - INFO - codeparrot_training - Step 23549: {'lr': 0.00028993260850515015, 'samples': 12057600, 'steps': 23549, 'loss/train': 1.7570375204086304} +02/25/2022 08:05:08 - INFO - codeparrot_training - Step 23550: {'lr': 0.0002899164560409403, 'samples': 12058112, 'steps': 23550, 'loss/train': 2.206887722015381} +02/25/2022 08:05:13 - INFO - codeparrot_training - Step 23551: {'lr': 0.00028990030340574107, 'samples': 12058624, 'steps': 23551, 'loss/train': 2.4764509201049805} +02/25/2022 08:05:17 - INFO - codeparrot_training - Step 23552: {'lr': 0.0002898841505996216, 'samples': 12059136, 'steps': 23552, 'loss/train': 1.5376133918762207} +02/25/2022 08:05:23 - INFO - codeparrot_training - Step 23553: {'lr': 0.0002898679976226511, 'samples': 12059648, 'steps': 23553, 'loss/train': 1.9802324771881104} +02/25/2022 08:05:26 - INFO - codeparrot_training - Step 23554: {'lr': 0.00028985184447489874, 'samples': 12060160, 'steps': 23554, 'loss/train': 2.137173891067505} +02/25/2022 08:05:32 - INFO - codeparrot_training - Step 23555: {'lr': 0.00028983569115643385, 'samples': 12060672, 'steps': 23555, 'loss/train': 1.8138989210128784} +02/25/2022 08:05:35 - INFO - codeparrot_training - Step 23556: {'lr': 0.0002898195376673254, 'samples': 12061184, 'steps': 23556, 'loss/train': 2.3573219776153564} +02/25/2022 08:05:41 - INFO - codeparrot_training - Step 23557: {'lr': 0.0002898033840076427, 'samples': 12061696, 'steps': 23557, 'loss/train': 2.5795705318450928} +02/25/2022 08:05:44 - INFO - codeparrot_training - Step 23558: {'lr': 0.00028978723017745496, 'samples': 12062208, 'steps': 23558, 'loss/train': 1.5039584636688232} +02/25/2022 08:05:50 - INFO - codeparrot_training - Step 23559: {'lr': 0.0002897710761768313, 'samples': 12062720, 'steps': 23559, 'loss/train': 1.0971256494522095} +02/25/2022 08:05:53 - INFO - codeparrot_training - Step 23560: {'lr': 0.00028975492200584106, 'samples': 12063232, 'steps': 23560, 'loss/train': 2.4428679943084717} +02/25/2022 08:05:59 - INFO - codeparrot_training - Step 23561: {'lr': 0.00028973876766455334, 'samples': 12063744, 'steps': 23561, 'loss/train': 1.3993030786514282} +02/25/2022 08:06:02 - INFO - codeparrot_training - Step 23562: {'lr': 0.00028972261315303736, 'samples': 12064256, 'steps': 23562, 'loss/train': 0.6624284982681274} +02/25/2022 08:06:08 - INFO - codeparrot_training - Step 23563: {'lr': 0.0002897064584713623, 'samples': 12064768, 'steps': 23563, 'loss/train': 2.4249911308288574} +02/25/2022 08:06:11 - INFO - codeparrot_training - Step 23564: {'lr': 0.0002896903036195974, 'samples': 12065280, 'steps': 23564, 'loss/train': 0.613732099533081} +02/25/2022 08:06:18 - INFO - codeparrot_training - Step 23565: {'lr': 0.0002896741485978118, 'samples': 12065792, 'steps': 23565, 'loss/train': 1.7938508987426758} +02/25/2022 08:06:21 - INFO - codeparrot_training - Step 23566: {'lr': 0.0002896579934060748, 'samples': 12066304, 'steps': 23566, 'loss/train': 0.8712823987007141} +02/25/2022 08:06:27 - INFO - codeparrot_training - Step 23567: {'lr': 0.00028964183804445554, 'samples': 12066816, 'steps': 23567, 'loss/train': 2.06577730178833} +02/25/2022 08:06:30 - INFO - codeparrot_training - Step 23568: {'lr': 0.00028962568251302326, 'samples': 12067328, 'steps': 23568, 'loss/train': 1.487418293952942} +02/25/2022 08:06:36 - INFO - codeparrot_training - Step 23569: {'lr': 0.00028960952681184705, 'samples': 12067840, 'steps': 23569, 'loss/train': 1.7848950624465942} +02/25/2022 08:06:39 - INFO - codeparrot_training - Step 23570: {'lr': 0.00028959337094099617, 'samples': 12068352, 'steps': 23570, 'loss/train': 2.7480626106262207} +02/25/2022 08:06:44 - INFO - codeparrot_training - Step 23571: {'lr': 0.00028957721490054, 'samples': 12068864, 'steps': 23571, 'loss/train': 1.9302492141723633} +02/25/2022 08:06:51 - INFO - codeparrot_training - Step 23572: {'lr': 0.00028956105869054757, 'samples': 12069376, 'steps': 23572, 'loss/train': 1.0581475496292114} +02/25/2022 08:06:54 - INFO - codeparrot_training - Step 23573: {'lr': 0.0002895449023110881, 'samples': 12069888, 'steps': 23573, 'loss/train': 2.0345520973205566} +02/25/2022 08:07:00 - INFO - codeparrot_training - Step 23574: {'lr': 0.00028952874576223083, 'samples': 12070400, 'steps': 23574, 'loss/train': 1.625321865081787} +02/25/2022 08:07:03 - INFO - codeparrot_training - Step 23575: {'lr': 0.000289512589044045, 'samples': 12070912, 'steps': 23575, 'loss/train': 2.2414658069610596} +02/25/2022 08:07:09 - INFO - codeparrot_training - Step 23576: {'lr': 0.0002894964321565997, 'samples': 12071424, 'steps': 23576, 'loss/train': 0.6521539092063904} +02/25/2022 08:07:12 - INFO - codeparrot_training - Step 23577: {'lr': 0.00028948027509996435, 'samples': 12071936, 'steps': 23577, 'loss/train': 0.5160545706748962} +02/25/2022 08:07:18 - INFO - codeparrot_training - Step 23578: {'lr': 0.0002894641178742079, 'samples': 12072448, 'steps': 23578, 'loss/train': 2.873347520828247} +02/25/2022 08:07:21 - INFO - codeparrot_training - Step 23579: {'lr': 0.0002894479604793998, 'samples': 12072960, 'steps': 23579, 'loss/train': 2.0298352241516113} +02/25/2022 08:07:27 - INFO - codeparrot_training - Step 23580: {'lr': 0.0002894318029156091, 'samples': 12073472, 'steps': 23580, 'loss/train': 2.342482328414917} +02/25/2022 08:07:30 - INFO - codeparrot_training - Step 23581: {'lr': 0.0002894156451829051, 'samples': 12073984, 'steps': 23581, 'loss/train': 2.3614048957824707} +02/25/2022 08:07:36 - INFO - codeparrot_training - Step 23582: {'lr': 0.000289399487281357, 'samples': 12074496, 'steps': 23582, 'loss/train': 1.5407274961471558} +02/25/2022 08:07:39 - INFO - codeparrot_training - Step 23583: {'lr': 0.000289383329211034, 'samples': 12075008, 'steps': 23583, 'loss/train': 1.610329270362854} +02/25/2022 08:07:45 - INFO - codeparrot_training - Step 23584: {'lr': 0.0002893671709720054, 'samples': 12075520, 'steps': 23584, 'loss/train': 1.6335129737854004} +02/25/2022 08:07:49 - INFO - codeparrot_training - Step 23585: {'lr': 0.0002893510125643403, 'samples': 12076032, 'steps': 23585, 'loss/train': 1.1203486919403076} +02/25/2022 08:07:54 - INFO - codeparrot_training - Step 23586: {'lr': 0.0002893348539881079, 'samples': 12076544, 'steps': 23586, 'loss/train': 1.49395751953125} +02/25/2022 08:07:58 - INFO - codeparrot_training - Step 23587: {'lr': 0.0002893186952433775, 'samples': 12077056, 'steps': 23587, 'loss/train': 1.9389312267303467} +02/25/2022 08:08:03 - INFO - codeparrot_training - Step 23588: {'lr': 0.00028930253633021826, 'samples': 12077568, 'steps': 23588, 'loss/train': 2.972951650619507} +02/25/2022 08:08:07 - INFO - codeparrot_training - Step 23589: {'lr': 0.0002892863772486995, 'samples': 12078080, 'steps': 23589, 'loss/train': 1.420477271080017} +02/25/2022 08:08:12 - INFO - codeparrot_training - Step 23590: {'lr': 0.0002892702179988903, 'samples': 12078592, 'steps': 23590, 'loss/train': 0.7500631809234619} +02/25/2022 08:08:16 - INFO - codeparrot_training - Step 23591: {'lr': 0.00028925405858086007, 'samples': 12079104, 'steps': 23591, 'loss/train': 2.364516496658325} +02/25/2022 08:08:21 - INFO - codeparrot_training - Step 23592: {'lr': 0.0002892378989946779, 'samples': 12079616, 'steps': 23592, 'loss/train': 1.313019871711731} +02/25/2022 08:08:25 - INFO - codeparrot_training - Step 23593: {'lr': 0.00028922173924041296, 'samples': 12080128, 'steps': 23593, 'loss/train': 1.4040350914001465} +02/25/2022 08:08:31 - INFO - codeparrot_training - Step 23594: {'lr': 0.00028920557931813454, 'samples': 12080640, 'steps': 23594, 'loss/train': 1.1124212741851807} +02/25/2022 08:08:34 - INFO - codeparrot_training - Step 23595: {'lr': 0.00028918941922791185, 'samples': 12081152, 'steps': 23595, 'loss/train': 1.933236002922058} +02/25/2022 08:08:39 - INFO - codeparrot_training - Step 23596: {'lr': 0.00028917325896981417, 'samples': 12081664, 'steps': 23596, 'loss/train': 1.9197604656219482} +02/25/2022 08:08:43 - INFO - codeparrot_training - Step 23597: {'lr': 0.0002891570985439108, 'samples': 12082176, 'steps': 23597, 'loss/train': 2.299419403076172} +02/25/2022 08:08:49 - INFO - codeparrot_training - Step 23598: {'lr': 0.00028914093795027063, 'samples': 12082688, 'steps': 23598, 'loss/train': 1.5218538045883179} +02/25/2022 08:08:53 - INFO - codeparrot_training - Step 23599: {'lr': 0.0002891247771889633, 'samples': 12083200, 'steps': 23599, 'loss/train': 2.170194625854492} +02/25/2022 08:08:58 - INFO - codeparrot_training - Step 23600: {'lr': 0.00028910861626005774, 'samples': 12083712, 'steps': 23600, 'loss/train': 1.9556257724761963} +02/25/2022 08:09:02 - INFO - codeparrot_training - Step 23601: {'lr': 0.0002890924551636234, 'samples': 12084224, 'steps': 23601, 'loss/train': 1.3168898820877075} +02/25/2022 08:09:07 - INFO - codeparrot_training - Step 23602: {'lr': 0.00028907629389972924, 'samples': 12084736, 'steps': 23602, 'loss/train': 1.6897366046905518} +02/25/2022 08:09:11 - INFO - codeparrot_training - Step 23603: {'lr': 0.00028906013246844474, 'samples': 12085248, 'steps': 23603, 'loss/train': 2.4212253093719482} +02/25/2022 08:09:16 - INFO - codeparrot_training - Step 23604: {'lr': 0.000289043970869839, 'samples': 12085760, 'steps': 23604, 'loss/train': 1.745396614074707} +02/25/2022 08:09:20 - INFO - codeparrot_training - Step 23605: {'lr': 0.0002890278091039813, 'samples': 12086272, 'steps': 23605, 'loss/train': 1.6462994813919067} +02/25/2022 08:09:25 - INFO - codeparrot_training - Step 23606: {'lr': 0.00028901164717094085, 'samples': 12086784, 'steps': 23606, 'loss/train': 0.9969839453697205} +02/25/2022 08:09:29 - INFO - codeparrot_training - Step 23607: {'lr': 0.0002889954850707869, 'samples': 12087296, 'steps': 23607, 'loss/train': 2.06868052482605} +02/25/2022 08:09:34 - INFO - codeparrot_training - Step 23608: {'lr': 0.0002889793228035887, 'samples': 12087808, 'steps': 23608, 'loss/train': 0.4268155097961426} +02/25/2022 08:09:37 - INFO - codeparrot_training - Step 23609: {'lr': 0.0002889631603694154, 'samples': 12088320, 'steps': 23609, 'loss/train': 2.3262276649475098} +02/25/2022 08:09:44 - INFO - codeparrot_training - Step 23610: {'lr': 0.00028894699776833637, 'samples': 12088832, 'steps': 23610, 'loss/train': 2.541623115539551} +02/25/2022 08:09:47 - INFO - codeparrot_training - Step 23611: {'lr': 0.0002889308350004207, 'samples': 12089344, 'steps': 23611, 'loss/train': 2.428924083709717} +02/25/2022 08:09:53 - INFO - codeparrot_training - Step 23612: {'lr': 0.00028891467206573773, 'samples': 12089856, 'steps': 23612, 'loss/train': 2.2161452770233154} +02/25/2022 08:09:56 - INFO - codeparrot_training - Step 23613: {'lr': 0.00028889850896435666, 'samples': 12090368, 'steps': 23613, 'loss/train': 1.9017736911773682} +02/25/2022 08:10:02 - INFO - codeparrot_training - Step 23614: {'lr': 0.00028888234569634673, 'samples': 12090880, 'steps': 23614, 'loss/train': 0.44609636068344116} +02/25/2022 08:10:05 - INFO - codeparrot_training - Step 23615: {'lr': 0.00028886618226177716, 'samples': 12091392, 'steps': 23615, 'loss/train': 0.5661726593971252} +02/25/2022 08:10:11 - INFO - codeparrot_training - Step 23616: {'lr': 0.00028885001866071723, 'samples': 12091904, 'steps': 23616, 'loss/train': 1.4779706001281738} +02/25/2022 08:10:14 - INFO - codeparrot_training - Step 23617: {'lr': 0.0002888338548932361, 'samples': 12092416, 'steps': 23617, 'loss/train': 1.9706391096115112} +02/25/2022 08:10:20 - INFO - codeparrot_training - Step 23618: {'lr': 0.00028881769095940316, 'samples': 12092928, 'steps': 23618, 'loss/train': 1.127479910850525} +02/25/2022 08:10:23 - INFO - codeparrot_training - Step 23619: {'lr': 0.0002888015268592875, 'samples': 12093440, 'steps': 23619, 'loss/train': 0.5383728742599487} +02/25/2022 08:10:29 - INFO - codeparrot_training - Step 23620: {'lr': 0.0002887853625929584, 'samples': 12093952, 'steps': 23620, 'loss/train': 1.414881944656372} +02/25/2022 08:10:33 - INFO - codeparrot_training - Step 23621: {'lr': 0.00028876919816048517, 'samples': 12094464, 'steps': 23621, 'loss/train': 2.3857603073120117} +02/25/2022 08:10:38 - INFO - codeparrot_training - Step 23622: {'lr': 0.00028875303356193697, 'samples': 12094976, 'steps': 23622, 'loss/train': 2.302809476852417} +02/25/2022 08:10:42 - INFO - codeparrot_training - Step 23623: {'lr': 0.0002887368687973831, 'samples': 12095488, 'steps': 23623, 'loss/train': 2.480724811553955} +02/25/2022 08:10:47 - INFO - codeparrot_training - Step 23624: {'lr': 0.00028872070386689274, 'samples': 12096000, 'steps': 23624, 'loss/train': 2.1395230293273926} +02/25/2022 08:10:51 - INFO - codeparrot_training - Step 23625: {'lr': 0.00028870453877053527, 'samples': 12096512, 'steps': 23625, 'loss/train': 1.15674889087677} +02/25/2022 08:10:56 - INFO - codeparrot_training - Step 23626: {'lr': 0.00028868837350837977, 'samples': 12097024, 'steps': 23626, 'loss/train': 1.0673731565475464} +02/25/2022 08:11:00 - INFO - codeparrot_training - Step 23627: {'lr': 0.0002886722080804956, 'samples': 12097536, 'steps': 23627, 'loss/train': 2.1376001834869385} +02/25/2022 08:11:05 - INFO - codeparrot_training - Step 23628: {'lr': 0.000288656042486952, 'samples': 12098048, 'steps': 23628, 'loss/train': 2.4457335472106934} +02/25/2022 08:11:09 - INFO - codeparrot_training - Step 23629: {'lr': 0.0002886398767278181, 'samples': 12098560, 'steps': 23629, 'loss/train': 2.3115906715393066} +02/25/2022 08:11:15 - INFO - codeparrot_training - Step 23630: {'lr': 0.0002886237108031633, 'samples': 12099072, 'steps': 23630, 'loss/train': 1.891711950302124} +02/25/2022 08:11:19 - INFO - codeparrot_training - Step 23631: {'lr': 0.0002886075447130568, 'samples': 12099584, 'steps': 23631, 'loss/train': 2.793041706085205} +02/25/2022 08:11:24 - INFO - codeparrot_training - Step 23632: {'lr': 0.00028859137845756784, 'samples': 12100096, 'steps': 23632, 'loss/train': 2.1435625553131104} +02/25/2022 08:11:28 - INFO - codeparrot_training - Step 23633: {'lr': 0.00028857521203676565, 'samples': 12100608, 'steps': 23633, 'loss/train': 1.3745698928833008} +02/25/2022 08:11:33 - INFO - codeparrot_training - Step 23634: {'lr': 0.0002885590454507195, 'samples': 12101120, 'steps': 23634, 'loss/train': 1.522180199623108} +02/25/2022 08:11:37 - INFO - codeparrot_training - Step 23635: {'lr': 0.00028854287869949856, 'samples': 12101632, 'steps': 23635, 'loss/train': 2.0995702743530273} +02/25/2022 08:11:42 - INFO - codeparrot_training - Step 23636: {'lr': 0.00028852671178317233, 'samples': 12102144, 'steps': 23636, 'loss/train': 1.5845879316329956} +02/25/2022 08:11:46 - INFO - codeparrot_training - Step 23637: {'lr': 0.00028851054470180977, 'samples': 12102656, 'steps': 23637, 'loss/train': 2.264266014099121} +02/25/2022 08:11:51 - INFO - codeparrot_training - Step 23638: {'lr': 0.0002884943774554803, 'samples': 12103168, 'steps': 23638, 'loss/train': 6.310962677001953} +02/25/2022 08:11:55 - INFO - codeparrot_training - Step 23639: {'lr': 0.0002884782100442531, 'samples': 12103680, 'steps': 23639, 'loss/train': 2.1470947265625} +02/25/2022 08:12:00 - INFO - codeparrot_training - Step 23640: {'lr': 0.0002884620424681976, 'samples': 12104192, 'steps': 23640, 'loss/train': 1.6811007261276245} +02/25/2022 08:12:04 - INFO - codeparrot_training - Step 23641: {'lr': 0.0002884458747273828, 'samples': 12104704, 'steps': 23641, 'loss/train': 1.70032799243927} +02/25/2022 08:12:09 - INFO - codeparrot_training - Step 23642: {'lr': 0.0002884297068218781, 'samples': 12105216, 'steps': 23642, 'loss/train': 0.9124624729156494} +02/25/2022 08:12:13 - INFO - codeparrot_training - Step 23643: {'lr': 0.00028841353875175274, 'samples': 12105728, 'steps': 23643, 'loss/train': 1.7605984210968018} +02/25/2022 08:12:19 - INFO - codeparrot_training - Step 23644: {'lr': 0.000288397370517076, 'samples': 12106240, 'steps': 23644, 'loss/train': 2.816835403442383} +02/25/2022 08:12:22 - INFO - codeparrot_training - Step 23645: {'lr': 0.0002883812021179171, 'samples': 12106752, 'steps': 23645, 'loss/train': 2.8557376861572266} +02/25/2022 08:12:28 - INFO - codeparrot_training - Step 23646: {'lr': 0.0002883650335543453, 'samples': 12107264, 'steps': 23646, 'loss/train': 0.38947081565856934} +02/25/2022 08:12:32 - INFO - codeparrot_training - Step 23647: {'lr': 0.00028834886482643, 'samples': 12107776, 'steps': 23647, 'loss/train': 2.050731897354126} +02/25/2022 08:12:37 - INFO - codeparrot_training - Step 23648: {'lr': 0.00028833269593424017, 'samples': 12108288, 'steps': 23648, 'loss/train': 1.3458151817321777} +02/25/2022 08:12:41 - INFO - codeparrot_training - Step 23649: {'lr': 0.0002883165268778454, 'samples': 12108800, 'steps': 23649, 'loss/train': 1.7565962076187134} +02/25/2022 08:12:46 - INFO - codeparrot_training - Step 23650: {'lr': 0.00028830035765731464, 'samples': 12109312, 'steps': 23650, 'loss/train': 1.6757336854934692} +02/25/2022 08:12:50 - INFO - codeparrot_training - Step 23651: {'lr': 0.0002882841882727174, 'samples': 12109824, 'steps': 23651, 'loss/train': 1.373449683189392} +02/25/2022 08:12:56 - INFO - codeparrot_training - Step 23652: {'lr': 0.00028826801872412284, 'samples': 12110336, 'steps': 23652, 'loss/train': 1.4238611459732056} +02/25/2022 08:12:59 - INFO - codeparrot_training - Step 23653: {'lr': 0.00028825184901160023, 'samples': 12110848, 'steps': 23653, 'loss/train': 1.948917269706726} +02/25/2022 08:13:05 - INFO - codeparrot_training - Step 23654: {'lr': 0.00028823567913521884, 'samples': 12111360, 'steps': 23654, 'loss/train': 1.3194708824157715} +02/25/2022 08:13:08 - INFO - codeparrot_training - Step 23655: {'lr': 0.0002882195090950479, 'samples': 12111872, 'steps': 23655, 'loss/train': 0.4681048095226288} +02/25/2022 08:13:14 - INFO - codeparrot_training - Step 23656: {'lr': 0.00028820333889115684, 'samples': 12112384, 'steps': 23656, 'loss/train': 2.575822591781616} +02/25/2022 08:13:18 - INFO - codeparrot_training - Step 23657: {'lr': 0.0002881871685236147, 'samples': 12112896, 'steps': 23657, 'loss/train': 2.1207542419433594} +02/25/2022 08:13:23 - INFO - codeparrot_training - Step 23658: {'lr': 0.0002881709979924909, 'samples': 12113408, 'steps': 23658, 'loss/train': 1.9079558849334717} +02/25/2022 08:13:27 - INFO - codeparrot_training - Step 23659: {'lr': 0.00028815482729785467, 'samples': 12113920, 'steps': 23659, 'loss/train': 2.139220952987671} +02/25/2022 08:13:32 - INFO - codeparrot_training - Step 23660: {'lr': 0.00028813865643977527, 'samples': 12114432, 'steps': 23660, 'loss/train': 1.6365087032318115} +02/25/2022 08:13:36 - INFO - codeparrot_training - Step 23661: {'lr': 0.000288122485418322, 'samples': 12114944, 'steps': 23661, 'loss/train': 0.6145601868629456} +02/25/2022 08:13:41 - INFO - codeparrot_training - Step 23662: {'lr': 0.0002881063142335641, 'samples': 12115456, 'steps': 23662, 'loss/train': 1.2969300746917725} +02/25/2022 08:13:45 - INFO - codeparrot_training - Step 23663: {'lr': 0.0002880901428855708, 'samples': 12115968, 'steps': 23663, 'loss/train': 1.2844479084014893} +02/25/2022 08:13:50 - INFO - codeparrot_training - Step 23664: {'lr': 0.0002880739713744114, 'samples': 12116480, 'steps': 23664, 'loss/train': 1.845025658607483} +02/25/2022 08:13:54 - INFO - codeparrot_training - Step 23665: {'lr': 0.00028805779970015525, 'samples': 12116992, 'steps': 23665, 'loss/train': 0.7780728340148926} +02/25/2022 08:14:00 - INFO - codeparrot_training - Step 23666: {'lr': 0.00028804162786287156, 'samples': 12117504, 'steps': 23666, 'loss/train': 0.5093708634376526} +02/25/2022 08:14:04 - INFO - codeparrot_training - Step 23667: {'lr': 0.0002880254558626297, 'samples': 12118016, 'steps': 23667, 'loss/train': 1.828165888786316} +02/25/2022 08:14:09 - INFO - codeparrot_training - Step 23668: {'lr': 0.0002880092836994987, 'samples': 12118528, 'steps': 23668, 'loss/train': 2.9405670166015625} +02/25/2022 08:14:13 - INFO - codeparrot_training - Step 23669: {'lr': 0.0002879931113735482, 'samples': 12119040, 'steps': 23669, 'loss/train': 1.4543344974517822} +02/25/2022 08:14:18 - INFO - codeparrot_training - Step 23670: {'lr': 0.0002879769388848471, 'samples': 12119552, 'steps': 23670, 'loss/train': 2.3173696994781494} +02/25/2022 08:14:22 - INFO - codeparrot_training - Step 23671: {'lr': 0.0002879607662334649, 'samples': 12120064, 'steps': 23671, 'loss/train': 0.4551342725753784} +02/25/2022 08:14:28 - INFO - codeparrot_training - Step 23672: {'lr': 0.0002879445934194709, 'samples': 12120576, 'steps': 23672, 'loss/train': 0.9114976525306702} +02/25/2022 08:14:31 - INFO - codeparrot_training - Step 23673: {'lr': 0.0002879284204429342, 'samples': 12121088, 'steps': 23673, 'loss/train': 2.378455400466919} +02/25/2022 08:14:36 - INFO - codeparrot_training - Step 23674: {'lr': 0.0002879122473039243, 'samples': 12121600, 'steps': 23674, 'loss/train': 3.390928030014038} +02/25/2022 08:14:40 - INFO - codeparrot_training - Step 23675: {'lr': 0.0002878960740025103, 'samples': 12122112, 'steps': 23675, 'loss/train': 1.4086164236068726} +02/25/2022 08:14:46 - INFO - codeparrot_training - Step 23676: {'lr': 0.00028787990053876156, 'samples': 12122624, 'steps': 23676, 'loss/train': 1.6998674869537354} +02/25/2022 08:14:49 - INFO - codeparrot_training - Step 23677: {'lr': 0.00028786372691274735, 'samples': 12123136, 'steps': 23677, 'loss/train': 1.3767874240875244} +02/25/2022 08:14:55 - INFO - codeparrot_training - Step 23678: {'lr': 0.00028784755312453704, 'samples': 12123648, 'steps': 23678, 'loss/train': 2.796020269393921} +02/25/2022 08:14:58 - INFO - codeparrot_training - Step 23679: {'lr': 0.0002878313791741997, 'samples': 12124160, 'steps': 23679, 'loss/train': 1.5031064748764038} +02/25/2022 08:15:05 - INFO - codeparrot_training - Step 23680: {'lr': 0.00028781520506180485, 'samples': 12124672, 'steps': 23680, 'loss/train': 1.7649303674697876} +02/25/2022 08:15:08 - INFO - codeparrot_training - Step 23681: {'lr': 0.0002877990307874216, 'samples': 12125184, 'steps': 23681, 'loss/train': 1.710687279701233} +02/25/2022 08:15:11 - INFO - codeparrot_training - Step 23682: {'lr': 0.00028778285635111935, 'samples': 12125696, 'steps': 23682, 'loss/train': 2.431793689727783} +02/25/2022 08:15:18 - INFO - codeparrot_training - Step 23683: {'lr': 0.0002877666817529673, 'samples': 12126208, 'steps': 23683, 'loss/train': 0.13264045119285583} +02/25/2022 08:15:21 - INFO - codeparrot_training - Step 23684: {'lr': 0.0002877505069930348, 'samples': 12126720, 'steps': 23684, 'loss/train': 0.6525019407272339} +02/25/2022 08:15:27 - INFO - codeparrot_training - Step 23685: {'lr': 0.0002877343320713911, 'samples': 12127232, 'steps': 23685, 'loss/train': 1.9331852197647095} +02/25/2022 08:15:30 - INFO - codeparrot_training - Step 23686: {'lr': 0.0002877181569881055, 'samples': 12127744, 'steps': 23686, 'loss/train': 1.9713950157165527} +02/25/2022 08:15:35 - INFO - codeparrot_training - Step 23687: {'lr': 0.00028770198174324737, 'samples': 12128256, 'steps': 23687, 'loss/train': 1.7827495336532593} +02/25/2022 08:15:39 - INFO - codeparrot_training - Step 23688: {'lr': 0.00028768580633688586, 'samples': 12128768, 'steps': 23688, 'loss/train': 1.46895170211792} +02/25/2022 08:15:45 - INFO - codeparrot_training - Step 23689: {'lr': 0.00028766963076909033, 'samples': 12129280, 'steps': 23689, 'loss/train': 0.99400794506073} +02/25/2022 08:15:48 - INFO - codeparrot_training - Step 23690: {'lr': 0.00028765345503993, 'samples': 12129792, 'steps': 23690, 'loss/train': 1.38034188747406} +02/25/2022 08:15:54 - INFO - codeparrot_training - Step 23691: {'lr': 0.0002876372791494743, 'samples': 12130304, 'steps': 23691, 'loss/train': 1.8652857542037964} +02/25/2022 08:15:57 - INFO - codeparrot_training - Step 23692: {'lr': 0.00028762110309779246, 'samples': 12130816, 'steps': 23692, 'loss/train': 2.032402515411377} +02/25/2022 08:16:03 - INFO - codeparrot_training - Step 23693: {'lr': 0.0002876049268849537, 'samples': 12131328, 'steps': 23693, 'loss/train': 1.7879059314727783} +02/25/2022 08:16:07 - INFO - codeparrot_training - Step 23694: {'lr': 0.0002875887505110274, 'samples': 12131840, 'steps': 23694, 'loss/train': 2.240527868270874} +02/25/2022 08:16:13 - INFO - codeparrot_training - Step 23695: {'lr': 0.00028757257397608285, 'samples': 12132352, 'steps': 23695, 'loss/train': 1.4836267232894897} +02/25/2022 08:16:16 - INFO - codeparrot_training - Step 23696: {'lr': 0.0002875563972801893, 'samples': 12132864, 'steps': 23696, 'loss/train': 2.52135968208313} +02/25/2022 08:16:19 - INFO - codeparrot_training - Step 23697: {'lr': 0.0002875402204234161, 'samples': 12133376, 'steps': 23697, 'loss/train': 2.2787137031555176} +02/25/2022 08:16:25 - INFO - codeparrot_training - Step 23698: {'lr': 0.0002875240434058324, 'samples': 12133888, 'steps': 23698, 'loss/train': 2.455629587173462} +02/25/2022 08:16:31 - INFO - codeparrot_training - Step 23699: {'lr': 0.0002875078662275078, 'samples': 12134400, 'steps': 23699, 'loss/train': 2.860032796859741} +02/25/2022 08:16:34 - INFO - codeparrot_training - Step 23700: {'lr': 0.00028749168888851125, 'samples': 12134912, 'steps': 23700, 'loss/train': 1.3428969383239746} +02/25/2022 08:16:39 - INFO - codeparrot_training - Step 23701: {'lr': 0.0002874755113889123, 'samples': 12135424, 'steps': 23701, 'loss/train': 1.2920112609863281} +02/25/2022 08:16:43 - INFO - codeparrot_training - Step 23702: {'lr': 0.0002874593337287801, 'samples': 12135936, 'steps': 23702, 'loss/train': 1.5097090005874634} +02/25/2022 08:16:49 - INFO - codeparrot_training - Step 23703: {'lr': 0.00028744315590818406, 'samples': 12136448, 'steps': 23703, 'loss/train': 1.324911117553711} +02/25/2022 08:16:53 - INFO - codeparrot_training - Step 23704: {'lr': 0.0002874269779271934, 'samples': 12136960, 'steps': 23704, 'loss/train': 2.5079987049102783} +02/25/2022 08:16:56 - INFO - codeparrot_training - Step 23705: {'lr': 0.0002874107997858775, 'samples': 12137472, 'steps': 23705, 'loss/train': 2.4839026927948} +02/25/2022 08:17:02 - INFO - codeparrot_training - Step 23706: {'lr': 0.00028739462148430554, 'samples': 12137984, 'steps': 23706, 'loss/train': 1.077143669128418} +02/25/2022 08:17:05 - INFO - codeparrot_training - Step 23707: {'lr': 0.0002873784430225469, 'samples': 12138496, 'steps': 23707, 'loss/train': 2.0203192234039307} +02/25/2022 08:17:11 - INFO - codeparrot_training - Step 23708: {'lr': 0.000287362264400671, 'samples': 12139008, 'steps': 23708, 'loss/train': 1.7209244966506958} +02/25/2022 08:17:14 - INFO - codeparrot_training - Step 23709: {'lr': 0.00028734608561874686, 'samples': 12139520, 'steps': 23709, 'loss/train': 1.2826449871063232} +02/25/2022 08:17:20 - INFO - codeparrot_training - Step 23710: {'lr': 0.0002873299066768441, 'samples': 12140032, 'steps': 23710, 'loss/train': 1.8688377141952515} +02/25/2022 08:17:24 - INFO - codeparrot_training - Step 23711: {'lr': 0.0002873137275750317, 'samples': 12140544, 'steps': 23711, 'loss/train': 1.4672523736953735} +02/25/2022 08:17:29 - INFO - codeparrot_training - Step 23712: {'lr': 0.0002872975483133793, 'samples': 12141056, 'steps': 23712, 'loss/train': 2.293625593185425} +02/25/2022 08:17:33 - INFO - codeparrot_training - Step 23713: {'lr': 0.00028728136889195595, 'samples': 12141568, 'steps': 23713, 'loss/train': 1.154145359992981} +02/25/2022 08:17:39 - INFO - codeparrot_training - Step 23714: {'lr': 0.0002872651893108311, 'samples': 12142080, 'steps': 23714, 'loss/train': 1.65375554561615} +02/25/2022 08:17:42 - INFO - codeparrot_training - Step 23715: {'lr': 0.000287249009570074, 'samples': 12142592, 'steps': 23715, 'loss/train': 2.7515904903411865} +02/25/2022 08:17:48 - INFO - codeparrot_training - Step 23716: {'lr': 0.000287232829669754, 'samples': 12143104, 'steps': 23716, 'loss/train': 1.6771210432052612} +02/25/2022 08:17:51 - INFO - codeparrot_training - Step 23717: {'lr': 0.0002872166496099403, 'samples': 12143616, 'steps': 23717, 'loss/train': 2.27978777885437} +02/25/2022 08:17:57 - INFO - codeparrot_training - Step 23718: {'lr': 0.0002872004693907024, 'samples': 12144128, 'steps': 23718, 'loss/train': 1.7869230508804321} +02/25/2022 08:18:00 - INFO - codeparrot_training - Step 23719: {'lr': 0.0002871842890121094, 'samples': 12144640, 'steps': 23719, 'loss/train': 1.5900501012802124} +02/25/2022 08:18:06 - INFO - codeparrot_training - Step 23720: {'lr': 0.0002871681084742308, 'samples': 12145152, 'steps': 23720, 'loss/train': 1.6827583312988281} +02/25/2022 08:18:09 - INFO - codeparrot_training - Step 23721: {'lr': 0.0002871519277771358, 'samples': 12145664, 'steps': 23721, 'loss/train': 2.6957075595855713} +02/25/2022 08:18:15 - INFO - codeparrot_training - Step 23722: {'lr': 0.0002871357469208937, 'samples': 12146176, 'steps': 23722, 'loss/train': 1.01897394657135} +02/25/2022 08:18:18 - INFO - codeparrot_training - Step 23723: {'lr': 0.0002871195659055739, 'samples': 12146688, 'steps': 23723, 'loss/train': 3.624803066253662} +02/25/2022 08:18:25 - INFO - codeparrot_training - Step 23724: {'lr': 0.0002871033847312456, 'samples': 12147200, 'steps': 23724, 'loss/train': 2.4313771724700928} +02/25/2022 08:18:28 - INFO - codeparrot_training - Step 23725: {'lr': 0.0002870872033979782, 'samples': 12147712, 'steps': 23725, 'loss/train': 2.0268619060516357} +02/25/2022 08:18:34 - INFO - codeparrot_training - Step 23726: {'lr': 0.000287071021905841, 'samples': 12148224, 'steps': 23726, 'loss/train': 1.992823839187622} +02/25/2022 08:18:37 - INFO - codeparrot_training - Step 23727: {'lr': 0.00028705484025490333, 'samples': 12148736, 'steps': 23727, 'loss/train': 1.1792811155319214} +02/25/2022 08:18:43 - INFO - codeparrot_training - Step 23728: {'lr': 0.0002870386584452345, 'samples': 12149248, 'steps': 23728, 'loss/train': 1.0752718448638916} +02/25/2022 08:18:46 - INFO - codeparrot_training - Step 23729: {'lr': 0.00028702247647690383, 'samples': 12149760, 'steps': 23729, 'loss/train': 1.4516328573226929} +02/25/2022 08:18:52 - INFO - codeparrot_training - Step 23730: {'lr': 0.0002870062943499806, 'samples': 12150272, 'steps': 23730, 'loss/train': 2.6758906841278076} +02/25/2022 08:18:55 - INFO - codeparrot_training - Step 23731: {'lr': 0.0002869901120645341, 'samples': 12150784, 'steps': 23731, 'loss/train': 1.6887352466583252} +02/25/2022 08:19:01 - INFO - codeparrot_training - Step 23732: {'lr': 0.0002869739296206338, 'samples': 12151296, 'steps': 23732, 'loss/train': 2.6629281044006348} +02/25/2022 08:19:04 - INFO - codeparrot_training - Step 23733: {'lr': 0.0002869577470183489, 'samples': 12151808, 'steps': 23733, 'loss/train': 1.8930538892745972} +02/25/2022 08:19:10 - INFO - codeparrot_training - Step 23734: {'lr': 0.00028694156425774874, 'samples': 12152320, 'steps': 23734, 'loss/train': 1.591787338256836} +02/25/2022 08:19:13 - INFO - codeparrot_training - Step 23735: {'lr': 0.00028692538133890267, 'samples': 12152832, 'steps': 23735, 'loss/train': 0.7416547536849976} +02/25/2022 08:19:19 - INFO - codeparrot_training - Step 23736: {'lr': 0.00028690919826188, 'samples': 12153344, 'steps': 23736, 'loss/train': 2.022280693054199} +02/25/2022 08:19:22 - INFO - codeparrot_training - Step 23737: {'lr': 0.00028689301502674995, 'samples': 12153856, 'steps': 23737, 'loss/train': 1.56686532497406} +02/25/2022 08:19:28 - INFO - codeparrot_training - Step 23738: {'lr': 0.00028687683163358197, 'samples': 12154368, 'steps': 23738, 'loss/train': 2.0775949954986572} +02/25/2022 08:19:31 - INFO - codeparrot_training - Step 23739: {'lr': 0.00028686064808244546, 'samples': 12154880, 'steps': 23739, 'loss/train': 2.261827230453491} +02/25/2022 08:19:38 - INFO - codeparrot_training - Step 23740: {'lr': 0.0002868444643734095, 'samples': 12155392, 'steps': 23740, 'loss/train': 1.521373987197876} +02/25/2022 08:19:41 - INFO - codeparrot_training - Step 23741: {'lr': 0.00028682828050654365, 'samples': 12155904, 'steps': 23741, 'loss/train': 1.4633139371871948} +02/25/2022 08:19:46 - INFO - codeparrot_training - Step 23742: {'lr': 0.0002868120964819171, 'samples': 12156416, 'steps': 23742, 'loss/train': 1.9177744388580322} +02/25/2022 08:19:50 - INFO - codeparrot_training - Step 23743: {'lr': 0.0002867959122995992, 'samples': 12156928, 'steps': 23743, 'loss/train': 2.566213846206665} +02/25/2022 08:19:55 - INFO - codeparrot_training - Step 23744: {'lr': 0.0002867797279596593, 'samples': 12157440, 'steps': 23744, 'loss/train': 1.6049220561981201} +02/25/2022 08:19:59 - INFO - codeparrot_training - Step 23745: {'lr': 0.0002867635434621668, 'samples': 12157952, 'steps': 23745, 'loss/train': 1.4053068161010742} +02/25/2022 08:20:04 - INFO - codeparrot_training - Step 23746: {'lr': 0.0002867473588071909, 'samples': 12158464, 'steps': 23746, 'loss/train': 1.5826157331466675} +02/25/2022 08:20:08 - INFO - codeparrot_training - Step 23747: {'lr': 0.00028673117399480096, 'samples': 12158976, 'steps': 23747, 'loss/train': 1.0591213703155518} +02/25/2022 08:20:13 - INFO - codeparrot_training - Step 23748: {'lr': 0.00028671498902506636, 'samples': 12159488, 'steps': 23748, 'loss/train': 1.4980249404907227} +02/25/2022 08:20:17 - INFO - codeparrot_training - Step 23749: {'lr': 0.00028669880389805647, 'samples': 12160000, 'steps': 23749, 'loss/train': 1.593460202217102} +02/25/2022 08:20:23 - INFO - codeparrot_training - Step 23750: {'lr': 0.00028668261861384045, 'samples': 12160512, 'steps': 23750, 'loss/train': 1.9531420469284058} +02/25/2022 08:20:27 - INFO - codeparrot_training - Step 23751: {'lr': 0.00028666643317248777, 'samples': 12161024, 'steps': 23751, 'loss/train': 1.8310507535934448} +02/25/2022 08:20:32 - INFO - codeparrot_training - Step 23752: {'lr': 0.00028665024757406775, 'samples': 12161536, 'steps': 23752, 'loss/train': 2.0640852451324463} +02/25/2022 08:20:36 - INFO - codeparrot_training - Step 23753: {'lr': 0.0002866340618186497, 'samples': 12162048, 'steps': 23753, 'loss/train': 0.6460937857627869} +02/25/2022 08:20:41 - INFO - codeparrot_training - Step 23754: {'lr': 0.00028661787590630297, 'samples': 12162560, 'steps': 23754, 'loss/train': 3.5005781650543213} +02/25/2022 08:20:45 - INFO - codeparrot_training - Step 23755: {'lr': 0.00028660168983709683, 'samples': 12163072, 'steps': 23755, 'loss/train': 1.9756114482879639} +02/25/2022 08:20:50 - INFO - codeparrot_training - Step 23756: {'lr': 0.00028658550361110075, 'samples': 12163584, 'steps': 23756, 'loss/train': 1.8267971277236938} +02/25/2022 08:20:54 - INFO - codeparrot_training - Step 23757: {'lr': 0.000286569317228384, 'samples': 12164096, 'steps': 23757, 'loss/train': 1.715086817741394} +02/25/2022 08:20:59 - INFO - codeparrot_training - Step 23758: {'lr': 0.00028655313068901586, 'samples': 12164608, 'steps': 23758, 'loss/train': 1.671566367149353} +02/25/2022 08:21:03 - INFO - codeparrot_training - Step 23759: {'lr': 0.0002865369439930657, 'samples': 12165120, 'steps': 23759, 'loss/train': 2.253812313079834} +02/25/2022 08:21:06 - INFO - codeparrot_training - Step 23760: {'lr': 0.00028652075714060294, 'samples': 12165632, 'steps': 23760, 'loss/train': 2.096128463745117} +02/25/2022 08:21:13 - INFO - codeparrot_training - Step 23761: {'lr': 0.0002865045701316968, 'samples': 12166144, 'steps': 23761, 'loss/train': 2.3430652618408203} +02/25/2022 08:21:17 - INFO - codeparrot_training - Step 23762: {'lr': 0.00028648838296641666, 'samples': 12166656, 'steps': 23762, 'loss/train': 0.8878803253173828} +02/25/2022 08:21:22 - INFO - codeparrot_training - Step 23763: {'lr': 0.00028647219564483195, 'samples': 12167168, 'steps': 23763, 'loss/train': 3.3712682723999023} +02/25/2022 08:21:26 - INFO - codeparrot_training - Step 23764: {'lr': 0.00028645600816701186, 'samples': 12167680, 'steps': 23764, 'loss/train': 1.7934439182281494} +02/25/2022 08:21:31 - INFO - codeparrot_training - Step 23765: {'lr': 0.00028643982053302584, 'samples': 12168192, 'steps': 23765, 'loss/train': 1.5670610666275024} +02/25/2022 08:21:35 - INFO - codeparrot_training - Step 23766: {'lr': 0.00028642363274294317, 'samples': 12168704, 'steps': 23766, 'loss/train': 2.365324020385742} +02/25/2022 08:21:40 - INFO - codeparrot_training - Step 23767: {'lr': 0.0002864074447968333, 'samples': 12169216, 'steps': 23767, 'loss/train': 2.1852331161499023} +02/25/2022 08:21:44 - INFO - codeparrot_training - Step 23768: {'lr': 0.0002863912566947654, 'samples': 12169728, 'steps': 23768, 'loss/train': 1.7493542432785034} +02/25/2022 08:21:49 - INFO - codeparrot_training - Step 23769: {'lr': 0.000286375068436809, 'samples': 12170240, 'steps': 23769, 'loss/train': 2.3759031295776367} +02/25/2022 08:21:53 - INFO - codeparrot_training - Step 23770: {'lr': 0.00028635888002303324, 'samples': 12170752, 'steps': 23770, 'loss/train': 1.806571364402771} +02/25/2022 08:21:59 - INFO - codeparrot_training - Step 23771: {'lr': 0.00028634269145350765, 'samples': 12171264, 'steps': 23771, 'loss/train': 1.4930880069732666} +02/25/2022 08:22:04 - INFO - codeparrot_training - Step 23772: {'lr': 0.00028632650272830153, 'samples': 12171776, 'steps': 23772, 'loss/train': 2.107416868209839} +02/25/2022 08:22:08 - INFO - codeparrot_training - Step 23773: {'lr': 0.00028631031384748426, 'samples': 12172288, 'steps': 23773, 'loss/train': 1.9796546697616577} +02/25/2022 08:22:13 - INFO - codeparrot_training - Step 23774: {'lr': 0.000286294124811125, 'samples': 12172800, 'steps': 23774, 'loss/train': 0.703578531742096} +02/25/2022 08:22:17 - INFO - codeparrot_training - Step 23775: {'lr': 0.0002862779356192933, 'samples': 12173312, 'steps': 23775, 'loss/train': 1.4263368844985962} +02/25/2022 08:22:22 - INFO - codeparrot_training - Step 23776: {'lr': 0.0002862617462720584, 'samples': 12173824, 'steps': 23776, 'loss/train': 1.785409927368164} +02/25/2022 08:22:26 - INFO - codeparrot_training - Step 23777: {'lr': 0.00028624555676948975, 'samples': 12174336, 'steps': 23777, 'loss/train': 2.2191672325134277} +02/25/2022 08:22:31 - INFO - codeparrot_training - Step 23778: {'lr': 0.00028622936711165665, 'samples': 12174848, 'steps': 23778, 'loss/train': 2.310695171356201} +02/25/2022 08:22:35 - INFO - codeparrot_training - Step 23779: {'lr': 0.00028621317729862837, 'samples': 12175360, 'steps': 23779, 'loss/train': 3.5981569290161133} +02/25/2022 08:22:38 - INFO - codeparrot_training - Step 23780: {'lr': 0.00028619698733047444, 'samples': 12175872, 'steps': 23780, 'loss/train': 2.387065887451172} +02/25/2022 08:22:44 - INFO - codeparrot_training - Step 23781: {'lr': 0.0002861807972072641, 'samples': 12176384, 'steps': 23781, 'loss/train': 3.240861654281616} +02/25/2022 08:22:49 - INFO - codeparrot_training - Step 23782: {'lr': 0.0002861646069290667, 'samples': 12176896, 'steps': 23782, 'loss/train': 2.517465114593506} +02/25/2022 08:22:53 - INFO - codeparrot_training - Step 23783: {'lr': 0.0002861484164959515, 'samples': 12177408, 'steps': 23783, 'loss/train': 2.3863937854766846} +02/25/2022 08:22:58 - INFO - codeparrot_training - Step 23784: {'lr': 0.0002861322259079881, 'samples': 12177920, 'steps': 23784, 'loss/train': 2.1893911361694336} +02/25/2022 08:23:02 - INFO - codeparrot_training - Step 23785: {'lr': 0.00028611603516524566, 'samples': 12178432, 'steps': 23785, 'loss/train': 1.6996592283248901} +02/25/2022 08:23:08 - INFO - codeparrot_training - Step 23786: {'lr': 0.00028609984426779364, 'samples': 12178944, 'steps': 23786, 'loss/train': 1.5738521814346313} +02/25/2022 08:23:11 - INFO - codeparrot_training - Step 23787: {'lr': 0.0002860836532157012, 'samples': 12179456, 'steps': 23787, 'loss/train': 2.2220871448516846} +02/25/2022 08:23:17 - INFO - codeparrot_training - Step 23788: {'lr': 0.000286067462009038, 'samples': 12179968, 'steps': 23788, 'loss/train': 2.3630354404449463} +02/25/2022 08:23:20 - INFO - codeparrot_training - Step 23789: {'lr': 0.00028605127064787315, 'samples': 12180480, 'steps': 23789, 'loss/train': 2.5233869552612305} +02/25/2022 08:23:26 - INFO - codeparrot_training - Step 23790: {'lr': 0.00028603507913227617, 'samples': 12180992, 'steps': 23790, 'loss/train': 0.14146028459072113} +02/25/2022 08:23:29 - INFO - codeparrot_training - Step 23791: {'lr': 0.00028601888746231633, 'samples': 12181504, 'steps': 23791, 'loss/train': 1.0562998056411743} +02/25/2022 08:23:35 - INFO - codeparrot_training - Step 23792: {'lr': 0.00028600269563806304, 'samples': 12182016, 'steps': 23792, 'loss/train': 2.630549907684326} +02/25/2022 08:23:39 - INFO - codeparrot_training - Step 23793: {'lr': 0.0002859865036595856, 'samples': 12182528, 'steps': 23793, 'loss/train': 1.8632588386535645} +02/25/2022 08:23:44 - INFO - codeparrot_training - Step 23794: {'lr': 0.0002859703115269534, 'samples': 12183040, 'steps': 23794, 'loss/train': 1.6509627103805542} +02/25/2022 08:23:48 - INFO - codeparrot_training - Step 23795: {'lr': 0.0002859541192402359, 'samples': 12183552, 'steps': 23795, 'loss/train': 0.23247288167476654} +02/25/2022 08:23:54 - INFO - codeparrot_training - Step 23796: {'lr': 0.00028593792679950227, 'samples': 12184064, 'steps': 23796, 'loss/train': 2.736579418182373} +02/25/2022 08:23:57 - INFO - codeparrot_training - Step 23797: {'lr': 0.00028592173420482206, 'samples': 12184576, 'steps': 23797, 'loss/train': 2.8215363025665283} +02/25/2022 08:24:03 - INFO - codeparrot_training - Step 23798: {'lr': 0.0002859055414562644, 'samples': 12185088, 'steps': 23798, 'loss/train': 1.9680349826812744} +02/25/2022 08:24:06 - INFO - codeparrot_training - Step 23799: {'lr': 0.00028588934855389885, 'samples': 12185600, 'steps': 23799, 'loss/train': 1.837709665298462} +02/25/2022 08:24:14 - INFO - codeparrot_training - Step 23800: {'lr': 0.0002858731554977948, 'samples': 12186112, 'steps': 23800, 'loss/train': 1.3905572891235352} +02/25/2022 08:24:17 - INFO - codeparrot_training - Step 23801: {'lr': 0.00028585696228802153, 'samples': 12186624, 'steps': 23801, 'loss/train': 1.984190821647644} +02/25/2022 08:24:23 - INFO - codeparrot_training - Step 23802: {'lr': 0.0002858407689246484, 'samples': 12187136, 'steps': 23802, 'loss/train': 2.0714988708496094} +02/25/2022 08:24:26 - INFO - codeparrot_training - Step 23803: {'lr': 0.0002858245754077448, 'samples': 12187648, 'steps': 23803, 'loss/train': 0.27793917059898376} +02/25/2022 08:24:32 - INFO - codeparrot_training - Step 23804: {'lr': 0.0002858083817373801, 'samples': 12188160, 'steps': 23804, 'loss/train': 1.0633853673934937} +02/25/2022 08:24:35 - INFO - codeparrot_training - Step 23805: {'lr': 0.00028579218791362367, 'samples': 12188672, 'steps': 23805, 'loss/train': 2.295172691345215} +02/25/2022 08:24:41 - INFO - codeparrot_training - Step 23806: {'lr': 0.0002857759939365449, 'samples': 12189184, 'steps': 23806, 'loss/train': 7.6252875328063965} +02/25/2022 08:24:44 - INFO - codeparrot_training - Step 23807: {'lr': 0.000285759799806213, 'samples': 12189696, 'steps': 23807, 'loss/train': 1.5053744316101074} +02/25/2022 08:24:51 - INFO - codeparrot_training - Step 23808: {'lr': 0.00028574360552269755, 'samples': 12190208, 'steps': 23808, 'loss/train': 1.590543270111084} +02/25/2022 08:24:54 - INFO - codeparrot_training - Step 23809: {'lr': 0.0002857274110860679, 'samples': 12190720, 'steps': 23809, 'loss/train': 2.0183393955230713} +02/25/2022 08:24:57 - INFO - codeparrot_training - Step 23810: {'lr': 0.00028571121649639337, 'samples': 12191232, 'steps': 23810, 'loss/train': 2.259247064590454} +02/25/2022 08:25:05 - INFO - codeparrot_training - Step 23811: {'lr': 0.0002856950217537432, 'samples': 12191744, 'steps': 23811, 'loss/train': 2.4867947101593018} +02/25/2022 08:25:08 - INFO - codeparrot_training - Step 23812: {'lr': 0.000285678826858187, 'samples': 12192256, 'steps': 23812, 'loss/train': 2.487729072570801} +02/25/2022 08:25:14 - INFO - codeparrot_training - Step 23813: {'lr': 0.000285662631809794, 'samples': 12192768, 'steps': 23813, 'loss/train': 3.44779372215271} +02/25/2022 08:25:18 - INFO - codeparrot_training - Step 23814: {'lr': 0.0002856464366086336, 'samples': 12193280, 'steps': 23814, 'loss/train': 0.6749687194824219} +02/25/2022 08:25:23 - INFO - codeparrot_training - Step 23815: {'lr': 0.0002856302412547752, 'samples': 12193792, 'steps': 23815, 'loss/train': 1.9551236629486084} +02/25/2022 08:25:27 - INFO - codeparrot_training - Step 23816: {'lr': 0.0002856140457482882, 'samples': 12194304, 'steps': 23816, 'loss/train': 1.7070772647857666} +02/25/2022 08:25:32 - INFO - codeparrot_training - Step 23817: {'lr': 0.0002855978500892419, 'samples': 12194816, 'steps': 23817, 'loss/train': 0.27305731177330017} +02/25/2022 08:25:36 - INFO - codeparrot_training - Step 23818: {'lr': 0.00028558165427770567, 'samples': 12195328, 'steps': 23818, 'loss/train': 1.9458577632904053} +02/25/2022 08:25:41 - INFO - codeparrot_training - Step 23819: {'lr': 0.00028556545831374903, 'samples': 12195840, 'steps': 23819, 'loss/train': 1.327791452407837} +02/25/2022 08:25:45 - INFO - codeparrot_training - Step 23820: {'lr': 0.0002855492621974411, 'samples': 12196352, 'steps': 23820, 'loss/train': 1.8198851346969604} +02/25/2022 08:25:52 - INFO - codeparrot_training - Step 23821: {'lr': 0.0002855330659288516, 'samples': 12196864, 'steps': 23821, 'loss/train': 0.4827019274234772} +02/25/2022 08:25:56 - INFO - codeparrot_training - Step 23822: {'lr': 0.00028551686950804964, 'samples': 12197376, 'steps': 23822, 'loss/train': 1.7372627258300781} +02/25/2022 08:26:01 - INFO - codeparrot_training - Step 23823: {'lr': 0.0002855006729351046, 'samples': 12197888, 'steps': 23823, 'loss/train': 1.6372865438461304} +02/25/2022 08:26:05 - INFO - codeparrot_training - Step 23824: {'lr': 0.0002854844762100861, 'samples': 12198400, 'steps': 23824, 'loss/train': 2.4423463344573975} +02/25/2022 08:26:10 - INFO - codeparrot_training - Step 23825: {'lr': 0.0002854682793330633, 'samples': 12198912, 'steps': 23825, 'loss/train': 2.299920082092285} +02/25/2022 08:26:14 - INFO - codeparrot_training - Step 23826: {'lr': 0.0002854520823041057, 'samples': 12199424, 'steps': 23826, 'loss/train': 1.8253332376480103} +02/25/2022 08:26:19 - INFO - codeparrot_training - Step 23827: {'lr': 0.0002854358851232826, 'samples': 12199936, 'steps': 23827, 'loss/train': 2.4432296752929688} +02/25/2022 08:26:23 - INFO - codeparrot_training - Step 23828: {'lr': 0.0002854196877906635, 'samples': 12200448, 'steps': 23828, 'loss/train': 1.1840457916259766} +02/25/2022 08:26:28 - INFO - codeparrot_training - Step 23829: {'lr': 0.00028540349030631756, 'samples': 12200960, 'steps': 23829, 'loss/train': 2.380634069442749} +02/25/2022 08:26:32 - INFO - codeparrot_training - Step 23830: {'lr': 0.0002853872926703144, 'samples': 12201472, 'steps': 23830, 'loss/train': 2.720336675643921} +02/25/2022 08:26:39 - INFO - codeparrot_training - Step 23831: {'lr': 0.0002853710948827233, 'samples': 12201984, 'steps': 23831, 'loss/train': 2.087082624435425} +02/25/2022 08:26:43 - INFO - codeparrot_training - Step 23832: {'lr': 0.00028535489694361365, 'samples': 12202496, 'steps': 23832, 'loss/train': 0.8275420069694519} +02/25/2022 08:26:48 - INFO - codeparrot_training - Step 23833: {'lr': 0.00028533869885305485, 'samples': 12203008, 'steps': 23833, 'loss/train': 1.8307034969329834} +02/25/2022 08:26:52 - INFO - codeparrot_training - Step 23834: {'lr': 0.0002853225006111163, 'samples': 12203520, 'steps': 23834, 'loss/train': 2.4690442085266113} +02/25/2022 08:26:57 - INFO - codeparrot_training - Step 23835: {'lr': 0.00028530630221786736, 'samples': 12204032, 'steps': 23835, 'loss/train': 1.9603184461593628} +02/25/2022 08:27:01 - INFO - codeparrot_training - Step 23836: {'lr': 0.00028529010367337745, 'samples': 12204544, 'steps': 23836, 'loss/train': 2.428802490234375} +02/25/2022 08:27:06 - INFO - codeparrot_training - Step 23837: {'lr': 0.0002852739049777159, 'samples': 12205056, 'steps': 23837, 'loss/train': 2.112835645675659} +02/25/2022 08:27:10 - INFO - codeparrot_training - Step 23838: {'lr': 0.00028525770613095213, 'samples': 12205568, 'steps': 23838, 'loss/train': 1.6155688762664795} +02/25/2022 08:27:15 - INFO - codeparrot_training - Step 23839: {'lr': 0.00028524150713315566, 'samples': 12206080, 'steps': 23839, 'loss/train': 8.739619255065918} +02/25/2022 08:27:19 - INFO - codeparrot_training - Step 23840: {'lr': 0.00028522530798439564, 'samples': 12206592, 'steps': 23840, 'loss/train': 1.8128621578216553} +02/25/2022 08:27:24 - INFO - codeparrot_training - Step 23841: {'lr': 0.0002852091086847417, 'samples': 12207104, 'steps': 23841, 'loss/train': 2.49391770362854} +02/25/2022 08:27:28 - INFO - codeparrot_training - Step 23842: {'lr': 0.000285192909234263, 'samples': 12207616, 'steps': 23842, 'loss/train': 1.474942684173584} +02/25/2022 08:27:35 - INFO - codeparrot_training - Step 23843: {'lr': 0.0002851767096330291, 'samples': 12208128, 'steps': 23843, 'loss/train': 2.480138063430786} +02/25/2022 08:27:39 - INFO - codeparrot_training - Step 23844: {'lr': 0.00028516050988110935, 'samples': 12208640, 'steps': 23844, 'loss/train': 2.6836564540863037} +02/25/2022 08:27:44 - INFO - codeparrot_training - Step 23845: {'lr': 0.00028514430997857317, 'samples': 12209152, 'steps': 23845, 'loss/train': 1.6603347063064575} +02/25/2022 08:27:48 - INFO - codeparrot_training - Step 23846: {'lr': 0.0002851281099254899, 'samples': 12209664, 'steps': 23846, 'loss/train': 0.36443275213241577} +02/25/2022 08:27:53 - INFO - codeparrot_training - Step 23847: {'lr': 0.000285111909721929, 'samples': 12210176, 'steps': 23847, 'loss/train': 2.6149418354034424} +02/25/2022 08:27:57 - INFO - codeparrot_training - Step 23848: {'lr': 0.0002850957093679597, 'samples': 12210688, 'steps': 23848, 'loss/train': 0.4315800368785858} +02/25/2022 08:28:02 - INFO - codeparrot_training - Step 23849: {'lr': 0.0002850795088636516, 'samples': 12211200, 'steps': 23849, 'loss/train': 2.4588608741760254} +02/25/2022 08:28:06 - INFO - codeparrot_training - Step 23850: {'lr': 0.0002850633082090741, 'samples': 12211712, 'steps': 23850, 'loss/train': 1.8169078826904297} +02/25/2022 08:28:12 - INFO - codeparrot_training - Step 23851: {'lr': 0.00028504710740429647, 'samples': 12212224, 'steps': 23851, 'loss/train': 0.9353863596916199} +02/25/2022 08:28:15 - INFO - codeparrot_training - Step 23852: {'lr': 0.0002850309064493882, 'samples': 12212736, 'steps': 23852, 'loss/train': 8.571444511413574} +02/25/2022 08:28:21 - INFO - codeparrot_training - Step 23853: {'lr': 0.00028501470534441855, 'samples': 12213248, 'steps': 23853, 'loss/train': 2.3966128826141357} +02/25/2022 08:28:24 - INFO - codeparrot_training - Step 23854: {'lr': 0.00028499850408945704, 'samples': 12213760, 'steps': 23854, 'loss/train': 1.5944530963897705} +02/25/2022 08:28:30 - INFO - codeparrot_training - Step 23855: {'lr': 0.0002849823026845731, 'samples': 12214272, 'steps': 23855, 'loss/train': 1.1452698707580566} +02/25/2022 08:28:33 - INFO - codeparrot_training - Step 23856: {'lr': 0.00028496610112983605, 'samples': 12214784, 'steps': 23856, 'loss/train': 1.9814021587371826} +02/25/2022 08:28:40 - INFO - codeparrot_training - Step 23857: {'lr': 0.00028494989942531537, 'samples': 12215296, 'steps': 23857, 'loss/train': 1.8425670862197876} +02/25/2022 08:28:44 - INFO - codeparrot_training - Step 23858: {'lr': 0.0002849336975710804, 'samples': 12215808, 'steps': 23858, 'loss/train': 2.0652003288269043} +02/25/2022 08:28:50 - INFO - codeparrot_training - Step 23859: {'lr': 0.0002849174955672005, 'samples': 12216320, 'steps': 23859, 'loss/train': 2.181178092956543} +02/25/2022 08:28:53 - INFO - codeparrot_training - Step 23860: {'lr': 0.0002849012934137452, 'samples': 12216832, 'steps': 23860, 'loss/train': 2.166696786880493} +02/25/2022 08:28:59 - INFO - codeparrot_training - Step 23861: {'lr': 0.0002848850911107838, 'samples': 12217344, 'steps': 23861, 'loss/train': 2.069958209991455} +02/25/2022 08:29:02 - INFO - codeparrot_training - Step 23862: {'lr': 0.0002848688886583858, 'samples': 12217856, 'steps': 23862, 'loss/train': 2.1374595165252686} +02/25/2022 08:29:08 - INFO - codeparrot_training - Step 23863: {'lr': 0.0002848526860566205, 'samples': 12218368, 'steps': 23863, 'loss/train': 2.445357084274292} +02/25/2022 08:29:11 - INFO - codeparrot_training - Step 23864: {'lr': 0.00028483648330555737, 'samples': 12218880, 'steps': 23864, 'loss/train': 0.9217566251754761} +02/25/2022 08:29:17 - INFO - codeparrot_training - Step 23865: {'lr': 0.0002848202804052659, 'samples': 12219392, 'steps': 23865, 'loss/train': 1.1582810878753662} +02/25/2022 08:29:20 - INFO - codeparrot_training - Step 23866: {'lr': 0.00028480407735581527, 'samples': 12219904, 'steps': 23866, 'loss/train': 1.6610405445098877} +02/25/2022 08:29:27 - INFO - codeparrot_training - Step 23867: {'lr': 0.00028478787415727515, 'samples': 12220416, 'steps': 23867, 'loss/train': 2.0354859828948975} +02/25/2022 08:29:31 - INFO - codeparrot_training - Step 23868: {'lr': 0.00028477167080971465, 'samples': 12220928, 'steps': 23868, 'loss/train': 2.06433367729187} +02/25/2022 08:29:36 - INFO - codeparrot_training - Step 23869: {'lr': 0.0002847554673132035, 'samples': 12221440, 'steps': 23869, 'loss/train': 2.5286731719970703} +02/25/2022 08:29:40 - INFO - codeparrot_training - Step 23870: {'lr': 0.00028473926366781095, 'samples': 12221952, 'steps': 23870, 'loss/train': 2.0785348415374756} +02/25/2022 08:29:46 - INFO - codeparrot_training - Step 23871: {'lr': 0.00028472305987360636, 'samples': 12222464, 'steps': 23871, 'loss/train': 3.007127523422241} +02/25/2022 08:29:50 - INFO - codeparrot_training - Step 23872: {'lr': 0.00028470685593065927, 'samples': 12222976, 'steps': 23872, 'loss/train': 1.1006990671157837} +02/25/2022 08:29:53 - INFO - codeparrot_training - Step 23873: {'lr': 0.00028469065183903895, 'samples': 12223488, 'steps': 23873, 'loss/train': 2.1557517051696777} +02/25/2022 08:29:59 - INFO - codeparrot_training - Step 23874: {'lr': 0.000284674447598815, 'samples': 12224000, 'steps': 23874, 'loss/train': 1.7379764318466187} +02/25/2022 08:30:02 - INFO - codeparrot_training - Step 23875: {'lr': 0.00028465824321005667, 'samples': 12224512, 'steps': 23875, 'loss/train': 1.7558883428573608} +02/25/2022 08:30:08 - INFO - codeparrot_training - Step 23876: {'lr': 0.0002846420386728334, 'samples': 12225024, 'steps': 23876, 'loss/train': 2.2542593479156494} +02/25/2022 08:30:11 - INFO - codeparrot_training - Step 23877: {'lr': 0.0002846258339872147, 'samples': 12225536, 'steps': 23877, 'loss/train': 1.8808306455612183} +02/25/2022 08:30:18 - INFO - codeparrot_training - Step 23878: {'lr': 0.0002846096291532699, 'samples': 12226048, 'steps': 23878, 'loss/train': 1.2494032382965088} +02/25/2022 08:30:22 - INFO - codeparrot_training - Step 23879: {'lr': 0.0002845934241710684, 'samples': 12226560, 'steps': 23879, 'loss/train': 0.8155087828636169} +02/25/2022 08:30:27 - INFO - codeparrot_training - Step 23880: {'lr': 0.0002845772190406798, 'samples': 12227072, 'steps': 23880, 'loss/train': 0.6408429145812988} +02/25/2022 08:30:31 - INFO - codeparrot_training - Step 23881: {'lr': 0.0002845610137621732, 'samples': 12227584, 'steps': 23881, 'loss/train': 3.0057241916656494} +02/25/2022 08:30:36 - INFO - codeparrot_training - Step 23882: {'lr': 0.0002845448083356183, 'samples': 12228096, 'steps': 23882, 'loss/train': 0.3577134311199188} +02/25/2022 08:30:40 - INFO - codeparrot_training - Step 23883: {'lr': 0.00028452860276108436, 'samples': 12228608, 'steps': 23883, 'loss/train': 1.316670536994934} +02/25/2022 08:30:46 - INFO - codeparrot_training - Step 23884: {'lr': 0.0002845123970386408, 'samples': 12229120, 'steps': 23884, 'loss/train': 1.7536288499832153} +02/25/2022 08:30:49 - INFO - codeparrot_training - Step 23885: {'lr': 0.00028449619116835715, 'samples': 12229632, 'steps': 23885, 'loss/train': 2.04780650138855} +02/25/2022 08:30:55 - INFO - codeparrot_training - Step 23886: {'lr': 0.0002844799851503028, 'samples': 12230144, 'steps': 23886, 'loss/train': 1.4486092329025269} +02/25/2022 08:30:58 - INFO - codeparrot_training - Step 23887: {'lr': 0.0002844637789845471, 'samples': 12230656, 'steps': 23887, 'loss/train': 1.6248226165771484} +02/25/2022 08:31:04 - INFO - codeparrot_training - Step 23888: {'lr': 0.0002844475726711595, 'samples': 12231168, 'steps': 23888, 'loss/train': 1.3015556335449219} +02/25/2022 08:31:07 - INFO - codeparrot_training - Step 23889: {'lr': 0.0002844313662102095, 'samples': 12231680, 'steps': 23889, 'loss/train': 1.826593041419983} +02/25/2022 08:31:13 - INFO - codeparrot_training - Step 23890: {'lr': 0.0002844151596017665, 'samples': 12232192, 'steps': 23890, 'loss/train': 1.816049575805664} +02/25/2022 08:31:16 - INFO - codeparrot_training - Step 23891: {'lr': 0.0002843989528458997, 'samples': 12232704, 'steps': 23891, 'loss/train': 1.9060184955596924} +02/25/2022 08:31:22 - INFO - codeparrot_training - Step 23892: {'lr': 0.0002843827459426789, 'samples': 12233216, 'steps': 23892, 'loss/train': 2.3060052394866943} +02/25/2022 08:31:25 - INFO - codeparrot_training - Step 23893: {'lr': 0.00028436653889217316, 'samples': 12233728, 'steps': 23893, 'loss/train': 1.5798581838607788} +02/25/2022 08:31:33 - INFO - codeparrot_training - Step 23894: {'lr': 0.00028435033169445223, 'samples': 12234240, 'steps': 23894, 'loss/train': 0.9531805515289307} +02/25/2022 08:31:36 - INFO - codeparrot_training - Step 23895: {'lr': 0.0002843341243495853, 'samples': 12234752, 'steps': 23895, 'loss/train': 1.2489651441574097} +02/25/2022 08:31:42 - INFO - codeparrot_training - Step 23896: {'lr': 0.0002843179168576419, 'samples': 12235264, 'steps': 23896, 'loss/train': 2.063264846801758} +02/25/2022 08:31:45 - INFO - codeparrot_training - Step 23897: {'lr': 0.00028430170921869147, 'samples': 12235776, 'steps': 23897, 'loss/train': 2.0053510665893555} +02/25/2022 08:31:51 - INFO - codeparrot_training - Step 23898: {'lr': 0.0002842855014328034, 'samples': 12236288, 'steps': 23898, 'loss/train': 1.8234277963638306} +02/25/2022 08:31:54 - INFO - codeparrot_training - Step 23899: {'lr': 0.0002842692935000471, 'samples': 12236800, 'steps': 23899, 'loss/train': 2.18677020072937} +02/25/2022 08:31:59 - INFO - codeparrot_training - Step 23900: {'lr': 0.00028425308542049207, 'samples': 12237312, 'steps': 23900, 'loss/train': 1.6970452070236206} +02/25/2022 08:32:03 - INFO - codeparrot_training - Step 23901: {'lr': 0.0002842368771942077, 'samples': 12237824, 'steps': 23901, 'loss/train': 1.960959553718567} +02/25/2022 08:32:08 - INFO - codeparrot_training - Step 23902: {'lr': 0.00028422066882126336, 'samples': 12238336, 'steps': 23902, 'loss/train': 1.6810353994369507} +02/25/2022 08:32:12 - INFO - codeparrot_training - Step 23903: {'lr': 0.0002842044603017285, 'samples': 12238848, 'steps': 23903, 'loss/train': 1.7343754768371582} +02/25/2022 08:32:19 - INFO - codeparrot_training - Step 23904: {'lr': 0.00028418825163567275, 'samples': 12239360, 'steps': 23904, 'loss/train': 2.1843326091766357} +02/25/2022 08:32:23 - INFO - codeparrot_training - Step 23905: {'lr': 0.0002841720428231653, 'samples': 12239872, 'steps': 23905, 'loss/train': 0.16531874239444733} +02/25/2022 08:32:28 - INFO - codeparrot_training - Step 23906: {'lr': 0.00028415583386427566, 'samples': 12240384, 'steps': 23906, 'loss/train': 1.6627346277236938} +02/25/2022 08:32:32 - INFO - codeparrot_training - Step 23907: {'lr': 0.0002841396247590733, 'samples': 12240896, 'steps': 23907, 'loss/train': 1.393295407295227} +02/25/2022 08:32:38 - INFO - codeparrot_training - Step 23908: {'lr': 0.00028412341550762755, 'samples': 12241408, 'steps': 23908, 'loss/train': 2.3162336349487305} +02/25/2022 08:32:41 - INFO - codeparrot_training - Step 23909: {'lr': 0.00028410720611000804, 'samples': 12241920, 'steps': 23909, 'loss/train': 1.3320385217666626} +02/25/2022 08:32:47 - INFO - codeparrot_training - Step 23910: {'lr': 0.000284090996566284, 'samples': 12242432, 'steps': 23910, 'loss/train': 2.113345146179199} +02/25/2022 08:32:50 - INFO - codeparrot_training - Step 23911: {'lr': 0.00028407478687652503, 'samples': 12242944, 'steps': 23911, 'loss/train': 3.181025505065918} +02/25/2022 08:32:56 - INFO - codeparrot_training - Step 23912: {'lr': 0.0002840585770408004, 'samples': 12243456, 'steps': 23912, 'loss/train': 2.1168301105499268} +02/25/2022 08:32:59 - INFO - codeparrot_training - Step 23913: {'lr': 0.00028404236705917974, 'samples': 12243968, 'steps': 23913, 'loss/train': 1.8107494115829468} +02/25/2022 08:33:05 - INFO - codeparrot_training - Step 23914: {'lr': 0.00028402615693173236, 'samples': 12244480, 'steps': 23914, 'loss/train': 2.187854290008545} +02/25/2022 08:33:08 - INFO - codeparrot_training - Step 23915: {'lr': 0.00028400994665852777, 'samples': 12244992, 'steps': 23915, 'loss/train': 4.253845691680908} +02/25/2022 08:33:15 - INFO - codeparrot_training - Step 23916: {'lr': 0.00028399373623963525, 'samples': 12245504, 'steps': 23916, 'loss/train': 2.3905253410339355} +02/25/2022 08:33:19 - INFO - codeparrot_training - Step 23917: {'lr': 0.0002839775256751244, 'samples': 12246016, 'steps': 23917, 'loss/train': 1.4984098672866821} +02/25/2022 08:33:24 - INFO - codeparrot_training - Step 23918: {'lr': 0.00028396131496506466, 'samples': 12246528, 'steps': 23918, 'loss/train': 0.7051137089729309} +02/25/2022 08:33:28 - INFO - codeparrot_training - Step 23919: {'lr': 0.00028394510410952544, 'samples': 12247040, 'steps': 23919, 'loss/train': 1.7828171253204346} +02/25/2022 08:33:33 - INFO - codeparrot_training - Step 23920: {'lr': 0.0002839288931085761, 'samples': 12247552, 'steps': 23920, 'loss/train': 1.4712936878204346} +02/25/2022 08:33:37 - INFO - codeparrot_training - Step 23921: {'lr': 0.0002839126819622862, 'samples': 12248064, 'steps': 23921, 'loss/train': 2.1698801517486572} +02/25/2022 08:33:42 - INFO - codeparrot_training - Step 23922: {'lr': 0.00028389647067072517, 'samples': 12248576, 'steps': 23922, 'loss/train': 1.0560258626937866} +02/25/2022 08:33:46 - INFO - codeparrot_training - Step 23923: {'lr': 0.00028388025923396234, 'samples': 12249088, 'steps': 23923, 'loss/train': 1.5148906707763672} +02/25/2022 08:33:53 - INFO - codeparrot_training - Step 23924: {'lr': 0.0002838640476520673, 'samples': 12249600, 'steps': 23924, 'loss/train': 1.8574568033218384} +02/25/2022 08:33:57 - INFO - codeparrot_training - Step 23925: {'lr': 0.00028384783592510945, 'samples': 12250112, 'steps': 23925, 'loss/train': 1.7618740797042847} +02/25/2022 08:34:02 - INFO - codeparrot_training - Step 23926: {'lr': 0.00028383162405315823, 'samples': 12250624, 'steps': 23926, 'loss/train': 0.8848698139190674} +02/25/2022 08:34:06 - INFO - codeparrot_training - Step 23927: {'lr': 0.00028381541203628295, 'samples': 12251136, 'steps': 23927, 'loss/train': 1.5072880983352661} +02/25/2022 08:34:11 - INFO - codeparrot_training - Step 23928: {'lr': 0.0002837991998745533, 'samples': 12251648, 'steps': 23928, 'loss/train': 1.8419615030288696} +02/25/2022 08:34:15 - INFO - codeparrot_training - Step 23929: {'lr': 0.0002837829875680386, 'samples': 12252160, 'steps': 23929, 'loss/train': 2.556342124938965} +02/25/2022 08:34:20 - INFO - codeparrot_training - Step 23930: {'lr': 0.00028376677511680827, 'samples': 12252672, 'steps': 23930, 'loss/train': 2.056863784790039} +02/25/2022 08:34:24 - INFO - codeparrot_training - Step 23931: {'lr': 0.0002837505625209318, 'samples': 12253184, 'steps': 23931, 'loss/train': 2.4866621494293213} +02/25/2022 08:34:29 - INFO - codeparrot_training - Step 23932: {'lr': 0.0002837343497804787, 'samples': 12253696, 'steps': 23932, 'loss/train': 1.559464693069458} +02/25/2022 08:34:33 - INFO - codeparrot_training - Step 23933: {'lr': 0.0002837181368955183, 'samples': 12254208, 'steps': 23933, 'loss/train': 1.319364070892334} +02/25/2022 08:34:38 - INFO - codeparrot_training - Step 23934: {'lr': 0.0002837019238661201, 'samples': 12254720, 'steps': 23934, 'loss/train': 2.5055911540985107} +02/25/2022 08:34:42 - INFO - codeparrot_training - Step 23935: {'lr': 0.00028368571069235354, 'samples': 12255232, 'steps': 23935, 'loss/train': 0.5492614507675171} +02/25/2022 08:34:47 - INFO - codeparrot_training - Step 23936: {'lr': 0.00028366949737428814, 'samples': 12255744, 'steps': 23936, 'loss/train': 2.809084892272949} +02/25/2022 08:34:51 - INFO - codeparrot_training - Step 23937: {'lr': 0.00028365328391199334, 'samples': 12256256, 'steps': 23937, 'loss/train': 1.8783550262451172} +02/25/2022 08:34:56 - INFO - codeparrot_training - Step 23938: {'lr': 0.0002836370703055385, 'samples': 12256768, 'steps': 23938, 'loss/train': 2.457873821258545} +02/25/2022 08:35:00 - INFO - codeparrot_training - Step 23939: {'lr': 0.0002836208565549932, 'samples': 12257280, 'steps': 23939, 'loss/train': 1.9005979299545288} +02/25/2022 08:35:07 - INFO - codeparrot_training - Step 23940: {'lr': 0.00028360464266042674, 'samples': 12257792, 'steps': 23940, 'loss/train': 1.6991955041885376} +02/25/2022 08:35:10 - INFO - codeparrot_training - Step 23941: {'lr': 0.00028358842862190873, 'samples': 12258304, 'steps': 23941, 'loss/train': 2.1745004653930664} +02/25/2022 08:35:16 - INFO - codeparrot_training - Step 23942: {'lr': 0.00028357221443950847, 'samples': 12258816, 'steps': 23942, 'loss/train': 2.757969617843628} +02/25/2022 08:35:19 - INFO - codeparrot_training - Step 23943: {'lr': 0.00028355600011329557, 'samples': 12259328, 'steps': 23943, 'loss/train': 1.3630497455596924} +02/25/2022 08:35:25 - INFO - codeparrot_training - Step 23944: {'lr': 0.00028353978564333936, 'samples': 12259840, 'steps': 23944, 'loss/train': 1.3697361946105957} +02/25/2022 08:35:28 - INFO - codeparrot_training - Step 23945: {'lr': 0.0002835235710297094, 'samples': 12260352, 'steps': 23945, 'loss/train': 1.5021661520004272} +02/25/2022 08:35:34 - INFO - codeparrot_training - Step 23946: {'lr': 0.0002835073562724751, 'samples': 12260864, 'steps': 23946, 'loss/train': 1.128860354423523} +02/25/2022 08:35:38 - INFO - codeparrot_training - Step 23947: {'lr': 0.00028349114137170593, 'samples': 12261376, 'steps': 23947, 'loss/train': 0.5817089676856995} +02/25/2022 08:35:43 - INFO - codeparrot_training - Step 23948: {'lr': 0.0002834749263274714, 'samples': 12261888, 'steps': 23948, 'loss/train': 1.8628727197647095} +02/25/2022 08:35:47 - INFO - codeparrot_training - Step 23949: {'lr': 0.00028345871113984086, 'samples': 12262400, 'steps': 23949, 'loss/train': 1.4499738216400146} +02/25/2022 08:35:54 - INFO - codeparrot_training - Step 23950: {'lr': 0.0002834424958088838, 'samples': 12262912, 'steps': 23950, 'loss/train': 1.1090614795684814} +02/25/2022 08:35:57 - INFO - codeparrot_training - Step 23951: {'lr': 0.00028342628033466974, 'samples': 12263424, 'steps': 23951, 'loss/train': 1.9155235290527344} +02/25/2022 08:36:03 - INFO - codeparrot_training - Step 23952: {'lr': 0.00028341006471726816, 'samples': 12263936, 'steps': 23952, 'loss/train': 1.125829815864563} +02/25/2022 08:36:06 - INFO - codeparrot_training - Step 23953: {'lr': 0.0002833938489567484, 'samples': 12264448, 'steps': 23953, 'loss/train': 1.8911327123641968} +02/25/2022 08:36:12 - INFO - codeparrot_training - Step 23954: {'lr': 0.00028337763305318, 'samples': 12264960, 'steps': 23954, 'loss/train': 2.3532049655914307} +02/25/2022 08:36:15 - INFO - codeparrot_training - Step 23955: {'lr': 0.00028336141700663244, 'samples': 12265472, 'steps': 23955, 'loss/train': 2.2518019676208496} +02/25/2022 08:36:21 - INFO - codeparrot_training - Step 23956: {'lr': 0.00028334520081717507, 'samples': 12265984, 'steps': 23956, 'loss/train': 1.2189196348190308} +02/25/2022 08:36:24 - INFO - codeparrot_training - Step 23957: {'lr': 0.0002833289844848776, 'samples': 12266496, 'steps': 23957, 'loss/train': 1.7994390726089478} +02/25/2022 08:36:30 - INFO - codeparrot_training - Step 23958: {'lr': 0.0002833127680098092, 'samples': 12267008, 'steps': 23958, 'loss/train': 0.6896646618843079} +02/25/2022 08:36:33 - INFO - codeparrot_training - Step 23959: {'lr': 0.0002832965513920396, 'samples': 12267520, 'steps': 23959, 'loss/train': 1.7736015319824219} +02/25/2022 08:36:39 - INFO - codeparrot_training - Step 23960: {'lr': 0.0002832803346316381, 'samples': 12268032, 'steps': 23960, 'loss/train': 2.5266411304473877} +02/25/2022 08:36:42 - INFO - codeparrot_training - Step 23961: {'lr': 0.0002832641177286742, 'samples': 12268544, 'steps': 23961, 'loss/train': 1.6412752866744995} +02/25/2022 08:36:50 - INFO - codeparrot_training - Step 23962: {'lr': 0.0002832479006832174, 'samples': 12269056, 'steps': 23962, 'loss/train': 1.9535921812057495} +02/25/2022 08:36:53 - INFO - codeparrot_training - Step 23963: {'lr': 0.0002832316834953372, 'samples': 12269568, 'steps': 23963, 'loss/train': 1.6700303554534912} +02/25/2022 08:36:59 - INFO - codeparrot_training - Step 23964: {'lr': 0.0002832154661651029, 'samples': 12270080, 'steps': 23964, 'loss/train': 1.6835803985595703} +02/25/2022 08:37:02 - INFO - codeparrot_training - Step 23965: {'lr': 0.00028319924869258425, 'samples': 12270592, 'steps': 23965, 'loss/train': 1.9687668085098267} +02/25/2022 08:37:08 - INFO - codeparrot_training - Step 23966: {'lr': 0.0002831830310778504, 'samples': 12271104, 'steps': 23966, 'loss/train': 1.5070534944534302} +02/25/2022 08:37:13 - INFO - codeparrot_training - Step 23967: {'lr': 0.0002831668133209711, 'samples': 12271616, 'steps': 23967, 'loss/train': 1.837750792503357} +02/25/2022 08:37:17 - INFO - codeparrot_training - Step 23968: {'lr': 0.0002831505954220156, 'samples': 12272128, 'steps': 23968, 'loss/train': 1.968056559562683} +02/25/2022 08:37:22 - INFO - codeparrot_training - Step 23969: {'lr': 0.00028313437738105353, 'samples': 12272640, 'steps': 23969, 'loss/train': 1.7962291240692139} +02/25/2022 08:37:26 - INFO - codeparrot_training - Step 23970: {'lr': 0.0002831181591981543, 'samples': 12273152, 'steps': 23970, 'loss/train': 2.4770469665527344} +02/25/2022 08:37:33 - INFO - codeparrot_training - Step 23971: {'lr': 0.0002831019408733874, 'samples': 12273664, 'steps': 23971, 'loss/train': 2.0747499465942383} +02/25/2022 08:37:36 - INFO - codeparrot_training - Step 23972: {'lr': 0.00028308572240682233, 'samples': 12274176, 'steps': 23972, 'loss/train': 0.2593514919281006} +02/25/2022 08:37:42 - INFO - codeparrot_training - Step 23973: {'lr': 0.00028306950379852844, 'samples': 12274688, 'steps': 23973, 'loss/train': 2.389361619949341} +02/25/2022 08:37:45 - INFO - codeparrot_training - Step 23974: {'lr': 0.0002830532850485754, 'samples': 12275200, 'steps': 23974, 'loss/train': 2.4268078804016113} +02/25/2022 08:37:51 - INFO - codeparrot_training - Step 23975: {'lr': 0.0002830370661570325, 'samples': 12275712, 'steps': 23975, 'loss/train': 1.82704496383667} +02/25/2022 08:37:54 - INFO - codeparrot_training - Step 23976: {'lr': 0.00028302084712396937, 'samples': 12276224, 'steps': 23976, 'loss/train': 1.6886565685272217} +02/25/2022 08:38:00 - INFO - codeparrot_training - Step 23977: {'lr': 0.00028300462794945535, 'samples': 12276736, 'steps': 23977, 'loss/train': 2.3774254322052} +02/25/2022 08:38:03 - INFO - codeparrot_training - Step 23978: {'lr': 0.00028298840863356006, 'samples': 12277248, 'steps': 23978, 'loss/train': 2.3185224533081055} +02/25/2022 08:38:09 - INFO - codeparrot_training - Step 23979: {'lr': 0.0002829721891763529, 'samples': 12277760, 'steps': 23979, 'loss/train': 1.9011954069137573} +02/25/2022 08:38:12 - INFO - codeparrot_training - Step 23980: {'lr': 0.00028295596957790325, 'samples': 12278272, 'steps': 23980, 'loss/train': 1.0226720571517944} +02/25/2022 08:38:18 - INFO - codeparrot_training - Step 23981: {'lr': 0.0002829397498382808, 'samples': 12278784, 'steps': 23981, 'loss/train': 1.6950478553771973} +02/25/2022 08:38:22 - INFO - codeparrot_training - Step 23982: {'lr': 0.00028292352995755487, 'samples': 12279296, 'steps': 23982, 'loss/train': 1.0232689380645752} +02/25/2022 08:38:27 - INFO - codeparrot_training - Step 23983: {'lr': 0.000282907309935795, 'samples': 12279808, 'steps': 23983, 'loss/train': 0.05856531485915184} +02/25/2022 08:38:30 - INFO - codeparrot_training - Step 23984: {'lr': 0.00028289108977307066, 'samples': 12280320, 'steps': 23984, 'loss/train': 2.1872799396514893} +02/25/2022 08:38:36 - INFO - codeparrot_training - Step 23985: {'lr': 0.00028287486946945137, 'samples': 12280832, 'steps': 23985, 'loss/train': 1.0601203441619873} +02/25/2022 08:38:39 - INFO - codeparrot_training - Step 23986: {'lr': 0.0002828586490250065, 'samples': 12281344, 'steps': 23986, 'loss/train': 3.0550875663757324} +02/25/2022 08:38:47 - INFO - codeparrot_training - Step 23987: {'lr': 0.00028284242843980566, 'samples': 12281856, 'steps': 23987, 'loss/train': 3.133599042892456} +02/25/2022 08:38:50 - INFO - codeparrot_training - Step 23988: {'lr': 0.00028282620771391824, 'samples': 12282368, 'steps': 23988, 'loss/train': 1.8133797645568848} +02/25/2022 08:38:56 - INFO - codeparrot_training - Step 23989: {'lr': 0.00028280998684741387, 'samples': 12282880, 'steps': 23989, 'loss/train': 0.9786564111709595} +02/25/2022 08:38:59 - INFO - codeparrot_training - Step 23990: {'lr': 0.00028279376584036187, 'samples': 12283392, 'steps': 23990, 'loss/train': 2.0829660892486572} +02/25/2022 08:39:05 - INFO - codeparrot_training - Step 23991: {'lr': 0.0002827775446928318, 'samples': 12283904, 'steps': 23991, 'loss/train': 0.30979397892951965} +02/25/2022 08:39:08 - INFO - codeparrot_training - Step 23992: {'lr': 0.00028276132340489306, 'samples': 12284416, 'steps': 23992, 'loss/train': 2.194631814956665} +02/25/2022 08:39:14 - INFO - codeparrot_training - Step 23993: {'lr': 0.0002827451019766153, 'samples': 12284928, 'steps': 23993, 'loss/train': 1.5144309997558594} +02/25/2022 08:39:18 - INFO - codeparrot_training - Step 23994: {'lr': 0.00028272888040806795, 'samples': 12285440, 'steps': 23994, 'loss/train': 0.11840825527906418} +02/25/2022 08:39:23 - INFO - codeparrot_training - Step 23995: {'lr': 0.0002827126586993204, 'samples': 12285952, 'steps': 23995, 'loss/train': 1.9413007497787476} +02/25/2022 08:39:27 - INFO - codeparrot_training - Step 23996: {'lr': 0.0002826964368504422, 'samples': 12286464, 'steps': 23996, 'loss/train': 1.617460012435913} +02/25/2022 08:39:34 - INFO - codeparrot_training - Step 23997: {'lr': 0.0002826802148615029, 'samples': 12286976, 'steps': 23997, 'loss/train': 3.1694045066833496} +02/25/2022 08:39:37 - INFO - codeparrot_training - Step 23998: {'lr': 0.00028266399273257193, 'samples': 12287488, 'steps': 23998, 'loss/train': 2.2219560146331787} +02/25/2022 08:39:43 - INFO - codeparrot_training - Step 23999: {'lr': 0.0002826477704637188, 'samples': 12288000, 'steps': 23999, 'loss/train': 1.6088899374008179} +02/25/2022 08:39:43 - INFO - codeparrot_training - Evaluating and saving model checkpoint