diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -6337,3 +6337,1009 @@ Use FP16 precision: False 02/24/2022 09:19:11 - INFO - codeparrot_training - Step 5998: {'lr': 0.0004914899243368279, 'samples': 3071488, 'steps': 5998, 'loss/train': 1.7195990085601807} 02/24/2022 09:19:14 - INFO - codeparrot_training - Step 5999: {'lr': 0.0004914856909717715, 'samples': 3072000, 'steps': 5999, 'loss/train': 2.396481513977051} 02/24/2022 09:19:14 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 09:19:31 - WARNING - huggingface_hub.repository - Several commits (6) will be pushed upstream. +02/24/2022 09:19:31 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 09:20:04 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 8f2bfb2..8b611e7 floral-grass-11 -> floral-grass-11 + +02/24/2022 09:20:11 - INFO - codeparrot_training - Step 6000: {'lr': 0.0004914814565722671, 'samples': 3072512, 'steps': 6000, 'loss/train': 1.604397177696228} +02/24/2022 09:20:14 - INFO - codeparrot_training - Step 6001: {'lr': 0.0004914772211383327, 'samples': 3073024, 'steps': 6001, 'loss/train': 3.018603801727295} +02/24/2022 09:20:20 - INFO - codeparrot_training - Step 6002: {'lr': 0.0004914729846699867, 'samples': 3073536, 'steps': 6002, 'loss/train': 1.4417643547058105} +02/24/2022 09:20:23 - INFO - codeparrot_training - Step 6003: {'lr': 0.000491468747167247, 'samples': 3074048, 'steps': 6003, 'loss/train': 1.4821195602416992} +02/24/2022 09:20:30 - INFO - codeparrot_training - Step 6004: {'lr': 0.0004914645086301319, 'samples': 3074560, 'steps': 6004, 'loss/train': 3.401012897491455} +02/24/2022 09:20:34 - INFO - codeparrot_training - Step 6005: {'lr': 0.0004914602690586596, 'samples': 3075072, 'steps': 6005, 'loss/train': 1.865563154220581} +02/24/2022 09:20:39 - INFO - codeparrot_training - Step 6006: {'lr': 0.0004914560284528481, 'samples': 3075584, 'steps': 6006, 'loss/train': 1.9332705736160278} +02/24/2022 09:20:43 - INFO - codeparrot_training - Step 6007: {'lr': 0.0004914517868127156, 'samples': 3076096, 'steps': 6007, 'loss/train': 2.0969417095184326} +02/24/2022 09:20:49 - INFO - codeparrot_training - Step 6008: {'lr': 0.0004914475441382804, 'samples': 3076608, 'steps': 6008, 'loss/train': 2.5750367641448975} +02/24/2022 09:20:52 - INFO - codeparrot_training - Step 6009: {'lr': 0.0004914433004295605, 'samples': 3077120, 'steps': 6009, 'loss/train': 2.3179712295532227} +02/24/2022 09:20:58 - INFO - codeparrot_training - Step 6010: {'lr': 0.0004914390556865743, 'samples': 3077632, 'steps': 6010, 'loss/train': 2.3691201210021973} +02/24/2022 09:21:01 - INFO - codeparrot_training - Step 6011: {'lr': 0.0004914348099093398, 'samples': 3078144, 'steps': 6011, 'loss/train': 1.8876723051071167} +02/24/2022 09:21:07 - INFO - codeparrot_training - Step 6012: {'lr': 0.0004914305630978751, 'samples': 3078656, 'steps': 6012, 'loss/train': 1.9158037900924683} +02/24/2022 09:21:10 - INFO - codeparrot_training - Step 6013: {'lr': 0.0004914263152521987, 'samples': 3079168, 'steps': 6013, 'loss/train': 2.1041481494903564} +02/24/2022 09:21:16 - INFO - codeparrot_training - Step 6014: {'lr': 0.0004914220663723286, 'samples': 3079680, 'steps': 6014, 'loss/train': 2.2101552486419678} +02/24/2022 09:21:19 - INFO - codeparrot_training - Step 6015: {'lr': 0.0004914178164582829, 'samples': 3080192, 'steps': 6015, 'loss/train': 1.28226900100708} +02/24/2022 09:21:25 - INFO - codeparrot_training - Step 6016: {'lr': 0.0004914135655100801, 'samples': 3080704, 'steps': 6016, 'loss/train': 1.7215476036071777} +02/24/2022 09:21:28 - INFO - codeparrot_training - Step 6017: {'lr': 0.0004914093135277381, 'samples': 3081216, 'steps': 6017, 'loss/train': 2.223314046859741} +02/24/2022 09:21:34 - INFO - codeparrot_training - Step 6018: {'lr': 0.0004914050605112753, 'samples': 3081728, 'steps': 6018, 'loss/train': 2.245091199874878} +02/24/2022 09:21:37 - INFO - codeparrot_training - Step 6019: {'lr': 0.00049140080646071, 'samples': 3082240, 'steps': 6019, 'loss/train': 1.5238486528396606} +02/24/2022 09:21:45 - INFO - codeparrot_training - Step 6020: {'lr': 0.0004913965513760601, 'samples': 3082752, 'steps': 6020, 'loss/train': 2.957625389099121} +02/24/2022 09:21:50 - INFO - codeparrot_training - Step 6021: {'lr': 0.0004913922952573442, 'samples': 3083264, 'steps': 6021, 'loss/train': 2.4479918479919434} +02/24/2022 09:21:54 - INFO - codeparrot_training - Step 6022: {'lr': 0.0004913880381045803, 'samples': 3083776, 'steps': 6022, 'loss/train': 3.338038682937622} +02/24/2022 09:21:57 - INFO - codeparrot_training - Step 6023: {'lr': 0.0004913837799177867, 'samples': 3084288, 'steps': 6023, 'loss/train': 1.1308441162109375} +02/24/2022 09:22:03 - INFO - codeparrot_training - Step 6024: {'lr': 0.0004913795206969815, 'samples': 3084800, 'steps': 6024, 'loss/train': 2.617490530014038} +02/24/2022 09:22:08 - INFO - codeparrot_training - Step 6025: {'lr': 0.0004913752604421833, 'samples': 3085312, 'steps': 6025, 'loss/train': 2.5893051624298096} +02/24/2022 09:22:12 - INFO - codeparrot_training - Step 6026: {'lr': 0.0004913709991534099, 'samples': 3085824, 'steps': 6026, 'loss/train': 2.3469371795654297} +02/24/2022 09:22:17 - INFO - codeparrot_training - Step 6027: {'lr': 0.00049136673683068, 'samples': 3086336, 'steps': 6027, 'loss/train': 2.451702117919922} +02/24/2022 09:22:21 - INFO - codeparrot_training - Step 6028: {'lr': 0.0004913624734740115, 'samples': 3086848, 'steps': 6028, 'loss/train': 2.1170425415039062} +02/24/2022 09:22:28 - INFO - codeparrot_training - Step 6029: {'lr': 0.0004913582090834229, 'samples': 3087360, 'steps': 6029, 'loss/train': 2.244554281234741} +02/24/2022 09:22:31 - INFO - codeparrot_training - Step 6030: {'lr': 0.0004913539436589323, 'samples': 3087872, 'steps': 6030, 'loss/train': 2.9589085578918457} +02/24/2022 09:22:35 - INFO - codeparrot_training - Step 6031: {'lr': 0.0004913496772005581, 'samples': 3088384, 'steps': 6031, 'loss/train': 3.629499673843384} +02/24/2022 09:22:40 - INFO - codeparrot_training - Step 6032: {'lr': 0.0004913454097083185, 'samples': 3088896, 'steps': 6032, 'loss/train': 2.1499903202056885} +02/24/2022 09:22:46 - INFO - codeparrot_training - Step 6033: {'lr': 0.0004913411411822318, 'samples': 3089408, 'steps': 6033, 'loss/train': 1.62830650806427} +02/24/2022 09:22:49 - INFO - codeparrot_training - Step 6034: {'lr': 0.0004913368716223162, 'samples': 3089920, 'steps': 6034, 'loss/train': 2.0829288959503174} +02/24/2022 09:22:55 - INFO - codeparrot_training - Step 6035: {'lr': 0.0004913326010285902, 'samples': 3090432, 'steps': 6035, 'loss/train': 1.9751555919647217} +02/24/2022 09:22:58 - INFO - codeparrot_training - Step 6036: {'lr': 0.0004913283294010719, 'samples': 3090944, 'steps': 6036, 'loss/train': 2.6310036182403564} +02/24/2022 09:23:04 - INFO - codeparrot_training - Step 6037: {'lr': 0.0004913240567397797, 'samples': 3091456, 'steps': 6037, 'loss/train': 2.592632532119751} +02/24/2022 09:23:07 - INFO - codeparrot_training - Step 6038: {'lr': 0.0004913197830447319, 'samples': 3091968, 'steps': 6038, 'loss/train': 2.8282647132873535} +02/24/2022 09:23:15 - INFO - codeparrot_training - Step 6039: {'lr': 0.0004913155083159467, 'samples': 3092480, 'steps': 6039, 'loss/train': 1.9306542873382568} +02/24/2022 09:23:18 - INFO - codeparrot_training - Step 6040: {'lr': 0.0004913112325534426, 'samples': 3092992, 'steps': 6040, 'loss/train': 3.1631524562835693} +02/24/2022 09:23:24 - INFO - codeparrot_training - Step 6041: {'lr': 0.0004913069557572376, 'samples': 3093504, 'steps': 6041, 'loss/train': 4.818521499633789} +02/24/2022 09:23:27 - INFO - codeparrot_training - Step 6042: {'lr': 0.0004913026779273504, 'samples': 3094016, 'steps': 6042, 'loss/train': 2.5206692218780518} +02/24/2022 09:23:33 - INFO - codeparrot_training - Step 6043: {'lr': 0.0004912983990637992, 'samples': 3094528, 'steps': 6043, 'loss/train': 2.197874069213867} +02/24/2022 09:23:36 - INFO - codeparrot_training - Step 6044: {'lr': 0.0004912941191666021, 'samples': 3095040, 'steps': 6044, 'loss/train': 1.9329408407211304} +02/24/2022 09:23:42 - INFO - codeparrot_training - Step 6045: {'lr': 0.0004912898382357777, 'samples': 3095552, 'steps': 6045, 'loss/train': 2.4365339279174805} +02/24/2022 09:23:45 - INFO - codeparrot_training - Step 6046: {'lr': 0.0004912855562713443, 'samples': 3096064, 'steps': 6046, 'loss/train': 2.190722703933716} +02/24/2022 09:23:51 - INFO - codeparrot_training - Step 6047: {'lr': 0.0004912812732733201, 'samples': 3096576, 'steps': 6047, 'loss/train': 1.9168121814727783} +02/24/2022 09:23:54 - INFO - codeparrot_training - Step 6048: {'lr': 0.0004912769892417236, 'samples': 3097088, 'steps': 6048, 'loss/train': 2.0972447395324707} +02/24/2022 09:24:02 - INFO - codeparrot_training - Step 6049: {'lr': 0.000491272704176573, 'samples': 3097600, 'steps': 6049, 'loss/train': 2.4642345905303955} +02/24/2022 09:24:05 - INFO - codeparrot_training - Step 6050: {'lr': 0.0004912684180778869, 'samples': 3098112, 'steps': 6050, 'loss/train': 1.5909693241119385} +02/24/2022 09:24:10 - INFO - codeparrot_training - Step 6051: {'lr': 0.0004912641309456834, 'samples': 3098624, 'steps': 6051, 'loss/train': 2.454633951187134} +02/24/2022 09:24:14 - INFO - codeparrot_training - Step 6052: {'lr': 0.000491259842779981, 'samples': 3099136, 'steps': 6052, 'loss/train': 1.9100749492645264} +02/24/2022 09:24:19 - INFO - codeparrot_training - Step 6053: {'lr': 0.0004912555535807981, 'samples': 3099648, 'steps': 6053, 'loss/train': 2.113274097442627} +02/24/2022 09:24:23 - INFO - codeparrot_training - Step 6054: {'lr': 0.0004912512633481529, 'samples': 3100160, 'steps': 6054, 'loss/train': 2.7591514587402344} +02/24/2022 09:24:28 - INFO - codeparrot_training - Step 6055: {'lr': 0.0004912469720820639, 'samples': 3100672, 'steps': 6055, 'loss/train': 3.426459312438965} +02/24/2022 09:24:32 - INFO - codeparrot_training - Step 6056: {'lr': 0.0004912426797825495, 'samples': 3101184, 'steps': 6056, 'loss/train': 1.6065919399261475} +02/24/2022 09:24:37 - INFO - codeparrot_training - Step 6057: {'lr': 0.0004912383864496281, 'samples': 3101696, 'steps': 6057, 'loss/train': 2.546449899673462} +02/24/2022 09:24:41 - INFO - codeparrot_training - Step 6058: {'lr': 0.0004912340920833182, 'samples': 3102208, 'steps': 6058, 'loss/train': 2.28745698928833} +02/24/2022 09:24:46 - INFO - codeparrot_training - Step 6059: {'lr': 0.0004912297966836378, 'samples': 3102720, 'steps': 6059, 'loss/train': 2.5935282707214355} +02/24/2022 09:24:50 - INFO - codeparrot_training - Step 6060: {'lr': 0.0004912255002506057, 'samples': 3103232, 'steps': 6060, 'loss/train': 2.5242879390716553} +02/24/2022 09:24:55 - INFO - codeparrot_training - Step 6061: {'lr': 0.00049122120278424, 'samples': 3103744, 'steps': 6061, 'loss/train': 3.297246217727661} +02/24/2022 09:24:59 - INFO - codeparrot_training - Step 6062: {'lr': 0.0004912169042845595, 'samples': 3104256, 'steps': 6062, 'loss/train': 2.0308449268341064} +02/24/2022 09:25:04 - INFO - codeparrot_training - Step 6063: {'lr': 0.0004912126047515821, 'samples': 3104768, 'steps': 6063, 'loss/train': 1.0052578449249268} +02/24/2022 09:25:08 - INFO - codeparrot_training - Step 6064: {'lr': 0.0004912083041853267, 'samples': 3105280, 'steps': 6064, 'loss/train': 5.7387518882751465} +02/24/2022 09:25:15 - INFO - codeparrot_training - Step 6065: {'lr': 0.0004912040025858114, 'samples': 3105792, 'steps': 6065, 'loss/train': 2.9070844650268555} +02/24/2022 09:25:19 - INFO - codeparrot_training - Step 6066: {'lr': 0.0004911996999530548, 'samples': 3106304, 'steps': 6066, 'loss/train': 3.1185414791107178} +02/24/2022 09:25:24 - INFO - codeparrot_training - Step 6067: {'lr': 0.0004911953962870754, 'samples': 3106816, 'steps': 6067, 'loss/train': 3.52547550201416} +02/24/2022 09:25:28 - INFO - codeparrot_training - Step 6068: {'lr': 0.0004911910915878913, 'samples': 3107328, 'steps': 6068, 'loss/train': 2.851006507873535} +02/24/2022 09:25:33 - INFO - codeparrot_training - Step 6069: {'lr': 0.0004911867858555212, 'samples': 3107840, 'steps': 6069, 'loss/train': 3.4294042587280273} +02/24/2022 09:25:37 - INFO - codeparrot_training - Step 6070: {'lr': 0.0004911824790899836, 'samples': 3108352, 'steps': 6070, 'loss/train': 1.884102702140808} +02/24/2022 09:25:42 - INFO - codeparrot_training - Step 6071: {'lr': 0.0004911781712912968, 'samples': 3108864, 'steps': 6071, 'loss/train': 1.6893190145492554} +02/24/2022 09:25:46 - INFO - codeparrot_training - Step 6072: {'lr': 0.0004911738624594793, 'samples': 3109376, 'steps': 6072, 'loss/train': 2.1315715312957764} +02/24/2022 09:25:51 - INFO - codeparrot_training - Step 6073: {'lr': 0.0004911695525945494, 'samples': 3109888, 'steps': 6073, 'loss/train': 2.465468168258667} +02/24/2022 09:25:58 - INFO - codeparrot_training - Step 6074: {'lr': 0.0004911652416965259, 'samples': 3110400, 'steps': 6074, 'loss/train': 2.6952435970306396} +02/24/2022 09:26:02 - INFO - codeparrot_training - Step 6075: {'lr': 0.000491160929765427, 'samples': 3110912, 'steps': 6075, 'loss/train': 3.1071462631225586} +02/24/2022 09:26:05 - INFO - codeparrot_training - Step 6076: {'lr': 0.0004911566168012714, 'samples': 3111424, 'steps': 6076, 'loss/train': 2.949596881866455} +02/24/2022 09:26:11 - INFO - codeparrot_training - Step 6077: {'lr': 0.0004911523028040772, 'samples': 3111936, 'steps': 6077, 'loss/train': 2.3221099376678467} +02/24/2022 09:26:16 - INFO - codeparrot_training - Step 6078: {'lr': 0.0004911479877738633, 'samples': 3112448, 'steps': 6078, 'loss/train': 2.6894407272338867} +02/24/2022 09:26:20 - INFO - codeparrot_training - Step 6079: {'lr': 0.0004911436717106478, 'samples': 3112960, 'steps': 6079, 'loss/train': 2.227316379547119} +02/24/2022 09:26:25 - INFO - codeparrot_training - Step 6080: {'lr': 0.0004911393546144495, 'samples': 3113472, 'steps': 6080, 'loss/train': 2.730419397354126} +02/24/2022 09:26:29 - INFO - codeparrot_training - Step 6081: {'lr': 0.0004911350364852868, 'samples': 3113984, 'steps': 6081, 'loss/train': 1.5856051445007324} +02/24/2022 09:26:34 - INFO - codeparrot_training - Step 6082: {'lr': 0.0004911307173231782, 'samples': 3114496, 'steps': 6082, 'loss/train': 2.1041507720947266} +02/24/2022 09:26:38 - INFO - codeparrot_training - Step 6083: {'lr': 0.000491126397128142, 'samples': 3115008, 'steps': 6083, 'loss/train': 1.594551682472229} +02/24/2022 09:26:45 - INFO - codeparrot_training - Step 6084: {'lr': 0.0004911220759001971, 'samples': 3115520, 'steps': 6084, 'loss/train': 2.4654273986816406} +02/24/2022 09:26:49 - INFO - codeparrot_training - Step 6085: {'lr': 0.0004911177536393616, 'samples': 3116032, 'steps': 6085, 'loss/train': 1.576617956161499} +02/24/2022 09:26:52 - INFO - codeparrot_training - Step 6086: {'lr': 0.0004911134303456543, 'samples': 3116544, 'steps': 6086, 'loss/train': 2.4746272563934326} +02/24/2022 09:26:58 - INFO - codeparrot_training - Step 6087: {'lr': 0.0004911091060190937, 'samples': 3117056, 'steps': 6087, 'loss/train': 2.7446303367614746} +02/24/2022 09:27:01 - INFO - codeparrot_training - Step 6088: {'lr': 0.0004911047806596981, 'samples': 3117568, 'steps': 6088, 'loss/train': 2.535982131958008} +02/24/2022 09:27:07 - INFO - codeparrot_training - Step 6089: {'lr': 0.0004911004542674863, 'samples': 3118080, 'steps': 6089, 'loss/train': 2.5550506114959717} +02/24/2022 09:27:10 - INFO - codeparrot_training - Step 6090: {'lr': 0.0004910961268424766, 'samples': 3118592, 'steps': 6090, 'loss/train': 2.3968963623046875} +02/24/2022 09:27:16 - INFO - codeparrot_training - Step 6091: {'lr': 0.0004910917983846877, 'samples': 3119104, 'steps': 6091, 'loss/train': 2.9215710163116455} +02/24/2022 09:27:19 - INFO - codeparrot_training - Step 6092: {'lr': 0.0004910874688941381, 'samples': 3119616, 'steps': 6092, 'loss/train': 3.4735233783721924} +02/24/2022 09:27:25 - INFO - codeparrot_training - Step 6093: {'lr': 0.0004910831383708464, 'samples': 3120128, 'steps': 6093, 'loss/train': 2.906996726989746} +02/24/2022 09:27:28 - INFO - codeparrot_training - Step 6094: {'lr': 0.000491078806814831, 'samples': 3120640, 'steps': 6094, 'loss/train': 1.6564717292785645} +02/24/2022 09:27:34 - INFO - codeparrot_training - Step 6095: {'lr': 0.0004910744742261106, 'samples': 3121152, 'steps': 6095, 'loss/train': 2.2512950897216797} +02/24/2022 09:27:37 - INFO - codeparrot_training - Step 6096: {'lr': 0.0004910701406047037, 'samples': 3121664, 'steps': 6096, 'loss/train': 2.8152337074279785} +02/24/2022 09:27:44 - INFO - codeparrot_training - Step 6097: {'lr': 0.0004910658059506289, 'samples': 3122176, 'steps': 6097, 'loss/train': 3.291065216064453} +02/24/2022 09:27:48 - INFO - codeparrot_training - Step 6098: {'lr': 0.0004910614702639045, 'samples': 3122688, 'steps': 6098, 'loss/train': 1.9583368301391602} +02/24/2022 09:27:53 - INFO - codeparrot_training - Step 6099: {'lr': 0.0004910571335445496, 'samples': 3123200, 'steps': 6099, 'loss/train': 2.0853025913238525} +02/24/2022 09:27:57 - INFO - codeparrot_training - Step 6100: {'lr': 0.0004910527957925823, 'samples': 3123712, 'steps': 6100, 'loss/train': 2.9351468086242676} +02/24/2022 09:28:03 - INFO - codeparrot_training - Step 6101: {'lr': 0.0004910484570080215, 'samples': 3124224, 'steps': 6101, 'loss/train': 2.6556482315063477} +02/24/2022 09:28:06 - INFO - codeparrot_training - Step 6102: {'lr': 0.0004910441171908855, 'samples': 3124736, 'steps': 6102, 'loss/train': 1.123103141784668} +02/24/2022 09:28:12 - INFO - codeparrot_training - Step 6103: {'lr': 0.0004910397763411931, 'samples': 3125248, 'steps': 6103, 'loss/train': 2.4701147079467773} +02/24/2022 09:28:15 - INFO - codeparrot_training - Step 6104: {'lr': 0.000491035434458963, 'samples': 3125760, 'steps': 6104, 'loss/train': 2.011813163757324} +02/24/2022 09:28:21 - INFO - codeparrot_training - Step 6105: {'lr': 0.0004910310915442135, 'samples': 3126272, 'steps': 6105, 'loss/train': 2.420161008834839} +02/24/2022 09:28:24 - INFO - codeparrot_training - Step 6106: {'lr': 0.0004910267475969633, 'samples': 3126784, 'steps': 6106, 'loss/train': 1.6771225929260254} +02/24/2022 09:28:30 - INFO - codeparrot_training - Step 6107: {'lr': 0.000491022402617231, 'samples': 3127296, 'steps': 6107, 'loss/train': 1.8822206258773804} +02/24/2022 09:28:33 - INFO - codeparrot_training - Step 6108: {'lr': 0.0004910180566050354, 'samples': 3127808, 'steps': 6108, 'loss/train': 2.1303060054779053} +02/24/2022 09:28:39 - INFO - codeparrot_training - Step 6109: {'lr': 0.0004910137095603949, 'samples': 3128320, 'steps': 6109, 'loss/train': 2.4091217517852783} +02/24/2022 09:28:42 - INFO - codeparrot_training - Step 6110: {'lr': 0.0004910093614833282, 'samples': 3128832, 'steps': 6110, 'loss/train': 2.64959454536438} +02/24/2022 09:28:49 - INFO - codeparrot_training - Step 6111: {'lr': 0.000491005012373854, 'samples': 3129344, 'steps': 6111, 'loss/train': 2.0367701053619385} +02/24/2022 09:28:53 - INFO - codeparrot_training - Step 6112: {'lr': 0.0004910006622319908, 'samples': 3129856, 'steps': 6112, 'loss/train': 1.6172839403152466} +02/24/2022 09:28:59 - INFO - codeparrot_training - Step 6113: {'lr': 0.0004909963110577573, 'samples': 3130368, 'steps': 6113, 'loss/train': 2.6684956550598145} +02/24/2022 09:29:02 - INFO - codeparrot_training - Step 6114: {'lr': 0.000490991958851172, 'samples': 3130880, 'steps': 6114, 'loss/train': 1.8965198993682861} +02/24/2022 09:29:08 - INFO - codeparrot_training - Step 6115: {'lr': 0.0004909876056122538, 'samples': 3131392, 'steps': 6115, 'loss/train': 2.818169116973877} +02/24/2022 09:29:11 - INFO - codeparrot_training - Step 6116: {'lr': 0.0004909832513410213, 'samples': 3131904, 'steps': 6116, 'loss/train': 2.6958489418029785} +02/24/2022 09:29:17 - INFO - codeparrot_training - Step 6117: {'lr': 0.000490978896037493, 'samples': 3132416, 'steps': 6117, 'loss/train': 3.0597338676452637} +02/24/2022 09:29:20 - INFO - codeparrot_training - Step 6118: {'lr': 0.0004909745397016876, 'samples': 3132928, 'steps': 6118, 'loss/train': 1.8273980617523193} +02/24/2022 09:29:25 - INFO - codeparrot_training - Step 6119: {'lr': 0.0004909701823336238, 'samples': 3133440, 'steps': 6119, 'loss/train': 2.6896169185638428} +02/24/2022 09:29:29 - INFO - codeparrot_training - Step 6120: {'lr': 0.0004909658239333202, 'samples': 3133952, 'steps': 6120, 'loss/train': 2.7352962493896484} +02/24/2022 09:29:36 - INFO - codeparrot_training - Step 6121: {'lr': 0.0004909614645007956, 'samples': 3134464, 'steps': 6121, 'loss/train': 2.0802161693573} +02/24/2022 09:29:40 - INFO - codeparrot_training - Step 6122: {'lr': 0.0004909571040360686, 'samples': 3134976, 'steps': 6122, 'loss/train': 3.7975966930389404} +02/24/2022 09:29:45 - INFO - codeparrot_training - Step 6123: {'lr': 0.0004909527425391579, 'samples': 3135488, 'steps': 6123, 'loss/train': 1.557277798652649} +02/24/2022 09:29:49 - INFO - codeparrot_training - Step 6124: {'lr': 0.0004909483800100822, 'samples': 3136000, 'steps': 6124, 'loss/train': 2.73663067817688} +02/24/2022 09:29:54 - INFO - codeparrot_training - Step 6125: {'lr': 0.00049094401644886, 'samples': 3136512, 'steps': 6125, 'loss/train': 1.576921820640564} +02/24/2022 09:29:58 - INFO - codeparrot_training - Step 6126: {'lr': 0.0004909396518555102, 'samples': 3137024, 'steps': 6126, 'loss/train': 2.7907321453094482} +02/24/2022 09:30:03 - INFO - codeparrot_training - Step 6127: {'lr': 0.0004909352862300514, 'samples': 3137536, 'steps': 6127, 'loss/train': 1.7028225660324097} +02/24/2022 09:30:07 - INFO - codeparrot_training - Step 6128: {'lr': 0.0004909309195725024, 'samples': 3138048, 'steps': 6128, 'loss/train': 2.3685123920440674} +02/24/2022 09:30:12 - INFO - codeparrot_training - Step 6129: {'lr': 0.0004909265518828819, 'samples': 3138560, 'steps': 6129, 'loss/train': 1.558257818222046} +02/24/2022 09:30:16 - INFO - codeparrot_training - Step 6130: {'lr': 0.0004909221831612085, 'samples': 3139072, 'steps': 6130, 'loss/train': 2.0935728549957275} +02/24/2022 09:30:23 - INFO - codeparrot_training - Step 6131: {'lr': 0.000490917813407501, 'samples': 3139584, 'steps': 6131, 'loss/train': 2.4071693420410156} +02/24/2022 09:30:26 - INFO - codeparrot_training - Step 6132: {'lr': 0.0004909134426217779, 'samples': 3140096, 'steps': 6132, 'loss/train': 3.3161418437957764} +02/24/2022 09:30:32 - INFO - codeparrot_training - Step 6133: {'lr': 0.0004909090708040583, 'samples': 3140608, 'steps': 6133, 'loss/train': 1.6046791076660156} +02/24/2022 09:30:37 - INFO - codeparrot_training - Step 6134: {'lr': 0.0004909046979543608, 'samples': 3141120, 'steps': 6134, 'loss/train': 1.3531415462493896} +02/24/2022 09:30:41 - INFO - codeparrot_training - Step 6135: {'lr': 0.000490900324072704, 'samples': 3141632, 'steps': 6135, 'loss/train': 1.4257930517196655} +02/24/2022 09:30:46 - INFO - codeparrot_training - Step 6136: {'lr': 0.0004908959491591065, 'samples': 3142144, 'steps': 6136, 'loss/train': 3.079061985015869} +02/24/2022 09:30:50 - INFO - codeparrot_training - Step 6137: {'lr': 0.0004908915732135874, 'samples': 3142656, 'steps': 6137, 'loss/train': 1.9684590101242065} +02/24/2022 09:30:53 - INFO - codeparrot_training - Step 6138: {'lr': 0.0004908871962361654, 'samples': 3143168, 'steps': 6138, 'loss/train': 0.4464505612850189} +02/24/2022 09:31:00 - INFO - codeparrot_training - Step 6139: {'lr': 0.0004908828182268591, 'samples': 3143680, 'steps': 6139, 'loss/train': 1.4488576650619507} +02/24/2022 09:31:03 - INFO - codeparrot_training - Step 6140: {'lr': 0.0004908784391856872, 'samples': 3144192, 'steps': 6140, 'loss/train': 2.665416955947876} +02/24/2022 09:31:10 - INFO - codeparrot_training - Step 6141: {'lr': 0.0004908740591126686, 'samples': 3144704, 'steps': 6141, 'loss/train': 1.779059648513794} +02/24/2022 09:31:14 - INFO - codeparrot_training - Step 6142: {'lr': 0.000490869678007822, 'samples': 3145216, 'steps': 6142, 'loss/train': 2.6022825241088867} +02/24/2022 09:31:19 - INFO - codeparrot_training - Step 6143: {'lr': 0.0004908652958711663, 'samples': 3145728, 'steps': 6143, 'loss/train': 2.301237106323242} +02/24/2022 09:31:23 - INFO - codeparrot_training - Step 6144: {'lr': 0.00049086091270272, 'samples': 3146240, 'steps': 6144, 'loss/train': 1.6943293809890747} +02/24/2022 09:31:28 - INFO - codeparrot_training - Step 6145: {'lr': 0.0004908565285025021, 'samples': 3146752, 'steps': 6145, 'loss/train': 1.8279458284378052} +02/24/2022 09:31:32 - INFO - codeparrot_training - Step 6146: {'lr': 0.0004908521432705312, 'samples': 3147264, 'steps': 6146, 'loss/train': 3.0824453830718994} +02/24/2022 09:31:37 - INFO - codeparrot_training - Step 6147: {'lr': 0.0004908477570068263, 'samples': 3147776, 'steps': 6147, 'loss/train': 2.9719717502593994} +02/24/2022 09:31:41 - INFO - codeparrot_training - Step 6148: {'lr': 0.0004908433697114062, 'samples': 3148288, 'steps': 6148, 'loss/train': 2.696833610534668} +02/24/2022 09:31:46 - INFO - codeparrot_training - Step 6149: {'lr': 0.0004908389813842894, 'samples': 3148800, 'steps': 6149, 'loss/train': 2.525071144104004} +02/24/2022 09:31:50 - INFO - codeparrot_training - Step 6150: {'lr': 0.0004908345920254949, 'samples': 3149312, 'steps': 6150, 'loss/train': 2.4214484691619873} +02/24/2022 09:31:55 - INFO - codeparrot_training - Step 6151: {'lr': 0.0004908302016350416, 'samples': 3149824, 'steps': 6151, 'loss/train': 2.583103656768799} +02/24/2022 09:31:59 - INFO - codeparrot_training - Step 6152: {'lr': 0.0004908258102129481, 'samples': 3150336, 'steps': 6152, 'loss/train': 1.8836390972137451} +02/24/2022 09:32:04 - INFO - codeparrot_training - Step 6153: {'lr': 0.0004908214177592334, 'samples': 3150848, 'steps': 6153, 'loss/train': 3.045994997024536} +02/24/2022 09:32:08 - INFO - codeparrot_training - Step 6154: {'lr': 0.000490817024273916, 'samples': 3151360, 'steps': 6154, 'loss/train': 0.5500620007514954} +02/24/2022 09:32:13 - INFO - codeparrot_training - Step 6155: {'lr': 0.0004908126297570152, 'samples': 3151872, 'steps': 6155, 'loss/train': 2.1244826316833496} +02/24/2022 09:32:17 - INFO - codeparrot_training - Step 6156: {'lr': 0.0004908082342085494, 'samples': 3152384, 'steps': 6156, 'loss/train': 2.3692545890808105} +02/24/2022 09:32:22 - INFO - codeparrot_training - Step 6157: {'lr': 0.0004908038376285375, 'samples': 3152896, 'steps': 6157, 'loss/train': 4.187961578369141} +02/24/2022 09:32:26 - INFO - codeparrot_training - Step 6158: {'lr': 0.0004907994400169986, 'samples': 3153408, 'steps': 6158, 'loss/train': 2.536123514175415} +02/24/2022 09:32:33 - INFO - codeparrot_training - Step 6159: {'lr': 0.0004907950413739514, 'samples': 3153920, 'steps': 6159, 'loss/train': 2.9844295978546143} +02/24/2022 09:32:37 - INFO - codeparrot_training - Step 6160: {'lr': 0.0004907906416994146, 'samples': 3154432, 'steps': 6160, 'loss/train': 2.656822443008423} +02/24/2022 09:32:42 - INFO - codeparrot_training - Step 6161: {'lr': 0.0004907862409934071, 'samples': 3154944, 'steps': 6161, 'loss/train': 3.5315799713134766} +02/24/2022 09:32:46 - INFO - codeparrot_training - Step 6162: {'lr': 0.0004907818392559479, 'samples': 3155456, 'steps': 6162, 'loss/train': 2.341357707977295} +02/24/2022 09:32:51 - INFO - codeparrot_training - Step 6163: {'lr': 0.0004907774364870557, 'samples': 3155968, 'steps': 6163, 'loss/train': 2.1793057918548584} +02/24/2022 09:32:55 - INFO - codeparrot_training - Step 6164: {'lr': 0.0004907730326867495, 'samples': 3156480, 'steps': 6164, 'loss/train': 2.491849422454834} +02/24/2022 09:33:00 - INFO - codeparrot_training - Step 6165: {'lr': 0.0004907686278550479, 'samples': 3156992, 'steps': 6165, 'loss/train': 1.4215545654296875} +02/24/2022 09:33:04 - INFO - codeparrot_training - Step 6166: {'lr': 0.0004907642219919701, 'samples': 3157504, 'steps': 6166, 'loss/train': 2.2843611240386963} +02/24/2022 09:33:11 - INFO - codeparrot_training - Step 6167: {'lr': 0.0004907598150975348, 'samples': 3158016, 'steps': 6167, 'loss/train': 3.4909417629241943} +02/24/2022 09:33:14 - INFO - codeparrot_training - Step 6168: {'lr': 0.0004907554071717609, 'samples': 3158528, 'steps': 6168, 'loss/train': 1.2315435409545898} +02/24/2022 09:33:20 - INFO - codeparrot_training - Step 6169: {'lr': 0.0004907509982146673, 'samples': 3159040, 'steps': 6169, 'loss/train': 3.2568936347961426} +02/24/2022 09:33:23 - INFO - codeparrot_training - Step 6170: {'lr': 0.0004907465882262728, 'samples': 3159552, 'steps': 6170, 'loss/train': 1.9939494132995605} +02/24/2022 09:33:29 - INFO - codeparrot_training - Step 6171: {'lr': 0.0004907421772065965, 'samples': 3160064, 'steps': 6171, 'loss/train': 3.2389276027679443} +02/24/2022 09:33:32 - INFO - codeparrot_training - Step 6172: {'lr': 0.000490737765155657, 'samples': 3160576, 'steps': 6172, 'loss/train': 2.159992218017578} +02/24/2022 09:33:38 - INFO - codeparrot_training - Step 6173: {'lr': 0.0004907333520734734, 'samples': 3161088, 'steps': 6173, 'loss/train': 1.7988319396972656} +02/24/2022 09:33:41 - INFO - codeparrot_training - Step 6174: {'lr': 0.0004907289379600646, 'samples': 3161600, 'steps': 6174, 'loss/train': 1.812206506729126} +02/24/2022 09:33:47 - INFO - codeparrot_training - Step 6175: {'lr': 0.0004907245228154495, 'samples': 3162112, 'steps': 6175, 'loss/train': 1.9122754335403442} +02/24/2022 09:33:50 - INFO - codeparrot_training - Step 6176: {'lr': 0.0004907201066396469, 'samples': 3162624, 'steps': 6176, 'loss/train': 0.5126199126243591} +02/24/2022 09:33:58 - INFO - codeparrot_training - Step 6177: {'lr': 0.0004907156894326758, 'samples': 3163136, 'steps': 6177, 'loss/train': 2.3578402996063232} +02/24/2022 09:34:01 - INFO - codeparrot_training - Step 6178: {'lr': 0.0004907112711945552, 'samples': 3163648, 'steps': 6178, 'loss/train': 1.8555666208267212} +02/24/2022 09:34:07 - INFO - codeparrot_training - Step 6179: {'lr': 0.000490706851925304, 'samples': 3164160, 'steps': 6179, 'loss/train': 2.283424139022827} +02/24/2022 09:34:10 - INFO - codeparrot_training - Step 6180: {'lr': 0.0004907024316249408, 'samples': 3164672, 'steps': 6180, 'loss/train': 2.6915483474731445} +02/24/2022 09:34:15 - INFO - codeparrot_training - Step 6181: {'lr': 0.0004906980102934852, 'samples': 3165184, 'steps': 6181, 'loss/train': 2.5719361305236816} +02/24/2022 09:34:21 - INFO - codeparrot_training - Step 6182: {'lr': 0.0004906935879309555, 'samples': 3165696, 'steps': 6182, 'loss/train': 1.5978578329086304} +02/24/2022 09:34:24 - INFO - codeparrot_training - Step 6183: {'lr': 0.0004906891645373709, 'samples': 3166208, 'steps': 6183, 'loss/train': 1.8750826120376587} +02/24/2022 09:34:30 - INFO - codeparrot_training - Step 6184: {'lr': 0.0004906847401127504, 'samples': 3166720, 'steps': 6184, 'loss/train': 1.9642512798309326} +02/24/2022 09:34:33 - INFO - codeparrot_training - Step 6185: {'lr': 0.0004906803146571129, 'samples': 3167232, 'steps': 6185, 'loss/train': 2.1332035064697266} +02/24/2022 09:34:39 - INFO - codeparrot_training - Step 6186: {'lr': 0.0004906758881704774, 'samples': 3167744, 'steps': 6186, 'loss/train': 2.0426807403564453} +02/24/2022 09:34:42 - INFO - codeparrot_training - Step 6187: {'lr': 0.0004906714606528628, 'samples': 3168256, 'steps': 6187, 'loss/train': 3.2241833209991455} +02/24/2022 09:34:50 - INFO - codeparrot_training - Step 6188: {'lr': 0.0004906670321042881, 'samples': 3168768, 'steps': 6188, 'loss/train': 3.909959316253662} +02/24/2022 09:34:53 - INFO - codeparrot_training - Step 6189: {'lr': 0.0004906626025247722, 'samples': 3169280, 'steps': 6189, 'loss/train': 4.036428451538086} +02/24/2022 09:34:59 - INFO - codeparrot_training - Step 6190: {'lr': 0.000490658171914334, 'samples': 3169792, 'steps': 6190, 'loss/train': 2.8523690700531006} +02/24/2022 09:35:02 - INFO - codeparrot_training - Step 6191: {'lr': 0.0004906537402729928, 'samples': 3170304, 'steps': 6191, 'loss/train': 2.8157458305358887} +02/24/2022 09:35:08 - INFO - codeparrot_training - Step 6192: {'lr': 0.0004906493076007675, 'samples': 3170816, 'steps': 6192, 'loss/train': 2.9344265460968018} +02/24/2022 09:35:12 - INFO - codeparrot_training - Step 6193: {'lr': 0.0004906448738976768, 'samples': 3171328, 'steps': 6193, 'loss/train': 2.2063586711883545} +02/24/2022 09:35:17 - INFO - codeparrot_training - Step 6194: {'lr': 0.0004906404391637397, 'samples': 3171840, 'steps': 6194, 'loss/train': 3.1388185024261475} +02/24/2022 09:35:21 - INFO - codeparrot_training - Step 6195: {'lr': 0.0004906360033989758, 'samples': 3172352, 'steps': 6195, 'loss/train': 3.407362461090088} +02/24/2022 09:35:26 - INFO - codeparrot_training - Step 6196: {'lr': 0.0004906315666034034, 'samples': 3172864, 'steps': 6196, 'loss/train': 2.4597115516662598} +02/24/2022 09:35:30 - INFO - codeparrot_training - Step 6197: {'lr': 0.0004906271287770418, 'samples': 3173376, 'steps': 6197, 'loss/train': 2.4107017517089844} +02/24/2022 09:35:35 - INFO - codeparrot_training - Step 6198: {'lr': 0.00049062268991991, 'samples': 3173888, 'steps': 6198, 'loss/train': 2.347597599029541} +02/24/2022 09:35:39 - INFO - codeparrot_training - Step 6199: {'lr': 0.0004906182500320269, 'samples': 3174400, 'steps': 6199, 'loss/train': 2.123532772064209} +02/24/2022 09:35:44 - INFO - codeparrot_training - Step 6200: {'lr': 0.0004906138091134118, 'samples': 3174912, 'steps': 6200, 'loss/train': 3.8120334148406982} +02/24/2022 09:35:48 - INFO - codeparrot_training - Step 6201: {'lr': 0.0004906093671640836, 'samples': 3175424, 'steps': 6201, 'loss/train': 3.379196882247925} +02/24/2022 09:35:51 - INFO - codeparrot_training - Step 6202: {'lr': 0.0004906049241840612, 'samples': 3175936, 'steps': 6202, 'loss/train': 2.8560476303100586} +02/24/2022 09:35:59 - INFO - codeparrot_training - Step 6203: {'lr': 0.0004906004801733635, 'samples': 3176448, 'steps': 6203, 'loss/train': 2.3828630447387695} +02/24/2022 09:36:04 - INFO - codeparrot_training - Step 6204: {'lr': 0.0004905960351320099, 'samples': 3176960, 'steps': 6204, 'loss/train': 1.625923752784729} +02/24/2022 09:36:07 - INFO - codeparrot_training - Step 6205: {'lr': 0.0004905915890600194, 'samples': 3177472, 'steps': 6205, 'loss/train': 0.898340106010437} +02/24/2022 09:36:13 - INFO - codeparrot_training - Step 6206: {'lr': 0.0004905871419574107, 'samples': 3177984, 'steps': 6206, 'loss/train': 3.0943093299865723} +02/24/2022 09:36:16 - INFO - codeparrot_training - Step 6207: {'lr': 0.0004905826938242032, 'samples': 3178496, 'steps': 6207, 'loss/train': 2.0490376949310303} +02/24/2022 09:36:22 - INFO - codeparrot_training - Step 6208: {'lr': 0.0004905782446604158, 'samples': 3179008, 'steps': 6208, 'loss/train': 1.7287081480026245} +02/24/2022 09:36:25 - INFO - codeparrot_training - Step 6209: {'lr': 0.0004905737944660676, 'samples': 3179520, 'steps': 6209, 'loss/train': 2.7915098667144775} +02/24/2022 09:36:31 - INFO - codeparrot_training - Step 6210: {'lr': 0.0004905693432411777, 'samples': 3180032, 'steps': 6210, 'loss/train': 3.150730609893799} +02/24/2022 09:36:35 - INFO - codeparrot_training - Step 6211: {'lr': 0.0004905648909857652, 'samples': 3180544, 'steps': 6211, 'loss/train': 3.344449520111084} +02/24/2022 09:36:40 - INFO - codeparrot_training - Step 6212: {'lr': 0.0004905604376998489, 'samples': 3181056, 'steps': 6212, 'loss/train': 2.2071542739868164} +02/24/2022 09:36:44 - INFO - codeparrot_training - Step 6213: {'lr': 0.0004905559833834482, 'samples': 3181568, 'steps': 6213, 'loss/train': 1.9531646966934204} +02/24/2022 09:36:51 - INFO - codeparrot_training - Step 6214: {'lr': 0.000490551528036582, 'samples': 3182080, 'steps': 6214, 'loss/train': 2.1269495487213135} +02/24/2022 09:36:54 - INFO - codeparrot_training - Step 6215: {'lr': 0.0004905470716592695, 'samples': 3182592, 'steps': 6215, 'loss/train': 2.228341579437256} +02/24/2022 09:37:00 - INFO - codeparrot_training - Step 6216: {'lr': 0.0004905426142515296, 'samples': 3183104, 'steps': 6216, 'loss/train': 0.1706215888261795} +02/24/2022 09:37:03 - INFO - codeparrot_training - Step 6217: {'lr': 0.0004905381558133817, 'samples': 3183616, 'steps': 6217, 'loss/train': 2.2180795669555664} +02/24/2022 09:37:09 - INFO - codeparrot_training - Step 6218: {'lr': 0.0004905336963448446, 'samples': 3184128, 'steps': 6218, 'loss/train': 2.185758352279663} +02/24/2022 09:37:12 - INFO - codeparrot_training - Step 6219: {'lr': 0.0004905292358459375, 'samples': 3184640, 'steps': 6219, 'loss/train': 3.3639838695526123} +02/24/2022 09:37:18 - INFO - codeparrot_training - Step 6220: {'lr': 0.0004905247743166796, 'samples': 3185152, 'steps': 6220, 'loss/train': 1.8612608909606934} +02/24/2022 09:37:21 - INFO - codeparrot_training - Step 6221: {'lr': 0.0004905203117570899, 'samples': 3185664, 'steps': 6221, 'loss/train': 2.313664436340332} +02/24/2022 09:37:27 - INFO - codeparrot_training - Step 6222: {'lr': 0.0004905158481671876, 'samples': 3186176, 'steps': 6222, 'loss/train': 1.283319354057312} +02/24/2022 09:37:30 - INFO - codeparrot_training - Step 6223: {'lr': 0.0004905113835469918, 'samples': 3186688, 'steps': 6223, 'loss/train': 0.3306739330291748} +02/24/2022 09:37:38 - INFO - codeparrot_training - Step 6224: {'lr': 0.0004905069178965214, 'samples': 3187200, 'steps': 6224, 'loss/train': 0.4955524504184723} +02/24/2022 09:37:41 - INFO - codeparrot_training - Step 6225: {'lr': 0.0004905024512157959, 'samples': 3187712, 'steps': 6225, 'loss/train': 1.918975591659546} +02/24/2022 09:37:47 - INFO - codeparrot_training - Step 6226: {'lr': 0.0004904979835048343, 'samples': 3188224, 'steps': 6226, 'loss/train': 2.428847074508667} +02/24/2022 09:37:50 - INFO - codeparrot_training - Step 6227: {'lr': 0.0004904935147636557, 'samples': 3188736, 'steps': 6227, 'loss/train': 1.7641596794128418} +02/24/2022 09:37:56 - INFO - codeparrot_training - Step 6228: {'lr': 0.0004904890449922792, 'samples': 3189248, 'steps': 6228, 'loss/train': 2.264202117919922} +02/24/2022 09:37:59 - INFO - codeparrot_training - Step 6229: {'lr': 0.0004904845741907241, 'samples': 3189760, 'steps': 6229, 'loss/train': 2.421705961227417} +02/24/2022 09:38:05 - INFO - codeparrot_training - Step 6230: {'lr': 0.0004904801023590094, 'samples': 3190272, 'steps': 6230, 'loss/train': 0.2653506398200989} +02/24/2022 09:38:08 - INFO - codeparrot_training - Step 6231: {'lr': 0.0004904756294971541, 'samples': 3190784, 'steps': 6231, 'loss/train': 2.5598371028900146} +02/24/2022 09:38:14 - INFO - codeparrot_training - Step 6232: {'lr': 0.0004904711556051778, 'samples': 3191296, 'steps': 6232, 'loss/train': 3.231922149658203} +02/24/2022 09:38:17 - INFO - codeparrot_training - Step 6233: {'lr': 0.0004904666806830992, 'samples': 3191808, 'steps': 6233, 'loss/train': 2.381088972091675} +02/24/2022 09:38:25 - INFO - codeparrot_training - Step 6234: {'lr': 0.0004904622047309379, 'samples': 3192320, 'steps': 6234, 'loss/train': 2.4190409183502197} +02/24/2022 09:38:28 - INFO - codeparrot_training - Step 6235: {'lr': 0.0004904577277487129, 'samples': 3192832, 'steps': 6235, 'loss/train': 2.964531898498535} +02/24/2022 09:38:34 - INFO - codeparrot_training - Step 6236: {'lr': 0.0004904532497364432, 'samples': 3193344, 'steps': 6236, 'loss/train': 2.159140110015869} +02/24/2022 09:38:37 - INFO - codeparrot_training - Step 6237: {'lr': 0.0004904487706941481, 'samples': 3193856, 'steps': 6237, 'loss/train': 1.2483268976211548} +02/24/2022 09:38:43 - INFO - codeparrot_training - Step 6238: {'lr': 0.000490444290621847, 'samples': 3194368, 'steps': 6238, 'loss/train': 3.2070467472076416} +02/24/2022 09:38:47 - INFO - codeparrot_training - Step 6239: {'lr': 0.0004904398095195588, 'samples': 3194880, 'steps': 6239, 'loss/train': 1.5273369550704956} +02/24/2022 09:38:50 - INFO - codeparrot_training - Step 6240: {'lr': 0.0004904353273873028, 'samples': 3195392, 'steps': 6240, 'loss/train': 3.5635406970977783} +02/24/2022 09:38:56 - INFO - codeparrot_training - Step 6241: {'lr': 0.0004904308442250983, 'samples': 3195904, 'steps': 6241, 'loss/train': 2.4514782428741455} +02/24/2022 09:38:59 - INFO - codeparrot_training - Step 6242: {'lr': 0.0004904263600329643, 'samples': 3196416, 'steps': 6242, 'loss/train': 2.590282917022705} +02/24/2022 09:39:05 - INFO - codeparrot_training - Step 6243: {'lr': 0.0004904218748109201, 'samples': 3196928, 'steps': 6243, 'loss/train': 1.0154826641082764} +02/24/2022 09:39:08 - INFO - codeparrot_training - Step 6244: {'lr': 0.000490417388558985, 'samples': 3197440, 'steps': 6244, 'loss/train': 2.410426378250122} +02/24/2022 09:39:14 - INFO - codeparrot_training - Step 6245: {'lr': 0.0004904129012771782, 'samples': 3197952, 'steps': 6245, 'loss/train': 2.0995585918426514} +02/24/2022 09:39:17 - INFO - codeparrot_training - Step 6246: {'lr': 0.0004904084129655188, 'samples': 3198464, 'steps': 6246, 'loss/train': 0.30579087138175964} +02/24/2022 09:39:23 - INFO - codeparrot_training - Step 6247: {'lr': 0.000490403923624026, 'samples': 3198976, 'steps': 6247, 'loss/train': 1.0417059659957886} +02/24/2022 09:39:26 - INFO - codeparrot_training - Step 6248: {'lr': 0.0004903994332527193, 'samples': 3199488, 'steps': 6248, 'loss/train': 2.3782055377960205} +02/24/2022 09:39:31 - INFO - codeparrot_training - Step 6249: {'lr': 0.0004903949418516178, 'samples': 3200000, 'steps': 6249, 'loss/train': 2.0115253925323486} +02/24/2022 09:39:35 - INFO - codeparrot_training - Step 6250: {'lr': 0.0004903904494207405, 'samples': 3200512, 'steps': 6250, 'loss/train': 2.2685794830322266} +02/24/2022 09:39:42 - INFO - codeparrot_training - Step 6251: {'lr': 0.000490385955960107, 'samples': 3201024, 'steps': 6251, 'loss/train': 2.6550660133361816} +02/24/2022 09:39:46 - INFO - codeparrot_training - Step 6252: {'lr': 0.0004903814614697363, 'samples': 3201536, 'steps': 6252, 'loss/train': 3.2062313556671143} +02/24/2022 09:39:51 - INFO - codeparrot_training - Step 6253: {'lr': 0.0004903769659496478, 'samples': 3202048, 'steps': 6253, 'loss/train': 3.9077563285827637} +02/24/2022 09:39:57 - INFO - codeparrot_training - Step 6254: {'lr': 0.0004903724693998607, 'samples': 3202560, 'steps': 6254, 'loss/train': 3.3275156021118164} +02/24/2022 09:40:00 - INFO - codeparrot_training - Step 6255: {'lr': 0.0004903679718203942, 'samples': 3203072, 'steps': 6255, 'loss/train': 1.2602351903915405} +02/24/2022 09:40:04 - INFO - codeparrot_training - Step 6256: {'lr': 0.0004903634732112678, 'samples': 3203584, 'steps': 6256, 'loss/train': 2.4089462757110596} +02/24/2022 09:40:09 - INFO - codeparrot_training - Step 6257: {'lr': 0.0004903589735725004, 'samples': 3204096, 'steps': 6257, 'loss/train': 1.7523692846298218} +02/24/2022 09:40:13 - INFO - codeparrot_training - Step 6258: {'lr': 0.0004903544729041116, 'samples': 3204608, 'steps': 6258, 'loss/train': 2.9210150241851807} +02/24/2022 09:40:18 - INFO - codeparrot_training - Step 6259: {'lr': 0.0004903499712061206, 'samples': 3205120, 'steps': 6259, 'loss/train': 3.1442933082580566} +02/24/2022 09:40:22 - INFO - codeparrot_training - Step 6260: {'lr': 0.0004903454684785465, 'samples': 3205632, 'steps': 6260, 'loss/train': 1.6726661920547485} +02/24/2022 09:40:29 - INFO - codeparrot_training - Step 6261: {'lr': 0.0004903409647214088, 'samples': 3206144, 'steps': 6261, 'loss/train': 2.656792402267456} +02/24/2022 09:40:32 - INFO - codeparrot_training - Step 6262: {'lr': 0.0004903364599347268, 'samples': 3206656, 'steps': 6262, 'loss/train': 2.310574769973755} +02/24/2022 09:40:38 - INFO - codeparrot_training - Step 6263: {'lr': 0.0004903319541185196, 'samples': 3207168, 'steps': 6263, 'loss/train': 2.427889823913574} +02/24/2022 09:40:41 - INFO - codeparrot_training - Step 6264: {'lr': 0.0004903274472728067, 'samples': 3207680, 'steps': 6264, 'loss/train': 2.4989237785339355} +02/24/2022 09:40:47 - INFO - codeparrot_training - Step 6265: {'lr': 0.0004903229393976073, 'samples': 3208192, 'steps': 6265, 'loss/train': 3.0231387615203857} +02/24/2022 09:40:50 - INFO - codeparrot_training - Step 6266: {'lr': 0.0004903184304929408, 'samples': 3208704, 'steps': 6266, 'loss/train': 2.4457943439483643} +02/24/2022 09:40:56 - INFO - codeparrot_training - Step 6267: {'lr': 0.0004903139205588264, 'samples': 3209216, 'steps': 6267, 'loss/train': 1.440609097480774} +02/24/2022 09:40:59 - INFO - codeparrot_training - Step 6268: {'lr': 0.0004903094095952834, 'samples': 3209728, 'steps': 6268, 'loss/train': 2.4216387271881104} +02/24/2022 09:41:07 - INFO - codeparrot_training - Step 6269: {'lr': 0.0004903048976023313, 'samples': 3210240, 'steps': 6269, 'loss/train': 1.8143019676208496} +02/24/2022 09:41:12 - INFO - codeparrot_training - Step 6270: {'lr': 0.0004903003845799893, 'samples': 3210752, 'steps': 6270, 'loss/train': 2.3480634689331055} +02/24/2022 09:41:16 - INFO - codeparrot_training - Step 6271: {'lr': 0.0004902958705282767, 'samples': 3211264, 'steps': 6271, 'loss/train': 0.1486256718635559} +02/24/2022 09:41:21 - INFO - codeparrot_training - Step 6272: {'lr': 0.000490291355447213, 'samples': 3211776, 'steps': 6272, 'loss/train': 1.9411009550094604} +02/24/2022 09:41:25 - INFO - codeparrot_training - Step 6273: {'lr': 0.0004902868393368174, 'samples': 3212288, 'steps': 6273, 'loss/train': 2.6457629203796387} +02/24/2022 09:41:30 - INFO - codeparrot_training - Step 6274: {'lr': 0.0004902823221971092, 'samples': 3212800, 'steps': 6274, 'loss/train': 3.7133326530456543} +02/24/2022 09:41:34 - INFO - codeparrot_training - Step 6275: {'lr': 0.000490277804028108, 'samples': 3213312, 'steps': 6275, 'loss/train': 2.910630226135254} +02/24/2022 09:41:39 - INFO - codeparrot_training - Step 6276: {'lr': 0.0004902732848298328, 'samples': 3213824, 'steps': 6276, 'loss/train': 2.3521180152893066} +02/24/2022 09:41:43 - INFO - codeparrot_training - Step 6277: {'lr': 0.0004902687646023032, 'samples': 3214336, 'steps': 6277, 'loss/train': 3.0850539207458496} +02/24/2022 09:41:49 - INFO - codeparrot_training - Step 6278: {'lr': 0.0004902642433455385, 'samples': 3214848, 'steps': 6278, 'loss/train': 1.6708606481552124} +02/24/2022 09:41:52 - INFO - codeparrot_training - Step 6279: {'lr': 0.0004902597210595581, 'samples': 3215360, 'steps': 6279, 'loss/train': 2.247377872467041} +02/24/2022 09:41:58 - INFO - codeparrot_training - Step 6280: {'lr': 0.0004902551977443813, 'samples': 3215872, 'steps': 6280, 'loss/train': 1.9600398540496826} +02/24/2022 09:42:01 - INFO - codeparrot_training - Step 6281: {'lr': 0.0004902506734000276, 'samples': 3216384, 'steps': 6281, 'loss/train': 1.8372225761413574} +02/24/2022 09:42:07 - INFO - codeparrot_training - Step 6282: {'lr': 0.0004902461480265163, 'samples': 3216896, 'steps': 6282, 'loss/train': 2.0583794116973877} +02/24/2022 09:42:10 - INFO - codeparrot_training - Step 6283: {'lr': 0.0004902416216238667, 'samples': 3217408, 'steps': 6283, 'loss/train': 2.1329660415649414} +02/24/2022 09:42:16 - INFO - codeparrot_training - Step 6284: {'lr': 0.0004902370941920984, 'samples': 3217920, 'steps': 6284, 'loss/train': 2.5528974533081055} +02/24/2022 09:42:19 - INFO - codeparrot_training - Step 6285: {'lr': 0.0004902325657312306, 'samples': 3218432, 'steps': 6285, 'loss/train': 3.1858463287353516} +02/24/2022 09:42:27 - INFO - codeparrot_training - Step 6286: {'lr': 0.0004902280362412828, 'samples': 3218944, 'steps': 6286, 'loss/train': 0.1197754368185997} +02/24/2022 09:42:30 - INFO - codeparrot_training - Step 6287: {'lr': 0.0004902235057222743, 'samples': 3219456, 'steps': 6287, 'loss/train': 2.2900893688201904} +02/24/2022 09:42:36 - INFO - codeparrot_training - Step 6288: {'lr': 0.0004902189741742246, 'samples': 3219968, 'steps': 6288, 'loss/train': 2.8684144020080566} +02/24/2022 09:42:39 - INFO - codeparrot_training - Step 6289: {'lr': 0.0004902144415971532, 'samples': 3220480, 'steps': 6289, 'loss/train': 2.5648696422576904} +02/24/2022 09:42:45 - INFO - codeparrot_training - Step 6290: {'lr': 0.0004902099079910794, 'samples': 3220992, 'steps': 6290, 'loss/train': 2.3180229663848877} +02/24/2022 09:42:48 - INFO - codeparrot_training - Step 6291: {'lr': 0.0004902053733560225, 'samples': 3221504, 'steps': 6291, 'loss/train': 1.8863275051116943} +02/24/2022 09:42:54 - INFO - codeparrot_training - Step 6292: {'lr': 0.0004902008376920021, 'samples': 3222016, 'steps': 6292, 'loss/train': 2.350266695022583} +02/24/2022 09:42:57 - INFO - codeparrot_training - Step 6293: {'lr': 0.0004901963009990376, 'samples': 3222528, 'steps': 6293, 'loss/train': 1.3136851787567139} +02/24/2022 09:43:02 - INFO - codeparrot_training - Step 6294: {'lr': 0.0004901917632771485, 'samples': 3223040, 'steps': 6294, 'loss/train': 4.365197658538818} +02/24/2022 09:43:06 - INFO - codeparrot_training - Step 6295: {'lr': 0.000490187224526354, 'samples': 3223552, 'steps': 6295, 'loss/train': 0.6939431428909302} +02/24/2022 09:43:13 - INFO - codeparrot_training - Step 6296: {'lr': 0.0004901826847466738, 'samples': 3224064, 'steps': 6296, 'loss/train': 1.7623052597045898} +02/24/2022 09:43:17 - INFO - codeparrot_training - Step 6297: {'lr': 0.0004901781439381272, 'samples': 3224576, 'steps': 6297, 'loss/train': 1.0246156454086304} +02/24/2022 09:43:22 - INFO - codeparrot_training - Step 6298: {'lr': 0.0004901736021007337, 'samples': 3225088, 'steps': 6298, 'loss/train': 3.066565752029419} +02/24/2022 09:43:26 - INFO - codeparrot_training - Step 6299: {'lr': 0.0004901690592345127, 'samples': 3225600, 'steps': 6299, 'loss/train': 2.188586711883545} +02/24/2022 09:43:31 - INFO - codeparrot_training - Step 6300: {'lr': 0.0004901645153394838, 'samples': 3226112, 'steps': 6300, 'loss/train': 1.838338851928711} +02/24/2022 09:43:35 - INFO - codeparrot_training - Step 6301: {'lr': 0.0004901599704156664, 'samples': 3226624, 'steps': 6301, 'loss/train': 2.1207802295684814} +02/24/2022 09:43:40 - INFO - codeparrot_training - Step 6302: {'lr': 0.00049015542446308, 'samples': 3227136, 'steps': 6302, 'loss/train': 3.0751938819885254} +02/24/2022 09:43:44 - INFO - codeparrot_training - Step 6303: {'lr': 0.0004901508774817438, 'samples': 3227648, 'steps': 6303, 'loss/train': 2.528035879135132} +02/24/2022 09:43:49 - INFO - codeparrot_training - Step 6304: {'lr': 0.0004901463294716776, 'samples': 3228160, 'steps': 6304, 'loss/train': 2.6392056941986084} +02/24/2022 09:43:53 - INFO - codeparrot_training - Step 6305: {'lr': 0.0004901417804329008, 'samples': 3228672, 'steps': 6305, 'loss/train': 2.7946455478668213} +02/24/2022 09:44:00 - INFO - codeparrot_training - Step 6306: {'lr': 0.0004901372303654329, 'samples': 3229184, 'steps': 6306, 'loss/train': 2.7061655521392822} +02/24/2022 09:44:03 - INFO - codeparrot_training - Step 6307: {'lr': 0.0004901326792692934, 'samples': 3229696, 'steps': 6307, 'loss/train': 2.344365358352661} +02/24/2022 09:44:09 - INFO - codeparrot_training - Step 6308: {'lr': 0.0004901281271445016, 'samples': 3230208, 'steps': 6308, 'loss/train': 2.365645170211792} +02/24/2022 09:44:13 - INFO - codeparrot_training - Step 6309: {'lr': 0.0004901235739910772, 'samples': 3230720, 'steps': 6309, 'loss/train': 2.601585626602173} +02/24/2022 09:44:18 - INFO - codeparrot_training - Step 6310: {'lr': 0.0004901190198090397, 'samples': 3231232, 'steps': 6310, 'loss/train': 3.2509396076202393} +02/24/2022 09:44:22 - INFO - codeparrot_training - Step 6311: {'lr': 0.0004901144645984086, 'samples': 3231744, 'steps': 6311, 'loss/train': 0.9778910279273987} +02/24/2022 09:44:27 - INFO - codeparrot_training - Step 6312: {'lr': 0.0004901099083592034, 'samples': 3232256, 'steps': 6312, 'loss/train': 2.777360677719116} +02/24/2022 09:44:31 - INFO - codeparrot_training - Step 6313: {'lr': 0.0004901053510914434, 'samples': 3232768, 'steps': 6313, 'loss/train': 2.0888760089874268} +02/24/2022 09:44:36 - INFO - codeparrot_training - Step 6314: {'lr': 0.0004901007927951485, 'samples': 3233280, 'steps': 6314, 'loss/train': 2.6442201137542725} +02/24/2022 09:44:43 - INFO - codeparrot_training - Step 6315: {'lr': 0.000490096233470338, 'samples': 3233792, 'steps': 6315, 'loss/train': 2.640913724899292} +02/24/2022 09:44:47 - INFO - codeparrot_training - Step 6316: {'lr': 0.0004900916731170314, 'samples': 3234304, 'steps': 6316, 'loss/train': 2.3063735961914062} +02/24/2022 09:44:50 - INFO - codeparrot_training - Step 6317: {'lr': 0.0004900871117352484, 'samples': 3234816, 'steps': 6317, 'loss/train': 2.2915847301483154} +02/24/2022 09:44:56 - INFO - codeparrot_training - Step 6318: {'lr': 0.0004900825493250084, 'samples': 3235328, 'steps': 6318, 'loss/train': 2.087425708770752} +02/24/2022 09:44:59 - INFO - codeparrot_training - Step 6319: {'lr': 0.000490077985886331, 'samples': 3235840, 'steps': 6319, 'loss/train': 2.266817569732666} +02/24/2022 09:45:05 - INFO - codeparrot_training - Step 6320: {'lr': 0.0004900734214192358, 'samples': 3236352, 'steps': 6320, 'loss/train': 0.4193190336227417} +02/24/2022 09:45:08 - INFO - codeparrot_training - Step 6321: {'lr': 0.0004900688559237422, 'samples': 3236864, 'steps': 6321, 'loss/train': 2.32072377204895} +02/24/2022 09:45:14 - INFO - codeparrot_training - Step 6322: {'lr': 0.0004900642893998699, 'samples': 3237376, 'steps': 6322, 'loss/train': 1.146812081336975} +02/24/2022 09:45:17 - INFO - codeparrot_training - Step 6323: {'lr': 0.0004900597218476385, 'samples': 3237888, 'steps': 6323, 'loss/train': 3.1454150676727295} +02/24/2022 09:45:23 - INFO - codeparrot_training - Step 6324: {'lr': 0.0004900551532670673, 'samples': 3238400, 'steps': 6324, 'loss/train': 2.032609701156616} +02/24/2022 09:45:26 - INFO - codeparrot_training - Step 6325: {'lr': 0.0004900505836581763, 'samples': 3238912, 'steps': 6325, 'loss/train': 2.3087079524993896} +02/24/2022 09:45:32 - INFO - codeparrot_training - Step 6326: {'lr': 0.0004900460130209845, 'samples': 3239424, 'steps': 6326, 'loss/train': 2.2144553661346436} +02/24/2022 09:45:35 - INFO - codeparrot_training - Step 6327: {'lr': 0.000490041441355512, 'samples': 3239936, 'steps': 6327, 'loss/train': 3.5378143787384033} +02/24/2022 09:45:41 - INFO - codeparrot_training - Step 6328: {'lr': 0.0004900368686617783, 'samples': 3240448, 'steps': 6328, 'loss/train': 2.095506191253662} +02/24/2022 09:45:44 - INFO - codeparrot_training - Step 6329: {'lr': 0.0004900322949398026, 'samples': 3240960, 'steps': 6329, 'loss/train': 2.1152613162994385} +02/24/2022 09:45:50 - INFO - codeparrot_training - Step 6330: {'lr': 0.000490027720189605, 'samples': 3241472, 'steps': 6330, 'loss/train': 2.2639265060424805} +02/24/2022 09:45:53 - INFO - codeparrot_training - Step 6331: {'lr': 0.0004900231444112047, 'samples': 3241984, 'steps': 6331, 'loss/train': 1.6293816566467285} +02/24/2022 09:45:59 - INFO - codeparrot_training - Step 6332: {'lr': 0.0004900185676046214, 'samples': 3242496, 'steps': 6332, 'loss/train': 2.488816022872925} +02/24/2022 09:46:06 - INFO - codeparrot_training - Step 6333: {'lr': 0.0004900139897698751, 'samples': 3243008, 'steps': 6333, 'loss/train': 2.4937851428985596} +02/24/2022 09:46:10 - INFO - codeparrot_training - Step 6334: {'lr': 0.0004900094109069848, 'samples': 3243520, 'steps': 6334, 'loss/train': 1.6818186044692993} +02/24/2022 09:46:15 - INFO - codeparrot_training - Step 6335: {'lr': 0.0004900048310159705, 'samples': 3244032, 'steps': 6335, 'loss/train': 2.6479344367980957} +02/24/2022 09:46:19 - INFO - codeparrot_training - Step 6336: {'lr': 0.0004900002500968516, 'samples': 3244544, 'steps': 6336, 'loss/train': 2.1148204803466797} +02/24/2022 09:46:24 - INFO - codeparrot_training - Step 6337: {'lr': 0.000489995668149648, 'samples': 3245056, 'steps': 6337, 'loss/train': 2.4322540760040283} +02/24/2022 09:46:27 - INFO - codeparrot_training - Step 6338: {'lr': 0.0004899910851743791, 'samples': 3245568, 'steps': 6338, 'loss/train': 1.680912971496582} +02/24/2022 09:46:33 - INFO - codeparrot_training - Step 6339: {'lr': 0.0004899865011710646, 'samples': 3246080, 'steps': 6339, 'loss/train': 1.9332932233810425} +02/24/2022 09:46:37 - INFO - codeparrot_training - Step 6340: {'lr': 0.0004899819161397241, 'samples': 3246592, 'steps': 6340, 'loss/train': 8.981110572814941} +02/24/2022 09:46:42 - INFO - codeparrot_training - Step 6341: {'lr': 0.0004899773300803774, 'samples': 3247104, 'steps': 6341, 'loss/train': 2.545008659362793} +02/24/2022 09:46:46 - INFO - codeparrot_training - Step 6342: {'lr': 0.0004899727429930438, 'samples': 3247616, 'steps': 6342, 'loss/train': 2.2221126556396484} +02/24/2022 09:46:53 - INFO - codeparrot_training - Step 6343: {'lr': 0.0004899681548777434, 'samples': 3248128, 'steps': 6343, 'loss/train': 2.2362680435180664} +02/24/2022 09:46:57 - INFO - codeparrot_training - Step 6344: {'lr': 0.0004899635657344954, 'samples': 3248640, 'steps': 6344, 'loss/train': 2.0811617374420166} +02/24/2022 09:47:02 - INFO - codeparrot_training - Step 6345: {'lr': 0.0004899589755633198, 'samples': 3249152, 'steps': 6345, 'loss/train': 2.934920310974121} +02/24/2022 09:47:06 - INFO - codeparrot_training - Step 6346: {'lr': 0.0004899543843642362, 'samples': 3249664, 'steps': 6346, 'loss/train': 3.1778531074523926} +02/24/2022 09:47:11 - INFO - codeparrot_training - Step 6347: {'lr': 0.0004899497921372641, 'samples': 3250176, 'steps': 6347, 'loss/train': 2.080864429473877} +02/24/2022 09:47:15 - INFO - codeparrot_training - Step 6348: {'lr': 0.0004899451988824233, 'samples': 3250688, 'steps': 6348, 'loss/train': 1.7839866876602173} +02/24/2022 09:47:20 - INFO - codeparrot_training - Step 6349: {'lr': 0.0004899406045997336, 'samples': 3251200, 'steps': 6349, 'loss/train': 2.041637420654297} +02/24/2022 09:47:23 - INFO - codeparrot_training - Step 6350: {'lr': 0.0004899360092892143, 'samples': 3251712, 'steps': 6350, 'loss/train': 2.313155174255371} +02/24/2022 09:47:29 - INFO - codeparrot_training - Step 6351: {'lr': 0.0004899314129508855, 'samples': 3252224, 'steps': 6351, 'loss/train': 1.9384204149246216} +02/24/2022 09:47:32 - INFO - codeparrot_training - Step 6352: {'lr': 0.0004899268155847667, 'samples': 3252736, 'steps': 6352, 'loss/train': 3.5875790119171143} +02/24/2022 09:47:40 - INFO - codeparrot_training - Step 6353: {'lr': 0.0004899222171908776, 'samples': 3253248, 'steps': 6353, 'loss/train': 1.1855950355529785} +02/24/2022 09:47:43 - INFO - codeparrot_training - Step 6354: {'lr': 0.0004899176177692379, 'samples': 3253760, 'steps': 6354, 'loss/train': 1.4508904218673706} +02/24/2022 09:47:49 - INFO - codeparrot_training - Step 6355: {'lr': 0.0004899130173198672, 'samples': 3254272, 'steps': 6355, 'loss/train': 2.6990723609924316} +02/24/2022 09:47:52 - INFO - codeparrot_training - Step 6356: {'lr': 0.0004899084158427855, 'samples': 3254784, 'steps': 6356, 'loss/train': 3.017900228500366} +02/24/2022 09:47:58 - INFO - codeparrot_training - Step 6357: {'lr': 0.0004899038133380121, 'samples': 3255296, 'steps': 6357, 'loss/train': 1.9416550397872925} +02/24/2022 09:48:01 - INFO - codeparrot_training - Step 6358: {'lr': 0.0004898992098055671, 'samples': 3255808, 'steps': 6358, 'loss/train': 1.2342463731765747} +02/24/2022 09:48:07 - INFO - codeparrot_training - Step 6359: {'lr': 0.00048989460524547, 'samples': 3256320, 'steps': 6359, 'loss/train': 2.060485601425171} +02/24/2022 09:48:10 - INFO - codeparrot_training - Step 6360: {'lr': 0.0004898899996577407, 'samples': 3256832, 'steps': 6360, 'loss/train': 2.177398920059204} +02/24/2022 09:48:16 - INFO - codeparrot_training - Step 6361: {'lr': 0.0004898853930423987, 'samples': 3257344, 'steps': 6361, 'loss/train': 2.6436080932617188} +02/24/2022 09:48:19 - INFO - codeparrot_training - Step 6362: {'lr': 0.0004898807853994639, 'samples': 3257856, 'steps': 6362, 'loss/train': 1.580167293548584} +02/24/2022 09:48:26 - INFO - codeparrot_training - Step 6363: {'lr': 0.000489876176728956, 'samples': 3258368, 'steps': 6363, 'loss/train': 3.411489963531494} +02/24/2022 09:48:30 - INFO - codeparrot_training - Step 6364: {'lr': 0.0004898715670308947, 'samples': 3258880, 'steps': 6364, 'loss/train': 2.096208333969116} +02/24/2022 09:48:35 - INFO - codeparrot_training - Step 6365: {'lr': 0.0004898669563052997, 'samples': 3259392, 'steps': 6365, 'loss/train': 2.6656792163848877} +02/24/2022 09:48:39 - INFO - codeparrot_training - Step 6366: {'lr': 0.0004898623445521909, 'samples': 3259904, 'steps': 6366, 'loss/train': 3.165503740310669} +02/24/2022 09:48:44 - INFO - codeparrot_training - Step 6367: {'lr': 0.000489857731771588, 'samples': 3260416, 'steps': 6367, 'loss/train': 2.6996541023254395} +02/24/2022 09:48:48 - INFO - codeparrot_training - Step 6368: {'lr': 0.0004898531179635108, 'samples': 3260928, 'steps': 6368, 'loss/train': 2.4503157138824463} +02/24/2022 09:48:53 - INFO - codeparrot_training - Step 6369: {'lr': 0.0004898485031279788, 'samples': 3261440, 'steps': 6369, 'loss/train': 4.438111782073975} +02/24/2022 09:48:57 - INFO - codeparrot_training - Step 6370: {'lr': 0.0004898438872650121, 'samples': 3261952, 'steps': 6370, 'loss/train': 2.9785115718841553} +02/24/2022 09:49:02 - INFO - codeparrot_training - Step 6371: {'lr': 0.0004898392703746304, 'samples': 3262464, 'steps': 6371, 'loss/train': 3.1848602294921875} +02/24/2022 09:49:06 - INFO - codeparrot_training - Step 6372: {'lr': 0.0004898346524568533, 'samples': 3262976, 'steps': 6372, 'loss/train': 3.0334019660949707} +02/24/2022 09:49:11 - INFO - codeparrot_training - Step 6373: {'lr': 0.0004898300335117008, 'samples': 3263488, 'steps': 6373, 'loss/train': 2.1745877265930176} +02/24/2022 09:49:15 - INFO - codeparrot_training - Step 6374: {'lr': 0.0004898254135391926, 'samples': 3264000, 'steps': 6374, 'loss/train': 2.3121368885040283} +02/24/2022 09:49:20 - INFO - codeparrot_training - Step 6375: {'lr': 0.0004898207925393485, 'samples': 3264512, 'steps': 6375, 'loss/train': 2.5151071548461914} +02/24/2022 09:49:24 - INFO - codeparrot_training - Step 6376: {'lr': 0.0004898161705121882, 'samples': 3265024, 'steps': 6376, 'loss/train': 1.7923381328582764} +02/24/2022 09:49:31 - INFO - codeparrot_training - Step 6377: {'lr': 0.0004898115474577315, 'samples': 3265536, 'steps': 6377, 'loss/train': 2.277191638946533} +02/24/2022 09:49:35 - INFO - codeparrot_training - Step 6378: {'lr': 0.0004898069233759985, 'samples': 3266048, 'steps': 6378, 'loss/train': 1.523551106452942} +02/24/2022 09:49:40 - INFO - codeparrot_training - Step 6379: {'lr': 0.0004898022982670085, 'samples': 3266560, 'steps': 6379, 'loss/train': 1.7236496210098267} +02/24/2022 09:49:44 - INFO - codeparrot_training - Step 6380: {'lr': 0.0004897976721307818, 'samples': 3267072, 'steps': 6380, 'loss/train': 2.342064619064331} +02/24/2022 09:49:49 - INFO - codeparrot_training - Step 6381: {'lr': 0.000489793044967338, 'samples': 3267584, 'steps': 6381, 'loss/train': 2.5133140087127686} +02/24/2022 09:49:53 - INFO - codeparrot_training - Step 6382: {'lr': 0.0004897884167766969, 'samples': 3268096, 'steps': 6382, 'loss/train': 2.8323864936828613} +02/24/2022 09:49:58 - INFO - codeparrot_training - Step 6383: {'lr': 0.0004897837875588784, 'samples': 3268608, 'steps': 6383, 'loss/train': 2.454118490219116} +02/24/2022 09:50:02 - INFO - codeparrot_training - Step 6384: {'lr': 0.0004897791573139022, 'samples': 3269120, 'steps': 6384, 'loss/train': 2.2801244258880615} +02/24/2022 09:50:08 - INFO - codeparrot_training - Step 6385: {'lr': 0.0004897745260417884, 'samples': 3269632, 'steps': 6385, 'loss/train': 1.8247528076171875} +02/24/2022 09:50:11 - INFO - codeparrot_training - Step 6386: {'lr': 0.0004897698937425566, 'samples': 3270144, 'steps': 6386, 'loss/train': 1.270604133605957} +02/24/2022 09:50:17 - INFO - codeparrot_training - Step 6387: {'lr': 0.0004897652604162266, 'samples': 3270656, 'steps': 6387, 'loss/train': 1.784490704536438} +02/24/2022 09:50:20 - INFO - codeparrot_training - Step 6388: {'lr': 0.0004897606260628184, 'samples': 3271168, 'steps': 6388, 'loss/train': 2.2898740768432617} +02/24/2022 09:50:28 - INFO - codeparrot_training - Step 6389: {'lr': 0.0004897559906823518, 'samples': 3271680, 'steps': 6389, 'loss/train': 1.523402214050293} +02/24/2022 09:50:31 - INFO - codeparrot_training - Step 6390: {'lr': 0.0004897513542748468, 'samples': 3272192, 'steps': 6390, 'loss/train': 3.0109260082244873} +02/24/2022 09:50:37 - INFO - codeparrot_training - Step 6391: {'lr': 0.0004897467168403231, 'samples': 3272704, 'steps': 6391, 'loss/train': 1.308800220489502} +02/24/2022 09:50:40 - INFO - codeparrot_training - Step 6392: {'lr': 0.0004897420783788006, 'samples': 3273216, 'steps': 6392, 'loss/train': 2.723564624786377} +02/24/2022 09:50:46 - INFO - codeparrot_training - Step 6393: {'lr': 0.0004897374388902991, 'samples': 3273728, 'steps': 6393, 'loss/train': 1.9558881521224976} +02/24/2022 09:50:49 - INFO - codeparrot_training - Step 6394: {'lr': 0.0004897327983748385, 'samples': 3274240, 'steps': 6394, 'loss/train': 0.46763065457344055} +02/24/2022 09:50:55 - INFO - codeparrot_training - Step 6395: {'lr': 0.0004897281568324387, 'samples': 3274752, 'steps': 6395, 'loss/train': 2.242149829864502} +02/24/2022 09:50:58 - INFO - codeparrot_training - Step 6396: {'lr': 0.0004897235142631197, 'samples': 3275264, 'steps': 6396, 'loss/train': 2.9120097160339355} +02/24/2022 09:51:03 - INFO - codeparrot_training - Step 6397: {'lr': 0.0004897188706669012, 'samples': 3275776, 'steps': 6397, 'loss/train': 3.2725422382354736} +02/24/2022 09:51:07 - INFO - codeparrot_training - Step 6398: {'lr': 0.0004897142260438032, 'samples': 3276288, 'steps': 6398, 'loss/train': 0.6915867924690247} +02/24/2022 09:51:14 - INFO - codeparrot_training - Step 6399: {'lr': 0.0004897095803938456, 'samples': 3276800, 'steps': 6399, 'loss/train': 2.606157064437866} +02/24/2022 09:51:18 - INFO - codeparrot_training - Step 6400: {'lr': 0.0004897049337170483, 'samples': 3277312, 'steps': 6400, 'loss/train': 3.485391855239868} +02/24/2022 09:51:23 - INFO - codeparrot_training - Step 6401: {'lr': 0.0004897002860134311, 'samples': 3277824, 'steps': 6401, 'loss/train': 2.1284947395324707} +02/24/2022 09:51:27 - INFO - codeparrot_training - Step 6402: {'lr': 0.0004896956372830141, 'samples': 3278336, 'steps': 6402, 'loss/train': 2.415383815765381} +02/24/2022 09:51:32 - INFO - codeparrot_training - Step 6403: {'lr': 0.000489690987525817, 'samples': 3278848, 'steps': 6403, 'loss/train': 2.4144012928009033} +02/24/2022 09:51:36 - INFO - codeparrot_training - Step 6404: {'lr': 0.0004896863367418598, 'samples': 3279360, 'steps': 6404, 'loss/train': 1.3919150829315186} +02/24/2022 09:51:41 - INFO - codeparrot_training - Step 6405: {'lr': 0.0004896816849311625, 'samples': 3279872, 'steps': 6405, 'loss/train': 3.055471420288086} +02/24/2022 09:51:45 - INFO - codeparrot_training - Step 6406: {'lr': 0.000489677032093745, 'samples': 3280384, 'steps': 6406, 'loss/train': 2.631194829940796} +02/24/2022 09:51:50 - INFO - codeparrot_training - Step 6407: {'lr': 0.0004896723782296272, 'samples': 3280896, 'steps': 6407, 'loss/train': 1.6270899772644043} +02/24/2022 09:51:54 - INFO - codeparrot_training - Step 6408: {'lr': 0.0004896677233388289, 'samples': 3281408, 'steps': 6408, 'loss/train': 1.408095121383667} +02/24/2022 09:52:01 - INFO - codeparrot_training - Step 6409: {'lr': 0.0004896630674213703, 'samples': 3281920, 'steps': 6409, 'loss/train': 3.0135111808776855} +02/24/2022 09:52:04 - INFO - codeparrot_training - Step 6410: {'lr': 0.0004896584104772712, 'samples': 3282432, 'steps': 6410, 'loss/train': 3.475926637649536} +02/24/2022 09:52:10 - INFO - codeparrot_training - Step 6411: {'lr': 0.0004896537525065516, 'samples': 3282944, 'steps': 6411, 'loss/train': 2.8265891075134277} +02/24/2022 09:52:13 - INFO - codeparrot_training - Step 6412: {'lr': 0.0004896490935092314, 'samples': 3283456, 'steps': 6412, 'loss/train': 3.4042012691497803} +02/24/2022 09:52:19 - INFO - codeparrot_training - Step 6413: {'lr': 0.0004896444334853305, 'samples': 3283968, 'steps': 6413, 'loss/train': 2.973315715789795} +02/24/2022 09:52:22 - INFO - codeparrot_training - Step 6414: {'lr': 0.000489639772434869, 'samples': 3284480, 'steps': 6414, 'loss/train': 2.2527859210968018} +02/24/2022 09:52:28 - INFO - codeparrot_training - Step 6415: {'lr': 0.0004896351103578669, 'samples': 3284992, 'steps': 6415, 'loss/train': 1.876029372215271} +02/24/2022 09:52:31 - INFO - codeparrot_training - Step 6416: {'lr': 0.0004896304472543439, 'samples': 3285504, 'steps': 6416, 'loss/train': 9.057655334472656} +02/24/2022 09:52:37 - INFO - codeparrot_training - Step 6417: {'lr': 0.0004896257831243204, 'samples': 3286016, 'steps': 6417, 'loss/train': 3.0918171405792236} +02/24/2022 09:52:40 - INFO - codeparrot_training - Step 6418: {'lr': 0.0004896211179678159, 'samples': 3286528, 'steps': 6418, 'loss/train': 3.134316921234131} +02/24/2022 09:52:48 - INFO - codeparrot_training - Step 6419: {'lr': 0.0004896164517848508, 'samples': 3287040, 'steps': 6419, 'loss/train': 2.4525632858276367} +02/24/2022 09:52:51 - INFO - codeparrot_training - Step 6420: {'lr': 0.0004896117845754448, 'samples': 3287552, 'steps': 6420, 'loss/train': 2.562161922454834} +02/24/2022 09:52:57 - INFO - codeparrot_training - Step 6421: {'lr': 0.0004896071163396179, 'samples': 3288064, 'steps': 6421, 'loss/train': 2.3492627143859863} +02/24/2022 09:53:00 - INFO - codeparrot_training - Step 6422: {'lr': 0.0004896024470773904, 'samples': 3288576, 'steps': 6422, 'loss/train': 1.8552896976470947} +02/24/2022 09:53:06 - INFO - codeparrot_training - Step 6423: {'lr': 0.000489597776788782, 'samples': 3289088, 'steps': 6423, 'loss/train': 1.3254491090774536} +02/24/2022 09:53:09 - INFO - codeparrot_training - Step 6424: {'lr': 0.0004895931054738128, 'samples': 3289600, 'steps': 6424, 'loss/train': 2.5252487659454346} +02/24/2022 09:53:15 - INFO - codeparrot_training - Step 6425: {'lr': 0.0004895884331325028, 'samples': 3290112, 'steps': 6425, 'loss/train': 2.3808586597442627} +02/24/2022 09:53:18 - INFO - codeparrot_training - Step 6426: {'lr': 0.0004895837597648721, 'samples': 3290624, 'steps': 6426, 'loss/train': 1.6469117403030396} +02/24/2022 09:53:24 - INFO - codeparrot_training - Step 6427: {'lr': 0.0004895790853709406, 'samples': 3291136, 'steps': 6427, 'loss/train': 2.1365747451782227} +02/24/2022 09:53:27 - INFO - codeparrot_training - Step 6428: {'lr': 0.0004895744099507284, 'samples': 3291648, 'steps': 6428, 'loss/train': 2.845057249069214} +02/24/2022 09:53:33 - INFO - codeparrot_training - Step 6429: {'lr': 0.0004895697335042555, 'samples': 3292160, 'steps': 6429, 'loss/train': 2.1584692001342773} +02/24/2022 09:53:38 - INFO - codeparrot_training - Step 6430: {'lr': 0.0004895650560315419, 'samples': 3292672, 'steps': 6430, 'loss/train': 2.8787264823913574} +02/24/2022 09:53:42 - INFO - codeparrot_training - Step 6431: {'lr': 0.0004895603775326077, 'samples': 3293184, 'steps': 6431, 'loss/train': 1.3936365842819214} +02/24/2022 09:53:47 - INFO - codeparrot_training - Step 6432: {'lr': 0.0004895556980074729, 'samples': 3293696, 'steps': 6432, 'loss/train': 1.6991254091262817} +02/24/2022 09:53:51 - INFO - codeparrot_training - Step 6433: {'lr': 0.0004895510174561576, 'samples': 3294208, 'steps': 6433, 'loss/train': 2.6044275760650635} +02/24/2022 09:53:58 - INFO - codeparrot_training - Step 6434: {'lr': 0.0004895463358786818, 'samples': 3294720, 'steps': 6434, 'loss/train': 1.8530462980270386} +02/24/2022 09:54:02 - INFO - codeparrot_training - Step 6435: {'lr': 0.0004895416532750655, 'samples': 3295232, 'steps': 6435, 'loss/train': 1.9207227230072021} +02/24/2022 09:54:07 - INFO - codeparrot_training - Step 6436: {'lr': 0.0004895369696453289, 'samples': 3295744, 'steps': 6436, 'loss/train': 2.309866428375244} +02/24/2022 09:54:11 - INFO - codeparrot_training - Step 6437: {'lr': 0.0004895322849894918, 'samples': 3296256, 'steps': 6437, 'loss/train': 2.4831600189208984} +02/24/2022 09:54:16 - INFO - codeparrot_training - Step 6438: {'lr': 0.0004895275993075747, 'samples': 3296768, 'steps': 6438, 'loss/train': 2.2389132976531982} +02/24/2022 09:54:20 - INFO - codeparrot_training - Step 6439: {'lr': 0.0004895229125995973, 'samples': 3297280, 'steps': 6439, 'loss/train': 1.4974126815795898} +02/24/2022 09:54:23 - INFO - codeparrot_training - Step 6440: {'lr': 0.0004895182248655798, 'samples': 3297792, 'steps': 6440, 'loss/train': 2.6597371101379395} +02/24/2022 09:54:29 - INFO - codeparrot_training - Step 6441: {'lr': 0.0004895135361055422, 'samples': 3298304, 'steps': 6441, 'loss/train': 2.5425000190734863} +02/24/2022 09:54:32 - INFO - codeparrot_training - Step 6442: {'lr': 0.0004895088463195049, 'samples': 3298816, 'steps': 6442, 'loss/train': 2.8660354614257812} +02/24/2022 09:54:38 - INFO - codeparrot_training - Step 6443: {'lr': 0.0004895041555074875, 'samples': 3299328, 'steps': 6443, 'loss/train': 2.6452040672302246} +02/24/2022 09:54:41 - INFO - codeparrot_training - Step 6444: {'lr': 0.0004894994636695105, 'samples': 3299840, 'steps': 6444, 'loss/train': 2.966923952102661} +02/24/2022 09:54:49 - INFO - codeparrot_training - Step 6445: {'lr': 0.0004894947708055938, 'samples': 3300352, 'steps': 6445, 'loss/train': 2.6917126178741455} +02/24/2022 09:54:54 - INFO - codeparrot_training - Step 6446: {'lr': 0.0004894900769157576, 'samples': 3300864, 'steps': 6446, 'loss/train': 1.5543131828308105} +02/24/2022 09:54:57 - INFO - codeparrot_training - Step 6447: {'lr': 0.0004894853820000219, 'samples': 3301376, 'steps': 6447, 'loss/train': 2.8028717041015625} +02/24/2022 09:55:03 - INFO - codeparrot_training - Step 6448: {'lr': 0.000489480686058407, 'samples': 3301888, 'steps': 6448, 'loss/train': 2.402170419692993} +02/24/2022 09:55:06 - INFO - codeparrot_training - Step 6449: {'lr': 0.0004894759890909326, 'samples': 3302400, 'steps': 6449, 'loss/train': 1.7828624248504639} +02/24/2022 09:55:12 - INFO - codeparrot_training - Step 6450: {'lr': 0.0004894712910976193, 'samples': 3302912, 'steps': 6450, 'loss/train': 2.1325206756591797} +02/24/2022 09:55:15 - INFO - codeparrot_training - Step 6451: {'lr': 0.000489466592078487, 'samples': 3303424, 'steps': 6451, 'loss/train': 0.8790604472160339} +02/24/2022 09:55:21 - INFO - codeparrot_training - Step 6452: {'lr': 0.0004894618920335558, 'samples': 3303936, 'steps': 6452, 'loss/train': 0.5870147347450256} +02/24/2022 09:55:24 - INFO - codeparrot_training - Step 6453: {'lr': 0.000489457190962846, 'samples': 3304448, 'steps': 6453, 'loss/train': 2.9822442531585693} +02/24/2022 09:55:31 - INFO - codeparrot_training - Step 6454: {'lr': 0.0004894524888663776, 'samples': 3304960, 'steps': 6454, 'loss/train': 2.5562589168548584} +02/24/2022 09:55:35 - INFO - codeparrot_training - Step 6455: {'lr': 0.0004894477857441707, 'samples': 3305472, 'steps': 6455, 'loss/train': 2.2890918254852295} +02/24/2022 09:55:40 - INFO - codeparrot_training - Step 6456: {'lr': 0.0004894430815962456, 'samples': 3305984, 'steps': 6456, 'loss/train': 2.3057167530059814} +02/24/2022 09:55:44 - INFO - codeparrot_training - Step 6457: {'lr': 0.0004894383764226224, 'samples': 3306496, 'steps': 6457, 'loss/train': 1.25165855884552} +02/24/2022 09:55:49 - INFO - codeparrot_training - Step 6458: {'lr': 0.0004894336702233212, 'samples': 3307008, 'steps': 6458, 'loss/train': 1.5881156921386719} +02/24/2022 09:55:53 - INFO - codeparrot_training - Step 6459: {'lr': 0.0004894289629983621, 'samples': 3307520, 'steps': 6459, 'loss/train': 2.387951135635376} +02/24/2022 09:55:58 - INFO - codeparrot_training - Step 6460: {'lr': 0.0004894242547477654, 'samples': 3308032, 'steps': 6460, 'loss/train': 3.081812858581543} +02/24/2022 09:56:02 - INFO - codeparrot_training - Step 6461: {'lr': 0.0004894195454715512, 'samples': 3308544, 'steps': 6461, 'loss/train': 2.0218260288238525} +02/24/2022 09:56:10 - INFO - codeparrot_training - Step 6462: {'lr': 0.0004894148351697398, 'samples': 3309056, 'steps': 6462, 'loss/train': 1.4597071409225464} +02/24/2022 09:56:13 - INFO - codeparrot_training - Step 6463: {'lr': 0.0004894101238423512, 'samples': 3309568, 'steps': 6463, 'loss/train': 1.536042332649231} +02/24/2022 09:56:19 - INFO - codeparrot_training - Step 6464: {'lr': 0.0004894054114894055, 'samples': 3310080, 'steps': 6464, 'loss/train': 1.6616467237472534} +02/24/2022 09:56:22 - INFO - codeparrot_training - Step 6465: {'lr': 0.0004894006981109232, 'samples': 3310592, 'steps': 6465, 'loss/train': 2.6865811347961426} +02/24/2022 09:56:28 - INFO - codeparrot_training - Step 6466: {'lr': 0.0004893959837069243, 'samples': 3311104, 'steps': 6466, 'loss/train': 3.2605366706848145} +02/24/2022 09:56:31 - INFO - codeparrot_training - Step 6467: {'lr': 0.0004893912682774291, 'samples': 3311616, 'steps': 6467, 'loss/train': 1.0845344066619873} +02/24/2022 09:56:37 - INFO - codeparrot_training - Step 6468: {'lr': 0.0004893865518224576, 'samples': 3312128, 'steps': 6468, 'loss/train': 2.5008442401885986} +02/24/2022 09:56:40 - INFO - codeparrot_training - Step 6469: {'lr': 0.0004893818343420302, 'samples': 3312640, 'steps': 6469, 'loss/train': 3.2619946002960205} +02/24/2022 09:56:46 - INFO - codeparrot_training - Step 6470: {'lr': 0.000489377115836167, 'samples': 3313152, 'steps': 6470, 'loss/train': 1.6051849126815796} +02/24/2022 09:56:49 - INFO - codeparrot_training - Step 6471: {'lr': 0.0004893723963048882, 'samples': 3313664, 'steps': 6471, 'loss/train': 2.2450814247131348} +02/24/2022 09:56:53 - INFO - codeparrot_training - Step 6472: {'lr': 0.0004893676757482142, 'samples': 3314176, 'steps': 6472, 'loss/train': 2.335479259490967} +02/24/2022 09:57:00 - INFO - codeparrot_training - Step 6473: {'lr': 0.0004893629541661649, 'samples': 3314688, 'steps': 6473, 'loss/train': 1.8403065204620361} +02/24/2022 09:57:04 - INFO - codeparrot_training - Step 6474: {'lr': 0.0004893582315587608, 'samples': 3315200, 'steps': 6474, 'loss/train': 1.6207207441329956} +02/24/2022 09:57:09 - INFO - codeparrot_training - Step 6475: {'lr': 0.0004893535079260221, 'samples': 3315712, 'steps': 6475, 'loss/train': 2.3650307655334473} +02/24/2022 09:57:13 - INFO - codeparrot_training - Step 6476: {'lr': 0.000489348783267969, 'samples': 3316224, 'steps': 6476, 'loss/train': 0.23510637879371643} +02/24/2022 09:57:18 - INFO - codeparrot_training - Step 6477: {'lr': 0.0004893440575846215, 'samples': 3316736, 'steps': 6477, 'loss/train': 1.888055443763733} +02/24/2022 09:57:22 - INFO - codeparrot_training - Step 6478: {'lr': 0.0004893393308760002, 'samples': 3317248, 'steps': 6478, 'loss/train': 2.07065749168396} +02/24/2022 09:57:27 - INFO - codeparrot_training - Step 6479: {'lr': 0.0004893346031421253, 'samples': 3317760, 'steps': 6479, 'loss/train': 2.236597776412964} +02/24/2022 09:57:31 - INFO - codeparrot_training - Step 6480: {'lr': 0.0004893298743830168, 'samples': 3318272, 'steps': 6480, 'loss/train': 2.4800174236297607} +02/24/2022 09:57:36 - INFO - codeparrot_training - Step 6481: {'lr': 0.0004893251445986952, 'samples': 3318784, 'steps': 6481, 'loss/train': 3.0782835483551025} +02/24/2022 09:57:40 - INFO - codeparrot_training - Step 6482: {'lr': 0.0004893204137891807, 'samples': 3319296, 'steps': 6482, 'loss/train': 2.900688409805298} +02/24/2022 09:57:47 - INFO - codeparrot_training - Step 6483: {'lr': 0.0004893156819544935, 'samples': 3319808, 'steps': 6483, 'loss/train': 2.7950785160064697} +02/24/2022 09:57:51 - INFO - codeparrot_training - Step 6484: {'lr': 0.0004893109490946539, 'samples': 3320320, 'steps': 6484, 'loss/train': 1.3772821426391602} +02/24/2022 09:57:56 - INFO - codeparrot_training - Step 6485: {'lr': 0.0004893062152096821, 'samples': 3320832, 'steps': 6485, 'loss/train': 2.1796483993530273} +02/24/2022 09:58:00 - INFO - codeparrot_training - Step 6486: {'lr': 0.0004893014802995985, 'samples': 3321344, 'steps': 6486, 'loss/train': 2.3409743309020996} +02/24/2022 09:58:05 - INFO - codeparrot_training - Step 6487: {'lr': 0.0004892967443644235, 'samples': 3321856, 'steps': 6487, 'loss/train': 2.5255603790283203} +02/24/2022 09:58:08 - INFO - codeparrot_training - Step 6488: {'lr': 0.0004892920074041771, 'samples': 3322368, 'steps': 6488, 'loss/train': 2.121716260910034} +02/24/2022 09:58:14 - INFO - codeparrot_training - Step 6489: {'lr': 0.0004892872694188797, 'samples': 3322880, 'steps': 6489, 'loss/train': 2.704684257507324} +02/24/2022 09:58:18 - INFO - codeparrot_training - Step 6490: {'lr': 0.0004892825304085517, 'samples': 3323392, 'steps': 6490, 'loss/train': 1.4401229619979858} +02/24/2022 09:58:23 - INFO - codeparrot_training - Step 6491: {'lr': 0.0004892777903732133, 'samples': 3323904, 'steps': 6491, 'loss/train': 1.776444911956787} +02/24/2022 09:58:26 - INFO - codeparrot_training - Step 6492: {'lr': 0.0004892730493128848, 'samples': 3324416, 'steps': 6492, 'loss/train': 3.5959036350250244} +02/24/2022 09:58:34 - INFO - codeparrot_training - Step 6493: {'lr': 0.0004892683072275865, 'samples': 3324928, 'steps': 6493, 'loss/train': 2.111992359161377} +02/24/2022 09:58:37 - INFO - codeparrot_training - Step 6494: {'lr': 0.0004892635641173389, 'samples': 3325440, 'steps': 6494, 'loss/train': 3.305833578109741} +02/24/2022 09:58:43 - INFO - codeparrot_training - Step 6495: {'lr': 0.0004892588199821619, 'samples': 3325952, 'steps': 6495, 'loss/train': 2.2491681575775146} +02/24/2022 09:58:46 - INFO - codeparrot_training - Step 6496: {'lr': 0.0004892540748220763, 'samples': 3326464, 'steps': 6496, 'loss/train': 2.2219161987304688} +02/24/2022 09:58:52 - INFO - codeparrot_training - Step 6497: {'lr': 0.0004892493286371022, 'samples': 3326976, 'steps': 6497, 'loss/train': 2.0182578563690186} +02/24/2022 09:58:55 - INFO - codeparrot_training - Step 6498: {'lr': 0.00048924458142726, 'samples': 3327488, 'steps': 6498, 'loss/train': 2.817366361618042} +02/24/2022 09:59:01 - INFO - codeparrot_training - Step 6499: {'lr': 0.0004892398331925698, 'samples': 3328000, 'steps': 6499, 'loss/train': 2.3096659183502197} +02/24/2022 09:59:04 - INFO - codeparrot_training - Step 6500: {'lr': 0.0004892350839330522, 'samples': 3328512, 'steps': 6500, 'loss/train': 2.039881944656372} +02/24/2022 09:59:10 - INFO - codeparrot_training - Step 6501: {'lr': 0.0004892303336487275, 'samples': 3329024, 'steps': 6501, 'loss/train': 1.4357212781906128} +02/24/2022 09:59:13 - INFO - codeparrot_training - Step 6502: {'lr': 0.000489225582339616, 'samples': 3329536, 'steps': 6502, 'loss/train': 2.9906527996063232} +02/24/2022 09:59:19 - INFO - codeparrot_training - Step 6503: {'lr': 0.000489220830005738, 'samples': 3330048, 'steps': 6503, 'loss/train': 1.452683925628662} +02/24/2022 09:59:22 - INFO - codeparrot_training - Step 6504: {'lr': 0.0004892160766471141, 'samples': 3330560, 'steps': 6504, 'loss/train': 1.4622899293899536} +02/24/2022 09:59:29 - INFO - codeparrot_training - Step 6505: {'lr': 0.0004892113222637643, 'samples': 3331072, 'steps': 6505, 'loss/train': 3.0667734146118164} +02/24/2022 09:59:35 - INFO - codeparrot_training - Step 6506: {'lr': 0.0004892065668557093, 'samples': 3331584, 'steps': 6506, 'loss/train': 2.964228630065918} +02/24/2022 09:59:39 - INFO - codeparrot_training - Step 6507: {'lr': 0.0004892018104229692, 'samples': 3332096, 'steps': 6507, 'loss/train': 3.244946241378784} +02/24/2022 09:59:42 - INFO - codeparrot_training - Step 6508: {'lr': 0.0004891970529655646, 'samples': 3332608, 'steps': 6508, 'loss/train': 1.858280897140503} +02/24/2022 09:59:48 - INFO - codeparrot_training - Step 6509: {'lr': 0.0004891922944835158, 'samples': 3333120, 'steps': 6509, 'loss/train': 3.8298871517181396} +02/24/2022 09:59:51 - INFO - codeparrot_training - Step 6510: {'lr': 0.000489187534976843, 'samples': 3333632, 'steps': 6510, 'loss/train': 2.3593363761901855} +02/24/2022 09:59:57 - INFO - codeparrot_training - Step 6511: {'lr': 0.0004891827744455668, 'samples': 3334144, 'steps': 6511, 'loss/train': 3.423246145248413} +02/24/2022 10:00:00 - INFO - codeparrot_training - Step 6512: {'lr': 0.0004891780128897077, 'samples': 3334656, 'steps': 6512, 'loss/train': 2.655895471572876} +02/24/2022 10:00:06 - INFO - codeparrot_training - Step 6513: {'lr': 0.0004891732503092858, 'samples': 3335168, 'steps': 6513, 'loss/train': 2.022547721862793} +02/24/2022 10:00:11 - INFO - codeparrot_training - Step 6514: {'lr': 0.0004891684867043216, 'samples': 3335680, 'steps': 6514, 'loss/train': 2.3471121788024902} +02/24/2022 10:00:15 - INFO - codeparrot_training - Step 6515: {'lr': 0.0004891637220748356, 'samples': 3336192, 'steps': 6515, 'loss/train': 1.8252805471420288} +02/24/2022 10:00:20 - INFO - codeparrot_training - Step 6516: {'lr': 0.0004891589564208482, 'samples': 3336704, 'steps': 6516, 'loss/train': 0.5026594400405884} +02/24/2022 10:00:23 - INFO - codeparrot_training - Step 6517: {'lr': 0.0004891541897423798, 'samples': 3337216, 'steps': 6517, 'loss/train': 1.8229773044586182} +02/24/2022 10:00:31 - INFO - codeparrot_training - Step 6518: {'lr': 0.0004891494220394507, 'samples': 3337728, 'steps': 6518, 'loss/train': 3.6086223125457764} +02/24/2022 10:00:34 - INFO - codeparrot_training - Step 6519: {'lr': 0.0004891446533120815, 'samples': 3338240, 'steps': 6519, 'loss/train': 2.2332448959350586} +02/24/2022 10:00:40 - INFO - codeparrot_training - Step 6520: {'lr': 0.0004891398835602925, 'samples': 3338752, 'steps': 6520, 'loss/train': 3.0605921745300293} +02/24/2022 10:00:44 - INFO - codeparrot_training - Step 6521: {'lr': 0.0004891351127841041, 'samples': 3339264, 'steps': 6521, 'loss/train': 5.062290668487549} +02/24/2022 10:00:47 - INFO - codeparrot_training - Step 6522: {'lr': 0.0004891303409835369, 'samples': 3339776, 'steps': 6522, 'loss/train': 2.3840460777282715} +02/24/2022 10:00:53 - INFO - codeparrot_training - Step 6523: {'lr': 0.0004891255681586113, 'samples': 3340288, 'steps': 6523, 'loss/train': 1.9411540031433105} +02/24/2022 10:00:56 - INFO - codeparrot_training - Step 6524: {'lr': 0.0004891207943093476, 'samples': 3340800, 'steps': 6524, 'loss/train': 2.0542826652526855} +02/24/2022 10:01:01 - INFO - codeparrot_training - Step 6525: {'lr': 0.0004891160194357663, 'samples': 3341312, 'steps': 6525, 'loss/train': 2.3490045070648193} +02/24/2022 10:01:05 - INFO - codeparrot_training - Step 6526: {'lr': 0.0004891112435378881, 'samples': 3341824, 'steps': 6526, 'loss/train': 2.155965566635132} +02/24/2022 10:01:11 - INFO - codeparrot_training - Step 6527: {'lr': 0.0004891064666157331, 'samples': 3342336, 'steps': 6527, 'loss/train': 2.277674674987793} +02/24/2022 10:01:14 - INFO - codeparrot_training - Step 6528: {'lr': 0.0004891016886693219, 'samples': 3342848, 'steps': 6528, 'loss/train': 1.6484726667404175} +02/24/2022 10:01:21 - INFO - codeparrot_training - Step 6529: {'lr': 0.0004890969096986751, 'samples': 3343360, 'steps': 6529, 'loss/train': 1.8307876586914062} +02/24/2022 10:01:27 - INFO - codeparrot_training - Step 6530: {'lr': 0.000489092129703813, 'samples': 3343872, 'steps': 6530, 'loss/train': 3.480984687805176} +02/24/2022 10:01:30 - INFO - codeparrot_training - Step 6531: {'lr': 0.0004890873486847561, 'samples': 3344384, 'steps': 6531, 'loss/train': 1.3929086923599243} +02/24/2022 10:01:36 - INFO - codeparrot_training - Step 6532: {'lr': 0.000489082566641525, 'samples': 3344896, 'steps': 6532, 'loss/train': 2.7073380947113037} +02/24/2022 10:01:39 - INFO - codeparrot_training - Step 6533: {'lr': 0.00048907778357414, 'samples': 3345408, 'steps': 6533, 'loss/train': 2.1377365589141846} +02/24/2022 10:01:45 - INFO - codeparrot_training - Step 6534: {'lr': 0.0004890729994826218, 'samples': 3345920, 'steps': 6534, 'loss/train': 3.1660683155059814} +02/24/2022 10:01:48 - INFO - codeparrot_training - Step 6535: {'lr': 0.0004890682143669908, 'samples': 3346432, 'steps': 6535, 'loss/train': 3.0280263423919678} +02/24/2022 10:01:52 - INFO - codeparrot_training - Step 6536: {'lr': 0.0004890634282272673, 'samples': 3346944, 'steps': 6536, 'loss/train': 0.28646907210350037} +02/24/2022 10:01:57 - INFO - codeparrot_training - Step 6537: {'lr': 0.0004890586410634722, 'samples': 3347456, 'steps': 6537, 'loss/train': 3.232842445373535} +02/24/2022 10:02:01 - INFO - codeparrot_training - Step 6538: {'lr': 0.0004890538528756256, 'samples': 3347968, 'steps': 6538, 'loss/train': 1.879063606262207} +02/24/2022 10:02:08 - INFO - codeparrot_training - Step 6539: {'lr': 0.0004890490636637484, 'samples': 3348480, 'steps': 6539, 'loss/train': 2.724001884460449} +02/24/2022 10:02:14 - INFO - codeparrot_training - Step 6540: {'lr': 0.0004890442734278608, 'samples': 3348992, 'steps': 6540, 'loss/train': 1.7801892757415771} +02/24/2022 10:02:18 - INFO - codeparrot_training - Step 6541: {'lr': 0.0004890394821679834, 'samples': 3349504, 'steps': 6541, 'loss/train': 1.900373935699463} +02/24/2022 10:02:23 - INFO - codeparrot_training - Step 6542: {'lr': 0.0004890346898841369, 'samples': 3350016, 'steps': 6542, 'loss/train': 2.83417010307312} +02/24/2022 10:02:27 - INFO - codeparrot_training - Step 6543: {'lr': 0.0004890298965763416, 'samples': 3350528, 'steps': 6543, 'loss/train': 0.7365725040435791} +02/24/2022 10:02:32 - INFO - codeparrot_training - Step 6544: {'lr': 0.0004890251022446181, 'samples': 3351040, 'steps': 6544, 'loss/train': 2.0251264572143555} +02/24/2022 10:02:36 - INFO - codeparrot_training - Step 6545: {'lr': 0.000489020306888987, 'samples': 3351552, 'steps': 6545, 'loss/train': 3.1508102416992188} +02/24/2022 10:02:41 - INFO - codeparrot_training - Step 6546: {'lr': 0.0004890155105094688, 'samples': 3352064, 'steps': 6546, 'loss/train': 1.8642537593841553} +02/24/2022 10:02:45 - INFO - codeparrot_training - Step 6547: {'lr': 0.0004890107131060841, 'samples': 3352576, 'steps': 6547, 'loss/train': 1.9416359663009644} +02/24/2022 10:02:52 - INFO - codeparrot_training - Step 6548: {'lr': 0.0004890059146788532, 'samples': 3353088, 'steps': 6548, 'loss/train': 2.778923749923706} +02/24/2022 10:02:55 - INFO - codeparrot_training - Step 6549: {'lr': 0.000489001115227797, 'samples': 3353600, 'steps': 6549, 'loss/train': 1.9057215452194214} +02/24/2022 10:03:01 - INFO - codeparrot_training - Step 6550: {'lr': 0.000488996314752936, 'samples': 3354112, 'steps': 6550, 'loss/train': 3.6777164936065674} +02/24/2022 10:03:04 - INFO - codeparrot_training - Step 6551: {'lr': 0.0004889915132542906, 'samples': 3354624, 'steps': 6551, 'loss/train': 0.23695413768291473} +02/24/2022 10:03:10 - INFO - codeparrot_training - Step 6552: {'lr': 0.0004889867107318814, 'samples': 3355136, 'steps': 6552, 'loss/train': 2.2470178604125977} +02/24/2022 10:03:13 - INFO - codeparrot_training - Step 6553: {'lr': 0.0004889819071857291, 'samples': 3355648, 'steps': 6553, 'loss/train': 1.051946997642517} +02/24/2022 10:03:19 - INFO - codeparrot_training - Step 6554: {'lr': 0.0004889771026158541, 'samples': 3356160, 'steps': 6554, 'loss/train': 2.677356243133545} +02/24/2022 10:03:22 - INFO - codeparrot_training - Step 6555: {'lr': 0.0004889722970222772, 'samples': 3356672, 'steps': 6555, 'loss/train': 2.6603970527648926} +02/24/2022 10:03:28 - INFO - codeparrot_training - Step 6556: {'lr': 0.0004889674904050188, 'samples': 3357184, 'steps': 6556, 'loss/train': 2.640587329864502} +02/24/2022 10:03:31 - INFO - codeparrot_training - Step 6557: {'lr': 0.0004889626827640994, 'samples': 3357696, 'steps': 6557, 'loss/train': 2.5008530616760254} +02/24/2022 10:03:37 - INFO - codeparrot_training - Step 6558: {'lr': 0.00048895787409954, 'samples': 3358208, 'steps': 6558, 'loss/train': 2.5547757148742676} +02/24/2022 10:03:41 - INFO - codeparrot_training - Step 6559: {'lr': 0.0004889530644113608, 'samples': 3358720, 'steps': 6559, 'loss/train': 2.7593259811401367} +02/24/2022 10:03:46 - INFO - codeparrot_training - Step 6560: {'lr': 0.0004889482536995825, 'samples': 3359232, 'steps': 6560, 'loss/train': 2.433361530303955} +02/24/2022 10:03:50 - INFO - codeparrot_training - Step 6561: {'lr': 0.0004889434419642259, 'samples': 3359744, 'steps': 6561, 'loss/train': 3.0989346504211426} +02/24/2022 10:03:55 - INFO - codeparrot_training - Step 6562: {'lr': 0.0004889386292053114, 'samples': 3360256, 'steps': 6562, 'loss/train': 0.5503135919570923} +02/24/2022 10:03:59 - INFO - codeparrot_training - Step 6563: {'lr': 0.0004889338154228596, 'samples': 3360768, 'steps': 6563, 'loss/train': 3.1529862880706787} +02/24/2022 10:04:06 - INFO - codeparrot_training - Step 6564: {'lr': 0.0004889290006168913, 'samples': 3361280, 'steps': 6564, 'loss/train': 2.479491710662842} +02/24/2022 10:04:09 - INFO - codeparrot_training - Step 6565: {'lr': 0.0004889241847874271, 'samples': 3361792, 'steps': 6565, 'loss/train': 1.8109992742538452} +02/24/2022 10:04:15 - INFO - codeparrot_training - Step 6566: {'lr': 0.0004889193679344874, 'samples': 3362304, 'steps': 6566, 'loss/train': 2.2763566970825195} +02/24/2022 10:04:18 - INFO - codeparrot_training - Step 6567: {'lr': 0.0004889145500580932, 'samples': 3362816, 'steps': 6567, 'loss/train': 2.791712522506714} +02/24/2022 10:04:22 - INFO - codeparrot_training - Step 6568: {'lr': 0.0004889097311582647, 'samples': 3363328, 'steps': 6568, 'loss/train': 1.8300089836120605} +02/24/2022 10:04:27 - INFO - codeparrot_training - Step 6569: {'lr': 0.000488904911235023, 'samples': 3363840, 'steps': 6569, 'loss/train': 2.486496686935425} +02/24/2022 10:04:33 - INFO - codeparrot_training - Step 6570: {'lr': 0.0004889000902883883, 'samples': 3364352, 'steps': 6570, 'loss/train': 2.468989133834839} +02/24/2022 10:04:36 - INFO - codeparrot_training - Step 6571: {'lr': 0.0004888952683183816, 'samples': 3364864, 'steps': 6571, 'loss/train': 2.3142385482788086} +02/24/2022 10:04:42 - INFO - codeparrot_training - Step 6572: {'lr': 0.0004888904453250233, 'samples': 3365376, 'steps': 6572, 'loss/train': 3.6092429161071777} +02/24/2022 10:04:45 - INFO - codeparrot_training - Step 6573: {'lr': 0.0004888856213083343, 'samples': 3365888, 'steps': 6573, 'loss/train': 2.0679149627685547} +02/24/2022 10:04:53 - INFO - codeparrot_training - Step 6574: {'lr': 0.0004888807962683353, 'samples': 3366400, 'steps': 6574, 'loss/train': 2.371832847595215} +02/24/2022 10:04:56 - INFO - codeparrot_training - Step 6575: {'lr': 0.0004888759702050466, 'samples': 3366912, 'steps': 6575, 'loss/train': 2.7449398040771484} +02/24/2022 10:05:02 - INFO - codeparrot_training - Step 6576: {'lr': 0.0004888711431184892, 'samples': 3367424, 'steps': 6576, 'loss/train': 1.9852746725082397} +02/24/2022 10:05:05 - INFO - codeparrot_training - Step 6577: {'lr': 0.0004888663150086835, 'samples': 3367936, 'steps': 6577, 'loss/train': 1.6808832883834839} +02/24/2022 10:05:09 - INFO - codeparrot_training - Step 6578: {'lr': 0.0004888614858756505, 'samples': 3368448, 'steps': 6578, 'loss/train': 3.339327096939087} +02/24/2022 10:05:15 - INFO - codeparrot_training - Step 6579: {'lr': 0.0004888566557194107, 'samples': 3368960, 'steps': 6579, 'loss/train': 2.0961108207702637} +02/24/2022 10:05:20 - INFO - codeparrot_training - Step 6580: {'lr': 0.0004888518245399849, 'samples': 3369472, 'steps': 6580, 'loss/train': 1.8694839477539062} +02/24/2022 10:05:24 - INFO - codeparrot_training - Step 6581: {'lr': 0.0004888469923373937, 'samples': 3369984, 'steps': 6581, 'loss/train': 2.3084683418273926} +02/24/2022 10:05:29 - INFO - codeparrot_training - Step 6582: {'lr': 0.0004888421591116578, 'samples': 3370496, 'steps': 6582, 'loss/train': 1.8113888502120972} +02/24/2022 10:05:33 - INFO - codeparrot_training - Step 6583: {'lr': 0.000488837324862798, 'samples': 3371008, 'steps': 6583, 'loss/train': 2.4665839672088623} +02/24/2022 10:05:40 - INFO - codeparrot_training - Step 6584: {'lr': 0.0004888324895908349, 'samples': 3371520, 'steps': 6584, 'loss/train': 3.1531171798706055} +02/24/2022 10:05:43 - INFO - codeparrot_training - Step 6585: {'lr': 0.0004888276532957892, 'samples': 3372032, 'steps': 6585, 'loss/train': 2.2655367851257324} +02/24/2022 10:05:49 - INFO - codeparrot_training - Step 6586: {'lr': 0.0004888228159776818, 'samples': 3372544, 'steps': 6586, 'loss/train': 1.1006475687026978} +02/24/2022 10:05:52 - INFO - codeparrot_training - Step 6587: {'lr': 0.0004888179776365331, 'samples': 3373056, 'steps': 6587, 'loss/train': 2.708115816116333} +02/24/2022 10:05:58 - INFO - codeparrot_training - Step 6588: {'lr': 0.0004888131382723641, 'samples': 3373568, 'steps': 6588, 'loss/train': 2.7441513538360596} +02/24/2022 10:06:01 - INFO - codeparrot_training - Step 6589: {'lr': 0.0004888082978851954, 'samples': 3374080, 'steps': 6589, 'loss/train': 7.269293785095215} +02/24/2022 10:06:07 - INFO - codeparrot_training - Step 6590: {'lr': 0.000488803456475048, 'samples': 3374592, 'steps': 6590, 'loss/train': 2.0342698097229004} +02/24/2022 10:06:10 - INFO - codeparrot_training - Step 6591: {'lr': 0.0004887986140419422, 'samples': 3375104, 'steps': 6591, 'loss/train': 3.4777088165283203} +02/24/2022 10:06:16 - INFO - codeparrot_training - Step 6592: {'lr': 0.000488793770585899, 'samples': 3375616, 'steps': 6592, 'loss/train': 2.5299012660980225} +02/24/2022 10:06:19 - INFO - codeparrot_training - Step 6593: {'lr': 0.0004887889261069392, 'samples': 3376128, 'steps': 6593, 'loss/train': 0.4990481436252594} +02/24/2022 10:06:25 - INFO - codeparrot_training - Step 6594: {'lr': 0.0004887840806050834, 'samples': 3376640, 'steps': 6594, 'loss/train': 2.1829071044921875} +02/24/2022 10:06:28 - INFO - codeparrot_training - Step 6595: {'lr': 0.0004887792340803524, 'samples': 3377152, 'steps': 6595, 'loss/train': 2.905155658721924} +02/24/2022 10:06:35 - INFO - codeparrot_training - Step 6596: {'lr': 0.000488774386532767, 'samples': 3377664, 'steps': 6596, 'loss/train': 1.8029149770736694} +02/24/2022 10:06:39 - INFO - codeparrot_training - Step 6597: {'lr': 0.0004887695379623481, 'samples': 3378176, 'steps': 6597, 'loss/train': 2.8679449558258057} +02/24/2022 10:06:45 - INFO - codeparrot_training - Step 6598: {'lr': 0.000488764688369116, 'samples': 3378688, 'steps': 6598, 'loss/train': 2.7318615913391113} +02/24/2022 10:06:48 - INFO - codeparrot_training - Step 6599: {'lr': 0.000488759837753092, 'samples': 3379200, 'steps': 6599, 'loss/train': 2.086902141571045} +02/24/2022 10:06:54 - INFO - codeparrot_training - Step 6600: {'lr': 0.0004887549861142967, 'samples': 3379712, 'steps': 6600, 'loss/train': 2.0606794357299805} +02/24/2022 10:06:57 - INFO - codeparrot_training - Step 6601: {'lr': 0.0004887501334527507, 'samples': 3380224, 'steps': 6601, 'loss/train': 2.4570682048797607} +02/24/2022 10:07:03 - INFO - codeparrot_training - Step 6602: {'lr': 0.000488745279768475, 'samples': 3380736, 'steps': 6602, 'loss/train': 2.378192186355591} +02/24/2022 10:07:06 - INFO - codeparrot_training - Step 6603: {'lr': 0.0004887404250614904, 'samples': 3381248, 'steps': 6603, 'loss/train': 2.676232099533081} +02/24/2022 10:07:10 - INFO - codeparrot_training - Step 6604: {'lr': 0.0004887355693318176, 'samples': 3381760, 'steps': 6604, 'loss/train': 1.8846713304519653} +02/24/2022 10:07:15 - INFO - codeparrot_training - Step 6605: {'lr': 0.0004887307125794775, 'samples': 3382272, 'steps': 6605, 'loss/train': 2.3327300548553467} +02/24/2022 10:07:21 - INFO - codeparrot_training - Step 6606: {'lr': 0.0004887258548044907, 'samples': 3382784, 'steps': 6606, 'loss/train': 2.2616655826568604} +02/24/2022 10:07:24 - INFO - codeparrot_training - Step 6607: {'lr': 0.0004887209960068782, 'samples': 3383296, 'steps': 6607, 'loss/train': 2.124143123626709} +02/24/2022 10:07:30 - INFO - codeparrot_training - Step 6608: {'lr': 0.0004887161361866607, 'samples': 3383808, 'steps': 6608, 'loss/train': 2.479696035385132} +02/24/2022 10:07:34 - INFO - codeparrot_training - Step 6609: {'lr': 0.0004887112753438592, 'samples': 3384320, 'steps': 6609, 'loss/train': 1.5773290395736694} +02/24/2022 10:07:41 - INFO - codeparrot_training - Step 6610: {'lr': 0.0004887064134784943, 'samples': 3384832, 'steps': 6610, 'loss/train': 1.6292513608932495} +02/24/2022 10:07:44 - INFO - codeparrot_training - Step 6611: {'lr': 0.0004887015505905869, 'samples': 3385344, 'steps': 6611, 'loss/train': 2.8065037727355957} +02/24/2022 10:07:50 - INFO - codeparrot_training - Step 6612: {'lr': 0.0004886966866801579, 'samples': 3385856, 'steps': 6612, 'loss/train': 1.255644679069519} +02/24/2022 10:07:53 - INFO - codeparrot_training - Step 6613: {'lr': 0.0004886918217472281, 'samples': 3386368, 'steps': 6613, 'loss/train': 2.8977606296539307} +02/24/2022 10:07:59 - INFO - codeparrot_training - Step 6614: {'lr': 0.0004886869557918183, 'samples': 3386880, 'steps': 6614, 'loss/train': 2.5531868934631348} +02/24/2022 10:08:02 - INFO - codeparrot_training - Step 6615: {'lr': 0.0004886820888139494, 'samples': 3387392, 'steps': 6615, 'loss/train': 2.9990012645721436} +02/24/2022 10:08:08 - INFO - codeparrot_training - Step 6616: {'lr': 0.0004886772208136422, 'samples': 3387904, 'steps': 6616, 'loss/train': 2.3838999271392822} +02/24/2022 10:08:11 - INFO - codeparrot_training - Step 6617: {'lr': 0.0004886723517909176, 'samples': 3388416, 'steps': 6617, 'loss/train': 1.844799518585205} +02/24/2022 10:08:17 - INFO - codeparrot_training - Step 6618: {'lr': 0.0004886674817457964, 'samples': 3388928, 'steps': 6618, 'loss/train': 2.758025884628296} +02/24/2022 10:08:20 - INFO - codeparrot_training - Step 6619: {'lr': 0.0004886626106782995, 'samples': 3389440, 'steps': 6619, 'loss/train': 3.207862377166748} +02/24/2022 10:08:27 - INFO - codeparrot_training - Step 6620: {'lr': 0.0004886577385884478, 'samples': 3389952, 'steps': 6620, 'loss/train': 2.013533592224121} +02/24/2022 10:08:31 - INFO - codeparrot_training - Step 6621: {'lr': 0.0004886528654762621, 'samples': 3390464, 'steps': 6621, 'loss/train': 2.4255099296569824} +02/24/2022 10:08:37 - INFO - codeparrot_training - Step 6622: {'lr': 0.0004886479913417633, 'samples': 3390976, 'steps': 6622, 'loss/train': 2.250338077545166} +02/24/2022 10:08:40 - INFO - codeparrot_training - Step 6623: {'lr': 0.0004886431161849722, 'samples': 3391488, 'steps': 6623, 'loss/train': 2.052408456802368} +02/24/2022 10:08:46 - INFO - codeparrot_training - Step 6624: {'lr': 0.0004886382400059099, 'samples': 3392000, 'steps': 6624, 'loss/train': 2.5310869216918945} +02/24/2022 10:08:49 - INFO - codeparrot_training - Step 6625: {'lr': 0.0004886333628045972, 'samples': 3392512, 'steps': 6625, 'loss/train': 2.2756600379943848} +02/24/2022 10:08:55 - INFO - codeparrot_training - Step 6626: {'lr': 0.0004886284845810548, 'samples': 3393024, 'steps': 6626, 'loss/train': 2.1461868286132812} +02/24/2022 10:08:58 - INFO - codeparrot_training - Step 6627: {'lr': 0.0004886236053353038, 'samples': 3393536, 'steps': 6627, 'loss/train': 1.1681115627288818} +02/24/2022 10:09:04 - INFO - codeparrot_training - Step 6628: {'lr': 0.000488618725067365, 'samples': 3394048, 'steps': 6628, 'loss/train': 3.136591672897339} +02/24/2022 10:09:07 - INFO - codeparrot_training - Step 6629: {'lr': 0.0004886138437772594, 'samples': 3394560, 'steps': 6629, 'loss/train': 1.9597866535186768} +02/24/2022 10:09:13 - INFO - codeparrot_training - Step 6630: {'lr': 0.0004886089614650078, 'samples': 3395072, 'steps': 6630, 'loss/train': 2.4114599227905273} +02/24/2022 10:09:16 - INFO - codeparrot_training - Step 6631: {'lr': 0.0004886040781306313, 'samples': 3395584, 'steps': 6631, 'loss/train': 5.021580696105957} +02/24/2022 10:09:23 - INFO - codeparrot_training - Step 6632: {'lr': 0.0004885991937741506, 'samples': 3396096, 'steps': 6632, 'loss/train': 3.04402756690979} +02/24/2022 10:09:26 - INFO - codeparrot_training - Step 6633: {'lr': 0.0004885943083955868, 'samples': 3396608, 'steps': 6633, 'loss/train': 2.7292466163635254} +02/24/2022 10:09:32 - INFO - codeparrot_training - Step 6634: {'lr': 0.0004885894219949607, 'samples': 3397120, 'steps': 6634, 'loss/train': 2.0121607780456543} +02/24/2022 10:09:35 - INFO - codeparrot_training - Step 6635: {'lr': 0.0004885845345722932, 'samples': 3397632, 'steps': 6635, 'loss/train': 1.775314450263977} +02/24/2022 10:09:41 - INFO - codeparrot_training - Step 6636: {'lr': 0.0004885796461276055, 'samples': 3398144, 'steps': 6636, 'loss/train': 2.724120855331421} +02/24/2022 10:09:44 - INFO - codeparrot_training - Step 6637: {'lr': 0.0004885747566609182, 'samples': 3398656, 'steps': 6637, 'loss/train': 2.1954236030578613} +02/24/2022 10:09:50 - INFO - codeparrot_training - Step 6638: {'lr': 0.0004885698661722524, 'samples': 3399168, 'steps': 6638, 'loss/train': 2.8365275859832764} +02/24/2022 10:09:53 - INFO - codeparrot_training - Step 6639: {'lr': 0.0004885649746616291, 'samples': 3399680, 'steps': 6639, 'loss/train': 3.1536190509796143} +02/24/2022 10:09:59 - INFO - codeparrot_training - Step 6640: {'lr': 0.0004885600821290692, 'samples': 3400192, 'steps': 6640, 'loss/train': 3.166158437728882} +02/24/2022 10:10:02 - INFO - codeparrot_training - Step 6641: {'lr': 0.0004885551885745937, 'samples': 3400704, 'steps': 6641, 'loss/train': 2.1950392723083496} +02/24/2022 10:10:09 - INFO - codeparrot_training - Step 6642: {'lr': 0.0004885502939982235, 'samples': 3401216, 'steps': 6642, 'loss/train': 2.4782874584198} +02/24/2022 10:10:13 - INFO - codeparrot_training - Step 6643: {'lr': 0.0004885453983999795, 'samples': 3401728, 'steps': 6643, 'loss/train': 1.5839953422546387} +02/24/2022 10:10:18 - INFO - codeparrot_training - Step 6644: {'lr': 0.0004885405017798828, 'samples': 3402240, 'steps': 6644, 'loss/train': 1.575046181678772} +02/24/2022 10:10:21 - INFO - codeparrot_training - Step 6645: {'lr': 0.0004885356041379544, 'samples': 3402752, 'steps': 6645, 'loss/train': 2.4168448448181152} +02/24/2022 10:10:27 - INFO - codeparrot_training - Step 6646: {'lr': 0.0004885307054742151, 'samples': 3403264, 'steps': 6646, 'loss/train': 1.755401611328125} +02/24/2022 10:10:30 - INFO - codeparrot_training - Step 6647: {'lr': 0.0004885258057886861, 'samples': 3403776, 'steps': 6647, 'loss/train': 1.2887043952941895} +02/24/2022 10:10:36 - INFO - codeparrot_training - Step 6648: {'lr': 0.0004885209050813882, 'samples': 3404288, 'steps': 6648, 'loss/train': 1.9484624862670898} +02/24/2022 10:10:39 - INFO - codeparrot_training - Step 6649: {'lr': 0.0004885160033523426, 'samples': 3404800, 'steps': 6649, 'loss/train': 1.7089738845825195} +02/24/2022 10:10:45 - INFO - codeparrot_training - Step 6650: {'lr': 0.0004885111006015701, 'samples': 3405312, 'steps': 6650, 'loss/train': 1.8647420406341553} +02/24/2022 10:10:48 - INFO - codeparrot_training - Step 6651: {'lr': 0.0004885061968290919, 'samples': 3405824, 'steps': 6651, 'loss/train': 1.6634795665740967} +02/24/2022 10:10:54 - INFO - codeparrot_training - Step 6652: {'lr': 0.0004885012920349287, 'samples': 3406336, 'steps': 6652, 'loss/train': 1.3363686800003052} +02/24/2022 10:10:58 - INFO - codeparrot_training - Step 6653: {'lr': 0.0004884963862191018, 'samples': 3406848, 'steps': 6653, 'loss/train': 0.18189160525798798} +02/24/2022 10:11:03 - INFO - codeparrot_training - Step 6654: {'lr': 0.0004884914793816321, 'samples': 3407360, 'steps': 6654, 'loss/train': 3.6349596977233887} +02/24/2022 10:11:07 - INFO - codeparrot_training - Step 6655: {'lr': 0.0004884865715225407, 'samples': 3407872, 'steps': 6655, 'loss/train': 1.5691801309585571} +02/24/2022 10:11:13 - INFO - codeparrot_training - Step 6656: {'lr': 0.0004884816626418484, 'samples': 3408384, 'steps': 6656, 'loss/train': 3.0810370445251465} +02/24/2022 10:11:17 - INFO - codeparrot_training - Step 6657: {'lr': 0.0004884767527395765, 'samples': 3408896, 'steps': 6657, 'loss/train': 3.0394320487976074} +02/24/2022 10:11:22 - INFO - codeparrot_training - Step 6658: {'lr': 0.0004884718418157459, 'samples': 3409408, 'steps': 6658, 'loss/train': 2.349281072616577} +02/24/2022 10:11:28 - INFO - codeparrot_training - Step 6659: {'lr': 0.0004884669298703775, 'samples': 3409920, 'steps': 6659, 'loss/train': 1.5560840368270874} +02/24/2022 10:11:32 - INFO - codeparrot_training - Step 6660: {'lr': 0.0004884620169034927, 'samples': 3410432, 'steps': 6660, 'loss/train': 2.6041526794433594} +02/24/2022 10:11:35 - INFO - codeparrot_training - Step 6661: {'lr': 0.0004884571029151123, 'samples': 3410944, 'steps': 6661, 'loss/train': 3.0832126140594482} +02/24/2022 10:11:41 - INFO - codeparrot_training - Step 6662: {'lr': 0.0004884521879052573, 'samples': 3411456, 'steps': 6662, 'loss/train': 2.4900426864624023} +02/24/2022 10:11:44 - INFO - codeparrot_training - Step 6663: {'lr': 0.000488447271873949, 'samples': 3411968, 'steps': 6663, 'loss/train': 3.26607084274292} +02/24/2022 10:11:50 - INFO - codeparrot_training - Step 6664: {'lr': 0.0004884423548212082, 'samples': 3412480, 'steps': 6664, 'loss/train': 3.2601382732391357} +02/24/2022 10:11:53 - INFO - codeparrot_training - Step 6665: {'lr': 0.000488437436747056, 'samples': 3412992, 'steps': 6665, 'loss/train': 0.47087666392326355} +02/24/2022 10:11:59 - INFO - codeparrot_training - Step 6666: {'lr': 0.0004884325176515137, 'samples': 3413504, 'steps': 6666, 'loss/train': 3.041734218597412} +02/24/2022 10:12:02 - INFO - codeparrot_training - Step 6667: {'lr': 0.000488427597534602, 'samples': 3414016, 'steps': 6667, 'loss/train': 1.6828534603118896} +02/24/2022 10:12:09 - INFO - codeparrot_training - Step 6668: {'lr': 0.0004884226763963423, 'samples': 3414528, 'steps': 6668, 'loss/train': 2.149714946746826} +02/24/2022 10:12:12 - INFO - codeparrot_training - Step 6669: {'lr': 0.0004884177542367556, 'samples': 3415040, 'steps': 6669, 'loss/train': 1.6940264701843262} +02/24/2022 10:12:18 - INFO - codeparrot_training - Step 6670: {'lr': 0.0004884128310558628, 'samples': 3415552, 'steps': 6670, 'loss/train': 1.0381897687911987} +02/24/2022 10:12:21 - INFO - codeparrot_training - Step 6671: {'lr': 0.0004884079068536853, 'samples': 3416064, 'steps': 6671, 'loss/train': 2.336825132369995} +02/24/2022 10:12:27 - INFO - codeparrot_training - Step 6672: {'lr': 0.000488402981630244, 'samples': 3416576, 'steps': 6672, 'loss/train': 2.4532577991485596} +02/24/2022 10:12:30 - INFO - codeparrot_training - Step 6673: {'lr': 0.00048839805538556, 'samples': 3417088, 'steps': 6673, 'loss/train': 2.9381401538848877} +02/24/2022 10:12:36 - INFO - codeparrot_training - Step 6674: {'lr': 0.0004883931281196544, 'samples': 3417600, 'steps': 6674, 'loss/train': 2.611172914505005} +02/24/2022 10:12:39 - INFO - codeparrot_training - Step 6675: {'lr': 0.0004883881998325484, 'samples': 3418112, 'steps': 6675, 'loss/train': 3.408902883529663} +02/24/2022 10:12:45 - INFO - codeparrot_training - Step 6676: {'lr': 0.000488383270524263, 'samples': 3418624, 'steps': 6676, 'loss/train': 1.6392360925674438} +02/24/2022 10:12:49 - INFO - codeparrot_training - Step 6677: {'lr': 0.0004883783401948194, 'samples': 3419136, 'steps': 6677, 'loss/train': 1.9712547063827515} +02/24/2022 10:12:54 - INFO - codeparrot_training - Step 6678: {'lr': 0.0004883734088442387, 'samples': 3419648, 'steps': 6678, 'loss/train': 2.7229816913604736} +02/24/2022 10:12:58 - INFO - codeparrot_training - Step 6679: {'lr': 0.0004883684764725419, 'samples': 3420160, 'steps': 6679, 'loss/train': 2.0310049057006836} +02/24/2022 10:13:04 - INFO - codeparrot_training - Step 6680: {'lr': 0.0004883635430797502, 'samples': 3420672, 'steps': 6680, 'loss/train': 2.5254099369049072} +02/24/2022 10:13:07 - INFO - codeparrot_training - Step 6681: {'lr': 0.000488358608665885, 'samples': 3421184, 'steps': 6681, 'loss/train': 2.7194700241088867} +02/24/2022 10:13:13 - INFO - codeparrot_training - Step 6682: {'lr': 0.000488353673230967, 'samples': 3421696, 'steps': 6682, 'loss/train': 2.399972677230835} +02/24/2022 10:13:16 - INFO - codeparrot_training - Step 6683: {'lr': 0.0004883487367750177, 'samples': 3422208, 'steps': 6683, 'loss/train': 2.159799098968506} +02/24/2022 10:13:22 - INFO - codeparrot_training - Step 6684: {'lr': 0.0004883437992980581, 'samples': 3422720, 'steps': 6684, 'loss/train': 1.2849992513656616} +02/24/2022 10:13:25 - INFO - codeparrot_training - Step 6685: {'lr': 0.0004883388608001093, 'samples': 3423232, 'steps': 6685, 'loss/train': 2.4541096687316895} +02/24/2022 10:13:31 - INFO - codeparrot_training - Step 6686: {'lr': 0.0004883339212811924, 'samples': 3423744, 'steps': 6686, 'loss/train': 1.4725836515426636} +02/24/2022 10:13:35 - INFO - codeparrot_training - Step 6687: {'lr': 0.0004883289807413288, 'samples': 3424256, 'steps': 6687, 'loss/train': 2.3076071739196777} +02/24/2022 10:13:40 - INFO - codeparrot_training - Step 6688: {'lr': 0.0004883240391805394, 'samples': 3424768, 'steps': 6688, 'loss/train': 3.1735002994537354} +02/24/2022 10:13:44 - INFO - codeparrot_training - Step 6689: {'lr': 0.0004883190965988455, 'samples': 3425280, 'steps': 6689, 'loss/train': 2.6082816123962402} +02/24/2022 10:13:49 - INFO - codeparrot_training - Step 6690: {'lr': 0.0004883141529962683, 'samples': 3425792, 'steps': 6690, 'loss/train': 2.8360228538513184} +02/24/2022 10:13:52 - INFO - codeparrot_training - Step 6691: {'lr': 0.000488309208372829, 'samples': 3426304, 'steps': 6691, 'loss/train': 2.1867799758911133} +02/24/2022 10:13:58 - INFO - codeparrot_training - Step 6692: {'lr': 0.0004883042627285488, 'samples': 3426816, 'steps': 6692, 'loss/train': 0.40237507224082947} +02/24/2022 10:14:02 - INFO - codeparrot_training - Step 6693: {'lr': 0.0004882993160634487, 'samples': 3427328, 'steps': 6693, 'loss/train': 1.9413496255874634} +02/24/2022 10:14:07 - INFO - codeparrot_training - Step 6694: {'lr': 0.0004882943683775499, 'samples': 3427840, 'steps': 6694, 'loss/train': 0.8966919183731079} +02/24/2022 10:14:11 - INFO - codeparrot_training - Step 6695: {'lr': 0.0004882894196708738, 'samples': 3428352, 'steps': 6695, 'loss/train': 3.11983585357666} +02/24/2022 10:14:16 - INFO - codeparrot_training - Step 6696: {'lr': 0.0004882844699434415, 'samples': 3428864, 'steps': 6696, 'loss/train': 3.249206066131592} +02/24/2022 10:14:20 - INFO - codeparrot_training - Step 6697: {'lr': 0.0004882795191952741, 'samples': 3429376, 'steps': 6697, 'loss/train': 1.4275602102279663} +02/24/2022 10:14:25 - INFO - codeparrot_training - Step 6698: {'lr': 0.0004882745674263931, 'samples': 3429888, 'steps': 6698, 'loss/train': 1.953503131866455} +02/24/2022 10:14:29 - INFO - codeparrot_training - Step 6699: {'lr': 0.00048826961463681936, 'samples': 3430400, 'steps': 6699, 'loss/train': 2.1883325576782227} +02/24/2022 10:14:34 - INFO - codeparrot_training - Step 6700: {'lr': 0.00048826466082657426, 'samples': 3430912, 'steps': 6700, 'loss/train': 2.350893020629883} +02/24/2022 10:14:38 - INFO - codeparrot_training - Step 6701: {'lr': 0.000488259705995679, 'samples': 3431424, 'steps': 6701, 'loss/train': 2.4833803176879883} +02/24/2022 10:14:44 - INFO - codeparrot_training - Step 6702: {'lr': 0.0004882547501441549, 'samples': 3431936, 'steps': 6702, 'loss/train': 2.158377170562744} +02/24/2022 10:14:50 - INFO - codeparrot_training - Step 6703: {'lr': 0.000488249793272023, 'samples': 3432448, 'steps': 6703, 'loss/train': 1.896710991859436} +02/24/2022 10:14:53 - INFO - codeparrot_training - Step 6704: {'lr': 0.0004882448353793048, 'samples': 3432960, 'steps': 6704, 'loss/train': 0.7881848812103271} +02/24/2022 10:14:59 - INFO - codeparrot_training - Step 6705: {'lr': 0.0004882398764660212, 'samples': 3433472, 'steps': 6705, 'loss/train': 2.688445568084717} +02/24/2022 10:15:02 - INFO - codeparrot_training - Step 6706: {'lr': 0.00048823491653219366, 'samples': 3433984, 'steps': 6706, 'loss/train': 2.4517712593078613} +02/24/2022 10:15:08 - INFO - codeparrot_training - Step 6707: {'lr': 0.00048822995557784343, 'samples': 3434496, 'steps': 6707, 'loss/train': 2.814756393432617} +02/24/2022 10:15:11 - INFO - codeparrot_training - Step 6708: {'lr': 0.00048822499360299165, 'samples': 3435008, 'steps': 6708, 'loss/train': 3.1927785873413086} +02/24/2022 10:15:17 - INFO - codeparrot_training - Step 6709: {'lr': 0.00048822003060765973, 'samples': 3435520, 'steps': 6709, 'loss/train': 1.3512688875198364} +02/24/2022 10:15:20 - INFO - codeparrot_training - Step 6710: {'lr': 0.00048821506659186875, 'samples': 3436032, 'steps': 6710, 'loss/train': 3.470193862915039} +02/24/2022 10:15:26 - INFO - codeparrot_training - Step 6711: {'lr': 0.0004882101015556402, 'samples': 3436544, 'steps': 6711, 'loss/train': 2.6784918308258057} +02/24/2022 10:15:29 - INFO - codeparrot_training - Step 6712: {'lr': 0.00048820513549899507, 'samples': 3437056, 'steps': 6712, 'loss/train': 3.514265537261963} +02/24/2022 10:15:33 - INFO - codeparrot_training - Step 6713: {'lr': 0.00048820016842195487, 'samples': 3437568, 'steps': 6713, 'loss/train': 2.6792969703674316} +02/24/2022 10:15:39 - INFO - codeparrot_training - Step 6714: {'lr': 0.0004881952003245408, 'samples': 3438080, 'steps': 6714, 'loss/train': 2.1285791397094727} +02/24/2022 10:15:42 - INFO - codeparrot_training - Step 6715: {'lr': 0.00048819023120677405, 'samples': 3438592, 'steps': 6715, 'loss/train': 1.4544459581375122} +02/24/2022 10:15:48 - INFO - codeparrot_training - Step 6716: {'lr': 0.000488185261068676, 'samples': 3439104, 'steps': 6716, 'loss/train': 2.244325637817383} +02/24/2022 10:15:54 - INFO - codeparrot_training - Step 6717: {'lr': 0.000488180289910268, 'samples': 3439616, 'steps': 6717, 'loss/train': 3.0336530208587646} +02/24/2022 10:15:57 - INFO - codeparrot_training - Step 6718: {'lr': 0.0004881753177315711, 'samples': 3440128, 'steps': 6718, 'loss/train': 2.2515945434570312} +02/24/2022 10:16:03 - INFO - codeparrot_training - Step 6719: {'lr': 0.0004881703445326069, 'samples': 3440640, 'steps': 6719, 'loss/train': 3.2806403636932373} +02/24/2022 10:16:06 - INFO - codeparrot_training - Step 6720: {'lr': 0.0004881653703133966, 'samples': 3441152, 'steps': 6720, 'loss/train': 3.607123851776123} +02/24/2022 10:16:12 - INFO - codeparrot_training - Step 6721: {'lr': 0.00048816039507396135, 'samples': 3441664, 'steps': 6721, 'loss/train': 2.0291852951049805} +02/24/2022 10:16:15 - INFO - codeparrot_training - Step 6722: {'lr': 0.00048815541881432273, 'samples': 3442176, 'steps': 6722, 'loss/train': 0.6921824812889099} +02/24/2022 10:16:22 - INFO - codeparrot_training - Step 6723: {'lr': 0.00048815044153450185, 'samples': 3442688, 'steps': 6723, 'loss/train': 2.089301586151123} +02/24/2022 10:16:25 - INFO - codeparrot_training - Step 6724: {'lr': 0.00048814546323452013, 'samples': 3443200, 'steps': 6724, 'loss/train': 3.177513360977173} +02/24/2022 10:16:31 - INFO - codeparrot_training - Step 6725: {'lr': 0.0004881404839143988, 'samples': 3443712, 'steps': 6725, 'loss/train': 2.178401231765747} +02/24/2022 10:16:34 - INFO - codeparrot_training - Step 6726: {'lr': 0.00048813550357415937, 'samples': 3444224, 'steps': 6726, 'loss/train': 1.9097955226898193} +02/24/2022 10:16:40 - INFO - codeparrot_training - Step 6727: {'lr': 0.00048813052221382294, 'samples': 3444736, 'steps': 6727, 'loss/train': 1.7666443586349487} +02/24/2022 10:16:43 - INFO - codeparrot_training - Step 6728: {'lr': 0.000488125539833411, 'samples': 3445248, 'steps': 6728, 'loss/train': 2.45284366607666} +02/24/2022 10:16:49 - INFO - codeparrot_training - Step 6729: {'lr': 0.0004881205564329449, 'samples': 3445760, 'steps': 6729, 'loss/train': 3.046703338623047} +02/24/2022 10:16:52 - INFO - codeparrot_training - Step 6730: {'lr': 0.00048811557201244594, 'samples': 3446272, 'steps': 6730, 'loss/train': 2.918105363845825} +02/24/2022 10:16:58 - INFO - codeparrot_training - Step 6731: {'lr': 0.0004881105865719355, 'samples': 3446784, 'steps': 6731, 'loss/train': 3.2985289096832275} +02/24/2022 10:17:01 - INFO - codeparrot_training - Step 6732: {'lr': 0.00048810560011143485, 'samples': 3447296, 'steps': 6732, 'loss/train': 2.6072769165039062} +02/24/2022 10:17:07 - INFO - codeparrot_training - Step 6733: {'lr': 0.0004881006126309654, 'samples': 3447808, 'steps': 6733, 'loss/train': 1.818380355834961} +02/24/2022 10:17:10 - INFO - codeparrot_training - Step 6734: {'lr': 0.00048809562413054864, 'samples': 3448320, 'steps': 6734, 'loss/train': 2.0489141941070557} +02/24/2022 10:17:16 - INFO - codeparrot_training - Step 6735: {'lr': 0.00048809063461020575, 'samples': 3448832, 'steps': 6735, 'loss/train': 3.461740016937256} +02/24/2022 10:17:19 - INFO - codeparrot_training - Step 6736: {'lr': 0.0004880856440699582, 'samples': 3449344, 'steps': 6736, 'loss/train': 2.1043686866760254} +02/24/2022 10:17:25 - INFO - codeparrot_training - Step 6737: {'lr': 0.00048808065250982737, 'samples': 3449856, 'steps': 6737, 'loss/train': 2.4198501110076904} +02/24/2022 10:17:28 - INFO - codeparrot_training - Step 6738: {'lr': 0.0004880756599298346, 'samples': 3450368, 'steps': 6738, 'loss/train': 0.9834362268447876} +02/24/2022 10:17:34 - INFO - codeparrot_training - Step 6739: {'lr': 0.0004880706663300013, 'samples': 3450880, 'steps': 6739, 'loss/train': 1.7150791883468628} +02/24/2022 10:17:38 - INFO - codeparrot_training - Step 6740: {'lr': 0.0004880656717103489, 'samples': 3451392, 'steps': 6740, 'loss/train': 3.4452342987060547} +02/24/2022 10:17:43 - INFO - codeparrot_training - Step 6741: {'lr': 0.00048806067607089866, 'samples': 3451904, 'steps': 6741, 'loss/train': 2.5446107387542725} +02/24/2022 10:17:47 - INFO - codeparrot_training - Step 6742: {'lr': 0.00048805567941167215, 'samples': 3452416, 'steps': 6742, 'loss/train': 1.396004319190979} +02/24/2022 10:17:52 - INFO - codeparrot_training - Step 6743: {'lr': 0.0004880506817326907, 'samples': 3452928, 'steps': 6743, 'loss/train': 1.0757395029067993} +02/24/2022 10:17:56 - INFO - codeparrot_training - Step 6744: {'lr': 0.0004880456830339757, 'samples': 3453440, 'steps': 6744, 'loss/train': 4.272819519042969} +02/24/2022 10:18:01 - INFO - codeparrot_training - Step 6745: {'lr': 0.00048804068331554864, 'samples': 3453952, 'steps': 6745, 'loss/train': 2.1425302028656006} +02/24/2022 10:18:05 - INFO - codeparrot_training - Step 6746: {'lr': 0.00048803568257743083, 'samples': 3454464, 'steps': 6746, 'loss/train': 2.6007919311523438} +02/24/2022 10:18:10 - INFO - codeparrot_training - Step 6747: {'lr': 0.00048803068081964375, 'samples': 3454976, 'steps': 6747, 'loss/train': 2.067784309387207} +02/24/2022 10:18:14 - INFO - codeparrot_training - Step 6748: {'lr': 0.00048802567804220875, 'samples': 3455488, 'steps': 6748, 'loss/train': 2.053147554397583} +02/24/2022 10:18:20 - INFO - codeparrot_training - Step 6749: {'lr': 0.0004880206742451474, 'samples': 3456000, 'steps': 6749, 'loss/train': 1.5851526260375977} +02/24/2022 10:18:23 - INFO - codeparrot_training - Step 6750: {'lr': 0.0004880156694284811, 'samples': 3456512, 'steps': 6750, 'loss/train': 2.108826160430908} +02/24/2022 10:18:29 - INFO - codeparrot_training - Step 6751: {'lr': 0.00048801066359223117, 'samples': 3457024, 'steps': 6751, 'loss/train': 2.5202291011810303} +02/24/2022 10:18:32 - INFO - codeparrot_training - Step 6752: {'lr': 0.00048800565673641917, 'samples': 3457536, 'steps': 6752, 'loss/train': 2.684048652648926} +02/24/2022 10:18:38 - INFO - codeparrot_training - Step 6753: {'lr': 0.00048800064886106654, 'samples': 3458048, 'steps': 6753, 'loss/train': 2.449248790740967} +02/24/2022 10:18:41 - INFO - codeparrot_training - Step 6754: {'lr': 0.0004879956399661947, 'samples': 3458560, 'steps': 6754, 'loss/train': 2.1509690284729004} +02/24/2022 10:18:47 - INFO - codeparrot_training - Step 6755: {'lr': 0.000487990630051825, 'samples': 3459072, 'steps': 6755, 'loss/train': 2.5240941047668457} +02/24/2022 10:18:50 - INFO - codeparrot_training - Step 6756: {'lr': 0.00048798561911797913, 'samples': 3459584, 'steps': 6756, 'loss/train': 1.8330283164978027} +02/24/2022 10:18:56 - INFO - codeparrot_training - Step 6757: {'lr': 0.0004879806071646784, 'samples': 3460096, 'steps': 6757, 'loss/train': 3.353050470352173} +02/24/2022 10:18:59 - INFO - codeparrot_training - Step 6758: {'lr': 0.00048797559419194427, 'samples': 3460608, 'steps': 6758, 'loss/train': 1.622212290763855} +02/24/2022 10:19:06 - INFO - codeparrot_training - Step 6759: {'lr': 0.00048797058019979837, 'samples': 3461120, 'steps': 6759, 'loss/train': 1.6564418077468872} +02/24/2022 10:19:09 - INFO - codeparrot_training - Step 6760: {'lr': 0.00048796556518826195, 'samples': 3461632, 'steps': 6760, 'loss/train': 2.486419439315796} +02/24/2022 10:19:15 - INFO - codeparrot_training - Step 6761: {'lr': 0.00048796054915735664, 'samples': 3462144, 'steps': 6761, 'loss/train': 2.234741449356079} +02/24/2022 10:19:18 - INFO - codeparrot_training - Step 6762: {'lr': 0.00048795553210710397, 'samples': 3462656, 'steps': 6762, 'loss/train': 2.0524232387542725} +02/24/2022 10:19:24 - INFO - codeparrot_training - Step 6763: {'lr': 0.00048795051403752534, 'samples': 3463168, 'steps': 6763, 'loss/train': 2.742936372756958} +02/24/2022 10:19:27 - INFO - codeparrot_training - Step 6764: {'lr': 0.0004879454949486422, 'samples': 3463680, 'steps': 6764, 'loss/train': 2.63751220703125} +02/24/2022 10:19:33 - INFO - codeparrot_training - Step 6765: {'lr': 0.00048794047484047615, 'samples': 3464192, 'steps': 6765, 'loss/train': 3.1249783039093018} +02/24/2022 10:19:36 - INFO - codeparrot_training - Step 6766: {'lr': 0.00048793545371304863, 'samples': 3464704, 'steps': 6766, 'loss/train': 2.2070140838623047} +02/24/2022 10:19:42 - INFO - codeparrot_training - Step 6767: {'lr': 0.0004879304315663813, 'samples': 3465216, 'steps': 6767, 'loss/train': 2.943758249282837} +02/24/2022 10:19:45 - INFO - codeparrot_training - Step 6768: {'lr': 0.00048792540840049544, 'samples': 3465728, 'steps': 6768, 'loss/train': 1.2936748266220093} +02/24/2022 10:19:51 - INFO - codeparrot_training - Step 6769: {'lr': 0.00048792038421541266, 'samples': 3466240, 'steps': 6769, 'loss/train': 1.001870036125183} +02/24/2022 10:19:55 - INFO - codeparrot_training - Step 6770: {'lr': 0.0004879153590111546, 'samples': 3466752, 'steps': 6770, 'loss/train': 2.885420083999634} +02/24/2022 10:20:00 - INFO - codeparrot_training - Step 6771: {'lr': 0.0004879103327877426, 'samples': 3467264, 'steps': 6771, 'loss/train': 2.096390962600708} +02/24/2022 10:20:04 - INFO - codeparrot_training - Step 6772: {'lr': 0.0004879053055451983, 'samples': 3467776, 'steps': 6772, 'loss/train': 2.433114528656006} +02/24/2022 10:20:09 - INFO - codeparrot_training - Step 6773: {'lr': 0.00048790027728354323, 'samples': 3468288, 'steps': 6773, 'loss/train': 2.698664903640747} +02/24/2022 10:20:12 - INFO - codeparrot_training - Step 6774: {'lr': 0.0004878952480027989, 'samples': 3468800, 'steps': 6774, 'loss/train': 2.100001573562622} +02/24/2022 10:20:18 - INFO - codeparrot_training - Step 6775: {'lr': 0.0004878902177029869, 'samples': 3469312, 'steps': 6775, 'loss/train': 2.992250919342041} +02/24/2022 10:20:21 - INFO - codeparrot_training - Step 6776: {'lr': 0.0004878851863841287, 'samples': 3469824, 'steps': 6776, 'loss/train': 1.8872079849243164} +02/24/2022 10:20:27 - INFO - codeparrot_training - Step 6777: {'lr': 0.00048788015404624597, 'samples': 3470336, 'steps': 6777, 'loss/train': 6.179102420806885} +02/24/2022 10:20:30 - INFO - codeparrot_training - Step 6778: {'lr': 0.0004878751206893601, 'samples': 3470848, 'steps': 6778, 'loss/train': 2.476759910583496} +02/24/2022 10:20:36 - INFO - codeparrot_training - Step 6779: {'lr': 0.0004878700863134928, 'samples': 3471360, 'steps': 6779, 'loss/train': 2.795654535293579} +02/24/2022 10:20:40 - INFO - codeparrot_training - Step 6780: {'lr': 0.00048786505091866564, 'samples': 3471872, 'steps': 6780, 'loss/train': 2.3990864753723145} +02/24/2022 10:20:45 - INFO - codeparrot_training - Step 6781: {'lr': 0.0004878600145049001, 'samples': 3472384, 'steps': 6781, 'loss/train': 1.354610562324524} +02/24/2022 10:20:51 - INFO - codeparrot_training - Step 6782: {'lr': 0.0004878549770722177, 'samples': 3472896, 'steps': 6782, 'loss/train': 2.642122983932495} +02/24/2022 10:20:54 - INFO - codeparrot_training - Step 6783: {'lr': 0.0004878499386206402, 'samples': 3473408, 'steps': 6783, 'loss/train': 2.210378885269165} +02/24/2022 10:20:58 - INFO - codeparrot_training - Step 6784: {'lr': 0.000487844899150189, 'samples': 3473920, 'steps': 6784, 'loss/train': 1.9647313356399536} +02/24/2022 10:21:04 - INFO - codeparrot_training - Step 6785: {'lr': 0.0004878398586608859, 'samples': 3474432, 'steps': 6785, 'loss/train': 2.1268482208251953} +02/24/2022 10:21:08 - INFO - codeparrot_training - Step 6786: {'lr': 0.0004878348171527523, 'samples': 3474944, 'steps': 6786, 'loss/train': 1.8752449750900269} +02/24/2022 10:21:13 - INFO - codeparrot_training - Step 6787: {'lr': 0.0004878297746258099, 'samples': 3475456, 'steps': 6787, 'loss/train': 4.299953460693359} +02/24/2022 10:21:19 - INFO - codeparrot_training - Step 6788: {'lr': 0.0004878247310800802, 'samples': 3475968, 'steps': 6788, 'loss/train': 2.0954201221466064} +02/24/2022 10:21:23 - INFO - codeparrot_training - Step 6789: {'lr': 0.0004878196865155849, 'samples': 3476480, 'steps': 6789, 'loss/train': 1.4868054389953613} +02/24/2022 10:21:26 - INFO - codeparrot_training - Step 6790: {'lr': 0.0004878146409323456, 'samples': 3476992, 'steps': 6790, 'loss/train': 2.5076186656951904} +02/24/2022 10:21:32 - INFO - codeparrot_training - Step 6791: {'lr': 0.00048780959433038386, 'samples': 3477504, 'steps': 6791, 'loss/train': 2.459852457046509} +02/24/2022 10:21:35 - INFO - codeparrot_training - Step 6792: {'lr': 0.00048780454670972127, 'samples': 3478016, 'steps': 6792, 'loss/train': 2.1706223487854004} +02/24/2022 10:21:41 - INFO - codeparrot_training - Step 6793: {'lr': 0.00048779949807037967, 'samples': 3478528, 'steps': 6793, 'loss/train': 2.4286062717437744} +02/24/2022 10:21:44 - INFO - codeparrot_training - Step 6794: {'lr': 0.0004877944484123804, 'samples': 3479040, 'steps': 6794, 'loss/train': 2.3741579055786133} +02/24/2022 10:21:51 - INFO - codeparrot_training - Step 6795: {'lr': 0.00048778939773574525, 'samples': 3479552, 'steps': 6795, 'loss/train': 3.1717588901519775} +02/24/2022 10:21:54 - INFO - codeparrot_training - Step 6796: {'lr': 0.0004877843460404959, 'samples': 3480064, 'steps': 6796, 'loss/train': 3.162328004837036} +02/24/2022 10:22:00 - INFO - codeparrot_training - Step 6797: {'lr': 0.00048777929332665385, 'samples': 3480576, 'steps': 6797, 'loss/train': 2.5856754779815674} +02/24/2022 10:22:03 - INFO - codeparrot_training - Step 6798: {'lr': 0.00048777423959424083, 'samples': 3481088, 'steps': 6798, 'loss/train': 3.2038838863372803} +02/24/2022 10:22:09 - INFO - codeparrot_training - Step 6799: {'lr': 0.00048776918484327847, 'samples': 3481600, 'steps': 6799, 'loss/train': 2.595067024230957} +02/24/2022 10:22:12 - INFO - codeparrot_training - Step 6800: {'lr': 0.0004877641290737884, 'samples': 3482112, 'steps': 6800, 'loss/train': 2.380418300628662} +02/24/2022 10:22:18 - INFO - codeparrot_training - Step 6801: {'lr': 0.0004877590722857923, 'samples': 3482624, 'steps': 6801, 'loss/train': 3.4098522663116455} +02/24/2022 10:22:21 - INFO - codeparrot_training - Step 6802: {'lr': 0.00048775401447931187, 'samples': 3483136, 'steps': 6802, 'loss/train': 1.665794014930725} +02/24/2022 10:22:27 - INFO - codeparrot_training - Step 6803: {'lr': 0.0004877489556543687, 'samples': 3483648, 'steps': 6803, 'loss/train': 1.0300036668777466} +02/24/2022 10:22:30 - INFO - codeparrot_training - Step 6804: {'lr': 0.00048774389581098454, 'samples': 3484160, 'steps': 6804, 'loss/train': 1.4774547815322876} +02/24/2022 10:22:36 - INFO - codeparrot_training - Step 6805: {'lr': 0.00048773883494918096, 'samples': 3484672, 'steps': 6805, 'loss/train': 0.5300992727279663} +02/24/2022 10:22:40 - INFO - codeparrot_training - Step 6806: {'lr': 0.0004877337730689797, 'samples': 3485184, 'steps': 6806, 'loss/train': 2.1091196537017822} +02/24/2022 10:22:45 - INFO - codeparrot_training - Step 6807: {'lr': 0.00048772871017040256, 'samples': 3485696, 'steps': 6807, 'loss/train': 0.8454818725585938} +02/24/2022 10:22:49 - INFO - codeparrot_training - Step 6808: {'lr': 0.000487723646253471, 'samples': 3486208, 'steps': 6808, 'loss/train': 2.7593982219696045} +02/24/2022 10:22:54 - INFO - codeparrot_training - Step 6809: {'lr': 0.00048771858131820684, 'samples': 3486720, 'steps': 6809, 'loss/train': 0.9625942707061768} +02/24/2022 10:22:58 - INFO - codeparrot_training - Step 6810: {'lr': 0.0004877135153646318, 'samples': 3487232, 'steps': 6810, 'loss/train': 2.464139938354492} +02/24/2022 10:23:03 - INFO - codeparrot_training - Step 6811: {'lr': 0.0004877084483927675, 'samples': 3487744, 'steps': 6811, 'loss/train': 2.0602457523345947} +02/24/2022 10:23:07 - INFO - codeparrot_training - Step 6812: {'lr': 0.00048770338040263574, 'samples': 3488256, 'steps': 6812, 'loss/train': 2.813868522644043} +02/24/2022 10:23:12 - INFO - codeparrot_training - Step 6813: {'lr': 0.00048769831139425815, 'samples': 3488768, 'steps': 6813, 'loss/train': 2.190279483795166} +02/24/2022 10:23:16 - INFO - codeparrot_training - Step 6814: {'lr': 0.0004876932413676565, 'samples': 3489280, 'steps': 6814, 'loss/train': 3.5951104164123535} +02/24/2022 10:23:22 - INFO - codeparrot_training - Step 6815: {'lr': 0.0004876881703228524, 'samples': 3489792, 'steps': 6815, 'loss/train': 1.5487090349197388} +02/24/2022 10:23:26 - INFO - codeparrot_training - Step 6816: {'lr': 0.0004876830982598677, 'samples': 3490304, 'steps': 6816, 'loss/train': 3.0427701473236084} +02/24/2022 10:23:31 - INFO - codeparrot_training - Step 6817: {'lr': 0.0004876780251787241, 'samples': 3490816, 'steps': 6817, 'loss/train': 2.7557144165039062} +02/24/2022 10:23:35 - INFO - codeparrot_training - Step 6818: {'lr': 0.0004876729510794433, 'samples': 3491328, 'steps': 6818, 'loss/train': 1.4410158395767212} +02/24/2022 10:23:40 - INFO - codeparrot_training - Step 6819: {'lr': 0.00048766787596204704, 'samples': 3491840, 'steps': 6819, 'loss/train': 2.8650803565979004} +02/24/2022 10:23:44 - INFO - codeparrot_training - Step 6820: {'lr': 0.000487662799826557, 'samples': 3492352, 'steps': 6820, 'loss/train': 3.0239808559417725} +02/24/2022 10:23:49 - INFO - codeparrot_training - Step 6821: {'lr': 0.00048765772267299513, 'samples': 3492864, 'steps': 6821, 'loss/train': 2.364173173904419} +02/24/2022 10:23:53 - INFO - codeparrot_training - Step 6822: {'lr': 0.00048765264450138297, 'samples': 3493376, 'steps': 6822, 'loss/train': 3.3968331813812256} +02/24/2022 10:23:58 - INFO - codeparrot_training - Step 6823: {'lr': 0.00048764756531174237, 'samples': 3493888, 'steps': 6823, 'loss/train': 2.6022391319274902} +02/24/2022 10:24:02 - INFO - codeparrot_training - Step 6824: {'lr': 0.000487642485104095, 'samples': 3494400, 'steps': 6824, 'loss/train': 1.6639845371246338} +02/24/2022 10:24:07 - INFO - codeparrot_training - Step 6825: {'lr': 0.0004876374038784627, 'samples': 3494912, 'steps': 6825, 'loss/train': 0.5440935492515564} +02/24/2022 10:24:11 - INFO - codeparrot_training - Step 6826: {'lr': 0.0004876323216348673, 'samples': 3495424, 'steps': 6826, 'loss/train': 1.8696203231811523} +02/24/2022 10:24:16 - INFO - codeparrot_training - Step 6827: {'lr': 0.0004876272383733304, 'samples': 3495936, 'steps': 6827, 'loss/train': 1.1684650182724} +02/24/2022 10:24:20 - INFO - codeparrot_training - Step 6828: {'lr': 0.0004876221540938739, 'samples': 3496448, 'steps': 6828, 'loss/train': 2.7044548988342285} +02/24/2022 10:24:26 - INFO - codeparrot_training - Step 6829: {'lr': 0.00048761706879651956, 'samples': 3496960, 'steps': 6829, 'loss/train': 2.303992748260498} +02/24/2022 10:24:31 - INFO - codeparrot_training - Step 6830: {'lr': 0.00048761198248128913, 'samples': 3497472, 'steps': 6830, 'loss/train': 2.1266443729400635} +02/24/2022 10:24:35 - INFO - codeparrot_training - Step 6831: {'lr': 0.00048760689514820444, 'samples': 3497984, 'steps': 6831, 'loss/train': 3.373574733734131} +02/24/2022 10:24:38 - INFO - codeparrot_training - Step 6832: {'lr': 0.0004876018067972872, 'samples': 3498496, 'steps': 6832, 'loss/train': 2.0352985858917236} +02/24/2022 10:24:44 - INFO - codeparrot_training - Step 6833: {'lr': 0.00048759671742855935, 'samples': 3499008, 'steps': 6833, 'loss/train': 2.7940714359283447} +02/24/2022 10:24:48 - INFO - codeparrot_training - Step 6834: {'lr': 0.00048759162704204253, 'samples': 3499520, 'steps': 6834, 'loss/train': 2.209381341934204} +02/24/2022 10:24:53 - INFO - codeparrot_training - Step 6835: {'lr': 0.0004875865356377587, 'samples': 3500032, 'steps': 6835, 'loss/train': 3.131401538848877} +02/24/2022 10:24:57 - INFO - codeparrot_training - Step 6836: {'lr': 0.0004875814432157295, 'samples': 3500544, 'steps': 6836, 'loss/train': 1.1384847164154053} +02/24/2022 10:25:02 - INFO - codeparrot_training - Step 6837: {'lr': 0.0004875763497759769, 'samples': 3501056, 'steps': 6837, 'loss/train': 2.843522071838379} +02/24/2022 10:25:06 - INFO - codeparrot_training - Step 6838: {'lr': 0.00048757125531852263, 'samples': 3501568, 'steps': 6838, 'loss/train': 1.1262342929840088} +02/24/2022 10:25:11 - INFO - codeparrot_training - Step 6839: {'lr': 0.00048756615984338857, 'samples': 3502080, 'steps': 6839, 'loss/train': 2.284484386444092} +02/24/2022 10:25:15 - INFO - codeparrot_training - Step 6840: {'lr': 0.0004875610633505965, 'samples': 3502592, 'steps': 6840, 'loss/train': 3.321861505508423} +02/24/2022 10:25:20 - INFO - codeparrot_training - Step 6841: {'lr': 0.00048755596584016824, 'samples': 3503104, 'steps': 6841, 'loss/train': 1.028484582901001} +02/24/2022 10:25:24 - INFO - codeparrot_training - Step 6842: {'lr': 0.0004875508673121257, 'samples': 3503616, 'steps': 6842, 'loss/train': 4.017027854919434} +02/24/2022 10:25:30 - INFO - codeparrot_training - Step 6843: {'lr': 0.00048754576776649066, 'samples': 3504128, 'steps': 6843, 'loss/train': 2.2528154850006104} +02/24/2022 10:25:34 - INFO - codeparrot_training - Step 6844: {'lr': 0.000487540667203285, 'samples': 3504640, 'steps': 6844, 'loss/train': 2.3947060108184814} +02/24/2022 10:25:39 - INFO - codeparrot_training - Step 6845: {'lr': 0.0004875355656225305, 'samples': 3505152, 'steps': 6845, 'loss/train': 3.461034059524536} +02/24/2022 10:25:43 - INFO - codeparrot_training - Step 6846: {'lr': 0.0004875304630242491, 'samples': 3505664, 'steps': 6846, 'loss/train': 1.7348238229751587} +02/24/2022 10:25:48 - INFO - codeparrot_training - Step 6847: {'lr': 0.00048752535940846267, 'samples': 3506176, 'steps': 6847, 'loss/train': 2.5499794483184814} +02/24/2022 10:25:54 - INFO - codeparrot_training - Step 6848: {'lr': 0.0004875202547751929, 'samples': 3506688, 'steps': 6848, 'loss/train': 0.7180147767066956} +02/24/2022 10:25:57 - INFO - codeparrot_training - Step 6849: {'lr': 0.00048751514912446185, 'samples': 3507200, 'steps': 6849, 'loss/train': 1.0598224401474} +02/24/2022 10:26:03 - INFO - codeparrot_training - Step 6850: {'lr': 0.0004875100424562914, 'samples': 3507712, 'steps': 6850, 'loss/train': 2.8479793071746826} +02/24/2022 10:26:06 - INFO - codeparrot_training - Step 6851: {'lr': 0.0004875049347707032, 'samples': 3508224, 'steps': 6851, 'loss/train': 1.7459993362426758} +02/24/2022 10:26:10 - INFO - codeparrot_training - Step 6852: {'lr': 0.00048749982606771934, 'samples': 3508736, 'steps': 6852, 'loss/train': 1.103472113609314} +02/24/2022 10:26:17 - INFO - codeparrot_training - Step 6853: {'lr': 0.00048749471634736163, 'samples': 3509248, 'steps': 6853, 'loss/train': 2.467160224914551} +02/24/2022 10:26:20 - INFO - codeparrot_training - Step 6854: {'lr': 0.0004874896056096521, 'samples': 3509760, 'steps': 6854, 'loss/train': 2.8990769386291504} +02/24/2022 10:26:26 - INFO - codeparrot_training - Step 6855: {'lr': 0.0004874844938546123, 'samples': 3510272, 'steps': 6855, 'loss/train': 1.8433427810668945} +02/24/2022 10:26:29 - INFO - codeparrot_training - Step 6856: {'lr': 0.0004874793810822644, 'samples': 3510784, 'steps': 6856, 'loss/train': 2.625093698501587} +02/24/2022 10:26:35 - INFO - codeparrot_training - Step 6857: {'lr': 0.00048747426729263036, 'samples': 3511296, 'steps': 6857, 'loss/train': 2.515077829360962} +02/24/2022 10:26:38 - INFO - codeparrot_training - Step 6858: {'lr': 0.0004874691524857318, 'samples': 3511808, 'steps': 6858, 'loss/train': 2.274050235748291} +02/24/2022 10:26:44 - INFO - codeparrot_training - Step 6859: {'lr': 0.00048746403666159087, 'samples': 3512320, 'steps': 6859, 'loss/train': 3.2021071910858154} +02/24/2022 10:26:47 - INFO - codeparrot_training - Step 6860: {'lr': 0.0004874589198202294, 'samples': 3512832, 'steps': 6860, 'loss/train': 2.418423652648926} +02/24/2022 10:26:52 - INFO - codeparrot_training - Step 6861: {'lr': 0.0004874538019616693, 'samples': 3513344, 'steps': 6861, 'loss/train': 2.798901081085205} +02/24/2022 10:26:56 - INFO - codeparrot_training - Step 6862: {'lr': 0.0004874486830859326, 'samples': 3513856, 'steps': 6862, 'loss/train': 2.0700607299804688} +02/24/2022 10:27:03 - INFO - codeparrot_training - Step 6863: {'lr': 0.0004874435631930411, 'samples': 3514368, 'steps': 6863, 'loss/train': 3.3550164699554443} +02/24/2022 10:27:08 - INFO - codeparrot_training - Step 6864: {'lr': 0.0004874384422830167, 'samples': 3514880, 'steps': 6864, 'loss/train': 1.916977047920227} +02/24/2022 10:27:12 - INFO - codeparrot_training - Step 6865: {'lr': 0.0004874333203558815, 'samples': 3515392, 'steps': 6865, 'loss/train': 2.3820817470550537} +02/24/2022 10:27:15 - INFO - codeparrot_training - Step 6866: {'lr': 0.0004874281974116573, 'samples': 3515904, 'steps': 6866, 'loss/train': 2.2846484184265137} +02/24/2022 10:27:21 - INFO - codeparrot_training - Step 6867: {'lr': 0.0004874230734503661, 'samples': 3516416, 'steps': 6867, 'loss/train': 1.8415343761444092} +02/24/2022 10:27:24 - INFO - codeparrot_training - Step 6868: {'lr': 0.00048741794847202984, 'samples': 3516928, 'steps': 6868, 'loss/train': 6.344202518463135} +02/24/2022 10:27:30 - INFO - codeparrot_training - Step 6869: {'lr': 0.00048741282247667054, 'samples': 3517440, 'steps': 6869, 'loss/train': 3.0049126148223877} +02/24/2022 10:27:33 - INFO - codeparrot_training - Step 6870: {'lr': 0.00048740769546431, 'samples': 3517952, 'steps': 6870, 'loss/train': 2.6367475986480713} +02/24/2022 10:27:39 - INFO - codeparrot_training - Step 6871: {'lr': 0.0004874025674349704, 'samples': 3518464, 'steps': 6871, 'loss/train': 1.9336625337600708} +02/24/2022 10:27:42 - INFO - codeparrot_training - Step 6872: {'lr': 0.00048739743838867344, 'samples': 3518976, 'steps': 6872, 'loss/train': 2.931097984313965} +02/24/2022 10:27:48 - INFO - codeparrot_training - Step 6873: {'lr': 0.0004873923083254413, 'samples': 3519488, 'steps': 6873, 'loss/train': 0.9362912774085999} +02/24/2022 10:27:54 - INFO - codeparrot_training - Step 6874: {'lr': 0.0004873871772452959, 'samples': 3520000, 'steps': 6874, 'loss/train': 0.2874855101108551} +02/24/2022 10:27:57 - INFO - codeparrot_training - Step 6875: {'lr': 0.00048738204514825917, 'samples': 3520512, 'steps': 6875, 'loss/train': 2.248539686203003} +02/24/2022 10:28:04 - INFO - codeparrot_training - Step 6876: {'lr': 0.0004873769120343532, 'samples': 3521024, 'steps': 6876, 'loss/train': 2.1301231384277344} +02/24/2022 10:28:07 - INFO - codeparrot_training - Step 6877: {'lr': 0.0004873717779035999, 'samples': 3521536, 'steps': 6877, 'loss/train': 1.4798696041107178} +02/24/2022 10:28:13 - INFO - codeparrot_training - Step 6878: {'lr': 0.00048736664275602124, 'samples': 3522048, 'steps': 6878, 'loss/train': 2.549281120300293} +02/24/2022 10:28:17 - INFO - codeparrot_training - Step 6879: {'lr': 0.00048736150659163925, 'samples': 3522560, 'steps': 6879, 'loss/train': 2.0744125843048096} +02/24/2022 10:28:20 - INFO - codeparrot_training - Step 6880: {'lr': 0.000487356369410476, 'samples': 3523072, 'steps': 6880, 'loss/train': 3.563528299331665} +02/24/2022 10:28:26 - INFO - codeparrot_training - Step 6881: {'lr': 0.00048735123121255335, 'samples': 3523584, 'steps': 6881, 'loss/train': 1.9544663429260254} +02/24/2022 10:28:29 - INFO - codeparrot_training - Step 6882: {'lr': 0.0004873460919978935, 'samples': 3524096, 'steps': 6882, 'loss/train': 2.161992311477661} +02/24/2022 10:28:35 - INFO - codeparrot_training - Step 6883: {'lr': 0.00048734095176651825, 'samples': 3524608, 'steps': 6883, 'loss/train': 2.713358163833618} +02/24/2022 10:28:38 - INFO - codeparrot_training - Step 6884: {'lr': 0.00048733581051844976, 'samples': 3525120, 'steps': 6884, 'loss/train': 3.582826614379883} +02/24/2022 10:28:44 - INFO - codeparrot_training - Step 6885: {'lr': 0.0004873306682537101, 'samples': 3525632, 'steps': 6885, 'loss/train': 2.634448766708374} +02/24/2022 10:28:47 - INFO - codeparrot_training - Step 6886: {'lr': 0.0004873255249723211, 'samples': 3526144, 'steps': 6886, 'loss/train': 2.45340895652771} +02/24/2022 10:28:53 - INFO - codeparrot_training - Step 6887: {'lr': 0.000487320380674305, 'samples': 3526656, 'steps': 6887, 'loss/train': 2.3462326526641846} +02/24/2022 10:28:56 - INFO - codeparrot_training - Step 6888: {'lr': 0.0004873152353596837, 'samples': 3527168, 'steps': 6888, 'loss/train': 1.6121147871017456} +02/24/2022 10:29:02 - INFO - codeparrot_training - Step 6889: {'lr': 0.00048731008902847927, 'samples': 3527680, 'steps': 6889, 'loss/train': 2.879335641860962} +02/24/2022 10:29:06 - INFO - codeparrot_training - Step 6890: {'lr': 0.0004873049416807138, 'samples': 3528192, 'steps': 6890, 'loss/train': 2.384963274002075} +02/24/2022 10:29:11 - INFO - codeparrot_training - Step 6891: {'lr': 0.00048729979331640927, 'samples': 3528704, 'steps': 6891, 'loss/train': 2.614638566970825} +02/24/2022 10:29:15 - INFO - codeparrot_training - Step 6892: {'lr': 0.0004872946439355879, 'samples': 3529216, 'steps': 6892, 'loss/train': 2.283127546310425} +02/24/2022 10:29:20 - INFO - codeparrot_training - Step 6893: {'lr': 0.0004872894935382715, 'samples': 3529728, 'steps': 6893, 'loss/train': 2.558030366897583} +02/24/2022 10:29:24 - INFO - codeparrot_training - Step 6894: {'lr': 0.00048728434212448233, 'samples': 3530240, 'steps': 6894, 'loss/train': 2.4514951705932617} +02/24/2022 10:29:29 - INFO - codeparrot_training - Step 6895: {'lr': 0.0004872791896942423, 'samples': 3530752, 'steps': 6895, 'loss/train': 1.9218103885650635} +02/24/2022 10:29:33 - INFO - codeparrot_training - Step 6896: {'lr': 0.0004872740362475737, 'samples': 3531264, 'steps': 6896, 'loss/train': 2.0093512535095215} +02/24/2022 10:29:38 - INFO - codeparrot_training - Step 6897: {'lr': 0.00048726888178449835, 'samples': 3531776, 'steps': 6897, 'loss/train': 1.5521132946014404} +02/24/2022 10:29:44 - INFO - codeparrot_training - Step 6898: {'lr': 0.00048726372630503845, 'samples': 3532288, 'steps': 6898, 'loss/train': 3.6632180213928223} +02/24/2022 10:29:48 - INFO - codeparrot_training - Step 6899: {'lr': 0.00048725856980921616, 'samples': 3532800, 'steps': 6899, 'loss/train': 1.849155306816101} +02/24/2022 10:29:53 - INFO - codeparrot_training - Step 6900: {'lr': 0.0004872534122970535, 'samples': 3533312, 'steps': 6900, 'loss/train': 0.6469098925590515} +02/24/2022 10:29:57 - INFO - codeparrot_training - Step 6901: {'lr': 0.00048724825376857253, 'samples': 3533824, 'steps': 6901, 'loss/train': 1.8186110258102417} +02/24/2022 10:30:03 - INFO - codeparrot_training - Step 6902: {'lr': 0.0004872430942237953, 'samples': 3534336, 'steps': 6902, 'loss/train': 1.0362597703933716} +02/24/2022 10:30:06 - INFO - codeparrot_training - Step 6903: {'lr': 0.0004872379336627441, 'samples': 3534848, 'steps': 6903, 'loss/train': 2.4244754314422607} +02/24/2022 10:30:11 - INFO - codeparrot_training - Step 6904: {'lr': 0.0004872327720854409, 'samples': 3535360, 'steps': 6904, 'loss/train': 1.561259388923645} +02/24/2022 10:30:15 - INFO - codeparrot_training - Step 6905: {'lr': 0.0004872276094919078, 'samples': 3535872, 'steps': 6905, 'loss/train': 2.42621111869812} +02/24/2022 10:30:20 - INFO - codeparrot_training - Step 6906: {'lr': 0.00048722244588216695, 'samples': 3536384, 'steps': 6906, 'loss/train': 2.059445381164551} +02/24/2022 10:30:24 - INFO - codeparrot_training - Step 6907: {'lr': 0.00048721728125624054, 'samples': 3536896, 'steps': 6907, 'loss/train': 2.377080202102661} +02/24/2022 10:30:30 - INFO - codeparrot_training - Step 6908: {'lr': 0.0004872121156141506, 'samples': 3537408, 'steps': 6908, 'loss/train': 0.697849452495575} +02/24/2022 10:30:33 - INFO - codeparrot_training - Step 6909: {'lr': 0.0004872069489559192, 'samples': 3537920, 'steps': 6909, 'loss/train': 2.2627804279327393} +02/24/2022 10:30:37 - INFO - codeparrot_training - Step 6910: {'lr': 0.00048720178128156856, 'samples': 3538432, 'steps': 6910, 'loss/train': 2.5727944374084473} +02/24/2022 10:30:42 - INFO - codeparrot_training - Step 6911: {'lr': 0.00048719661259112086, 'samples': 3538944, 'steps': 6911, 'loss/train': 1.2028506994247437} +02/24/2022 10:30:46 - INFO - codeparrot_training - Step 6912: {'lr': 0.0004871914428845982, 'samples': 3539456, 'steps': 6912, 'loss/train': 3.0690088272094727} +02/24/2022 10:30:51 - INFO - codeparrot_training - Step 6913: {'lr': 0.0004871862721620227, 'samples': 3539968, 'steps': 6913, 'loss/train': 3.2593812942504883} +02/24/2022 10:30:55 - INFO - codeparrot_training - Step 6914: {'lr': 0.0004871811004234165, 'samples': 3540480, 'steps': 6914, 'loss/train': 3.0025129318237305} +02/24/2022 10:31:01 - INFO - codeparrot_training - Step 6915: {'lr': 0.0004871759276688018, 'samples': 3540992, 'steps': 6915, 'loss/train': 3.003502607345581} +02/24/2022 10:31:05 - INFO - codeparrot_training - Step 6916: {'lr': 0.00048717075389820074, 'samples': 3541504, 'steps': 6916, 'loss/train': 2.9868600368499756} +02/24/2022 10:31:10 - INFO - codeparrot_training - Step 6917: {'lr': 0.0004871655791116355, 'samples': 3542016, 'steps': 6917, 'loss/train': 2.282160520553589} +02/24/2022 10:31:14 - INFO - codeparrot_training - Step 6918: {'lr': 0.00048716040330912816, 'samples': 3542528, 'steps': 6918, 'loss/train': 2.1020379066467285} +02/24/2022 10:31:19 - INFO - codeparrot_training - Step 6919: {'lr': 0.000487155226490701, 'samples': 3543040, 'steps': 6919, 'loss/train': 2.961427688598633} +02/24/2022 10:31:23 - INFO - codeparrot_training - Step 6920: {'lr': 0.0004871500486563761, 'samples': 3543552, 'steps': 6920, 'loss/train': 1.9515552520751953} +02/24/2022 10:31:29 - INFO - codeparrot_training - Step 6921: {'lr': 0.00048714486980617577, 'samples': 3544064, 'steps': 6921, 'loss/train': 2.1839396953582764} +02/24/2022 10:31:32 - INFO - codeparrot_training - Step 6922: {'lr': 0.00048713968994012216, 'samples': 3544576, 'steps': 6922, 'loss/train': 3.6249005794525146} +02/24/2022 10:31:38 - INFO - codeparrot_training - Step 6923: {'lr': 0.00048713450905823736, 'samples': 3545088, 'steps': 6923, 'loss/train': 1.58771550655365} +02/24/2022 10:31:41 - INFO - codeparrot_training - Step 6924: {'lr': 0.0004871293271605436, 'samples': 3545600, 'steps': 6924, 'loss/train': 3.40474534034729} +02/24/2022 10:31:47 - INFO - codeparrot_training - Step 6925: {'lr': 0.00048712414424706315, 'samples': 3546112, 'steps': 6925, 'loss/train': 1.908578634262085} +02/24/2022 10:31:51 - INFO - codeparrot_training - Step 6926: {'lr': 0.0004871189603178181, 'samples': 3546624, 'steps': 6926, 'loss/train': 2.4061009883880615} +02/24/2022 10:31:56 - INFO - codeparrot_training - Step 6927: {'lr': 0.00048711377537283073, 'samples': 3547136, 'steps': 6927, 'loss/train': 1.7508264780044556} +02/24/2022 10:32:00 - INFO - codeparrot_training - Step 6928: {'lr': 0.0004871085894121233, 'samples': 3547648, 'steps': 6928, 'loss/train': 2.1758227348327637} +02/24/2022 10:32:05 - INFO - codeparrot_training - Step 6929: {'lr': 0.00048710340243571796, 'samples': 3548160, 'steps': 6929, 'loss/train': 2.505138635635376} +02/24/2022 10:32:09 - INFO - codeparrot_training - Step 6930: {'lr': 0.0004870982144436369, 'samples': 3548672, 'steps': 6930, 'loss/train': 1.9964663982391357} +02/24/2022 10:32:14 - INFO - codeparrot_training - Step 6931: {'lr': 0.0004870930254359023, 'samples': 3549184, 'steps': 6931, 'loss/train': 2.8455898761749268} +02/24/2022 10:32:18 - INFO - codeparrot_training - Step 6932: {'lr': 0.00048708783541253655, 'samples': 3549696, 'steps': 6932, 'loss/train': 1.9579960107803345} +02/24/2022 10:32:23 - INFO - codeparrot_training - Step 6933: {'lr': 0.0004870826443735618, 'samples': 3550208, 'steps': 6933, 'loss/train': 3.1020195484161377} +02/24/2022 10:32:27 - INFO - codeparrot_training - Step 6934: {'lr': 0.0004870774523190003, 'samples': 3550720, 'steps': 6934, 'loss/train': 2.6237692832946777} +02/24/2022 10:32:33 - INFO - codeparrot_training - Step 6935: {'lr': 0.00048707225924887423, 'samples': 3551232, 'steps': 6935, 'loss/train': 1.7438384294509888} +02/24/2022 10:32:36 - INFO - codeparrot_training - Step 6936: {'lr': 0.0004870670651632059, 'samples': 3551744, 'steps': 6936, 'loss/train': 1.662680745124817} +02/24/2022 10:32:42 - INFO - codeparrot_training - Step 6937: {'lr': 0.0004870618700620175, 'samples': 3552256, 'steps': 6937, 'loss/train': 2.110260009765625} +02/24/2022 10:32:45 - INFO - codeparrot_training - Step 6938: {'lr': 0.0004870566739453314, 'samples': 3552768, 'steps': 6938, 'loss/train': 2.6720337867736816} +02/24/2022 10:32:51 - INFO - codeparrot_training - Step 6939: {'lr': 0.00048705147681316974, 'samples': 3553280, 'steps': 6939, 'loss/train': 2.551278591156006} +02/24/2022 10:32:54 - INFO - codeparrot_training - Step 6940: {'lr': 0.00048704627866555486, 'samples': 3553792, 'steps': 6940, 'loss/train': 1.92770254611969} +02/24/2022 10:33:00 - INFO - codeparrot_training - Step 6941: {'lr': 0.00048704107950250887, 'samples': 3554304, 'steps': 6941, 'loss/train': 1.7141563892364502} +02/24/2022 10:33:03 - INFO - codeparrot_training - Step 6942: {'lr': 0.0004870358793240543, 'samples': 3554816, 'steps': 6942, 'loss/train': 2.0228426456451416} +02/24/2022 10:33:09 - INFO - codeparrot_training - Step 6943: {'lr': 0.00048703067813021323, 'samples': 3555328, 'steps': 6943, 'loss/train': 2.6666617393493652} +02/24/2022 10:33:12 - INFO - codeparrot_training - Step 6944: {'lr': 0.000487025475921008, 'samples': 3555840, 'steps': 6944, 'loss/train': 1.5000585317611694} +02/24/2022 10:33:19 - INFO - codeparrot_training - Step 6945: {'lr': 0.0004870202726964609, 'samples': 3556352, 'steps': 6945, 'loss/train': 2.0828866958618164} +02/24/2022 10:33:22 - INFO - codeparrot_training - Step 6946: {'lr': 0.0004870150684565943, 'samples': 3556864, 'steps': 6946, 'loss/train': 0.9956005811691284} +02/24/2022 10:33:27 - INFO - codeparrot_training - Step 6947: {'lr': 0.00048700986320143026, 'samples': 3557376, 'steps': 6947, 'loss/train': 1.6129735708236694} +02/24/2022 10:33:31 - INFO - codeparrot_training - Step 6948: {'lr': 0.0004870046569309913, 'samples': 3557888, 'steps': 6948, 'loss/train': 2.7539186477661133} +02/24/2022 10:33:36 - INFO - codeparrot_training - Step 6949: {'lr': 0.0004869994496452996, 'samples': 3558400, 'steps': 6949, 'loss/train': 3.8030338287353516} +02/24/2022 10:33:40 - INFO - codeparrot_training - Step 6950: {'lr': 0.0004869942413443776, 'samples': 3558912, 'steps': 6950, 'loss/train': 3.0598342418670654} +02/24/2022 10:33:45 - INFO - codeparrot_training - Step 6951: {'lr': 0.0004869890320282475, 'samples': 3559424, 'steps': 6951, 'loss/train': 1.4042478799819946} +02/24/2022 10:33:49 - INFO - codeparrot_training - Step 6952: {'lr': 0.0004869838216969316, 'samples': 3559936, 'steps': 6952, 'loss/train': 1.1975939273834229} +02/24/2022 10:33:54 - INFO - codeparrot_training - Step 6953: {'lr': 0.0004869786103504523, 'samples': 3560448, 'steps': 6953, 'loss/train': 3.341160535812378} +02/24/2022 10:33:58 - INFO - codeparrot_training - Step 6954: {'lr': 0.0004869733979888319, 'samples': 3560960, 'steps': 6954, 'loss/train': 2.8100550174713135} +02/24/2022 10:34:03 - INFO - codeparrot_training - Step 6955: {'lr': 0.00048696818461209265, 'samples': 3561472, 'steps': 6955, 'loss/train': 1.7726314067840576} +02/24/2022 10:34:09 - INFO - codeparrot_training - Step 6956: {'lr': 0.0004869629702202569, 'samples': 3561984, 'steps': 6956, 'loss/train': 2.453686475753784} +02/24/2022 10:34:12 - INFO - codeparrot_training - Step 6957: {'lr': 0.0004869577548133471, 'samples': 3562496, 'steps': 6957, 'loss/train': 1.8270617723464966} +02/24/2022 10:34:18 - INFO - codeparrot_training - Step 6958: {'lr': 0.00048695253839138553, 'samples': 3563008, 'steps': 6958, 'loss/train': 2.497138023376465} +02/24/2022 10:34:21 - INFO - codeparrot_training - Step 6959: {'lr': 0.0004869473209543945, 'samples': 3563520, 'steps': 6959, 'loss/train': 3.538966417312622} +02/24/2022 10:34:28 - INFO - codeparrot_training - Step 6960: {'lr': 0.00048694210250239646, 'samples': 3564032, 'steps': 6960, 'loss/train': 4.415807723999023} +02/24/2022 10:34:31 - INFO - codeparrot_training - Step 6961: {'lr': 0.0004869368830354136, 'samples': 3564544, 'steps': 6961, 'loss/train': 3.716559410095215} +02/24/2022 10:34:37 - INFO - codeparrot_training - Step 6962: {'lr': 0.00048693166255346843, 'samples': 3565056, 'steps': 6962, 'loss/train': 2.985011100769043} +02/24/2022 10:34:40 - INFO - codeparrot_training - Step 6963: {'lr': 0.0004869264410565832, 'samples': 3565568, 'steps': 6963, 'loss/train': 3.226262331008911} +02/24/2022 10:34:46 - INFO - codeparrot_training - Step 6964: {'lr': 0.00048692121854478033, 'samples': 3566080, 'steps': 6964, 'loss/train': 2.240480899810791} +02/24/2022 10:34:49 - INFO - codeparrot_training - Step 6965: {'lr': 0.00048691599501808223, 'samples': 3566592, 'steps': 6965, 'loss/train': 1.6684958934783936} +02/24/2022 10:34:55 - INFO - codeparrot_training - Step 6966: {'lr': 0.0004869107704765112, 'samples': 3567104, 'steps': 6966, 'loss/train': 8.854053497314453} +02/24/2022 10:34:58 - INFO - codeparrot_training - Step 6967: {'lr': 0.00048690554492008967, 'samples': 3567616, 'steps': 6967, 'loss/train': 2.2074220180511475} +02/24/2022 10:35:04 - INFO - codeparrot_training - Step 6968: {'lr': 0.00048690031834884004, 'samples': 3568128, 'steps': 6968, 'loss/train': 3.1572303771972656} +02/24/2022 10:35:07 - INFO - codeparrot_training - Step 6969: {'lr': 0.0004868950907627846, 'samples': 3568640, 'steps': 6969, 'loss/train': 3.5579230785369873} +02/24/2022 10:35:13 - INFO - codeparrot_training - Step 6970: {'lr': 0.00048688986216194585, 'samples': 3569152, 'steps': 6970, 'loss/train': 3.30551815032959} +02/24/2022 10:35:17 - INFO - codeparrot_training - Step 6971: {'lr': 0.0004868846325463462, 'samples': 3569664, 'steps': 6971, 'loss/train': 3.2069735527038574} +02/24/2022 10:35:22 - INFO - codeparrot_training - Step 6972: {'lr': 0.000486879401916008, 'samples': 3570176, 'steps': 6972, 'loss/train': 1.6296110153198242} +02/24/2022 10:35:26 - INFO - codeparrot_training - Step 6973: {'lr': 0.0004868741702709536, 'samples': 3570688, 'steps': 6973, 'loss/train': 2.4016482830047607} +02/24/2022 10:35:29 - INFO - codeparrot_training - Step 6974: {'lr': 0.0004868689376112055, 'samples': 3571200, 'steps': 6974, 'loss/train': 3.2272472381591797} +02/24/2022 10:35:35 - INFO - codeparrot_training - Step 6975: {'lr': 0.000486863703936786, 'samples': 3571712, 'steps': 6975, 'loss/train': 3.6157171726226807} +02/24/2022 10:35:38 - INFO - codeparrot_training - Step 6976: {'lr': 0.0004868584692477178, 'samples': 3572224, 'steps': 6976, 'loss/train': 1.9117414951324463} +02/24/2022 10:35:44 - INFO - codeparrot_training - Step 6977: {'lr': 0.000486853233544023, 'samples': 3572736, 'steps': 6977, 'loss/train': 2.51379656791687} +02/24/2022 10:35:47 - INFO - codeparrot_training - Step 6978: {'lr': 0.0004868479968257241, 'samples': 3573248, 'steps': 6978, 'loss/train': 3.8637757301330566} +02/24/2022 10:35:53 - INFO - codeparrot_training - Step 6979: {'lr': 0.0004868427590928437, 'samples': 3573760, 'steps': 6979, 'loss/train': 2.8012845516204834} +02/24/2022 10:35:59 - INFO - codeparrot_training - Step 6980: {'lr': 0.0004868375203454041, 'samples': 3574272, 'steps': 6980, 'loss/train': 2.2839508056640625} +02/24/2022 10:36:02 - INFO - codeparrot_training - Step 6981: {'lr': 0.0004868322805834278, 'samples': 3574784, 'steps': 6981, 'loss/train': 2.9954354763031006} +02/24/2022 10:36:08 - INFO - codeparrot_training - Step 6982: {'lr': 0.0004868270398069371, 'samples': 3575296, 'steps': 6982, 'loss/train': 2.1910762786865234} +02/24/2022 10:36:11 - INFO - codeparrot_training - Step 6983: {'lr': 0.0004868217980159546, 'samples': 3575808, 'steps': 6983, 'loss/train': 2.2371723651885986} +02/24/2022 10:36:17 - INFO - codeparrot_training - Step 6984: {'lr': 0.0004868165552105028, 'samples': 3576320, 'steps': 6984, 'loss/train': 1.5835167169570923} +02/24/2022 10:36:20 - INFO - codeparrot_training - Step 6985: {'lr': 0.000486811311390604, 'samples': 3576832, 'steps': 6985, 'loss/train': 0.7449057698249817} +02/24/2022 10:36:26 - INFO - codeparrot_training - Step 6986: {'lr': 0.0004868060665562808, 'samples': 3577344, 'steps': 6986, 'loss/train': 0.6410086750984192} +02/24/2022 10:36:30 - INFO - codeparrot_training - Step 6987: {'lr': 0.0004868008207075555, 'samples': 3577856, 'steps': 6987, 'loss/train': 1.8432601690292358} +02/24/2022 10:36:35 - INFO - codeparrot_training - Step 6988: {'lr': 0.0004867955738444508, 'samples': 3578368, 'steps': 6988, 'loss/train': 2.0191097259521484} +02/24/2022 10:36:38 - INFO - codeparrot_training - Step 6989: {'lr': 0.000486790325966989, 'samples': 3578880, 'steps': 6989, 'loss/train': 1.547868013381958} +02/24/2022 10:36:44 - INFO - codeparrot_training - Step 6990: {'lr': 0.0004867850770751926, 'samples': 3579392, 'steps': 6990, 'loss/train': 3.213212251663208} +02/24/2022 10:36:47 - INFO - codeparrot_training - Step 6991: {'lr': 0.00048677982716908416, 'samples': 3579904, 'steps': 6991, 'loss/train': 2.3578999042510986} +02/24/2022 10:36:54 - INFO - codeparrot_training - Step 6992: {'lr': 0.0004867745762486861, 'samples': 3580416, 'steps': 6992, 'loss/train': 3.327143430709839} +02/24/2022 10:36:57 - INFO - codeparrot_training - Step 6993: {'lr': 0.0004867693243140209, 'samples': 3580928, 'steps': 6993, 'loss/train': 2.5125010013580322} +02/24/2022 10:37:01 - INFO - codeparrot_training - Step 6994: {'lr': 0.0004867640713651112, 'samples': 3581440, 'steps': 6994, 'loss/train': 3.126490592956543} +02/24/2022 10:37:06 - INFO - codeparrot_training - Step 6995: {'lr': 0.0004867588174019794, 'samples': 3581952, 'steps': 6995, 'loss/train': 2.614426374435425} +02/24/2022 10:37:10 - INFO - codeparrot_training - Step 6996: {'lr': 0.00048675356242464785, 'samples': 3582464, 'steps': 6996, 'loss/train': 2.09812593460083} +02/24/2022 10:37:15 - INFO - codeparrot_training - Step 6997: {'lr': 0.0004867483064331394, 'samples': 3582976, 'steps': 6997, 'loss/train': 2.2243432998657227} +02/24/2022 10:37:19 - INFO - codeparrot_training - Step 6998: {'lr': 0.00048674304942747626, 'samples': 3583488, 'steps': 6998, 'loss/train': 2.046218156814575} +02/24/2022 10:37:24 - INFO - codeparrot_training - Step 6999: {'lr': 0.0004867377914076811, 'samples': 3584000, 'steps': 6999, 'loss/train': 2.682992696762085} +02/24/2022 10:37:24 - INFO - codeparrot_training - Evaluating and saving model checkpoint