diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -22433,3 +22433,1009 @@ Use FP16 precision: False 02/25/2022 06:04:08 - INFO - codeparrot_training - Step 21998: {'lr': 0.00031473637056993837, 'samples': 11263488, 'steps': 21998, 'loss/train': 2.339315176010132} 02/25/2022 06:04:12 - INFO - codeparrot_training - Step 21999: {'lr': 0.0003147205660614055, 'samples': 11264000, 'steps': 21999, 'loss/train': 2.7713630199432373} 02/25/2022 06:04:12 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 06:04:30 - WARNING - huggingface_hub.repository - Several commits (22) will be pushed upstream. +02/25/2022 06:04:30 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 06:05:08 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 1098822..de93bae floral-grass-11 -> floral-grass-11 + +02/25/2022 06:05:16 - INFO - codeparrot_training - Step 22000: {'lr': 0.00031470476127563017, 'samples': 11264512, 'steps': 22000, 'loss/train': 2.5917716026306152} +02/25/2022 06:05:19 - INFO - codeparrot_training - Step 22001: {'lr': 0.00031468895621268036, 'samples': 11265024, 'steps': 22001, 'loss/train': 2.364344358444214} +02/25/2022 06:05:25 - INFO - codeparrot_training - Step 22002: {'lr': 0.00031467315087262344, 'samples': 11265536, 'steps': 22002, 'loss/train': 2.4098546504974365} +02/25/2022 06:05:28 - INFO - codeparrot_training - Step 22003: {'lr': 0.0003146573452555274, 'samples': 11266048, 'steps': 22003, 'loss/train': 1.268041968345642} +02/25/2022 06:05:34 - INFO - codeparrot_training - Step 22004: {'lr': 0.0003146415393614597, 'samples': 11266560, 'steps': 22004, 'loss/train': 1.6502779722213745} +02/25/2022 06:05:37 - INFO - codeparrot_training - Step 22005: {'lr': 0.00031462573319048827, 'samples': 11267072, 'steps': 22005, 'loss/train': 2.3571839332580566} +02/25/2022 06:05:43 - INFO - codeparrot_training - Step 22006: {'lr': 0.0003146099267426806, 'samples': 11267584, 'steps': 22006, 'loss/train': 1.2613369226455688} +02/25/2022 06:05:46 - INFO - codeparrot_training - Step 22007: {'lr': 0.00031459412001810474, 'samples': 11268096, 'steps': 22007, 'loss/train': 1.7785935401916504} +02/25/2022 06:05:52 - INFO - codeparrot_training - Step 22008: {'lr': 0.000314578313016828, 'samples': 11268608, 'steps': 22008, 'loss/train': 2.1543264389038086} +02/25/2022 06:05:55 - INFO - codeparrot_training - Step 22009: {'lr': 0.00031456250573891825, 'samples': 11269120, 'steps': 22009, 'loss/train': 1.5843356847763062} +02/25/2022 06:06:02 - INFO - codeparrot_training - Step 22010: {'lr': 0.0003145466981844434, 'samples': 11269632, 'steps': 22010, 'loss/train': 2.041003465652466} +02/25/2022 06:06:05 - INFO - codeparrot_training - Step 22011: {'lr': 0.00031453089035347084, 'samples': 11270144, 'steps': 22011, 'loss/train': 1.2788344621658325} +02/25/2022 06:06:11 - INFO - codeparrot_training - Step 22012: {'lr': 0.0003145150822460685, 'samples': 11270656, 'steps': 22012, 'loss/train': 1.6295868158340454} +02/25/2022 06:06:15 - INFO - codeparrot_training - Step 22013: {'lr': 0.00031449927386230397, 'samples': 11271168, 'steps': 22013, 'loss/train': 2.3790078163146973} +02/25/2022 06:06:18 - INFO - codeparrot_training - Step 22014: {'lr': 0.0003144834652022451, 'samples': 11271680, 'steps': 22014, 'loss/train': 2.0947465896606445} +02/25/2022 06:06:24 - INFO - codeparrot_training - Step 22015: {'lr': 0.0003144676562659595, 'samples': 11272192, 'steps': 22015, 'loss/train': 0.5828628540039062} +02/25/2022 06:06:27 - INFO - codeparrot_training - Step 22016: {'lr': 0.000314451847053515, 'samples': 11272704, 'steps': 22016, 'loss/train': 0.27841484546661377} +02/25/2022 06:06:33 - INFO - codeparrot_training - Step 22017: {'lr': 0.0003144360375649792, 'samples': 11273216, 'steps': 22017, 'loss/train': 2.2132320404052734} +02/25/2022 06:06:36 - INFO - codeparrot_training - Step 22018: {'lr': 0.0003144202278004199, 'samples': 11273728, 'steps': 22018, 'loss/train': 1.1934279203414917} +02/25/2022 06:06:42 - INFO - codeparrot_training - Step 22019: {'lr': 0.0003144044177599047, 'samples': 11274240, 'steps': 22019, 'loss/train': 2.107335090637207} +02/25/2022 06:06:45 - INFO - codeparrot_training - Step 22020: {'lr': 0.00031438860744350156, 'samples': 11274752, 'steps': 22020, 'loss/train': 1.3147578239440918} +02/25/2022 06:06:51 - INFO - codeparrot_training - Step 22021: {'lr': 0.000314372796851278, 'samples': 11275264, 'steps': 22021, 'loss/train': 2.705869436264038} +02/25/2022 06:06:55 - INFO - codeparrot_training - Step 22022: {'lr': 0.00031435698598330185, 'samples': 11275776, 'steps': 22022, 'loss/train': 2.0425286293029785} +02/25/2022 06:07:00 - INFO - codeparrot_training - Step 22023: {'lr': 0.0003143411748396408, 'samples': 11276288, 'steps': 22023, 'loss/train': 2.396756649017334} +02/25/2022 06:07:04 - INFO - codeparrot_training - Step 22024: {'lr': 0.00031432536342036255, 'samples': 11276800, 'steps': 22024, 'loss/train': 1.321191430091858} +02/25/2022 06:07:09 - INFO - codeparrot_training - Step 22025: {'lr': 0.00031430955172553497, 'samples': 11277312, 'steps': 22025, 'loss/train': 2.703444480895996} +02/25/2022 06:07:13 - INFO - codeparrot_training - Step 22026: {'lr': 0.00031429373975522555, 'samples': 11277824, 'steps': 22026, 'loss/train': 2.1531193256378174} +02/25/2022 06:07:18 - INFO - codeparrot_training - Step 22027: {'lr': 0.00031427792750950227, 'samples': 11278336, 'steps': 22027, 'loss/train': 1.317335605621338} +02/25/2022 06:07:22 - INFO - codeparrot_training - Step 22028: {'lr': 0.0003142621149884327, 'samples': 11278848, 'steps': 22028, 'loss/train': 0.8760442137718201} +02/25/2022 06:07:27 - INFO - codeparrot_training - Step 22029: {'lr': 0.00031424630219208474, 'samples': 11279360, 'steps': 22029, 'loss/train': 1.9641828536987305} +02/25/2022 06:07:31 - INFO - codeparrot_training - Step 22030: {'lr': 0.00031423048912052585, 'samples': 11279872, 'steps': 22030, 'loss/train': 2.4220521450042725} +02/25/2022 06:07:37 - INFO - codeparrot_training - Step 22031: {'lr': 0.0003142146757738241, 'samples': 11280384, 'steps': 22031, 'loss/train': 1.4144482612609863} +02/25/2022 06:07:40 - INFO - codeparrot_training - Step 22032: {'lr': 0.000314198862152047, 'samples': 11280896, 'steps': 22032, 'loss/train': 1.148971676826477} +02/25/2022 06:07:46 - INFO - codeparrot_training - Step 22033: {'lr': 0.00031418304825526236, 'samples': 11281408, 'steps': 22033, 'loss/train': 2.127129077911377} +02/25/2022 06:07:50 - INFO - codeparrot_training - Step 22034: {'lr': 0.0003141672340835379, 'samples': 11281920, 'steps': 22034, 'loss/train': 1.512710452079773} +02/25/2022 06:07:53 - INFO - codeparrot_training - Step 22035: {'lr': 0.0003141514196369414, 'samples': 11282432, 'steps': 22035, 'loss/train': 2.4291930198669434} +02/25/2022 06:07:59 - INFO - codeparrot_training - Step 22036: {'lr': 0.0003141356049155406, 'samples': 11282944, 'steps': 22036, 'loss/train': 1.2321540117263794} +02/25/2022 06:08:02 - INFO - codeparrot_training - Step 22037: {'lr': 0.00031411978991940324, 'samples': 11283456, 'steps': 22037, 'loss/train': 1.7294766902923584} +02/25/2022 06:08:08 - INFO - codeparrot_training - Step 22038: {'lr': 0.000314103974648597, 'samples': 11283968, 'steps': 22038, 'loss/train': 1.537401556968689} +02/25/2022 06:08:12 - INFO - codeparrot_training - Step 22039: {'lr': 0.0003140881591031898, 'samples': 11284480, 'steps': 22039, 'loss/train': 1.3926559686660767} +02/25/2022 06:08:18 - INFO - codeparrot_training - Step 22040: {'lr': 0.0003140723432832492, 'samples': 11284992, 'steps': 22040, 'loss/train': 1.8262780904769897} +02/25/2022 06:08:21 - INFO - codeparrot_training - Step 22041: {'lr': 0.00031405652718884304, 'samples': 11285504, 'steps': 22041, 'loss/train': 3.38923716545105} +02/25/2022 06:08:24 - INFO - codeparrot_training - Step 22042: {'lr': 0.00031404071082003903, 'samples': 11286016, 'steps': 22042, 'loss/train': 1.4119133949279785} +02/25/2022 06:08:30 - INFO - codeparrot_training - Step 22043: {'lr': 0.000314024894176905, 'samples': 11286528, 'steps': 22043, 'loss/train': 2.4981746673583984} +02/25/2022 06:08:34 - INFO - codeparrot_training - Step 22044: {'lr': 0.00031400907725950865, 'samples': 11287040, 'steps': 22044, 'loss/train': 1.7532103061676025} +02/25/2022 06:08:40 - INFO - codeparrot_training - Step 22045: {'lr': 0.00031399326006791765, 'samples': 11287552, 'steps': 22045, 'loss/train': 2.3768534660339355} +02/25/2022 06:08:43 - INFO - codeparrot_training - Step 22046: {'lr': 0.00031397744260219996, 'samples': 11288064, 'steps': 22046, 'loss/train': 2.332976818084717} +02/25/2022 06:08:49 - INFO - codeparrot_training - Step 22047: {'lr': 0.00031396162486242317, 'samples': 11288576, 'steps': 22047, 'loss/train': 2.721773862838745} +02/25/2022 06:08:52 - INFO - codeparrot_training - Step 22048: {'lr': 0.0003139458068486551, 'samples': 11289088, 'steps': 22048, 'loss/train': 2.5646514892578125} +02/25/2022 06:08:58 - INFO - codeparrot_training - Step 22049: {'lr': 0.0003139299885609635, 'samples': 11289600, 'steps': 22049, 'loss/train': 2.6611459255218506} +02/25/2022 06:09:02 - INFO - codeparrot_training - Step 22050: {'lr': 0.00031391416999941606, 'samples': 11290112, 'steps': 22050, 'loss/train': 1.656847357749939} +02/25/2022 06:09:07 - INFO - codeparrot_training - Step 22051: {'lr': 0.00031389835116408073, 'samples': 11290624, 'steps': 22051, 'loss/train': 0.536063015460968} +02/25/2022 06:09:11 - INFO - codeparrot_training - Step 22052: {'lr': 0.000313882532055025, 'samples': 11291136, 'steps': 22052, 'loss/train': 2.067643404006958} +02/25/2022 06:09:16 - INFO - codeparrot_training - Step 22053: {'lr': 0.000313866712672317, 'samples': 11291648, 'steps': 22053, 'loss/train': 0.26975131034851074} +02/25/2022 06:09:20 - INFO - codeparrot_training - Step 22054: {'lr': 0.0003138508930160241, 'samples': 11292160, 'steps': 22054, 'loss/train': 2.1808390617370605} +02/25/2022 06:09:25 - INFO - codeparrot_training - Step 22055: {'lr': 0.0003138350730862144, 'samples': 11292672, 'steps': 22055, 'loss/train': 1.4270166158676147} +02/25/2022 06:09:29 - INFO - codeparrot_training - Step 22056: {'lr': 0.00031381925288295536, 'samples': 11293184, 'steps': 22056, 'loss/train': 2.1215500831604004} +02/25/2022 06:09:34 - INFO - codeparrot_training - Step 22057: {'lr': 0.000313803432406315, 'samples': 11293696, 'steps': 22057, 'loss/train': 1.5915385484695435} +02/25/2022 06:09:37 - INFO - codeparrot_training - Step 22058: {'lr': 0.000313787611656361, 'samples': 11294208, 'steps': 22058, 'loss/train': 2.786083698272705} +02/25/2022 06:09:43 - INFO - codeparrot_training - Step 22059: {'lr': 0.00031377179063316106, 'samples': 11294720, 'steps': 22059, 'loss/train': 1.9742531776428223} +02/25/2022 06:09:46 - INFO - codeparrot_training - Step 22060: {'lr': 0.0003137559693367831, 'samples': 11295232, 'steps': 22060, 'loss/train': 1.7338865995407104} +02/25/2022 06:09:53 - INFO - codeparrot_training - Step 22061: {'lr': 0.0003137401477672947, 'samples': 11295744, 'steps': 22061, 'loss/train': 2.4261152744293213} +02/25/2022 06:09:56 - INFO - codeparrot_training - Step 22062: {'lr': 0.00031372432592476383, 'samples': 11296256, 'steps': 22062, 'loss/train': 1.083591103553772} +02/25/2022 06:10:02 - INFO - codeparrot_training - Step 22063: {'lr': 0.0003137085038092582, 'samples': 11296768, 'steps': 22063, 'loss/train': 2.613905668258667} +02/25/2022 06:10:07 - INFO - codeparrot_training - Step 22064: {'lr': 0.00031369268142084555, 'samples': 11297280, 'steps': 22064, 'loss/train': 1.549139380455017} +02/25/2022 06:10:11 - INFO - codeparrot_training - Step 22065: {'lr': 0.00031367685875959375, 'samples': 11297792, 'steps': 22065, 'loss/train': 2.2094218730926514} +02/25/2022 06:10:14 - INFO - codeparrot_training - Step 22066: {'lr': 0.0003136610358255704, 'samples': 11298304, 'steps': 22066, 'loss/train': 2.8319671154022217} +02/25/2022 06:10:20 - INFO - codeparrot_training - Step 22067: {'lr': 0.00031364521261884336, 'samples': 11298816, 'steps': 22067, 'loss/train': 1.9258968830108643} +02/25/2022 06:10:23 - INFO - codeparrot_training - Step 22068: {'lr': 0.00031362938913948046, 'samples': 11299328, 'steps': 22068, 'loss/train': 1.7411415576934814} +02/25/2022 06:10:30 - INFO - codeparrot_training - Step 22069: {'lr': 0.0003136135653875495, 'samples': 11299840, 'steps': 22069, 'loss/train': 1.7595075368881226} +02/25/2022 06:10:33 - INFO - codeparrot_training - Step 22070: {'lr': 0.00031359774136311823, 'samples': 11300352, 'steps': 22070, 'loss/train': 1.4619450569152832} +02/25/2022 06:10:39 - INFO - codeparrot_training - Step 22071: {'lr': 0.0003135819170662545, 'samples': 11300864, 'steps': 22071, 'loss/train': 1.2482984066009521} +02/25/2022 06:10:42 - INFO - codeparrot_training - Step 22072: {'lr': 0.00031356609249702587, 'samples': 11301376, 'steps': 22072, 'loss/train': 1.7502570152282715} +02/25/2022 06:10:48 - INFO - codeparrot_training - Step 22073: {'lr': 0.0003135502676555004, 'samples': 11301888, 'steps': 22073, 'loss/train': 1.2703635692596436} +02/25/2022 06:10:51 - INFO - codeparrot_training - Step 22074: {'lr': 0.0003135344425417457, 'samples': 11302400, 'steps': 22074, 'loss/train': 1.7558841705322266} +02/25/2022 06:10:57 - INFO - codeparrot_training - Step 22075: {'lr': 0.0003135186171558297, 'samples': 11302912, 'steps': 22075, 'loss/train': 1.6188462972640991} +02/25/2022 06:11:00 - INFO - codeparrot_training - Step 22076: {'lr': 0.00031350279149782004, 'samples': 11303424, 'steps': 22076, 'loss/train': 1.442162275314331} +02/25/2022 06:11:06 - INFO - codeparrot_training - Step 22077: {'lr': 0.0003134869655677846, 'samples': 11303936, 'steps': 22077, 'loss/train': 2.6089706420898438} +02/25/2022 06:11:09 - INFO - codeparrot_training - Step 22078: {'lr': 0.00031347113936579116, 'samples': 11304448, 'steps': 22078, 'loss/train': 2.9049012660980225} +02/25/2022 06:11:16 - INFO - codeparrot_training - Step 22079: {'lr': 0.00031345531289190756, 'samples': 11304960, 'steps': 22079, 'loss/train': 0.896712601184845} +02/25/2022 06:11:19 - INFO - codeparrot_training - Step 22080: {'lr': 0.0003134394861462014, 'samples': 11305472, 'steps': 22080, 'loss/train': 0.7773104906082153} +02/25/2022 06:11:25 - INFO - codeparrot_training - Step 22081: {'lr': 0.0003134236591287407, 'samples': 11305984, 'steps': 22081, 'loss/train': 1.9527983665466309} +02/25/2022 06:11:28 - INFO - codeparrot_training - Step 22082: {'lr': 0.0003134078318395933, 'samples': 11306496, 'steps': 22082, 'loss/train': 2.2992241382598877} +02/25/2022 06:11:34 - INFO - codeparrot_training - Step 22083: {'lr': 0.00031339200427882676, 'samples': 11307008, 'steps': 22083, 'loss/train': 8.847299575805664} +02/25/2022 06:11:37 - INFO - codeparrot_training - Step 22084: {'lr': 0.00031337617644650907, 'samples': 11307520, 'steps': 22084, 'loss/train': 2.2997782230377197} +02/25/2022 06:11:43 - INFO - codeparrot_training - Step 22085: {'lr': 0.00031336034834270786, 'samples': 11308032, 'steps': 22085, 'loss/train': 1.500760793685913} +02/25/2022 06:11:47 - INFO - codeparrot_training - Step 22086: {'lr': 0.00031334451996749117, 'samples': 11308544, 'steps': 22086, 'loss/train': 2.2298805713653564} +02/25/2022 06:11:52 - INFO - codeparrot_training - Step 22087: {'lr': 0.00031332869132092654, 'samples': 11309056, 'steps': 22087, 'loss/train': 2.9004907608032227} +02/25/2022 06:11:56 - INFO - codeparrot_training - Step 22088: {'lr': 0.00031331286240308205, 'samples': 11309568, 'steps': 22088, 'loss/train': 2.2678866386413574} +02/25/2022 06:12:01 - INFO - codeparrot_training - Step 22089: {'lr': 0.00031329703321402526, 'samples': 11310080, 'steps': 22089, 'loss/train': 2.3201212882995605} +02/25/2022 06:12:05 - INFO - codeparrot_training - Step 22090: {'lr': 0.00031328120375382414, 'samples': 11310592, 'steps': 22090, 'loss/train': 2.5450994968414307} +02/25/2022 06:12:11 - INFO - codeparrot_training - Step 22091: {'lr': 0.0003132653740225464, 'samples': 11311104, 'steps': 22091, 'loss/train': 2.115913152694702} +02/25/2022 06:12:15 - INFO - codeparrot_training - Step 22092: {'lr': 0.0003132495440202599, 'samples': 11311616, 'steps': 22092, 'loss/train': 1.537142276763916} +02/25/2022 06:12:20 - INFO - codeparrot_training - Step 22093: {'lr': 0.0003132337137470324, 'samples': 11312128, 'steps': 22093, 'loss/train': 2.2702367305755615} +02/25/2022 06:12:24 - INFO - codeparrot_training - Step 22094: {'lr': 0.00031321788320293176, 'samples': 11312640, 'steps': 22094, 'loss/train': 2.2105822563171387} +02/25/2022 06:12:29 - INFO - codeparrot_training - Step 22095: {'lr': 0.00031320205238802583, 'samples': 11313152, 'steps': 22095, 'loss/train': 0.5547376275062561} +02/25/2022 06:12:33 - INFO - codeparrot_training - Step 22096: {'lr': 0.0003131862213023823, 'samples': 11313664, 'steps': 22096, 'loss/train': 3.415804862976074} +02/25/2022 06:12:38 - INFO - codeparrot_training - Step 22097: {'lr': 0.0003131703899460692, 'samples': 11314176, 'steps': 22097, 'loss/train': 2.715240001678467} +02/25/2022 06:12:42 - INFO - codeparrot_training - Step 22098: {'lr': 0.0003131545583191541, 'samples': 11314688, 'steps': 22098, 'loss/train': 0.7137669324874878} +02/25/2022 06:12:47 - INFO - codeparrot_training - Step 22099: {'lr': 0.00031313872642170493, 'samples': 11315200, 'steps': 22099, 'loss/train': 1.9726505279541016} +02/25/2022 06:12:51 - INFO - codeparrot_training - Step 22100: {'lr': 0.0003131228942537895, 'samples': 11315712, 'steps': 22100, 'loss/train': 2.0028703212738037} +02/25/2022 06:12:56 - INFO - codeparrot_training - Step 22101: {'lr': 0.00031310706181547567, 'samples': 11316224, 'steps': 22101, 'loss/train': 0.7319560647010803} +02/25/2022 06:13:00 - INFO - codeparrot_training - Step 22102: {'lr': 0.0003130912291068312, 'samples': 11316736, 'steps': 22102, 'loss/train': 1.1150904893875122} +02/25/2022 06:13:05 - INFO - codeparrot_training - Step 22103: {'lr': 0.000313075396127924, 'samples': 11317248, 'steps': 22103, 'loss/train': 1.8547435998916626} +02/25/2022 06:13:09 - INFO - codeparrot_training - Step 22104: {'lr': 0.0003130595628788217, 'samples': 11317760, 'steps': 22104, 'loss/train': 2.63070011138916} +02/25/2022 06:13:14 - INFO - codeparrot_training - Step 22105: {'lr': 0.00031304372935959235, 'samples': 11318272, 'steps': 22105, 'loss/train': 2.5418074131011963} +02/25/2022 06:13:18 - INFO - codeparrot_training - Step 22106: {'lr': 0.00031302789557030364, 'samples': 11318784, 'steps': 22106, 'loss/train': 1.7129013538360596} +02/25/2022 06:13:24 - INFO - codeparrot_training - Step 22107: {'lr': 0.00031301206151102353, 'samples': 11319296, 'steps': 22107, 'loss/train': 1.9842053651809692} +02/25/2022 06:13:27 - INFO - codeparrot_training - Step 22108: {'lr': 0.00031299622718181964, 'samples': 11319808, 'steps': 22108, 'loss/train': 1.3814620971679688} +02/25/2022 06:13:33 - INFO - codeparrot_training - Step 22109: {'lr': 0.00031298039258276, 'samples': 11320320, 'steps': 22109, 'loss/train': 1.87350332736969} +02/25/2022 06:13:36 - INFO - codeparrot_training - Step 22110: {'lr': 0.0003129645577139123, 'samples': 11320832, 'steps': 22110, 'loss/train': 0.9727237224578857} +02/25/2022 06:13:42 - INFO - codeparrot_training - Step 22111: {'lr': 0.00031294872257534446, 'samples': 11321344, 'steps': 22111, 'loss/train': 1.3958938121795654} +02/25/2022 06:13:48 - INFO - codeparrot_training - Step 22112: {'lr': 0.0003129328871671243, 'samples': 11321856, 'steps': 22112, 'loss/train': 1.9277312755584717} +02/25/2022 06:13:51 - INFO - codeparrot_training - Step 22113: {'lr': 0.0003129170514893196, 'samples': 11322368, 'steps': 22113, 'loss/train': 2.7265465259552} +02/25/2022 06:13:55 - INFO - codeparrot_training - Step 22114: {'lr': 0.0003129012155419983, 'samples': 11322880, 'steps': 22114, 'loss/train': 1.2648028135299683} +02/25/2022 06:14:02 - INFO - codeparrot_training - Step 22115: {'lr': 0.00031288537932522807, 'samples': 11323392, 'steps': 22115, 'loss/train': 1.6724538803100586} +02/25/2022 06:14:05 - INFO - codeparrot_training - Step 22116: {'lr': 0.0003128695428390769, 'samples': 11323904, 'steps': 22116, 'loss/train': 1.838234543800354} +02/25/2022 06:14:11 - INFO - codeparrot_training - Step 22117: {'lr': 0.0003128537060836125, 'samples': 11324416, 'steps': 22117, 'loss/train': 1.3269188404083252} +02/25/2022 06:14:14 - INFO - codeparrot_training - Step 22118: {'lr': 0.0003128378690589028, 'samples': 11324928, 'steps': 22118, 'loss/train': 0.9925171732902527} +02/25/2022 06:14:20 - INFO - codeparrot_training - Step 22119: {'lr': 0.00031282203176501573, 'samples': 11325440, 'steps': 22119, 'loss/train': 0.9470146298408508} +02/25/2022 06:14:23 - INFO - codeparrot_training - Step 22120: {'lr': 0.0003128061942020189, 'samples': 11325952, 'steps': 22120, 'loss/train': 2.619854211807251} +02/25/2022 06:14:29 - INFO - codeparrot_training - Step 22121: {'lr': 0.00031279035636998037, 'samples': 11326464, 'steps': 22121, 'loss/train': 1.4416135549545288} +02/25/2022 06:14:32 - INFO - codeparrot_training - Step 22122: {'lr': 0.0003127745182689678, 'samples': 11326976, 'steps': 22122, 'loss/train': 1.8916935920715332} +02/25/2022 06:14:38 - INFO - codeparrot_training - Step 22123: {'lr': 0.00031275867989904923, 'samples': 11327488, 'steps': 22123, 'loss/train': 2.0850741863250732} +02/25/2022 06:14:41 - INFO - codeparrot_training - Step 22124: {'lr': 0.0003127428412602923, 'samples': 11328000, 'steps': 22124, 'loss/train': 1.721575140953064} +02/25/2022 06:14:47 - INFO - codeparrot_training - Step 22125: {'lr': 0.00031272700235276507, 'samples': 11328512, 'steps': 22125, 'loss/train': 2.039088726043701} +02/25/2022 06:14:50 - INFO - codeparrot_training - Step 22126: {'lr': 0.00031271116317653513, 'samples': 11329024, 'steps': 22126, 'loss/train': 2.3093457221984863} +02/25/2022 06:14:57 - INFO - codeparrot_training - Step 22127: {'lr': 0.00031269532373167063, 'samples': 11329536, 'steps': 22127, 'loss/train': 1.8865641355514526} +02/25/2022 06:15:01 - INFO - codeparrot_training - Step 22128: {'lr': 0.0003126794840182392, 'samples': 11330048, 'steps': 22128, 'loss/train': 1.6816167831420898} +02/25/2022 06:15:06 - INFO - codeparrot_training - Step 22129: {'lr': 0.00031266364403630874, 'samples': 11330560, 'steps': 22129, 'loss/train': 1.5853948593139648} +02/25/2022 06:15:10 - INFO - codeparrot_training - Step 22130: {'lr': 0.0003126478037859472, 'samples': 11331072, 'steps': 22130, 'loss/train': 1.2221128940582275} +02/25/2022 06:15:15 - INFO - codeparrot_training - Step 22131: {'lr': 0.0003126319632672223, 'samples': 11331584, 'steps': 22131, 'loss/train': 1.6561076641082764} +02/25/2022 06:15:19 - INFO - codeparrot_training - Step 22132: {'lr': 0.00031261612248020197, 'samples': 11332096, 'steps': 22132, 'loss/train': 2.2861557006835938} +02/25/2022 06:15:24 - INFO - codeparrot_training - Step 22133: {'lr': 0.00031260028142495404, 'samples': 11332608, 'steps': 22133, 'loss/train': 2.2173848152160645} +02/25/2022 06:15:28 - INFO - codeparrot_training - Step 22134: {'lr': 0.0003125844401015465, 'samples': 11333120, 'steps': 22134, 'loss/train': 0.941705584526062} +02/25/2022 06:15:33 - INFO - codeparrot_training - Step 22135: {'lr': 0.0003125685985100469, 'samples': 11333632, 'steps': 22135, 'loss/train': 1.8156362771987915} +02/25/2022 06:15:37 - INFO - codeparrot_training - Step 22136: {'lr': 0.0003125527566505234, 'samples': 11334144, 'steps': 22136, 'loss/train': 1.6366169452667236} +02/25/2022 06:15:43 - INFO - codeparrot_training - Step 22137: {'lr': 0.0003125369145230438, 'samples': 11334656, 'steps': 22137, 'loss/train': 2.258678674697876} +02/25/2022 06:15:46 - INFO - codeparrot_training - Step 22138: {'lr': 0.0003125210721276758, 'samples': 11335168, 'steps': 22138, 'loss/train': 1.7923755645751953} +02/25/2022 06:15:52 - INFO - codeparrot_training - Step 22139: {'lr': 0.0003125052294644874, 'samples': 11335680, 'steps': 22139, 'loss/train': 0.8070864677429199} +02/25/2022 06:15:55 - INFO - codeparrot_training - Step 22140: {'lr': 0.0003124893865335466, 'samples': 11336192, 'steps': 22140, 'loss/train': 2.3999979496002197} +02/25/2022 06:16:01 - INFO - codeparrot_training - Step 22141: {'lr': 0.00031247354333492096, 'samples': 11336704, 'steps': 22141, 'loss/train': 0.48616787791252136} +02/25/2022 06:16:04 - INFO - codeparrot_training - Step 22142: {'lr': 0.00031245769986867845, 'samples': 11337216, 'steps': 22142, 'loss/train': 0.303365021944046} +02/25/2022 06:16:10 - INFO - codeparrot_training - Step 22143: {'lr': 0.0003124418561348871, 'samples': 11337728, 'steps': 22143, 'loss/train': 2.018758535385132} +02/25/2022 06:16:13 - INFO - codeparrot_training - Step 22144: {'lr': 0.0003124260121336146, 'samples': 11338240, 'steps': 22144, 'loss/train': 2.5217652320861816} +02/25/2022 06:16:19 - INFO - codeparrot_training - Step 22145: {'lr': 0.000312410167864929, 'samples': 11338752, 'steps': 22145, 'loss/train': 0.6321930885314941} +02/25/2022 06:16:22 - INFO - codeparrot_training - Step 22146: {'lr': 0.00031239432332889796, 'samples': 11339264, 'steps': 22146, 'loss/train': 1.5276532173156738} +02/25/2022 06:16:28 - INFO - codeparrot_training - Step 22147: {'lr': 0.00031237847852558947, 'samples': 11339776, 'steps': 22147, 'loss/train': 3.1110036373138428} +02/25/2022 06:16:31 - INFO - codeparrot_training - Step 22148: {'lr': 0.00031236263345507133, 'samples': 11340288, 'steps': 22148, 'loss/train': 1.7653257846832275} +02/25/2022 06:16:36 - INFO - codeparrot_training - Step 22149: {'lr': 0.0003123467881174116, 'samples': 11340800, 'steps': 22149, 'loss/train': 1.5785020589828491} +02/25/2022 06:16:42 - INFO - codeparrot_training - Step 22150: {'lr': 0.0003123309425126779, 'samples': 11341312, 'steps': 22150, 'loss/train': 1.8085739612579346} +02/25/2022 06:16:45 - INFO - codeparrot_training - Step 22151: {'lr': 0.00031231509664093833, 'samples': 11341824, 'steps': 22151, 'loss/train': 2.071397066116333} +02/25/2022 06:16:52 - INFO - codeparrot_training - Step 22152: {'lr': 0.00031229925050226063, 'samples': 11342336, 'steps': 22152, 'loss/train': 2.453071117401123} +02/25/2022 06:16:55 - INFO - codeparrot_training - Step 22153: {'lr': 0.0003122834040967127, 'samples': 11342848, 'steps': 22153, 'loss/train': 1.8134874105453491} +02/25/2022 06:17:01 - INFO - codeparrot_training - Step 22154: {'lr': 0.00031226755742436255, 'samples': 11343360, 'steps': 22154, 'loss/train': 2.8095500469207764} +02/25/2022 06:17:04 - INFO - codeparrot_training - Step 22155: {'lr': 0.0003122517104852778, 'samples': 11343872, 'steps': 22155, 'loss/train': 1.751172423362732} +02/25/2022 06:17:10 - INFO - codeparrot_training - Step 22156: {'lr': 0.0003122358632795266, 'samples': 11344384, 'steps': 22156, 'loss/train': 1.9667421579360962} +02/25/2022 06:17:13 - INFO - codeparrot_training - Step 22157: {'lr': 0.00031222001580717663, 'samples': 11344896, 'steps': 22157, 'loss/train': 0.8395716547966003} +02/25/2022 06:17:19 - INFO - codeparrot_training - Step 22158: {'lr': 0.000312204168068296, 'samples': 11345408, 'steps': 22158, 'loss/train': 2.086977005004883} +02/25/2022 06:17:22 - INFO - codeparrot_training - Step 22159: {'lr': 0.00031218832006295235, 'samples': 11345920, 'steps': 22159, 'loss/train': 1.1217615604400635} +02/25/2022 06:17:27 - INFO - codeparrot_training - Step 22160: {'lr': 0.0003121724717912138, 'samples': 11346432, 'steps': 22160, 'loss/train': 1.074906349182129} +02/25/2022 06:17:31 - INFO - codeparrot_training - Step 22161: {'lr': 0.000312156623253148, 'samples': 11346944, 'steps': 22161, 'loss/train': 1.9170221090316772} +02/25/2022 06:17:37 - INFO - codeparrot_training - Step 22162: {'lr': 0.00031214077444882297, 'samples': 11347456, 'steps': 22162, 'loss/train': 1.7256731986999512} +02/25/2022 06:17:40 - INFO - codeparrot_training - Step 22163: {'lr': 0.0003121249253783067, 'samples': 11347968, 'steps': 22163, 'loss/train': 1.3001811504364014} +02/25/2022 06:17:46 - INFO - codeparrot_training - Step 22164: {'lr': 0.00031210907604166686, 'samples': 11348480, 'steps': 22164, 'loss/train': 1.930328607559204} +02/25/2022 06:17:49 - INFO - codeparrot_training - Step 22165: {'lr': 0.0003120932264389715, 'samples': 11348992, 'steps': 22165, 'loss/train': 1.9401648044586182} +02/25/2022 06:17:55 - INFO - codeparrot_training - Step 22166: {'lr': 0.0003120773765702885, 'samples': 11349504, 'steps': 22166, 'loss/train': 1.5868417024612427} +02/25/2022 06:17:59 - INFO - codeparrot_training - Step 22167: {'lr': 0.00031206152643568577, 'samples': 11350016, 'steps': 22167, 'loss/train': 2.1357803344726562} +02/25/2022 06:18:04 - INFO - codeparrot_training - Step 22168: {'lr': 0.00031204567603523105, 'samples': 11350528, 'steps': 22168, 'loss/train': 2.0386104583740234} +02/25/2022 06:18:08 - INFO - codeparrot_training - Step 22169: {'lr': 0.00031202982536899246, 'samples': 11351040, 'steps': 22169, 'loss/train': 1.4887747764587402} +02/25/2022 06:18:11 - INFO - codeparrot_training - Step 22170: {'lr': 0.0003120139744370377, 'samples': 11351552, 'steps': 22170, 'loss/train': 2.29206919670105} +02/25/2022 06:18:17 - INFO - codeparrot_training - Step 22171: {'lr': 0.0003119981232394349, 'samples': 11352064, 'steps': 22171, 'loss/train': 1.4098385572433472} +02/25/2022 06:18:20 - INFO - codeparrot_training - Step 22172: {'lr': 0.0003119822717762517, 'samples': 11352576, 'steps': 22172, 'loss/train': 1.6300408840179443} +02/25/2022 06:18:26 - INFO - codeparrot_training - Step 22173: {'lr': 0.0003119664200475562, 'samples': 11353088, 'steps': 22173, 'loss/train': 1.2397927045822144} +02/25/2022 06:18:30 - INFO - codeparrot_training - Step 22174: {'lr': 0.0003119505680534162, 'samples': 11353600, 'steps': 22174, 'loss/train': 1.7889060974121094} +02/25/2022 06:18:35 - INFO - codeparrot_training - Step 22175: {'lr': 0.00031193471579389967, 'samples': 11354112, 'steps': 22175, 'loss/train': 1.9722788333892822} +02/25/2022 06:18:39 - INFO - codeparrot_training - Step 22176: {'lr': 0.00031191886326907445, 'samples': 11354624, 'steps': 22176, 'loss/train': 2.2971391677856445} +02/25/2022 06:18:44 - INFO - codeparrot_training - Step 22177: {'lr': 0.0003119030104790085, 'samples': 11355136, 'steps': 22177, 'loss/train': 1.3095362186431885} +02/25/2022 06:18:48 - INFO - codeparrot_training - Step 22178: {'lr': 0.00031188715742376966, 'samples': 11355648, 'steps': 22178, 'loss/train': 1.6352877616882324} +02/25/2022 06:18:53 - INFO - codeparrot_training - Step 22179: {'lr': 0.0003118713041034259, 'samples': 11356160, 'steps': 22179, 'loss/train': 2.841221570968628} +02/25/2022 06:18:57 - INFO - codeparrot_training - Step 22180: {'lr': 0.0003118554505180452, 'samples': 11356672, 'steps': 22180, 'loss/train': 3.101526975631714} +02/25/2022 06:19:02 - INFO - codeparrot_training - Step 22181: {'lr': 0.0003118395966676953, 'samples': 11357184, 'steps': 22181, 'loss/train': 2.118783950805664} +02/25/2022 06:19:06 - INFO - codeparrot_training - Step 22182: {'lr': 0.00031182374255244426, 'samples': 11357696, 'steps': 22182, 'loss/train': 2.5226573944091797} +02/25/2022 06:19:12 - INFO - codeparrot_training - Step 22183: {'lr': 0.0003118078881723599, 'samples': 11358208, 'steps': 22183, 'loss/train': 1.4040274620056152} +02/25/2022 06:19:16 - INFO - codeparrot_training - Step 22184: {'lr': 0.0003117920335275102, 'samples': 11358720, 'steps': 22184, 'loss/train': 2.7390952110290527} +02/25/2022 06:19:21 - INFO - codeparrot_training - Step 22185: {'lr': 0.000311776178617963, 'samples': 11359232, 'steps': 22185, 'loss/train': 1.879380226135254} +02/25/2022 06:19:25 - INFO - codeparrot_training - Step 22186: {'lr': 0.0003117603234437864, 'samples': 11359744, 'steps': 22186, 'loss/train': 1.598555564880371} +02/25/2022 06:19:30 - INFO - codeparrot_training - Step 22187: {'lr': 0.0003117444680050481, 'samples': 11360256, 'steps': 22187, 'loss/train': 1.8586094379425049} +02/25/2022 06:19:34 - INFO - codeparrot_training - Step 22188: {'lr': 0.00031172861230181607, 'samples': 11360768, 'steps': 22188, 'loss/train': 1.9133294820785522} +02/25/2022 06:19:39 - INFO - codeparrot_training - Step 22189: {'lr': 0.0003117127563341583, 'samples': 11361280, 'steps': 22189, 'loss/train': 3.0111265182495117} +02/25/2022 06:19:43 - INFO - codeparrot_training - Step 22190: {'lr': 0.0003116969001021427, 'samples': 11361792, 'steps': 22190, 'loss/train': 2.1985645294189453} +02/25/2022 06:19:48 - INFO - codeparrot_training - Step 22191: {'lr': 0.00031168104360583716, 'samples': 11362304, 'steps': 22191, 'loss/train': 2.684141159057617} +02/25/2022 06:19:52 - INFO - codeparrot_training - Step 22192: {'lr': 0.00031166518684530966, 'samples': 11362816, 'steps': 22192, 'loss/train': 1.6079938411712646} +02/25/2022 06:19:57 - INFO - codeparrot_training - Step 22193: {'lr': 0.000311649329820628, 'samples': 11363328, 'steps': 22193, 'loss/train': 2.01727294921875} +02/25/2022 06:20:01 - INFO - codeparrot_training - Step 22194: {'lr': 0.0003116334725318602, 'samples': 11363840, 'steps': 22194, 'loss/train': 2.1195273399353027} +02/25/2022 06:20:07 - INFO - codeparrot_training - Step 22195: {'lr': 0.00031161761497907416, 'samples': 11364352, 'steps': 22195, 'loss/train': 1.7520968914031982} +02/25/2022 06:20:10 - INFO - codeparrot_training - Step 22196: {'lr': 0.00031160175716233793, 'samples': 11364864, 'steps': 22196, 'loss/train': 1.125702977180481} +02/25/2022 06:20:16 - INFO - codeparrot_training - Step 22197: {'lr': 0.0003115858990817192, 'samples': 11365376, 'steps': 22197, 'loss/train': 1.9171411991119385} +02/25/2022 06:20:21 - INFO - codeparrot_training - Step 22198: {'lr': 0.0003115700407372861, 'samples': 11365888, 'steps': 22198, 'loss/train': 2.5504097938537598} +02/25/2022 06:20:25 - INFO - codeparrot_training - Step 22199: {'lr': 0.00031155418212910647, 'samples': 11366400, 'steps': 22199, 'loss/train': 1.4988442659378052} +02/25/2022 06:20:31 - INFO - codeparrot_training - Step 22200: {'lr': 0.00031153832325724826, 'samples': 11366912, 'steps': 22200, 'loss/train': 2.427340507507324} +02/25/2022 06:20:34 - INFO - codeparrot_training - Step 22201: {'lr': 0.0003115224641217795, 'samples': 11367424, 'steps': 22201, 'loss/train': 1.7603408098220825} +02/25/2022 06:20:37 - INFO - codeparrot_training - Step 22202: {'lr': 0.0003115066047227679, 'samples': 11367936, 'steps': 22202, 'loss/train': 1.614784836769104} +02/25/2022 06:20:43 - INFO - codeparrot_training - Step 22203: {'lr': 0.00031149074506028163, 'samples': 11368448, 'steps': 22203, 'loss/train': 2.251432180404663} +02/25/2022 06:20:47 - INFO - codeparrot_training - Step 22204: {'lr': 0.00031147488513438853, 'samples': 11368960, 'steps': 22204, 'loss/train': 1.5265086889266968} +02/25/2022 06:20:52 - INFO - codeparrot_training - Step 22205: {'lr': 0.00031145902494515655, 'samples': 11369472, 'steps': 22205, 'loss/train': 1.5114670991897583} +02/25/2022 06:20:56 - INFO - codeparrot_training - Step 22206: {'lr': 0.0003114431644926536, 'samples': 11369984, 'steps': 22206, 'loss/train': 2.287856340408325} +02/25/2022 06:21:01 - INFO - codeparrot_training - Step 22207: {'lr': 0.00031142730377694763, 'samples': 11370496, 'steps': 22207, 'loss/train': 1.7188853025436401} +02/25/2022 06:21:04 - INFO - codeparrot_training - Step 22208: {'lr': 0.0003114114427981066, 'samples': 11371008, 'steps': 22208, 'loss/train': 2.1920838356018066} +02/25/2022 06:21:12 - INFO - codeparrot_training - Step 22209: {'lr': 0.00031139558155619844, 'samples': 11371520, 'steps': 22209, 'loss/train': 2.505232334136963} +02/25/2022 06:21:15 - INFO - codeparrot_training - Step 22210: {'lr': 0.0003113797200512912, 'samples': 11372032, 'steps': 22210, 'loss/train': 2.104592800140381} +02/25/2022 06:21:20 - INFO - codeparrot_training - Step 22211: {'lr': 0.0003113638582834526, 'samples': 11372544, 'steps': 22211, 'loss/train': 1.9102720022201538} +02/25/2022 06:21:24 - INFO - codeparrot_training - Step 22212: {'lr': 0.00031134799625275077, 'samples': 11373056, 'steps': 22212, 'loss/train': 2.0225603580474854} +02/25/2022 06:21:29 - INFO - codeparrot_training - Step 22213: {'lr': 0.0003113321339592536, 'samples': 11373568, 'steps': 22213, 'loss/train': 2.8301875591278076} +02/25/2022 06:21:33 - INFO - codeparrot_training - Step 22214: {'lr': 0.000311316271403029, 'samples': 11374080, 'steps': 22214, 'loss/train': 0.8736248016357422} +02/25/2022 06:21:38 - INFO - codeparrot_training - Step 22215: {'lr': 0.00031130040858414506, 'samples': 11374592, 'steps': 22215, 'loss/train': 1.6126081943511963} +02/25/2022 06:21:42 - INFO - codeparrot_training - Step 22216: {'lr': 0.00031128454550266956, 'samples': 11375104, 'steps': 22216, 'loss/train': 1.715529203414917} +02/25/2022 06:21:47 - INFO - codeparrot_training - Step 22217: {'lr': 0.0003112686821586706, 'samples': 11375616, 'steps': 22217, 'loss/train': 1.7984647750854492} +02/25/2022 06:21:51 - INFO - codeparrot_training - Step 22218: {'lr': 0.000311252818552216, 'samples': 11376128, 'steps': 22218, 'loss/train': 2.9339468479156494} +02/25/2022 06:21:57 - INFO - codeparrot_training - Step 22219: {'lr': 0.00031123695468337375, 'samples': 11376640, 'steps': 22219, 'loss/train': 2.3963327407836914} +02/25/2022 06:22:01 - INFO - codeparrot_training - Step 22220: {'lr': 0.00031122109055221187, 'samples': 11377152, 'steps': 22220, 'loss/train': 2.3818411827087402} +02/25/2022 06:22:06 - INFO - codeparrot_training - Step 22221: {'lr': 0.00031120522615879834, 'samples': 11377664, 'steps': 22221, 'loss/train': 8.73620891571045} +02/25/2022 06:22:10 - INFO - codeparrot_training - Step 22222: {'lr': 0.00031118936150320093, 'samples': 11378176, 'steps': 22222, 'loss/train': 1.4981305599212646} +02/25/2022 06:22:16 - INFO - codeparrot_training - Step 22223: {'lr': 0.00031117349658548783, 'samples': 11378688, 'steps': 22223, 'loss/train': 1.4015003442764282} +02/25/2022 06:22:19 - INFO - codeparrot_training - Step 22224: {'lr': 0.00031115763140572686, 'samples': 11379200, 'steps': 22224, 'loss/train': 1.9452404975891113} +02/25/2022 06:22:25 - INFO - codeparrot_training - Step 22225: {'lr': 0.000311141765963986, 'samples': 11379712, 'steps': 22225, 'loss/train': 1.7791557312011719} +02/25/2022 06:22:28 - INFO - codeparrot_training - Step 22226: {'lr': 0.00031112590026033323, 'samples': 11380224, 'steps': 22226, 'loss/train': 2.105590343475342} +02/25/2022 06:22:34 - INFO - codeparrot_training - Step 22227: {'lr': 0.00031111003429483647, 'samples': 11380736, 'steps': 22227, 'loss/train': 2.030669927597046} +02/25/2022 06:22:37 - INFO - codeparrot_training - Step 22228: {'lr': 0.00031109416806756387, 'samples': 11381248, 'steps': 22228, 'loss/train': 1.9703563451766968} +02/25/2022 06:22:43 - INFO - codeparrot_training - Step 22229: {'lr': 0.0003110783015785831, 'samples': 11381760, 'steps': 22229, 'loss/train': 1.6472063064575195} +02/25/2022 06:22:46 - INFO - codeparrot_training - Step 22230: {'lr': 0.00031106243482796234, 'samples': 11382272, 'steps': 22230, 'loss/train': 1.0945477485656738} +02/25/2022 06:22:52 - INFO - codeparrot_training - Step 22231: {'lr': 0.0003110465678157695, 'samples': 11382784, 'steps': 22231, 'loss/train': 2.600090980529785} +02/25/2022 06:22:55 - INFO - codeparrot_training - Step 22232: {'lr': 0.0003110307005420726, 'samples': 11383296, 'steps': 22232, 'loss/train': 1.1109521389007568} +02/25/2022 06:23:03 - INFO - codeparrot_training - Step 22233: {'lr': 0.00031101483300693944, 'samples': 11383808, 'steps': 22233, 'loss/train': 1.9960497617721558} +02/25/2022 06:23:06 - INFO - codeparrot_training - Step 22234: {'lr': 0.00031099896521043826, 'samples': 11384320, 'steps': 22234, 'loss/train': 1.584606409072876} +02/25/2022 06:23:12 - INFO - codeparrot_training - Step 22235: {'lr': 0.0003109830971526367, 'samples': 11384832, 'steps': 22235, 'loss/train': 3.2077078819274902} +02/25/2022 06:23:16 - INFO - codeparrot_training - Step 22236: {'lr': 0.0003109672288336031, 'samples': 11385344, 'steps': 22236, 'loss/train': 0.8509630560874939} +02/25/2022 06:23:21 - INFO - codeparrot_training - Step 22237: {'lr': 0.00031095136025340514, 'samples': 11385856, 'steps': 22237, 'loss/train': 2.147021770477295} +02/25/2022 06:23:25 - INFO - codeparrot_training - Step 22238: {'lr': 0.00031093549141211096, 'samples': 11386368, 'steps': 22238, 'loss/train': 2.7605669498443604} +02/25/2022 06:23:30 - INFO - codeparrot_training - Step 22239: {'lr': 0.00031091962230978844, 'samples': 11386880, 'steps': 22239, 'loss/train': 2.381690263748169} +02/25/2022 06:23:34 - INFO - codeparrot_training - Step 22240: {'lr': 0.0003109037529465056, 'samples': 11387392, 'steps': 22240, 'loss/train': 2.512786626815796} +02/25/2022 06:23:39 - INFO - codeparrot_training - Step 22241: {'lr': 0.0003108878833223305, 'samples': 11387904, 'steps': 22241, 'loss/train': 1.117661714553833} +02/25/2022 06:23:43 - INFO - codeparrot_training - Step 22242: {'lr': 0.00031087201343733096, 'samples': 11388416, 'steps': 22242, 'loss/train': 1.766898274421692} +02/25/2022 06:23:50 - INFO - codeparrot_training - Step 22243: {'lr': 0.00031085614329157515, 'samples': 11388928, 'steps': 22243, 'loss/train': 1.4396743774414062} +02/25/2022 06:23:53 - INFO - codeparrot_training - Step 22244: {'lr': 0.00031084027288513083, 'samples': 11389440, 'steps': 22244, 'loss/train': 1.9462149143218994} +02/25/2022 06:23:59 - INFO - codeparrot_training - Step 22245: {'lr': 0.0003108244022180661, 'samples': 11389952, 'steps': 22245, 'loss/train': 1.7819745540618896} +02/25/2022 06:24:02 - INFO - codeparrot_training - Step 22246: {'lr': 0.000310808531290449, 'samples': 11390464, 'steps': 22246, 'loss/train': 2.3998100757598877} +02/25/2022 06:24:08 - INFO - codeparrot_training - Step 22247: {'lr': 0.00031079266010234746, 'samples': 11390976, 'steps': 22247, 'loss/train': 2.242130994796753} +02/25/2022 06:24:11 - INFO - codeparrot_training - Step 22248: {'lr': 0.00031077678865382944, 'samples': 11391488, 'steps': 22248, 'loss/train': 0.9377748370170593} +02/25/2022 06:24:17 - INFO - codeparrot_training - Step 22249: {'lr': 0.000310760916944963, 'samples': 11392000, 'steps': 22249, 'loss/train': 2.5221896171569824} +02/25/2022 06:24:20 - INFO - codeparrot_training - Step 22250: {'lr': 0.000310745044975816, 'samples': 11392512, 'steps': 22250, 'loss/train': 1.9791074991226196} +02/25/2022 06:24:26 - INFO - codeparrot_training - Step 22251: {'lr': 0.00031072917274645656, 'samples': 11393024, 'steps': 22251, 'loss/train': 1.8299438953399658} +02/25/2022 06:24:29 - INFO - codeparrot_training - Step 22252: {'lr': 0.00031071330025695266, 'samples': 11393536, 'steps': 22252, 'loss/train': 2.5882506370544434} +02/25/2022 06:24:35 - INFO - codeparrot_training - Step 22253: {'lr': 0.0003106974275073722, 'samples': 11394048, 'steps': 22253, 'loss/train': 1.676809549331665} +02/25/2022 06:24:39 - INFO - codeparrot_training - Step 22254: {'lr': 0.0003106815544977833, 'samples': 11394560, 'steps': 22254, 'loss/train': 1.1664416790008545} +02/25/2022 06:24:46 - INFO - codeparrot_training - Step 22255: {'lr': 0.00031066568122825383, 'samples': 11395072, 'steps': 22255, 'loss/train': 2.075968027114868} +02/25/2022 06:24:49 - INFO - codeparrot_training - Step 22256: {'lr': 0.0003106498076988519, 'samples': 11395584, 'steps': 22256, 'loss/train': 2.494464159011841} +02/25/2022 06:24:55 - INFO - codeparrot_training - Step 22257: {'lr': 0.0003106339339096454, 'samples': 11396096, 'steps': 22257, 'loss/train': 1.869131326675415} +02/25/2022 06:24:58 - INFO - codeparrot_training - Step 22258: {'lr': 0.0003106180598607024, 'samples': 11396608, 'steps': 22258, 'loss/train': 2.144507646560669} +02/25/2022 06:25:04 - INFO - codeparrot_training - Step 22259: {'lr': 0.00031060218555209094, 'samples': 11397120, 'steps': 22259, 'loss/train': 1.9071341753005981} +02/25/2022 06:25:07 - INFO - codeparrot_training - Step 22260: {'lr': 0.0003105863109838789, 'samples': 11397632, 'steps': 22260, 'loss/train': 1.9524785280227661} +02/25/2022 06:25:13 - INFO - codeparrot_training - Step 22261: {'lr': 0.0003105704361561343, 'samples': 11398144, 'steps': 22261, 'loss/train': 1.5737897157669067} +02/25/2022 06:25:16 - INFO - codeparrot_training - Step 22262: {'lr': 0.00031055456106892526, 'samples': 11398656, 'steps': 22262, 'loss/train': 2.6269309520721436} +02/25/2022 06:25:22 - INFO - codeparrot_training - Step 22263: {'lr': 0.0003105386857223197, 'samples': 11399168, 'steps': 22263, 'loss/train': 1.1933808326721191} +02/25/2022 06:25:25 - INFO - codeparrot_training - Step 22264: {'lr': 0.0003105228101163856, 'samples': 11399680, 'steps': 22264, 'loss/train': 1.6305445432662964} +02/25/2022 06:25:33 - INFO - codeparrot_training - Step 22265: {'lr': 0.000310506934251191, 'samples': 11400192, 'steps': 22265, 'loss/train': 2.9968018531799316} +02/25/2022 06:25:36 - INFO - codeparrot_training - Step 22266: {'lr': 0.0003104910581268039, 'samples': 11400704, 'steps': 22266, 'loss/train': 1.2612520456314087} +02/25/2022 06:25:42 - INFO - codeparrot_training - Step 22267: {'lr': 0.00031047518174329234, 'samples': 11401216, 'steps': 22267, 'loss/train': 1.0289467573165894} +02/25/2022 06:25:45 - INFO - codeparrot_training - Step 22268: {'lr': 0.00031045930510072427, 'samples': 11401728, 'steps': 22268, 'loss/train': 1.7810465097427368} +02/25/2022 06:25:51 - INFO - codeparrot_training - Step 22269: {'lr': 0.00031044342819916784, 'samples': 11402240, 'steps': 22269, 'loss/train': 2.5308055877685547} +02/25/2022 06:25:54 - INFO - codeparrot_training - Step 22270: {'lr': 0.0003104275510386908, 'samples': 11402752, 'steps': 22270, 'loss/train': 3.20619797706604} +02/25/2022 06:26:00 - INFO - codeparrot_training - Step 22271: {'lr': 0.0003104116736193615, 'samples': 11403264, 'steps': 22271, 'loss/train': 0.8564321398735046} +02/25/2022 06:26:03 - INFO - codeparrot_training - Step 22272: {'lr': 0.00031039579594124763, 'samples': 11403776, 'steps': 22272, 'loss/train': 2.959088087081909} +02/25/2022 06:26:09 - INFO - codeparrot_training - Step 22273: {'lr': 0.0003103799180044174, 'samples': 11404288, 'steps': 22273, 'loss/train': 1.844323754310608} +02/25/2022 06:26:12 - INFO - codeparrot_training - Step 22274: {'lr': 0.00031036403980893874, 'samples': 11404800, 'steps': 22274, 'loss/train': 1.3666863441467285} +02/25/2022 06:26:18 - INFO - codeparrot_training - Step 22275: {'lr': 0.0003103481613548797, 'samples': 11405312, 'steps': 22275, 'loss/train': 1.5148844718933105} +02/25/2022 06:26:21 - INFO - codeparrot_training - Step 22276: {'lr': 0.00031033228264230834, 'samples': 11405824, 'steps': 22276, 'loss/train': 1.8171871900558472} +02/25/2022 06:26:27 - INFO - codeparrot_training - Step 22277: {'lr': 0.0003103164036712926, 'samples': 11406336, 'steps': 22277, 'loss/train': 1.1824021339416504} +02/25/2022 06:26:30 - INFO - codeparrot_training - Step 22278: {'lr': 0.0003103005244419006, 'samples': 11406848, 'steps': 22278, 'loss/train': 1.8386484384536743} +02/25/2022 06:26:37 - INFO - codeparrot_training - Step 22279: {'lr': 0.00031028464495420026, 'samples': 11407360, 'steps': 22279, 'loss/train': 0.8456698656082153} +02/25/2022 06:26:41 - INFO - codeparrot_training - Step 22280: {'lr': 0.0003102687652082597, 'samples': 11407872, 'steps': 22280, 'loss/train': 2.0187506675720215} +02/25/2022 06:26:47 - INFO - codeparrot_training - Step 22281: {'lr': 0.00031025288520414686, 'samples': 11408384, 'steps': 22281, 'loss/train': 1.7374745607376099} +02/25/2022 06:26:50 - INFO - codeparrot_training - Step 22282: {'lr': 0.0003102370049419297, 'samples': 11408896, 'steps': 22282, 'loss/train': 0.9777387380599976} +02/25/2022 06:26:56 - INFO - codeparrot_training - Step 22283: {'lr': 0.0003102211244216764, 'samples': 11409408, 'steps': 22283, 'loss/train': 1.8480573892593384} +02/25/2022 06:26:59 - INFO - codeparrot_training - Step 22284: {'lr': 0.000310205243643455, 'samples': 11409920, 'steps': 22284, 'loss/train': 1.9416848421096802} +02/25/2022 06:27:05 - INFO - codeparrot_training - Step 22285: {'lr': 0.00031018936260733337, 'samples': 11410432, 'steps': 22285, 'loss/train': 1.6788299083709717} +02/25/2022 06:27:08 - INFO - codeparrot_training - Step 22286: {'lr': 0.00031017348131337963, 'samples': 11410944, 'steps': 22286, 'loss/train': 1.5511828660964966} +02/25/2022 06:27:14 - INFO - codeparrot_training - Step 22287: {'lr': 0.00031015759976166186, 'samples': 11411456, 'steps': 22287, 'loss/train': 0.8448664546012878} +02/25/2022 06:27:17 - INFO - codeparrot_training - Step 22288: {'lr': 0.00031014171795224794, 'samples': 11411968, 'steps': 22288, 'loss/train': 1.511526107788086} +02/25/2022 06:27:25 - INFO - codeparrot_training - Step 22289: {'lr': 0.00031012583588520607, 'samples': 11412480, 'steps': 22289, 'loss/train': 0.6437299847602844} +02/25/2022 06:27:28 - INFO - codeparrot_training - Step 22290: {'lr': 0.00031010995356060416, 'samples': 11412992, 'steps': 22290, 'loss/train': 2.339708089828491} +02/25/2022 06:27:34 - INFO - codeparrot_training - Step 22291: {'lr': 0.00031009407097851036, 'samples': 11413504, 'steps': 22291, 'loss/train': 1.4100689888000488} +02/25/2022 06:27:37 - INFO - codeparrot_training - Step 22292: {'lr': 0.0003100781881389926, 'samples': 11414016, 'steps': 22292, 'loss/train': 1.462053894996643} +02/25/2022 06:27:42 - INFO - codeparrot_training - Step 22293: {'lr': 0.00031006230504211895, 'samples': 11414528, 'steps': 22293, 'loss/train': 1.3496007919311523} +02/25/2022 06:27:46 - INFO - codeparrot_training - Step 22294: {'lr': 0.0003100464216879574, 'samples': 11415040, 'steps': 22294, 'loss/train': 0.5380419492721558} +02/25/2022 06:27:51 - INFO - codeparrot_training - Step 22295: {'lr': 0.0003100305380765762, 'samples': 11415552, 'steps': 22295, 'loss/train': 1.5153319835662842} +02/25/2022 06:27:55 - INFO - codeparrot_training - Step 22296: {'lr': 0.00031001465420804316, 'samples': 11416064, 'steps': 22296, 'loss/train': 2.651585102081299} +02/25/2022 06:28:00 - INFO - codeparrot_training - Step 22297: {'lr': 0.0003099987700824264, 'samples': 11416576, 'steps': 22297, 'loss/train': 1.4443249702453613} +02/25/2022 06:28:04 - INFO - codeparrot_training - Step 22298: {'lr': 0.00030998288569979393, 'samples': 11417088, 'steps': 22298, 'loss/train': 1.9768311977386475} +02/25/2022 06:28:09 - INFO - codeparrot_training - Step 22299: {'lr': 0.0003099670010602138, 'samples': 11417600, 'steps': 22299, 'loss/train': 1.9314266443252563} +02/25/2022 06:28:13 - INFO - codeparrot_training - Step 22300: {'lr': 0.00030995111616375417, 'samples': 11418112, 'steps': 22300, 'loss/train': 2.394134998321533} +02/25/2022 06:28:20 - INFO - codeparrot_training - Step 22301: {'lr': 0.00030993523101048294, 'samples': 11418624, 'steps': 22301, 'loss/train': 1.2647587060928345} +02/25/2022 06:28:24 - INFO - codeparrot_training - Step 22302: {'lr': 0.0003099193456004682, 'samples': 11419136, 'steps': 22302, 'loss/train': 0.9063059687614441} +02/25/2022 06:28:29 - INFO - codeparrot_training - Step 22303: {'lr': 0.00030990345993377807, 'samples': 11419648, 'steps': 22303, 'loss/train': 1.9191433191299438} +02/25/2022 06:28:33 - INFO - codeparrot_training - Step 22304: {'lr': 0.0003098875740104805, 'samples': 11420160, 'steps': 22304, 'loss/train': 2.153932571411133} +02/25/2022 06:28:38 - INFO - codeparrot_training - Step 22305: {'lr': 0.00030987168783064355, 'samples': 11420672, 'steps': 22305, 'loss/train': 0.8334032893180847} +02/25/2022 06:28:42 - INFO - codeparrot_training - Step 22306: {'lr': 0.0003098558013943353, 'samples': 11421184, 'steps': 22306, 'loss/train': 2.282377243041992} +02/25/2022 06:28:47 - INFO - codeparrot_training - Step 22307: {'lr': 0.00030983991470162386, 'samples': 11421696, 'steps': 22307, 'loss/train': 2.2862932682037354} +02/25/2022 06:28:51 - INFO - codeparrot_training - Step 22308: {'lr': 0.00030982402775257725, 'samples': 11422208, 'steps': 22308, 'loss/train': 0.5140820145606995} +02/25/2022 06:28:56 - INFO - codeparrot_training - Step 22309: {'lr': 0.0003098081405472634, 'samples': 11422720, 'steps': 22309, 'loss/train': 2.301920175552368} +02/25/2022 06:29:00 - INFO - codeparrot_training - Step 22310: {'lr': 0.0003097922530857505, 'samples': 11423232, 'steps': 22310, 'loss/train': 1.5764747858047485} +02/25/2022 06:29:07 - INFO - codeparrot_training - Step 22311: {'lr': 0.0003097763653681066, 'samples': 11423744, 'steps': 22311, 'loss/train': 1.5203237533569336} +02/25/2022 06:29:11 - INFO - codeparrot_training - Step 22312: {'lr': 0.00030976047739439974, 'samples': 11424256, 'steps': 22312, 'loss/train': 0.7984387278556824} +02/25/2022 06:29:16 - INFO - codeparrot_training - Step 22313: {'lr': 0.000309744589164698, 'samples': 11424768, 'steps': 22313, 'loss/train': 1.663845181465149} +02/25/2022 06:29:20 - INFO - codeparrot_training - Step 22314: {'lr': 0.00030972870067906934, 'samples': 11425280, 'steps': 22314, 'loss/train': 1.4970442056655884} +02/25/2022 06:29:25 - INFO - codeparrot_training - Step 22315: {'lr': 0.00030971281193758197, 'samples': 11425792, 'steps': 22315, 'loss/train': 2.329415798187256} +02/25/2022 06:29:29 - INFO - codeparrot_training - Step 22316: {'lr': 0.00030969692294030376, 'samples': 11426304, 'steps': 22316, 'loss/train': 1.0922491550445557} +02/25/2022 06:29:34 - INFO - codeparrot_training - Step 22317: {'lr': 0.000309681033687303, 'samples': 11426816, 'steps': 22317, 'loss/train': 1.4815539121627808} +02/25/2022 06:29:38 - INFO - codeparrot_training - Step 22318: {'lr': 0.0003096651441786476, 'samples': 11427328, 'steps': 22318, 'loss/train': 7.239895820617676} +02/25/2022 06:29:43 - INFO - codeparrot_training - Step 22319: {'lr': 0.0003096492544144056, 'samples': 11427840, 'steps': 22319, 'loss/train': 1.5799311399459839} +02/25/2022 06:29:47 - INFO - codeparrot_training - Step 22320: {'lr': 0.00030963336439464523, 'samples': 11428352, 'steps': 22320, 'loss/train': 2.619276523590088} +02/25/2022 06:29:54 - INFO - codeparrot_training - Step 22321: {'lr': 0.0003096174741194344, 'samples': 11428864, 'steps': 22321, 'loss/train': 1.6923171281814575} +02/25/2022 06:29:57 - INFO - codeparrot_training - Step 22322: {'lr': 0.00030960158358884127, 'samples': 11429376, 'steps': 22322, 'loss/train': 2.112273931503296} +02/25/2022 06:30:03 - INFO - codeparrot_training - Step 22323: {'lr': 0.0003095856928029339, 'samples': 11429888, 'steps': 22323, 'loss/train': 0.857179582118988} +02/25/2022 06:30:06 - INFO - codeparrot_training - Step 22324: {'lr': 0.00030956980176178033, 'samples': 11430400, 'steps': 22324, 'loss/train': 1.9736242294311523} +02/25/2022 06:30:12 - INFO - codeparrot_training - Step 22325: {'lr': 0.00030955391046544865, 'samples': 11430912, 'steps': 22325, 'loss/train': 2.1934549808502197} +02/25/2022 06:30:16 - INFO - codeparrot_training - Step 22326: {'lr': 0.0003095380189140069, 'samples': 11431424, 'steps': 22326, 'loss/train': 3.762875556945801} +02/25/2022 06:30:21 - INFO - codeparrot_training - Step 22327: {'lr': 0.00030952212710752325, 'samples': 11431936, 'steps': 22327, 'loss/train': 2.293222188949585} +02/25/2022 06:30:25 - INFO - codeparrot_training - Step 22328: {'lr': 0.00030950623504606565, 'samples': 11432448, 'steps': 22328, 'loss/train': 1.8966045379638672} +02/25/2022 06:30:31 - INFO - codeparrot_training - Step 22329: {'lr': 0.0003094903427297023, 'samples': 11432960, 'steps': 22329, 'loss/train': 1.7513986825942993} +02/25/2022 06:30:35 - INFO - codeparrot_training - Step 22330: {'lr': 0.00030947445015850114, 'samples': 11433472, 'steps': 22330, 'loss/train': 2.750603199005127} +02/25/2022 06:30:38 - INFO - codeparrot_training - Step 22331: {'lr': 0.0003094585573325303, 'samples': 11433984, 'steps': 22331, 'loss/train': 2.1039719581604004} +02/25/2022 06:30:44 - INFO - codeparrot_training - Step 22332: {'lr': 0.00030944266425185794, 'samples': 11434496, 'steps': 22332, 'loss/train': 3.583482265472412} +02/25/2022 06:30:47 - INFO - codeparrot_training - Step 22333: {'lr': 0.000309426770916552, 'samples': 11435008, 'steps': 22333, 'loss/train': 0.25457778573036194} +02/25/2022 06:30:53 - INFO - codeparrot_training - Step 22334: {'lr': 0.0003094108773266808, 'samples': 11435520, 'steps': 22334, 'loss/train': 1.7558698654174805} +02/25/2022 06:30:56 - INFO - codeparrot_training - Step 22335: {'lr': 0.00030939498348231214, 'samples': 11436032, 'steps': 22335, 'loss/train': 1.6925777196884155} +02/25/2022 06:31:02 - INFO - codeparrot_training - Step 22336: {'lr': 0.00030937908938351424, 'samples': 11436544, 'steps': 22336, 'loss/train': 1.1856589317321777} +02/25/2022 06:31:05 - INFO - codeparrot_training - Step 22337: {'lr': 0.0003093631950303552, 'samples': 11437056, 'steps': 22337, 'loss/train': 1.2924731969833374} +02/25/2022 06:31:13 - INFO - codeparrot_training - Step 22338: {'lr': 0.00030934730042290304, 'samples': 11437568, 'steps': 22338, 'loss/train': 1.7724864482879639} +02/25/2022 06:31:16 - INFO - codeparrot_training - Step 22339: {'lr': 0.00030933140556122597, 'samples': 11438080, 'steps': 22339, 'loss/train': 1.5146642923355103} +02/25/2022 06:31:22 - INFO - codeparrot_training - Step 22340: {'lr': 0.00030931551044539196, 'samples': 11438592, 'steps': 22340, 'loss/train': 2.4913101196289062} +02/25/2022 06:31:25 - INFO - codeparrot_training - Step 22341: {'lr': 0.00030929961507546915, 'samples': 11439104, 'steps': 22341, 'loss/train': 2.6212687492370605} +02/25/2022 06:31:31 - INFO - codeparrot_training - Step 22342: {'lr': 0.0003092837194515256, 'samples': 11439616, 'steps': 22342, 'loss/train': 1.9838272333145142} +02/25/2022 06:31:34 - INFO - codeparrot_training - Step 22343: {'lr': 0.00030926782357362944, 'samples': 11440128, 'steps': 22343, 'loss/train': 2.157978057861328} +02/25/2022 06:31:40 - INFO - codeparrot_training - Step 22344: {'lr': 0.0003092519274418487, 'samples': 11440640, 'steps': 22344, 'loss/train': 3.0668160915374756} +02/25/2022 06:31:43 - INFO - codeparrot_training - Step 22345: {'lr': 0.0003092360310562516, 'samples': 11441152, 'steps': 22345, 'loss/train': 1.5791271924972534} +02/25/2022 06:31:49 - INFO - codeparrot_training - Step 22346: {'lr': 0.00030922013441690607, 'samples': 11441664, 'steps': 22346, 'loss/train': 2.672255277633667} +02/25/2022 06:31:53 - INFO - codeparrot_training - Step 22347: {'lr': 0.0003092042375238803, 'samples': 11442176, 'steps': 22347, 'loss/train': 3.058821439743042} +02/25/2022 06:31:56 - INFO - codeparrot_training - Step 22348: {'lr': 0.00030918834037724235, 'samples': 11442688, 'steps': 22348, 'loss/train': 2.3949058055877686} +02/25/2022 06:32:03 - INFO - codeparrot_training - Step 22349: {'lr': 0.0003091724429770604, 'samples': 11443200, 'steps': 22349, 'loss/train': 1.860541582107544} +02/25/2022 06:32:07 - INFO - codeparrot_training - Step 22350: {'lr': 0.0003091565453234025, 'samples': 11443712, 'steps': 22350, 'loss/train': 2.587707757949829} +02/25/2022 06:32:12 - INFO - codeparrot_training - Step 22351: {'lr': 0.0003091406474163367, 'samples': 11444224, 'steps': 22351, 'loss/train': 2.50187611579895} +02/25/2022 06:32:16 - INFO - codeparrot_training - Step 22352: {'lr': 0.0003091247492559312, 'samples': 11444736, 'steps': 22352, 'loss/train': 1.755619764328003} +02/25/2022 06:32:21 - INFO - codeparrot_training - Step 22353: {'lr': 0.00030910885084225396, 'samples': 11445248, 'steps': 22353, 'loss/train': 2.078305959701538} +02/25/2022 06:32:25 - INFO - codeparrot_training - Step 22354: {'lr': 0.0003090929521753733, 'samples': 11445760, 'steps': 22354, 'loss/train': 1.433426022529602} +02/25/2022 06:32:30 - INFO - codeparrot_training - Step 22355: {'lr': 0.00030907705325535704, 'samples': 11446272, 'steps': 22355, 'loss/train': 1.9524354934692383} +02/25/2022 06:32:34 - INFO - codeparrot_training - Step 22356: {'lr': 0.0003090611540822736, 'samples': 11446784, 'steps': 22356, 'loss/train': 1.4155231714248657} +02/25/2022 06:32:39 - INFO - codeparrot_training - Step 22357: {'lr': 0.0003090452546561908, 'samples': 11447296, 'steps': 22357, 'loss/train': 1.8659199476242065} +02/25/2022 06:32:42 - INFO - codeparrot_training - Step 22358: {'lr': 0.000309029354977177, 'samples': 11447808, 'steps': 22358, 'loss/train': 1.7597140073776245} +02/25/2022 06:32:50 - INFO - codeparrot_training - Step 22359: {'lr': 0.00030901345504530007, 'samples': 11448320, 'steps': 22359, 'loss/train': 1.6660492420196533} +02/25/2022 06:32:53 - INFO - codeparrot_training - Step 22360: {'lr': 0.0003089975548606283, 'samples': 11448832, 'steps': 22360, 'loss/train': 1.9426867961883545} +02/25/2022 06:32:59 - INFO - codeparrot_training - Step 22361: {'lr': 0.0003089816544232298, 'samples': 11449344, 'steps': 22361, 'loss/train': 2.3940839767456055} +02/25/2022 06:33:02 - INFO - codeparrot_training - Step 22362: {'lr': 0.00030896575373317247, 'samples': 11449856, 'steps': 22362, 'loss/train': 1.415296196937561} +02/25/2022 06:33:08 - INFO - codeparrot_training - Step 22363: {'lr': 0.0003089498527905247, 'samples': 11450368, 'steps': 22363, 'loss/train': 0.4083438515663147} +02/25/2022 06:33:11 - INFO - codeparrot_training - Step 22364: {'lr': 0.00030893395159535444, 'samples': 11450880, 'steps': 22364, 'loss/train': 1.8211491107940674} +02/25/2022 06:33:17 - INFO - codeparrot_training - Step 22365: {'lr': 0.00030891805014772987, 'samples': 11451392, 'steps': 22365, 'loss/train': 1.9234308004379272} +02/25/2022 06:33:20 - INFO - codeparrot_training - Step 22366: {'lr': 0.000308902148447719, 'samples': 11451904, 'steps': 22366, 'loss/train': 2.0892865657806396} +02/25/2022 06:33:26 - INFO - codeparrot_training - Step 22367: {'lr': 0.00030888624649539015, 'samples': 11452416, 'steps': 22367, 'loss/train': 1.4807833433151245} +02/25/2022 06:33:29 - INFO - codeparrot_training - Step 22368: {'lr': 0.0003088703442908112, 'samples': 11452928, 'steps': 22368, 'loss/train': 2.2732961177825928} +02/25/2022 06:33:37 - INFO - codeparrot_training - Step 22369: {'lr': 0.0003088544418340505, 'samples': 11453440, 'steps': 22369, 'loss/train': 0.744574248790741} +02/25/2022 06:33:40 - INFO - codeparrot_training - Step 22370: {'lr': 0.000308838539125176, 'samples': 11453952, 'steps': 22370, 'loss/train': 2.6158666610717773} +02/25/2022 06:33:46 - INFO - codeparrot_training - Step 22371: {'lr': 0.00030882263616425587, 'samples': 11454464, 'steps': 22371, 'loss/train': 2.2508530616760254} +02/25/2022 06:33:49 - INFO - codeparrot_training - Step 22372: {'lr': 0.0003088067329513583, 'samples': 11454976, 'steps': 22372, 'loss/train': 2.4914891719818115} +02/25/2022 06:33:55 - INFO - codeparrot_training - Step 22373: {'lr': 0.0003087908294865513, 'samples': 11455488, 'steps': 22373, 'loss/train': 1.806899070739746} +02/25/2022 06:33:58 - INFO - codeparrot_training - Step 22374: {'lr': 0.00030877492576990306, 'samples': 11456000, 'steps': 22374, 'loss/train': 1.1799976825714111} +02/25/2022 06:34:04 - INFO - codeparrot_training - Step 22375: {'lr': 0.0003087590218014817, 'samples': 11456512, 'steps': 22375, 'loss/train': 2.5723395347595215} +02/25/2022 06:34:07 - INFO - codeparrot_training - Step 22376: {'lr': 0.00030874311758135535, 'samples': 11457024, 'steps': 22376, 'loss/train': 1.8258635997772217} +02/25/2022 06:34:13 - INFO - codeparrot_training - Step 22377: {'lr': 0.00030872721310959216, 'samples': 11457536, 'steps': 22377, 'loss/train': 1.6009079217910767} +02/25/2022 06:34:19 - INFO - codeparrot_training - Step 22378: {'lr': 0.00030871130838626025, 'samples': 11458048, 'steps': 22378, 'loss/train': 1.4505228996276855} +02/25/2022 06:34:22 - INFO - codeparrot_training - Step 22379: {'lr': 0.0003086954034114277, 'samples': 11458560, 'steps': 22379, 'loss/train': 2.5557217597961426} +02/25/2022 06:34:26 - INFO - codeparrot_training - Step 22380: {'lr': 0.00030867949818516274, 'samples': 11459072, 'steps': 22380, 'loss/train': 2.4533960819244385} +02/25/2022 06:34:31 - INFO - codeparrot_training - Step 22381: {'lr': 0.00030866359270753337, 'samples': 11459584, 'steps': 22381, 'loss/train': 0.8030892014503479} +02/25/2022 06:34:35 - INFO - codeparrot_training - Step 22382: {'lr': 0.00030864768697860784, 'samples': 11460096, 'steps': 22382, 'loss/train': 1.9053959846496582} +02/25/2022 06:34:40 - INFO - codeparrot_training - Step 22383: {'lr': 0.0003086317809984542, 'samples': 11460608, 'steps': 22383, 'loss/train': 3.842362642288208} +02/25/2022 06:34:44 - INFO - codeparrot_training - Step 22384: {'lr': 0.0003086158747671406, 'samples': 11461120, 'steps': 22384, 'loss/train': 0.8807100057601929} +02/25/2022 06:34:51 - INFO - codeparrot_training - Step 22385: {'lr': 0.0003085999682847353, 'samples': 11461632, 'steps': 22385, 'loss/train': 1.4275264739990234} +02/25/2022 06:34:54 - INFO - codeparrot_training - Step 22386: {'lr': 0.00030858406155130625, 'samples': 11462144, 'steps': 22386, 'loss/train': 1.7387988567352295} +02/25/2022 06:35:00 - INFO - codeparrot_training - Step 22387: {'lr': 0.00030856815456692177, 'samples': 11462656, 'steps': 22387, 'loss/train': 2.484557628631592} +02/25/2022 06:35:03 - INFO - codeparrot_training - Step 22388: {'lr': 0.00030855224733164987, 'samples': 11463168, 'steps': 22388, 'loss/train': 1.8729536533355713} +02/25/2022 06:35:09 - INFO - codeparrot_training - Step 22389: {'lr': 0.00030853633984555875, 'samples': 11463680, 'steps': 22389, 'loss/train': 2.02634859085083} +02/25/2022 06:35:12 - INFO - codeparrot_training - Step 22390: {'lr': 0.0003085204321087165, 'samples': 11464192, 'steps': 22390, 'loss/train': 8.760651588439941} +02/25/2022 06:35:18 - INFO - codeparrot_training - Step 22391: {'lr': 0.00030850452412119135, 'samples': 11464704, 'steps': 22391, 'loss/train': 1.6826472282409668} +02/25/2022 06:35:21 - INFO - codeparrot_training - Step 22392: {'lr': 0.00030848861588305136, 'samples': 11465216, 'steps': 22392, 'loss/train': 2.1556930541992188} +02/25/2022 06:35:27 - INFO - codeparrot_training - Step 22393: {'lr': 0.0003084727073943648, 'samples': 11465728, 'steps': 22393, 'loss/train': 2.5755672454833984} +02/25/2022 06:35:31 - INFO - codeparrot_training - Step 22394: {'lr': 0.0003084567986551996, 'samples': 11466240, 'steps': 22394, 'loss/train': 2.094748020172119} +02/25/2022 06:35:38 - INFO - codeparrot_training - Step 22395: {'lr': 0.0003084408896656241, 'samples': 11466752, 'steps': 22395, 'loss/train': 1.9773863554000854} +02/25/2022 06:35:41 - INFO - codeparrot_training - Step 22396: {'lr': 0.0003084249804257064, 'samples': 11467264, 'steps': 22396, 'loss/train': 1.619940996170044} +02/25/2022 06:35:47 - INFO - codeparrot_training - Step 22397: {'lr': 0.00030840907093551456, 'samples': 11467776, 'steps': 22397, 'loss/train': 1.7091479301452637} +02/25/2022 06:35:50 - INFO - codeparrot_training - Step 22398: {'lr': 0.0003083931611951169, 'samples': 11468288, 'steps': 22398, 'loss/train': 0.6624564528465271} +02/25/2022 06:35:56 - INFO - codeparrot_training - Step 22399: {'lr': 0.0003083772512045814, 'samples': 11468800, 'steps': 22399, 'loss/train': 1.4868395328521729} +02/25/2022 06:35:59 - INFO - codeparrot_training - Step 22400: {'lr': 0.0003083613409639764, 'samples': 11469312, 'steps': 22400, 'loss/train': 2.3430211544036865} +02/25/2022 06:36:05 - INFO - codeparrot_training - Step 22401: {'lr': 0.0003083454304733698, 'samples': 11469824, 'steps': 22401, 'loss/train': 2.3103830814361572} +02/25/2022 06:36:08 - INFO - codeparrot_training - Step 22402: {'lr': 0.00030832951973283, 'samples': 11470336, 'steps': 22402, 'loss/train': 1.3244562149047852} +02/25/2022 06:36:14 - INFO - codeparrot_training - Step 22403: {'lr': 0.000308313608742425, 'samples': 11470848, 'steps': 22403, 'loss/train': 1.2192047834396362} +02/25/2022 06:36:17 - INFO - codeparrot_training - Step 22404: {'lr': 0.00030829769750222315, 'samples': 11471360, 'steps': 22404, 'loss/train': 1.416987657546997} +02/25/2022 06:36:23 - INFO - codeparrot_training - Step 22405: {'lr': 0.00030828178601229235, 'samples': 11471872, 'steps': 22405, 'loss/train': 1.8768166303634644} +02/25/2022 06:36:27 - INFO - codeparrot_training - Step 22406: {'lr': 0.00030826587427270095, 'samples': 11472384, 'steps': 22406, 'loss/train': 1.5243351459503174} +02/25/2022 06:36:32 - INFO - codeparrot_training - Step 22407: {'lr': 0.000308249962283517, 'samples': 11472896, 'steps': 22407, 'loss/train': 1.476779580116272} +02/25/2022 06:36:36 - INFO - codeparrot_training - Step 22408: {'lr': 0.0003082340500448087, 'samples': 11473408, 'steps': 22408, 'loss/train': 2.0404796600341797} +02/25/2022 06:36:42 - INFO - codeparrot_training - Step 22409: {'lr': 0.0003082181375566443, 'samples': 11473920, 'steps': 22409, 'loss/train': 2.1903507709503174} +02/25/2022 06:36:45 - INFO - codeparrot_training - Step 22410: {'lr': 0.0003082022248190918, 'samples': 11474432, 'steps': 22410, 'loss/train': 1.9735642671585083} +02/25/2022 06:36:51 - INFO - codeparrot_training - Step 22411: {'lr': 0.00030818631183221945, 'samples': 11474944, 'steps': 22411, 'loss/train': 1.6710984706878662} +02/25/2022 06:36:54 - INFO - codeparrot_training - Step 22412: {'lr': 0.0003081703985960955, 'samples': 11475456, 'steps': 22412, 'loss/train': 2.2112793922424316} +02/25/2022 06:37:00 - INFO - codeparrot_training - Step 22413: {'lr': 0.00030815448511078796, 'samples': 11475968, 'steps': 22413, 'loss/train': 1.548136830329895} +02/25/2022 06:37:03 - INFO - codeparrot_training - Step 22414: {'lr': 0.0003081385713763651, 'samples': 11476480, 'steps': 22414, 'loss/train': 1.352513074874878} +02/25/2022 06:37:09 - INFO - codeparrot_training - Step 22415: {'lr': 0.00030812265739289503, 'samples': 11476992, 'steps': 22415, 'loss/train': 2.00291109085083} +02/25/2022 06:37:13 - INFO - codeparrot_training - Step 22416: {'lr': 0.000308106743160446, 'samples': 11477504, 'steps': 22416, 'loss/train': 1.8740191459655762} +02/25/2022 06:37:18 - INFO - codeparrot_training - Step 22417: {'lr': 0.00030809082867908614, 'samples': 11478016, 'steps': 22417, 'loss/train': 1.6866564750671387} +02/25/2022 06:37:22 - INFO - codeparrot_training - Step 22418: {'lr': 0.0003080749139488836, 'samples': 11478528, 'steps': 22418, 'loss/train': 1.7677775621414185} +02/25/2022 06:37:27 - INFO - codeparrot_training - Step 22419: {'lr': 0.0003080589989699066, 'samples': 11479040, 'steps': 22419, 'loss/train': 5.318525791168213} +02/25/2022 06:37:31 - INFO - codeparrot_training - Step 22420: {'lr': 0.00030804308374222315, 'samples': 11479552, 'steps': 22420, 'loss/train': 1.7014484405517578} +02/25/2022 06:37:36 - INFO - codeparrot_training - Step 22421: {'lr': 0.00030802716826590164, 'samples': 11480064, 'steps': 22421, 'loss/train': 1.4869434833526611} +02/25/2022 06:37:39 - INFO - codeparrot_training - Step 22422: {'lr': 0.0003080112525410102, 'samples': 11480576, 'steps': 22422, 'loss/train': 2.2668511867523193} +02/25/2022 06:37:45 - INFO - codeparrot_training - Step 22423: {'lr': 0.0003079953365676169, 'samples': 11481088, 'steps': 22423, 'loss/train': 0.7269768714904785} +02/25/2022 06:37:49 - INFO - codeparrot_training - Step 22424: {'lr': 0.00030797942034579013, 'samples': 11481600, 'steps': 22424, 'loss/train': 1.968507170677185} +02/25/2022 06:37:55 - INFO - codeparrot_training - Step 22425: {'lr': 0.0003079635038755978, 'samples': 11482112, 'steps': 22425, 'loss/train': 0.9637570381164551} +02/25/2022 06:37:58 - INFO - codeparrot_training - Step 22426: {'lr': 0.0003079475871571083, 'samples': 11482624, 'steps': 22426, 'loss/train': 1.7919520139694214} +02/25/2022 06:38:04 - INFO - codeparrot_training - Step 22427: {'lr': 0.0003079316701903897, 'samples': 11483136, 'steps': 22427, 'loss/train': 1.6670854091644287} +02/25/2022 06:38:07 - INFO - codeparrot_training - Step 22428: {'lr': 0.0003079157529755102, 'samples': 11483648, 'steps': 22428, 'loss/train': 0.763849139213562} +02/25/2022 06:38:13 - INFO - codeparrot_training - Step 22429: {'lr': 0.0003078998355125381, 'samples': 11484160, 'steps': 22429, 'loss/train': 1.8103898763656616} +02/25/2022 06:38:16 - INFO - codeparrot_training - Step 22430: {'lr': 0.0003078839178015414, 'samples': 11484672, 'steps': 22430, 'loss/train': 1.5743341445922852} +02/25/2022 06:38:22 - INFO - codeparrot_training - Step 22431: {'lr': 0.0003078679998425884, 'samples': 11485184, 'steps': 22431, 'loss/train': 2.0739946365356445} +02/25/2022 06:38:25 - INFO - codeparrot_training - Step 22432: {'lr': 0.0003078520816357472, 'samples': 11485696, 'steps': 22432, 'loss/train': 2.7872555255889893} +02/25/2022 06:38:31 - INFO - codeparrot_training - Step 22433: {'lr': 0.00030783616318108613, 'samples': 11486208, 'steps': 22433, 'loss/train': 1.7650415897369385} +02/25/2022 06:38:35 - INFO - codeparrot_training - Step 22434: {'lr': 0.0003078202444786733, 'samples': 11486720, 'steps': 22434, 'loss/train': 1.876613736152649} +02/25/2022 06:38:40 - INFO - codeparrot_training - Step 22435: {'lr': 0.0003078043255285769, 'samples': 11487232, 'steps': 22435, 'loss/train': 0.8877896666526794} +02/25/2022 06:38:44 - INFO - codeparrot_training - Step 22436: {'lr': 0.00030778840633086514, 'samples': 11487744, 'steps': 22436, 'loss/train': 3.0490896701812744} +02/25/2022 06:38:49 - INFO - codeparrot_training - Step 22437: {'lr': 0.00030777248688560615, 'samples': 11488256, 'steps': 22437, 'loss/train': 2.067582368850708} +02/25/2022 06:38:53 - INFO - codeparrot_training - Step 22438: {'lr': 0.0003077565671928682, 'samples': 11488768, 'steps': 22438, 'loss/train': 2.9970216751098633} +02/25/2022 06:38:58 - INFO - codeparrot_training - Step 22439: {'lr': 0.00030774064725271944, 'samples': 11489280, 'steps': 22439, 'loss/train': 0.872682511806488} +02/25/2022 06:39:02 - INFO - codeparrot_training - Step 22440: {'lr': 0.00030772472706522806, 'samples': 11489792, 'steps': 22440, 'loss/train': 9.422595024108887} +02/25/2022 06:39:05 - INFO - codeparrot_training - Step 22441: {'lr': 0.00030770880663046236, 'samples': 11490304, 'steps': 22441, 'loss/train': 2.216752052307129} +02/25/2022 06:39:11 - INFO - codeparrot_training - Step 22442: {'lr': 0.00030769288594849044, 'samples': 11490816, 'steps': 22442, 'loss/train': 1.1536403894424438} +02/25/2022 06:39:17 - INFO - codeparrot_training - Step 22443: {'lr': 0.0003076769650193805, 'samples': 11491328, 'steps': 22443, 'loss/train': 2.079317331314087} +02/25/2022 06:39:20 - INFO - codeparrot_training - Step 22444: {'lr': 0.0003076610438432007, 'samples': 11491840, 'steps': 22444, 'loss/train': 1.0838345289230347} +02/25/2022 06:39:26 - INFO - codeparrot_training - Step 22445: {'lr': 0.00030764512242001927, 'samples': 11492352, 'steps': 22445, 'loss/train': 1.7356657981872559} +02/25/2022 06:39:29 - INFO - codeparrot_training - Step 22446: {'lr': 0.00030762920074990457, 'samples': 11492864, 'steps': 22446, 'loss/train': 1.089828372001648} +02/25/2022 06:39:33 - INFO - codeparrot_training - Step 22447: {'lr': 0.00030761327883292456, 'samples': 11493376, 'steps': 22447, 'loss/train': 2.4420320987701416} +02/25/2022 06:39:39 - INFO - codeparrot_training - Step 22448: {'lr': 0.00030759735666914767, 'samples': 11493888, 'steps': 22448, 'loss/train': 1.6346549987792969} +02/25/2022 06:39:42 - INFO - codeparrot_training - Step 22449: {'lr': 0.00030758143425864187, 'samples': 11494400, 'steps': 22449, 'loss/train': 0.16934257745742798} +02/25/2022 06:39:47 - INFO - codeparrot_training - Step 22450: {'lr': 0.00030756551160147563, 'samples': 11494912, 'steps': 22450, 'loss/train': 2.335850715637207} +02/25/2022 06:39:51 - INFO - codeparrot_training - Step 22451: {'lr': 0.0003075495886977169, 'samples': 11495424, 'steps': 22451, 'loss/train': 2.307386636734009} +02/25/2022 06:39:57 - INFO - codeparrot_training - Step 22452: {'lr': 0.0003075336655474341, 'samples': 11495936, 'steps': 22452, 'loss/train': 1.7017930746078491} +02/25/2022 06:40:01 - INFO - codeparrot_training - Step 22453: {'lr': 0.0003075177421506952, 'samples': 11496448, 'steps': 22453, 'loss/train': 1.9930847883224487} +02/25/2022 06:40:06 - INFO - codeparrot_training - Step 22454: {'lr': 0.0003075018185075687, 'samples': 11496960, 'steps': 22454, 'loss/train': 2.1117916107177734} +02/25/2022 06:40:12 - INFO - codeparrot_training - Step 22455: {'lr': 0.0003074858946181226, 'samples': 11497472, 'steps': 22455, 'loss/train': 1.4634958505630493} +02/25/2022 06:40:15 - INFO - codeparrot_training - Step 22456: {'lr': 0.0003074699704824252, 'samples': 11497984, 'steps': 22456, 'loss/train': 2.485975980758667} +02/25/2022 06:40:21 - INFO - codeparrot_training - Step 22457: {'lr': 0.0003074540461005447, 'samples': 11498496, 'steps': 22457, 'loss/train': 2.007704496383667} +02/25/2022 06:40:24 - INFO - codeparrot_training - Step 22458: {'lr': 0.00030743812147254935, 'samples': 11499008, 'steps': 22458, 'loss/train': 1.2556360960006714} +02/25/2022 06:40:30 - INFO - codeparrot_training - Step 22459: {'lr': 0.0003074221965985073, 'samples': 11499520, 'steps': 22459, 'loss/train': 2.854020595550537} +02/25/2022 06:40:33 - INFO - codeparrot_training - Step 22460: {'lr': 0.0003074062714784867, 'samples': 11500032, 'steps': 22460, 'loss/train': 1.9867686033248901} +02/25/2022 06:40:39 - INFO - codeparrot_training - Step 22461: {'lr': 0.000307390346112556, 'samples': 11500544, 'steps': 22461, 'loss/train': 2.059323310852051} +02/25/2022 06:40:43 - INFO - codeparrot_training - Step 22462: {'lr': 0.0003073744205007832, 'samples': 11501056, 'steps': 22462, 'loss/train': 1.5115104913711548} +02/25/2022 06:40:48 - INFO - codeparrot_training - Step 22463: {'lr': 0.0003073584946432366, 'samples': 11501568, 'steps': 22463, 'loss/train': 1.714331865310669} +02/25/2022 06:40:52 - INFO - codeparrot_training - Step 22464: {'lr': 0.00030734256853998446, 'samples': 11502080, 'steps': 22464, 'loss/train': 2.7452924251556396} +02/25/2022 06:40:57 - INFO - codeparrot_training - Step 22465: {'lr': 0.00030732664219109497, 'samples': 11502592, 'steps': 22465, 'loss/train': 1.7802088260650635} +02/25/2022 06:41:01 - INFO - codeparrot_training - Step 22466: {'lr': 0.00030731071559663624, 'samples': 11503104, 'steps': 22466, 'loss/train': 2.6749212741851807} +02/25/2022 06:41:06 - INFO - codeparrot_training - Step 22467: {'lr': 0.0003072947887566768, 'samples': 11503616, 'steps': 22467, 'loss/train': 2.516126871109009} +02/25/2022 06:41:10 - INFO - codeparrot_training - Step 22468: {'lr': 0.0003072788616712845, 'samples': 11504128, 'steps': 22468, 'loss/train': 2.287215232849121} +02/25/2022 06:41:15 - INFO - codeparrot_training - Step 22469: {'lr': 0.0003072629343405278, 'samples': 11504640, 'steps': 22469, 'loss/train': 2.2294540405273438} +02/25/2022 06:41:19 - INFO - codeparrot_training - Step 22470: {'lr': 0.00030724700676447485, 'samples': 11505152, 'steps': 22470, 'loss/train': 2.8658690452575684} +02/25/2022 06:41:24 - INFO - codeparrot_training - Step 22471: {'lr': 0.00030723107894319393, 'samples': 11505664, 'steps': 22471, 'loss/train': 1.7094885110855103} +02/25/2022 06:41:28 - INFO - codeparrot_training - Step 22472: {'lr': 0.00030721515087675326, 'samples': 11506176, 'steps': 22472, 'loss/train': 2.535473585128784} +02/25/2022 06:41:33 - INFO - codeparrot_training - Step 22473: {'lr': 0.00030719922256522105, 'samples': 11506688, 'steps': 22473, 'loss/train': 2.112297534942627} +02/25/2022 06:41:37 - INFO - codeparrot_training - Step 22474: {'lr': 0.0003071832940086655, 'samples': 11507200, 'steps': 22474, 'loss/train': 1.5509376525878906} +02/25/2022 06:41:42 - INFO - codeparrot_training - Step 22475: {'lr': 0.0003071673652071549, 'samples': 11507712, 'steps': 22475, 'loss/train': 1.7644466161727905} +02/25/2022 06:41:46 - INFO - codeparrot_training - Step 22476: {'lr': 0.0003071514361607575, 'samples': 11508224, 'steps': 22476, 'loss/train': 1.521186351776123} +02/25/2022 06:41:52 - INFO - codeparrot_training - Step 22477: {'lr': 0.0003071355068695414, 'samples': 11508736, 'steps': 22477, 'loss/train': 1.7518539428710938} +02/25/2022 06:41:56 - INFO - codeparrot_training - Step 22478: {'lr': 0.000307119577333575, 'samples': 11509248, 'steps': 22478, 'loss/train': 1.415287971496582} +02/25/2022 06:42:01 - INFO - codeparrot_training - Step 22479: {'lr': 0.0003071036475529264, 'samples': 11509760, 'steps': 22479, 'loss/train': 2.7647347450256348} +02/25/2022 06:42:05 - INFO - codeparrot_training - Step 22480: {'lr': 0.00030708771752766396, 'samples': 11510272, 'steps': 22480, 'loss/train': 0.5685231685638428} +02/25/2022 06:42:10 - INFO - codeparrot_training - Step 22481: {'lr': 0.0003070717872578558, 'samples': 11510784, 'steps': 22481, 'loss/train': 2.328578472137451} +02/25/2022 06:42:14 - INFO - codeparrot_training - Step 22482: {'lr': 0.0003070558567435703, 'samples': 11511296, 'steps': 22482, 'loss/train': 1.4170125722885132} +02/25/2022 06:42:19 - INFO - codeparrot_training - Step 22483: {'lr': 0.00030703992598487564, 'samples': 11511808, 'steps': 22483, 'loss/train': 2.0182225704193115} +02/25/2022 06:42:23 - INFO - codeparrot_training - Step 22484: {'lr': 0.00030702399498184005, 'samples': 11512320, 'steps': 22484, 'loss/train': 2.1395626068115234} +02/25/2022 06:42:28 - INFO - codeparrot_training - Step 22485: {'lr': 0.0003070080637345317, 'samples': 11512832, 'steps': 22485, 'loss/train': 2.0802996158599854} +02/25/2022 06:42:32 - INFO - codeparrot_training - Step 22486: {'lr': 0.00030699213224301896, 'samples': 11513344, 'steps': 22486, 'loss/train': 1.6864551305770874} +02/25/2022 06:42:38 - INFO - codeparrot_training - Step 22487: {'lr': 0.00030697620050737, 'samples': 11513856, 'steps': 22487, 'loss/train': 1.5342485904693604} +02/25/2022 06:42:42 - INFO - codeparrot_training - Step 22488: {'lr': 0.00030696026852765313, 'samples': 11514368, 'steps': 22488, 'loss/train': 2.027613401412964} +02/25/2022 06:42:47 - INFO - codeparrot_training - Step 22489: {'lr': 0.00030694433630393654, 'samples': 11514880, 'steps': 22489, 'loss/train': 2.235450267791748} +02/25/2022 06:42:51 - INFO - codeparrot_training - Step 22490: {'lr': 0.00030692840383628845, 'samples': 11515392, 'steps': 22490, 'loss/train': 2.5430638790130615} +02/25/2022 06:42:56 - INFO - codeparrot_training - Step 22491: {'lr': 0.0003069124711247772, 'samples': 11515904, 'steps': 22491, 'loss/train': 1.5179001092910767} +02/25/2022 06:42:59 - INFO - codeparrot_training - Step 22492: {'lr': 0.000306896538169471, 'samples': 11516416, 'steps': 22492, 'loss/train': 1.5861786603927612} +02/25/2022 06:43:05 - INFO - codeparrot_training - Step 22493: {'lr': 0.000306880604970438, 'samples': 11516928, 'steps': 22493, 'loss/train': 2.1795713901519775} +02/25/2022 06:43:09 - INFO - codeparrot_training - Step 22494: {'lr': 0.00030686467152774667, 'samples': 11517440, 'steps': 22494, 'loss/train': 1.5836273431777954} +02/25/2022 06:43:14 - INFO - codeparrot_training - Step 22495: {'lr': 0.0003068487378414651, 'samples': 11517952, 'steps': 22495, 'loss/train': 1.8670686483383179} +02/25/2022 06:43:18 - INFO - codeparrot_training - Step 22496: {'lr': 0.0003068328039116616, 'samples': 11518464, 'steps': 22496, 'loss/train': 2.420645236968994} +02/25/2022 06:43:23 - INFO - codeparrot_training - Step 22497: {'lr': 0.0003068168697384044, 'samples': 11518976, 'steps': 22497, 'loss/train': 1.6798491477966309} +02/25/2022 06:43:27 - INFO - codeparrot_training - Step 22498: {'lr': 0.0003068009353217618, 'samples': 11519488, 'steps': 22498, 'loss/train': 1.7972851991653442} +02/25/2022 06:43:33 - INFO - codeparrot_training - Step 22499: {'lr': 0.00030678500066180206, 'samples': 11520000, 'steps': 22499, 'loss/train': 1.7461363077163696} +02/25/2022 06:43:36 - INFO - codeparrot_training - Step 22500: {'lr': 0.0003067690657585933, 'samples': 11520512, 'steps': 22500, 'loss/train': 2.513232707977295} +02/25/2022 06:43:42 - INFO - codeparrot_training - Step 22501: {'lr': 0.000306753130612204, 'samples': 11521024, 'steps': 22501, 'loss/train': 1.3351699113845825} +02/25/2022 06:43:45 - INFO - codeparrot_training - Step 22502: {'lr': 0.0003067371952227022, 'samples': 11521536, 'steps': 22502, 'loss/train': 0.22736585140228271} +02/25/2022 06:43:51 - INFO - codeparrot_training - Step 22503: {'lr': 0.0003067212595901564, 'samples': 11522048, 'steps': 22503, 'loss/train': 2.130584716796875} +02/25/2022 06:43:54 - INFO - codeparrot_training - Step 22504: {'lr': 0.00030670532371463463, 'samples': 11522560, 'steps': 22504, 'loss/train': 1.7992980480194092} +02/25/2022 06:44:00 - INFO - codeparrot_training - Step 22505: {'lr': 0.0003066893875962053, 'samples': 11523072, 'steps': 22505, 'loss/train': 1.839167833328247} +02/25/2022 06:44:03 - INFO - codeparrot_training - Step 22506: {'lr': 0.0003066734512349366, 'samples': 11523584, 'steps': 22506, 'loss/train': 0.9062573909759521} +02/25/2022 06:44:09 - INFO - codeparrot_training - Step 22507: {'lr': 0.00030665751463089687, 'samples': 11524096, 'steps': 22507, 'loss/train': 2.3805291652679443} +02/25/2022 06:44:12 - INFO - codeparrot_training - Step 22508: {'lr': 0.0003066415777841543, 'samples': 11524608, 'steps': 22508, 'loss/train': 2.0209567546844482} +02/25/2022 06:44:18 - INFO - codeparrot_training - Step 22509: {'lr': 0.0003066256406947773, 'samples': 11525120, 'steps': 22509, 'loss/train': 2.0216166973114014} +02/25/2022 06:44:22 - INFO - codeparrot_training - Step 22510: {'lr': 0.0003066097033628339, 'samples': 11525632, 'steps': 22510, 'loss/train': 3.337493658065796} +02/25/2022 06:44:27 - INFO - codeparrot_training - Step 22511: {'lr': 0.0003065937657883926, 'samples': 11526144, 'steps': 22511, 'loss/train': 2.563512086868286} +02/25/2022 06:44:31 - INFO - codeparrot_training - Step 22512: {'lr': 0.0003065778279715215, 'samples': 11526656, 'steps': 22512, 'loss/train': 1.7350542545318604} +02/25/2022 06:44:36 - INFO - codeparrot_training - Step 22513: {'lr': 0.000306561889912289, 'samples': 11527168, 'steps': 22513, 'loss/train': 1.6011396646499634} +02/25/2022 06:44:40 - INFO - codeparrot_training - Step 22514: {'lr': 0.0003065459516107633, 'samples': 11527680, 'steps': 22514, 'loss/train': 2.065385580062866} +02/25/2022 06:44:45 - INFO - codeparrot_training - Step 22515: {'lr': 0.0003065300130670127, 'samples': 11528192, 'steps': 22515, 'loss/train': 1.7015151977539062} +02/25/2022 06:44:49 - INFO - codeparrot_training - Step 22516: {'lr': 0.0003065140742811055, 'samples': 11528704, 'steps': 22516, 'loss/train': 1.1297411918640137} +02/25/2022 06:44:54 - INFO - codeparrot_training - Step 22517: {'lr': 0.0003064981352531099, 'samples': 11529216, 'steps': 22517, 'loss/train': 1.5894063711166382} +02/25/2022 06:44:58 - INFO - codeparrot_training - Step 22518: {'lr': 0.0003064821959830943, 'samples': 11529728, 'steps': 22518, 'loss/train': 2.866004228591919} +02/25/2022 06:45:04 - INFO - codeparrot_training - Step 22519: {'lr': 0.00030646625647112686, 'samples': 11530240, 'steps': 22519, 'loss/train': 1.7332532405853271} +02/25/2022 06:45:08 - INFO - codeparrot_training - Step 22520: {'lr': 0.000306450316717276, 'samples': 11530752, 'steps': 22520, 'loss/train': 1.2880399227142334} +02/25/2022 06:45:13 - INFO - codeparrot_training - Step 22521: {'lr': 0.0003064343767216098, 'samples': 11531264, 'steps': 22521, 'loss/train': 3.3929717540740967} +02/25/2022 06:45:17 - INFO - codeparrot_training - Step 22522: {'lr': 0.00030641843648419664, 'samples': 11531776, 'steps': 22522, 'loss/train': 1.7112188339233398} +02/25/2022 06:45:22 - INFO - codeparrot_training - Step 22523: {'lr': 0.0003064024960051048, 'samples': 11532288, 'steps': 22523, 'loss/train': 2.9232795238494873} +02/25/2022 06:45:26 - INFO - codeparrot_training - Step 22524: {'lr': 0.00030638655528440273, 'samples': 11532800, 'steps': 22524, 'loss/train': 1.5316267013549805} +02/25/2022 06:45:31 - INFO - codeparrot_training - Step 22525: {'lr': 0.0003063706143221584, 'samples': 11533312, 'steps': 22525, 'loss/train': 1.7459677457809448} +02/25/2022 06:45:35 - INFO - codeparrot_training - Step 22526: {'lr': 0.00030635467311844033, 'samples': 11533824, 'steps': 22526, 'loss/train': 1.469390869140625} +02/25/2022 06:45:40 - INFO - codeparrot_training - Step 22527: {'lr': 0.00030633873167331674, 'samples': 11534336, 'steps': 22527, 'loss/train': 2.3531734943389893} +02/25/2022 06:45:44 - INFO - codeparrot_training - Step 22528: {'lr': 0.0003063227899868559, 'samples': 11534848, 'steps': 22528, 'loss/train': 1.3716802597045898} +02/25/2022 06:45:49 - INFO - codeparrot_training - Step 22529: {'lr': 0.00030630684805912613, 'samples': 11535360, 'steps': 22529, 'loss/train': 2.041776657104492} +02/25/2022 06:45:53 - INFO - codeparrot_training - Step 22530: {'lr': 0.00030629090589019567, 'samples': 11535872, 'steps': 22530, 'loss/train': 2.340679168701172} +02/25/2022 06:45:58 - INFO - codeparrot_training - Step 22531: {'lr': 0.00030627496348013285, 'samples': 11536384, 'steps': 22531, 'loss/train': 2.0246407985687256} +02/25/2022 06:46:02 - INFO - codeparrot_training - Step 22532: {'lr': 0.0003062590208290059, 'samples': 11536896, 'steps': 22532, 'loss/train': 2.2053232192993164} +02/25/2022 06:46:07 - INFO - codeparrot_training - Step 22533: {'lr': 0.00030624307793688334, 'samples': 11537408, 'steps': 22533, 'loss/train': 2.2110037803649902} +02/25/2022 06:46:11 - INFO - codeparrot_training - Step 22534: {'lr': 0.00030622713480383314, 'samples': 11537920, 'steps': 22534, 'loss/train': 1.9606841802597046} +02/25/2022 06:46:17 - INFO - codeparrot_training - Step 22535: {'lr': 0.0003062111914299238, 'samples': 11538432, 'steps': 22535, 'loss/train': 2.604990005493164} +02/25/2022 06:46:20 - INFO - codeparrot_training - Step 22536: {'lr': 0.0003061952478152236, 'samples': 11538944, 'steps': 22536, 'loss/train': 1.3748756647109985} +02/25/2022 06:46:26 - INFO - codeparrot_training - Step 22537: {'lr': 0.0003061793039598007, 'samples': 11539456, 'steps': 22537, 'loss/train': 1.0223584175109863} +02/25/2022 06:46:29 - INFO - codeparrot_training - Step 22538: {'lr': 0.0003061633598637236, 'samples': 11539968, 'steps': 22538, 'loss/train': 1.9234931468963623} +02/25/2022 06:46:35 - INFO - codeparrot_training - Step 22539: {'lr': 0.00030614741552706045, 'samples': 11540480, 'steps': 22539, 'loss/train': 2.226428508758545} +02/25/2022 06:46:38 - INFO - codeparrot_training - Step 22540: {'lr': 0.0003061314709498796, 'samples': 11540992, 'steps': 22540, 'loss/train': 0.689652144908905} +02/25/2022 06:46:44 - INFO - codeparrot_training - Step 22541: {'lr': 0.00030611552613224935, 'samples': 11541504, 'steps': 22541, 'loss/train': 2.6056675910949707} +02/25/2022 06:46:47 - INFO - codeparrot_training - Step 22542: {'lr': 0.00030609958107423804, 'samples': 11542016, 'steps': 22542, 'loss/train': 2.230349540710449} +02/25/2022 06:46:54 - INFO - codeparrot_training - Step 22543: {'lr': 0.0003060836357759139, 'samples': 11542528, 'steps': 22543, 'loss/train': 1.6245497465133667} +02/25/2022 06:46:57 - INFO - codeparrot_training - Step 22544: {'lr': 0.00030606769023734534, 'samples': 11543040, 'steps': 22544, 'loss/train': 1.721929907798767} +02/25/2022 06:47:03 - INFO - codeparrot_training - Step 22545: {'lr': 0.0003060517444586005, 'samples': 11543552, 'steps': 22545, 'loss/train': 1.7743027210235596} +02/25/2022 06:47:06 - INFO - codeparrot_training - Step 22546: {'lr': 0.0003060357984397479, 'samples': 11544064, 'steps': 22546, 'loss/train': 2.000258684158325} +02/25/2022 06:47:12 - INFO - codeparrot_training - Step 22547: {'lr': 0.00030601985218085565, 'samples': 11544576, 'steps': 22547, 'loss/train': 8.809915542602539} +02/25/2022 06:47:15 - INFO - codeparrot_training - Step 22548: {'lr': 0.00030600390568199213, 'samples': 11545088, 'steps': 22548, 'loss/train': 3.437715768814087} +02/25/2022 06:47:21 - INFO - codeparrot_training - Step 22549: {'lr': 0.00030598795894322574, 'samples': 11545600, 'steps': 22549, 'loss/train': 2.2694318294525146} +02/25/2022 06:47:24 - INFO - codeparrot_training - Step 22550: {'lr': 0.00030597201196462466, 'samples': 11546112, 'steps': 22550, 'loss/train': 0.8924354314804077} +02/25/2022 06:47:30 - INFO - codeparrot_training - Step 22551: {'lr': 0.00030595606474625726, 'samples': 11546624, 'steps': 22551, 'loss/train': 1.3325576782226562} +02/25/2022 06:47:33 - INFO - codeparrot_training - Step 22552: {'lr': 0.00030594011728819184, 'samples': 11547136, 'steps': 22552, 'loss/train': 1.0910444259643555} +02/25/2022 06:47:39 - INFO - codeparrot_training - Step 22553: {'lr': 0.00030592416959049666, 'samples': 11547648, 'steps': 22553, 'loss/train': 0.49670591950416565} +02/25/2022 06:47:42 - INFO - codeparrot_training - Step 22554: {'lr': 0.00030590822165324017, 'samples': 11548160, 'steps': 22554, 'loss/train': 2.146921157836914} +02/25/2022 06:47:48 - INFO - codeparrot_training - Step 22555: {'lr': 0.00030589227347649063, 'samples': 11548672, 'steps': 22555, 'loss/train': 2.43127179145813} +02/25/2022 06:47:52 - INFO - codeparrot_training - Step 22556: {'lr': 0.00030587632506031624, 'samples': 11549184, 'steps': 22556, 'loss/train': 2.116577625274658} +02/25/2022 06:47:58 - INFO - codeparrot_training - Step 22557: {'lr': 0.0003058603764047855, 'samples': 11549696, 'steps': 22557, 'loss/train': 1.4954054355621338} +02/25/2022 06:48:01 - INFO - codeparrot_training - Step 22558: {'lr': 0.00030584442750996666, 'samples': 11550208, 'steps': 22558, 'loss/train': 2.4073333740234375} +02/25/2022 06:48:06 - INFO - codeparrot_training - Step 22559: {'lr': 0.000305828478375928, 'samples': 11550720, 'steps': 22559, 'loss/train': 1.8182624578475952} +02/25/2022 06:48:10 - INFO - codeparrot_training - Step 22560: {'lr': 0.00030581252900273786, 'samples': 11551232, 'steps': 22560, 'loss/train': 1.1319001913070679} +02/25/2022 06:48:15 - INFO - codeparrot_training - Step 22561: {'lr': 0.0003057965793904646, 'samples': 11551744, 'steps': 22561, 'loss/train': 2.161604642868042} +02/25/2022 06:48:19 - INFO - codeparrot_training - Step 22562: {'lr': 0.00030578062953917645, 'samples': 11552256, 'steps': 22562, 'loss/train': 1.9175020456314087} +02/25/2022 06:48:24 - INFO - codeparrot_training - Step 22563: {'lr': 0.00030576467944894186, 'samples': 11552768, 'steps': 22563, 'loss/train': 2.0127949714660645} +02/25/2022 06:48:28 - INFO - codeparrot_training - Step 22564: {'lr': 0.000305748729119829, 'samples': 11553280, 'steps': 22564, 'loss/train': 1.4060845375061035} +02/25/2022 06:48:33 - INFO - codeparrot_training - Step 22565: {'lr': 0.00030573277855190634, 'samples': 11553792, 'steps': 22565, 'loss/train': 2.1600468158721924} +02/25/2022 06:48:37 - INFO - codeparrot_training - Step 22566: {'lr': 0.0003057168277452422, 'samples': 11554304, 'steps': 22566, 'loss/train': 1.054341435432434} +02/25/2022 06:48:42 - INFO - codeparrot_training - Step 22567: {'lr': 0.0003057008766999048, 'samples': 11554816, 'steps': 22567, 'loss/train': 1.209962010383606} +02/25/2022 06:48:46 - INFO - codeparrot_training - Step 22568: {'lr': 0.0003056849254159625, 'samples': 11555328, 'steps': 22568, 'loss/train': 1.5850061178207397} +02/25/2022 06:48:52 - INFO - codeparrot_training - Step 22569: {'lr': 0.00030566897389348375, 'samples': 11555840, 'steps': 22569, 'loss/train': 3.0002191066741943} +02/25/2022 06:48:56 - INFO - codeparrot_training - Step 22570: {'lr': 0.00030565302213253677, 'samples': 11556352, 'steps': 22570, 'loss/train': 2.1758604049682617} +02/25/2022 06:49:01 - INFO - codeparrot_training - Step 22571: {'lr': 0.0003056370701331899, 'samples': 11556864, 'steps': 22571, 'loss/train': 2.4038541316986084} +02/25/2022 06:49:05 - INFO - codeparrot_training - Step 22572: {'lr': 0.0003056211178955115, 'samples': 11557376, 'steps': 22572, 'loss/train': 1.7791229486465454} +02/25/2022 06:49:10 - INFO - codeparrot_training - Step 22573: {'lr': 0.00030560516541956983, 'samples': 11557888, 'steps': 22573, 'loss/train': 1.9129211902618408} +02/25/2022 06:49:14 - INFO - codeparrot_training - Step 22574: {'lr': 0.0003055892127054334, 'samples': 11558400, 'steps': 22574, 'loss/train': 1.517530918121338} +02/25/2022 06:49:19 - INFO - codeparrot_training - Step 22575: {'lr': 0.00030557325975317037, 'samples': 11558912, 'steps': 22575, 'loss/train': 1.8406928777694702} +02/25/2022 06:49:23 - INFO - codeparrot_training - Step 22576: {'lr': 0.00030555730656284914, 'samples': 11559424, 'steps': 22576, 'loss/train': 1.956127643585205} +02/25/2022 06:49:28 - INFO - codeparrot_training - Step 22577: {'lr': 0.000305541353134538, 'samples': 11559936, 'steps': 22577, 'loss/train': 1.8516039848327637} +02/25/2022 06:49:32 - INFO - codeparrot_training - Step 22578: {'lr': 0.0003055253994683054, 'samples': 11560448, 'steps': 22578, 'loss/train': 1.3101212978363037} +02/25/2022 06:49:37 - INFO - codeparrot_training - Step 22579: {'lr': 0.0003055094455642196, 'samples': 11560960, 'steps': 22579, 'loss/train': 1.2690118551254272} +02/25/2022 06:49:41 - INFO - codeparrot_training - Step 22580: {'lr': 0.000305493491422349, 'samples': 11561472, 'steps': 22580, 'loss/train': 1.714158058166504} +02/25/2022 06:49:47 - INFO - codeparrot_training - Step 22581: {'lr': 0.0003054775370427619, 'samples': 11561984, 'steps': 22581, 'loss/train': 1.7413963079452515} +02/25/2022 06:49:50 - INFO - codeparrot_training - Step 22582: {'lr': 0.00030546158242552657, 'samples': 11562496, 'steps': 22582, 'loss/train': 1.9933078289031982} +02/25/2022 06:49:56 - INFO - codeparrot_training - Step 22583: {'lr': 0.00030544562757071154, 'samples': 11563008, 'steps': 22583, 'loss/train': 1.5428571701049805} +02/25/2022 06:49:59 - INFO - codeparrot_training - Step 22584: {'lr': 0.000305429672478385, 'samples': 11563520, 'steps': 22584, 'loss/train': 1.779737949371338} +02/25/2022 06:50:05 - INFO - codeparrot_training - Step 22585: {'lr': 0.0003054137171486153, 'samples': 11564032, 'steps': 22585, 'loss/train': 1.7626997232437134} +02/25/2022 06:50:08 - INFO - codeparrot_training - Step 22586: {'lr': 0.0003053977615814709, 'samples': 11564544, 'steps': 22586, 'loss/train': 2.107056140899658} +02/25/2022 06:50:14 - INFO - codeparrot_training - Step 22587: {'lr': 0.00030538180577702005, 'samples': 11565056, 'steps': 22587, 'loss/train': 1.5872962474822998} +02/25/2022 06:50:17 - INFO - codeparrot_training - Step 22588: {'lr': 0.00030536584973533113, 'samples': 11565568, 'steps': 22588, 'loss/train': 1.234094262123108} +02/25/2022 06:50:23 - INFO - codeparrot_training - Step 22589: {'lr': 0.0003053498934564725, 'samples': 11566080, 'steps': 22589, 'loss/train': 1.825188159942627} +02/25/2022 06:50:27 - INFO - codeparrot_training - Step 22590: {'lr': 0.00030533393694051256, 'samples': 11566592, 'steps': 22590, 'loss/train': 1.8414422273635864} +02/25/2022 06:50:33 - INFO - codeparrot_training - Step 22591: {'lr': 0.0003053179801875195, 'samples': 11567104, 'steps': 22591, 'loss/train': 0.3257031738758087} +02/25/2022 06:50:36 - INFO - codeparrot_training - Step 22592: {'lr': 0.00030530202319756184, 'samples': 11567616, 'steps': 22592, 'loss/train': 1.338649034500122} +02/25/2022 06:50:41 - INFO - codeparrot_training - Step 22593: {'lr': 0.0003052860659707078, 'samples': 11568128, 'steps': 22593, 'loss/train': 2.513063669204712} +02/25/2022 06:50:47 - INFO - codeparrot_training - Step 22594: {'lr': 0.0003052701085070259, 'samples': 11568640, 'steps': 22594, 'loss/train': 2.3820440769195557} +02/25/2022 06:50:50 - INFO - codeparrot_training - Step 22595: {'lr': 0.00030525415080658437, 'samples': 11569152, 'steps': 22595, 'loss/train': 2.8618366718292236} +02/25/2022 06:50:56 - INFO - codeparrot_training - Step 22596: {'lr': 0.0003052381928694516, 'samples': 11569664, 'steps': 22596, 'loss/train': 2.079991340637207} +02/25/2022 06:50:59 - INFO - codeparrot_training - Step 22597: {'lr': 0.00030522223469569594, 'samples': 11570176, 'steps': 22597, 'loss/train': 1.8576678037643433} +02/25/2022 06:51:05 - INFO - codeparrot_training - Step 22598: {'lr': 0.00030520627628538577, 'samples': 11570688, 'steps': 22598, 'loss/train': 2.1208832263946533} +02/25/2022 06:51:08 - INFO - codeparrot_training - Step 22599: {'lr': 0.0003051903176385894, 'samples': 11571200, 'steps': 22599, 'loss/train': 1.9460619688034058} +02/25/2022 06:51:15 - INFO - codeparrot_training - Step 22600: {'lr': 0.00030517435875537536, 'samples': 11571712, 'steps': 22600, 'loss/train': 0.8828034400939941} +02/25/2022 06:51:18 - INFO - codeparrot_training - Step 22601: {'lr': 0.0003051583996358118, 'samples': 11572224, 'steps': 22601, 'loss/train': 2.2202556133270264} +02/25/2022 06:51:24 - INFO - codeparrot_training - Step 22602: {'lr': 0.00030514244027996705, 'samples': 11572736, 'steps': 22602, 'loss/train': 1.241764783859253} +02/25/2022 06:51:27 - INFO - codeparrot_training - Step 22603: {'lr': 0.00030512648068790985, 'samples': 11573248, 'steps': 22603, 'loss/train': 1.4387774467468262} +02/25/2022 06:51:33 - INFO - codeparrot_training - Step 22604: {'lr': 0.0003051105208597081, 'samples': 11573760, 'steps': 22604, 'loss/train': 1.8418468236923218} +02/25/2022 06:51:36 - INFO - codeparrot_training - Step 22605: {'lr': 0.00030509456079543044, 'samples': 11574272, 'steps': 22605, 'loss/train': 1.8847973346710205} +02/25/2022 06:51:42 - INFO - codeparrot_training - Step 22606: {'lr': 0.0003050786004951452, 'samples': 11574784, 'steps': 22606, 'loss/train': 1.0247842073440552} +02/25/2022 06:51:45 - INFO - codeparrot_training - Step 22607: {'lr': 0.00030506263995892075, 'samples': 11575296, 'steps': 22607, 'loss/train': 2.8790903091430664} +02/25/2022 06:51:51 - INFO - codeparrot_training - Step 22608: {'lr': 0.0003050466791868254, 'samples': 11575808, 'steps': 22608, 'loss/train': 2.366002321243286} +02/25/2022 06:51:54 - INFO - codeparrot_training - Step 22609: {'lr': 0.0003050307181789276, 'samples': 11576320, 'steps': 22609, 'loss/train': 0.2187565416097641} +02/25/2022 06:52:00 - INFO - codeparrot_training - Step 22610: {'lr': 0.0003050147569352956, 'samples': 11576832, 'steps': 22610, 'loss/train': 1.1164970397949219} +02/25/2022 06:52:03 - INFO - codeparrot_training - Step 22611: {'lr': 0.0003049987954559979, 'samples': 11577344, 'steps': 22611, 'loss/train': 2.433436870574951} +02/25/2022 06:52:09 - INFO - codeparrot_training - Step 22612: {'lr': 0.0003049828337411028, 'samples': 11577856, 'steps': 22612, 'loss/train': 1.5017539262771606} +02/25/2022 06:52:12 - INFO - codeparrot_training - Step 22613: {'lr': 0.00030496687179067865, 'samples': 11578368, 'steps': 22613, 'loss/train': 2.2317848205566406} +02/25/2022 06:52:18 - INFO - codeparrot_training - Step 22614: {'lr': 0.000304950909604794, 'samples': 11578880, 'steps': 22614, 'loss/train': 2.0177857875823975} +02/25/2022 06:52:22 - INFO - codeparrot_training - Step 22615: {'lr': 0.000304934947183517, 'samples': 11579392, 'steps': 22615, 'loss/train': 0.43425843119621277} +02/25/2022 06:52:27 - INFO - codeparrot_training - Step 22616: {'lr': 0.00030491898452691626, 'samples': 11579904, 'steps': 22616, 'loss/train': 1.6392916440963745} +02/25/2022 06:52:31 - INFO - codeparrot_training - Step 22617: {'lr': 0.0003049030216350599, 'samples': 11580416, 'steps': 22617, 'loss/train': 2.577558755874634} +02/25/2022 06:52:36 - INFO - codeparrot_training - Step 22618: {'lr': 0.00030488705850801646, 'samples': 11580928, 'steps': 22618, 'loss/train': 1.6446527242660522} +02/25/2022 06:52:40 - INFO - codeparrot_training - Step 22619: {'lr': 0.00030487109514585426, 'samples': 11581440, 'steps': 22619, 'loss/train': 2.1492116451263428} +02/25/2022 06:52:45 - INFO - codeparrot_training - Step 22620: {'lr': 0.0003048551315486418, 'samples': 11581952, 'steps': 22620, 'loss/train': 2.0937840938568115} +02/25/2022 06:52:49 - INFO - codeparrot_training - Step 22621: {'lr': 0.00030483916771644734, 'samples': 11582464, 'steps': 22621, 'loss/train': 1.7621278762817383} +02/25/2022 06:52:54 - INFO - codeparrot_training - Step 22622: {'lr': 0.0003048232036493392, 'samples': 11582976, 'steps': 22622, 'loss/train': 4.088159084320068} +02/25/2022 06:52:58 - INFO - codeparrot_training - Step 22623: {'lr': 0.00030480723934738597, 'samples': 11583488, 'steps': 22623, 'loss/train': 0.8258746266365051} +02/25/2022 06:53:03 - INFO - codeparrot_training - Step 22624: {'lr': 0.00030479127481065595, 'samples': 11584000, 'steps': 22624, 'loss/train': 1.5218144655227661} +02/25/2022 06:53:07 - INFO - codeparrot_training - Step 22625: {'lr': 0.0003047753100392174, 'samples': 11584512, 'steps': 22625, 'loss/train': 2.1196799278259277} +02/25/2022 06:53:13 - INFO - codeparrot_training - Step 22626: {'lr': 0.00030475934503313893, 'samples': 11585024, 'steps': 22626, 'loss/train': 1.9896446466445923} +02/25/2022 06:53:17 - INFO - codeparrot_training - Step 22627: {'lr': 0.0003047433797924888, 'samples': 11585536, 'steps': 22627, 'loss/train': 1.3188180923461914} +02/25/2022 06:53:22 - INFO - codeparrot_training - Step 22628: {'lr': 0.00030472741431733535, 'samples': 11586048, 'steps': 22628, 'loss/train': 1.964693307876587} +02/25/2022 06:53:26 - INFO - codeparrot_training - Step 22629: {'lr': 0.0003047114486077471, 'samples': 11586560, 'steps': 22629, 'loss/train': 2.071418046951294} +02/25/2022 06:53:31 - INFO - codeparrot_training - Step 22630: {'lr': 0.0003046954826637923, 'samples': 11587072, 'steps': 22630, 'loss/train': 1.6267136335372925} +02/25/2022 06:53:34 - INFO - codeparrot_training - Step 22631: {'lr': 0.0003046795164855395, 'samples': 11587584, 'steps': 22631, 'loss/train': 1.8542166948318481} +02/25/2022 06:53:40 - INFO - codeparrot_training - Step 22632: {'lr': 0.000304663550073057, 'samples': 11588096, 'steps': 22632, 'loss/train': 1.6350902318954468} +02/25/2022 06:53:43 - INFO - codeparrot_training - Step 22633: {'lr': 0.00030464758342641315, 'samples': 11588608, 'steps': 22633, 'loss/train': 2.111021041870117} +02/25/2022 06:53:49 - INFO - codeparrot_training - Step 22634: {'lr': 0.00030463161654567647, 'samples': 11589120, 'steps': 22634, 'loss/train': 1.3240230083465576} +02/25/2022 06:53:53 - INFO - codeparrot_training - Step 22635: {'lr': 0.00030461564943091524, 'samples': 11589632, 'steps': 22635, 'loss/train': 1.9727163314819336} +02/25/2022 06:53:59 - INFO - codeparrot_training - Step 22636: {'lr': 0.0003045996820821979, 'samples': 11590144, 'steps': 22636, 'loss/train': 2.2866928577423096} +02/25/2022 06:54:02 - INFO - codeparrot_training - Step 22637: {'lr': 0.00030458371449959293, 'samples': 11590656, 'steps': 22637, 'loss/train': 1.6979786157608032} +02/25/2022 06:54:08 - INFO - codeparrot_training - Step 22638: {'lr': 0.00030456774668316864, 'samples': 11591168, 'steps': 22638, 'loss/train': 1.0800909996032715} +02/25/2022 06:54:11 - INFO - codeparrot_training - Step 22639: {'lr': 0.0003045517786329934, 'samples': 11591680, 'steps': 22639, 'loss/train': 1.8978074789047241} +02/25/2022 06:54:17 - INFO - codeparrot_training - Step 22640: {'lr': 0.0003045358103491357, 'samples': 11592192, 'steps': 22640, 'loss/train': 1.728190541267395} +02/25/2022 06:54:20 - INFO - codeparrot_training - Step 22641: {'lr': 0.00030451984183166384, 'samples': 11592704, 'steps': 22641, 'loss/train': 2.319401741027832} +02/25/2022 06:54:26 - INFO - codeparrot_training - Step 22642: {'lr': 0.0003045038730806464, 'samples': 11593216, 'steps': 22642, 'loss/train': 2.149230718612671} +02/25/2022 06:54:29 - INFO - codeparrot_training - Step 22643: {'lr': 0.00030448790409615155, 'samples': 11593728, 'steps': 22643, 'loss/train': 1.6373363733291626} +02/25/2022 06:54:35 - INFO - codeparrot_training - Step 22644: {'lr': 0.00030447193487824796, 'samples': 11594240, 'steps': 22644, 'loss/train': 2.55241322517395} +02/25/2022 06:54:38 - INFO - codeparrot_training - Step 22645: {'lr': 0.00030445596542700383, 'samples': 11594752, 'steps': 22645, 'loss/train': 1.3099442720413208} +02/25/2022 06:54:44 - INFO - codeparrot_training - Step 22646: {'lr': 0.00030443999574248764, 'samples': 11595264, 'steps': 22646, 'loss/train': 1.0037115812301636} +02/25/2022 06:54:48 - INFO - codeparrot_training - Step 22647: {'lr': 0.00030442402582476775, 'samples': 11595776, 'steps': 22647, 'loss/train': 2.4970531463623047} +02/25/2022 06:54:53 - INFO - codeparrot_training - Step 22648: {'lr': 0.00030440805567391274, 'samples': 11596288, 'steps': 22648, 'loss/train': 1.5511350631713867} +02/25/2022 06:54:57 - INFO - codeparrot_training - Step 22649: {'lr': 0.00030439208528999074, 'samples': 11596800, 'steps': 22649, 'loss/train': 1.1514108180999756} +02/25/2022 06:55:02 - INFO - codeparrot_training - Step 22650: {'lr': 0.0003043761146730704, 'samples': 11597312, 'steps': 22650, 'loss/train': 1.9396324157714844} +02/25/2022 06:55:06 - INFO - codeparrot_training - Step 22651: {'lr': 0.00030436014382322004, 'samples': 11597824, 'steps': 22651, 'loss/train': 0.1052892878651619} +02/25/2022 06:55:12 - INFO - codeparrot_training - Step 22652: {'lr': 0.00030434417274050805, 'samples': 11598336, 'steps': 22652, 'loss/train': 2.3675334453582764} +02/25/2022 06:55:15 - INFO - codeparrot_training - Step 22653: {'lr': 0.00030432820142500296, 'samples': 11598848, 'steps': 22653, 'loss/train': 2.3279995918273926} +02/25/2022 06:55:20 - INFO - codeparrot_training - Step 22654: {'lr': 0.00030431222987677305, 'samples': 11599360, 'steps': 22654, 'loss/train': 2.154399871826172} +02/25/2022 06:55:24 - INFO - codeparrot_training - Step 22655: {'lr': 0.0003042962580958868, 'samples': 11599872, 'steps': 22655, 'loss/train': 1.4444462060928345} +02/25/2022 06:55:29 - INFO - codeparrot_training - Step 22656: {'lr': 0.00030428028608241257, 'samples': 11600384, 'steps': 22656, 'loss/train': 2.0426950454711914} +02/25/2022 06:55:33 - INFO - codeparrot_training - Step 22657: {'lr': 0.00030426431383641893, 'samples': 11600896, 'steps': 22657, 'loss/train': 3.1355977058410645} +02/25/2022 06:55:39 - INFO - codeparrot_training - Step 22658: {'lr': 0.00030424834135797413, 'samples': 11601408, 'steps': 22658, 'loss/train': 1.8279017210006714} +02/25/2022 06:55:42 - INFO - codeparrot_training - Step 22659: {'lr': 0.00030423236864714676, 'samples': 11601920, 'steps': 22659, 'loss/train': 1.784300684928894} +02/25/2022 06:55:47 - INFO - codeparrot_training - Step 22660: {'lr': 0.00030421639570400505, 'samples': 11602432, 'steps': 22660, 'loss/train': 1.6259421110153198} +02/25/2022 06:55:51 - INFO - codeparrot_training - Step 22661: {'lr': 0.0003042004225286176, 'samples': 11602944, 'steps': 22661, 'loss/train': 1.819698452949524} +02/25/2022 06:55:57 - INFO - codeparrot_training - Step 22662: {'lr': 0.00030418444912105256, 'samples': 11603456, 'steps': 22662, 'loss/train': 2.13950777053833} +02/25/2022 06:56:01 - INFO - codeparrot_training - Step 22663: {'lr': 0.0003041684754813787, 'samples': 11603968, 'steps': 22663, 'loss/train': 1.4907573461532593} +02/25/2022 06:56:06 - INFO - codeparrot_training - Step 22664: {'lr': 0.0003041525016096643, 'samples': 11604480, 'steps': 22664, 'loss/train': 1.2841544151306152} +02/25/2022 06:56:10 - INFO - codeparrot_training - Step 22665: {'lr': 0.0003041365275059777, 'samples': 11604992, 'steps': 22665, 'loss/train': 1.6701160669326782} +02/25/2022 06:56:15 - INFO - codeparrot_training - Step 22666: {'lr': 0.0003041205531703875, 'samples': 11605504, 'steps': 22666, 'loss/train': 2.068481683731079} +02/25/2022 06:56:19 - INFO - codeparrot_training - Step 22667: {'lr': 0.0003041045786029619, 'samples': 11606016, 'steps': 22667, 'loss/train': 2.0479958057403564} +02/25/2022 06:56:24 - INFO - codeparrot_training - Step 22668: {'lr': 0.0003040886038037696, 'samples': 11606528, 'steps': 22668, 'loss/train': 2.3006644248962402} +02/25/2022 06:56:28 - INFO - codeparrot_training - Step 22669: {'lr': 0.00030407262877287883, 'samples': 11607040, 'steps': 22669, 'loss/train': 0.9008424878120422} +02/25/2022 06:56:33 - INFO - codeparrot_training - Step 22670: {'lr': 0.00030405665351035816, 'samples': 11607552, 'steps': 22670, 'loss/train': 1.136783480644226} +02/25/2022 06:56:37 - INFO - codeparrot_training - Step 22671: {'lr': 0.0003040406780162759, 'samples': 11608064, 'steps': 22671, 'loss/train': 2.953453302383423} +02/25/2022 06:56:43 - INFO - codeparrot_training - Step 22672: {'lr': 0.00030402470229070054, 'samples': 11608576, 'steps': 22672, 'loss/train': 2.416731357574463} +02/25/2022 06:56:47 - INFO - codeparrot_training - Step 22673: {'lr': 0.00030400872633370047, 'samples': 11609088, 'steps': 22673, 'loss/train': 2.1524155139923096} +02/25/2022 06:56:52 - INFO - codeparrot_training - Step 22674: {'lr': 0.0003039927501453442, 'samples': 11609600, 'steps': 22674, 'loss/train': 1.6534168720245361} +02/25/2022 06:56:56 - INFO - codeparrot_training - Step 22675: {'lr': 0.0003039767737257002, 'samples': 11610112, 'steps': 22675, 'loss/train': 2.829561471939087} +02/25/2022 06:57:01 - INFO - codeparrot_training - Step 22676: {'lr': 0.0003039607970748368, 'samples': 11610624, 'steps': 22676, 'loss/train': 1.9732459783554077} +02/25/2022 06:57:05 - INFO - codeparrot_training - Step 22677: {'lr': 0.00030394482019282246, 'samples': 11611136, 'steps': 22677, 'loss/train': 2.2965991497039795} +02/25/2022 06:57:10 - INFO - codeparrot_training - Step 22678: {'lr': 0.0003039288430797256, 'samples': 11611648, 'steps': 22678, 'loss/train': 2.116220235824585} +02/25/2022 06:57:14 - INFO - codeparrot_training - Step 22679: {'lr': 0.0003039128657356147, 'samples': 11612160, 'steps': 22679, 'loss/train': 1.1863832473754883} +02/25/2022 06:57:19 - INFO - codeparrot_training - Step 22680: {'lr': 0.0003038968881605583, 'samples': 11612672, 'steps': 22680, 'loss/train': 1.2606350183486938} +02/25/2022 06:57:23 - INFO - codeparrot_training - Step 22681: {'lr': 0.00030388091035462466, 'samples': 11613184, 'steps': 22681, 'loss/train': 1.0160527229309082} +02/25/2022 06:57:29 - INFO - codeparrot_training - Step 22682: {'lr': 0.00030386493231788234, 'samples': 11613696, 'steps': 22682, 'loss/train': 1.9279738664627075} +02/25/2022 06:57:33 - INFO - codeparrot_training - Step 22683: {'lr': 0.0003038489540503997, 'samples': 11614208, 'steps': 22683, 'loss/train': 2.256998300552368} +02/25/2022 06:57:38 - INFO - codeparrot_training - Step 22684: {'lr': 0.0003038329755522453, 'samples': 11614720, 'steps': 22684, 'loss/train': 0.8462879061698914} +02/25/2022 06:57:42 - INFO - codeparrot_training - Step 22685: {'lr': 0.0003038169968234875, 'samples': 11615232, 'steps': 22685, 'loss/train': 2.325924873352051} +02/25/2022 06:57:47 - INFO - codeparrot_training - Step 22686: {'lr': 0.0003038010178641948, 'samples': 11615744, 'steps': 22686, 'loss/train': 1.0872704982757568} +02/25/2022 06:57:51 - INFO - codeparrot_training - Step 22687: {'lr': 0.00030378503867443555, 'samples': 11616256, 'steps': 22687, 'loss/train': 1.750091314315796} +02/25/2022 06:57:56 - INFO - codeparrot_training - Step 22688: {'lr': 0.0003037690592542784, 'samples': 11616768, 'steps': 22688, 'loss/train': 2.5829105377197266} +02/25/2022 06:58:00 - INFO - codeparrot_training - Step 22689: {'lr': 0.0003037530796037916, 'samples': 11617280, 'steps': 22689, 'loss/train': 1.859145998954773} +02/25/2022 06:58:05 - INFO - codeparrot_training - Step 22690: {'lr': 0.0003037370997230436, 'samples': 11617792, 'steps': 22690, 'loss/train': 1.6390535831451416} +02/25/2022 06:58:09 - INFO - codeparrot_training - Step 22691: {'lr': 0.000303721119612103, 'samples': 11618304, 'steps': 22691, 'loss/train': 1.4562523365020752} +02/25/2022 06:58:15 - INFO - codeparrot_training - Step 22692: {'lr': 0.00030370513927103826, 'samples': 11618816, 'steps': 22692, 'loss/train': 2.485083818435669} +02/25/2022 06:58:18 - INFO - codeparrot_training - Step 22693: {'lr': 0.0003036891586999176, 'samples': 11619328, 'steps': 22693, 'loss/train': 2.0450894832611084} +02/25/2022 06:58:24 - INFO - codeparrot_training - Step 22694: {'lr': 0.00030367317789880985, 'samples': 11619840, 'steps': 22694, 'loss/train': 2.5536746978759766} +02/25/2022 06:58:27 - INFO - codeparrot_training - Step 22695: {'lr': 0.000303657196867783, 'samples': 11620352, 'steps': 22695, 'loss/train': 1.9993013143539429} +02/25/2022 06:58:33 - INFO - codeparrot_training - Step 22696: {'lr': 0.0003036412156069059, 'samples': 11620864, 'steps': 22696, 'loss/train': 1.2640838623046875} +02/25/2022 06:58:36 - INFO - codeparrot_training - Step 22697: {'lr': 0.00030362523411624686, 'samples': 11621376, 'steps': 22697, 'loss/train': 1.169634461402893} +02/25/2022 06:58:42 - INFO - codeparrot_training - Step 22698: {'lr': 0.0003036092523958743, 'samples': 11621888, 'steps': 22698, 'loss/train': 1.7449144124984741} +02/25/2022 06:58:45 - INFO - codeparrot_training - Step 22699: {'lr': 0.00030359327044585685, 'samples': 11622400, 'steps': 22699, 'loss/train': 1.1600278615951538} +02/25/2022 06:58:51 - INFO - codeparrot_training - Step 22700: {'lr': 0.00030357728826626266, 'samples': 11622912, 'steps': 22700, 'loss/train': 1.6789931058883667} +02/25/2022 06:58:55 - INFO - codeparrot_training - Step 22701: {'lr': 0.0003035613058571605, 'samples': 11623424, 'steps': 22701, 'loss/train': 1.6943378448486328} +02/25/2022 06:59:00 - INFO - codeparrot_training - Step 22702: {'lr': 0.00030354532321861865, 'samples': 11623936, 'steps': 22702, 'loss/train': 1.4822139739990234} +02/25/2022 06:59:03 - INFO - codeparrot_training - Step 22703: {'lr': 0.0003035293403507057, 'samples': 11624448, 'steps': 22703, 'loss/train': 2.8570351600646973} +02/25/2022 06:59:09 - INFO - codeparrot_training - Step 22704: {'lr': 0.00030351335725349, 'samples': 11624960, 'steps': 22704, 'loss/train': 2.175259828567505} +02/25/2022 06:59:13 - INFO - codeparrot_training - Step 22705: {'lr': 0.0003034973739270401, 'samples': 11625472, 'steps': 22705, 'loss/train': 1.9089807271957397} +02/25/2022 06:59:18 - INFO - codeparrot_training - Step 22706: {'lr': 0.0003034813903714244, 'samples': 11625984, 'steps': 22706, 'loss/train': 1.247238278388977} +02/25/2022 06:59:21 - INFO - codeparrot_training - Step 22707: {'lr': 0.0003034654065867115, 'samples': 11626496, 'steps': 22707, 'loss/train': 2.4615378379821777} +02/25/2022 06:59:28 - INFO - codeparrot_training - Step 22708: {'lr': 0.0003034494225729697, 'samples': 11627008, 'steps': 22708, 'loss/train': 1.6899162530899048} +02/25/2022 06:59:33 - INFO - codeparrot_training - Step 22709: {'lr': 0.0003034334383302676, 'samples': 11627520, 'steps': 22709, 'loss/train': 1.9336423873901367} +02/25/2022 06:59:37 - INFO - codeparrot_training - Step 22710: {'lr': 0.0003034174538586735, 'samples': 11628032, 'steps': 22710, 'loss/train': 1.7221230268478394} +02/25/2022 06:59:42 - INFO - codeparrot_training - Step 22711: {'lr': 0.00030340146915825605, 'samples': 11628544, 'steps': 22711, 'loss/train': 1.6769905090332031} +02/25/2022 06:59:46 - INFO - codeparrot_training - Step 22712: {'lr': 0.00030338548422908373, 'samples': 11629056, 'steps': 22712, 'loss/train': 2.2398524284362793} +02/25/2022 06:59:52 - INFO - codeparrot_training - Step 22713: {'lr': 0.00030336949907122483, 'samples': 11629568, 'steps': 22713, 'loss/train': 1.357518196105957} +02/25/2022 06:59:55 - INFO - codeparrot_training - Step 22714: {'lr': 0.0003033535136847481, 'samples': 11630080, 'steps': 22714, 'loss/train': 0.7835584282875061} +02/25/2022 06:59:59 - INFO - codeparrot_training - Step 22715: {'lr': 0.0003033375280697218, 'samples': 11630592, 'steps': 22715, 'loss/train': 1.5173217058181763} +02/25/2022 07:00:04 - INFO - codeparrot_training - Step 22716: {'lr': 0.0003033215422262144, 'samples': 11631104, 'steps': 22716, 'loss/train': 1.8177915811538696} +02/25/2022 07:00:08 - INFO - codeparrot_training - Step 22717: {'lr': 0.0003033055561542945, 'samples': 11631616, 'steps': 22717, 'loss/train': 0.5128215551376343} +02/25/2022 07:00:14 - INFO - codeparrot_training - Step 22718: {'lr': 0.00030328956985403043, 'samples': 11632128, 'steps': 22718, 'loss/train': 2.6350560188293457} +02/25/2022 07:00:18 - INFO - codeparrot_training - Step 22719: {'lr': 0.0003032735833254909, 'samples': 11632640, 'steps': 22719, 'loss/train': 2.1741247177124023} +02/25/2022 07:00:23 - INFO - codeparrot_training - Step 22720: {'lr': 0.0003032575965687442, 'samples': 11633152, 'steps': 22720, 'loss/train': 1.713106393814087} +02/25/2022 07:00:27 - INFO - codeparrot_training - Step 22721: {'lr': 0.0003032416095838588, 'samples': 11633664, 'steps': 22721, 'loss/train': 8.00403118133545} +02/25/2022 07:00:32 - INFO - codeparrot_training - Step 22722: {'lr': 0.00030322562237090336, 'samples': 11634176, 'steps': 22722, 'loss/train': 1.9899433851242065} +02/25/2022 07:00:36 - INFO - codeparrot_training - Step 22723: {'lr': 0.00030320963492994616, 'samples': 11634688, 'steps': 22723, 'loss/train': 2.51070237159729} +02/25/2022 07:00:41 - INFO - codeparrot_training - Step 22724: {'lr': 0.00030319364726105584, 'samples': 11635200, 'steps': 22724, 'loss/train': 2.5846736431121826} +02/25/2022 07:00:45 - INFO - codeparrot_training - Step 22725: {'lr': 0.00030317765936430086, 'samples': 11635712, 'steps': 22725, 'loss/train': 1.0974221229553223} +02/25/2022 07:00:50 - INFO - codeparrot_training - Step 22726: {'lr': 0.0003031616712397496, 'samples': 11636224, 'steps': 22726, 'loss/train': 1.6972495317459106} +02/25/2022 07:00:54 - INFO - codeparrot_training - Step 22727: {'lr': 0.00030314568288747064, 'samples': 11636736, 'steps': 22727, 'loss/train': 1.8170210123062134} +02/25/2022 07:01:00 - INFO - codeparrot_training - Step 22728: {'lr': 0.00030312969430753244, 'samples': 11637248, 'steps': 22728, 'loss/train': 1.1301255226135254} +02/25/2022 07:01:03 - INFO - codeparrot_training - Step 22729: {'lr': 0.00030311370550000356, 'samples': 11637760, 'steps': 22729, 'loss/train': 1.0847762823104858} +02/25/2022 07:01:09 - INFO - codeparrot_training - Step 22730: {'lr': 0.0003030977164649523, 'samples': 11638272, 'steps': 22730, 'loss/train': 2.3306732177734375} +02/25/2022 07:01:12 - INFO - codeparrot_training - Step 22731: {'lr': 0.0003030817272024474, 'samples': 11638784, 'steps': 22731, 'loss/train': 1.3591725826263428} +02/25/2022 07:01:18 - INFO - codeparrot_training - Step 22732: {'lr': 0.0003030657377125572, 'samples': 11639296, 'steps': 22732, 'loss/train': 2.274630308151245} +02/25/2022 07:01:21 - INFO - codeparrot_training - Step 22733: {'lr': 0.0003030497479953503, 'samples': 11639808, 'steps': 22733, 'loss/train': 1.75478994846344} +02/25/2022 07:01:27 - INFO - codeparrot_training - Step 22734: {'lr': 0.00030303375805089503, 'samples': 11640320, 'steps': 22734, 'loss/train': 2.390540599822998} +02/25/2022 07:01:31 - INFO - codeparrot_training - Step 22735: {'lr': 0.00030301776787925995, 'samples': 11640832, 'steps': 22735, 'loss/train': 1.2266637086868286} +02/25/2022 07:01:34 - INFO - codeparrot_training - Step 22736: {'lr': 0.00030300177748051373, 'samples': 11641344, 'steps': 22736, 'loss/train': 0.8526584506034851} +02/25/2022 07:01:40 - INFO - codeparrot_training - Step 22737: {'lr': 0.0003029857868547246, 'samples': 11641856, 'steps': 22737, 'loss/train': 1.6764588356018066} +02/25/2022 07:01:43 - INFO - codeparrot_training - Step 22738: {'lr': 0.0003029697960019612, 'samples': 11642368, 'steps': 22738, 'loss/train': 1.7571521997451782} +02/25/2022 07:01:50 - INFO - codeparrot_training - Step 22739: {'lr': 0.0003029538049222921, 'samples': 11642880, 'steps': 22739, 'loss/train': 1.9561799764633179} +02/25/2022 07:01:53 - INFO - codeparrot_training - Step 22740: {'lr': 0.00030293781361578567, 'samples': 11643392, 'steps': 22740, 'loss/train': 0.8752596974372864} +02/25/2022 07:01:59 - INFO - codeparrot_training - Step 22741: {'lr': 0.0003029218220825104, 'samples': 11643904, 'steps': 22741, 'loss/train': 1.9061131477355957} +02/25/2022 07:02:02 - INFO - codeparrot_training - Step 22742: {'lr': 0.00030290583032253486, 'samples': 11644416, 'steps': 22742, 'loss/train': 1.623331904411316} +02/25/2022 07:02:08 - INFO - codeparrot_training - Step 22743: {'lr': 0.00030288983833592757, 'samples': 11644928, 'steps': 22743, 'loss/train': 1.826436996459961} +02/25/2022 07:02:11 - INFO - codeparrot_training - Step 22744: {'lr': 0.00030287384612275704, 'samples': 11645440, 'steps': 22744, 'loss/train': 1.921907663345337} +02/25/2022 07:02:17 - INFO - codeparrot_training - Step 22745: {'lr': 0.00030285785368309174, 'samples': 11645952, 'steps': 22745, 'loss/train': 2.0599167346954346} +02/25/2022 07:02:20 - INFO - codeparrot_training - Step 22746: {'lr': 0.0003028418610170001, 'samples': 11646464, 'steps': 22746, 'loss/train': 2.42594051361084} +02/25/2022 07:02:26 - INFO - codeparrot_training - Step 22747: {'lr': 0.00030282586812455076, 'samples': 11646976, 'steps': 22747, 'loss/train': 1.217502474784851} +02/25/2022 07:02:29 - INFO - codeparrot_training - Step 22748: {'lr': 0.00030280987500581213, 'samples': 11647488, 'steps': 22748, 'loss/train': 2.151460886001587} +02/25/2022 07:02:35 - INFO - codeparrot_training - Step 22749: {'lr': 0.00030279388166085287, 'samples': 11648000, 'steps': 22749, 'loss/train': 8.756054878234863} +02/25/2022 07:02:38 - INFO - codeparrot_training - Step 22750: {'lr': 0.0003027778880897413, 'samples': 11648512, 'steps': 22750, 'loss/train': 2.2929177284240723} +02/25/2022 07:02:44 - INFO - codeparrot_training - Step 22751: {'lr': 0.000302761894292546, 'samples': 11649024, 'steps': 22751, 'loss/train': 1.9948689937591553} +02/25/2022 07:02:47 - INFO - codeparrot_training - Step 22752: {'lr': 0.0003027459002693356, 'samples': 11649536, 'steps': 22752, 'loss/train': 1.965183973312378} +02/25/2022 07:02:53 - INFO - codeparrot_training - Step 22753: {'lr': 0.00030272990602017843, 'samples': 11650048, 'steps': 22753, 'loss/train': 4.539176940917969} +02/25/2022 07:02:56 - INFO - codeparrot_training - Step 22754: {'lr': 0.0003027139115451431, 'samples': 11650560, 'steps': 22754, 'loss/train': 2.828314781188965} +02/25/2022 07:03:04 - INFO - codeparrot_training - Step 22755: {'lr': 0.0003026979168442982, 'samples': 11651072, 'steps': 22755, 'loss/train': 0.9160630702972412} +02/25/2022 07:03:08 - INFO - codeparrot_training - Step 22756: {'lr': 0.000302681921917712, 'samples': 11651584, 'steps': 22756, 'loss/train': 2.709104299545288} +02/25/2022 07:03:14 - INFO - codeparrot_training - Step 22757: {'lr': 0.00030266592676545326, 'samples': 11652096, 'steps': 22757, 'loss/train': 2.413318157196045} +02/25/2022 07:03:17 - INFO - codeparrot_training - Step 22758: {'lr': 0.0003026499313875903, 'samples': 11652608, 'steps': 22758, 'loss/train': 1.2083826065063477} +02/25/2022 07:03:23 - INFO - codeparrot_training - Step 22759: {'lr': 0.00030263393578419196, 'samples': 11653120, 'steps': 22759, 'loss/train': 1.7300212383270264} +02/25/2022 07:03:26 - INFO - codeparrot_training - Step 22760: {'lr': 0.0003026179399553264, 'samples': 11653632, 'steps': 22760, 'loss/train': 1.4456440210342407} +02/25/2022 07:03:32 - INFO - codeparrot_training - Step 22761: {'lr': 0.0003026019439010624, 'samples': 11654144, 'steps': 22761, 'loss/train': 1.550153136253357} +02/25/2022 07:03:36 - INFO - codeparrot_training - Step 22762: {'lr': 0.0003025859476214683, 'samples': 11654656, 'steps': 22762, 'loss/train': 0.6863503456115723} +02/25/2022 07:03:41 - INFO - codeparrot_training - Step 22763: {'lr': 0.00030256995111661275, 'samples': 11655168, 'steps': 22763, 'loss/train': 1.0480544567108154} +02/25/2022 07:03:45 - INFO - codeparrot_training - Step 22764: {'lr': 0.0003025539543865642, 'samples': 11655680, 'steps': 22764, 'loss/train': 2.2686541080474854} +02/25/2022 07:03:50 - INFO - codeparrot_training - Step 22765: {'lr': 0.00030253795743139113, 'samples': 11656192, 'steps': 22765, 'loss/train': 2.4163551330566406} +02/25/2022 07:03:54 - INFO - codeparrot_training - Step 22766: {'lr': 0.0003025219602511622, 'samples': 11656704, 'steps': 22766, 'loss/train': 2.0099222660064697} +02/25/2022 07:04:01 - INFO - codeparrot_training - Step 22767: {'lr': 0.00030250596284594583, 'samples': 11657216, 'steps': 22767, 'loss/train': 1.368360996246338} +02/25/2022 07:04:05 - INFO - codeparrot_training - Step 22768: {'lr': 0.0003024899652158107, 'samples': 11657728, 'steps': 22768, 'loss/train': 2.145542621612549} +02/25/2022 07:04:10 - INFO - codeparrot_training - Step 22769: {'lr': 0.00030247396736082506, 'samples': 11658240, 'steps': 22769, 'loss/train': 1.8363025188446045} +02/25/2022 07:04:14 - INFO - codeparrot_training - Step 22770: {'lr': 0.0003024579692810577, 'samples': 11658752, 'steps': 22770, 'loss/train': 2.3849079608917236} +02/25/2022 07:04:19 - INFO - codeparrot_training - Step 22771: {'lr': 0.00030244197097657705, 'samples': 11659264, 'steps': 22771, 'loss/train': 1.414366602897644} +02/25/2022 07:04:23 - INFO - codeparrot_training - Step 22772: {'lr': 0.0003024259724474516, 'samples': 11659776, 'steps': 22772, 'loss/train': 3.4421193599700928} +02/25/2022 07:04:28 - INFO - codeparrot_training - Step 22773: {'lr': 0.00030240997369375, 'samples': 11660288, 'steps': 22773, 'loss/train': 1.5428099632263184} +02/25/2022 07:04:32 - INFO - codeparrot_training - Step 22774: {'lr': 0.0003023939747155406, 'samples': 11660800, 'steps': 22774, 'loss/train': 0.8700119853019714} +02/25/2022 07:04:37 - INFO - codeparrot_training - Step 22775: {'lr': 0.00030237797551289225, 'samples': 11661312, 'steps': 22775, 'loss/train': 2.581195831298828} +02/25/2022 07:04:41 - INFO - codeparrot_training - Step 22776: {'lr': 0.0003023619760858731, 'samples': 11661824, 'steps': 22776, 'loss/train': 2.4814250469207764} +02/25/2022 07:04:48 - INFO - codeparrot_training - Step 22777: {'lr': 0.000302345976434552, 'samples': 11662336, 'steps': 22777, 'loss/train': 1.3026471138000488} +02/25/2022 07:04:51 - INFO - codeparrot_training - Step 22778: {'lr': 0.0003023299765589973, 'samples': 11662848, 'steps': 22778, 'loss/train': 1.9169707298278809} +02/25/2022 07:04:57 - INFO - codeparrot_training - Step 22779: {'lr': 0.0003023139764592776, 'samples': 11663360, 'steps': 22779, 'loss/train': 3.4005424976348877} +02/25/2022 07:05:00 - INFO - codeparrot_training - Step 22780: {'lr': 0.0003022979761354614, 'samples': 11663872, 'steps': 22780, 'loss/train': 1.7546783685684204} +02/25/2022 07:05:06 - INFO - codeparrot_training - Step 22781: {'lr': 0.00030228197558761737, 'samples': 11664384, 'steps': 22781, 'loss/train': 2.767155885696411} +02/25/2022 07:05:09 - INFO - codeparrot_training - Step 22782: {'lr': 0.00030226597481581387, 'samples': 11664896, 'steps': 22782, 'loss/train': 1.376948356628418} +02/25/2022 07:05:15 - INFO - codeparrot_training - Step 22783: {'lr': 0.0003022499738201195, 'samples': 11665408, 'steps': 22783, 'loss/train': 8.588897705078125} +02/25/2022 07:05:18 - INFO - codeparrot_training - Step 22784: {'lr': 0.0003022339726006029, 'samples': 11665920, 'steps': 22784, 'loss/train': 2.3826470375061035} +02/25/2022 07:05:24 - INFO - codeparrot_training - Step 22785: {'lr': 0.0003022179711573326, 'samples': 11666432, 'steps': 22785, 'loss/train': 2.0467541217803955} +02/25/2022 07:05:28 - INFO - codeparrot_training - Step 22786: {'lr': 0.000302201969490377, 'samples': 11666944, 'steps': 22786, 'loss/train': 2.019573211669922} +02/25/2022 07:05:31 - INFO - codeparrot_training - Step 22787: {'lr': 0.00030218596759980476, 'samples': 11667456, 'steps': 22787, 'loss/train': 2.2680153846740723} +02/25/2022 07:05:39 - INFO - codeparrot_training - Step 22788: {'lr': 0.00030216996548568443, 'samples': 11667968, 'steps': 22788, 'loss/train': 1.7450085878372192} +02/25/2022 07:05:43 - INFO - codeparrot_training - Step 22789: {'lr': 0.0003021539631480845, 'samples': 11668480, 'steps': 22789, 'loss/train': 2.415402889251709} +02/25/2022 07:05:46 - INFO - codeparrot_training - Step 22790: {'lr': 0.0003021379605870736, 'samples': 11668992, 'steps': 22790, 'loss/train': 2.3866066932678223} +02/25/2022 07:05:51 - INFO - codeparrot_training - Step 22791: {'lr': 0.00030212195780272025, 'samples': 11669504, 'steps': 22791, 'loss/train': 2.0148651599884033} +02/25/2022 07:05:55 - INFO - codeparrot_training - Step 22792: {'lr': 0.000302105954795093, 'samples': 11670016, 'steps': 22792, 'loss/train': 1.82499361038208} +02/25/2022 07:06:00 - INFO - codeparrot_training - Step 22793: {'lr': 0.00030208995156426024, 'samples': 11670528, 'steps': 22793, 'loss/train': 2.2363693714141846} +02/25/2022 07:06:04 - INFO - codeparrot_training - Step 22794: {'lr': 0.0003020739481102908, 'samples': 11671040, 'steps': 22794, 'loss/train': 2.268592119216919} +02/25/2022 07:06:09 - INFO - codeparrot_training - Step 22795: {'lr': 0.00030205794443325296, 'samples': 11671552, 'steps': 22795, 'loss/train': 1.5524967908859253} +02/25/2022 07:06:13 - INFO - codeparrot_training - Step 22796: {'lr': 0.00030204194053321556, 'samples': 11672064, 'steps': 22796, 'loss/train': 1.9256868362426758} +02/25/2022 07:06:18 - INFO - codeparrot_training - Step 22797: {'lr': 0.00030202593641024696, 'samples': 11672576, 'steps': 22797, 'loss/train': 1.6537578105926514} +02/25/2022 07:06:22 - INFO - codeparrot_training - Step 22798: {'lr': 0.0003020099320644157, 'samples': 11673088, 'steps': 22798, 'loss/train': 0.9273232817649841} +02/25/2022 07:06:29 - INFO - codeparrot_training - Step 22799: {'lr': 0.00030199392749579053, 'samples': 11673600, 'steps': 22799, 'loss/train': 2.043025255203247} +02/25/2022 07:06:33 - INFO - codeparrot_training - Step 22800: {'lr': 0.0003019779227044398, 'samples': 11674112, 'steps': 22800, 'loss/train': 2.1142122745513916} +02/25/2022 07:06:38 - INFO - codeparrot_training - Step 22801: {'lr': 0.0003019619176904322, 'samples': 11674624, 'steps': 22801, 'loss/train': 1.5707728862762451} +02/25/2022 07:06:42 - INFO - codeparrot_training - Step 22802: {'lr': 0.00030194591245383625, 'samples': 11675136, 'steps': 22802, 'loss/train': 1.275285005569458} +02/25/2022 07:06:47 - INFO - codeparrot_training - Step 22803: {'lr': 0.00030192990699472053, 'samples': 11675648, 'steps': 22803, 'loss/train': 1.4428529739379883} +02/25/2022 07:06:51 - INFO - codeparrot_training - Step 22804: {'lr': 0.00030191390131315357, 'samples': 11676160, 'steps': 22804, 'loss/train': 1.102246880531311} +02/25/2022 07:06:56 - INFO - codeparrot_training - Step 22805: {'lr': 0.0003018978954092039, 'samples': 11676672, 'steps': 22805, 'loss/train': 1.888580560684204} +02/25/2022 07:07:02 - INFO - codeparrot_training - Step 22806: {'lr': 0.0003018818892829401, 'samples': 11677184, 'steps': 22806, 'loss/train': 1.2770750522613525} +02/25/2022 07:07:05 - INFO - codeparrot_training - Step 22807: {'lr': 0.00030186588293443077, 'samples': 11677696, 'steps': 22807, 'loss/train': 1.3058511018753052} +02/25/2022 07:07:11 - INFO - codeparrot_training - Step 22808: {'lr': 0.0003018498763637445, 'samples': 11678208, 'steps': 22808, 'loss/train': 1.4961966276168823} +02/25/2022 07:07:14 - INFO - codeparrot_training - Step 22809: {'lr': 0.0003018338695709498, 'samples': 11678720, 'steps': 22809, 'loss/train': 1.1272844076156616} +02/25/2022 07:07:18 - INFO - codeparrot_training - Step 22810: {'lr': 0.0003018178625561153, 'samples': 11679232, 'steps': 22810, 'loss/train': 0.9606180787086487} +02/25/2022 07:07:24 - INFO - codeparrot_training - Step 22811: {'lr': 0.0003018018553193095, 'samples': 11679744, 'steps': 22811, 'loss/train': 1.904482126235962} +02/25/2022 07:07:27 - INFO - codeparrot_training - Step 22812: {'lr': 0.00030178584786060106, 'samples': 11680256, 'steps': 22812, 'loss/train': 0.4109419584274292} +02/25/2022 07:07:33 - INFO - codeparrot_training - Step 22813: {'lr': 0.00030176984018005836, 'samples': 11680768, 'steps': 22813, 'loss/train': 0.9216345548629761} +02/25/2022 07:07:36 - INFO - codeparrot_training - Step 22814: {'lr': 0.0003017538322777502, 'samples': 11681280, 'steps': 22814, 'loss/train': 1.3572323322296143} +02/25/2022 07:07:43 - INFO - codeparrot_training - Step 22815: {'lr': 0.00030173782415374503, 'samples': 11681792, 'steps': 22815, 'loss/train': 1.9806339740753174} +02/25/2022 07:07:47 - INFO - codeparrot_training - Step 22816: {'lr': 0.00030172181580811146, 'samples': 11682304, 'steps': 22816, 'loss/train': 3.126826763153076} +02/25/2022 07:07:52 - INFO - codeparrot_training - Step 22817: {'lr': 0.0003017058072409181, 'samples': 11682816, 'steps': 22817, 'loss/train': 1.5081586837768555} +02/25/2022 07:07:56 - INFO - codeparrot_training - Step 22818: {'lr': 0.0003016897984522334, 'samples': 11683328, 'steps': 22818, 'loss/train': 1.6155692338943481} +02/25/2022 07:08:01 - INFO - codeparrot_training - Step 22819: {'lr': 0.00030167378944212606, 'samples': 11683840, 'steps': 22819, 'loss/train': 3.465468406677246} +02/25/2022 07:08:05 - INFO - codeparrot_training - Step 22820: {'lr': 0.0003016577802106645, 'samples': 11684352, 'steps': 22820, 'loss/train': 2.298074722290039} +02/25/2022 07:08:10 - INFO - codeparrot_training - Step 22821: {'lr': 0.0003016417707579176, 'samples': 11684864, 'steps': 22821, 'loss/train': 1.7258412837982178} +02/25/2022 07:08:14 - INFO - codeparrot_training - Step 22822: {'lr': 0.00030162576108395364, 'samples': 11685376, 'steps': 22822, 'loss/train': 2.8233084678649902} +02/25/2022 07:08:21 - INFO - codeparrot_training - Step 22823: {'lr': 0.0003016097511888414, 'samples': 11685888, 'steps': 22823, 'loss/train': 2.577441930770874} +02/25/2022 07:08:25 - INFO - codeparrot_training - Step 22824: {'lr': 0.0003015937410726493, 'samples': 11686400, 'steps': 22824, 'loss/train': 2.5049593448638916} +02/25/2022 07:08:30 - INFO - codeparrot_training - Step 22825: {'lr': 0.000301577730735446, 'samples': 11686912, 'steps': 22825, 'loss/train': 2.476512908935547} +02/25/2022 07:08:34 - INFO - codeparrot_training - Step 22826: {'lr': 0.00030156172017730006, 'samples': 11687424, 'steps': 22826, 'loss/train': 0.84358811378479} +02/25/2022 07:08:39 - INFO - codeparrot_training - Step 22827: {'lr': 0.00030154570939828015, 'samples': 11687936, 'steps': 22827, 'loss/train': 2.8828892707824707} +02/25/2022 07:08:43 - INFO - codeparrot_training - Step 22828: {'lr': 0.0003015296983984547, 'samples': 11688448, 'steps': 22828, 'loss/train': 5.055017471313477} +02/25/2022 07:08:48 - INFO - codeparrot_training - Step 22829: {'lr': 0.00030151368717789244, 'samples': 11688960, 'steps': 22829, 'loss/train': 1.475337028503418} +02/25/2022 07:08:52 - INFO - codeparrot_training - Step 22830: {'lr': 0.0003014976757366619, 'samples': 11689472, 'steps': 22830, 'loss/train': 2.0757195949554443} +02/25/2022 07:08:57 - INFO - codeparrot_training - Step 22831: {'lr': 0.0003014816640748316, 'samples': 11689984, 'steps': 22831, 'loss/train': 2.0199716091156006} +02/25/2022 07:09:01 - INFO - codeparrot_training - Step 22832: {'lr': 0.00030146565219247033, 'samples': 11690496, 'steps': 22832, 'loss/train': 1.2075138092041016} +02/25/2022 07:09:06 - INFO - codeparrot_training - Step 22833: {'lr': 0.0003014496400896465, 'samples': 11691008, 'steps': 22833, 'loss/train': 3.0877788066864014} +02/25/2022 07:09:10 - INFO - codeparrot_training - Step 22834: {'lr': 0.0003014336277664287, 'samples': 11691520, 'steps': 22834, 'loss/train': 2.5128939151763916} +02/25/2022 07:09:17 - INFO - codeparrot_training - Step 22835: {'lr': 0.0003014176152228856, 'samples': 11692032, 'steps': 22835, 'loss/train': 1.7883402109146118} +02/25/2022 07:09:21 - INFO - codeparrot_training - Step 22836: {'lr': 0.00030140160245908584, 'samples': 11692544, 'steps': 22836, 'loss/train': 1.8974905014038086} +02/25/2022 07:09:26 - INFO - codeparrot_training - Step 22837: {'lr': 0.0003013855894750978, 'samples': 11693056, 'steps': 22837, 'loss/train': 2.3971943855285645} +02/25/2022 07:09:30 - INFO - codeparrot_training - Step 22838: {'lr': 0.0003013695762709903, 'samples': 11693568, 'steps': 22838, 'loss/train': 2.310260057449341} +02/25/2022 07:09:35 - INFO - codeparrot_training - Step 22839: {'lr': 0.0003013535628468318, 'samples': 11694080, 'steps': 22839, 'loss/train': 3.591212034225464} +02/25/2022 07:09:39 - INFO - codeparrot_training - Step 22840: {'lr': 0.000301337549202691, 'samples': 11694592, 'steps': 22840, 'loss/train': 2.19614315032959} +02/25/2022 07:09:44 - INFO - codeparrot_training - Step 22841: {'lr': 0.0003013215353386364, 'samples': 11695104, 'steps': 22841, 'loss/train': 1.233171820640564} +02/25/2022 07:09:48 - INFO - codeparrot_training - Step 22842: {'lr': 0.00030130552125473667, 'samples': 11695616, 'steps': 22842, 'loss/train': 0.5252600908279419} +02/25/2022 07:09:53 - INFO - codeparrot_training - Step 22843: {'lr': 0.00030128950695106034, 'samples': 11696128, 'steps': 22843, 'loss/train': 2.377997875213623} +02/25/2022 07:09:57 - INFO - codeparrot_training - Step 22844: {'lr': 0.00030127349242767607, 'samples': 11696640, 'steps': 22844, 'loss/train': 1.7373830080032349} +02/25/2022 07:10:04 - INFO - codeparrot_training - Step 22845: {'lr': 0.0003012574776846524, 'samples': 11697152, 'steps': 22845, 'loss/train': 0.16632595658302307} +02/25/2022 07:10:07 - INFO - codeparrot_training - Step 22846: {'lr': 0.00030124146272205804, 'samples': 11697664, 'steps': 22846, 'loss/train': 1.4264466762542725} +02/25/2022 07:10:13 - INFO - codeparrot_training - Step 22847: {'lr': 0.00030122544753996143, 'samples': 11698176, 'steps': 22847, 'loss/train': 2.196805238723755} +02/25/2022 07:10:16 - INFO - codeparrot_training - Step 22848: {'lr': 0.00030120943213843136, 'samples': 11698688, 'steps': 22848, 'loss/train': 1.886110782623291} +02/25/2022 07:10:22 - INFO - codeparrot_training - Step 22849: {'lr': 0.0003011934165175363, 'samples': 11699200, 'steps': 22849, 'loss/train': 2.2160489559173584} +02/25/2022 07:10:26 - INFO - codeparrot_training - Step 22850: {'lr': 0.00030117740067734495, 'samples': 11699712, 'steps': 22850, 'loss/train': 1.6635175943374634} +02/25/2022 07:10:31 - INFO - codeparrot_training - Step 22851: {'lr': 0.0003011613846179258, 'samples': 11700224, 'steps': 22851, 'loss/train': 1.7605781555175781} +02/25/2022 07:10:35 - INFO - codeparrot_training - Step 22852: {'lr': 0.0003011453683393476, 'samples': 11700736, 'steps': 22852, 'loss/train': 0.5744600296020508} +02/25/2022 07:10:40 - INFO - codeparrot_training - Step 22853: {'lr': 0.0003011293518416788, 'samples': 11701248, 'steps': 22853, 'loss/train': 0.6563553810119629} +02/25/2022 07:10:44 - INFO - codeparrot_training - Step 22854: {'lr': 0.00030111333512498813, 'samples': 11701760, 'steps': 22854, 'loss/train': 2.2709381580352783} +02/25/2022 07:10:51 - INFO - codeparrot_training - Step 22855: {'lr': 0.00030109731818934413, 'samples': 11702272, 'steps': 22855, 'loss/train': 1.6041088104248047} +02/25/2022 07:10:54 - INFO - codeparrot_training - Step 22856: {'lr': 0.00030108130103481554, 'samples': 11702784, 'steps': 22856, 'loss/train': 1.9734938144683838} +02/25/2022 07:11:00 - INFO - codeparrot_training - Step 22857: {'lr': 0.0003010652836614707, 'samples': 11703296, 'steps': 22857, 'loss/train': 2.181708812713623} +02/25/2022 07:11:03 - INFO - codeparrot_training - Step 22858: {'lr': 0.00030104926606937856, 'samples': 11703808, 'steps': 22858, 'loss/train': 1.8034135103225708} +02/25/2022 07:11:09 - INFO - codeparrot_training - Step 22859: {'lr': 0.0003010332482586075, 'samples': 11704320, 'steps': 22859, 'loss/train': 1.9789999723434448} +02/25/2022 07:11:13 - INFO - codeparrot_training - Step 22860: {'lr': 0.0003010172302292263, 'samples': 11704832, 'steps': 22860, 'loss/train': 2.2439792156219482} +02/25/2022 07:11:18 - INFO - codeparrot_training - Step 22861: {'lr': 0.00030100121198130335, 'samples': 11705344, 'steps': 22861, 'loss/train': 0.6301648616790771} +02/25/2022 07:11:22 - INFO - codeparrot_training - Step 22862: {'lr': 0.0003009851935149075, 'samples': 11705856, 'steps': 22862, 'loss/train': 1.5742487907409668} +02/25/2022 07:11:27 - INFO - codeparrot_training - Step 22863: {'lr': 0.0003009691748301072, 'samples': 11706368, 'steps': 22863, 'loss/train': 0.7274398803710938} +02/25/2022 07:11:31 - INFO - codeparrot_training - Step 22864: {'lr': 0.0003009531559269713, 'samples': 11706880, 'steps': 22864, 'loss/train': 3.308727502822876} +02/25/2022 07:11:36 - INFO - codeparrot_training - Step 22865: {'lr': 0.00030093713680556805, 'samples': 11707392, 'steps': 22865, 'loss/train': 0.9269284605979919} +02/25/2022 07:11:40 - INFO - codeparrot_training - Step 22866: {'lr': 0.0003009211174659664, 'samples': 11707904, 'steps': 22866, 'loss/train': 1.522827386856079} +02/25/2022 07:11:45 - INFO - codeparrot_training - Step 22867: {'lr': 0.00030090509790823476, 'samples': 11708416, 'steps': 22867, 'loss/train': 1.4396413564682007} +02/25/2022 07:11:49 - INFO - codeparrot_training - Step 22868: {'lr': 0.0003008890781324419, 'samples': 11708928, 'steps': 22868, 'loss/train': 2.7254207134246826} +02/25/2022 07:11:54 - INFO - codeparrot_training - Step 22869: {'lr': 0.0003008730581386564, 'samples': 11709440, 'steps': 22869, 'loss/train': 1.9003928899765015} +02/25/2022 07:11:58 - INFO - codeparrot_training - Step 22870: {'lr': 0.00030085703792694687, 'samples': 11709952, 'steps': 22870, 'loss/train': 1.429430365562439} +02/25/2022 07:12:05 - INFO - codeparrot_training - Step 22871: {'lr': 0.00030084101749738195, 'samples': 11710464, 'steps': 22871, 'loss/train': 1.5849227905273438} +02/25/2022 07:12:09 - INFO - codeparrot_training - Step 22872: {'lr': 0.00030082499685003025, 'samples': 11710976, 'steps': 22872, 'loss/train': 1.8664072751998901} +02/25/2022 07:12:14 - INFO - codeparrot_training - Step 22873: {'lr': 0.0003008089759849604, 'samples': 11711488, 'steps': 22873, 'loss/train': 2.0416150093078613} +02/25/2022 07:12:18 - INFO - codeparrot_training - Step 22874: {'lr': 0.000300792954902241, 'samples': 11712000, 'steps': 22874, 'loss/train': 1.8513036966323853} +02/25/2022 07:12:23 - INFO - codeparrot_training - Step 22875: {'lr': 0.00030077693360194076, 'samples': 11712512, 'steps': 22875, 'loss/train': 3.3205151557922363} +02/25/2022 07:12:27 - INFO - codeparrot_training - Step 22876: {'lr': 0.0003007609120841282, 'samples': 11713024, 'steps': 22876, 'loss/train': 2.2465617656707764} +02/25/2022 07:12:32 - INFO - codeparrot_training - Step 22877: {'lr': 0.0003007448903488721, 'samples': 11713536, 'steps': 22877, 'loss/train': 1.8106093406677246} +02/25/2022 07:12:36 - INFO - codeparrot_training - Step 22878: {'lr': 0.00030072886839624093, 'samples': 11714048, 'steps': 22878, 'loss/train': 1.3157438039779663} +02/25/2022 07:12:41 - INFO - codeparrot_training - Step 22879: {'lr': 0.0003007128462263034, 'samples': 11714560, 'steps': 22879, 'loss/train': 0.8539958000183105} +02/25/2022 07:12:45 - INFO - codeparrot_training - Step 22880: {'lr': 0.0003006968238391281, 'samples': 11715072, 'steps': 22880, 'loss/train': 3.133535623550415} +02/25/2022 07:12:52 - INFO - codeparrot_training - Step 22881: {'lr': 0.00030068080123478376, 'samples': 11715584, 'steps': 22881, 'loss/train': 0.6200644969940186} +02/25/2022 07:12:55 - INFO - codeparrot_training - Step 22882: {'lr': 0.000300664778413339, 'samples': 11716096, 'steps': 22882, 'loss/train': 2.0040032863616943} +02/25/2022 07:13:01 - INFO - codeparrot_training - Step 22883: {'lr': 0.00030064875537486236, 'samples': 11716608, 'steps': 22883, 'loss/train': 0.9974362254142761} +02/25/2022 07:13:04 - INFO - codeparrot_training - Step 22884: {'lr': 0.00030063273211942254, 'samples': 11717120, 'steps': 22884, 'loss/train': 1.7254163026809692} +02/25/2022 07:13:10 - INFO - codeparrot_training - Step 22885: {'lr': 0.0003006167086470882, 'samples': 11717632, 'steps': 22885, 'loss/train': 2.538034677505493} +02/25/2022 07:13:13 - INFO - codeparrot_training - Step 22886: {'lr': 0.00030060068495792793, 'samples': 11718144, 'steps': 22886, 'loss/train': 2.075181245803833} +02/25/2022 07:13:19 - INFO - codeparrot_training - Step 22887: {'lr': 0.0003005846610520104, 'samples': 11718656, 'steps': 22887, 'loss/train': 1.473665475845337} +02/25/2022 07:13:22 - INFO - codeparrot_training - Step 22888: {'lr': 0.00030056863692940426, 'samples': 11719168, 'steps': 22888, 'loss/train': 1.9633461236953735} +02/25/2022 07:13:28 - INFO - codeparrot_training - Step 22889: {'lr': 0.00030055261259017807, 'samples': 11719680, 'steps': 22889, 'loss/train': 0.8838751316070557} +02/25/2022 07:13:31 - INFO - codeparrot_training - Step 22890: {'lr': 0.00030053658803440064, 'samples': 11720192, 'steps': 22890, 'loss/train': 2.0789523124694824} +02/25/2022 07:13:39 - INFO - codeparrot_training - Step 22891: {'lr': 0.00030052056326214046, 'samples': 11720704, 'steps': 22891, 'loss/train': 1.9491651058197021} +02/25/2022 07:13:42 - INFO - codeparrot_training - Step 22892: {'lr': 0.00030050453827346627, 'samples': 11721216, 'steps': 22892, 'loss/train': 1.710429310798645} +02/25/2022 07:13:48 - INFO - codeparrot_training - Step 22893: {'lr': 0.0003004885130684467, 'samples': 11721728, 'steps': 22893, 'loss/train': 2.135662794113159} +02/25/2022 07:13:51 - INFO - codeparrot_training - Step 22894: {'lr': 0.00030047248764715023, 'samples': 11722240, 'steps': 22894, 'loss/train': 0.6546010971069336} +02/25/2022 07:13:57 - INFO - codeparrot_training - Step 22895: {'lr': 0.0003004564620096457, 'samples': 11722752, 'steps': 22895, 'loss/train': 2.801112174987793} +02/25/2022 07:14:00 - INFO - codeparrot_training - Step 22896: {'lr': 0.00030044043615600174, 'samples': 11723264, 'steps': 22896, 'loss/train': 1.4357532262802124} +02/25/2022 07:14:06 - INFO - codeparrot_training - Step 22897: {'lr': 0.000300424410086287, 'samples': 11723776, 'steps': 22897, 'loss/train': 2.0789871215820312} +02/25/2022 07:14:09 - INFO - codeparrot_training - Step 22898: {'lr': 0.00030040838380057005, 'samples': 11724288, 'steps': 22898, 'loss/train': 1.5147205591201782} +02/25/2022 07:14:15 - INFO - codeparrot_training - Step 22899: {'lr': 0.00030039235729891964, 'samples': 11724800, 'steps': 22899, 'loss/train': 2.2267744541168213} +02/25/2022 07:14:18 - INFO - codeparrot_training - Step 22900: {'lr': 0.0003003763305814043, 'samples': 11725312, 'steps': 22900, 'loss/train': 1.8106625080108643} +02/25/2022 07:14:24 - INFO - codeparrot_training - Step 22901: {'lr': 0.00030036030364809284, 'samples': 11725824, 'steps': 22901, 'loss/train': 1.4188302755355835} +02/25/2022 07:14:27 - INFO - codeparrot_training - Step 22902: {'lr': 0.00030034427649905377, 'samples': 11726336, 'steps': 22902, 'loss/train': 1.9424360990524292} +02/25/2022 07:14:33 - INFO - codeparrot_training - Step 22903: {'lr': 0.0003003282491343559, 'samples': 11726848, 'steps': 22903, 'loss/train': 2.2287771701812744} +02/25/2022 07:14:36 - INFO - codeparrot_training - Step 22904: {'lr': 0.00030031222155406763, 'samples': 11727360, 'steps': 22904, 'loss/train': 1.9816529750823975} +02/25/2022 07:14:42 - INFO - codeparrot_training - Step 22905: {'lr': 0.00030029619375825784, 'samples': 11727872, 'steps': 22905, 'loss/train': 1.6138992309570312} +02/25/2022 07:14:45 - INFO - codeparrot_training - Step 22906: {'lr': 0.00030028016574699517, 'samples': 11728384, 'steps': 22906, 'loss/train': 2.4598488807678223} +02/25/2022 07:14:53 - INFO - codeparrot_training - Step 22907: {'lr': 0.0003002641375203482, 'samples': 11728896, 'steps': 22907, 'loss/train': 2.3908636569976807} +02/25/2022 07:14:56 - INFO - codeparrot_training - Step 22908: {'lr': 0.0003002481090783856, 'samples': 11729408, 'steps': 22908, 'loss/train': 1.5112683773040771} +02/25/2022 07:15:02 - INFO - codeparrot_training - Step 22909: {'lr': 0.0003002320804211761, 'samples': 11729920, 'steps': 22909, 'loss/train': 1.861320972442627} +02/25/2022 07:15:05 - INFO - codeparrot_training - Step 22910: {'lr': 0.00030021605154878836, 'samples': 11730432, 'steps': 22910, 'loss/train': 1.053736686706543} +02/25/2022 07:15:10 - INFO - codeparrot_training - Step 22911: {'lr': 0.0003002000224612909, 'samples': 11730944, 'steps': 22911, 'loss/train': 1.0247892141342163} +02/25/2022 07:15:14 - INFO - codeparrot_training - Step 22912: {'lr': 0.0003001839931587526, 'samples': 11731456, 'steps': 22912, 'loss/train': 1.086834192276001} +02/25/2022 07:15:20 - INFO - codeparrot_training - Step 22913: {'lr': 0.0003001679636412419, 'samples': 11731968, 'steps': 22913, 'loss/train': 0.8913392424583435} +02/25/2022 07:15:23 - INFO - codeparrot_training - Step 22914: {'lr': 0.0003001519339088277, 'samples': 11732480, 'steps': 22914, 'loss/train': 1.4678913354873657} +02/25/2022 07:15:29 - INFO - codeparrot_training - Step 22915: {'lr': 0.00030013590396157843, 'samples': 11732992, 'steps': 22915, 'loss/train': 2.1191389560699463} +02/25/2022 07:15:33 - INFO - codeparrot_training - Step 22916: {'lr': 0.0003001198737995628, 'samples': 11733504, 'steps': 22916, 'loss/train': 2.638381004333496} +02/25/2022 07:15:36 - INFO - codeparrot_training - Step 22917: {'lr': 0.0003001038434228497, 'samples': 11734016, 'steps': 22917, 'loss/train': 2.4855711460113525} +02/25/2022 07:15:43 - INFO - codeparrot_training - Step 22918: {'lr': 0.00030008781283150755, 'samples': 11734528, 'steps': 22918, 'loss/train': 2.275350570678711} +02/25/2022 07:15:47 - INFO - codeparrot_training - Step 22919: {'lr': 0.0003000717820256052, 'samples': 11735040, 'steps': 22919, 'loss/train': 1.6692391633987427} +02/25/2022 07:15:52 - INFO - codeparrot_training - Step 22920: {'lr': 0.00030005575100521117, 'samples': 11735552, 'steps': 22920, 'loss/train': 2.2804603576660156} +02/25/2022 07:15:56 - INFO - codeparrot_training - Step 22921: {'lr': 0.0003000397197703942, 'samples': 11736064, 'steps': 22921, 'loss/train': 2.3221590518951416} +02/25/2022 07:16:02 - INFO - codeparrot_training - Step 22922: {'lr': 0.00030002368832122295, 'samples': 11736576, 'steps': 22922, 'loss/train': 2.876267433166504} +02/25/2022 07:16:05 - INFO - codeparrot_training - Step 22923: {'lr': 0.00030000765665776617, 'samples': 11737088, 'steps': 22923, 'loss/train': 8.902372360229492} +02/25/2022 07:16:11 - INFO - codeparrot_training - Step 22924: {'lr': 0.0002999916247800924, 'samples': 11737600, 'steps': 22924, 'loss/train': 2.0848114490509033} +02/25/2022 07:16:16 - INFO - codeparrot_training - Step 22925: {'lr': 0.00029997559268827044, 'samples': 11738112, 'steps': 22925, 'loss/train': 2.377110719680786} +02/25/2022 07:16:20 - INFO - codeparrot_training - Step 22926: {'lr': 0.0002999595603823689, 'samples': 11738624, 'steps': 22926, 'loss/train': 0.3089900016784668} +02/25/2022 07:16:26 - INFO - codeparrot_training - Step 22927: {'lr': 0.00029994352786245643, 'samples': 11739136, 'steps': 22927, 'loss/train': 1.363265872001648} +02/25/2022 07:16:29 - INFO - codeparrot_training - Step 22928: {'lr': 0.0002999274951286017, 'samples': 11739648, 'steps': 22928, 'loss/train': 1.5553009510040283} +02/25/2022 07:16:35 - INFO - codeparrot_training - Step 22929: {'lr': 0.0002999114621808735, 'samples': 11740160, 'steps': 22929, 'loss/train': 1.5626431703567505} +02/25/2022 07:16:38 - INFO - codeparrot_training - Step 22930: {'lr': 0.0002998954290193405, 'samples': 11740672, 'steps': 22930, 'loss/train': 1.7593611478805542} +02/25/2022 07:16:43 - INFO - codeparrot_training - Step 22931: {'lr': 0.00029987939564407124, 'samples': 11741184, 'steps': 22931, 'loss/train': 1.8623770475387573} +02/25/2022 07:16:47 - INFO - codeparrot_training - Step 22932: {'lr': 0.00029986336205513456, 'samples': 11741696, 'steps': 22932, 'loss/train': 1.893572211265564} +02/25/2022 07:16:53 - INFO - codeparrot_training - Step 22933: {'lr': 0.00029984732825259904, 'samples': 11742208, 'steps': 22933, 'loss/train': 0.6987547874450684} +02/25/2022 07:16:56 - INFO - codeparrot_training - Step 22934: {'lr': 0.00029983129423653333, 'samples': 11742720, 'steps': 22934, 'loss/train': 1.8675495386123657} +02/25/2022 07:17:02 - INFO - codeparrot_training - Step 22935: {'lr': 0.00029981526000700626, 'samples': 11743232, 'steps': 22935, 'loss/train': 2.0508322715759277} +02/25/2022 07:17:05 - INFO - codeparrot_training - Step 22936: {'lr': 0.0002997992255640864, 'samples': 11743744, 'steps': 22936, 'loss/train': 3.23275089263916} +02/25/2022 07:17:10 - INFO - codeparrot_training - Step 22937: {'lr': 0.0002997831909078425, 'samples': 11744256, 'steps': 22937, 'loss/train': 1.193077564239502} +02/25/2022 07:17:14 - INFO - codeparrot_training - Step 22938: {'lr': 0.00029976715603834315, 'samples': 11744768, 'steps': 22938, 'loss/train': 2.0761213302612305} +02/25/2022 07:17:20 - INFO - codeparrot_training - Step 22939: {'lr': 0.00029975112095565723, 'samples': 11745280, 'steps': 22939, 'loss/train': 0.33442795276641846} +02/25/2022 07:17:24 - INFO - codeparrot_training - Step 22940: {'lr': 0.00029973508565985316, 'samples': 11745792, 'steps': 22940, 'loss/train': 1.3022972345352173} +02/25/2022 07:17:29 - INFO - codeparrot_training - Step 22941: {'lr': 0.0002997190501509999, 'samples': 11746304, 'steps': 22941, 'loss/train': 2.723212480545044} +02/25/2022 07:17:33 - INFO - codeparrot_training - Step 22942: {'lr': 0.00029970301442916594, 'samples': 11746816, 'steps': 22942, 'loss/train': 0.7927182912826538} +02/25/2022 07:17:38 - INFO - codeparrot_training - Step 22943: {'lr': 0.00029968697849442006, 'samples': 11747328, 'steps': 22943, 'loss/train': 1.8818817138671875} +02/25/2022 07:17:42 - INFO - codeparrot_training - Step 22944: {'lr': 0.0002996709423468309, 'samples': 11747840, 'steps': 22944, 'loss/train': 2.369805335998535} +02/25/2022 07:17:47 - INFO - codeparrot_training - Step 22945: {'lr': 0.00029965490598646727, 'samples': 11748352, 'steps': 22945, 'loss/train': 1.4444445371627808} +02/25/2022 07:17:51 - INFO - codeparrot_training - Step 22946: {'lr': 0.00029963886941339774, 'samples': 11748864, 'steps': 22946, 'loss/train': 2.2752997875213623} +02/25/2022 07:17:56 - INFO - codeparrot_training - Step 22947: {'lr': 0.0002996228326276911, 'samples': 11749376, 'steps': 22947, 'loss/train': 1.4901851415634155} +02/25/2022 07:18:00 - INFO - codeparrot_training - Step 22948: {'lr': 0.0002996067956294159, 'samples': 11749888, 'steps': 22948, 'loss/train': 1.6299219131469727} +02/25/2022 07:18:05 - INFO - codeparrot_training - Step 22949: {'lr': 0.0002995907584186411, 'samples': 11750400, 'steps': 22949, 'loss/train': 0.42957594990730286} +02/25/2022 07:18:09 - INFO - codeparrot_training - Step 22950: {'lr': 0.00029957472099543516, 'samples': 11750912, 'steps': 22950, 'loss/train': 3.1684324741363525} +02/25/2022 07:18:14 - INFO - codeparrot_training - Step 22951: {'lr': 0.00029955868335986686, 'samples': 11751424, 'steps': 22951, 'loss/train': 1.6765109300613403} +02/25/2022 07:18:18 - INFO - codeparrot_training - Step 22952: {'lr': 0.0002995426455120049, 'samples': 11751936, 'steps': 22952, 'loss/train': 2.0211105346679688} +02/25/2022 07:18:25 - INFO - codeparrot_training - Step 22953: {'lr': 0.0002995266074519179, 'samples': 11752448, 'steps': 22953, 'loss/train': 1.6748194694519043} +02/25/2022 07:18:28 - INFO - codeparrot_training - Step 22954: {'lr': 0.00029951056917967476, 'samples': 11752960, 'steps': 22954, 'loss/train': 3.1632049083709717} +02/25/2022 07:18:34 - INFO - codeparrot_training - Step 22955: {'lr': 0.000299494530695344, 'samples': 11753472, 'steps': 22955, 'loss/train': 1.021226406097412} +02/25/2022 07:18:37 - INFO - codeparrot_training - Step 22956: {'lr': 0.0002994784919989944, 'samples': 11753984, 'steps': 22956, 'loss/train': 1.8655604124069214} +02/25/2022 07:18:41 - INFO - codeparrot_training - Step 22957: {'lr': 0.00029946245309069464, 'samples': 11754496, 'steps': 22957, 'loss/train': 1.7605639696121216} +02/25/2022 07:18:46 - INFO - codeparrot_training - Step 22958: {'lr': 0.0002994464139705135, 'samples': 11755008, 'steps': 22958, 'loss/train': 1.7276771068572998} +02/25/2022 07:18:50 - INFO - codeparrot_training - Step 22959: {'lr': 0.00029943037463851953, 'samples': 11755520, 'steps': 22959, 'loss/train': 1.2175084352493286} +02/25/2022 07:18:55 - INFO - codeparrot_training - Step 22960: {'lr': 0.00029941433509478153, 'samples': 11756032, 'steps': 22960, 'loss/train': 1.2298845052719116} +02/25/2022 07:18:59 - INFO - codeparrot_training - Step 22961: {'lr': 0.00029939829533936823, 'samples': 11756544, 'steps': 22961, 'loss/train': 1.3954131603240967} +02/25/2022 07:19:04 - INFO - codeparrot_training - Step 22962: {'lr': 0.00029938225537234843, 'samples': 11757056, 'steps': 22962, 'loss/train': 0.8541293740272522} +02/25/2022 07:19:08 - INFO - codeparrot_training - Step 22963: {'lr': 0.0002993662151937906, 'samples': 11757568, 'steps': 22963, 'loss/train': 1.5400323867797852} +02/25/2022 07:19:14 - INFO - codeparrot_training - Step 22964: {'lr': 0.00029935017480376357, 'samples': 11758080, 'steps': 22964, 'loss/train': 1.5345503091812134} +02/25/2022 07:19:18 - INFO - codeparrot_training - Step 22965: {'lr': 0.00029933413420233615, 'samples': 11758592, 'steps': 22965, 'loss/train': 2.783712863922119} +02/25/2022 07:19:23 - INFO - codeparrot_training - Step 22966: {'lr': 0.0002993180933895769, 'samples': 11759104, 'steps': 22966, 'loss/train': 1.6588160991668701} +02/25/2022 07:19:29 - INFO - codeparrot_training - Step 22967: {'lr': 0.00029930205236555464, 'samples': 11759616, 'steps': 22967, 'loss/train': 2.9335856437683105} +02/25/2022 07:19:33 - INFO - codeparrot_training - Step 22968: {'lr': 0.000299286011130338, 'samples': 11760128, 'steps': 22968, 'loss/train': 2.218116283416748} +02/25/2022 07:19:36 - INFO - codeparrot_training - Step 22969: {'lr': 0.00029926996968399576, 'samples': 11760640, 'steps': 22969, 'loss/train': 2.2955267429351807} +02/25/2022 07:19:42 - INFO - codeparrot_training - Step 22970: {'lr': 0.0002992539280265966, 'samples': 11761152, 'steps': 22970, 'loss/train': 2.4329981803894043} +02/25/2022 07:19:46 - INFO - codeparrot_training - Step 22971: {'lr': 0.00029923788615820936, 'samples': 11761664, 'steps': 22971, 'loss/train': 2.0191218852996826} +02/25/2022 07:19:51 - INFO - codeparrot_training - Step 22972: {'lr': 0.0002992218440789025, 'samples': 11762176, 'steps': 22972, 'loss/train': 2.4375100135803223} +02/25/2022 07:19:54 - INFO - codeparrot_training - Step 22973: {'lr': 0.00029920580178874497, 'samples': 11762688, 'steps': 22973, 'loss/train': 1.9143273830413818} +02/25/2022 07:20:01 - INFO - codeparrot_training - Step 22974: {'lr': 0.00029918975928780537, 'samples': 11763200, 'steps': 22974, 'loss/train': 1.580680251121521} +02/25/2022 07:20:04 - INFO - codeparrot_training - Step 22975: {'lr': 0.0002991737165761525, 'samples': 11763712, 'steps': 22975, 'loss/train': 2.1008057594299316} +02/25/2022 07:20:10 - INFO - codeparrot_training - Step 22976: {'lr': 0.000299157673653855, 'samples': 11764224, 'steps': 22976, 'loss/train': 2.170525550842285} +02/25/2022 07:20:13 - INFO - codeparrot_training - Step 22977: {'lr': 0.0002991416305209817, 'samples': 11764736, 'steps': 22977, 'loss/train': 2.5588033199310303} +02/25/2022 07:20:19 - INFO - codeparrot_training - Step 22978: {'lr': 0.0002991255871776012, 'samples': 11765248, 'steps': 22978, 'loss/train': 1.8432810306549072} +02/25/2022 07:20:22 - INFO - codeparrot_training - Step 22979: {'lr': 0.0002991095436237823, 'samples': 11765760, 'steps': 22979, 'loss/train': 2.135591745376587} +02/25/2022 07:20:28 - INFO - codeparrot_training - Step 22980: {'lr': 0.00029909349985959377, 'samples': 11766272, 'steps': 22980, 'loss/train': 2.1418495178222656} +02/25/2022 07:20:32 - INFO - codeparrot_training - Step 22981: {'lr': 0.00029907745588510416, 'samples': 11766784, 'steps': 22981, 'loss/train': 2.6582584381103516} +02/25/2022 07:20:37 - INFO - codeparrot_training - Step 22982: {'lr': 0.00029906141170038243, 'samples': 11767296, 'steps': 22982, 'loss/train': 0.7367690801620483} +02/25/2022 07:20:41 - INFO - codeparrot_training - Step 22983: {'lr': 0.00029904536730549706, 'samples': 11767808, 'steps': 22983, 'loss/train': 2.2438948154449463} +02/25/2022 07:20:47 - INFO - codeparrot_training - Step 22984: {'lr': 0.00029902932270051705, 'samples': 11768320, 'steps': 22984, 'loss/train': 1.4486254453659058} +02/25/2022 07:20:51 - INFO - codeparrot_training - Step 22985: {'lr': 0.00029901327788551087, 'samples': 11768832, 'steps': 22985, 'loss/train': 1.742058277130127} +02/25/2022 07:20:56 - INFO - codeparrot_training - Step 22986: {'lr': 0.0002989972328605475, 'samples': 11769344, 'steps': 22986, 'loss/train': 1.4603650569915771} +02/25/2022 07:21:00 - INFO - codeparrot_training - Step 22987: {'lr': 0.0002989811876256954, 'samples': 11769856, 'steps': 22987, 'loss/train': 2.547775983810425} +02/25/2022 07:21:05 - INFO - codeparrot_training - Step 22988: {'lr': 0.0002989651421810235, 'samples': 11770368, 'steps': 22988, 'loss/train': 2.5148606300354004} +02/25/2022 07:21:09 - INFO - codeparrot_training - Step 22989: {'lr': 0.00029894909652660053, 'samples': 11770880, 'steps': 22989, 'loss/train': 2.832615613937378} +02/25/2022 07:21:14 - INFO - codeparrot_training - Step 22990: {'lr': 0.0002989330506624951, 'samples': 11771392, 'steps': 22990, 'loss/train': 1.343993067741394} +02/25/2022 07:21:18 - INFO - codeparrot_training - Step 22991: {'lr': 0.0002989170045887761, 'samples': 11771904, 'steps': 22991, 'loss/train': 1.6383126974105835} +02/25/2022 07:21:24 - INFO - codeparrot_training - Step 22992: {'lr': 0.00029890095830551204, 'samples': 11772416, 'steps': 22992, 'loss/train': 1.9948525428771973} +02/25/2022 07:21:27 - INFO - codeparrot_training - Step 22993: {'lr': 0.00029888491181277195, 'samples': 11772928, 'steps': 22993, 'loss/train': 0.6399610638618469} +02/25/2022 07:21:33 - INFO - codeparrot_training - Step 22994: {'lr': 0.00029886886511062434, 'samples': 11773440, 'steps': 22994, 'loss/train': 2.6558916568756104} +02/25/2022 07:21:36 - INFO - codeparrot_training - Step 22995: {'lr': 0.0002988528181991381, 'samples': 11773952, 'steps': 22995, 'loss/train': 0.29033002257347107} +02/25/2022 07:21:42 - INFO - codeparrot_training - Step 22996: {'lr': 0.00029883677107838183, 'samples': 11774464, 'steps': 22996, 'loss/train': 1.6300299167633057} +02/25/2022 07:21:46 - INFO - codeparrot_training - Step 22997: {'lr': 0.0002988207237484244, 'samples': 11774976, 'steps': 22997, 'loss/train': 2.483008623123169} +02/25/2022 07:21:51 - INFO - codeparrot_training - Step 22998: {'lr': 0.0002988046762093344, 'samples': 11775488, 'steps': 22998, 'loss/train': 1.0141639709472656} +02/25/2022 07:21:55 - INFO - codeparrot_training - Step 22999: {'lr': 0.00029878862846118075, 'samples': 11776000, 'steps': 22999, 'loss/train': 0.8634439706802368} +02/25/2022 07:21:55 - INFO - codeparrot_training - Evaluating and saving model checkpoint