diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -7343,3 +7343,1009 @@ Use FP16 precision: False 02/24/2022 10:37:19 - INFO - codeparrot_training - Step 6998: {'lr': 0.00048674304942747626, 'samples': 3583488, 'steps': 6998, 'loss/train': 2.046218156814575} 02/24/2022 10:37:24 - INFO - codeparrot_training - Step 6999: {'lr': 0.0004867377914076811, 'samples': 3584000, 'steps': 6999, 'loss/train': 2.682992696762085} 02/24/2022 10:37:24 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 10:37:41 - WARNING - huggingface_hub.repository - Several commits (7) will be pushed upstream. +02/24/2022 10:37:41 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 10:38:15 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 8b611e7..1deb887 floral-grass-11 -> floral-grass-11 + +02/24/2022 10:38:20 - INFO - codeparrot_training - Step 7000: {'lr': 0.00048673253237377644, 'samples': 3584512, 'steps': 7000, 'loss/train': 1.8298699855804443} +02/24/2022 10:38:25 - INFO - codeparrot_training - Step 7001: {'lr': 0.00048672727232578476, 'samples': 3585024, 'steps': 7001, 'loss/train': 1.4196652173995972} +02/24/2022 10:38:29 - INFO - codeparrot_training - Step 7002: {'lr': 0.0004867220112637286, 'samples': 3585536, 'steps': 7002, 'loss/train': 1.1515252590179443} +02/24/2022 10:38:36 - INFO - codeparrot_training - Step 7003: {'lr': 0.00048671674918763055, 'samples': 3586048, 'steps': 7003, 'loss/train': 1.9506548643112183} +02/24/2022 10:38:40 - INFO - codeparrot_training - Step 7004: {'lr': 0.00048671148609751307, 'samples': 3586560, 'steps': 7004, 'loss/train': 2.682981491088867} +02/24/2022 10:38:45 - INFO - codeparrot_training - Step 7005: {'lr': 0.0004867062219933988, 'samples': 3587072, 'steps': 7005, 'loss/train': 4.058497905731201} +02/24/2022 10:38:49 - INFO - codeparrot_training - Step 7006: {'lr': 0.00048670095687531023, 'samples': 3587584, 'steps': 7006, 'loss/train': 1.0610548257827759} +02/24/2022 10:38:54 - INFO - codeparrot_training - Step 7007: {'lr': 0.0004866956907432699, 'samples': 3588096, 'steps': 7007, 'loss/train': 1.5664478540420532} +02/24/2022 10:38:58 - INFO - codeparrot_training - Step 7008: {'lr': 0.00048669042359730043, 'samples': 3588608, 'steps': 7008, 'loss/train': 2.1107544898986816} +02/24/2022 10:39:03 - INFO - codeparrot_training - Step 7009: {'lr': 0.00048668515543742426, 'samples': 3589120, 'steps': 7009, 'loss/train': 2.958948850631714} +02/24/2022 10:39:07 - INFO - codeparrot_training - Step 7010: {'lr': 0.0004866798862636641, 'samples': 3589632, 'steps': 7010, 'loss/train': 3.252990961074829} +02/24/2022 10:39:12 - INFO - codeparrot_training - Step 7011: {'lr': 0.0004866746160760425, 'samples': 3590144, 'steps': 7011, 'loss/train': 1.740478515625} +02/24/2022 10:39:16 - INFO - codeparrot_training - Step 7012: {'lr': 0.0004866693448745819, 'samples': 3590656, 'steps': 7012, 'loss/train': 2.313049077987671} +02/24/2022 10:39:23 - INFO - codeparrot_training - Step 7013: {'lr': 0.000486664072659305, 'samples': 3591168, 'steps': 7013, 'loss/train': 6.489420413970947} +02/24/2022 10:39:29 - INFO - codeparrot_training - Step 7014: {'lr': 0.0004866587994302344, 'samples': 3591680, 'steps': 7014, 'loss/train': 2.7838709354400635} +02/24/2022 10:39:32 - INFO - codeparrot_training - Step 7015: {'lr': 0.0004866535251873926, 'samples': 3592192, 'steps': 7015, 'loss/train': 0.38497358560562134} +02/24/2022 10:39:38 - INFO - codeparrot_training - Step 7016: {'lr': 0.0004866482499308023, 'samples': 3592704, 'steps': 7016, 'loss/train': 2.105242967605591} +02/24/2022 10:39:41 - INFO - codeparrot_training - Step 7017: {'lr': 0.000486642973660486, 'samples': 3593216, 'steps': 7017, 'loss/train': 1.8313387632369995} +02/24/2022 10:39:47 - INFO - codeparrot_training - Step 7018: {'lr': 0.00048663769637646636, 'samples': 3593728, 'steps': 7018, 'loss/train': 1.5400100946426392} +02/24/2022 10:39:50 - INFO - codeparrot_training - Step 7019: {'lr': 0.000486632418078766, 'samples': 3594240, 'steps': 7019, 'loss/train': 2.302502393722534} +02/24/2022 10:39:56 - INFO - codeparrot_training - Step 7020: {'lr': 0.0004866271387674075, 'samples': 3594752, 'steps': 7020, 'loss/train': 1.95855712890625} +02/24/2022 10:39:59 - INFO - codeparrot_training - Step 7021: {'lr': 0.00048662185844241347, 'samples': 3595264, 'steps': 7021, 'loss/train': 2.7917532920837402} +02/24/2022 10:40:05 - INFO - codeparrot_training - Step 7022: {'lr': 0.00048661657710380647, 'samples': 3595776, 'steps': 7022, 'loss/train': 2.4886507987976074} +02/24/2022 10:40:08 - INFO - codeparrot_training - Step 7023: {'lr': 0.00048661129475160926, 'samples': 3596288, 'steps': 7023, 'loss/train': 0.8547494411468506} +02/24/2022 10:40:16 - INFO - codeparrot_training - Step 7024: {'lr': 0.00048660601138584436, 'samples': 3596800, 'steps': 7024, 'loss/train': 1.7446014881134033} +02/24/2022 10:40:19 - INFO - codeparrot_training - Step 7025: {'lr': 0.00048660072700653446, 'samples': 3597312, 'steps': 7025, 'loss/train': 2.086608409881592} +02/24/2022 10:40:23 - INFO - codeparrot_training - Step 7026: {'lr': 0.0004865954416137022, 'samples': 3597824, 'steps': 7026, 'loss/train': 1.7990435361862183} +02/24/2022 10:40:28 - INFO - codeparrot_training - Step 7027: {'lr': 0.0004865901552073701, 'samples': 3598336, 'steps': 7027, 'loss/train': 0.3683944642543793} +02/24/2022 10:40:34 - INFO - codeparrot_training - Step 7028: {'lr': 0.00048658486778756097, 'samples': 3598848, 'steps': 7028, 'loss/train': 2.3204288482666016} +02/24/2022 10:40:37 - INFO - codeparrot_training - Step 7029: {'lr': 0.00048657957935429734, 'samples': 3599360, 'steps': 7029, 'loss/train': 2.0324978828430176} +02/24/2022 10:40:43 - INFO - codeparrot_training - Step 7030: {'lr': 0.000486574289907602, 'samples': 3599872, 'steps': 7030, 'loss/train': 2.7280023097991943} +02/24/2022 10:40:46 - INFO - codeparrot_training - Step 7031: {'lr': 0.0004865689994474974, 'samples': 3600384, 'steps': 7031, 'loss/train': 2.8704378604888916} +02/24/2022 10:40:54 - INFO - codeparrot_training - Step 7032: {'lr': 0.00048656370797400643, 'samples': 3600896, 'steps': 7032, 'loss/train': 2.6442599296569824} +02/24/2022 10:40:57 - INFO - codeparrot_training - Step 7033: {'lr': 0.00048655841548715163, 'samples': 3601408, 'steps': 7033, 'loss/train': 2.491255760192871} +02/24/2022 10:41:01 - INFO - codeparrot_training - Step 7034: {'lr': 0.00048655312198695567, 'samples': 3601920, 'steps': 7034, 'loss/train': 3.174478769302368} +02/24/2022 10:41:06 - INFO - codeparrot_training - Step 7035: {'lr': 0.00048654782747344126, 'samples': 3602432, 'steps': 7035, 'loss/train': 2.215595245361328} +02/24/2022 10:41:12 - INFO - codeparrot_training - Step 7036: {'lr': 0.00048654253194663113, 'samples': 3602944, 'steps': 7036, 'loss/train': 1.9380426406860352} +02/24/2022 10:41:15 - INFO - codeparrot_training - Step 7037: {'lr': 0.0004865372354065478, 'samples': 3603456, 'steps': 7037, 'loss/train': 2.1176156997680664} +02/24/2022 10:41:21 - INFO - codeparrot_training - Step 7038: {'lr': 0.00048653193785321415, 'samples': 3603968, 'steps': 7038, 'loss/train': 3.197876453399658} +02/24/2022 10:41:24 - INFO - codeparrot_training - Step 7039: {'lr': 0.00048652663928665273, 'samples': 3604480, 'steps': 7039, 'loss/train': 2.3217735290527344} +02/24/2022 10:41:30 - INFO - codeparrot_training - Step 7040: {'lr': 0.00048652133970688633, 'samples': 3604992, 'steps': 7040, 'loss/train': 2.416092872619629} +02/24/2022 10:41:34 - INFO - codeparrot_training - Step 7041: {'lr': 0.0004865160391139376, 'samples': 3605504, 'steps': 7041, 'loss/train': 2.928356409072876} +02/24/2022 10:41:37 - INFO - codeparrot_training - Step 7042: {'lr': 0.0004865107375078293, 'samples': 3606016, 'steps': 7042, 'loss/train': 2.22717022895813} +02/24/2022 10:41:43 - INFO - codeparrot_training - Step 7043: {'lr': 0.000486505434888584, 'samples': 3606528, 'steps': 7043, 'loss/train': 1.0353965759277344} +02/24/2022 10:41:46 - INFO - codeparrot_training - Step 7044: {'lr': 0.0004865001312562246, 'samples': 3607040, 'steps': 7044, 'loss/train': 3.5584471225738525} +02/24/2022 10:41:54 - INFO - codeparrot_training - Step 7045: {'lr': 0.0004864948266107737, 'samples': 3607552, 'steps': 7045, 'loss/train': 2.675661325454712} +02/24/2022 10:41:57 - INFO - codeparrot_training - Step 7046: {'lr': 0.0004864895209522541, 'samples': 3608064, 'steps': 7046, 'loss/train': 2.382627248764038} +02/24/2022 10:42:03 - INFO - codeparrot_training - Step 7047: {'lr': 0.00048648421428068843, 'samples': 3608576, 'steps': 7047, 'loss/train': 1.7274190187454224} +02/24/2022 10:42:06 - INFO - codeparrot_training - Step 7048: {'lr': 0.0004864789065960995, 'samples': 3609088, 'steps': 7048, 'loss/train': 2.419377326965332} +02/24/2022 10:42:12 - INFO - codeparrot_training - Step 7049: {'lr': 0.00048647359789851, 'samples': 3609600, 'steps': 7049, 'loss/train': 3.056361198425293} +02/24/2022 10:42:15 - INFO - codeparrot_training - Step 7050: {'lr': 0.00048646828818794274, 'samples': 3610112, 'steps': 7050, 'loss/train': 2.801461696624756} +02/24/2022 10:42:21 - INFO - codeparrot_training - Step 7051: {'lr': 0.00048646297746442044, 'samples': 3610624, 'steps': 7051, 'loss/train': 1.286455750465393} +02/24/2022 10:42:24 - INFO - codeparrot_training - Step 7052: {'lr': 0.0004864576657279658, 'samples': 3611136, 'steps': 7052, 'loss/train': 3.068563461303711} +02/24/2022 10:42:30 - INFO - codeparrot_training - Step 7053: {'lr': 0.0004864523529786016, 'samples': 3611648, 'steps': 7053, 'loss/train': 3.2697532176971436} +02/24/2022 10:42:33 - INFO - codeparrot_training - Step 7054: {'lr': 0.0004864470392163506, 'samples': 3612160, 'steps': 7054, 'loss/train': 2.8305177688598633} +02/24/2022 10:42:39 - INFO - codeparrot_training - Step 7055: {'lr': 0.0004864417244412355, 'samples': 3612672, 'steps': 7055, 'loss/train': 1.4956492185592651} +02/24/2022 10:42:42 - INFO - codeparrot_training - Step 7056: {'lr': 0.0004864364086532792, 'samples': 3613184, 'steps': 7056, 'loss/train': 3.037086248397827} +02/24/2022 10:42:48 - INFO - codeparrot_training - Step 7057: {'lr': 0.00048643109185250445, 'samples': 3613696, 'steps': 7057, 'loss/train': 2.29018497467041} +02/24/2022 10:42:51 - INFO - codeparrot_training - Step 7058: {'lr': 0.0004864257740389338, 'samples': 3614208, 'steps': 7058, 'loss/train': 2.9040513038635254} +02/24/2022 10:42:57 - INFO - codeparrot_training - Step 7059: {'lr': 0.00048642045521259044, 'samples': 3614720, 'steps': 7059, 'loss/train': 2.4331135749816895} +02/24/2022 10:43:00 - INFO - codeparrot_training - Step 7060: {'lr': 0.0004864151353734968, 'samples': 3615232, 'steps': 7060, 'loss/train': 2.06575345993042} +02/24/2022 10:43:08 - INFO - codeparrot_training - Step 7061: {'lr': 0.0004864098145216758, 'samples': 3615744, 'steps': 7061, 'loss/train': 1.384238362312317} +02/24/2022 10:43:11 - INFO - codeparrot_training - Step 7062: {'lr': 0.0004864044926571503, 'samples': 3616256, 'steps': 7062, 'loss/train': 1.0450429916381836} +02/24/2022 10:43:17 - INFO - codeparrot_training - Step 7063: {'lr': 0.00048639916977994286, 'samples': 3616768, 'steps': 7063, 'loss/train': 2.0263302326202393} +02/24/2022 10:43:20 - INFO - codeparrot_training - Step 7064: {'lr': 0.0004863938458900765, 'samples': 3617280, 'steps': 7064, 'loss/train': 3.104705333709717} +02/24/2022 10:43:26 - INFO - codeparrot_training - Step 7065: {'lr': 0.000486388520987574, 'samples': 3617792, 'steps': 7065, 'loss/train': 2.1649348735809326} +02/24/2022 10:43:29 - INFO - codeparrot_training - Step 7066: {'lr': 0.0004863831950724582, 'samples': 3618304, 'steps': 7066, 'loss/train': 2.2347323894500732} +02/24/2022 10:43:35 - INFO - codeparrot_training - Step 7067: {'lr': 0.00048637786814475175, 'samples': 3618816, 'steps': 7067, 'loss/train': 3.1933720111846924} +02/24/2022 10:43:38 - INFO - codeparrot_training - Step 7068: {'lr': 0.0004863725402044776, 'samples': 3619328, 'steps': 7068, 'loss/train': 2.077728748321533} +02/24/2022 10:43:45 - INFO - codeparrot_training - Step 7069: {'lr': 0.00048636721125165855, 'samples': 3619840, 'steps': 7069, 'loss/train': 2.586998701095581} +02/24/2022 10:43:49 - INFO - codeparrot_training - Step 7070: {'lr': 0.0004863618812863174, 'samples': 3620352, 'steps': 7070, 'loss/train': 2.167773485183716} +02/24/2022 10:43:54 - INFO - codeparrot_training - Step 7071: {'lr': 0.0004863565503084771, 'samples': 3620864, 'steps': 7071, 'loss/train': 2.40755558013916} +02/24/2022 10:43:58 - INFO - codeparrot_training - Step 7072: {'lr': 0.0004863512183181603, 'samples': 3621376, 'steps': 7072, 'loss/train': 3.4997470378875732} +02/24/2022 10:44:03 - INFO - codeparrot_training - Step 7073: {'lr': 0.0004863458853153899, 'samples': 3621888, 'steps': 7073, 'loss/train': 2.8361222743988037} +02/24/2022 10:44:07 - INFO - codeparrot_training - Step 7074: {'lr': 0.00048634055130018886, 'samples': 3622400, 'steps': 7074, 'loss/train': 2.2650959491729736} +02/24/2022 10:44:13 - INFO - codeparrot_training - Step 7075: {'lr': 0.00048633521627257993, 'samples': 3622912, 'steps': 7075, 'loss/train': 2.1696739196777344} +02/24/2022 10:44:16 - INFO - codeparrot_training - Step 7076: {'lr': 0.00048632988023258596, 'samples': 3623424, 'steps': 7076, 'loss/train': 1.913853406906128} +02/24/2022 10:44:22 - INFO - codeparrot_training - Step 7077: {'lr': 0.0004863245431802298, 'samples': 3623936, 'steps': 7077, 'loss/train': 1.8230143785476685} +02/24/2022 10:44:25 - INFO - codeparrot_training - Step 7078: {'lr': 0.0004863192051155344, 'samples': 3624448, 'steps': 7078, 'loss/train': 0.14730876684188843} +02/24/2022 10:44:31 - INFO - codeparrot_training - Step 7079: {'lr': 0.0004863138660385225, 'samples': 3624960, 'steps': 7079, 'loss/train': 0.562419593334198} +02/24/2022 10:44:34 - INFO - codeparrot_training - Step 7080: {'lr': 0.00048630852594921703, 'samples': 3625472, 'steps': 7080, 'loss/train': 4.191076755523682} +02/24/2022 10:44:41 - INFO - codeparrot_training - Step 7081: {'lr': 0.00048630318484764093, 'samples': 3625984, 'steps': 7081, 'loss/train': 3.7572033405303955} +02/24/2022 10:44:45 - INFO - codeparrot_training - Step 7082: {'lr': 0.000486297842733817, 'samples': 3626496, 'steps': 7082, 'loss/train': 2.141685724258423} +02/24/2022 10:44:50 - INFO - codeparrot_training - Step 7083: {'lr': 0.0004862924996077682, 'samples': 3627008, 'steps': 7083, 'loss/train': 1.2183120250701904} +02/24/2022 10:44:54 - INFO - codeparrot_training - Step 7084: {'lr': 0.0004862871554695173, 'samples': 3627520, 'steps': 7084, 'loss/train': 2.786996364593506} +02/24/2022 10:44:59 - INFO - codeparrot_training - Step 7085: {'lr': 0.00048628181031908725, 'samples': 3628032, 'steps': 7085, 'loss/train': 0.9586674571037292} +02/24/2022 10:45:03 - INFO - codeparrot_training - Step 7086: {'lr': 0.00048627646415650094, 'samples': 3628544, 'steps': 7086, 'loss/train': 3.023047924041748} +02/24/2022 10:45:08 - INFO - codeparrot_training - Step 7087: {'lr': 0.0004862711169817813, 'samples': 3629056, 'steps': 7087, 'loss/train': 1.8972781896591187} +02/24/2022 10:45:12 - INFO - codeparrot_training - Step 7088: {'lr': 0.0004862657687949512, 'samples': 3629568, 'steps': 7088, 'loss/train': 2.777175188064575} +02/24/2022 10:45:18 - INFO - codeparrot_training - Step 7089: {'lr': 0.0004862604195960336, 'samples': 3630080, 'steps': 7089, 'loss/train': 2.29752779006958} +02/24/2022 10:45:21 - INFO - codeparrot_training - Step 7090: {'lr': 0.00048625506938505136, 'samples': 3630592, 'steps': 7090, 'loss/train': 0.6763121485710144} +02/24/2022 10:45:27 - INFO - codeparrot_training - Step 7091: {'lr': 0.00048624971816202747, 'samples': 3631104, 'steps': 7091, 'loss/train': 2.6189115047454834} +02/24/2022 10:45:30 - INFO - codeparrot_training - Step 7092: {'lr': 0.0004862443659269848, 'samples': 3631616, 'steps': 7092, 'loss/train': 1.3140772581100464} +02/24/2022 10:45:36 - INFO - codeparrot_training - Step 7093: {'lr': 0.00048623901267994625, 'samples': 3632128, 'steps': 7093, 'loss/train': 2.539118766784668} +02/24/2022 10:45:39 - INFO - codeparrot_training - Step 7094: {'lr': 0.00048623365842093483, 'samples': 3632640, 'steps': 7094, 'loss/train': 1.5668152570724487} +02/24/2022 10:45:47 - INFO - codeparrot_training - Step 7095: {'lr': 0.00048622830314997334, 'samples': 3633152, 'steps': 7095, 'loss/train': 1.4457467794418335} +02/24/2022 10:45:50 - INFO - codeparrot_training - Step 7096: {'lr': 0.0004862229468670849, 'samples': 3633664, 'steps': 7096, 'loss/train': 2.4714720249176025} +02/24/2022 10:45:56 - INFO - codeparrot_training - Step 7097: {'lr': 0.0004862175895722923, 'samples': 3634176, 'steps': 7097, 'loss/train': 1.8877856731414795} +02/24/2022 10:45:59 - INFO - codeparrot_training - Step 7098: {'lr': 0.0004862122312656186, 'samples': 3634688, 'steps': 7098, 'loss/train': 2.70857834815979} +02/24/2022 10:46:05 - INFO - codeparrot_training - Step 7099: {'lr': 0.0004862068719470867, 'samples': 3635200, 'steps': 7099, 'loss/train': 1.7322553396224976} +02/24/2022 10:46:08 - INFO - codeparrot_training - Step 7100: {'lr': 0.00048620151161671955, 'samples': 3635712, 'steps': 7100, 'loss/train': 1.8875893354415894} +02/24/2022 10:46:14 - INFO - codeparrot_training - Step 7101: {'lr': 0.0004861961502745401, 'samples': 3636224, 'steps': 7101, 'loss/train': 2.606764078140259} +02/24/2022 10:46:17 - INFO - codeparrot_training - Step 7102: {'lr': 0.00048619078792057135, 'samples': 3636736, 'steps': 7102, 'loss/train': 3.5200204849243164} +02/24/2022 10:46:23 - INFO - codeparrot_training - Step 7103: {'lr': 0.00048618542455483625, 'samples': 3637248, 'steps': 7103, 'loss/train': 2.3999087810516357} +02/24/2022 10:46:26 - INFO - codeparrot_training - Step 7104: {'lr': 0.0004861800601773579, 'samples': 3637760, 'steps': 7104, 'loss/train': 2.8969504833221436} +02/24/2022 10:46:32 - INFO - codeparrot_training - Step 7105: {'lr': 0.00048617469478815905, 'samples': 3638272, 'steps': 7105, 'loss/train': 1.8044992685317993} +02/24/2022 10:46:35 - INFO - codeparrot_training - Step 7106: {'lr': 0.00048616932838726286, 'samples': 3638784, 'steps': 7106, 'loss/train': 3.151644706726074} +02/24/2022 10:46:43 - INFO - codeparrot_training - Step 7107: {'lr': 0.0004861639609746923, 'samples': 3639296, 'steps': 7107, 'loss/train': 2.1009297370910645} +02/24/2022 10:46:47 - INFO - codeparrot_training - Step 7108: {'lr': 0.0004861585925504702, 'samples': 3639808, 'steps': 7108, 'loss/train': 3.0692696571350098} +02/24/2022 10:46:52 - INFO - codeparrot_training - Step 7109: {'lr': 0.00048615322311461973, 'samples': 3640320, 'steps': 7109, 'loss/train': 3.0109736919403076} +02/24/2022 10:46:56 - INFO - codeparrot_training - Step 7110: {'lr': 0.0004861478526671639, 'samples': 3640832, 'steps': 7110, 'loss/train': 2.0940754413604736} +02/24/2022 10:47:01 - INFO - codeparrot_training - Step 7111: {'lr': 0.0004861424812081256, 'samples': 3641344, 'steps': 7111, 'loss/train': 1.7864515781402588} +02/24/2022 10:47:05 - INFO - codeparrot_training - Step 7112: {'lr': 0.0004861371087375279, 'samples': 3641856, 'steps': 7112, 'loss/train': 2.3304591178894043} +02/24/2022 10:47:10 - INFO - codeparrot_training - Step 7113: {'lr': 0.0004861317352553938, 'samples': 3642368, 'steps': 7113, 'loss/train': 3.2613091468811035} +02/24/2022 10:47:14 - INFO - codeparrot_training - Step 7114: {'lr': 0.0004861263607617463, 'samples': 3642880, 'steps': 7114, 'loss/train': 2.572568893432617} +02/24/2022 10:47:19 - INFO - codeparrot_training - Step 7115: {'lr': 0.00048612098525660855, 'samples': 3643392, 'steps': 7115, 'loss/train': 2.615438938140869} +02/24/2022 10:47:23 - INFO - codeparrot_training - Step 7116: {'lr': 0.00048611560874000335, 'samples': 3643904, 'steps': 7116, 'loss/train': 1.7394640445709229} +02/24/2022 10:47:30 - INFO - codeparrot_training - Step 7117: {'lr': 0.000486110231211954, 'samples': 3644416, 'steps': 7117, 'loss/train': 0.3638428747653961} +02/24/2022 10:47:33 - INFO - codeparrot_training - Step 7118: {'lr': 0.0004861048526724833, 'samples': 3644928, 'steps': 7118, 'loss/train': 2.4352030754089355} +02/24/2022 10:47:39 - INFO - codeparrot_training - Step 7119: {'lr': 0.00048609947312161435, 'samples': 3645440, 'steps': 7119, 'loss/train': 2.163443088531494} +02/24/2022 10:47:42 - INFO - codeparrot_training - Step 7120: {'lr': 0.0004860940925593703, 'samples': 3645952, 'steps': 7120, 'loss/train': 1.8580036163330078} +02/24/2022 10:47:48 - INFO - codeparrot_training - Step 7121: {'lr': 0.0004860887109857741, 'samples': 3646464, 'steps': 7121, 'loss/train': 1.9512933492660522} +02/24/2022 10:47:51 - INFO - codeparrot_training - Step 7122: {'lr': 0.0004860833284008488, 'samples': 3646976, 'steps': 7122, 'loss/train': 1.955841302871704} +02/24/2022 10:47:57 - INFO - codeparrot_training - Step 7123: {'lr': 0.00048607794480461753, 'samples': 3647488, 'steps': 7123, 'loss/train': 1.6247124671936035} +02/24/2022 10:48:00 - INFO - codeparrot_training - Step 7124: {'lr': 0.00048607256019710327, 'samples': 3648000, 'steps': 7124, 'loss/train': 2.3678576946258545} +02/24/2022 10:48:07 - INFO - codeparrot_training - Step 7125: {'lr': 0.0004860671745783292, 'samples': 3648512, 'steps': 7125, 'loss/train': 2.619330644607544} +02/24/2022 10:48:10 - INFO - codeparrot_training - Step 7126: {'lr': 0.0004860617879483182, 'samples': 3649024, 'steps': 7126, 'loss/train': 2.1089675426483154} +02/24/2022 10:48:14 - INFO - codeparrot_training - Step 7127: {'lr': 0.0004860564003070935, 'samples': 3649536, 'steps': 7127, 'loss/train': 2.939650774002075} +02/24/2022 10:48:20 - INFO - codeparrot_training - Step 7128: {'lr': 0.00048605101165467813, 'samples': 3650048, 'steps': 7128, 'loss/train': 3.040771484375} +02/24/2022 10:48:23 - INFO - codeparrot_training - Step 7129: {'lr': 0.00048604562199109524, 'samples': 3650560, 'steps': 7129, 'loss/train': 2.6924564838409424} +02/24/2022 10:48:29 - INFO - codeparrot_training - Step 7130: {'lr': 0.00048604023131636784, 'samples': 3651072, 'steps': 7130, 'loss/train': 2.4744179248809814} +02/24/2022 10:48:32 - INFO - codeparrot_training - Step 7131: {'lr': 0.00048603483963051896, 'samples': 3651584, 'steps': 7131, 'loss/train': 2.6079609394073486} +02/24/2022 10:48:38 - INFO - codeparrot_training - Step 7132: {'lr': 0.0004860294469335719, 'samples': 3652096, 'steps': 7132, 'loss/train': 1.791207194328308} +02/24/2022 10:48:41 - INFO - codeparrot_training - Step 7133: {'lr': 0.00048602405322554956, 'samples': 3652608, 'steps': 7133, 'loss/train': 2.0393855571746826} +02/24/2022 10:48:49 - INFO - codeparrot_training - Step 7134: {'lr': 0.00048601865850647516, 'samples': 3653120, 'steps': 7134, 'loss/train': 1.391977310180664} +02/24/2022 10:48:52 - INFO - codeparrot_training - Step 7135: {'lr': 0.0004860132627763717, 'samples': 3653632, 'steps': 7135, 'loss/train': 1.6162645816802979} +02/24/2022 10:48:58 - INFO - codeparrot_training - Step 7136: {'lr': 0.0004860078660352625, 'samples': 3654144, 'steps': 7136, 'loss/train': 2.4756364822387695} +02/24/2022 10:49:01 - INFO - codeparrot_training - Step 7137: {'lr': 0.0004860024682831704, 'samples': 3654656, 'steps': 7137, 'loss/train': 1.660615086555481} +02/24/2022 10:49:07 - INFO - codeparrot_training - Step 7138: {'lr': 0.0004859970695201187, 'samples': 3655168, 'steps': 7138, 'loss/train': 2.9241085052490234} +02/24/2022 10:49:11 - INFO - codeparrot_training - Step 7139: {'lr': 0.00048599166974613053, 'samples': 3655680, 'steps': 7139, 'loss/train': 1.275688886642456} +02/24/2022 10:49:16 - INFO - codeparrot_training - Step 7140: {'lr': 0.000485986268961229, 'samples': 3656192, 'steps': 7140, 'loss/train': 2.078481912612915} +02/24/2022 10:49:20 - INFO - codeparrot_training - Step 7141: {'lr': 0.0004859808671654372, 'samples': 3656704, 'steps': 7141, 'loss/train': 4.897994518280029} +02/24/2022 10:49:27 - INFO - codeparrot_training - Step 7142: {'lr': 0.00048597546435877824, 'samples': 3657216, 'steps': 7142, 'loss/train': 1.9437363147735596} +02/24/2022 10:49:30 - INFO - codeparrot_training - Step 7143: {'lr': 0.0004859700605412754, 'samples': 3657728, 'steps': 7143, 'loss/train': 2.2802278995513916} +02/24/2022 10:49:36 - INFO - codeparrot_training - Step 7144: {'lr': 0.0004859646557129517, 'samples': 3658240, 'steps': 7144, 'loss/train': 2.515331506729126} +02/24/2022 10:49:39 - INFO - codeparrot_training - Step 7145: {'lr': 0.0004859592498738304, 'samples': 3658752, 'steps': 7145, 'loss/train': 0.2366783618927002} +02/24/2022 10:49:45 - INFO - codeparrot_training - Step 7146: {'lr': 0.00048595384302393453, 'samples': 3659264, 'steps': 7146, 'loss/train': 3.8160417079925537} +02/24/2022 10:49:48 - INFO - codeparrot_training - Step 7147: {'lr': 0.00048594843516328734, 'samples': 3659776, 'steps': 7147, 'loss/train': 1.748029351234436} +02/24/2022 10:49:54 - INFO - codeparrot_training - Step 7148: {'lr': 0.000485943026291912, 'samples': 3660288, 'steps': 7148, 'loss/train': 1.8070690631866455} +02/24/2022 10:49:57 - INFO - codeparrot_training - Step 7149: {'lr': 0.0004859376164098317, 'samples': 3660800, 'steps': 7149, 'loss/train': 3.185234785079956} +02/24/2022 10:50:03 - INFO - codeparrot_training - Step 7150: {'lr': 0.0004859322055170695, 'samples': 3661312, 'steps': 7150, 'loss/train': 2.15921688079834} +02/24/2022 10:50:06 - INFO - codeparrot_training - Step 7151: {'lr': 0.00048592679361364867, 'samples': 3661824, 'steps': 7151, 'loss/train': 2.3122177124023438} +02/24/2022 10:50:12 - INFO - codeparrot_training - Step 7152: {'lr': 0.00048592138069959235, 'samples': 3662336, 'steps': 7152, 'loss/train': 2.613693952560425} +02/24/2022 10:50:16 - INFO - codeparrot_training - Step 7153: {'lr': 0.0004859159667749238, 'samples': 3662848, 'steps': 7153, 'loss/train': 2.06919002532959} +02/24/2022 10:50:23 - INFO - codeparrot_training - Step 7154: {'lr': 0.000485910551839666, 'samples': 3663360, 'steps': 7154, 'loss/train': 3.6004843711853027} +02/24/2022 10:50:26 - INFO - codeparrot_training - Step 7155: {'lr': 0.0004859051358938425, 'samples': 3663872, 'steps': 7155, 'loss/train': 1.2813680171966553} +02/24/2022 10:50:32 - INFO - codeparrot_training - Step 7156: {'lr': 0.00048589971893747626, 'samples': 3664384, 'steps': 7156, 'loss/train': 2.6149206161499023} +02/24/2022 10:50:35 - INFO - codeparrot_training - Step 7157: {'lr': 0.0004858943009705905, 'samples': 3664896, 'steps': 7157, 'loss/train': 3.144535541534424} +02/24/2022 10:50:41 - INFO - codeparrot_training - Step 7158: {'lr': 0.00048588888199320847, 'samples': 3665408, 'steps': 7158, 'loss/train': 1.6206649541854858} +02/24/2022 10:50:44 - INFO - codeparrot_training - Step 7159: {'lr': 0.0004858834620053534, 'samples': 3665920, 'steps': 7159, 'loss/train': 2.4694418907165527} +02/24/2022 10:50:50 - INFO - codeparrot_training - Step 7160: {'lr': 0.0004858780410070484, 'samples': 3666432, 'steps': 7160, 'loss/train': 1.972827434539795} +02/24/2022 10:50:53 - INFO - codeparrot_training - Step 7161: {'lr': 0.0004858726189983168, 'samples': 3666944, 'steps': 7161, 'loss/train': 1.6937215328216553} +02/24/2022 10:50:59 - INFO - codeparrot_training - Step 7162: {'lr': 0.00048586719597918185, 'samples': 3667456, 'steps': 7162, 'loss/train': 1.9544861316680908} +02/24/2022 10:51:02 - INFO - codeparrot_training - Step 7163: {'lr': 0.0004858617719496667, 'samples': 3667968, 'steps': 7163, 'loss/train': 2.36794376373291} +02/24/2022 10:51:10 - INFO - codeparrot_training - Step 7164: {'lr': 0.0004858563469097946, 'samples': 3668480, 'steps': 7164, 'loss/train': 2.1638131141662598} +02/24/2022 10:51:13 - INFO - codeparrot_training - Step 7165: {'lr': 0.0004858509208595888, 'samples': 3668992, 'steps': 7165, 'loss/train': 2.0760090351104736} +02/24/2022 10:51:19 - INFO - codeparrot_training - Step 7166: {'lr': 0.0004858454937990726, 'samples': 3669504, 'steps': 7166, 'loss/train': 2.2443134784698486} +02/24/2022 10:51:22 - INFO - codeparrot_training - Step 7167: {'lr': 0.0004858400657282691, 'samples': 3670016, 'steps': 7167, 'loss/train': 2.4196226596832275} +02/24/2022 10:51:28 - INFO - codeparrot_training - Step 7168: {'lr': 0.00048583463664720174, 'samples': 3670528, 'steps': 7168, 'loss/train': 3.0884318351745605} +02/24/2022 10:51:31 - INFO - codeparrot_training - Step 7169: {'lr': 0.00048582920655589366, 'samples': 3671040, 'steps': 7169, 'loss/train': 2.4953513145446777} +02/24/2022 10:51:37 - INFO - codeparrot_training - Step 7170: {'lr': 0.0004858237754543681, 'samples': 3671552, 'steps': 7170, 'loss/train': 1.272633671760559} +02/24/2022 10:51:40 - INFO - codeparrot_training - Step 7171: {'lr': 0.0004858183433426484, 'samples': 3672064, 'steps': 7171, 'loss/train': 2.6103909015655518} +02/24/2022 10:51:46 - INFO - codeparrot_training - Step 7172: {'lr': 0.0004858129102207578, 'samples': 3672576, 'steps': 7172, 'loss/train': 2.377828359603882} +02/24/2022 10:51:49 - INFO - codeparrot_training - Step 7173: {'lr': 0.00048580747608871955, 'samples': 3673088, 'steps': 7173, 'loss/train': 3.352987766265869} +02/24/2022 10:51:55 - INFO - codeparrot_training - Step 7174: {'lr': 0.000485802040946557, 'samples': 3673600, 'steps': 7174, 'loss/train': 2.291076898574829} +02/24/2022 10:51:58 - INFO - codeparrot_training - Step 7175: {'lr': 0.00048579660479429335, 'samples': 3674112, 'steps': 7175, 'loss/train': 2.5165326595306396} +02/24/2022 10:52:04 - INFO - codeparrot_training - Step 7176: {'lr': 0.00048579116763195184, 'samples': 3674624, 'steps': 7176, 'loss/train': 3.4540293216705322} +02/24/2022 10:52:07 - INFO - codeparrot_training - Step 7177: {'lr': 0.00048578572945955594, 'samples': 3675136, 'steps': 7177, 'loss/train': 2.3993983268737793} +02/24/2022 10:52:13 - INFO - codeparrot_training - Step 7178: {'lr': 0.00048578029027712883, 'samples': 3675648, 'steps': 7178, 'loss/train': 2.3527095317840576} +02/24/2022 10:52:16 - INFO - codeparrot_training - Step 7179: {'lr': 0.0004857748500846938, 'samples': 3676160, 'steps': 7179, 'loss/train': 1.432336449623108} +02/24/2022 10:52:23 - INFO - codeparrot_training - Step 7180: {'lr': 0.0004857694088822742, 'samples': 3676672, 'steps': 7180, 'loss/train': 1.1556800603866577} +02/24/2022 10:52:27 - INFO - codeparrot_training - Step 7181: {'lr': 0.00048576396666989333, 'samples': 3677184, 'steps': 7181, 'loss/train': 1.8337124586105347} +02/24/2022 10:52:32 - INFO - codeparrot_training - Step 7182: {'lr': 0.0004857585234475745, 'samples': 3677696, 'steps': 7182, 'loss/train': 3.0596120357513428} +02/24/2022 10:52:36 - INFO - codeparrot_training - Step 7183: {'lr': 0.00048575307921534095, 'samples': 3678208, 'steps': 7183, 'loss/train': 1.1250718832015991} +02/24/2022 10:52:41 - INFO - codeparrot_training - Step 7184: {'lr': 0.0004857476339732161, 'samples': 3678720, 'steps': 7184, 'loss/train': 1.85762619972229} +02/24/2022 10:52:45 - INFO - codeparrot_training - Step 7185: {'lr': 0.0004857421877212233, 'samples': 3679232, 'steps': 7185, 'loss/train': 2.8390424251556396} +02/24/2022 10:52:50 - INFO - codeparrot_training - Step 7186: {'lr': 0.00048573674045938577, 'samples': 3679744, 'steps': 7186, 'loss/train': 2.185783624649048} +02/24/2022 10:52:54 - INFO - codeparrot_training - Step 7187: {'lr': 0.00048573129218772686, 'samples': 3680256, 'steps': 7187, 'loss/train': 2.4759747982025146} +02/24/2022 10:52:59 - INFO - codeparrot_training - Step 7188: {'lr': 0.00048572584290627, 'samples': 3680768, 'steps': 7188, 'loss/train': 2.599696397781372} +02/24/2022 10:53:03 - INFO - codeparrot_training - Step 7189: {'lr': 0.00048572039261503855, 'samples': 3681280, 'steps': 7189, 'loss/train': 2.3892900943756104} +02/24/2022 10:53:10 - INFO - codeparrot_training - Step 7190: {'lr': 0.00048571494131405567, 'samples': 3681792, 'steps': 7190, 'loss/train': 2.8847944736480713} +02/24/2022 10:53:13 - INFO - codeparrot_training - Step 7191: {'lr': 0.0004857094890033449, 'samples': 3682304, 'steps': 7191, 'loss/train': 2.7730712890625} +02/24/2022 10:53:19 - INFO - codeparrot_training - Step 7192: {'lr': 0.0004857040356829295, 'samples': 3682816, 'steps': 7192, 'loss/train': 2.233396291732788} +02/24/2022 10:53:23 - INFO - codeparrot_training - Step 7193: {'lr': 0.00048569858135283285, 'samples': 3683328, 'steps': 7193, 'loss/train': 1.2707794904708862} +02/24/2022 10:53:28 - INFO - codeparrot_training - Step 7194: {'lr': 0.00048569312601307827, 'samples': 3683840, 'steps': 7194, 'loss/train': 3.1571671962738037} +02/24/2022 10:53:32 - INFO - codeparrot_training - Step 7195: {'lr': 0.00048568766966368925, 'samples': 3684352, 'steps': 7195, 'loss/train': 2.1208324432373047} +02/24/2022 10:53:37 - INFO - codeparrot_training - Step 7196: {'lr': 0.00048568221230468905, 'samples': 3684864, 'steps': 7196, 'loss/train': 2.512352705001831} +02/24/2022 10:53:41 - INFO - codeparrot_training - Step 7197: {'lr': 0.0004856767539361011, 'samples': 3685376, 'steps': 7197, 'loss/train': 1.8583602905273438} +02/24/2022 10:53:46 - INFO - codeparrot_training - Step 7198: {'lr': 0.0004856712945579488, 'samples': 3685888, 'steps': 7198, 'loss/train': 1.7931087017059326} +02/24/2022 10:53:49 - INFO - codeparrot_training - Step 7199: {'lr': 0.00048566583417025553, 'samples': 3686400, 'steps': 7199, 'loss/train': 2.4703731536865234} +02/24/2022 10:53:57 - INFO - codeparrot_training - Step 7200: {'lr': 0.00048566037277304465, 'samples': 3686912, 'steps': 7200, 'loss/train': 2.562755823135376} +02/24/2022 10:54:00 - INFO - codeparrot_training - Step 7201: {'lr': 0.00048565491036633946, 'samples': 3687424, 'steps': 7201, 'loss/train': 3.199000358581543} +02/24/2022 10:54:06 - INFO - codeparrot_training - Step 7202: {'lr': 0.00048564944695016356, 'samples': 3687936, 'steps': 7202, 'loss/train': 2.8276875019073486} +02/24/2022 10:54:11 - INFO - codeparrot_training - Step 7203: {'lr': 0.00048564398252454026, 'samples': 3688448, 'steps': 7203, 'loss/train': 2.888237953186035} +02/24/2022 10:54:15 - INFO - codeparrot_training - Step 7204: {'lr': 0.0004856385170894929, 'samples': 3688960, 'steps': 7204, 'loss/train': 2.2383034229278564} +02/24/2022 10:54:20 - INFO - codeparrot_training - Step 7205: {'lr': 0.00048563305064504503, 'samples': 3689472, 'steps': 7205, 'loss/train': 1.4651906490325928} +02/24/2022 10:54:24 - INFO - codeparrot_training - Step 7206: {'lr': 0.00048562758319121996, 'samples': 3689984, 'steps': 7206, 'loss/train': 1.9588793516159058} +02/24/2022 10:54:29 - INFO - codeparrot_training - Step 7207: {'lr': 0.00048562211472804115, 'samples': 3690496, 'steps': 7207, 'loss/train': 1.9141972064971924} +02/24/2022 10:54:33 - INFO - codeparrot_training - Step 7208: {'lr': 0.000485616645255532, 'samples': 3691008, 'steps': 7208, 'loss/train': 2.395052671432495} +02/24/2022 10:54:40 - INFO - codeparrot_training - Step 7209: {'lr': 0.00048561117477371595, 'samples': 3691520, 'steps': 7209, 'loss/train': 2.777357339859009} +02/24/2022 10:54:44 - INFO - codeparrot_training - Step 7210: {'lr': 0.0004856057032826165, 'samples': 3692032, 'steps': 7210, 'loss/train': 1.645520567893982} +02/24/2022 10:54:49 - INFO - codeparrot_training - Step 7211: {'lr': 0.000485600230782257, 'samples': 3692544, 'steps': 7211, 'loss/train': 2.1502439975738525} +02/24/2022 10:54:53 - INFO - codeparrot_training - Step 7212: {'lr': 0.00048559475727266086, 'samples': 3693056, 'steps': 7212, 'loss/train': 2.699481725692749} +02/24/2022 10:54:58 - INFO - codeparrot_training - Step 7213: {'lr': 0.00048558928275385167, 'samples': 3693568, 'steps': 7213, 'loss/train': 1.5347621440887451} +02/24/2022 10:55:02 - INFO - codeparrot_training - Step 7214: {'lr': 0.00048558380722585283, 'samples': 3694080, 'steps': 7214, 'loss/train': 3.3592445850372314} +02/24/2022 10:55:07 - INFO - codeparrot_training - Step 7215: {'lr': 0.00048557833068868766, 'samples': 3694592, 'steps': 7215, 'loss/train': 2.003713846206665} +02/24/2022 10:55:11 - INFO - codeparrot_training - Step 7216: {'lr': 0.00048557285314237975, 'samples': 3695104, 'steps': 7216, 'loss/train': 2.9488580226898193} +02/24/2022 10:55:16 - INFO - codeparrot_training - Step 7217: {'lr': 0.0004855673745869526, 'samples': 3695616, 'steps': 7217, 'loss/train': 2.2138144969940186} +02/24/2022 10:55:20 - INFO - codeparrot_training - Step 7218: {'lr': 0.00048556189502242956, 'samples': 3696128, 'steps': 7218, 'loss/train': 2.137820243835449} +02/24/2022 10:55:25 - INFO - codeparrot_training - Step 7219: {'lr': 0.00048555641444883424, 'samples': 3696640, 'steps': 7219, 'loss/train': 2.91664981842041} +02/24/2022 10:55:28 - INFO - codeparrot_training - Step 7220: {'lr': 0.00048555093286618996, 'samples': 3697152, 'steps': 7220, 'loss/train': 2.5578601360321045} +02/24/2022 10:55:34 - INFO - codeparrot_training - Step 7221: {'lr': 0.00048554545027452035, 'samples': 3697664, 'steps': 7221, 'loss/train': 2.4842004776000977} +02/24/2022 10:55:38 - INFO - codeparrot_training - Step 7222: {'lr': 0.00048553996667384877, 'samples': 3698176, 'steps': 7222, 'loss/train': 2.053813934326172} +02/24/2022 10:55:43 - INFO - codeparrot_training - Step 7223: {'lr': 0.00048553448206419876, 'samples': 3698688, 'steps': 7223, 'loss/train': 2.055654287338257} +02/24/2022 10:55:47 - INFO - codeparrot_training - Step 7224: {'lr': 0.0004855289964455938, 'samples': 3699200, 'steps': 7224, 'loss/train': 2.218465566635132} +02/24/2022 10:55:54 - INFO - codeparrot_training - Step 7225: {'lr': 0.0004855235098180575, 'samples': 3699712, 'steps': 7225, 'loss/train': 1.195311427116394} +02/24/2022 10:55:57 - INFO - codeparrot_training - Step 7226: {'lr': 0.00048551802218161315, 'samples': 3700224, 'steps': 7226, 'loss/train': 2.4262242317199707} +02/24/2022 10:56:04 - INFO - codeparrot_training - Step 7227: {'lr': 0.00048551253353628444, 'samples': 3700736, 'steps': 7227, 'loss/train': 3.3374390602111816} +02/24/2022 10:56:07 - INFO - codeparrot_training - Step 7228: {'lr': 0.0004855070438820949, 'samples': 3701248, 'steps': 7228, 'loss/train': 2.7477335929870605} +02/24/2022 10:56:11 - INFO - codeparrot_training - Step 7229: {'lr': 0.0004855015532190679, 'samples': 3701760, 'steps': 7229, 'loss/train': 2.0309486389160156} +02/24/2022 10:56:16 - INFO - codeparrot_training - Step 7230: {'lr': 0.0004854960615472269, 'samples': 3702272, 'steps': 7230, 'loss/train': 2.0270018577575684} +02/24/2022 10:56:20 - INFO - codeparrot_training - Step 7231: {'lr': 0.0004854905688665957, 'samples': 3702784, 'steps': 7231, 'loss/train': 1.3398030996322632} +02/24/2022 10:56:25 - INFO - codeparrot_training - Step 7232: {'lr': 0.00048548507517719766, 'samples': 3703296, 'steps': 7232, 'loss/train': 2.0843541622161865} +02/24/2022 10:56:29 - INFO - codeparrot_training - Step 7233: {'lr': 0.00048547958047905635, 'samples': 3703808, 'steps': 7233, 'loss/train': 2.3666794300079346} +02/24/2022 10:56:34 - INFO - codeparrot_training - Step 7234: {'lr': 0.00048547408477219524, 'samples': 3704320, 'steps': 7234, 'loss/train': 2.985812187194824} +02/24/2022 10:56:38 - INFO - codeparrot_training - Step 7235: {'lr': 0.00048546858805663797, 'samples': 3704832, 'steps': 7235, 'loss/train': 1.1768949031829834} +02/24/2022 10:56:43 - INFO - codeparrot_training - Step 7236: {'lr': 0.000485463090332408, 'samples': 3705344, 'steps': 7236, 'loss/train': 3.4861700534820557} +02/24/2022 10:56:47 - INFO - codeparrot_training - Step 7237: {'lr': 0.0004854575915995289, 'samples': 3705856, 'steps': 7237, 'loss/train': 1.7150379419326782} +02/24/2022 10:56:54 - INFO - codeparrot_training - Step 7238: {'lr': 0.0004854520918580243, 'samples': 3706368, 'steps': 7238, 'loss/train': 2.8902151584625244} +02/24/2022 10:56:57 - INFO - codeparrot_training - Step 7239: {'lr': 0.00048544659110791766, 'samples': 3706880, 'steps': 7239, 'loss/train': 2.8760859966278076} +02/24/2022 10:57:03 - INFO - codeparrot_training - Step 7240: {'lr': 0.0004854410893492326, 'samples': 3707392, 'steps': 7240, 'loss/train': 2.2347939014434814} +02/24/2022 10:57:06 - INFO - codeparrot_training - Step 7241: {'lr': 0.00048543558658199266, 'samples': 3707904, 'steps': 7241, 'loss/train': 2.0554895401000977} +02/24/2022 10:57:12 - INFO - codeparrot_training - Step 7242: {'lr': 0.0004854300828062215, 'samples': 3708416, 'steps': 7242, 'loss/train': 2.3495774269104004} +02/24/2022 10:57:15 - INFO - codeparrot_training - Step 7243: {'lr': 0.0004854245780219425, 'samples': 3708928, 'steps': 7243, 'loss/train': 1.511998176574707} +02/24/2022 10:57:21 - INFO - codeparrot_training - Step 7244: {'lr': 0.00048541907222917946, 'samples': 3709440, 'steps': 7244, 'loss/train': 0.863852322101593} +02/24/2022 10:57:24 - INFO - codeparrot_training - Step 7245: {'lr': 0.0004854135654279558, 'samples': 3709952, 'steps': 7245, 'loss/train': 2.0903382301330566} +02/24/2022 10:57:32 - INFO - codeparrot_training - Step 7246: {'lr': 0.0004854080576182952, 'samples': 3710464, 'steps': 7246, 'loss/train': 2.5613043308258057} +02/24/2022 10:57:35 - INFO - codeparrot_training - Step 7247: {'lr': 0.00048540254880022126, 'samples': 3710976, 'steps': 7247, 'loss/train': 2.1796469688415527} +02/24/2022 10:57:41 - INFO - codeparrot_training - Step 7248: {'lr': 0.00048539703897375753, 'samples': 3711488, 'steps': 7248, 'loss/train': 2.8391802310943604} +02/24/2022 10:57:44 - INFO - codeparrot_training - Step 7249: {'lr': 0.0004853915281389276, 'samples': 3712000, 'steps': 7249, 'loss/train': 2.178542137145996} +02/24/2022 10:57:50 - INFO - codeparrot_training - Step 7250: {'lr': 0.0004853860162957552, 'samples': 3712512, 'steps': 7250, 'loss/train': 2.441507339477539} +02/24/2022 10:57:53 - INFO - codeparrot_training - Step 7251: {'lr': 0.00048538050344426375, 'samples': 3713024, 'steps': 7251, 'loss/train': 1.7413239479064941} +02/24/2022 10:57:59 - INFO - codeparrot_training - Step 7252: {'lr': 0.0004853749895844771, 'samples': 3713536, 'steps': 7252, 'loss/train': 2.78940486907959} +02/24/2022 10:58:02 - INFO - codeparrot_training - Step 7253: {'lr': 0.00048536947471641855, 'samples': 3714048, 'steps': 7253, 'loss/train': 3.2126731872558594} +02/24/2022 10:58:08 - INFO - codeparrot_training - Step 7254: {'lr': 0.00048536395884011207, 'samples': 3714560, 'steps': 7254, 'loss/train': 8.389498710632324} +02/24/2022 10:58:11 - INFO - codeparrot_training - Step 7255: {'lr': 0.00048535844195558104, 'samples': 3715072, 'steps': 7255, 'loss/train': 1.8467371463775635} +02/24/2022 10:58:18 - INFO - codeparrot_training - Step 7256: {'lr': 0.0004853529240628493, 'samples': 3715584, 'steps': 7256, 'loss/train': 1.7006131410598755} +02/24/2022 10:58:22 - INFO - codeparrot_training - Step 7257: {'lr': 0.0004853474051619402, 'samples': 3716096, 'steps': 7257, 'loss/train': 2.6924703121185303} +02/24/2022 10:58:27 - INFO - codeparrot_training - Step 7258: {'lr': 0.0004853418852528776, 'samples': 3716608, 'steps': 7258, 'loss/train': 2.2776970863342285} +02/24/2022 10:58:31 - INFO - codeparrot_training - Step 7259: {'lr': 0.00048533636433568505, 'samples': 3717120, 'steps': 7259, 'loss/train': 1.6656603813171387} +02/24/2022 10:58:36 - INFO - codeparrot_training - Step 7260: {'lr': 0.00048533084241038637, 'samples': 3717632, 'steps': 7260, 'loss/train': 2.825521230697632} +02/24/2022 10:58:40 - INFO - codeparrot_training - Step 7261: {'lr': 0.00048532531947700496, 'samples': 3718144, 'steps': 7261, 'loss/train': 2.580369710922241} +02/24/2022 10:58:45 - INFO - codeparrot_training - Step 7262: {'lr': 0.00048531979553556473, 'samples': 3718656, 'steps': 7262, 'loss/train': 0.9673537015914917} +02/24/2022 10:58:49 - INFO - codeparrot_training - Step 7263: {'lr': 0.0004853142705860891, 'samples': 3719168, 'steps': 7263, 'loss/train': 1.1317963600158691} +02/24/2022 10:58:54 - INFO - codeparrot_training - Step 7264: {'lr': 0.00048530874462860194, 'samples': 3719680, 'steps': 7264, 'loss/train': 1.9410359859466553} +02/24/2022 10:59:00 - INFO - codeparrot_training - Step 7265: {'lr': 0.0004853032176631268, 'samples': 3720192, 'steps': 7265, 'loss/train': 2.244546413421631} +02/24/2022 10:59:03 - INFO - codeparrot_training - Step 7266: {'lr': 0.0004852976896896874, 'samples': 3720704, 'steps': 7266, 'loss/train': 1.9455451965332031} +02/24/2022 10:59:09 - INFO - codeparrot_training - Step 7267: {'lr': 0.0004852921607083074, 'samples': 3721216, 'steps': 7267, 'loss/train': 1.6454341411590576} +02/24/2022 10:59:12 - INFO - codeparrot_training - Step 7268: {'lr': 0.00048528663071901047, 'samples': 3721728, 'steps': 7268, 'loss/train': 2.1452298164367676} +02/24/2022 10:59:18 - INFO - codeparrot_training - Step 7269: {'lr': 0.00048528109972182043, 'samples': 3722240, 'steps': 7269, 'loss/train': 1.0160413980484009} +02/24/2022 10:59:21 - INFO - codeparrot_training - Step 7270: {'lr': 0.0004852755677167607, 'samples': 3722752, 'steps': 7270, 'loss/train': 2.9078474044799805} +02/24/2022 10:59:29 - INFO - codeparrot_training - Step 7271: {'lr': 0.00048527003470385534, 'samples': 3723264, 'steps': 7271, 'loss/train': 2.056849479675293} +02/24/2022 10:59:32 - INFO - codeparrot_training - Step 7272: {'lr': 0.0004852645006831278, 'samples': 3723776, 'steps': 7272, 'loss/train': 2.4244182109832764} +02/24/2022 10:59:38 - INFO - codeparrot_training - Step 7273: {'lr': 0.00048525896565460177, 'samples': 3724288, 'steps': 7273, 'loss/train': 2.0066919326782227} +02/24/2022 10:59:41 - INFO - codeparrot_training - Step 7274: {'lr': 0.00048525342961830106, 'samples': 3724800, 'steps': 7274, 'loss/train': 2.268441677093506} +02/24/2022 10:59:47 - INFO - codeparrot_training - Step 7275: {'lr': 0.0004852478925742494, 'samples': 3725312, 'steps': 7275, 'loss/train': 3.278693437576294} +02/24/2022 10:59:50 - INFO - codeparrot_training - Step 7276: {'lr': 0.0004852423545224704, 'samples': 3725824, 'steps': 7276, 'loss/train': 1.9232362508773804} +02/24/2022 10:59:56 - INFO - codeparrot_training - Step 7277: {'lr': 0.00048523681546298793, 'samples': 3726336, 'steps': 7277, 'loss/train': 1.712380051612854} +02/24/2022 10:59:59 - INFO - codeparrot_training - Step 7278: {'lr': 0.0004852312753958256, 'samples': 3726848, 'steps': 7278, 'loss/train': 3.691619873046875} +02/24/2022 11:00:05 - INFO - codeparrot_training - Step 7279: {'lr': 0.00048522573432100715, 'samples': 3727360, 'steps': 7279, 'loss/train': 1.2366740703582764} +02/24/2022 11:00:08 - INFO - codeparrot_training - Step 7280: {'lr': 0.0004852201922385564, 'samples': 3727872, 'steps': 7280, 'loss/train': 3.183555841445923} +02/24/2022 11:00:14 - INFO - codeparrot_training - Step 7281: {'lr': 0.000485214649148497, 'samples': 3728384, 'steps': 7281, 'loss/train': 2.197190523147583} +02/24/2022 11:00:17 - INFO - codeparrot_training - Step 7282: {'lr': 0.00048520910505085274, 'samples': 3728896, 'steps': 7282, 'loss/train': 1.9557613134384155} +02/24/2022 11:00:24 - INFO - codeparrot_training - Step 7283: {'lr': 0.0004852035599456474, 'samples': 3729408, 'steps': 7283, 'loss/train': 2.2826309204101562} +02/24/2022 11:00:28 - INFO - codeparrot_training - Step 7284: {'lr': 0.0004851980138329046, 'samples': 3729920, 'steps': 7284, 'loss/train': 2.773719549179077} +02/24/2022 11:00:33 - INFO - codeparrot_training - Step 7285: {'lr': 0.00048519246671264825, 'samples': 3730432, 'steps': 7285, 'loss/train': 3.537505626678467} +02/24/2022 11:00:37 - INFO - codeparrot_training - Step 7286: {'lr': 0.0004851869185849021, 'samples': 3730944, 'steps': 7286, 'loss/train': 2.2186882495880127} +02/24/2022 11:00:42 - INFO - codeparrot_training - Step 7287: {'lr': 0.0004851813694496898, 'samples': 3731456, 'steps': 7287, 'loss/train': 2.055197238922119} +02/24/2022 11:00:46 - INFO - codeparrot_training - Step 7288: {'lr': 0.00048517581930703526, 'samples': 3731968, 'steps': 7288, 'loss/train': 1.3483860492706299} +02/24/2022 11:00:51 - INFO - codeparrot_training - Step 7289: {'lr': 0.0004851702681569621, 'samples': 3732480, 'steps': 7289, 'loss/train': 3.1877801418304443} +02/24/2022 11:00:55 - INFO - codeparrot_training - Step 7290: {'lr': 0.0004851647159994943, 'samples': 3732992, 'steps': 7290, 'loss/train': 2.2647533416748047} +02/24/2022 11:01:00 - INFO - codeparrot_training - Step 7291: {'lr': 0.00048515916283465546, 'samples': 3733504, 'steps': 7291, 'loss/train': 2.734210729598999} +02/24/2022 11:01:04 - INFO - codeparrot_training - Step 7292: {'lr': 0.00048515360866246943, 'samples': 3734016, 'steps': 7292, 'loss/train': 2.4495162963867188} +02/24/2022 11:01:11 - INFO - codeparrot_training - Step 7293: {'lr': 0.00048514805348296, 'samples': 3734528, 'steps': 7293, 'loss/train': 1.844407320022583} +02/24/2022 11:01:14 - INFO - codeparrot_training - Step 7294: {'lr': 0.000485142497296151, 'samples': 3735040, 'steps': 7294, 'loss/train': 4.059875011444092} +02/24/2022 11:01:20 - INFO - codeparrot_training - Step 7295: {'lr': 0.00048513694010206623, 'samples': 3735552, 'steps': 7295, 'loss/train': 1.5836923122406006} +02/24/2022 11:01:23 - INFO - codeparrot_training - Step 7296: {'lr': 0.0004851313819007295, 'samples': 3736064, 'steps': 7296, 'loss/train': 2.7153892517089844} +02/24/2022 11:01:29 - INFO - codeparrot_training - Step 7297: {'lr': 0.0004851258226921645, 'samples': 3736576, 'steps': 7297, 'loss/train': 2.5042052268981934} +02/24/2022 11:01:32 - INFO - codeparrot_training - Step 7298: {'lr': 0.0004851202624763952, 'samples': 3737088, 'steps': 7298, 'loss/train': 2.5344438552856445} +02/24/2022 11:01:38 - INFO - codeparrot_training - Step 7299: {'lr': 0.0004851147012534453, 'samples': 3737600, 'steps': 7299, 'loss/train': 2.899864673614502} +02/24/2022 11:01:41 - INFO - codeparrot_training - Step 7300: {'lr': 0.00048510913902333875, 'samples': 3738112, 'steps': 7300, 'loss/train': 1.7969651222229004} +02/24/2022 11:01:47 - INFO - codeparrot_training - Step 7301: {'lr': 0.0004851035757860992, 'samples': 3738624, 'steps': 7301, 'loss/train': 3.2442026138305664} +02/24/2022 11:01:50 - INFO - codeparrot_training - Step 7302: {'lr': 0.0004850980115417507, 'samples': 3739136, 'steps': 7302, 'loss/train': 2.2781991958618164} +02/24/2022 11:01:58 - INFO - codeparrot_training - Step 7303: {'lr': 0.0004850924462903169, 'samples': 3739648, 'steps': 7303, 'loss/train': 2.26617169380188} +02/24/2022 11:02:01 - INFO - codeparrot_training - Step 7304: {'lr': 0.0004850868800318218, 'samples': 3740160, 'steps': 7304, 'loss/train': 2.0905508995056152} +02/24/2022 11:02:07 - INFO - codeparrot_training - Step 7305: {'lr': 0.00048508131276628905, 'samples': 3740672, 'steps': 7305, 'loss/train': 1.6095443964004517} +02/24/2022 11:02:10 - INFO - codeparrot_training - Step 7306: {'lr': 0.0004850757444937426, 'samples': 3741184, 'steps': 7306, 'loss/train': 2.637087821960449} +02/24/2022 11:02:16 - INFO - codeparrot_training - Step 7307: {'lr': 0.00048507017521420636, 'samples': 3741696, 'steps': 7307, 'loss/train': 1.8647308349609375} +02/24/2022 11:02:19 - INFO - codeparrot_training - Step 7308: {'lr': 0.0004850646049277041, 'samples': 3742208, 'steps': 7308, 'loss/train': 2.1614322662353516} +02/24/2022 11:02:25 - INFO - codeparrot_training - Step 7309: {'lr': 0.00048505903363425974, 'samples': 3742720, 'steps': 7309, 'loss/train': 2.0116732120513916} +02/24/2022 11:02:28 - INFO - codeparrot_training - Step 7310: {'lr': 0.0004850534613338972, 'samples': 3743232, 'steps': 7310, 'loss/train': 4.1565775871276855} +02/24/2022 11:02:34 - INFO - codeparrot_training - Step 7311: {'lr': 0.00048504788802664013, 'samples': 3743744, 'steps': 7311, 'loss/train': 2.4235846996307373} +02/24/2022 11:02:37 - INFO - codeparrot_training - Step 7312: {'lr': 0.00048504231371251255, 'samples': 3744256, 'steps': 7312, 'loss/train': 1.5881248712539673} +02/24/2022 11:02:43 - INFO - codeparrot_training - Step 7313: {'lr': 0.0004850367383915384, 'samples': 3744768, 'steps': 7313, 'loss/train': 1.9319747686386108} +02/24/2022 11:02:46 - INFO - codeparrot_training - Step 7314: {'lr': 0.00048503116206374147, 'samples': 3745280, 'steps': 7314, 'loss/train': 4.7429680824279785} +02/24/2022 11:02:52 - INFO - codeparrot_training - Step 7315: {'lr': 0.00048502558472914573, 'samples': 3745792, 'steps': 7315, 'loss/train': 0.5526663064956665} +02/24/2022 11:02:55 - INFO - codeparrot_training - Step 7316: {'lr': 0.00048502000638777487, 'samples': 3746304, 'steps': 7316, 'loss/train': 3.684763193130493} +02/24/2022 11:03:01 - INFO - codeparrot_training - Step 7317: {'lr': 0.000485014427039653, 'samples': 3746816, 'steps': 7317, 'loss/train': 2.744223117828369} +02/24/2022 11:03:04 - INFO - codeparrot_training - Step 7318: {'lr': 0.00048500884668480407, 'samples': 3747328, 'steps': 7318, 'loss/train': 2.4987661838531494} +02/24/2022 11:03:12 - INFO - codeparrot_training - Step 7319: {'lr': 0.00048500326532325167, 'samples': 3747840, 'steps': 7319, 'loss/train': 2.580307960510254} +02/24/2022 11:03:15 - INFO - codeparrot_training - Step 7320: {'lr': 0.00048499768295502, 'samples': 3748352, 'steps': 7320, 'loss/train': 3.0824406147003174} +02/24/2022 11:03:21 - INFO - codeparrot_training - Step 7321: {'lr': 0.0004849920995801329, 'samples': 3748864, 'steps': 7321, 'loss/train': 2.096445322036743} +02/24/2022 11:03:24 - INFO - codeparrot_training - Step 7322: {'lr': 0.00048498651519861426, 'samples': 3749376, 'steps': 7322, 'loss/train': 2.05833101272583} +02/24/2022 11:03:30 - INFO - codeparrot_training - Step 7323: {'lr': 0.00048498092981048797, 'samples': 3749888, 'steps': 7323, 'loss/train': 1.8068619966506958} +02/24/2022 11:03:33 - INFO - codeparrot_training - Step 7324: {'lr': 0.000484975343415778, 'samples': 3750400, 'steps': 7324, 'loss/train': 2.3918323516845703} +02/24/2022 11:03:39 - INFO - codeparrot_training - Step 7325: {'lr': 0.00048496975601450835, 'samples': 3750912, 'steps': 7325, 'loss/train': 2.4769656658172607} +02/24/2022 11:03:42 - INFO - codeparrot_training - Step 7326: {'lr': 0.0004849641676067027, 'samples': 3751424, 'steps': 7326, 'loss/train': 2.3107967376708984} +02/24/2022 11:03:48 - INFO - codeparrot_training - Step 7327: {'lr': 0.0004849585781923853, 'samples': 3751936, 'steps': 7327, 'loss/train': 2.417308807373047} +02/24/2022 11:03:51 - INFO - codeparrot_training - Step 7328: {'lr': 0.00048495298777157994, 'samples': 3752448, 'steps': 7328, 'loss/train': 0.8975260853767395} +02/24/2022 11:03:59 - INFO - codeparrot_training - Step 7329: {'lr': 0.00048494739634431057, 'samples': 3752960, 'steps': 7329, 'loss/train': 1.2878053188323975} +02/24/2022 11:04:04 - INFO - codeparrot_training - Step 7330: {'lr': 0.00048494180391060114, 'samples': 3753472, 'steps': 7330, 'loss/train': 2.769775390625} +02/24/2022 11:04:08 - INFO - codeparrot_training - Step 7331: {'lr': 0.0004849362104704756, 'samples': 3753984, 'steps': 7331, 'loss/train': 1.1494196653366089} +02/24/2022 11:04:13 - INFO - codeparrot_training - Step 7332: {'lr': 0.00048493061602395803, 'samples': 3754496, 'steps': 7332, 'loss/train': 2.467677354812622} +02/24/2022 11:04:17 - INFO - codeparrot_training - Step 7333: {'lr': 0.0004849250205710722, 'samples': 3755008, 'steps': 7333, 'loss/train': 1.2712596654891968} +02/24/2022 11:04:22 - INFO - codeparrot_training - Step 7334: {'lr': 0.0004849194241118423, 'samples': 3755520, 'steps': 7334, 'loss/train': 0.40487274527549744} +02/24/2022 11:04:26 - INFO - codeparrot_training - Step 7335: {'lr': 0.0004849138266462921, 'samples': 3756032, 'steps': 7335, 'loss/train': 1.7233330011367798} +02/24/2022 11:04:31 - INFO - codeparrot_training - Step 7336: {'lr': 0.0004849082281744457, 'samples': 3756544, 'steps': 7336, 'loss/train': 2.4098403453826904} +02/24/2022 11:04:35 - INFO - codeparrot_training - Step 7337: {'lr': 0.00048490262869632693, 'samples': 3757056, 'steps': 7337, 'loss/train': 2.3194680213928223} +02/24/2022 11:04:42 - INFO - codeparrot_training - Step 7338: {'lr': 0.00048489702821196003, 'samples': 3757568, 'steps': 7338, 'loss/train': 1.9878824949264526} +02/24/2022 11:04:46 - INFO - codeparrot_training - Step 7339: {'lr': 0.0004848914267213688, 'samples': 3758080, 'steps': 7339, 'loss/train': 2.571725845336914} +02/24/2022 11:04:51 - INFO - codeparrot_training - Step 7340: {'lr': 0.00048488582422457726, 'samples': 3758592, 'steps': 7340, 'loss/train': 2.17806077003479} +02/24/2022 11:04:54 - INFO - codeparrot_training - Step 7341: {'lr': 0.0004848802207216094, 'samples': 3759104, 'steps': 7341, 'loss/train': 2.1164746284484863} +02/24/2022 11:05:00 - INFO - codeparrot_training - Step 7342: {'lr': 0.0004848746162124894, 'samples': 3759616, 'steps': 7342, 'loss/train': 1.9124101400375366} +02/24/2022 11:05:03 - INFO - codeparrot_training - Step 7343: {'lr': 0.00048486901069724097, 'samples': 3760128, 'steps': 7343, 'loss/train': 2.7095611095428467} +02/24/2022 11:05:09 - INFO - codeparrot_training - Step 7344: {'lr': 0.0004848634041758884, 'samples': 3760640, 'steps': 7344, 'loss/train': 1.5187373161315918} +02/24/2022 11:05:12 - INFO - codeparrot_training - Step 7345: {'lr': 0.00048485779664845553, 'samples': 3761152, 'steps': 7345, 'loss/train': 2.075495481491089} +02/24/2022 11:05:18 - INFO - codeparrot_training - Step 7346: {'lr': 0.0004848521881149664, 'samples': 3761664, 'steps': 7346, 'loss/train': 2.744697332382202} +02/24/2022 11:05:21 - INFO - codeparrot_training - Step 7347: {'lr': 0.00048484657857544513, 'samples': 3762176, 'steps': 7347, 'loss/train': 1.3915081024169922} +02/24/2022 11:05:29 - INFO - codeparrot_training - Step 7348: {'lr': 0.0004848409680299156, 'samples': 3762688, 'steps': 7348, 'loss/train': 1.8964418172836304} +02/24/2022 11:05:32 - INFO - codeparrot_training - Step 7349: {'lr': 0.00048483535647840206, 'samples': 3763200, 'steps': 7349, 'loss/train': 2.719987154006958} +02/24/2022 11:05:38 - INFO - codeparrot_training - Step 7350: {'lr': 0.00048482974392092827, 'samples': 3763712, 'steps': 7350, 'loss/train': 1.4325661659240723} +02/24/2022 11:05:41 - INFO - codeparrot_training - Step 7351: {'lr': 0.0004848241303575185, 'samples': 3764224, 'steps': 7351, 'loss/train': 2.5114574432373047} +02/24/2022 11:05:47 - INFO - codeparrot_training - Step 7352: {'lr': 0.0004848185157881968, 'samples': 3764736, 'steps': 7352, 'loss/train': 2.7234508991241455} +02/24/2022 11:05:50 - INFO - codeparrot_training - Step 7353: {'lr': 0.0004848129002129871, 'samples': 3765248, 'steps': 7353, 'loss/train': 1.158555030822754} +02/24/2022 11:05:56 - INFO - codeparrot_training - Step 7354: {'lr': 0.0004848072836319134, 'samples': 3765760, 'steps': 7354, 'loss/train': 2.1142032146453857} +02/24/2022 11:05:59 - INFO - codeparrot_training - Step 7355: {'lr': 0.000484801666045, 'samples': 3766272, 'steps': 7355, 'loss/train': 1.7687695026397705} +02/24/2022 11:06:04 - INFO - codeparrot_training - Step 7356: {'lr': 0.0004847960474522707, 'samples': 3766784, 'steps': 7356, 'loss/train': 3.242324113845825} +02/24/2022 11:06:08 - INFO - codeparrot_training - Step 7357: {'lr': 0.00048479042785374974, 'samples': 3767296, 'steps': 7357, 'loss/train': 1.59627366065979} +02/24/2022 11:06:13 - INFO - codeparrot_training - Step 7358: {'lr': 0.0004847848072494611, 'samples': 3767808, 'steps': 7358, 'loss/train': 2.9463582038879395} +02/24/2022 11:06:17 - INFO - codeparrot_training - Step 7359: {'lr': 0.0004847791856394289, 'samples': 3768320, 'steps': 7359, 'loss/train': 1.2039793729782104} +02/24/2022 11:06:23 - INFO - codeparrot_training - Step 7360: {'lr': 0.00048477356302367724, 'samples': 3768832, 'steps': 7360, 'loss/train': 2.4084179401397705} +02/24/2022 11:06:26 - INFO - codeparrot_training - Step 7361: {'lr': 0.00048476793940223026, 'samples': 3769344, 'steps': 7361, 'loss/train': 2.2719168663024902} +02/24/2022 11:06:32 - INFO - codeparrot_training - Step 7362: {'lr': 0.0004847623147751119, 'samples': 3769856, 'steps': 7362, 'loss/train': 2.434413433074951} +02/24/2022 11:06:35 - INFO - codeparrot_training - Step 7363: {'lr': 0.00048475668914234636, 'samples': 3770368, 'steps': 7363, 'loss/train': 2.3244428634643555} +02/24/2022 11:06:42 - INFO - codeparrot_training - Step 7364: {'lr': 0.0004847510625039577, 'samples': 3770880, 'steps': 7364, 'loss/train': 1.4526770114898682} +02/24/2022 11:06:46 - INFO - codeparrot_training - Step 7365: {'lr': 0.00048474543485997005, 'samples': 3771392, 'steps': 7365, 'loss/train': 0.6961601376533508} +02/24/2022 11:06:51 - INFO - codeparrot_training - Step 7366: {'lr': 0.00048473980621040744, 'samples': 3771904, 'steps': 7366, 'loss/train': 3.000654697418213} +02/24/2022 11:06:55 - INFO - codeparrot_training - Step 7367: {'lr': 0.00048473417655529405, 'samples': 3772416, 'steps': 7367, 'loss/train': 1.0492222309112549} +02/24/2022 11:07:00 - INFO - codeparrot_training - Step 7368: {'lr': 0.000484728545894654, 'samples': 3772928, 'steps': 7368, 'loss/train': 2.8229119777679443} +02/24/2022 11:07:04 - INFO - codeparrot_training - Step 7369: {'lr': 0.00048472291422851135, 'samples': 3773440, 'steps': 7369, 'loss/train': 2.4911160469055176} +02/24/2022 11:07:09 - INFO - codeparrot_training - Step 7370: {'lr': 0.00048471728155689034, 'samples': 3773952, 'steps': 7370, 'loss/train': 2.662477970123291} +02/24/2022 11:07:13 - INFO - codeparrot_training - Step 7371: {'lr': 0.000484711647879815, 'samples': 3774464, 'steps': 7371, 'loss/train': 2.269019842147827} +02/24/2022 11:07:18 - INFO - codeparrot_training - Step 7372: {'lr': 0.00048470601319730946, 'samples': 3774976, 'steps': 7372, 'loss/train': 2.9981753826141357} +02/24/2022 11:07:22 - INFO - codeparrot_training - Step 7373: {'lr': 0.00048470037750939795, 'samples': 3775488, 'steps': 7373, 'loss/train': 1.798475742340088} +02/24/2022 11:07:29 - INFO - codeparrot_training - Step 7374: {'lr': 0.0004846947408161045, 'samples': 3776000, 'steps': 7374, 'loss/train': 2.5420162677764893} +02/24/2022 11:07:32 - INFO - codeparrot_training - Step 7375: {'lr': 0.0004846891031174533, 'samples': 3776512, 'steps': 7375, 'loss/train': 1.373158574104309} +02/24/2022 11:07:38 - INFO - codeparrot_training - Step 7376: {'lr': 0.00048468346441346853, 'samples': 3777024, 'steps': 7376, 'loss/train': 2.904468059539795} +02/24/2022 11:07:41 - INFO - codeparrot_training - Step 7377: {'lr': 0.00048467782470417434, 'samples': 3777536, 'steps': 7377, 'loss/train': 1.616076111793518} +02/24/2022 11:07:47 - INFO - codeparrot_training - Step 7378: {'lr': 0.0004846721839895948, 'samples': 3778048, 'steps': 7378, 'loss/train': 2.5133163928985596} +02/24/2022 11:07:50 - INFO - codeparrot_training - Step 7379: {'lr': 0.00048466654226975414, 'samples': 3778560, 'steps': 7379, 'loss/train': 1.7621402740478516} +02/24/2022 11:07:56 - INFO - codeparrot_training - Step 7380: {'lr': 0.00048466089954467663, 'samples': 3779072, 'steps': 7380, 'loss/train': 2.6987743377685547} +02/24/2022 11:07:59 - INFO - codeparrot_training - Step 7381: {'lr': 0.0004846552558143863, 'samples': 3779584, 'steps': 7381, 'loss/train': 2.5966460704803467} +02/24/2022 11:08:05 - INFO - codeparrot_training - Step 7382: {'lr': 0.00048464961107890734, 'samples': 3780096, 'steps': 7382, 'loss/train': 2.0526061058044434} +02/24/2022 11:08:08 - INFO - codeparrot_training - Step 7383: {'lr': 0.00048464396533826396, 'samples': 3780608, 'steps': 7383, 'loss/train': 2.589766263961792} +02/24/2022 11:08:16 - INFO - codeparrot_training - Step 7384: {'lr': 0.0004846383185924803, 'samples': 3781120, 'steps': 7384, 'loss/train': 2.858475685119629} +02/24/2022 11:08:19 - INFO - codeparrot_training - Step 7385: {'lr': 0.0004846326708415806, 'samples': 3781632, 'steps': 7385, 'loss/train': 0.6818851232528687} +02/24/2022 11:08:25 - INFO - codeparrot_training - Step 7386: {'lr': 0.00048462702208558906, 'samples': 3782144, 'steps': 7386, 'loss/train': 2.251932144165039} +02/24/2022 11:08:28 - INFO - codeparrot_training - Step 7387: {'lr': 0.0004846213723245299, 'samples': 3782656, 'steps': 7387, 'loss/train': 3.389828681945801} +02/24/2022 11:08:34 - INFO - codeparrot_training - Step 7388: {'lr': 0.00048461572155842725, 'samples': 3783168, 'steps': 7388, 'loss/train': 3.451742649078369} +02/24/2022 11:08:37 - INFO - codeparrot_training - Step 7389: {'lr': 0.0004846100697873054, 'samples': 3783680, 'steps': 7389, 'loss/train': 2.0010650157928467} +02/24/2022 11:08:43 - INFO - codeparrot_training - Step 7390: {'lr': 0.0004846044170111884, 'samples': 3784192, 'steps': 7390, 'loss/train': 2.340823173522949} +02/24/2022 11:08:46 - INFO - codeparrot_training - Step 7391: {'lr': 0.00048459876323010063, 'samples': 3784704, 'steps': 7391, 'loss/train': 3.3236348628997803} +02/24/2022 11:08:52 - INFO - codeparrot_training - Step 7392: {'lr': 0.00048459310844406624, 'samples': 3785216, 'steps': 7392, 'loss/train': 1.8938822746276855} +02/24/2022 11:08:55 - INFO - codeparrot_training - Step 7393: {'lr': 0.0004845874526531095, 'samples': 3785728, 'steps': 7393, 'loss/train': 2.624589204788208} +02/24/2022 11:09:03 - INFO - codeparrot_training - Step 7394: {'lr': 0.0004845817958572546, 'samples': 3786240, 'steps': 7394, 'loss/train': 1.487290620803833} +02/24/2022 11:09:06 - INFO - codeparrot_training - Step 7395: {'lr': 0.0004845761380565257, 'samples': 3786752, 'steps': 7395, 'loss/train': 2.2486672401428223} +02/24/2022 11:09:12 - INFO - codeparrot_training - Step 7396: {'lr': 0.0004845704792509472, 'samples': 3787264, 'steps': 7396, 'loss/train': 1.6457959413528442} +02/24/2022 11:09:15 - INFO - codeparrot_training - Step 7397: {'lr': 0.0004845648194405432, 'samples': 3787776, 'steps': 7397, 'loss/train': 2.4682440757751465} +02/24/2022 11:09:21 - INFO - codeparrot_training - Step 7398: {'lr': 0.00048455915862533804, 'samples': 3788288, 'steps': 7398, 'loss/train': 3.274388551712036} +02/24/2022 11:09:24 - INFO - codeparrot_training - Step 7399: {'lr': 0.0004845534968053559, 'samples': 3788800, 'steps': 7399, 'loss/train': 2.3076016902923584} +02/24/2022 11:09:30 - INFO - codeparrot_training - Step 7400: {'lr': 0.0004845478339806211, 'samples': 3789312, 'steps': 7400, 'loss/train': 1.9895148277282715} +02/24/2022 11:09:33 - INFO - codeparrot_training - Step 7401: {'lr': 0.0004845421701511578, 'samples': 3789824, 'steps': 7401, 'loss/train': 3.1556906700134277} +02/24/2022 11:09:39 - INFO - codeparrot_training - Step 7402: {'lr': 0.0004845365053169903, 'samples': 3790336, 'steps': 7402, 'loss/train': 1.161543846130371} +02/24/2022 11:09:42 - INFO - codeparrot_training - Step 7403: {'lr': 0.0004845308394781429, 'samples': 3790848, 'steps': 7403, 'loss/train': 1.674891471862793} +02/24/2022 11:09:48 - INFO - codeparrot_training - Step 7404: {'lr': 0.0004845251726346399, 'samples': 3791360, 'steps': 7404, 'loss/train': 1.5679104328155518} +02/24/2022 11:09:51 - INFO - codeparrot_training - Step 7405: {'lr': 0.0004845195047865055, 'samples': 3791872, 'steps': 7405, 'loss/train': 3.752072811126709} +02/24/2022 11:09:57 - INFO - codeparrot_training - Step 7406: {'lr': 0.00048451383593376394, 'samples': 3792384, 'steps': 7406, 'loss/train': 2.251842498779297} +02/24/2022 11:10:00 - INFO - codeparrot_training - Step 7407: {'lr': 0.0004845081660764397, 'samples': 3792896, 'steps': 7407, 'loss/train': 2.050541639328003} +02/24/2022 11:10:06 - INFO - codeparrot_training - Step 7408: {'lr': 0.0004845024952145569, 'samples': 3793408, 'steps': 7408, 'loss/train': 3.354140520095825} +02/24/2022 11:10:09 - INFO - codeparrot_training - Step 7409: {'lr': 0.00048449682334813983, 'samples': 3793920, 'steps': 7409, 'loss/train': 2.8606581687927246} +02/24/2022 11:10:17 - INFO - codeparrot_training - Step 7410: {'lr': 0.00048449115047721286, 'samples': 3794432, 'steps': 7410, 'loss/train': 1.5060209035873413} +02/24/2022 11:10:21 - INFO - codeparrot_training - Step 7411: {'lr': 0.00048448547660180034, 'samples': 3794944, 'steps': 7411, 'loss/train': 2.3049936294555664} +02/24/2022 11:10:26 - INFO - codeparrot_training - Step 7412: {'lr': 0.0004844798017219264, 'samples': 3795456, 'steps': 7412, 'loss/train': 2.2139499187469482} +02/24/2022 11:10:30 - INFO - codeparrot_training - Step 7413: {'lr': 0.00048447412583761543, 'samples': 3795968, 'steps': 7413, 'loss/train': 2.282728910446167} +02/24/2022 11:10:35 - INFO - codeparrot_training - Step 7414: {'lr': 0.00048446844894889173, 'samples': 3796480, 'steps': 7414, 'loss/train': 3.0450706481933594} +02/24/2022 11:10:39 - INFO - codeparrot_training - Step 7415: {'lr': 0.00048446277105577973, 'samples': 3796992, 'steps': 7415, 'loss/train': 0.5314150452613831} +02/24/2022 11:10:44 - INFO - codeparrot_training - Step 7416: {'lr': 0.0004844570921583037, 'samples': 3797504, 'steps': 7416, 'loss/train': 4.11193323135376} +02/24/2022 11:10:48 - INFO - codeparrot_training - Step 7417: {'lr': 0.00048445141225648785, 'samples': 3798016, 'steps': 7417, 'loss/train': 1.738006591796875} +02/24/2022 11:10:53 - INFO - codeparrot_training - Step 7418: {'lr': 0.00048444573135035665, 'samples': 3798528, 'steps': 7418, 'loss/train': 2.838282346725464} +02/24/2022 11:10:57 - INFO - codeparrot_training - Step 7419: {'lr': 0.00048444004943993434, 'samples': 3799040, 'steps': 7419, 'loss/train': 2.3490233421325684} +02/24/2022 11:11:05 - INFO - codeparrot_training - Step 7420: {'lr': 0.0004844343665252453, 'samples': 3799552, 'steps': 7420, 'loss/train': 2.526664972305298} +02/24/2022 11:11:08 - INFO - codeparrot_training - Step 7421: {'lr': 0.0004844286826063139, 'samples': 3800064, 'steps': 7421, 'loss/train': 2.28163480758667} +02/24/2022 11:11:12 - INFO - codeparrot_training - Step 7422: {'lr': 0.0004844229976831645, 'samples': 3800576, 'steps': 7422, 'loss/train': 2.114687919616699} +02/24/2022 11:11:17 - INFO - codeparrot_training - Step 7423: {'lr': 0.00048441731175582136, 'samples': 3801088, 'steps': 7423, 'loss/train': 2.468702554702759} +02/24/2022 11:11:21 - INFO - codeparrot_training - Step 7424: {'lr': 0.0004844116248243089, 'samples': 3801600, 'steps': 7424, 'loss/train': 2.777743101119995} +02/24/2022 11:11:26 - INFO - codeparrot_training - Step 7425: {'lr': 0.00048440593688865155, 'samples': 3802112, 'steps': 7425, 'loss/train': 2.7870445251464844} +02/24/2022 11:11:30 - INFO - codeparrot_training - Step 7426: {'lr': 0.0004844002479488735, 'samples': 3802624, 'steps': 7426, 'loss/train': 1.713482141494751} +02/24/2022 11:11:35 - INFO - codeparrot_training - Step 7427: {'lr': 0.0004843945580049992, 'samples': 3803136, 'steps': 7427, 'loss/train': 0.9529287219047546} +02/24/2022 11:11:39 - INFO - codeparrot_training - Step 7428: {'lr': 0.0004843888670570531, 'samples': 3803648, 'steps': 7428, 'loss/train': 0.5740416049957275} +02/24/2022 11:11:44 - INFO - codeparrot_training - Step 7429: {'lr': 0.00048438317510505954, 'samples': 3804160, 'steps': 7429, 'loss/train': 1.3869069814682007} +02/24/2022 11:11:48 - INFO - codeparrot_training - Step 7430: {'lr': 0.0004843774821490429, 'samples': 3804672, 'steps': 7430, 'loss/train': 2.052302837371826} +02/24/2022 11:11:55 - INFO - codeparrot_training - Step 7431: {'lr': 0.0004843717881890275, 'samples': 3805184, 'steps': 7431, 'loss/train': 3.2143967151641846} +02/24/2022 11:11:59 - INFO - codeparrot_training - Step 7432: {'lr': 0.0004843660932250378, 'samples': 3805696, 'steps': 7432, 'loss/train': 2.6610870361328125} +02/24/2022 11:12:04 - INFO - codeparrot_training - Step 7433: {'lr': 0.0004843603972570981, 'samples': 3806208, 'steps': 7433, 'loss/train': 0.7759596109390259} +02/24/2022 11:12:08 - INFO - codeparrot_training - Step 7434: {'lr': 0.00048435470028523295, 'samples': 3806720, 'steps': 7434, 'loss/train': 2.1584222316741943} +02/24/2022 11:12:13 - INFO - codeparrot_training - Step 7435: {'lr': 0.00048434900230946666, 'samples': 3807232, 'steps': 7435, 'loss/train': 2.5876851081848145} +02/24/2022 11:12:16 - INFO - codeparrot_training - Step 7436: {'lr': 0.0004843433033298237, 'samples': 3807744, 'steps': 7436, 'loss/train': 1.3958079814910889} +02/24/2022 11:12:22 - INFO - codeparrot_training - Step 7437: {'lr': 0.00048433760334632835, 'samples': 3808256, 'steps': 7437, 'loss/train': 2.6977572441101074} +02/24/2022 11:12:25 - INFO - codeparrot_training - Step 7438: {'lr': 0.0004843319023590052, 'samples': 3808768, 'steps': 7438, 'loss/train': 1.7582217454910278} +02/24/2022 11:12:31 - INFO - codeparrot_training - Step 7439: {'lr': 0.0004843262003678786, 'samples': 3809280, 'steps': 7439, 'loss/train': 1.7335035800933838} +02/24/2022 11:12:34 - INFO - codeparrot_training - Step 7440: {'lr': 0.0004843204973729729, 'samples': 3809792, 'steps': 7440, 'loss/train': 2.367358922958374} +02/24/2022 11:12:42 - INFO - codeparrot_training - Step 7441: {'lr': 0.0004843147933743126, 'samples': 3810304, 'steps': 7441, 'loss/train': 3.0420961380004883} +02/24/2022 11:12:45 - INFO - codeparrot_training - Step 7442: {'lr': 0.0004843090883719222, 'samples': 3810816, 'steps': 7442, 'loss/train': 1.9123307466506958} +02/24/2022 11:12:51 - INFO - codeparrot_training - Step 7443: {'lr': 0.00048430338236582596, 'samples': 3811328, 'steps': 7443, 'loss/train': 1.2915576696395874} +02/24/2022 11:12:57 - INFO - codeparrot_training - Step 7444: {'lr': 0.0004842976753560485, 'samples': 3811840, 'steps': 7444, 'loss/train': 2.6454339027404785} +02/24/2022 11:13:00 - INFO - codeparrot_training - Step 7445: {'lr': 0.00048429196734261413, 'samples': 3812352, 'steps': 7445, 'loss/train': 0.4806378483772278} +02/24/2022 11:13:03 - INFO - codeparrot_training - Step 7446: {'lr': 0.00048428625832554754, 'samples': 3812864, 'steps': 7446, 'loss/train': 4.000292778015137} +02/24/2022 11:13:09 - INFO - codeparrot_training - Step 7447: {'lr': 0.0004842805483048728, 'samples': 3813376, 'steps': 7447, 'loss/train': 2.2990851402282715} +02/24/2022 11:13:13 - INFO - codeparrot_training - Step 7448: {'lr': 0.0004842748372806147, 'samples': 3813888, 'steps': 7448, 'loss/train': 2.6723527908325195} +02/24/2022 11:13:18 - INFO - codeparrot_training - Step 7449: {'lr': 0.0004842691252527976, 'samples': 3814400, 'steps': 7449, 'loss/train': 2.4262754917144775} +02/24/2022 11:13:21 - INFO - codeparrot_training - Step 7450: {'lr': 0.00048426341222144586, 'samples': 3814912, 'steps': 7450, 'loss/train': 1.598927617073059} +02/24/2022 11:13:27 - INFO - codeparrot_training - Step 7451: {'lr': 0.00048425769818658416, 'samples': 3815424, 'steps': 7451, 'loss/train': 1.358352541923523} +02/24/2022 11:13:31 - INFO - codeparrot_training - Step 7452: {'lr': 0.0004842519831482368, 'samples': 3815936, 'steps': 7452, 'loss/train': 2.3954668045043945} +02/24/2022 11:13:36 - INFO - codeparrot_training - Step 7453: {'lr': 0.00048424626710642836, 'samples': 3816448, 'steps': 7453, 'loss/train': 1.1535263061523438} +02/24/2022 11:13:40 - INFO - codeparrot_training - Step 7454: {'lr': 0.0004842405500611833, 'samples': 3816960, 'steps': 7454, 'loss/train': 3.4303042888641357} +02/24/2022 11:13:45 - INFO - codeparrot_training - Step 7455: {'lr': 0.00048423483201252604, 'samples': 3817472, 'steps': 7455, 'loss/train': 2.8033041954040527} +02/24/2022 11:13:49 - INFO - codeparrot_training - Step 7456: {'lr': 0.0004842291129604812, 'samples': 3817984, 'steps': 7456, 'loss/train': 2.044224262237549} +02/24/2022 11:13:56 - INFO - codeparrot_training - Step 7457: {'lr': 0.0004842233929050732, 'samples': 3818496, 'steps': 7457, 'loss/train': 2.26042103767395} +02/24/2022 11:14:00 - INFO - codeparrot_training - Step 7458: {'lr': 0.00048421767184632657, 'samples': 3819008, 'steps': 7458, 'loss/train': 1.8025332689285278} +02/24/2022 11:14:05 - INFO - codeparrot_training - Step 7459: {'lr': 0.00048421194978426574, 'samples': 3819520, 'steps': 7459, 'loss/train': 2.354947328567505} +02/24/2022 11:14:09 - INFO - codeparrot_training - Step 7460: {'lr': 0.00048420622671891533, 'samples': 3820032, 'steps': 7460, 'loss/train': 1.2694612741470337} +02/24/2022 11:14:14 - INFO - codeparrot_training - Step 7461: {'lr': 0.0004842005026502999, 'samples': 3820544, 'steps': 7461, 'loss/train': 3.551039457321167} +02/24/2022 11:14:18 - INFO - codeparrot_training - Step 7462: {'lr': 0.00048419477757844376, 'samples': 3821056, 'steps': 7462, 'loss/train': 2.7074220180511475} +02/24/2022 11:14:23 - INFO - codeparrot_training - Step 7463: {'lr': 0.00048418905150337166, 'samples': 3821568, 'steps': 7463, 'loss/train': 2.501370906829834} +02/24/2022 11:14:27 - INFO - codeparrot_training - Step 7464: {'lr': 0.00048418332442510794, 'samples': 3822080, 'steps': 7464, 'loss/train': 1.9361931085586548} +02/24/2022 11:14:32 - INFO - codeparrot_training - Step 7465: {'lr': 0.00048417759634367726, 'samples': 3822592, 'steps': 7465, 'loss/train': 2.5320217609405518} +02/24/2022 11:14:36 - INFO - codeparrot_training - Step 7466: {'lr': 0.00048417186725910414, 'samples': 3823104, 'steps': 7466, 'loss/train': 2.402559280395508} +02/24/2022 11:14:43 - INFO - codeparrot_training - Step 7467: {'lr': 0.000484166137171413, 'samples': 3823616, 'steps': 7467, 'loss/train': 2.707710027694702} +02/24/2022 11:14:46 - INFO - codeparrot_training - Step 7468: {'lr': 0.0004841604060806286, 'samples': 3824128, 'steps': 7468, 'loss/train': 3.385481595993042} +02/24/2022 11:14:52 - INFO - codeparrot_training - Step 7469: {'lr': 0.00048415467398677534, 'samples': 3824640, 'steps': 7469, 'loss/train': 1.0140942335128784} +02/24/2022 11:14:55 - INFO - codeparrot_training - Step 7470: {'lr': 0.0004841489408898778, 'samples': 3825152, 'steps': 7470, 'loss/train': 2.7896058559417725} +02/24/2022 11:15:01 - INFO - codeparrot_training - Step 7471: {'lr': 0.0004841432067899605, 'samples': 3825664, 'steps': 7471, 'loss/train': 2.2625505924224854} +02/24/2022 11:15:04 - INFO - codeparrot_training - Step 7472: {'lr': 0.0004841374716870481, 'samples': 3826176, 'steps': 7472, 'loss/train': 2.1733639240264893} +02/24/2022 11:15:10 - INFO - codeparrot_training - Step 7473: {'lr': 0.0004841317355811651, 'samples': 3826688, 'steps': 7473, 'loss/train': 2.0763137340545654} +02/24/2022 11:15:15 - INFO - codeparrot_training - Step 7474: {'lr': 0.00048412599847233613, 'samples': 3827200, 'steps': 7474, 'loss/train': 2.003067970275879} +02/24/2022 11:15:19 - INFO - codeparrot_training - Step 7475: {'lr': 0.0004841202603605857, 'samples': 3827712, 'steps': 7475, 'loss/train': 1.6955280303955078} +02/24/2022 11:15:26 - INFO - codeparrot_training - Step 7476: {'lr': 0.0004841145212459384, 'samples': 3828224, 'steps': 7476, 'loss/train': 2.7507684230804443} +02/24/2022 11:15:30 - INFO - codeparrot_training - Step 7477: {'lr': 0.0004841087811284188, 'samples': 3828736, 'steps': 7477, 'loss/train': 2.3904590606689453} +02/24/2022 11:15:35 - INFO - codeparrot_training - Step 7478: {'lr': 0.0004841030400080516, 'samples': 3829248, 'steps': 7478, 'loss/train': 2.1411893367767334} +02/24/2022 11:15:39 - INFO - codeparrot_training - Step 7479: {'lr': 0.00048409729788486127, 'samples': 3829760, 'steps': 7479, 'loss/train': 2.5110843181610107} +02/24/2022 11:15:42 - INFO - codeparrot_training - Step 7480: {'lr': 0.00048409155475887244, 'samples': 3830272, 'steps': 7480, 'loss/train': 0.2696789503097534} +02/24/2022 11:15:48 - INFO - codeparrot_training - Step 7481: {'lr': 0.00048408581063010973, 'samples': 3830784, 'steps': 7481, 'loss/train': 1.666459083557129} +02/24/2022 11:15:51 - INFO - codeparrot_training - Step 7482: {'lr': 0.00048408006549859777, 'samples': 3831296, 'steps': 7482, 'loss/train': 2.4707143306732178} +02/24/2022 11:15:57 - INFO - codeparrot_training - Step 7483: {'lr': 0.00048407431936436116, 'samples': 3831808, 'steps': 7483, 'loss/train': 1.0383611917495728} +02/24/2022 11:16:00 - INFO - codeparrot_training - Step 7484: {'lr': 0.0004840685722274244, 'samples': 3832320, 'steps': 7484, 'loss/train': 0.20521792769432068} +02/24/2022 11:16:06 - INFO - codeparrot_training - Step 7485: {'lr': 0.00048406282408781226, 'samples': 3832832, 'steps': 7485, 'loss/train': 2.5948879718780518} +02/24/2022 11:16:09 - INFO - codeparrot_training - Step 7486: {'lr': 0.0004840570749455493, 'samples': 3833344, 'steps': 7486, 'loss/train': 1.5987448692321777} +02/24/2022 11:16:16 - INFO - codeparrot_training - Step 7487: {'lr': 0.00048405132480066015, 'samples': 3833856, 'steps': 7487, 'loss/train': 2.2542054653167725} +02/24/2022 11:16:20 - INFO - codeparrot_training - Step 7488: {'lr': 0.00048404557365316946, 'samples': 3834368, 'steps': 7488, 'loss/train': 2.9628989696502686} +02/24/2022 11:16:25 - INFO - codeparrot_training - Step 7489: {'lr': 0.00048403982150310184, 'samples': 3834880, 'steps': 7489, 'loss/train': 0.8160163760185242} +02/24/2022 11:16:29 - INFO - codeparrot_training - Step 7490: {'lr': 0.0004840340683504819, 'samples': 3835392, 'steps': 7490, 'loss/train': 0.9846836924552917} +02/24/2022 11:16:35 - INFO - codeparrot_training - Step 7491: {'lr': 0.0004840283141953343, 'samples': 3835904, 'steps': 7491, 'loss/train': 3.9652154445648193} +02/24/2022 11:16:38 - INFO - codeparrot_training - Step 7492: {'lr': 0.0004840225590376839, 'samples': 3836416, 'steps': 7492, 'loss/train': 2.300816297531128} +02/24/2022 11:16:44 - INFO - codeparrot_training - Step 7493: {'lr': 0.000484016802877555, 'samples': 3836928, 'steps': 7493, 'loss/train': 1.4028760194778442} +02/24/2022 11:16:47 - INFO - codeparrot_training - Step 7494: {'lr': 0.00048401104571497245, 'samples': 3837440, 'steps': 7494, 'loss/train': 2.2573134899139404} +02/24/2022 11:16:53 - INFO - codeparrot_training - Step 7495: {'lr': 0.00048400528754996086, 'samples': 3837952, 'steps': 7495, 'loss/train': 2.2583260536193848} +02/24/2022 11:16:56 - INFO - codeparrot_training - Step 7496: {'lr': 0.000483999528382545, 'samples': 3838464, 'steps': 7496, 'loss/train': 4.183949947357178} +02/24/2022 11:17:03 - INFO - codeparrot_training - Step 7497: {'lr': 0.00048399376821274943, 'samples': 3838976, 'steps': 7497, 'loss/train': 1.9686617851257324} +02/24/2022 11:17:07 - INFO - codeparrot_training - Step 7498: {'lr': 0.00048398800704059887, 'samples': 3839488, 'steps': 7498, 'loss/train': 2.413862466812134} +02/24/2022 11:17:13 - INFO - codeparrot_training - Step 7499: {'lr': 0.000483982244866118, 'samples': 3840000, 'steps': 7499, 'loss/train': 1.8729325532913208} +02/24/2022 11:17:16 - INFO - codeparrot_training - Step 7500: {'lr': 0.00048397648168933144, 'samples': 3840512, 'steps': 7500, 'loss/train': 1.2530790567398071} +02/24/2022 11:17:22 - INFO - codeparrot_training - Step 7501: {'lr': 0.00048397071751026395, 'samples': 3841024, 'steps': 7501, 'loss/train': 1.9784280061721802} +02/24/2022 11:17:25 - INFO - codeparrot_training - Step 7502: {'lr': 0.00048396495232894024, 'samples': 3841536, 'steps': 7502, 'loss/train': 2.662201166152954} +02/24/2022 11:17:29 - INFO - codeparrot_training - Step 7503: {'lr': 0.0004839591861453849, 'samples': 3842048, 'steps': 7503, 'loss/train': 8.562796592712402} +02/24/2022 11:17:34 - INFO - codeparrot_training - Step 7504: {'lr': 0.00048395341895962277, 'samples': 3842560, 'steps': 7504, 'loss/train': 2.5060017108917236} +02/24/2022 11:17:38 - INFO - codeparrot_training - Step 7505: {'lr': 0.0004839476507716784, 'samples': 3843072, 'steps': 7505, 'loss/train': 2.0978822708129883} +02/24/2022 11:17:43 - INFO - codeparrot_training - Step 7506: {'lr': 0.0004839418815815766, 'samples': 3843584, 'steps': 7506, 'loss/train': 3.61733078956604} +02/24/2022 11:17:47 - INFO - codeparrot_training - Step 7507: {'lr': 0.0004839361113893421, 'samples': 3844096, 'steps': 7507, 'loss/train': 2.0031683444976807} +02/24/2022 11:17:52 - INFO - codeparrot_training - Step 7508: {'lr': 0.0004839303401949996, 'samples': 3844608, 'steps': 7508, 'loss/train': 1.562530517578125} +02/24/2022 11:17:56 - INFO - codeparrot_training - Step 7509: {'lr': 0.00048392456799857374, 'samples': 3845120, 'steps': 7509, 'loss/train': 2.808486223220825} +02/24/2022 11:18:01 - INFO - codeparrot_training - Step 7510: {'lr': 0.0004839187948000893, 'samples': 3845632, 'steps': 7510, 'loss/train': 2.772757053375244} +02/24/2022 11:18:05 - INFO - codeparrot_training - Step 7511: {'lr': 0.0004839130205995711, 'samples': 3846144, 'steps': 7511, 'loss/train': 2.466223955154419} +02/24/2022 11:18:10 - INFO - codeparrot_training - Step 7512: {'lr': 0.0004839072453970438, 'samples': 3846656, 'steps': 7512, 'loss/train': 0.5400563478469849} +02/24/2022 11:18:14 - INFO - codeparrot_training - Step 7513: {'lr': 0.00048390146919253206, 'samples': 3847168, 'steps': 7513, 'loss/train': 2.3244948387145996} +02/24/2022 11:18:21 - INFO - codeparrot_training - Step 7514: {'lr': 0.0004838956919860607, 'samples': 3847680, 'steps': 7514, 'loss/train': 2.395681858062744} +02/24/2022 11:18:27 - INFO - codeparrot_training - Step 7515: {'lr': 0.0004838899137776545, 'samples': 3848192, 'steps': 7515, 'loss/train': 1.977725625038147} +02/24/2022 11:18:30 - INFO - codeparrot_training - Step 7516: {'lr': 0.00048388413456733814, 'samples': 3848704, 'steps': 7516, 'loss/train': 3.4100310802459717} +02/24/2022 11:18:34 - INFO - codeparrot_training - Step 7517: {'lr': 0.0004838783543551365, 'samples': 3849216, 'steps': 7517, 'loss/train': 2.434039354324341} +02/24/2022 11:18:39 - INFO - codeparrot_training - Step 7518: {'lr': 0.0004838725731410742, 'samples': 3849728, 'steps': 7518, 'loss/train': 0.9382692575454712} +02/24/2022 11:18:43 - INFO - codeparrot_training - Step 7519: {'lr': 0.00048386679092517605, 'samples': 3850240, 'steps': 7519, 'loss/train': 1.0077052116394043} +02/24/2022 11:18:48 - INFO - codeparrot_training - Step 7520: {'lr': 0.00048386100770746686, 'samples': 3850752, 'steps': 7520, 'loss/train': 1.7812910079956055} +02/24/2022 11:18:52 - INFO - codeparrot_training - Step 7521: {'lr': 0.00048385522348797134, 'samples': 3851264, 'steps': 7521, 'loss/train': 2.62276291847229} +02/24/2022 11:18:58 - INFO - codeparrot_training - Step 7522: {'lr': 0.0004838494382667143, 'samples': 3851776, 'steps': 7522, 'loss/train': 1.857709288597107} +02/24/2022 11:19:01 - INFO - codeparrot_training - Step 7523: {'lr': 0.0004838436520437205, 'samples': 3852288, 'steps': 7523, 'loss/train': 3.001187562942505} +02/24/2022 11:19:08 - INFO - codeparrot_training - Step 7524: {'lr': 0.00048383786481901483, 'samples': 3852800, 'steps': 7524, 'loss/train': 2.8849003314971924} +02/24/2022 11:19:12 - INFO - codeparrot_training - Step 7525: {'lr': 0.00048383207659262196, 'samples': 3853312, 'steps': 7525, 'loss/train': 3.820636034011841} +02/24/2022 11:19:17 - INFO - codeparrot_training - Step 7526: {'lr': 0.0004838262873645667, 'samples': 3853824, 'steps': 7526, 'loss/train': 1.8931299448013306} +02/24/2022 11:19:21 - INFO - codeparrot_training - Step 7527: {'lr': 0.00048382049713487383, 'samples': 3854336, 'steps': 7527, 'loss/train': 9.131762504577637} +02/24/2022 11:19:26 - INFO - codeparrot_training - Step 7528: {'lr': 0.00048381470590356835, 'samples': 3854848, 'steps': 7528, 'loss/train': 3.472043752670288} +02/24/2022 11:19:30 - INFO - codeparrot_training - Step 7529: {'lr': 0.00048380891367067483, 'samples': 3855360, 'steps': 7529, 'loss/train': 1.9659978151321411} +02/24/2022 11:19:35 - INFO - codeparrot_training - Step 7530: {'lr': 0.0004838031204362181, 'samples': 3855872, 'steps': 7530, 'loss/train': 2.6228482723236084} +02/24/2022 11:19:39 - INFO - codeparrot_training - Step 7531: {'lr': 0.0004837973262002231, 'samples': 3856384, 'steps': 7531, 'loss/train': 3.1839160919189453} +02/24/2022 11:19:44 - INFO - codeparrot_training - Step 7532: {'lr': 0.0004837915309627146, 'samples': 3856896, 'steps': 7532, 'loss/train': 2.2843639850616455} +02/24/2022 11:19:48 - INFO - codeparrot_training - Step 7533: {'lr': 0.00048378573472371744, 'samples': 3857408, 'steps': 7533, 'loss/train': 2.2604622840881348} +02/24/2022 11:19:53 - INFO - codeparrot_training - Step 7534: {'lr': 0.0004837799374832564, 'samples': 3857920, 'steps': 7534, 'loss/train': 1.4056577682495117} +02/24/2022 11:19:57 - INFO - codeparrot_training - Step 7535: {'lr': 0.0004837741392413563, 'samples': 3858432, 'steps': 7535, 'loss/train': 2.7577364444732666} +02/24/2022 11:20:03 - INFO - codeparrot_training - Step 7536: {'lr': 0.000483768339998042, 'samples': 3858944, 'steps': 7536, 'loss/train': 1.8355239629745483} +02/24/2022 11:20:06 - INFO - codeparrot_training - Step 7537: {'lr': 0.0004837625397533385, 'samples': 3859456, 'steps': 7537, 'loss/train': 1.8684333562850952} +02/24/2022 11:20:12 - INFO - codeparrot_training - Step 7538: {'lr': 0.00048375673850727043, 'samples': 3859968, 'steps': 7538, 'loss/train': 2.1198551654815674} +02/24/2022 11:20:15 - INFO - codeparrot_training - Step 7539: {'lr': 0.00048375093625986274, 'samples': 3860480, 'steps': 7539, 'loss/train': 2.906947135925293} +02/24/2022 11:20:21 - INFO - codeparrot_training - Step 7540: {'lr': 0.0004837451330111402, 'samples': 3860992, 'steps': 7540, 'loss/train': 1.2826128005981445} +02/24/2022 11:20:25 - INFO - codeparrot_training - Step 7541: {'lr': 0.0004837393287611278, 'samples': 3861504, 'steps': 7541, 'loss/train': 2.5487778186798096} +02/24/2022 11:20:30 - INFO - codeparrot_training - Step 7542: {'lr': 0.0004837335235098503, 'samples': 3862016, 'steps': 7542, 'loss/train': 1.4793578386306763} +02/24/2022 11:20:34 - INFO - codeparrot_training - Step 7543: {'lr': 0.0004837277172573326, 'samples': 3862528, 'steps': 7543, 'loss/train': 2.404090166091919} +02/24/2022 11:20:39 - INFO - codeparrot_training - Step 7544: {'lr': 0.00048372191000359955, 'samples': 3863040, 'steps': 7544, 'loss/train': 2.2764785289764404} +02/24/2022 11:20:43 - INFO - codeparrot_training - Step 7545: {'lr': 0.00048371610174867614, 'samples': 3863552, 'steps': 7545, 'loss/train': 2.0950586795806885} +02/24/2022 11:20:48 - INFO - codeparrot_training - Step 7546: {'lr': 0.00048371029249258716, 'samples': 3864064, 'steps': 7546, 'loss/train': 2.016496181488037} +02/24/2022 11:20:52 - INFO - codeparrot_training - Step 7547: {'lr': 0.0004837044822353574, 'samples': 3864576, 'steps': 7547, 'loss/train': 3.632533550262451} +02/24/2022 11:20:57 - INFO - codeparrot_training - Step 7548: {'lr': 0.0004836986709770119, 'samples': 3865088, 'steps': 7548, 'loss/train': 1.8181231021881104} +02/24/2022 11:21:01 - INFO - codeparrot_training - Step 7549: {'lr': 0.00048369285871757554, 'samples': 3865600, 'steps': 7549, 'loss/train': 1.7309651374816895} +02/24/2022 11:21:07 - INFO - codeparrot_training - Step 7550: {'lr': 0.0004836870454570731, 'samples': 3866112, 'steps': 7550, 'loss/train': 2.7915351390838623} +02/24/2022 11:21:10 - INFO - codeparrot_training - Step 7551: {'lr': 0.00048368123119552965, 'samples': 3866624, 'steps': 7551, 'loss/train': 2.872307300567627} +02/24/2022 11:21:16 - INFO - codeparrot_training - Step 7552: {'lr': 0.00048367541593296996, 'samples': 3867136, 'steps': 7552, 'loss/train': 2.6407103538513184} +02/24/2022 11:21:19 - INFO - codeparrot_training - Step 7553: {'lr': 0.00048366959966941893, 'samples': 3867648, 'steps': 7553, 'loss/train': 2.058753728866577} +02/24/2022 11:21:25 - INFO - codeparrot_training - Step 7554: {'lr': 0.0004836637824049016, 'samples': 3868160, 'steps': 7554, 'loss/train': 1.3336979150772095} +02/24/2022 11:21:28 - INFO - codeparrot_training - Step 7555: {'lr': 0.00048365796413944284, 'samples': 3868672, 'steps': 7555, 'loss/train': 1.9933503866195679} +02/24/2022 11:21:34 - INFO - codeparrot_training - Step 7556: {'lr': 0.00048365214487306753, 'samples': 3869184, 'steps': 7556, 'loss/train': 1.4395159482955933} +02/24/2022 11:21:37 - INFO - codeparrot_training - Step 7557: {'lr': 0.0004836463246058006, 'samples': 3869696, 'steps': 7557, 'loss/train': 1.4961059093475342} +02/24/2022 11:21:43 - INFO - codeparrot_training - Step 7558: {'lr': 0.0004836405033376671, 'samples': 3870208, 'steps': 7558, 'loss/train': 2.4053282737731934} +02/24/2022 11:21:49 - INFO - codeparrot_training - Step 7559: {'lr': 0.00048363468106869177, 'samples': 3870720, 'steps': 7559, 'loss/train': 3.01051664352417} +02/24/2022 11:21:52 - INFO - codeparrot_training - Step 7560: {'lr': 0.00048362885779889967, 'samples': 3871232, 'steps': 7560, 'loss/train': 1.41871178150177} +02/24/2022 11:21:58 - INFO - codeparrot_training - Step 7561: {'lr': 0.0004836230335283158, 'samples': 3871744, 'steps': 7561, 'loss/train': 0.9912055134773254} +02/24/2022 11:22:01 - INFO - codeparrot_training - Step 7562: {'lr': 0.00048361720825696494, 'samples': 3872256, 'steps': 7562, 'loss/train': 2.072070837020874} +02/24/2022 11:22:07 - INFO - codeparrot_training - Step 7563: {'lr': 0.0004836113819848722, 'samples': 3872768, 'steps': 7563, 'loss/train': 2.1366806030273438} +02/24/2022 11:22:10 - INFO - codeparrot_training - Step 7564: {'lr': 0.0004836055547120625, 'samples': 3873280, 'steps': 7564, 'loss/train': 1.8939872980117798} +02/24/2022 11:22:16 - INFO - codeparrot_training - Step 7565: {'lr': 0.0004835997264385607, 'samples': 3873792, 'steps': 7565, 'loss/train': 1.8421449661254883} +02/24/2022 11:22:19 - INFO - codeparrot_training - Step 7566: {'lr': 0.0004835938971643919, 'samples': 3874304, 'steps': 7566, 'loss/train': 2.350156545639038} +02/24/2022 11:22:25 - INFO - codeparrot_training - Step 7567: {'lr': 0.000483588066889581, 'samples': 3874816, 'steps': 7567, 'loss/train': 3.722226858139038} +02/24/2022 11:22:28 - INFO - codeparrot_training - Step 7568: {'lr': 0.00048358223561415306, 'samples': 3875328, 'steps': 7568, 'loss/train': 2.0755059719085693} +02/24/2022 11:22:35 - INFO - codeparrot_training - Step 7569: {'lr': 0.0004835764033381329, 'samples': 3875840, 'steps': 7569, 'loss/train': 0.9213374257087708} +02/24/2022 11:22:38 - INFO - codeparrot_training - Step 7570: {'lr': 0.00048357057006154566, 'samples': 3876352, 'steps': 7570, 'loss/train': 0.18913401663303375} +02/24/2022 11:22:44 - INFO - codeparrot_training - Step 7571: {'lr': 0.0004835647357844162, 'samples': 3876864, 'steps': 7571, 'loss/train': 2.3233468532562256} +02/24/2022 11:22:47 - INFO - codeparrot_training - Step 7572: {'lr': 0.00048355890050676966, 'samples': 3877376, 'steps': 7572, 'loss/train': 1.3482396602630615} +02/24/2022 11:22:53 - INFO - codeparrot_training - Step 7573: {'lr': 0.0004835530642286309, 'samples': 3877888, 'steps': 7573, 'loss/train': 1.8638635873794556} +02/24/2022 11:22:56 - INFO - codeparrot_training - Step 7574: {'lr': 0.000483547226950025, 'samples': 3878400, 'steps': 7574, 'loss/train': 2.173956871032715} +02/24/2022 11:23:02 - INFO - codeparrot_training - Step 7575: {'lr': 0.00048354138867097695, 'samples': 3878912, 'steps': 7575, 'loss/train': 1.251124620437622} +02/24/2022 11:23:05 - INFO - codeparrot_training - Step 7576: {'lr': 0.00048353554939151167, 'samples': 3879424, 'steps': 7576, 'loss/train': 1.7908861637115479} +02/24/2022 11:23:11 - INFO - codeparrot_training - Step 7577: {'lr': 0.00048352970911165434, 'samples': 3879936, 'steps': 7577, 'loss/train': 1.157549500465393} +02/24/2022 11:23:14 - INFO - codeparrot_training - Step 7578: {'lr': 0.0004835238678314299, 'samples': 3880448, 'steps': 7578, 'loss/train': 2.919440269470215} +02/24/2022 11:23:19 - INFO - codeparrot_training - Step 7579: {'lr': 0.00048351802555086335, 'samples': 3880960, 'steps': 7579, 'loss/train': 1.6097110509872437} +02/24/2022 11:23:23 - INFO - codeparrot_training - Step 7580: {'lr': 0.0004835121822699796, 'samples': 3881472, 'steps': 7580, 'loss/train': 2.9920217990875244} +02/24/2022 11:23:29 - INFO - codeparrot_training - Step 7581: {'lr': 0.00048350633798880397, 'samples': 3881984, 'steps': 7581, 'loss/train': 1.9408460855484009} +02/24/2022 11:23:32 - INFO - codeparrot_training - Step 7582: {'lr': 0.0004835004927073613, 'samples': 3882496, 'steps': 7582, 'loss/train': 1.9490169286727905} +02/24/2022 11:23:38 - INFO - codeparrot_training - Step 7583: {'lr': 0.0004834946464256766, 'samples': 3883008, 'steps': 7583, 'loss/train': 0.8694325685501099} +02/24/2022 11:23:42 - INFO - codeparrot_training - Step 7584: {'lr': 0.00048348879914377504, 'samples': 3883520, 'steps': 7584, 'loss/train': 1.966606616973877} +02/24/2022 11:23:45 - INFO - codeparrot_training - Step 7585: {'lr': 0.0004834829508616816, 'samples': 3884032, 'steps': 7585, 'loss/train': 2.655318260192871} +02/24/2022 11:23:52 - INFO - codeparrot_training - Step 7586: {'lr': 0.00048347710157942126, 'samples': 3884544, 'steps': 7586, 'loss/train': 2.219027042388916} +02/24/2022 11:23:55 - INFO - codeparrot_training - Step 7587: {'lr': 0.00048347125129701924, 'samples': 3885056, 'steps': 7587, 'loss/train': 1.47925865650177} +02/24/2022 11:24:00 - INFO - codeparrot_training - Step 7588: {'lr': 0.00048346540001450045, 'samples': 3885568, 'steps': 7588, 'loss/train': 0.6150884628295898} +02/24/2022 11:24:04 - INFO - codeparrot_training - Step 7589: {'lr': 0.0004834595477318901, 'samples': 3886080, 'steps': 7589, 'loss/train': 1.8040697574615479} +02/24/2022 11:24:09 - INFO - codeparrot_training - Step 7590: {'lr': 0.00048345369444921315, 'samples': 3886592, 'steps': 7590, 'loss/train': 1.8759030103683472} +02/24/2022 11:24:13 - INFO - codeparrot_training - Step 7591: {'lr': 0.00048344784016649467, 'samples': 3887104, 'steps': 7591, 'loss/train': 2.656978130340576} +02/24/2022 11:24:18 - INFO - codeparrot_training - Step 7592: {'lr': 0.0004834419848837598, 'samples': 3887616, 'steps': 7592, 'loss/train': 1.5592306852340698} +02/24/2022 11:24:22 - INFO - codeparrot_training - Step 7593: {'lr': 0.0004834361286010336, 'samples': 3888128, 'steps': 7593, 'loss/train': 2.9403769969940186} +02/24/2022 11:24:27 - INFO - codeparrot_training - Step 7594: {'lr': 0.0004834302713183411, 'samples': 3888640, 'steps': 7594, 'loss/train': 1.8358407020568848} +02/24/2022 11:24:31 - INFO - codeparrot_training - Step 7595: {'lr': 0.0004834244130357075, 'samples': 3889152, 'steps': 7595, 'loss/train': 2.9446747303009033} +02/24/2022 11:24:37 - INFO - codeparrot_training - Step 7596: {'lr': 0.0004834185537531578, 'samples': 3889664, 'steps': 7596, 'loss/train': 2.3520755767822266} +02/24/2022 11:24:40 - INFO - codeparrot_training - Step 7597: {'lr': 0.00048341269347071717, 'samples': 3890176, 'steps': 7597, 'loss/train': 2.0623204708099365} +02/24/2022 11:24:46 - INFO - codeparrot_training - Step 7598: {'lr': 0.00048340683218841066, 'samples': 3890688, 'steps': 7598, 'loss/train': 1.848036289215088} +02/24/2022 11:24:49 - INFO - codeparrot_training - Step 7599: {'lr': 0.00048340096990626336, 'samples': 3891200, 'steps': 7599, 'loss/train': 1.7646814584732056} +02/24/2022 11:24:55 - INFO - codeparrot_training - Step 7600: {'lr': 0.00048339510662430044, 'samples': 3891712, 'steps': 7600, 'loss/train': 2.6719794273376465} +02/24/2022 11:24:59 - INFO - codeparrot_training - Step 7601: {'lr': 0.000483389242342547, 'samples': 3892224, 'steps': 7601, 'loss/train': 2.8692915439605713} +02/24/2022 11:25:04 - INFO - codeparrot_training - Step 7602: {'lr': 0.00048338337706102817, 'samples': 3892736, 'steps': 7602, 'loss/train': 1.6060826778411865} +02/24/2022 11:25:08 - INFO - codeparrot_training - Step 7603: {'lr': 0.00048337751077976907, 'samples': 3893248, 'steps': 7603, 'loss/train': 3.6554031372070312} +02/24/2022 11:25:13 - INFO - codeparrot_training - Step 7604: {'lr': 0.0004833716434987948, 'samples': 3893760, 'steps': 7604, 'loss/train': 1.902369499206543} +02/24/2022 11:25:16 - INFO - codeparrot_training - Step 7605: {'lr': 0.0004833657752181305, 'samples': 3894272, 'steps': 7605, 'loss/train': 2.3780405521392822} +02/24/2022 11:25:23 - INFO - codeparrot_training - Step 7606: {'lr': 0.00048335990593780133, 'samples': 3894784, 'steps': 7606, 'loss/train': 2.47692608833313} +02/24/2022 11:25:27 - INFO - codeparrot_training - Step 7607: {'lr': 0.00048335403565783245, 'samples': 3895296, 'steps': 7607, 'loss/train': 2.3229684829711914} +02/24/2022 11:25:32 - INFO - codeparrot_training - Step 7608: {'lr': 0.0004833481643782489, 'samples': 3895808, 'steps': 7608, 'loss/train': 9.008742332458496} +02/24/2022 11:25:36 - INFO - codeparrot_training - Step 7609: {'lr': 0.000483342292099076, 'samples': 3896320, 'steps': 7609, 'loss/train': 1.9355552196502686} +02/24/2022 11:25:41 - INFO - codeparrot_training - Step 7610: {'lr': 0.0004833364188203387, 'samples': 3896832, 'steps': 7610, 'loss/train': 1.3129560947418213} +02/24/2022 11:25:45 - INFO - codeparrot_training - Step 7611: {'lr': 0.0004833305445420624, 'samples': 3897344, 'steps': 7611, 'loss/train': 0.2687065303325653} +02/24/2022 11:25:50 - INFO - codeparrot_training - Step 7612: {'lr': 0.0004833246692642721, 'samples': 3897856, 'steps': 7612, 'loss/train': 1.6502118110656738} +02/24/2022 11:25:54 - INFO - codeparrot_training - Step 7613: {'lr': 0.000483318792986993, 'samples': 3898368, 'steps': 7613, 'loss/train': 2.6306190490722656} +02/24/2022 11:25:59 - INFO - codeparrot_training - Step 7614: {'lr': 0.00048331291571025026, 'samples': 3898880, 'steps': 7614, 'loss/train': 2.7483718395233154} +02/24/2022 11:26:03 - INFO - codeparrot_training - Step 7615: {'lr': 0.0004833070374340691, 'samples': 3899392, 'steps': 7615, 'loss/train': 1.1649190187454224} +02/24/2022 11:26:10 - INFO - codeparrot_training - Step 7616: {'lr': 0.00048330115815847465, 'samples': 3899904, 'steps': 7616, 'loss/train': 1.7381356954574585} +02/24/2022 11:26:13 - INFO - codeparrot_training - Step 7617: {'lr': 0.00048329527788349216, 'samples': 3900416, 'steps': 7617, 'loss/train': 1.486620545387268} +02/24/2022 11:26:19 - INFO - codeparrot_training - Step 7618: {'lr': 0.0004832893966091467, 'samples': 3900928, 'steps': 7618, 'loss/train': 2.6989986896514893} +02/24/2022 11:26:22 - INFO - codeparrot_training - Step 7619: {'lr': 0.00048328351433546364, 'samples': 3901440, 'steps': 7619, 'loss/train': 1.774891972541809} +02/24/2022 11:26:27 - INFO - codeparrot_training - Step 7620: {'lr': 0.000483277631062468, 'samples': 3901952, 'steps': 7620, 'loss/train': 1.8501554727554321} +02/24/2022 11:26:31 - INFO - codeparrot_training - Step 7621: {'lr': 0.00048327174679018515, 'samples': 3902464, 'steps': 7621, 'loss/train': 3.5334436893463135} +02/24/2022 11:26:36 - INFO - codeparrot_training - Step 7622: {'lr': 0.00048326586151864015, 'samples': 3902976, 'steps': 7622, 'loss/train': 2.5045037269592285} +02/24/2022 11:26:40 - INFO - codeparrot_training - Step 7623: {'lr': 0.00048325997524785826, 'samples': 3903488, 'steps': 7623, 'loss/train': 2.4404690265655518} +02/24/2022 11:26:45 - INFO - codeparrot_training - Step 7624: {'lr': 0.00048325408797786476, 'samples': 3904000, 'steps': 7624, 'loss/train': 2.6604700088500977} +02/24/2022 11:26:49 - INFO - codeparrot_training - Step 7625: {'lr': 0.00048324819970868473, 'samples': 3904512, 'steps': 7625, 'loss/train': 1.8641486167907715} +02/24/2022 11:26:55 - INFO - codeparrot_training - Step 7626: {'lr': 0.0004832423104403435, 'samples': 3905024, 'steps': 7626, 'loss/train': 3.499929904937744} +02/24/2022 11:26:58 - INFO - codeparrot_training - Step 7627: {'lr': 0.0004832364201728663, 'samples': 3905536, 'steps': 7627, 'loss/train': 2.5727362632751465} +02/24/2022 11:27:04 - INFO - codeparrot_training - Step 7628: {'lr': 0.0004832305289062784, 'samples': 3906048, 'steps': 7628, 'loss/train': 2.061204671859741} +02/24/2022 11:27:07 - INFO - codeparrot_training - Step 7629: {'lr': 0.0004832246366406049, 'samples': 3906560, 'steps': 7629, 'loss/train': 1.4982973337173462} +02/24/2022 11:27:13 - INFO - codeparrot_training - Step 7630: {'lr': 0.00048321874337587105, 'samples': 3907072, 'steps': 7630, 'loss/train': 1.3451931476593018} +02/24/2022 11:27:16 - INFO - codeparrot_training - Step 7631: {'lr': 0.0004832128491121023, 'samples': 3907584, 'steps': 7631, 'loss/train': 1.123176097869873} +02/24/2022 11:27:23 - INFO - codeparrot_training - Step 7632: {'lr': 0.00048320695384932366, 'samples': 3908096, 'steps': 7632, 'loss/train': 2.087005853652954} +02/24/2022 11:27:26 - INFO - codeparrot_training - Step 7633: {'lr': 0.0004832010575875605, 'samples': 3908608, 'steps': 7633, 'loss/train': 1.8638484477996826} +02/24/2022 11:27:32 - INFO - codeparrot_training - Step 7634: {'lr': 0.0004831951603268381, 'samples': 3909120, 'steps': 7634, 'loss/train': 2.5032124519348145} +02/24/2022 11:27:35 - INFO - codeparrot_training - Step 7635: {'lr': 0.0004831892620671816, 'samples': 3909632, 'steps': 7635, 'loss/train': 1.503653645515442} +02/24/2022 11:27:41 - INFO - codeparrot_training - Step 7636: {'lr': 0.0004831833628086164, 'samples': 3910144, 'steps': 7636, 'loss/train': 1.7145801782608032} +02/24/2022 11:27:44 - INFO - codeparrot_training - Step 7637: {'lr': 0.0004831774625511677, 'samples': 3910656, 'steps': 7637, 'loss/train': 1.6310853958129883} +02/24/2022 11:27:50 - INFO - codeparrot_training - Step 7638: {'lr': 0.00048317156129486086, 'samples': 3911168, 'steps': 7638, 'loss/train': 1.7415744066238403} +02/24/2022 11:27:53 - INFO - codeparrot_training - Step 7639: {'lr': 0.000483165659039721, 'samples': 3911680, 'steps': 7639, 'loss/train': 2.416231393814087} +02/24/2022 11:27:59 - INFO - codeparrot_training - Step 7640: {'lr': 0.0004831597557857735, 'samples': 3912192, 'steps': 7640, 'loss/train': 2.7516090869903564} +02/24/2022 11:28:02 - INFO - codeparrot_training - Step 7641: {'lr': 0.0004831538515330437, 'samples': 3912704, 'steps': 7641, 'loss/train': 2.9884231090545654} +02/24/2022 11:28:09 - INFO - codeparrot_training - Step 7642: {'lr': 0.0004831479462815568, 'samples': 3913216, 'steps': 7642, 'loss/train': 2.332920551300049} +02/24/2022 11:28:12 - INFO - codeparrot_training - Step 7643: {'lr': 0.00048314204003133815, 'samples': 3913728, 'steps': 7643, 'loss/train': 3.0780816078186035} +02/24/2022 11:28:18 - INFO - codeparrot_training - Step 7644: {'lr': 0.00048313613278241305, 'samples': 3914240, 'steps': 7644, 'loss/train': 2.5479977130889893} +02/24/2022 11:28:21 - INFO - codeparrot_training - Step 7645: {'lr': 0.0004831302245348068, 'samples': 3914752, 'steps': 7645, 'loss/train': 1.8576456308364868} +02/24/2022 11:28:27 - INFO - codeparrot_training - Step 7646: {'lr': 0.0004831243152885446, 'samples': 3915264, 'steps': 7646, 'loss/train': 2.0708115100860596} +02/24/2022 11:28:32 - INFO - codeparrot_training - Step 7647: {'lr': 0.0004831184050436519, 'samples': 3915776, 'steps': 7647, 'loss/train': 0.713884174823761} +02/24/2022 11:28:36 - INFO - codeparrot_training - Step 7648: {'lr': 0.000483112493800154, 'samples': 3916288, 'steps': 7648, 'loss/train': 3.53208065032959} +02/24/2022 11:28:41 - INFO - codeparrot_training - Step 7649: {'lr': 0.0004831065815580762, 'samples': 3916800, 'steps': 7649, 'loss/train': 2.173996686935425} +02/24/2022 11:28:45 - INFO - codeparrot_training - Step 7650: {'lr': 0.0004831006683174438, 'samples': 3917312, 'steps': 7650, 'loss/train': 0.8287095427513123} +02/24/2022 11:28:51 - INFO - codeparrot_training - Step 7651: {'lr': 0.0004830947540782822, 'samples': 3917824, 'steps': 7651, 'loss/train': 2.5224125385284424} +02/24/2022 11:28:55 - INFO - codeparrot_training - Step 7652: {'lr': 0.0004830888388406166, 'samples': 3918336, 'steps': 7652, 'loss/train': 1.5298198461532593} +02/24/2022 11:29:00 - INFO - codeparrot_training - Step 7653: {'lr': 0.0004830829226044725, 'samples': 3918848, 'steps': 7653, 'loss/train': 1.9724185466766357} +02/24/2022 11:29:03 - INFO - codeparrot_training - Step 7654: {'lr': 0.0004830770053698752, 'samples': 3919360, 'steps': 7654, 'loss/train': 2.5069825649261475} +02/24/2022 11:29:09 - INFO - codeparrot_training - Step 7655: {'lr': 0.00048307108713684994, 'samples': 3919872, 'steps': 7655, 'loss/train': 2.3742592334747314} +02/24/2022 11:29:12 - INFO - codeparrot_training - Step 7656: {'lr': 0.00048306516790542223, 'samples': 3920384, 'steps': 7656, 'loss/train': 1.044836401939392} +02/24/2022 11:29:18 - INFO - codeparrot_training - Step 7657: {'lr': 0.00048305924767561725, 'samples': 3920896, 'steps': 7657, 'loss/train': 2.8518283367156982} +02/24/2022 11:29:21 - INFO - codeparrot_training - Step 7658: {'lr': 0.00048305332644746053, 'samples': 3921408, 'steps': 7658, 'loss/train': 2.051703929901123} +02/24/2022 11:29:27 - INFO - codeparrot_training - Step 7659: {'lr': 0.0004830474042209774, 'samples': 3921920, 'steps': 7659, 'loss/train': 1.8091627359390259} +02/24/2022 11:29:30 - INFO - codeparrot_training - Step 7660: {'lr': 0.00048304148099619304, 'samples': 3922432, 'steps': 7660, 'loss/train': 1.4749717712402344} +02/24/2022 11:29:36 - INFO - codeparrot_training - Step 7661: {'lr': 0.0004830355567731331, 'samples': 3922944, 'steps': 7661, 'loss/train': 2.0379819869995117} +02/24/2022 11:29:39 - INFO - codeparrot_training - Step 7662: {'lr': 0.0004830296315518228, 'samples': 3923456, 'steps': 7662, 'loss/train': 2.104915142059326} +02/24/2022 11:29:46 - INFO - codeparrot_training - Step 7663: {'lr': 0.00048302370533228754, 'samples': 3923968, 'steps': 7663, 'loss/train': 2.3708560466766357} +02/24/2022 11:29:49 - INFO - codeparrot_training - Step 7664: {'lr': 0.00048301777811455274, 'samples': 3924480, 'steps': 7664, 'loss/train': 2.7781424522399902} +02/24/2022 11:29:54 - INFO - codeparrot_training - Step 7665: {'lr': 0.0004830118498986438, 'samples': 3924992, 'steps': 7665, 'loss/train': 2.476651191711426} +02/24/2022 11:29:58 - INFO - codeparrot_training - Step 7666: {'lr': 0.000483005920684586, 'samples': 3925504, 'steps': 7666, 'loss/train': 2.8014779090881348} +02/24/2022 11:30:04 - INFO - codeparrot_training - Step 7667: {'lr': 0.0004829999904724049, 'samples': 3926016, 'steps': 7667, 'loss/train': 2.835610866546631} +02/24/2022 11:30:07 - INFO - codeparrot_training - Step 7668: {'lr': 0.0004829940592621258, 'samples': 3926528, 'steps': 7668, 'loss/train': 2.361767292022705} +02/24/2022 11:30:12 - INFO - codeparrot_training - Step 7669: {'lr': 0.00048298812705377414, 'samples': 3927040, 'steps': 7669, 'loss/train': 2.4721925258636475} +02/24/2022 11:30:16 - INFO - codeparrot_training - Step 7670: {'lr': 0.0004829821938473753, 'samples': 3927552, 'steps': 7670, 'loss/train': 2.1274666786193848} +02/24/2022 11:30:21 - INFO - codeparrot_training - Step 7671: {'lr': 0.0004829762596429548, 'samples': 3928064, 'steps': 7671, 'loss/train': 1.3183720111846924} +02/24/2022 11:30:25 - INFO - codeparrot_training - Step 7672: {'lr': 0.0004829703244405379, 'samples': 3928576, 'steps': 7672, 'loss/train': 2.008500576019287} +02/24/2022 11:30:31 - INFO - codeparrot_training - Step 7673: {'lr': 0.0004829643882401501, 'samples': 3929088, 'steps': 7673, 'loss/train': 2.1050355434417725} +02/24/2022 11:30:34 - INFO - codeparrot_training - Step 7674: {'lr': 0.0004829584510418169, 'samples': 3929600, 'steps': 7674, 'loss/train': 2.4840757846832275} +02/24/2022 11:30:40 - INFO - codeparrot_training - Step 7675: {'lr': 0.00048295251284556363, 'samples': 3930112, 'steps': 7675, 'loss/train': 2.3247227668762207} +02/24/2022 11:30:43 - INFO - codeparrot_training - Step 7676: {'lr': 0.0004829465736514157, 'samples': 3930624, 'steps': 7676, 'loss/train': 2.8374712467193604} +02/24/2022 11:30:49 - INFO - codeparrot_training - Step 7677: {'lr': 0.00048294063345939877, 'samples': 3931136, 'steps': 7677, 'loss/train': 1.3187438249588013} +02/24/2022 11:30:53 - INFO - codeparrot_training - Step 7678: {'lr': 0.000482934692269538, 'samples': 3931648, 'steps': 7678, 'loss/train': 2.2495052814483643} +02/24/2022 11:30:58 - INFO - codeparrot_training - Step 7679: {'lr': 0.00048292875008185896, 'samples': 3932160, 'steps': 7679, 'loss/train': 1.7550535202026367} +02/24/2022 11:31:02 - INFO - codeparrot_training - Step 7680: {'lr': 0.0004829228068963872, 'samples': 3932672, 'steps': 7680, 'loss/train': 2.868255615234375} +02/24/2022 11:31:07 - INFO - codeparrot_training - Step 7681: {'lr': 0.00048291686271314816, 'samples': 3933184, 'steps': 7681, 'loss/train': 1.4639145135879517} +02/24/2022 11:31:11 - INFO - codeparrot_training - Step 7682: {'lr': 0.0004829109175321671, 'samples': 3933696, 'steps': 7682, 'loss/train': 3.0324151515960693} +02/24/2022 11:31:16 - INFO - codeparrot_training - Step 7683: {'lr': 0.00048290497135346965, 'samples': 3934208, 'steps': 7683, 'loss/train': 2.0329792499542236} +02/24/2022 11:31:20 - INFO - codeparrot_training - Step 7684: {'lr': 0.0004828990241770813, 'samples': 3934720, 'steps': 7684, 'loss/train': 2.7160253524780273} +02/24/2022 11:31:25 - INFO - codeparrot_training - Step 7685: {'lr': 0.0004828930760030275, 'samples': 3935232, 'steps': 7685, 'loss/train': 3.1744160652160645} +02/24/2022 11:31:29 - INFO - codeparrot_training - Step 7686: {'lr': 0.0004828871268313337, 'samples': 3935744, 'steps': 7686, 'loss/train': 2.5418992042541504} +02/24/2022 11:31:34 - INFO - codeparrot_training - Step 7687: {'lr': 0.0004828811766620254, 'samples': 3936256, 'steps': 7687, 'loss/train': 4.689566612243652} +02/24/2022 11:31:38 - INFO - codeparrot_training - Step 7688: {'lr': 0.00048287522549512806, 'samples': 3936768, 'steps': 7688, 'loss/train': 1.0280145406723022} +02/24/2022 11:31:44 - INFO - codeparrot_training - Step 7689: {'lr': 0.0004828692733306672, 'samples': 3937280, 'steps': 7689, 'loss/train': 1.7278858423233032} +02/24/2022 11:31:47 - INFO - codeparrot_training - Step 7690: {'lr': 0.0004828633201686684, 'samples': 3937792, 'steps': 7690, 'loss/train': 2.4728946685791016} +02/24/2022 11:31:53 - INFO - codeparrot_training - Step 7691: {'lr': 0.00048285736600915696, 'samples': 3938304, 'steps': 7691, 'loss/train': 2.1119372844696045} +02/24/2022 11:31:56 - INFO - codeparrot_training - Step 7692: {'lr': 0.00048285141085215857, 'samples': 3938816, 'steps': 7692, 'loss/train': 0.23492492735385895} +02/24/2022 11:32:02 - INFO - codeparrot_training - Step 7693: {'lr': 0.0004828454546976987, 'samples': 3939328, 'steps': 7693, 'loss/train': 2.8611409664154053} +02/24/2022 11:32:05 - INFO - codeparrot_training - Step 7694: {'lr': 0.00048283949754580283, 'samples': 3939840, 'steps': 7694, 'loss/train': 1.9664379358291626} +02/24/2022 11:32:11 - INFO - codeparrot_training - Step 7695: {'lr': 0.00048283353939649644, 'samples': 3940352, 'steps': 7695, 'loss/train': 1.3437795639038086} +02/24/2022 11:32:14 - INFO - codeparrot_training - Step 7696: {'lr': 0.0004828275802498051, 'samples': 3940864, 'steps': 7696, 'loss/train': 2.141622543334961} +02/24/2022 11:32:20 - INFO - codeparrot_training - Step 7697: {'lr': 0.0004828216201057544, 'samples': 3941376, 'steps': 7697, 'loss/train': 0.8001936674118042} +02/24/2022 11:32:23 - INFO - codeparrot_training - Step 7698: {'lr': 0.00048281565896436966, 'samples': 3941888, 'steps': 7698, 'loss/train': 1.9529004096984863} +02/24/2022 11:32:29 - INFO - codeparrot_training - Step 7699: {'lr': 0.0004828096968256767, 'samples': 3942400, 'steps': 7699, 'loss/train': 2.188166856765747} +02/24/2022 11:32:33 - INFO - codeparrot_training - Step 7700: {'lr': 0.00048280373368970086, 'samples': 3942912, 'steps': 7700, 'loss/train': 2.3147594928741455} +02/24/2022 11:32:38 - INFO - codeparrot_training - Step 7701: {'lr': 0.0004827977695564678, 'samples': 3943424, 'steps': 7701, 'loss/train': 2.0119380950927734} +02/24/2022 11:32:42 - INFO - codeparrot_training - Step 7702: {'lr': 0.000482791804426003, 'samples': 3943936, 'steps': 7702, 'loss/train': 1.545360803604126} +02/24/2022 11:32:48 - INFO - codeparrot_training - Step 7703: {'lr': 0.00048278583829833207, 'samples': 3944448, 'steps': 7703, 'loss/train': 2.4977641105651855} +02/24/2022 11:32:51 - INFO - codeparrot_training - Step 7704: {'lr': 0.00048277987117348043, 'samples': 3944960, 'steps': 7704, 'loss/train': 8.53597354888916} +02/24/2022 11:32:57 - INFO - codeparrot_training - Step 7705: {'lr': 0.00048277390305147386, 'samples': 3945472, 'steps': 7705, 'loss/train': 0.19285978376865387} +02/24/2022 11:33:00 - INFO - codeparrot_training - Step 7706: {'lr': 0.0004827679339323377, 'samples': 3945984, 'steps': 7706, 'loss/train': 2.566706895828247} +02/24/2022 11:33:06 - INFO - codeparrot_training - Step 7707: {'lr': 0.0004827619638160977, 'samples': 3946496, 'steps': 7707, 'loss/train': 1.779285192489624} +02/24/2022 11:33:09 - INFO - codeparrot_training - Step 7708: {'lr': 0.00048275599270277927, 'samples': 3947008, 'steps': 7708, 'loss/train': 2.663087844848633} +02/24/2022 11:33:15 - INFO - codeparrot_training - Step 7709: {'lr': 0.00048275002059240815, 'samples': 3947520, 'steps': 7709, 'loss/train': 1.724759578704834} +02/24/2022 11:33:19 - INFO - codeparrot_training - Step 7710: {'lr': 0.00048274404748500975, 'samples': 3948032, 'steps': 7710, 'loss/train': 2.8975305557250977} +02/24/2022 11:33:24 - INFO - codeparrot_training - Step 7711: {'lr': 0.0004827380733806099, 'samples': 3948544, 'steps': 7711, 'loss/train': 2.604024648666382} +02/24/2022 11:33:28 - INFO - codeparrot_training - Step 7712: {'lr': 0.0004827320982792339, 'samples': 3949056, 'steps': 7712, 'loss/train': 2.541714668273926} +02/24/2022 11:33:33 - INFO - codeparrot_training - Step 7713: {'lr': 0.0004827261221809076, 'samples': 3949568, 'steps': 7713, 'loss/train': 2.7684316635131836} +02/24/2022 11:33:37 - INFO - codeparrot_training - Step 7714: {'lr': 0.00048272014508565645, 'samples': 3950080, 'steps': 7714, 'loss/train': 1.9567121267318726} +02/24/2022 11:33:43 - INFO - codeparrot_training - Step 7715: {'lr': 0.00048271416699350613, 'samples': 3950592, 'steps': 7715, 'loss/train': 1.8389208316802979} +02/24/2022 11:33:46 - INFO - codeparrot_training - Step 7716: {'lr': 0.0004827081879044821, 'samples': 3951104, 'steps': 7716, 'loss/train': 1.0541294813156128} +02/24/2022 11:33:52 - INFO - codeparrot_training - Step 7717: {'lr': 0.00048270220781861025, 'samples': 3951616, 'steps': 7717, 'loss/train': 0.4620235562324524} +02/24/2022 11:33:55 - INFO - codeparrot_training - Step 7718: {'lr': 0.000482696226735916, 'samples': 3952128, 'steps': 7718, 'loss/train': 1.644763469696045} +02/24/2022 11:34:01 - INFO - codeparrot_training - Step 7719: {'lr': 0.00048269024465642487, 'samples': 3952640, 'steps': 7719, 'loss/train': 2.339843988418579} +02/24/2022 11:34:04 - INFO - codeparrot_training - Step 7720: {'lr': 0.00048268426158016274, 'samples': 3953152, 'steps': 7720, 'loss/train': 1.6591130495071411} +02/24/2022 11:34:10 - INFO - codeparrot_training - Step 7721: {'lr': 0.0004826782775071551, 'samples': 3953664, 'steps': 7721, 'loss/train': 2.216982364654541} +02/24/2022 11:34:13 - INFO - codeparrot_training - Step 7722: {'lr': 0.00048267229243742753, 'samples': 3954176, 'steps': 7722, 'loss/train': 1.771715521812439} +02/24/2022 11:34:19 - INFO - codeparrot_training - Step 7723: {'lr': 0.00048266630637100585, 'samples': 3954688, 'steps': 7723, 'loss/train': 2.678609848022461} +02/24/2022 11:34:22 - INFO - codeparrot_training - Step 7724: {'lr': 0.00048266031930791555, 'samples': 3955200, 'steps': 7724, 'loss/train': 2.1432509422302246} +02/24/2022 11:34:28 - INFO - codeparrot_training - Step 7725: {'lr': 0.00048265433124818226, 'samples': 3955712, 'steps': 7725, 'loss/train': 0.9142553806304932} +02/24/2022 11:34:32 - INFO - codeparrot_training - Step 7726: {'lr': 0.00048264834219183175, 'samples': 3956224, 'steps': 7726, 'loss/train': 1.7260949611663818} +02/24/2022 11:34:37 - INFO - codeparrot_training - Step 7727: {'lr': 0.00048264235213888964, 'samples': 3956736, 'steps': 7727, 'loss/train': 1.4306950569152832} +02/24/2022 11:34:41 - INFO - codeparrot_training - Step 7728: {'lr': 0.00048263636108938153, 'samples': 3957248, 'steps': 7728, 'loss/train': 1.7211476564407349} +02/24/2022 11:34:46 - INFO - codeparrot_training - Step 7729: {'lr': 0.0004826303690433331, 'samples': 3957760, 'steps': 7729, 'loss/train': 2.3091349601745605} +02/24/2022 11:34:50 - INFO - codeparrot_training - Step 7730: {'lr': 0.0004826243760007701, 'samples': 3958272, 'steps': 7730, 'loss/train': 2.0332255363464355} +02/24/2022 11:34:56 - INFO - codeparrot_training - Step 7731: {'lr': 0.00048261838196171804, 'samples': 3958784, 'steps': 7731, 'loss/train': 2.8401684761047363} +02/24/2022 11:34:59 - INFO - codeparrot_training - Step 7732: {'lr': 0.0004826123869262028, 'samples': 3959296, 'steps': 7732, 'loss/train': 1.2814948558807373} +02/24/2022 11:35:05 - INFO - codeparrot_training - Step 7733: {'lr': 0.0004826063908942499, 'samples': 3959808, 'steps': 7733, 'loss/train': 2.7540884017944336} +02/24/2022 11:35:08 - INFO - codeparrot_training - Step 7734: {'lr': 0.00048260039386588513, 'samples': 3960320, 'steps': 7734, 'loss/train': 0.18227140605449677} +02/24/2022 11:35:15 - INFO - codeparrot_training - Step 7735: {'lr': 0.00048259439584113405, 'samples': 3960832, 'steps': 7735, 'loss/train': 0.3952483534812927} +02/24/2022 11:35:18 - INFO - codeparrot_training - Step 7736: {'lr': 0.00048258839682002253, 'samples': 3961344, 'steps': 7736, 'loss/train': 0.6021316051483154} +02/24/2022 11:35:24 - INFO - codeparrot_training - Step 7737: {'lr': 0.0004825823968025761, 'samples': 3961856, 'steps': 7737, 'loss/train': 0.1311848759651184} +02/24/2022 11:35:27 - INFO - codeparrot_training - Step 7738: {'lr': 0.0004825763957888206, 'samples': 3962368, 'steps': 7738, 'loss/train': 2.5508365631103516} +02/24/2022 11:35:33 - INFO - codeparrot_training - Step 7739: {'lr': 0.00048257039377878165, 'samples': 3962880, 'steps': 7739, 'loss/train': 2.9970643520355225} +02/24/2022 11:35:36 - INFO - codeparrot_training - Step 7740: {'lr': 0.00048256439077248495, 'samples': 3963392, 'steps': 7740, 'loss/train': 1.5972727537155151} +02/24/2022 11:35:42 - INFO - codeparrot_training - Step 7741: {'lr': 0.00048255838676995624, 'samples': 3963904, 'steps': 7741, 'loss/train': 1.511894941329956} +02/24/2022 11:35:45 - INFO - codeparrot_training - Step 7742: {'lr': 0.00048255238177122127, 'samples': 3964416, 'steps': 7742, 'loss/train': 3.0409815311431885} +02/24/2022 11:35:51 - INFO - codeparrot_training - Step 7743: {'lr': 0.0004825463757763058, 'samples': 3964928, 'steps': 7743, 'loss/train': 2.1414434909820557} +02/24/2022 11:35:54 - INFO - codeparrot_training - Step 7744: {'lr': 0.00048254036878523537, 'samples': 3965440, 'steps': 7744, 'loss/train': 1.647324562072754} +02/24/2022 11:36:00 - INFO - codeparrot_training - Step 7745: {'lr': 0.00048253436079803594, 'samples': 3965952, 'steps': 7745, 'loss/train': 2.089505910873413} +02/24/2022 11:36:04 - INFO - codeparrot_training - Step 7746: {'lr': 0.0004825283518147331, 'samples': 3966464, 'steps': 7746, 'loss/train': 1.6967997550964355} +02/24/2022 11:36:09 - INFO - codeparrot_training - Step 7747: {'lr': 0.00048252234183535265, 'samples': 3966976, 'steps': 7747, 'loss/train': 2.582773447036743} +02/24/2022 11:36:13 - INFO - codeparrot_training - Step 7748: {'lr': 0.0004825163308599203, 'samples': 3967488, 'steps': 7748, 'loss/train': 2.2498679161071777} +02/24/2022 11:36:18 - INFO - codeparrot_training - Step 7749: {'lr': 0.0004825103188884619, 'samples': 3968000, 'steps': 7749, 'loss/train': 1.5776172876358032} +02/24/2022 11:36:22 - INFO - codeparrot_training - Step 7750: {'lr': 0.000482504305921003, 'samples': 3968512, 'steps': 7750, 'loss/train': 2.4894113540649414} +02/24/2022 11:36:27 - INFO - codeparrot_training - Step 7751: {'lr': 0.00048249829195756954, 'samples': 3969024, 'steps': 7751, 'loss/train': 2.1953022480010986} +02/24/2022 11:36:30 - INFO - codeparrot_training - Step 7752: {'lr': 0.0004824922769981873, 'samples': 3969536, 'steps': 7752, 'loss/train': 2.151811122894287} +02/24/2022 11:36:36 - INFO - codeparrot_training - Step 7753: {'lr': 0.0004824862610428819, 'samples': 3970048, 'steps': 7753, 'loss/train': 3.196906566619873} +02/24/2022 11:36:39 - INFO - codeparrot_training - Step 7754: {'lr': 0.0004824802440916792, 'samples': 3970560, 'steps': 7754, 'loss/train': 3.315129041671753} +02/24/2022 11:36:46 - INFO - codeparrot_training - Step 7755: {'lr': 0.0004824742261446049, 'samples': 3971072, 'steps': 7755, 'loss/train': 0.5079135298728943} +02/24/2022 11:36:49 - INFO - codeparrot_training - Step 7756: {'lr': 0.0004824682072016849, 'samples': 3971584, 'steps': 7756, 'loss/train': 3.0118930339813232} +02/24/2022 11:36:55 - INFO - codeparrot_training - Step 7757: {'lr': 0.00048246218726294486, 'samples': 3972096, 'steps': 7757, 'loss/train': 2.474109411239624} +02/24/2022 11:36:58 - INFO - codeparrot_training - Step 7758: {'lr': 0.0004824561663284107, 'samples': 3972608, 'steps': 7758, 'loss/train': 0.28871428966522217} +02/24/2022 11:37:04 - INFO - codeparrot_training - Step 7759: {'lr': 0.0004824501443981081, 'samples': 3973120, 'steps': 7759, 'loss/train': 2.8747596740722656} +02/24/2022 11:37:07 - INFO - codeparrot_training - Step 7760: {'lr': 0.00048244412147206283, 'samples': 3973632, 'steps': 7760, 'loss/train': 0.5813701152801514} +02/24/2022 11:37:13 - INFO - codeparrot_training - Step 7761: {'lr': 0.00048243809755030086, 'samples': 3974144, 'steps': 7761, 'loss/train': 1.5651233196258545} +02/24/2022 11:37:16 - INFO - codeparrot_training - Step 7762: {'lr': 0.00048243207263284785, 'samples': 3974656, 'steps': 7762, 'loss/train': 2.242506742477417} +02/24/2022 11:37:22 - INFO - codeparrot_training - Step 7763: {'lr': 0.0004824260467197296, 'samples': 3975168, 'steps': 7763, 'loss/train': 1.8729777336120605} +02/24/2022 11:37:25 - INFO - codeparrot_training - Step 7764: {'lr': 0.000482420019810972, 'samples': 3975680, 'steps': 7764, 'loss/train': 1.818366527557373} +02/24/2022 11:37:31 - INFO - codeparrot_training - Step 7765: {'lr': 0.00048241399190660086, 'samples': 3976192, 'steps': 7765, 'loss/train': 2.531325578689575} +02/24/2022 11:37:35 - INFO - codeparrot_training - Step 7766: {'lr': 0.0004824079630066419, 'samples': 3976704, 'steps': 7766, 'loss/train': 2.398423910140991} +02/24/2022 11:37:40 - INFO - codeparrot_training - Step 7767: {'lr': 0.0004824019331111211, 'samples': 3977216, 'steps': 7767, 'loss/train': 3.0106937885284424} +02/24/2022 11:37:43 - INFO - codeparrot_training - Step 7768: {'lr': 0.0004823959022200642, 'samples': 3977728, 'steps': 7768, 'loss/train': 2.110288381576538} +02/24/2022 11:37:49 - INFO - codeparrot_training - Step 7769: {'lr': 0.00048238987033349706, 'samples': 3978240, 'steps': 7769, 'loss/train': 3.1188998222351074} +02/24/2022 11:37:52 - INFO - codeparrot_training - Step 7770: {'lr': 0.0004823838374514455, 'samples': 3978752, 'steps': 7770, 'loss/train': 1.878414511680603} +02/24/2022 11:37:59 - INFO - codeparrot_training - Step 7771: {'lr': 0.00048237780357393535, 'samples': 3979264, 'steps': 7771, 'loss/train': 1.8256481885910034} +02/24/2022 11:38:02 - INFO - codeparrot_training - Step 7772: {'lr': 0.00048237176870099256, 'samples': 3979776, 'steps': 7772, 'loss/train': 2.308722734451294} +02/24/2022 11:38:08 - INFO - codeparrot_training - Step 7773: {'lr': 0.0004823657328326427, 'samples': 3980288, 'steps': 7773, 'loss/train': 2.7769384384155273} +02/24/2022 11:38:11 - INFO - codeparrot_training - Step 7774: {'lr': 0.000482359695968912, 'samples': 3980800, 'steps': 7774, 'loss/train': 5.388213157653809} +02/24/2022 11:38:17 - INFO - codeparrot_training - Step 7775: {'lr': 0.0004823536581098261, 'samples': 3981312, 'steps': 7775, 'loss/train': 1.8067668676376343} +02/24/2022 11:38:20 - INFO - codeparrot_training - Step 7776: {'lr': 0.00048234761925541094, 'samples': 3981824, 'steps': 7776, 'loss/train': 3.3182616233825684} +02/24/2022 11:38:26 - INFO - codeparrot_training - Step 7777: {'lr': 0.0004823415794056923, 'samples': 3982336, 'steps': 7777, 'loss/train': 2.4434127807617188} +02/24/2022 11:38:29 - INFO - codeparrot_training - Step 7778: {'lr': 0.00048233553856069617, 'samples': 3982848, 'steps': 7778, 'loss/train': 2.5438945293426514} +02/24/2022 11:38:35 - INFO - codeparrot_training - Step 7779: {'lr': 0.00048232949672044834, 'samples': 3983360, 'steps': 7779, 'loss/train': 2.4991538524627686} +02/24/2022 11:38:38 - INFO - codeparrot_training - Step 7780: {'lr': 0.0004823234538849747, 'samples': 3983872, 'steps': 7780, 'loss/train': 2.984445333480835} +02/24/2022 11:38:44 - INFO - codeparrot_training - Step 7781: {'lr': 0.0004823174100543012, 'samples': 3984384, 'steps': 7781, 'loss/train': 2.254769802093506} +02/24/2022 11:38:48 - INFO - codeparrot_training - Step 7782: {'lr': 0.0004823113652284536, 'samples': 3984896, 'steps': 7782, 'loss/train': 1.9577986001968384} +02/24/2022 11:38:53 - INFO - codeparrot_training - Step 7783: {'lr': 0.00048230531940745793, 'samples': 3985408, 'steps': 7783, 'loss/train': 2.592783212661743} +02/24/2022 11:38:57 - INFO - codeparrot_training - Step 7784: {'lr': 0.0004822992725913401, 'samples': 3985920, 'steps': 7784, 'loss/train': 2.5873801708221436} +02/24/2022 11:39:02 - INFO - codeparrot_training - Step 7785: {'lr': 0.00048229322478012584, 'samples': 3986432, 'steps': 7785, 'loss/train': 2.0520689487457275} +02/24/2022 11:39:06 - INFO - codeparrot_training - Step 7786: {'lr': 0.0004822871759738412, 'samples': 3986944, 'steps': 7786, 'loss/train': 1.9086414575576782} +02/24/2022 11:39:11 - INFO - codeparrot_training - Step 7787: {'lr': 0.0004822811261725121, 'samples': 3987456, 'steps': 7787, 'loss/train': 2.414551258087158} +02/24/2022 11:39:15 - INFO - codeparrot_training - Step 7788: {'lr': 0.0004822750753761644, 'samples': 3987968, 'steps': 7788, 'loss/train': 2.2617695331573486} +02/24/2022 11:39:20 - INFO - codeparrot_training - Step 7789: {'lr': 0.00048226902358482405, 'samples': 3988480, 'steps': 7789, 'loss/train': 2.2469849586486816} +02/24/2022 11:39:24 - INFO - codeparrot_training - Step 7790: {'lr': 0.0004822629707985169, 'samples': 3988992, 'steps': 7790, 'loss/train': 3.169412851333618} +02/24/2022 11:39:30 - INFO - codeparrot_training - Step 7791: {'lr': 0.00048225691701726895, 'samples': 3989504, 'steps': 7791, 'loss/train': 2.17946457862854} +02/24/2022 11:39:34 - INFO - codeparrot_training - Step 7792: {'lr': 0.00048225086224110614, 'samples': 3990016, 'steps': 7792, 'loss/train': 0.47140467166900635} +02/24/2022 11:39:39 - INFO - codeparrot_training - Step 7793: {'lr': 0.00048224480647005437, 'samples': 3990528, 'steps': 7793, 'loss/train': 0.39862060546875} +02/24/2022 11:39:43 - INFO - codeparrot_training - Step 7794: {'lr': 0.0004822387497041396, 'samples': 3991040, 'steps': 7794, 'loss/train': 2.600405693054199} +02/24/2022 11:39:48 - INFO - codeparrot_training - Step 7795: {'lr': 0.00048223269194338776, 'samples': 3991552, 'steps': 7795, 'loss/train': 3.442826509475708} +02/24/2022 11:39:52 - INFO - codeparrot_training - Step 7796: {'lr': 0.0004822266331878248, 'samples': 3992064, 'steps': 7796, 'loss/train': 3.0081872940063477} +02/24/2022 11:39:57 - INFO - codeparrot_training - Step 7797: {'lr': 0.0004822205734374767, 'samples': 3992576, 'steps': 7797, 'loss/train': 1.3744837045669556} +02/24/2022 11:40:01 - INFO - codeparrot_training - Step 7798: {'lr': 0.00048221451269236937, 'samples': 3993088, 'steps': 7798, 'loss/train': 1.2777047157287598} +02/24/2022 11:40:06 - INFO - codeparrot_training - Step 7799: {'lr': 0.0004822084509525289, 'samples': 3993600, 'steps': 7799, 'loss/train': 2.253067970275879} +02/24/2022 11:40:10 - INFO - codeparrot_training - Step 7800: {'lr': 0.0004822023882179811, 'samples': 3994112, 'steps': 7800, 'loss/train': 2.231689929962158} +02/24/2022 11:40:15 - INFO - codeparrot_training - Step 7801: {'lr': 0.00048219632448875195, 'samples': 3994624, 'steps': 7801, 'loss/train': 2.465588331222534} +02/24/2022 11:40:21 - INFO - codeparrot_training - Step 7802: {'lr': 0.0004821902597648675, 'samples': 3995136, 'steps': 7802, 'loss/train': 2.680110454559326} +02/24/2022 11:40:24 - INFO - codeparrot_training - Step 7803: {'lr': 0.0004821841940463538, 'samples': 3995648, 'steps': 7803, 'loss/train': 1.8885477781295776} +02/24/2022 11:40:30 - INFO - codeparrot_training - Step 7804: {'lr': 0.0004821781273332366, 'samples': 3996160, 'steps': 7804, 'loss/train': 2.3127224445343018} +02/24/2022 11:40:33 - INFO - codeparrot_training - Step 7805: {'lr': 0.00048217205962554214, 'samples': 3996672, 'steps': 7805, 'loss/train': 2.5576257705688477} +02/24/2022 11:40:40 - INFO - codeparrot_training - Step 7806: {'lr': 0.0004821659909232963, 'samples': 3997184, 'steps': 7806, 'loss/train': 2.561901330947876} +02/24/2022 11:40:43 - INFO - codeparrot_training - Step 7807: {'lr': 0.000482159921226525, 'samples': 3997696, 'steps': 7807, 'loss/train': 2.2070515155792236} +02/24/2022 11:40:49 - INFO - codeparrot_training - Step 7808: {'lr': 0.00048215385053525434, 'samples': 3998208, 'steps': 7808, 'loss/train': 1.6095865964889526} +02/24/2022 11:40:52 - INFO - codeparrot_training - Step 7809: {'lr': 0.0004821477788495103, 'samples': 3998720, 'steps': 7809, 'loss/train': 2.2978527545928955} +02/24/2022 11:40:58 - INFO - codeparrot_training - Step 7810: {'lr': 0.0004821417061693189, 'samples': 3999232, 'steps': 7810, 'loss/train': 2.2504847049713135} +02/24/2022 11:41:01 - INFO - codeparrot_training - Step 7811: {'lr': 0.00048213563249470615, 'samples': 3999744, 'steps': 7811, 'loss/train': 2.2726497650146484} +02/24/2022 11:41:07 - INFO - codeparrot_training - Step 7812: {'lr': 0.00048212955782569805, 'samples': 4000256, 'steps': 7812, 'loss/train': 1.8694270849227905} +02/24/2022 11:41:10 - INFO - codeparrot_training - Step 7813: {'lr': 0.00048212348216232064, 'samples': 4000768, 'steps': 7813, 'loss/train': 1.7889246940612793} +02/24/2022 11:41:16 - INFO - codeparrot_training - Step 7814: {'lr': 0.0004821174055045999, 'samples': 4001280, 'steps': 7814, 'loss/train': 2.1357905864715576} +02/24/2022 11:41:19 - INFO - codeparrot_training - Step 7815: {'lr': 0.000482111327852562, 'samples': 4001792, 'steps': 7815, 'loss/train': 2.235445022583008} +02/24/2022 11:41:26 - INFO - codeparrot_training - Step 7816: {'lr': 0.0004821052492062328, 'samples': 4002304, 'steps': 7816, 'loss/train': 2.826972723007202} +02/24/2022 11:41:30 - INFO - codeparrot_training - Step 7817: {'lr': 0.0004820991695656385, 'samples': 4002816, 'steps': 7817, 'loss/train': 1.0054012537002563} +02/24/2022 11:41:33 - INFO - codeparrot_training - Step 7818: {'lr': 0.00048209308893080495, 'samples': 4003328, 'steps': 7818, 'loss/train': 2.198528528213501} +02/24/2022 11:41:39 - INFO - codeparrot_training - Step 7819: {'lr': 0.00048208700730175834, 'samples': 4003840, 'steps': 7819, 'loss/train': 2.274714708328247} +02/24/2022 11:41:42 - INFO - codeparrot_training - Step 7820: {'lr': 0.0004820809246785247, 'samples': 4004352, 'steps': 7820, 'loss/train': 3.313187599182129} +02/24/2022 11:41:48 - INFO - codeparrot_training - Step 7821: {'lr': 0.00048207484106113, 'samples': 4004864, 'steps': 7821, 'loss/train': 3.0961296558380127} +02/24/2022 11:41:51 - INFO - codeparrot_training - Step 7822: {'lr': 0.0004820687564496005, 'samples': 4005376, 'steps': 7822, 'loss/train': 0.856377363204956} +02/24/2022 11:41:57 - INFO - codeparrot_training - Step 7823: {'lr': 0.00048206267084396204, 'samples': 4005888, 'steps': 7823, 'loss/train': 2.9520039558410645} +02/24/2022 11:42:00 - INFO - codeparrot_training - Step 7824: {'lr': 0.0004820565842442408, 'samples': 4006400, 'steps': 7824, 'loss/train': 2.825042963027954} +02/24/2022 11:42:06 - INFO - codeparrot_training - Step 7825: {'lr': 0.00048205049665046287, 'samples': 4006912, 'steps': 7825, 'loss/train': 3.0491065979003906} +02/24/2022 11:42:09 - INFO - codeparrot_training - Step 7826: {'lr': 0.0004820444080626543, 'samples': 4007424, 'steps': 7826, 'loss/train': 2.5948143005371094} +02/24/2022 11:42:15 - INFO - codeparrot_training - Step 7827: {'lr': 0.00048203831848084115, 'samples': 4007936, 'steps': 7827, 'loss/train': 3.128384590148926} +02/24/2022 11:42:19 - INFO - codeparrot_training - Step 7828: {'lr': 0.0004820322279050495, 'samples': 4008448, 'steps': 7828, 'loss/train': 1.2974281311035156} +02/24/2022 11:42:24 - INFO - codeparrot_training - Step 7829: {'lr': 0.00048202613633530555, 'samples': 4008960, 'steps': 7829, 'loss/train': 2.6432459354400635} +02/24/2022 11:42:28 - INFO - codeparrot_training - Step 7830: {'lr': 0.00048202004377163524, 'samples': 4009472, 'steps': 7830, 'loss/train': 2.479123592376709} +02/24/2022 11:42:33 - INFO - codeparrot_training - Step 7831: {'lr': 0.00048201395021406476, 'samples': 4009984, 'steps': 7831, 'loss/train': 1.8705668449401855} +02/24/2022 11:42:37 - INFO - codeparrot_training - Step 7832: {'lr': 0.0004820078556626202, 'samples': 4010496, 'steps': 7832, 'loss/train': 1.2155200242996216} +02/24/2022 11:42:42 - INFO - codeparrot_training - Step 7833: {'lr': 0.0004820017601173276, 'samples': 4011008, 'steps': 7833, 'loss/train': 2.29663348197937} +02/24/2022 11:42:46 - INFO - codeparrot_training - Step 7834: {'lr': 0.00048199566357821314, 'samples': 4011520, 'steps': 7834, 'loss/train': 2.2540931701660156} +02/24/2022 11:42:51 - INFO - codeparrot_training - Step 7835: {'lr': 0.00048198956604530297, 'samples': 4012032, 'steps': 7835, 'loss/train': 2.5729522705078125} +02/24/2022 11:42:55 - INFO - codeparrot_training - Step 7836: {'lr': 0.0004819834675186231, 'samples': 4012544, 'steps': 7836, 'loss/train': 2.475855827331543} +02/24/2022 11:43:00 - INFO - codeparrot_training - Step 7837: {'lr': 0.0004819773679981998, 'samples': 4013056, 'steps': 7837, 'loss/train': 2.0142223834991455} +02/24/2022 11:43:04 - INFO - codeparrot_training - Step 7838: {'lr': 0.0004819712674840591, 'samples': 4013568, 'steps': 7838, 'loss/train': 1.6875241994857788} +02/24/2022 11:43:10 - INFO - codeparrot_training - Step 7839: {'lr': 0.00048196516597622706, 'samples': 4014080, 'steps': 7839, 'loss/train': 2.1893794536590576} +02/24/2022 11:43:13 - INFO - codeparrot_training - Step 7840: {'lr': 0.00048195906347473, 'samples': 4014592, 'steps': 7840, 'loss/train': 1.9948806762695312} +02/24/2022 11:43:19 - INFO - codeparrot_training - Step 7841: {'lr': 0.00048195295997959393, 'samples': 4015104, 'steps': 7841, 'loss/train': 2.6629714965820312} +02/24/2022 11:43:22 - INFO - codeparrot_training - Step 7842: {'lr': 0.00048194685549084507, 'samples': 4015616, 'steps': 7842, 'loss/train': 2.457061290740967} +02/24/2022 11:43:28 - INFO - codeparrot_training - Step 7843: {'lr': 0.00048194075000850944, 'samples': 4016128, 'steps': 7843, 'loss/train': 2.4506239891052246} +02/24/2022 11:43:31 - INFO - codeparrot_training - Step 7844: {'lr': 0.0004819346435326134, 'samples': 4016640, 'steps': 7844, 'loss/train': 2.20335054397583} +02/24/2022 11:43:37 - INFO - codeparrot_training - Step 7845: {'lr': 0.000481928536063183, 'samples': 4017152, 'steps': 7845, 'loss/train': 1.9625605344772339} +02/24/2022 11:43:40 - INFO - codeparrot_training - Step 7846: {'lr': 0.0004819224276002443, 'samples': 4017664, 'steps': 7846, 'loss/train': 2.8505752086639404} +02/24/2022 11:43:46 - INFO - codeparrot_training - Step 7847: {'lr': 0.0004819163181438236, 'samples': 4018176, 'steps': 7847, 'loss/train': 1.4816619157791138} +02/24/2022 11:43:49 - INFO - codeparrot_training - Step 7848: {'lr': 0.000481910207693947, 'samples': 4018688, 'steps': 7848, 'loss/train': 2.219057559967041} +02/24/2022 11:43:55 - INFO - codeparrot_training - Step 7849: {'lr': 0.0004819040962506408, 'samples': 4019200, 'steps': 7849, 'loss/train': 1.3804471492767334} +02/24/2022 11:43:58 - INFO - codeparrot_training - Step 7850: {'lr': 0.000481897983813931, 'samples': 4019712, 'steps': 7850, 'loss/train': 1.5484976768493652} +02/24/2022 11:44:04 - INFO - codeparrot_training - Step 7851: {'lr': 0.00048189187038384396, 'samples': 4020224, 'steps': 7851, 'loss/train': 3.0784573554992676} +02/24/2022 11:44:07 - INFO - codeparrot_training - Step 7852: {'lr': 0.00048188575596040575, 'samples': 4020736, 'steps': 7852, 'loss/train': 2.2429795265197754} +02/24/2022 11:44:14 - INFO - codeparrot_training - Step 7853: {'lr': 0.00048187964054364254, 'samples': 4021248, 'steps': 7853, 'loss/train': 2.7370266914367676} +02/24/2022 11:44:19 - INFO - codeparrot_training - Step 7854: {'lr': 0.0004818735241335807, 'samples': 4021760, 'steps': 7854, 'loss/train': 1.6324163675308228} +02/24/2022 11:44:23 - INFO - codeparrot_training - Step 7855: {'lr': 0.00048186740673024614, 'samples': 4022272, 'steps': 7855, 'loss/train': 1.7768681049346924} +02/24/2022 11:44:28 - INFO - codeparrot_training - Step 7856: {'lr': 0.00048186128833366536, 'samples': 4022784, 'steps': 7856, 'loss/train': 2.867032289505005} +02/24/2022 11:44:31 - INFO - codeparrot_training - Step 7857: {'lr': 0.0004818551689438644, 'samples': 4023296, 'steps': 7857, 'loss/train': 2.4767074584960938} +02/24/2022 11:44:37 - INFO - codeparrot_training - Step 7858: {'lr': 0.00048184904856086953, 'samples': 4023808, 'steps': 7858, 'loss/train': 2.5349812507629395} +02/24/2022 11:44:40 - INFO - codeparrot_training - Step 7859: {'lr': 0.0004818429271847069, 'samples': 4024320, 'steps': 7859, 'loss/train': 2.389404058456421} +02/24/2022 11:44:46 - INFO - codeparrot_training - Step 7860: {'lr': 0.00048183680481540293, 'samples': 4024832, 'steps': 7860, 'loss/train': 2.769411087036133} +02/24/2022 11:44:49 - INFO - codeparrot_training - Step 7861: {'lr': 0.0004818306814529836, 'samples': 4025344, 'steps': 7861, 'loss/train': 2.349100112915039} +02/24/2022 11:44:55 - INFO - codeparrot_training - Step 7862: {'lr': 0.00048182455709747525, 'samples': 4025856, 'steps': 7862, 'loss/train': 2.544666051864624} +02/24/2022 11:44:58 - INFO - codeparrot_training - Step 7863: {'lr': 0.0004818184317489041, 'samples': 4026368, 'steps': 7863, 'loss/train': 2.097475290298462} +02/24/2022 11:45:04 - INFO - codeparrot_training - Step 7864: {'lr': 0.00048181230540729643, 'samples': 4026880, 'steps': 7864, 'loss/train': 1.2829848527908325} +02/24/2022 11:45:08 - INFO - codeparrot_training - Step 7865: {'lr': 0.00048180617807267844, 'samples': 4027392, 'steps': 7865, 'loss/train': 3.1394948959350586} +02/24/2022 11:45:13 - INFO - codeparrot_training - Step 7866: {'lr': 0.0004818000497450764, 'samples': 4027904, 'steps': 7866, 'loss/train': 1.9057754278182983} +02/24/2022 11:45:17 - INFO - codeparrot_training - Step 7867: {'lr': 0.00048179392042451655, 'samples': 4028416, 'steps': 7867, 'loss/train': 1.6702289581298828} +02/24/2022 11:45:22 - INFO - codeparrot_training - Step 7868: {'lr': 0.0004817877901110251, 'samples': 4028928, 'steps': 7868, 'loss/train': 2.5258617401123047} +02/24/2022 11:45:26 - INFO - codeparrot_training - Step 7869: {'lr': 0.00048178165880462845, 'samples': 4029440, 'steps': 7869, 'loss/train': 2.1727590560913086} +02/24/2022 11:45:31 - INFO - codeparrot_training - Step 7870: {'lr': 0.0004817755265053527, 'samples': 4029952, 'steps': 7870, 'loss/train': 2.1347312927246094} +02/24/2022 11:45:35 - INFO - codeparrot_training - Step 7871: {'lr': 0.0004817693932132242, 'samples': 4030464, 'steps': 7871, 'loss/train': 1.851364254951477} +02/24/2022 11:45:40 - INFO - codeparrot_training - Step 7872: {'lr': 0.0004817632589282693, 'samples': 4030976, 'steps': 7872, 'loss/train': 1.3407553434371948} +02/24/2022 11:45:44 - INFO - codeparrot_training - Step 7873: {'lr': 0.00048175712365051407, 'samples': 4031488, 'steps': 7873, 'loss/train': 2.8045785427093506} +02/24/2022 11:45:50 - INFO - codeparrot_training - Step 7874: {'lr': 0.00048175098737998504, 'samples': 4032000, 'steps': 7874, 'loss/train': 2.6032423973083496} +02/24/2022 11:45:53 - INFO - codeparrot_training - Step 7875: {'lr': 0.0004817448501167082, 'samples': 4032512, 'steps': 7875, 'loss/train': 1.6382709741592407} +02/24/2022 11:45:59 - INFO - codeparrot_training - Step 7876: {'lr': 0.0004817387118607102, 'samples': 4033024, 'steps': 7876, 'loss/train': 1.7451155185699463} +02/24/2022 11:46:02 - INFO - codeparrot_training - Step 7877: {'lr': 0.00048173257261201695, 'samples': 4033536, 'steps': 7877, 'loss/train': 1.59663724899292} +02/24/2022 11:46:08 - INFO - codeparrot_training - Step 7878: {'lr': 0.00048172643237065504, 'samples': 4034048, 'steps': 7878, 'loss/train': 1.880545973777771} +02/24/2022 11:46:11 - INFO - codeparrot_training - Step 7879: {'lr': 0.00048172029113665075, 'samples': 4034560, 'steps': 7879, 'loss/train': 2.4292068481445312} +02/24/2022 11:46:17 - INFO - codeparrot_training - Step 7880: {'lr': 0.0004817141489100302, 'samples': 4035072, 'steps': 7880, 'loss/train': 2.0743541717529297} +02/24/2022 11:46:20 - INFO - codeparrot_training - Step 7881: {'lr': 0.00048170800569081985, 'samples': 4035584, 'steps': 7881, 'loss/train': 2.9938931465148926} +02/24/2022 11:46:26 - INFO - codeparrot_training - Step 7882: {'lr': 0.000481701861479046, 'samples': 4036096, 'steps': 7882, 'loss/train': 2.032970428466797} +02/24/2022 11:46:29 - INFO - codeparrot_training - Step 7883: {'lr': 0.000481695716274735, 'samples': 4036608, 'steps': 7883, 'loss/train': 1.8447604179382324} +02/24/2022 11:46:36 - INFO - codeparrot_training - Step 7884: {'lr': 0.000481689570077913, 'samples': 4037120, 'steps': 7884, 'loss/train': 2.6717259883880615} +02/24/2022 11:46:39 - INFO - codeparrot_training - Step 7885: {'lr': 0.00048168342288860646, 'samples': 4037632, 'steps': 7885, 'loss/train': 2.673819065093994} +02/24/2022 11:46:44 - INFO - codeparrot_training - Step 7886: {'lr': 0.00048167727470684176, 'samples': 4038144, 'steps': 7886, 'loss/train': 1.8334239721298218} +02/24/2022 11:46:48 - INFO - codeparrot_training - Step 7887: {'lr': 0.0004816711255326452, 'samples': 4038656, 'steps': 7887, 'loss/train': 2.5789427757263184} +02/24/2022 11:46:53 - INFO - codeparrot_training - Step 7888: {'lr': 0.00048166497536604306, 'samples': 4039168, 'steps': 7888, 'loss/train': 2.0291426181793213} +02/24/2022 11:46:57 - INFO - codeparrot_training - Step 7889: {'lr': 0.00048165882420706175, 'samples': 4039680, 'steps': 7889, 'loss/train': 2.59883975982666} +02/24/2022 11:47:02 - INFO - codeparrot_training - Step 7890: {'lr': 0.0004816526720557276, 'samples': 4040192, 'steps': 7890, 'loss/train': 0.48059460520744324} +02/24/2022 11:47:06 - INFO - codeparrot_training - Step 7891: {'lr': 0.0004816465189120669, 'samples': 4040704, 'steps': 7891, 'loss/train': 2.426546573638916} +02/24/2022 11:47:12 - INFO - codeparrot_training - Step 7892: {'lr': 0.00048164036477610616, 'samples': 4041216, 'steps': 7892, 'loss/train': 2.4815969467163086} +02/24/2022 11:47:15 - INFO - codeparrot_training - Step 7893: {'lr': 0.0004816342096478716, 'samples': 4041728, 'steps': 7893, 'loss/train': 2.3024590015411377} +02/24/2022 11:47:21 - INFO - codeparrot_training - Step 7894: {'lr': 0.00048162805352738966, 'samples': 4042240, 'steps': 7894, 'loss/train': 2.7481632232666016} +02/24/2022 11:47:24 - INFO - codeparrot_training - Step 7895: {'lr': 0.0004816218964146867, 'samples': 4042752, 'steps': 7895, 'loss/train': 2.0822837352752686} +02/24/2022 11:47:30 - INFO - codeparrot_training - Step 7896: {'lr': 0.000481615738309789, 'samples': 4043264, 'steps': 7896, 'loss/train': 2.5999977588653564} +02/24/2022 11:47:33 - INFO - codeparrot_training - Step 7897: {'lr': 0.00048160957921272306, 'samples': 4043776, 'steps': 7897, 'loss/train': 2.230018377304077} +02/24/2022 11:47:39 - INFO - codeparrot_training - Step 7898: {'lr': 0.00048160341912351523, 'samples': 4044288, 'steps': 7898, 'loss/train': 1.790880799293518} +02/24/2022 11:47:45 - INFO - codeparrot_training - Step 7899: {'lr': 0.00048159725804219195, 'samples': 4044800, 'steps': 7899, 'loss/train': 1.1448894739151} +02/24/2022 11:47:49 - INFO - codeparrot_training - Step 7900: {'lr': 0.00048159109596877954, 'samples': 4045312, 'steps': 7900, 'loss/train': 2.8563191890716553} +02/24/2022 11:47:54 - INFO - codeparrot_training - Step 7901: {'lr': 0.00048158493290330443, 'samples': 4045824, 'steps': 7901, 'loss/train': 2.86163592338562} +02/24/2022 11:47:58 - INFO - codeparrot_training - Step 7902: {'lr': 0.00048157876884579294, 'samples': 4046336, 'steps': 7902, 'loss/train': 1.7894618511199951} +02/24/2022 11:48:03 - INFO - codeparrot_training - Step 7903: {'lr': 0.00048157260379627154, 'samples': 4046848, 'steps': 7903, 'loss/train': 1.9722492694854736} +02/24/2022 11:48:07 - INFO - codeparrot_training - Step 7904: {'lr': 0.0004815664377547667, 'samples': 4047360, 'steps': 7904, 'loss/train': 1.7103582620620728} +02/24/2022 11:48:12 - INFO - codeparrot_training - Step 7905: {'lr': 0.0004815602707213047, 'samples': 4047872, 'steps': 7905, 'loss/train': 1.7719565629959106} +02/24/2022 11:48:16 - INFO - codeparrot_training - Step 7906: {'lr': 0.00048155410269591203, 'samples': 4048384, 'steps': 7906, 'loss/train': 2.8015847206115723} +02/24/2022 11:48:21 - INFO - codeparrot_training - Step 7907: {'lr': 0.00048154793367861514, 'samples': 4048896, 'steps': 7907, 'loss/train': 1.719612717628479} +02/24/2022 11:48:25 - INFO - codeparrot_training - Step 7908: {'lr': 0.00048154176366944045, 'samples': 4049408, 'steps': 7908, 'loss/train': 3.0167155265808105} +02/24/2022 11:48:31 - INFO - codeparrot_training - Step 7909: {'lr': 0.0004815355926684144, 'samples': 4049920, 'steps': 7909, 'loss/train': 2.863046169281006} +02/24/2022 11:48:35 - INFO - codeparrot_training - Step 7910: {'lr': 0.0004815294206755633, 'samples': 4050432, 'steps': 7910, 'loss/train': 1.4275684356689453} +02/24/2022 11:48:40 - INFO - codeparrot_training - Step 7911: {'lr': 0.0004815232476909137, 'samples': 4050944, 'steps': 7911, 'loss/train': 2.777683973312378} +02/24/2022 11:48:43 - INFO - codeparrot_training - Step 7912: {'lr': 0.00048151707371449213, 'samples': 4051456, 'steps': 7912, 'loss/train': 2.2816970348358154} +02/24/2022 11:48:49 - INFO - codeparrot_training - Step 7913: {'lr': 0.0004815108987463248, 'samples': 4051968, 'steps': 7913, 'loss/train': 2.0520927906036377} +02/24/2022 11:48:53 - INFO - codeparrot_training - Step 7914: {'lr': 0.00048150472278643834, 'samples': 4052480, 'steps': 7914, 'loss/train': 2.3371365070343018} +02/24/2022 11:48:58 - INFO - codeparrot_training - Step 7915: {'lr': 0.0004814985458348592, 'samples': 4052992, 'steps': 7915, 'loss/train': 1.3985146284103394} +02/24/2022 11:49:02 - INFO - codeparrot_training - Step 7916: {'lr': 0.00048149236789161374, 'samples': 4053504, 'steps': 7916, 'loss/train': 2.6492886543273926} +02/24/2022 11:49:08 - INFO - codeparrot_training - Step 7917: {'lr': 0.00048148618895672846, 'samples': 4054016, 'steps': 7917, 'loss/train': 1.949591040611267} +02/24/2022 11:49:11 - INFO - codeparrot_training - Step 7918: {'lr': 0.0004814800090302299, 'samples': 4054528, 'steps': 7918, 'loss/train': 1.3352735042572021} +02/24/2022 11:49:17 - INFO - codeparrot_training - Step 7919: {'lr': 0.00048147382811214445, 'samples': 4055040, 'steps': 7919, 'loss/train': 2.5388824939727783} +02/24/2022 11:49:20 - INFO - codeparrot_training - Step 7920: {'lr': 0.0004814676462024987, 'samples': 4055552, 'steps': 7920, 'loss/train': 0.9629352688789368} +02/24/2022 11:49:26 - INFO - codeparrot_training - Step 7921: {'lr': 0.000481461463301319, 'samples': 4056064, 'steps': 7921, 'loss/train': 2.6115283966064453} +02/24/2022 11:49:29 - INFO - codeparrot_training - Step 7922: {'lr': 0.00048145527940863186, 'samples': 4056576, 'steps': 7922, 'loss/train': 1.4665732383728027} +02/24/2022 11:49:35 - INFO - codeparrot_training - Step 7923: {'lr': 0.00048144909452446384, 'samples': 4057088, 'steps': 7923, 'loss/train': 2.37501859664917} +02/24/2022 11:49:38 - INFO - codeparrot_training - Step 7924: {'lr': 0.00048144290864884145, 'samples': 4057600, 'steps': 7924, 'loss/train': 2.3301010131835938} +02/24/2022 11:49:44 - INFO - codeparrot_training - Step 7925: {'lr': 0.000481436721781791, 'samples': 4058112, 'steps': 7925, 'loss/train': 1.3217674493789673} +02/24/2022 11:49:48 - INFO - codeparrot_training - Step 7926: {'lr': 0.00048143053392333917, 'samples': 4058624, 'steps': 7926, 'loss/train': 2.278921127319336} +02/24/2022 11:49:53 - INFO - codeparrot_training - Step 7927: {'lr': 0.00048142434507351245, 'samples': 4059136, 'steps': 7927, 'loss/train': 2.2163450717926025} +02/24/2022 11:49:57 - INFO - codeparrot_training - Step 7928: {'lr': 0.00048141815523233735, 'samples': 4059648, 'steps': 7928, 'loss/train': 2.4262280464172363} +02/24/2022 11:50:03 - INFO - codeparrot_training - Step 7929: {'lr': 0.00048141196439984026, 'samples': 4060160, 'steps': 7929, 'loss/train': 3.123971462249756} +02/24/2022 11:50:06 - INFO - codeparrot_training - Step 7930: {'lr': 0.0004814057725760479, 'samples': 4060672, 'steps': 7930, 'loss/train': 2.2646522521972656} +02/24/2022 11:50:12 - INFO - codeparrot_training - Step 7931: {'lr': 0.0004813995797609866, 'samples': 4061184, 'steps': 7931, 'loss/train': 3.0059664249420166} +02/24/2022 11:50:15 - INFO - codeparrot_training - Step 7932: {'lr': 0.000481393385954683, 'samples': 4061696, 'steps': 7932, 'loss/train': 0.17604969441890717} +02/24/2022 11:50:21 - INFO - codeparrot_training - Step 7933: {'lr': 0.00048138719115716367, 'samples': 4062208, 'steps': 7933, 'loss/train': 2.906609058380127} +02/24/2022 11:50:24 - INFO - codeparrot_training - Step 7934: {'lr': 0.00048138099536845503, 'samples': 4062720, 'steps': 7934, 'loss/train': 2.1302058696746826} +02/24/2022 11:50:30 - INFO - codeparrot_training - Step 7935: {'lr': 0.0004813747985885837, 'samples': 4063232, 'steps': 7935, 'loss/train': 2.5546796321868896} +02/24/2022 11:50:33 - INFO - codeparrot_training - Step 7936: {'lr': 0.00048136860081757617, 'samples': 4063744, 'steps': 7936, 'loss/train': 2.7443885803222656} +02/24/2022 11:50:39 - INFO - codeparrot_training - Step 7937: {'lr': 0.00048136240205545907, 'samples': 4064256, 'steps': 7937, 'loss/train': 2.2444028854370117} +02/24/2022 11:50:43 - INFO - codeparrot_training - Step 7938: {'lr': 0.0004813562023022588, 'samples': 4064768, 'steps': 7938, 'loss/train': 1.5303294658660889} +02/24/2022 11:50:48 - INFO - codeparrot_training - Step 7939: {'lr': 0.00048135000155800217, 'samples': 4065280, 'steps': 7939, 'loss/train': 2.5860888957977295} +02/24/2022 11:50:52 - INFO - codeparrot_training - Step 7940: {'lr': 0.0004813437998227155, 'samples': 4065792, 'steps': 7940, 'loss/train': 2.6325581073760986} +02/24/2022 11:50:57 - INFO - codeparrot_training - Step 7941: {'lr': 0.00048133759709642556, 'samples': 4066304, 'steps': 7941, 'loss/train': 2.1561849117279053} +02/24/2022 11:51:01 - INFO - codeparrot_training - Step 7942: {'lr': 0.00048133139337915866, 'samples': 4066816, 'steps': 7942, 'loss/train': 1.200860619544983} +02/24/2022 11:51:06 - INFO - codeparrot_training - Step 7943: {'lr': 0.00048132518867094167, 'samples': 4067328, 'steps': 7943, 'loss/train': 2.813310146331787} +02/24/2022 11:51:10 - INFO - codeparrot_training - Step 7944: {'lr': 0.00048131898297180085, 'samples': 4067840, 'steps': 7944, 'loss/train': 1.2833945751190186} +02/24/2022 11:51:16 - INFO - codeparrot_training - Step 7945: {'lr': 0.0004813127762817631, 'samples': 4068352, 'steps': 7945, 'loss/train': 2.419658899307251} +02/24/2022 11:51:20 - INFO - codeparrot_training - Step 7946: {'lr': 0.00048130656860085485, 'samples': 4068864, 'steps': 7946, 'loss/train': 1.3830996751785278} +02/24/2022 11:51:25 - INFO - codeparrot_training - Step 7947: {'lr': 0.0004813003599291027, 'samples': 4069376, 'steps': 7947, 'loss/train': 2.92842435836792} +02/24/2022 11:51:29 - INFO - codeparrot_training - Step 7948: {'lr': 0.0004812941502665332, 'samples': 4069888, 'steps': 7948, 'loss/train': 2.1732382774353027} +02/24/2022 11:51:34 - INFO - codeparrot_training - Step 7949: {'lr': 0.0004812879396131731, 'samples': 4070400, 'steps': 7949, 'loss/train': 1.84319269657135} +02/24/2022 11:51:38 - INFO - codeparrot_training - Step 7950: {'lr': 0.0004812817279690488, 'samples': 4070912, 'steps': 7950, 'loss/train': 2.645608425140381} +02/24/2022 11:51:43 - INFO - codeparrot_training - Step 7951: {'lr': 0.00048127551533418714, 'samples': 4071424, 'steps': 7951, 'loss/train': 2.622865676879883} +02/24/2022 11:51:47 - INFO - codeparrot_training - Step 7952: {'lr': 0.0004812693017086145, 'samples': 4071936, 'steps': 7952, 'loss/train': 2.6795732975006104} +02/24/2022 11:51:53 - INFO - codeparrot_training - Step 7953: {'lr': 0.0004812630870923577, 'samples': 4072448, 'steps': 7953, 'loss/train': 2.2243003845214844} +02/24/2022 11:51:56 - INFO - codeparrot_training - Step 7954: {'lr': 0.00048125687148544316, 'samples': 4072960, 'steps': 7954, 'loss/train': 2.7206146717071533} +02/24/2022 11:52:00 - INFO - codeparrot_training - Step 7955: {'lr': 0.0004812506548878977, 'samples': 4073472, 'steps': 7955, 'loss/train': 0.573676347732544} +02/24/2022 11:52:06 - INFO - codeparrot_training - Step 7956: {'lr': 0.0004812444372997479, 'samples': 4073984, 'steps': 7956, 'loss/train': 1.9819824695587158} +02/24/2022 11:52:09 - INFO - codeparrot_training - Step 7957: {'lr': 0.00048123821872102023, 'samples': 4074496, 'steps': 7957, 'loss/train': 1.3413965702056885} +02/24/2022 11:52:15 - INFO - codeparrot_training - Step 7958: {'lr': 0.00048123199915174153, 'samples': 4075008, 'steps': 7958, 'loss/train': 2.1608805656433105} +02/24/2022 11:52:21 - INFO - codeparrot_training - Step 7959: {'lr': 0.0004812257785919384, 'samples': 4075520, 'steps': 7959, 'loss/train': 1.45054292678833} +02/24/2022 11:52:24 - INFO - codeparrot_training - Step 7960: {'lr': 0.00048121955704163744, 'samples': 4076032, 'steps': 7960, 'loss/train': 1.766263484954834} +02/24/2022 11:52:30 - INFO - codeparrot_training - Step 7961: {'lr': 0.00048121333450086524, 'samples': 4076544, 'steps': 7961, 'loss/train': 0.1166217252612114} +02/24/2022 11:52:33 - INFO - codeparrot_training - Step 7962: {'lr': 0.00048120711096964866, 'samples': 4077056, 'steps': 7962, 'loss/train': 2.2934818267822266} +02/24/2022 11:52:39 - INFO - codeparrot_training - Step 7963: {'lr': 0.0004812008864480142, 'samples': 4077568, 'steps': 7963, 'loss/train': 3.1890931129455566} +02/24/2022 11:52:43 - INFO - codeparrot_training - Step 7964: {'lr': 0.0004811946609359885, 'samples': 4078080, 'steps': 7964, 'loss/train': 2.035816192626953} +02/24/2022 11:52:46 - INFO - codeparrot_training - Step 7965: {'lr': 0.00048118843443359827, 'samples': 4078592, 'steps': 7965, 'loss/train': 0.8988781571388245} +02/24/2022 11:52:52 - INFO - codeparrot_training - Step 7966: {'lr': 0.00048118220694087023, 'samples': 4079104, 'steps': 7966, 'loss/train': 2.727423906326294} +02/24/2022 11:52:56 - INFO - codeparrot_training - Step 7967: {'lr': 0.00048117597845783106, 'samples': 4079616, 'steps': 7967, 'loss/train': 2.156682014465332} +02/24/2022 11:53:01 - INFO - codeparrot_training - Step 7968: {'lr': 0.0004811697489845074, 'samples': 4080128, 'steps': 7968, 'loss/train': 1.8921197652816772} +02/24/2022 11:53:05 - INFO - codeparrot_training - Step 7969: {'lr': 0.0004811635185209259, 'samples': 4080640, 'steps': 7969, 'loss/train': 2.3413808345794678} +02/24/2022 11:53:10 - INFO - codeparrot_training - Step 7970: {'lr': 0.0004811572870671133, 'samples': 4081152, 'steps': 7970, 'loss/train': 2.5551137924194336} +02/24/2022 11:53:14 - INFO - codeparrot_training - Step 7971: {'lr': 0.0004811510546230963, 'samples': 4081664, 'steps': 7971, 'loss/train': 2.3741261959075928} +02/24/2022 11:53:19 - INFO - codeparrot_training - Step 7972: {'lr': 0.0004811448211889016, 'samples': 4082176, 'steps': 7972, 'loss/train': 2.4906249046325684} +02/24/2022 11:53:25 - INFO - codeparrot_training - Step 7973: {'lr': 0.0004811385867645558, 'samples': 4082688, 'steps': 7973, 'loss/train': 2.2428839206695557} +02/24/2022 11:53:28 - INFO - codeparrot_training - Step 7974: {'lr': 0.00048113235135008574, 'samples': 4083200, 'steps': 7974, 'loss/train': 2.148123025894165} +02/24/2022 11:53:34 - INFO - codeparrot_training - Step 7975: {'lr': 0.0004811261149455181, 'samples': 4083712, 'steps': 7975, 'loss/train': 2.495208501815796} +02/24/2022 11:53:38 - INFO - codeparrot_training - Step 7976: {'lr': 0.0004811198775508796, 'samples': 4084224, 'steps': 7976, 'loss/train': 2.1607701778411865} +02/24/2022 11:53:41 - INFO - codeparrot_training - Step 7977: {'lr': 0.0004811136391661969, 'samples': 4084736, 'steps': 7977, 'loss/train': 2.479353904724121} +02/24/2022 11:53:47 - INFO - codeparrot_training - Step 7978: {'lr': 0.0004811073997914967, 'samples': 4085248, 'steps': 7978, 'loss/train': 2.566638469696045} +02/24/2022 11:53:52 - INFO - codeparrot_training - Step 7979: {'lr': 0.00048110115942680585, 'samples': 4085760, 'steps': 7979, 'loss/train': 2.3170058727264404} +02/24/2022 11:53:56 - INFO - codeparrot_training - Step 7980: {'lr': 0.000481094918072151, 'samples': 4086272, 'steps': 7980, 'loss/train': 1.4209965467453003} +02/24/2022 11:54:01 - INFO - codeparrot_training - Step 7981: {'lr': 0.0004810886757275589, 'samples': 4086784, 'steps': 7981, 'loss/train': 2.3535349369049072} +02/24/2022 11:54:05 - INFO - codeparrot_training - Step 7982: {'lr': 0.0004810824323930563, 'samples': 4087296, 'steps': 7982, 'loss/train': 2.260396718978882} +02/24/2022 11:54:10 - INFO - codeparrot_training - Step 7983: {'lr': 0.00048107618806866994, 'samples': 4087808, 'steps': 7983, 'loss/train': 1.198960542678833} +02/24/2022 11:54:14 - INFO - codeparrot_training - Step 7984: {'lr': 0.0004810699427544265, 'samples': 4088320, 'steps': 7984, 'loss/train': 2.7593555450439453} +02/24/2022 11:54:19 - INFO - codeparrot_training - Step 7985: {'lr': 0.00048106369645035284, 'samples': 4088832, 'steps': 7985, 'loss/train': 1.948966383934021} +02/24/2022 11:54:23 - INFO - codeparrot_training - Step 7986: {'lr': 0.0004810574491564757, 'samples': 4089344, 'steps': 7986, 'loss/train': 3.0611252784729004} +02/24/2022 11:54:28 - INFO - codeparrot_training - Step 7987: {'lr': 0.0004810512008728218, 'samples': 4089856, 'steps': 7987, 'loss/train': 2.946183443069458} +02/24/2022 11:54:32 - INFO - codeparrot_training - Step 7988: {'lr': 0.00048104495159941794, 'samples': 4090368, 'steps': 7988, 'loss/train': 2.5046749114990234} +02/24/2022 11:54:37 - INFO - codeparrot_training - Step 7989: {'lr': 0.00048103870133629084, 'samples': 4090880, 'steps': 7989, 'loss/train': 1.8661895990371704} +02/24/2022 11:54:41 - INFO - codeparrot_training - Step 7990: {'lr': 0.00048103245008346735, 'samples': 4091392, 'steps': 7990, 'loss/train': 1.6969434022903442} +02/24/2022 11:54:47 - INFO - codeparrot_training - Step 7991: {'lr': 0.0004810261978409742, 'samples': 4091904, 'steps': 7991, 'loss/train': 2.6047844886779785} +02/24/2022 11:54:51 - INFO - codeparrot_training - Step 7992: {'lr': 0.00048101994460883815, 'samples': 4092416, 'steps': 7992, 'loss/train': 2.207082748413086} +02/24/2022 11:54:54 - INFO - codeparrot_training - Step 7993: {'lr': 0.00048101369038708596, 'samples': 4092928, 'steps': 7993, 'loss/train': 0.4118365943431854} +02/24/2022 11:55:00 - INFO - codeparrot_training - Step 7994: {'lr': 0.0004810074351757446, 'samples': 4093440, 'steps': 7994, 'loss/train': 2.19514536857605} +02/24/2022 11:55:03 - INFO - codeparrot_training - Step 7995: {'lr': 0.00048100117897484064, 'samples': 4093952, 'steps': 7995, 'loss/train': 2.972121238708496} +02/24/2022 11:55:09 - INFO - codeparrot_training - Step 7996: {'lr': 0.0004809949217844011, 'samples': 4094464, 'steps': 7996, 'loss/train': 2.363495349884033} +02/24/2022 11:55:12 - INFO - codeparrot_training - Step 7997: {'lr': 0.00048098866360445254, 'samples': 4094976, 'steps': 7997, 'loss/train': 1.8781957626342773} +02/24/2022 11:55:18 - INFO - codeparrot_training - Step 7998: {'lr': 0.00048098240443502195, 'samples': 4095488, 'steps': 7998, 'loss/train': 2.3064351081848145} +02/24/2022 11:55:21 - INFO - codeparrot_training - Step 7999: {'lr': 0.000480976144276136, 'samples': 4096000, 'steps': 7999, 'loss/train': 2.3386523723602295} +02/24/2022 11:55:21 - INFO - codeparrot_training - Evaluating and saving model checkpoint