diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -3319,3 +3319,1009 @@ Use FP16 precision: False 02/24/2022 05:27:15 - INFO - codeparrot_training - Step 2998: {'lr': 0.0004994668689847885, 'samples': 1535488, 'steps': 2998, 'loss/train': 3.5141983032226562} 02/24/2022 05:27:21 - INFO - codeparrot_training - Step 2999: {'lr': 0.0004994658004315358, 'samples': 1536000, 'steps': 2999, 'loss/train': 4.3175482749938965} 02/24/2022 05:27:21 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 05:27:38 - WARNING - huggingface_hub.repository - Several commits (3) will be pushed upstream. +02/24/2022 05:27:38 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 05:28:10 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 0998bf2..292ce9a floral-grass-11 -> floral-grass-11 + +02/24/2022 05:28:15 - INFO - codeparrot_training - Step 3000: {'lr': 0.0004994647308096509, 'samples': 1536512, 'steps': 3000, 'loss/train': 2.8926448822021484} +02/24/2022 05:28:21 - INFO - codeparrot_training - Step 3001: {'lr': 0.0004994636601191383, 'samples': 1537024, 'steps': 3001, 'loss/train': 2.7182507514953613} +02/24/2022 05:28:25 - INFO - codeparrot_training - Step 3002: {'lr': 0.0004994625883600025, 'samples': 1537536, 'steps': 3002, 'loss/train': 2.4522995948791504} +02/24/2022 05:28:30 - INFO - codeparrot_training - Step 3003: {'lr': 0.0004994615155322483, 'samples': 1538048, 'steps': 3003, 'loss/train': 2.787111282348633} +02/24/2022 05:28:34 - INFO - codeparrot_training - Step 3004: {'lr': 0.0004994604416358801, 'samples': 1538560, 'steps': 3004, 'loss/train': 3.58676815032959} +02/24/2022 05:28:39 - INFO - codeparrot_training - Step 3005: {'lr': 0.0004994593666709027, 'samples': 1539072, 'steps': 3005, 'loss/train': 3.154186725616455} +02/24/2022 05:28:43 - INFO - codeparrot_training - Step 3006: {'lr': 0.0004994582906373205, 'samples': 1539584, 'steps': 3006, 'loss/train': 2.457848310470581} +02/24/2022 05:28:48 - INFO - codeparrot_training - Step 3007: {'lr': 0.0004994572135351382, 'samples': 1540096, 'steps': 3007, 'loss/train': 3.2163219451904297} +02/24/2022 05:28:52 - INFO - codeparrot_training - Step 3008: {'lr': 0.0004994561353643604, 'samples': 1540608, 'steps': 3008, 'loss/train': 3.254566192626953} +02/24/2022 05:28:57 - INFO - codeparrot_training - Step 3009: {'lr': 0.0004994550561249917, 'samples': 1541120, 'steps': 3009, 'loss/train': 2.7579524517059326} +02/24/2022 05:29:01 - INFO - codeparrot_training - Step 3010: {'lr': 0.0004994539758170367, 'samples': 1541632, 'steps': 3010, 'loss/train': 3.395554542541504} +02/24/2022 05:29:06 - INFO - codeparrot_training - Step 3011: {'lr': 0.0004994528944405002, 'samples': 1542144, 'steps': 3011, 'loss/train': 3.2159061431884766} +02/24/2022 05:29:10 - INFO - codeparrot_training - Step 3012: {'lr': 0.0004994518119953867, 'samples': 1542656, 'steps': 3012, 'loss/train': 3.997093677520752} +02/24/2022 05:29:16 - INFO - codeparrot_training - Step 3013: {'lr': 0.0004994507284817009, 'samples': 1543168, 'steps': 3013, 'loss/train': 2.1546213626861572} +02/24/2022 05:29:19 - INFO - codeparrot_training - Step 3014: {'lr': 0.0004994496438994472, 'samples': 1543680, 'steps': 3014, 'loss/train': 3.6459944248199463} +02/24/2022 05:29:25 - INFO - codeparrot_training - Step 3015: {'lr': 0.0004994485582486306, 'samples': 1544192, 'steps': 3015, 'loss/train': 2.3420541286468506} +02/24/2022 05:29:30 - INFO - codeparrot_training - Step 3016: {'lr': 0.0004994474715292555, 'samples': 1544704, 'steps': 3016, 'loss/train': 3.2091407775878906} +02/24/2022 05:29:34 - INFO - codeparrot_training - Step 3017: {'lr': 0.0004994463837413268, 'samples': 1545216, 'steps': 3017, 'loss/train': 3.6281821727752686} +02/24/2022 05:29:40 - INFO - codeparrot_training - Step 3018: {'lr': 0.0004994452948848488, 'samples': 1545728, 'steps': 3018, 'loss/train': 2.7767629623413086} +02/24/2022 05:29:43 - INFO - codeparrot_training - Step 3019: {'lr': 0.0004994442049598265, 'samples': 1546240, 'steps': 3019, 'loss/train': 1.3820394277572632} +02/24/2022 05:29:47 - INFO - codeparrot_training - Step 3020: {'lr': 0.0004994431139662643, 'samples': 1546752, 'steps': 3020, 'loss/train': 2.668639898300171} +02/24/2022 05:29:52 - INFO - codeparrot_training - Step 3021: {'lr': 0.0004994420219041671, 'samples': 1547264, 'steps': 3021, 'loss/train': 4.131133079528809} +02/24/2022 05:29:56 - INFO - codeparrot_training - Step 3022: {'lr': 0.0004994409287735394, 'samples': 1547776, 'steps': 3022, 'loss/train': 3.1065754890441895} +02/24/2022 05:30:01 - INFO - codeparrot_training - Step 3023: {'lr': 0.0004994398345743861, 'samples': 1548288, 'steps': 3023, 'loss/train': 2.1964523792266846} +02/24/2022 05:30:05 - INFO - codeparrot_training - Step 3024: {'lr': 0.0004994387393067117, 'samples': 1548800, 'steps': 3024, 'loss/train': 2.6514766216278076} +02/24/2022 05:30:10 - INFO - codeparrot_training - Step 3025: {'lr': 0.0004994376429705208, 'samples': 1549312, 'steps': 3025, 'loss/train': 3.2547452449798584} +02/24/2022 05:30:14 - INFO - codeparrot_training - Step 3026: {'lr': 0.0004994365455658185, 'samples': 1549824, 'steps': 3026, 'loss/train': 3.4843621253967285} +02/24/2022 05:30:19 - INFO - codeparrot_training - Step 3027: {'lr': 0.000499435447092609, 'samples': 1550336, 'steps': 3027, 'loss/train': 2.9100887775421143} +02/24/2022 05:30:22 - INFO - codeparrot_training - Step 3028: {'lr': 0.0004994343475508974, 'samples': 1550848, 'steps': 3028, 'loss/train': 4.048941612243652} +02/24/2022 05:30:29 - INFO - codeparrot_training - Step 3029: {'lr': 0.0004994332469406882, 'samples': 1551360, 'steps': 3029, 'loss/train': 4.630547523498535} +02/24/2022 05:30:32 - INFO - codeparrot_training - Step 3030: {'lr': 0.0004994321452619863, 'samples': 1551872, 'steps': 3030, 'loss/train': 9.189669609069824} +02/24/2022 05:30:38 - INFO - codeparrot_training - Step 3031: {'lr': 0.0004994310425147962, 'samples': 1552384, 'steps': 3031, 'loss/train': 1.415884017944336} +02/24/2022 05:30:41 - INFO - codeparrot_training - Step 3032: {'lr': 0.0004994299386991227, 'samples': 1552896, 'steps': 3032, 'loss/train': 3.1991424560546875} +02/24/2022 05:30:47 - INFO - codeparrot_training - Step 3033: {'lr': 0.0004994288338149705, 'samples': 1553408, 'steps': 3033, 'loss/train': 2.432361125946045} +02/24/2022 05:30:50 - INFO - codeparrot_training - Step 3034: {'lr': 0.0004994277278623445, 'samples': 1553920, 'steps': 3034, 'loss/train': 3.262948989868164} +02/24/2022 05:30:56 - INFO - codeparrot_training - Step 3035: {'lr': 0.0004994266208412493, 'samples': 1554432, 'steps': 3035, 'loss/train': 2.9390087127685547} +02/24/2022 05:30:59 - INFO - codeparrot_training - Step 3036: {'lr': 0.0004994255127516895, 'samples': 1554944, 'steps': 3036, 'loss/train': 4.075638771057129} +02/24/2022 05:31:05 - INFO - codeparrot_training - Step 3037: {'lr': 0.0004994244035936701, 'samples': 1555456, 'steps': 3037, 'loss/train': 3.095963716506958} +02/24/2022 05:31:08 - INFO - codeparrot_training - Step 3038: {'lr': 0.0004994232933671958, 'samples': 1555968, 'steps': 3038, 'loss/train': 2.6832947731018066} +02/24/2022 05:31:15 - INFO - codeparrot_training - Step 3039: {'lr': 0.0004994221820722713, 'samples': 1556480, 'steps': 3039, 'loss/train': 0.6859299540519714} +02/24/2022 05:31:18 - INFO - codeparrot_training - Step 3040: {'lr': 0.0004994210697089013, 'samples': 1556992, 'steps': 3040, 'loss/train': 2.2006866931915283} +02/24/2022 05:31:23 - INFO - codeparrot_training - Step 3041: {'lr': 0.0004994199562770907, 'samples': 1557504, 'steps': 3041, 'loss/train': 1.9316508769989014} +02/24/2022 05:31:27 - INFO - codeparrot_training - Step 3042: {'lr': 0.0004994188417768443, 'samples': 1558016, 'steps': 3042, 'loss/train': 2.3153460025787354} +02/24/2022 05:31:32 - INFO - codeparrot_training - Step 3043: {'lr': 0.0004994177262081666, 'samples': 1558528, 'steps': 3043, 'loss/train': 2.8302969932556152} +02/24/2022 05:31:38 - INFO - codeparrot_training - Step 3044: {'lr': 0.0004994166095710626, 'samples': 1559040, 'steps': 3044, 'loss/train': 2.9031593799591064} +02/24/2022 05:31:42 - INFO - codeparrot_training - Step 3045: {'lr': 0.0004994154918655371, 'samples': 1559552, 'steps': 3045, 'loss/train': 3.1518476009368896} +02/24/2022 05:31:47 - INFO - codeparrot_training - Step 3046: {'lr': 0.0004994143730915948, 'samples': 1560064, 'steps': 3046, 'loss/train': 3.062596082687378} +02/24/2022 05:31:50 - INFO - codeparrot_training - Step 3047: {'lr': 0.0004994132532492406, 'samples': 1560576, 'steps': 3047, 'loss/train': 3.987868309020996} +02/24/2022 05:31:57 - INFO - codeparrot_training - Step 3048: {'lr': 0.0004994121323384791, 'samples': 1561088, 'steps': 3048, 'loss/train': 3.3002355098724365} +02/24/2022 05:32:01 - INFO - codeparrot_training - Step 3049: {'lr': 0.0004994110103593154, 'samples': 1561600, 'steps': 3049, 'loss/train': 2.5775294303894043} +02/24/2022 05:32:06 - INFO - codeparrot_training - Step 3050: {'lr': 0.0004994098873117539, 'samples': 1562112, 'steps': 3050, 'loss/train': 2.7640812397003174} +02/24/2022 05:32:09 - INFO - codeparrot_training - Step 3051: {'lr': 0.0004994087631957998, 'samples': 1562624, 'steps': 3051, 'loss/train': 2.525949001312256} +02/24/2022 05:32:15 - INFO - codeparrot_training - Step 3052: {'lr': 0.0004994076380114577, 'samples': 1563136, 'steps': 3052, 'loss/train': 2.823159694671631} +02/24/2022 05:32:18 - INFO - codeparrot_training - Step 3053: {'lr': 0.0004994065117587325, 'samples': 1563648, 'steps': 3053, 'loss/train': 2.5650107860565186} +02/24/2022 05:32:24 - INFO - codeparrot_training - Step 3054: {'lr': 0.0004994053844376289, 'samples': 1564160, 'steps': 3054, 'loss/train': 2.197035312652588} +02/24/2022 05:32:28 - INFO - codeparrot_training - Step 3055: {'lr': 0.000499404256048152, 'samples': 1564672, 'steps': 3055, 'loss/train': 3.0703396797180176} +02/24/2022 05:32:33 - INFO - codeparrot_training - Step 3056: {'lr': 0.0004994031265903063, 'samples': 1565184, 'steps': 3056, 'loss/train': 2.5021157264709473} +02/24/2022 05:32:36 - INFO - codeparrot_training - Step 3057: {'lr': 0.0004994019960640969, 'samples': 1565696, 'steps': 3057, 'loss/train': 3.484729528427124} +02/24/2022 05:32:43 - INFO - codeparrot_training - Step 3058: {'lr': 0.0004994008644695285, 'samples': 1566208, 'steps': 3058, 'loss/train': 3.1117043495178223} +02/24/2022 05:32:46 - INFO - codeparrot_training - Step 3059: {'lr': 0.0004993997318066061, 'samples': 1566720, 'steps': 3059, 'loss/train': 3.684542417526245} +02/24/2022 05:32:52 - INFO - codeparrot_training - Step 3060: {'lr': 0.0004993985980753342, 'samples': 1567232, 'steps': 3060, 'loss/train': 2.172954797744751} +02/24/2022 05:32:55 - INFO - codeparrot_training - Step 3061: {'lr': 0.0004993974632757181, 'samples': 1567744, 'steps': 3061, 'loss/train': 2.8374783992767334} +02/24/2022 05:33:01 - INFO - codeparrot_training - Step 3062: {'lr': 0.0004993963274077624, 'samples': 1568256, 'steps': 3062, 'loss/train': 2.33644962310791} +02/24/2022 05:33:04 - INFO - codeparrot_training - Step 3063: {'lr': 0.000499395190471472, 'samples': 1568768, 'steps': 3063, 'loss/train': 3.3544886112213135} +02/24/2022 05:33:10 - INFO - codeparrot_training - Step 3064: {'lr': 0.0004993940524668518, 'samples': 1569280, 'steps': 3064, 'loss/train': 2.6159868240356445} +02/24/2022 05:33:13 - INFO - codeparrot_training - Step 3065: {'lr': 0.0004993929133939067, 'samples': 1569792, 'steps': 3065, 'loss/train': 4.779129981994629} +02/24/2022 05:33:19 - INFO - codeparrot_training - Step 3066: {'lr': 0.0004993917732526416, 'samples': 1570304, 'steps': 3066, 'loss/train': 2.871546506881714} +02/24/2022 05:33:22 - INFO - codeparrot_training - Step 3067: {'lr': 0.0004993906320430613, 'samples': 1570816, 'steps': 3067, 'loss/train': 1.2023429870605469} +02/24/2022 05:33:26 - INFO - codeparrot_training - Step 3068: {'lr': 0.0004993894897651706, 'samples': 1571328, 'steps': 3068, 'loss/train': 3.7668938636779785} +02/24/2022 05:33:32 - INFO - codeparrot_training - Step 3069: {'lr': 0.0004993883464189747, 'samples': 1571840, 'steps': 3069, 'loss/train': 2.192412853240967} +02/24/2022 05:33:35 - INFO - codeparrot_training - Step 3070: {'lr': 0.0004993872020044781, 'samples': 1572352, 'steps': 3070, 'loss/train': 3.0252087116241455} +02/24/2022 05:33:41 - INFO - codeparrot_training - Step 3071: {'lr': 0.0004993860565216861, 'samples': 1572864, 'steps': 3071, 'loss/train': 2.8106606006622314} +02/24/2022 05:33:44 - INFO - codeparrot_training - Step 3072: {'lr': 0.0004993849099706034, 'samples': 1573376, 'steps': 3072, 'loss/train': 3.3133106231689453} +02/24/2022 05:33:50 - INFO - codeparrot_training - Step 3073: {'lr': 0.0004993837623512349, 'samples': 1573888, 'steps': 3073, 'loss/train': 2.569164276123047} +02/24/2022 05:33:53 - INFO - codeparrot_training - Step 3074: {'lr': 0.0004993826136635856, 'samples': 1574400, 'steps': 3074, 'loss/train': 1.7295336723327637} +02/24/2022 05:33:59 - INFO - codeparrot_training - Step 3075: {'lr': 0.0004993814639076602, 'samples': 1574912, 'steps': 3075, 'loss/train': 4.119635581970215} +02/24/2022 05:34:03 - INFO - codeparrot_training - Step 3076: {'lr': 0.000499380313083464, 'samples': 1575424, 'steps': 3076, 'loss/train': 2.8600194454193115} +02/24/2022 05:34:08 - INFO - codeparrot_training - Step 3077: {'lr': 0.0004993791611910017, 'samples': 1575936, 'steps': 3077, 'loss/train': 3.1440136432647705} +02/24/2022 05:34:12 - INFO - codeparrot_training - Step 3078: {'lr': 0.0004993780082302782, 'samples': 1576448, 'steps': 3078, 'loss/train': 3.172365665435791} +02/24/2022 05:34:17 - INFO - codeparrot_training - Step 3079: {'lr': 0.0004993768542012985, 'samples': 1576960, 'steps': 3079, 'loss/train': 3.4987597465515137} +02/24/2022 05:34:23 - INFO - codeparrot_training - Step 3080: {'lr': 0.0004993756991040675, 'samples': 1577472, 'steps': 3080, 'loss/train': 1.0102487802505493} +02/24/2022 05:34:26 - INFO - codeparrot_training - Step 3081: {'lr': 0.0004993745429385903, 'samples': 1577984, 'steps': 3081, 'loss/train': 3.1577799320220947} +02/24/2022 05:34:32 - INFO - codeparrot_training - Step 3082: {'lr': 0.0004993733857048717, 'samples': 1578496, 'steps': 3082, 'loss/train': 3.864769697189331} +02/24/2022 05:34:35 - INFO - codeparrot_training - Step 3083: {'lr': 0.0004993722274029167, 'samples': 1579008, 'steps': 3083, 'loss/train': 3.4096155166625977} +02/24/2022 05:34:41 - INFO - codeparrot_training - Step 3084: {'lr': 0.0004993710680327301, 'samples': 1579520, 'steps': 3084, 'loss/train': 2.480449676513672} +02/24/2022 05:34:45 - INFO - codeparrot_training - Step 3085: {'lr': 0.0004993699075943172, 'samples': 1580032, 'steps': 3085, 'loss/train': 2.4581048488616943} +02/24/2022 05:34:50 - INFO - codeparrot_training - Step 3086: {'lr': 0.0004993687460876829, 'samples': 1580544, 'steps': 3086, 'loss/train': 4.506467819213867} +02/24/2022 05:34:54 - INFO - codeparrot_training - Step 3087: {'lr': 0.0004993675835128319, 'samples': 1581056, 'steps': 3087, 'loss/train': 3.1030325889587402} +02/24/2022 05:34:59 - INFO - codeparrot_training - Step 3088: {'lr': 0.0004993664198697694, 'samples': 1581568, 'steps': 3088, 'loss/train': 2.63893461227417} +02/24/2022 05:35:03 - INFO - codeparrot_training - Step 3089: {'lr': 0.0004993652551585003, 'samples': 1582080, 'steps': 3089, 'loss/train': 2.6485581398010254} +02/24/2022 05:35:09 - INFO - codeparrot_training - Step 3090: {'lr': 0.0004993640893790298, 'samples': 1582592, 'steps': 3090, 'loss/train': 2.240339994430542} +02/24/2022 05:35:12 - INFO - codeparrot_training - Step 3091: {'lr': 0.0004993629225313625, 'samples': 1583104, 'steps': 3091, 'loss/train': 8.609245300292969} +02/24/2022 05:35:15 - INFO - codeparrot_training - Step 3092: {'lr': 0.0004993617546155037, 'samples': 1583616, 'steps': 3092, 'loss/train': 2.914644479751587} +02/24/2022 05:35:21 - INFO - codeparrot_training - Step 3093: {'lr': 0.0004993605856314584, 'samples': 1584128, 'steps': 3093, 'loss/train': 3.2939345836639404} +02/24/2022 05:35:24 - INFO - codeparrot_training - Step 3094: {'lr': 0.0004993594155792315, 'samples': 1584640, 'steps': 3094, 'loss/train': 3.5357043743133545} +02/24/2022 05:35:31 - INFO - codeparrot_training - Step 3095: {'lr': 0.000499358244458828, 'samples': 1585152, 'steps': 3095, 'loss/train': 4.278213977813721} +02/24/2022 05:35:34 - INFO - codeparrot_training - Step 3096: {'lr': 0.0004993570722702529, 'samples': 1585664, 'steps': 3096, 'loss/train': 4.091033935546875} +02/24/2022 05:35:39 - INFO - codeparrot_training - Step 3097: {'lr': 0.0004993558990135115, 'samples': 1586176, 'steps': 3097, 'loss/train': 2.089538812637329} +02/24/2022 05:35:43 - INFO - codeparrot_training - Step 3098: {'lr': 0.0004993547246886084, 'samples': 1586688, 'steps': 3098, 'loss/train': 3.41424298286438} +02/24/2022 05:35:49 - INFO - codeparrot_training - Step 3099: {'lr': 0.0004993535492955488, 'samples': 1587200, 'steps': 3099, 'loss/train': 2.994297742843628} +02/24/2022 05:35:54 - INFO - codeparrot_training - Step 3100: {'lr': 0.000499352372834338, 'samples': 1587712, 'steps': 3100, 'loss/train': 2.984280824661255} +02/24/2022 05:35:58 - INFO - codeparrot_training - Step 3101: {'lr': 0.0004993511953049807, 'samples': 1588224, 'steps': 3101, 'loss/train': 1.667233943939209} +02/24/2022 05:36:01 - INFO - codeparrot_training - Step 3102: {'lr': 0.000499350016707482, 'samples': 1588736, 'steps': 3102, 'loss/train': 1.7583197355270386} +02/24/2022 05:36:07 - INFO - codeparrot_training - Step 3103: {'lr': 0.0004993488370418471, 'samples': 1589248, 'steps': 3103, 'loss/train': 2.6337673664093018} +02/24/2022 05:36:13 - INFO - codeparrot_training - Step 3104: {'lr': 0.0004993476563080809, 'samples': 1589760, 'steps': 3104, 'loss/train': 3.095831871032715} +02/24/2022 05:36:16 - INFO - codeparrot_training - Step 3105: {'lr': 0.0004993464745061885, 'samples': 1590272, 'steps': 3105, 'loss/train': 3.2371175289154053} +02/24/2022 05:36:22 - INFO - codeparrot_training - Step 3106: {'lr': 0.0004993452916361751, 'samples': 1590784, 'steps': 3106, 'loss/train': 1.9171984195709229} +02/24/2022 05:36:25 - INFO - codeparrot_training - Step 3107: {'lr': 0.0004993441076980455, 'samples': 1591296, 'steps': 3107, 'loss/train': 2.9600372314453125} +02/24/2022 05:36:31 - INFO - codeparrot_training - Step 3108: {'lr': 0.0004993429226918051, 'samples': 1591808, 'steps': 3108, 'loss/train': 3.627802610397339} +02/24/2022 05:36:34 - INFO - codeparrot_training - Step 3109: {'lr': 0.0004993417366174586, 'samples': 1592320, 'steps': 3109, 'loss/train': 9.288830757141113} +02/24/2022 05:36:38 - INFO - codeparrot_training - Step 3110: {'lr': 0.0004993405494750113, 'samples': 1592832, 'steps': 3110, 'loss/train': 3.908906936645508} +02/24/2022 05:36:43 - INFO - codeparrot_training - Step 3111: {'lr': 0.0004993393612644683, 'samples': 1593344, 'steps': 3111, 'loss/train': 1.6921014785766602} +02/24/2022 05:36:49 - INFO - codeparrot_training - Step 3112: {'lr': 0.0004993381719858347, 'samples': 1593856, 'steps': 3112, 'loss/train': 3.225355863571167} +02/24/2022 05:36:52 - INFO - codeparrot_training - Step 3113: {'lr': 0.0004993369816391156, 'samples': 1594368, 'steps': 3113, 'loss/train': 3.1029157638549805} +02/24/2022 05:36:58 - INFO - codeparrot_training - Step 3114: {'lr': 0.0004993357902243158, 'samples': 1594880, 'steps': 3114, 'loss/train': 1.9244272708892822} +02/24/2022 05:37:01 - INFO - codeparrot_training - Step 3115: {'lr': 0.0004993345977414408, 'samples': 1595392, 'steps': 3115, 'loss/train': 1.8763879537582397} +02/24/2022 05:37:07 - INFO - codeparrot_training - Step 3116: {'lr': 0.0004993334041904957, 'samples': 1595904, 'steps': 3116, 'loss/train': 1.612082839012146} +02/24/2022 05:37:11 - INFO - codeparrot_training - Step 3117: {'lr': 0.0004993322095714853, 'samples': 1596416, 'steps': 3117, 'loss/train': 3.9887824058532715} +02/24/2022 05:37:16 - INFO - codeparrot_training - Step 3118: {'lr': 0.0004993310138844149, 'samples': 1596928, 'steps': 3118, 'loss/train': 2.430988311767578} +02/24/2022 05:37:20 - INFO - codeparrot_training - Step 3119: {'lr': 0.0004993298171292896, 'samples': 1597440, 'steps': 3119, 'loss/train': 3.412546396255493} +02/24/2022 05:37:25 - INFO - codeparrot_training - Step 3120: {'lr': 0.0004993286193061145, 'samples': 1597952, 'steps': 3120, 'loss/train': 2.7344772815704346} +02/24/2022 05:37:29 - INFO - codeparrot_training - Step 3121: {'lr': 0.0004993274204148949, 'samples': 1598464, 'steps': 3121, 'loss/train': 1.9452003240585327} +02/24/2022 05:37:34 - INFO - codeparrot_training - Step 3122: {'lr': 0.0004993262204556356, 'samples': 1598976, 'steps': 3122, 'loss/train': 4.483450412750244} +02/24/2022 05:37:38 - INFO - codeparrot_training - Step 3123: {'lr': 0.0004993250194283421, 'samples': 1599488, 'steps': 3123, 'loss/train': 2.142258882522583} +02/24/2022 05:37:43 - INFO - codeparrot_training - Step 3124: {'lr': 0.0004993238173330194, 'samples': 1600000, 'steps': 3124, 'loss/train': 3.2038655281066895} +02/24/2022 05:37:47 - INFO - codeparrot_training - Step 3125: {'lr': 0.0004993226141696725, 'samples': 1600512, 'steps': 3125, 'loss/train': 3.7841668128967285} +02/24/2022 05:37:52 - INFO - codeparrot_training - Step 3126: {'lr': 0.0004993214099383069, 'samples': 1601024, 'steps': 3126, 'loss/train': 0.7654164433479309} +02/24/2022 05:37:56 - INFO - codeparrot_training - Step 3127: {'lr': 0.0004993202046389274, 'samples': 1601536, 'steps': 3127, 'loss/train': 2.519116163253784} +02/24/2022 05:38:01 - INFO - codeparrot_training - Step 3128: {'lr': 0.0004993189982715392, 'samples': 1602048, 'steps': 3128, 'loss/train': 1.8190758228302002} +02/24/2022 05:38:05 - INFO - codeparrot_training - Step 3129: {'lr': 0.0004993177908361479, 'samples': 1602560, 'steps': 3129, 'loss/train': 0.8677091002464294} +02/24/2022 05:38:11 - INFO - codeparrot_training - Step 3130: {'lr': 0.000499316582332758, 'samples': 1603072, 'steps': 3130, 'loss/train': 3.7022597789764404} +02/24/2022 05:38:14 - INFO - codeparrot_training - Step 3131: {'lr': 0.0004993153727613753, 'samples': 1603584, 'steps': 3131, 'loss/train': 2.8413612842559814} +02/24/2022 05:38:20 - INFO - codeparrot_training - Step 3132: {'lr': 0.0004993141621220046, 'samples': 1604096, 'steps': 3132, 'loss/train': 2.6280641555786133} +02/24/2022 05:38:23 - INFO - codeparrot_training - Step 3133: {'lr': 0.0004993129504146512, 'samples': 1604608, 'steps': 3133, 'loss/train': 3.023607015609741} +02/24/2022 05:38:29 - INFO - codeparrot_training - Step 3134: {'lr': 0.0004993117376393203, 'samples': 1605120, 'steps': 3134, 'loss/train': 2.760035991668701} +02/24/2022 05:38:32 - INFO - codeparrot_training - Step 3135: {'lr': 0.000499310523796017, 'samples': 1605632, 'steps': 3135, 'loss/train': 3.1640913486480713} +02/24/2022 05:38:38 - INFO - codeparrot_training - Step 3136: {'lr': 0.0004993093088847466, 'samples': 1606144, 'steps': 3136, 'loss/train': 2.2486836910247803} +02/24/2022 05:38:41 - INFO - codeparrot_training - Step 3137: {'lr': 0.0004993080929055144, 'samples': 1606656, 'steps': 3137, 'loss/train': 3.125383138656616} +02/24/2022 05:38:47 - INFO - codeparrot_training - Step 3138: {'lr': 0.0004993068758583254, 'samples': 1607168, 'steps': 3138, 'loss/train': 1.7867794036865234} +02/24/2022 05:38:50 - INFO - codeparrot_training - Step 3139: {'lr': 0.0004993056577431849, 'samples': 1607680, 'steps': 3139, 'loss/train': 3.286060094833374} +02/24/2022 05:38:56 - INFO - codeparrot_training - Step 3140: {'lr': 0.0004993044385600982, 'samples': 1608192, 'steps': 3140, 'loss/train': 4.156434535980225} +02/24/2022 05:38:59 - INFO - codeparrot_training - Step 3141: {'lr': 0.0004993032183090704, 'samples': 1608704, 'steps': 3141, 'loss/train': 2.3331122398376465} +02/24/2022 05:39:05 - INFO - codeparrot_training - Step 3142: {'lr': 0.0004993019969901069, 'samples': 1609216, 'steps': 3142, 'loss/train': 3.361048698425293} +02/24/2022 05:39:09 - INFO - codeparrot_training - Step 3143: {'lr': 0.0004993007746032126, 'samples': 1609728, 'steps': 3143, 'loss/train': 1.409009575843811} +02/24/2022 05:39:14 - INFO - codeparrot_training - Step 3144: {'lr': 0.000499299551148393, 'samples': 1610240, 'steps': 3144, 'loss/train': 4.10382080078125} +02/24/2022 05:39:18 - INFO - codeparrot_training - Step 3145: {'lr': 0.0004992983266256533, 'samples': 1610752, 'steps': 3145, 'loss/train': 1.9379990100860596} +02/24/2022 05:39:24 - INFO - codeparrot_training - Step 3146: {'lr': 0.0004992971010349987, 'samples': 1611264, 'steps': 3146, 'loss/train': 2.8312385082244873} +02/24/2022 05:39:27 - INFO - codeparrot_training - Step 3147: {'lr': 0.0004992958743764346, 'samples': 1611776, 'steps': 3147, 'loss/train': 2.8191604614257812} +02/24/2022 05:39:33 - INFO - codeparrot_training - Step 3148: {'lr': 0.0004992946466499661, 'samples': 1612288, 'steps': 3148, 'loss/train': 3.0731639862060547} +02/24/2022 05:39:36 - INFO - codeparrot_training - Step 3149: {'lr': 0.0004992934178555984, 'samples': 1612800, 'steps': 3149, 'loss/train': 1.7696489095687866} +02/24/2022 05:39:42 - INFO - codeparrot_training - Step 3150: {'lr': 0.000499292187993337, 'samples': 1613312, 'steps': 3150, 'loss/train': 3.066833019256592} +02/24/2022 05:39:46 - INFO - codeparrot_training - Step 3151: {'lr': 0.0004992909570631868, 'samples': 1613824, 'steps': 3151, 'loss/train': 3.392374038696289} +02/24/2022 05:39:51 - INFO - codeparrot_training - Step 3152: {'lr': 0.0004992897250651535, 'samples': 1614336, 'steps': 3152, 'loss/train': 0.44543054699897766} +02/24/2022 05:39:55 - INFO - codeparrot_training - Step 3153: {'lr': 0.0004992884919992421, 'samples': 1614848, 'steps': 3153, 'loss/train': 3.2773361206054688} +02/24/2022 05:40:00 - INFO - codeparrot_training - Step 3154: {'lr': 0.000499287257865458, 'samples': 1615360, 'steps': 3154, 'loss/train': 3.7567567825317383} +02/24/2022 05:40:04 - INFO - codeparrot_training - Step 3155: {'lr': 0.0004992860226638064, 'samples': 1615872, 'steps': 3155, 'loss/train': 2.750204563140869} +02/24/2022 05:40:09 - INFO - codeparrot_training - Step 3156: {'lr': 0.0004992847863942927, 'samples': 1616384, 'steps': 3156, 'loss/train': 1.7656526565551758} +02/24/2022 05:40:13 - INFO - codeparrot_training - Step 3157: {'lr': 0.000499283549056922, 'samples': 1616896, 'steps': 3157, 'loss/train': 3.4308788776397705} +02/24/2022 05:40:18 - INFO - codeparrot_training - Step 3158: {'lr': 0.0004992823106516999, 'samples': 1617408, 'steps': 3158, 'loss/train': 2.693948745727539} +02/24/2022 05:40:22 - INFO - codeparrot_training - Step 3159: {'lr': 0.0004992810711786314, 'samples': 1617920, 'steps': 3159, 'loss/train': 0.32078981399536133} +02/24/2022 05:40:27 - INFO - codeparrot_training - Step 3160: {'lr': 0.000499279830637722, 'samples': 1618432, 'steps': 3160, 'loss/train': 3.464844226837158} +02/24/2022 05:40:31 - INFO - codeparrot_training - Step 3161: {'lr': 0.000499278589028977, 'samples': 1618944, 'steps': 3161, 'loss/train': 3.604499578475952} +02/24/2022 05:40:36 - INFO - codeparrot_training - Step 3162: {'lr': 0.0004992773463524016, 'samples': 1619456, 'steps': 3162, 'loss/train': 2.6285791397094727} +02/24/2022 05:40:40 - INFO - codeparrot_training - Step 3163: {'lr': 0.0004992761026080013, 'samples': 1619968, 'steps': 3163, 'loss/train': 2.7477588653564453} +02/24/2022 05:40:45 - INFO - codeparrot_training - Step 3164: {'lr': 0.0004992748577957812, 'samples': 1620480, 'steps': 3164, 'loss/train': 3.1901142597198486} +02/24/2022 05:40:49 - INFO - codeparrot_training - Step 3165: {'lr': 0.0004992736119157469, 'samples': 1620992, 'steps': 3165, 'loss/train': 3.7214250564575195} +02/24/2022 05:40:55 - INFO - codeparrot_training - Step 3166: {'lr': 0.0004992723649679035, 'samples': 1621504, 'steps': 3166, 'loss/train': 4.488134860992432} +02/24/2022 05:40:58 - INFO - codeparrot_training - Step 3167: {'lr': 0.0004992711169522565, 'samples': 1622016, 'steps': 3167, 'loss/train': 2.0717387199401855} +02/24/2022 05:41:04 - INFO - codeparrot_training - Step 3168: {'lr': 0.0004992698678688111, 'samples': 1622528, 'steps': 3168, 'loss/train': 3.143423557281494} +02/24/2022 05:41:08 - INFO - codeparrot_training - Step 3169: {'lr': 0.0004992686177175728, 'samples': 1623040, 'steps': 3169, 'loss/train': 7.139700412750244} +02/24/2022 05:41:13 - INFO - codeparrot_training - Step 3170: {'lr': 0.000499267366498547, 'samples': 1623552, 'steps': 3170, 'loss/train': 3.5356225967407227} +02/24/2022 05:41:16 - INFO - codeparrot_training - Step 3171: {'lr': 0.0004992661142117388, 'samples': 1624064, 'steps': 3171, 'loss/train': 2.750394105911255} +02/24/2022 05:41:22 - INFO - codeparrot_training - Step 3172: {'lr': 0.0004992648608571537, 'samples': 1624576, 'steps': 3172, 'loss/train': 1.6689268350601196} +02/24/2022 05:41:25 - INFO - codeparrot_training - Step 3173: {'lr': 0.0004992636064347971, 'samples': 1625088, 'steps': 3173, 'loss/train': 2.4697742462158203} +02/24/2022 05:41:31 - INFO - codeparrot_training - Step 3174: {'lr': 0.0004992623509446746, 'samples': 1625600, 'steps': 3174, 'loss/train': 2.9983716011047363} +02/24/2022 05:41:34 - INFO - codeparrot_training - Step 3175: {'lr': 0.0004992610943867911, 'samples': 1626112, 'steps': 3175, 'loss/train': 2.3993630409240723} +02/24/2022 05:41:40 - INFO - codeparrot_training - Step 3176: {'lr': 0.0004992598367611523, 'samples': 1626624, 'steps': 3176, 'loss/train': 2.1723434925079346} +02/24/2022 05:41:43 - INFO - codeparrot_training - Step 3177: {'lr': 0.0004992585780677634, 'samples': 1627136, 'steps': 3177, 'loss/train': 1.7839462757110596} +02/24/2022 05:41:50 - INFO - codeparrot_training - Step 3178: {'lr': 0.00049925731830663, 'samples': 1627648, 'steps': 3178, 'loss/train': 2.7818543910980225} +02/24/2022 05:41:53 - INFO - codeparrot_training - Step 3179: {'lr': 0.0004992560574777574, 'samples': 1628160, 'steps': 3179, 'loss/train': 3.399399757385254} +02/24/2022 05:41:59 - INFO - codeparrot_training - Step 3180: {'lr': 0.000499254795581151, 'samples': 1628672, 'steps': 3180, 'loss/train': 1.4273549318313599} +02/24/2022 05:42:02 - INFO - codeparrot_training - Step 3181: {'lr': 0.0004992535326168162, 'samples': 1629184, 'steps': 3181, 'loss/train': 2.8285038471221924} +02/24/2022 05:42:08 - INFO - codeparrot_training - Step 3182: {'lr': 0.0004992522685847583, 'samples': 1629696, 'steps': 3182, 'loss/train': 1.829829454421997} +02/24/2022 05:42:11 - INFO - codeparrot_training - Step 3183: {'lr': 0.000499251003484983, 'samples': 1630208, 'steps': 3183, 'loss/train': 3.3949851989746094} +02/24/2022 05:42:17 - INFO - codeparrot_training - Step 3184: {'lr': 0.0004992497373174955, 'samples': 1630720, 'steps': 3184, 'loss/train': 1.1417018175125122} +02/24/2022 05:42:20 - INFO - codeparrot_training - Step 3185: {'lr': 0.0004992484700823012, 'samples': 1631232, 'steps': 3185, 'loss/train': 3.5801939964294434} +02/24/2022 05:42:25 - INFO - codeparrot_training - Step 3186: {'lr': 0.0004992472017794057, 'samples': 1631744, 'steps': 3186, 'loss/train': 2.711984872817993} +02/24/2022 05:42:29 - INFO - codeparrot_training - Step 3187: {'lr': 0.0004992459324088143, 'samples': 1632256, 'steps': 3187, 'loss/train': 2.6519556045532227} +02/24/2022 05:42:35 - INFO - codeparrot_training - Step 3188: {'lr': 0.0004992446619705324, 'samples': 1632768, 'steps': 3188, 'loss/train': 3.6207399368286133} +02/24/2022 05:42:38 - INFO - codeparrot_training - Step 3189: {'lr': 0.0004992433904645654, 'samples': 1633280, 'steps': 3189, 'loss/train': 3.226090431213379} +02/24/2022 05:42:44 - INFO - codeparrot_training - Step 3190: {'lr': 0.0004992421178909191, 'samples': 1633792, 'steps': 3190, 'loss/train': 2.890526294708252} +02/24/2022 05:42:47 - INFO - codeparrot_training - Step 3191: {'lr': 0.0004992408442495986, 'samples': 1634304, 'steps': 3191, 'loss/train': 3.212261199951172} +02/24/2022 05:42:53 - INFO - codeparrot_training - Step 3192: {'lr': 0.0004992395695406095, 'samples': 1634816, 'steps': 3192, 'loss/train': 2.970785617828369} +02/24/2022 05:42:56 - INFO - codeparrot_training - Step 3193: {'lr': 0.0004992382937639572, 'samples': 1635328, 'steps': 3193, 'loss/train': 2.744068145751953} +02/24/2022 05:43:02 - INFO - codeparrot_training - Step 3194: {'lr': 0.0004992370169196472, 'samples': 1635840, 'steps': 3194, 'loss/train': 2.8357691764831543} +02/24/2022 05:43:07 - INFO - codeparrot_training - Step 3195: {'lr': 0.000499235739007685, 'samples': 1636352, 'steps': 3195, 'loss/train': 1.7496614456176758} +02/24/2022 05:43:11 - INFO - codeparrot_training - Step 3196: {'lr': 0.000499234460028076, 'samples': 1636864, 'steps': 3196, 'loss/train': 3.583536148071289} +02/24/2022 05:43:17 - INFO - codeparrot_training - Step 3197: {'lr': 0.0004992331799808258, 'samples': 1637376, 'steps': 3197, 'loss/train': 2.757659435272217} +02/24/2022 05:43:21 - INFO - codeparrot_training - Step 3198: {'lr': 0.0004992318988659396, 'samples': 1637888, 'steps': 3198, 'loss/train': 0.2308320850133896} +02/24/2022 05:43:24 - INFO - codeparrot_training - Step 3199: {'lr': 0.0004992306166834232, 'samples': 1638400, 'steps': 3199, 'loss/train': 3.5532004833221436} +02/24/2022 05:43:30 - INFO - codeparrot_training - Step 3200: {'lr': 0.000499229333433282, 'samples': 1638912, 'steps': 3200, 'loss/train': 3.832852363586426} +02/24/2022 05:43:33 - INFO - codeparrot_training - Step 3201: {'lr': 0.0004992280491155214, 'samples': 1639424, 'steps': 3201, 'loss/train': 3.0533409118652344} +02/24/2022 05:43:39 - INFO - codeparrot_training - Step 3202: {'lr': 0.0004992267637301471, 'samples': 1639936, 'steps': 3202, 'loss/train': 2.398283004760742} +02/24/2022 05:43:42 - INFO - codeparrot_training - Step 3203: {'lr': 0.0004992254772771644, 'samples': 1640448, 'steps': 3203, 'loss/train': 1.2057732343673706} +02/24/2022 05:43:48 - INFO - codeparrot_training - Step 3204: {'lr': 0.0004992241897565789, 'samples': 1640960, 'steps': 3204, 'loss/train': 2.909679889678955} +02/24/2022 05:43:51 - INFO - codeparrot_training - Step 3205: {'lr': 0.0004992229011683961, 'samples': 1641472, 'steps': 3205, 'loss/train': 3.1105668544769287} +02/24/2022 05:43:57 - INFO - codeparrot_training - Step 3206: {'lr': 0.0004992216115126216, 'samples': 1641984, 'steps': 3206, 'loss/train': 2.3970987796783447} +02/24/2022 05:44:00 - INFO - codeparrot_training - Step 3207: {'lr': 0.0004992203207892607, 'samples': 1642496, 'steps': 3207, 'loss/train': 2.6902811527252197} +02/24/2022 05:44:06 - INFO - codeparrot_training - Step 3208: {'lr': 0.0004992190289983192, 'samples': 1643008, 'steps': 3208, 'loss/train': 2.374669075012207} +02/24/2022 05:44:12 - INFO - codeparrot_training - Step 3209: {'lr': 0.0004992177361398026, 'samples': 1643520, 'steps': 3209, 'loss/train': 5.70389986038208} +02/24/2022 05:44:15 - INFO - codeparrot_training - Step 3210: {'lr': 0.0004992164422137162, 'samples': 1644032, 'steps': 3210, 'loss/train': 2.85617733001709} +02/24/2022 05:44:21 - INFO - codeparrot_training - Step 3211: {'lr': 0.0004992151472200657, 'samples': 1644544, 'steps': 3211, 'loss/train': 3.1170172691345215} +02/24/2022 05:44:24 - INFO - codeparrot_training - Step 3212: {'lr': 0.0004992138511588567, 'samples': 1645056, 'steps': 3212, 'loss/train': 3.403440475463867} +02/24/2022 05:44:30 - INFO - codeparrot_training - Step 3213: {'lr': 0.0004992125540300947, 'samples': 1645568, 'steps': 3213, 'loss/train': 2.5705549716949463} +02/24/2022 05:44:33 - INFO - codeparrot_training - Step 3214: {'lr': 0.0004992112558337852, 'samples': 1646080, 'steps': 3214, 'loss/train': 2.6941282749176025} +02/24/2022 05:44:39 - INFO - codeparrot_training - Step 3215: {'lr': 0.0004992099565699339, 'samples': 1646592, 'steps': 3215, 'loss/train': 4.322793960571289} +02/24/2022 05:44:42 - INFO - codeparrot_training - Step 3216: {'lr': 0.0004992086562385462, 'samples': 1647104, 'steps': 3216, 'loss/train': 3.3275146484375} +02/24/2022 05:44:48 - INFO - codeparrot_training - Step 3217: {'lr': 0.0004992073548396277, 'samples': 1647616, 'steps': 3217, 'loss/train': 3.8183724880218506} +02/24/2022 05:44:51 - INFO - codeparrot_training - Step 3218: {'lr': 0.0004992060523731842, 'samples': 1648128, 'steps': 3218, 'loss/train': 1.4780527353286743} +02/24/2022 05:44:56 - INFO - codeparrot_training - Step 3219: {'lr': 0.0004992047488392209, 'samples': 1648640, 'steps': 3219, 'loss/train': 2.7397172451019287} +02/24/2022 05:45:00 - INFO - codeparrot_training - Step 3220: {'lr': 0.0004992034442377437, 'samples': 1649152, 'steps': 3220, 'loss/train': 0.3047305941581726} +02/24/2022 05:45:07 - INFO - codeparrot_training - Step 3221: {'lr': 0.0004992021385687582, 'samples': 1649664, 'steps': 3221, 'loss/train': 3.753160238265991} +02/24/2022 05:45:10 - INFO - codeparrot_training - Step 3222: {'lr': 0.0004992008318322697, 'samples': 1650176, 'steps': 3222, 'loss/train': 5.040472030639648} +02/24/2022 05:45:16 - INFO - codeparrot_training - Step 3223: {'lr': 0.000499199524028284, 'samples': 1650688, 'steps': 3223, 'loss/train': 2.1115314960479736} +02/24/2022 05:45:19 - INFO - codeparrot_training - Step 3224: {'lr': 0.0004991982151568066, 'samples': 1651200, 'steps': 3224, 'loss/train': 2.7115297317504883} +02/24/2022 05:45:25 - INFO - codeparrot_training - Step 3225: {'lr': 0.0004991969052178433, 'samples': 1651712, 'steps': 3225, 'loss/train': 3.3633506298065186} +02/24/2022 05:45:28 - INFO - codeparrot_training - Step 3226: {'lr': 0.0004991955942113995, 'samples': 1652224, 'steps': 3226, 'loss/train': 2.8467767238616943} +02/24/2022 05:45:34 - INFO - codeparrot_training - Step 3227: {'lr': 0.0004991942821374809, 'samples': 1652736, 'steps': 3227, 'loss/train': 2.220717191696167} +02/24/2022 05:45:37 - INFO - codeparrot_training - Step 3228: {'lr': 0.0004991929689960932, 'samples': 1653248, 'steps': 3228, 'loss/train': 3.3960371017456055} +02/24/2022 05:45:43 - INFO - codeparrot_training - Step 3229: {'lr': 0.000499191654787242, 'samples': 1653760, 'steps': 3229, 'loss/train': 2.105057954788208} +02/24/2022 05:45:46 - INFO - codeparrot_training - Step 3230: {'lr': 0.0004991903395109328, 'samples': 1654272, 'steps': 3230, 'loss/train': 3.1729471683502197} +02/24/2022 05:45:53 - INFO - codeparrot_training - Step 3231: {'lr': 0.0004991890231671712, 'samples': 1654784, 'steps': 3231, 'loss/train': 1.2637964487075806} +02/24/2022 05:45:56 - INFO - codeparrot_training - Step 3232: {'lr': 0.0004991877057559631, 'samples': 1655296, 'steps': 3232, 'loss/train': 2.351189136505127} +02/24/2022 05:46:02 - INFO - codeparrot_training - Step 3233: {'lr': 0.0004991863872773139, 'samples': 1655808, 'steps': 3233, 'loss/train': 1.7066203355789185} +02/24/2022 05:46:05 - INFO - codeparrot_training - Step 3234: {'lr': 0.0004991850677312295, 'samples': 1656320, 'steps': 3234, 'loss/train': 1.730821967124939} +02/24/2022 05:46:11 - INFO - codeparrot_training - Step 3235: {'lr': 0.0004991837471177152, 'samples': 1656832, 'steps': 3235, 'loss/train': 3.146332263946533} +02/24/2022 05:46:14 - INFO - codeparrot_training - Step 3236: {'lr': 0.000499182425436777, 'samples': 1657344, 'steps': 3236, 'loss/train': 3.5814104080200195} +02/24/2022 05:46:20 - INFO - codeparrot_training - Step 3237: {'lr': 0.0004991811026884203, 'samples': 1657856, 'steps': 3237, 'loss/train': 2.492323398590088} +02/24/2022 05:46:23 - INFO - codeparrot_training - Step 3238: {'lr': 0.0004991797788726509, 'samples': 1658368, 'steps': 3238, 'loss/train': 2.071268081665039} +02/24/2022 05:46:29 - INFO - codeparrot_training - Step 3239: {'lr': 0.0004991784539894745, 'samples': 1658880, 'steps': 3239, 'loss/train': 2.5763838291168213} +02/24/2022 05:46:32 - INFO - codeparrot_training - Step 3240: {'lr': 0.0004991771280388967, 'samples': 1659392, 'steps': 3240, 'loss/train': 3.325284957885742} +02/24/2022 05:46:39 - INFO - codeparrot_training - Step 3241: {'lr': 0.0004991758010209232, 'samples': 1659904, 'steps': 3241, 'loss/train': 1.9424529075622559} +02/24/2022 05:46:42 - INFO - codeparrot_training - Step 3242: {'lr': 0.0004991744729355598, 'samples': 1660416, 'steps': 3242, 'loss/train': 3.8290340900421143} +02/24/2022 05:46:48 - INFO - codeparrot_training - Step 3243: {'lr': 0.0004991731437828119, 'samples': 1660928, 'steps': 3243, 'loss/train': 2.1688547134399414} +02/24/2022 05:46:51 - INFO - codeparrot_training - Step 3244: {'lr': 0.0004991718135626855, 'samples': 1661440, 'steps': 3244, 'loss/train': 2.0057246685028076} +02/24/2022 05:46:57 - INFO - codeparrot_training - Step 3245: {'lr': 0.0004991704822751861, 'samples': 1661952, 'steps': 3245, 'loss/train': 2.5122742652893066} +02/24/2022 05:47:00 - INFO - codeparrot_training - Step 3246: {'lr': 0.0004991691499203195, 'samples': 1662464, 'steps': 3246, 'loss/train': 3.7530465126037598} +02/24/2022 05:47:06 - INFO - codeparrot_training - Step 3247: {'lr': 0.0004991678164980914, 'samples': 1662976, 'steps': 3247, 'loss/train': 3.4744019508361816} +02/24/2022 05:47:09 - INFO - codeparrot_training - Step 3248: {'lr': 0.0004991664820085074, 'samples': 1663488, 'steps': 3248, 'loss/train': 2.2773642539978027} +02/24/2022 05:47:14 - INFO - codeparrot_training - Step 3249: {'lr': 0.0004991651464515735, 'samples': 1664000, 'steps': 3249, 'loss/train': 3.480146646499634} +02/24/2022 05:47:18 - INFO - codeparrot_training - Step 3250: {'lr': 0.0004991638098272951, 'samples': 1664512, 'steps': 3250, 'loss/train': 2.522777557373047} +02/24/2022 05:47:25 - INFO - codeparrot_training - Step 3251: {'lr': 0.000499162472135678, 'samples': 1665024, 'steps': 3251, 'loss/train': 3.02297043800354} +02/24/2022 05:47:28 - INFO - codeparrot_training - Step 3252: {'lr': 0.0004991611333767281, 'samples': 1665536, 'steps': 3252, 'loss/train': 3.784723997116089} +02/24/2022 05:47:34 - INFO - codeparrot_training - Step 3253: {'lr': 0.000499159793550451, 'samples': 1666048, 'steps': 3253, 'loss/train': 3.7307851314544678} +02/24/2022 05:47:39 - INFO - codeparrot_training - Step 3254: {'lr': 0.0004991584526568524, 'samples': 1666560, 'steps': 3254, 'loss/train': 2.665712833404541} +02/24/2022 05:47:43 - INFO - codeparrot_training - Step 3255: {'lr': 0.0004991571106959383, 'samples': 1667072, 'steps': 3255, 'loss/train': 2.9563913345336914} +02/24/2022 05:47:48 - INFO - codeparrot_training - Step 3256: {'lr': 0.000499155767667714, 'samples': 1667584, 'steps': 3256, 'loss/train': 2.1152777671813965} +02/24/2022 05:47:51 - INFO - codeparrot_training - Step 3257: {'lr': 0.0004991544235721857, 'samples': 1668096, 'steps': 3257, 'loss/train': 3.230518102645874} +02/24/2022 05:47:57 - INFO - codeparrot_training - Step 3258: {'lr': 0.0004991530784093589, 'samples': 1668608, 'steps': 3258, 'loss/train': 2.232226848602295} +02/24/2022 05:48:00 - INFO - codeparrot_training - Step 3259: {'lr': 0.0004991517321792394, 'samples': 1669120, 'steps': 3259, 'loss/train': 3.9473624229431152} +02/24/2022 05:48:06 - INFO - codeparrot_training - Step 3260: {'lr': 0.000499150384881833, 'samples': 1669632, 'steps': 3260, 'loss/train': 2.196275472640991} +02/24/2022 05:48:09 - INFO - codeparrot_training - Step 3261: {'lr': 0.0004991490365171454, 'samples': 1670144, 'steps': 3261, 'loss/train': 2.8099348545074463} +02/24/2022 05:48:16 - INFO - codeparrot_training - Step 3262: {'lr': 0.0004991476870851825, 'samples': 1670656, 'steps': 3262, 'loss/train': 3.0723612308502197} +02/24/2022 05:48:20 - INFO - codeparrot_training - Step 3263: {'lr': 0.0004991463365859501, 'samples': 1671168, 'steps': 3263, 'loss/train': 4.407003402709961} +02/24/2022 05:48:25 - INFO - codeparrot_training - Step 3264: {'lr': 0.0004991449850194538, 'samples': 1671680, 'steps': 3264, 'loss/train': 2.6636056900024414} +02/24/2022 05:48:28 - INFO - codeparrot_training - Step 3265: {'lr': 0.0004991436323856995, 'samples': 1672192, 'steps': 3265, 'loss/train': 2.2249515056610107} +02/24/2022 05:48:34 - INFO - codeparrot_training - Step 3266: {'lr': 0.0004991422786846931, 'samples': 1672704, 'steps': 3266, 'loss/train': 1.6259679794311523} +02/24/2022 05:48:37 - INFO - codeparrot_training - Step 3267: {'lr': 0.0004991409239164401, 'samples': 1673216, 'steps': 3267, 'loss/train': 2.8585119247436523} +02/24/2022 05:48:43 - INFO - codeparrot_training - Step 3268: {'lr': 0.0004991395680809467, 'samples': 1673728, 'steps': 3268, 'loss/train': 3.8965282440185547} +02/24/2022 05:48:46 - INFO - codeparrot_training - Step 3269: {'lr': 0.0004991382111782183, 'samples': 1674240, 'steps': 3269, 'loss/train': 3.693962812423706} +02/24/2022 05:48:52 - INFO - codeparrot_training - Step 3270: {'lr': 0.0004991368532082611, 'samples': 1674752, 'steps': 3270, 'loss/train': 1.0827059745788574} +02/24/2022 05:48:55 - INFO - codeparrot_training - Step 3271: {'lr': 0.0004991354941710806, 'samples': 1675264, 'steps': 3271, 'loss/train': 2.297900438308716} +02/24/2022 05:49:01 - INFO - codeparrot_training - Step 3272: {'lr': 0.0004991341340666828, 'samples': 1675776, 'steps': 3272, 'loss/train': 2.014286756515503} +02/24/2022 05:49:04 - INFO - codeparrot_training - Step 3273: {'lr': 0.0004991327728950736, 'samples': 1676288, 'steps': 3273, 'loss/train': 2.8018417358398438} +02/24/2022 05:49:10 - INFO - codeparrot_training - Step 3274: {'lr': 0.0004991314106562586, 'samples': 1676800, 'steps': 3274, 'loss/train': 2.6202468872070312} +02/24/2022 05:49:13 - INFO - codeparrot_training - Step 3275: {'lr': 0.0004991300473502437, 'samples': 1677312, 'steps': 3275, 'loss/train': 2.7593538761138916} +02/24/2022 05:49:19 - INFO - codeparrot_training - Step 3276: {'lr': 0.0004991286829770348, 'samples': 1677824, 'steps': 3276, 'loss/train': 3.337987184524536} +02/24/2022 05:49:23 - INFO - codeparrot_training - Step 3277: {'lr': 0.0004991273175366378, 'samples': 1678336, 'steps': 3277, 'loss/train': 2.0048723220825195} +02/24/2022 05:49:26 - INFO - codeparrot_training - Step 3278: {'lr': 0.0004991259510290584, 'samples': 1678848, 'steps': 3278, 'loss/train': 4.979170322418213} +02/24/2022 05:49:33 - INFO - codeparrot_training - Step 3279: {'lr': 0.0004991245834543025, 'samples': 1679360, 'steps': 3279, 'loss/train': 2.2831180095672607} +02/24/2022 05:49:36 - INFO - codeparrot_training - Step 3280: {'lr': 0.0004991232148123761, 'samples': 1679872, 'steps': 3280, 'loss/train': 2.6834723949432373} +02/24/2022 05:49:41 - INFO - codeparrot_training - Step 3281: {'lr': 0.0004991218451032849, 'samples': 1680384, 'steps': 3281, 'loss/train': 3.3644988536834717} +02/24/2022 05:49:45 - INFO - codeparrot_training - Step 3282: {'lr': 0.0004991204743270348, 'samples': 1680896, 'steps': 3282, 'loss/train': 3.373000383377075} +02/24/2022 05:49:50 - INFO - codeparrot_training - Step 3283: {'lr': 0.0004991191024836317, 'samples': 1681408, 'steps': 3283, 'loss/train': 3.6448915004730225} +02/24/2022 05:49:56 - INFO - codeparrot_training - Step 3284: {'lr': 0.0004991177295730815, 'samples': 1681920, 'steps': 3284, 'loss/train': 3.4570765495300293} +02/24/2022 05:49:59 - INFO - codeparrot_training - Step 3285: {'lr': 0.0004991163555953901, 'samples': 1682432, 'steps': 3285, 'loss/train': 3.0987887382507324} +02/24/2022 05:50:06 - INFO - codeparrot_training - Step 3286: {'lr': 0.0004991149805505632, 'samples': 1682944, 'steps': 3286, 'loss/train': 3.1108367443084717} +02/24/2022 05:50:10 - INFO - codeparrot_training - Step 3287: {'lr': 0.0004991136044386069, 'samples': 1683456, 'steps': 3287, 'loss/train': 1.6065137386322021} +02/24/2022 05:50:15 - INFO - codeparrot_training - Step 3288: {'lr': 0.0004991122272595271, 'samples': 1683968, 'steps': 3288, 'loss/train': 3.9230844974517822} +02/24/2022 05:50:19 - INFO - codeparrot_training - Step 3289: {'lr': 0.0004991108490133296, 'samples': 1684480, 'steps': 3289, 'loss/train': 3.022031545639038} +02/24/2022 05:50:24 - INFO - codeparrot_training - Step 3290: {'lr': 0.0004991094697000202, 'samples': 1684992, 'steps': 3290, 'loss/train': 2.7204723358154297} +02/24/2022 05:50:28 - INFO - codeparrot_training - Step 3291: {'lr': 0.000499108089319605, 'samples': 1685504, 'steps': 3291, 'loss/train': 1.8913851976394653} +02/24/2022 05:50:33 - INFO - codeparrot_training - Step 3292: {'lr': 0.0004991067078720899, 'samples': 1686016, 'steps': 3292, 'loss/train': 2.1258010864257812} +02/24/2022 05:50:37 - INFO - codeparrot_training - Step 3293: {'lr': 0.0004991053253574807, 'samples': 1686528, 'steps': 3293, 'loss/train': 3.767484188079834} +02/24/2022 05:50:42 - INFO - codeparrot_training - Step 3294: {'lr': 0.0004991039417757833, 'samples': 1687040, 'steps': 3294, 'loss/train': 2.5974926948547363} +02/24/2022 05:50:46 - INFO - codeparrot_training - Step 3295: {'lr': 0.0004991025571270039, 'samples': 1687552, 'steps': 3295, 'loss/train': 3.828691244125366} +02/24/2022 05:50:51 - INFO - codeparrot_training - Step 3296: {'lr': 0.000499101171411148, 'samples': 1688064, 'steps': 3296, 'loss/train': 2.7984731197357178} +02/24/2022 05:50:54 - INFO - codeparrot_training - Step 3297: {'lr': 0.000499099784628222, 'samples': 1688576, 'steps': 3297, 'loss/train': 2.1045572757720947} +02/24/2022 05:51:01 - INFO - codeparrot_training - Step 3298: {'lr': 0.0004990983967782316, 'samples': 1689088, 'steps': 3298, 'loss/train': 0.8454645276069641} +02/24/2022 05:51:04 - INFO - codeparrot_training - Step 3299: {'lr': 0.0004990970078611827, 'samples': 1689600, 'steps': 3299, 'loss/train': 3.2705276012420654} +02/24/2022 05:51:10 - INFO - codeparrot_training - Step 3300: {'lr': 0.0004990956178770814, 'samples': 1690112, 'steps': 3300, 'loss/train': 2.8591206073760986} +02/24/2022 05:51:13 - INFO - codeparrot_training - Step 3301: {'lr': 0.0004990942268259335, 'samples': 1690624, 'steps': 3301, 'loss/train': 4.083927154541016} +02/24/2022 05:51:19 - INFO - codeparrot_training - Step 3302: {'lr': 0.000499092834707745, 'samples': 1691136, 'steps': 3302, 'loss/train': 2.730400323867798} +02/24/2022 05:51:22 - INFO - codeparrot_training - Step 3303: {'lr': 0.000499091441522522, 'samples': 1691648, 'steps': 3303, 'loss/train': 2.869877815246582} +02/24/2022 05:51:28 - INFO - codeparrot_training - Step 3304: {'lr': 0.0004990900472702702, 'samples': 1692160, 'steps': 3304, 'loss/train': 2.0143260955810547} +02/24/2022 05:51:31 - INFO - codeparrot_training - Step 3305: {'lr': 0.0004990886519509959, 'samples': 1692672, 'steps': 3305, 'loss/train': 2.5578551292419434} +02/24/2022 05:51:37 - INFO - codeparrot_training - Step 3306: {'lr': 0.0004990872555647048, 'samples': 1693184, 'steps': 3306, 'loss/train': 2.9826536178588867} +02/24/2022 05:51:40 - INFO - codeparrot_training - Step 3307: {'lr': 0.0004990858581114029, 'samples': 1693696, 'steps': 3307, 'loss/train': 3.3972973823547363} +02/24/2022 05:51:46 - INFO - codeparrot_training - Step 3308: {'lr': 0.0004990844595910965, 'samples': 1694208, 'steps': 3308, 'loss/train': 2.664269208908081} +02/24/2022 05:51:49 - INFO - codeparrot_training - Step 3309: {'lr': 0.0004990830600037912, 'samples': 1694720, 'steps': 3309, 'loss/train': 2.6696882247924805} +02/24/2022 05:51:55 - INFO - codeparrot_training - Step 3310: {'lr': 0.0004990816593494933, 'samples': 1695232, 'steps': 3310, 'loss/train': 2.8404529094696045} +02/24/2022 05:51:58 - INFO - codeparrot_training - Step 3311: {'lr': 0.0004990802576282085, 'samples': 1695744, 'steps': 3311, 'loss/train': 3.3159096240997314} +02/24/2022 05:52:03 - INFO - codeparrot_training - Step 3312: {'lr': 0.0004990788548399431, 'samples': 1696256, 'steps': 3312, 'loss/train': 2.6696512699127197} +02/24/2022 05:52:07 - INFO - codeparrot_training - Step 3313: {'lr': 0.0004990774509847029, 'samples': 1696768, 'steps': 3313, 'loss/train': 3.5161855220794678} +02/24/2022 05:52:14 - INFO - codeparrot_training - Step 3314: {'lr': 0.0004990760460624941, 'samples': 1697280, 'steps': 3314, 'loss/train': 3.0681910514831543} +02/24/2022 05:52:17 - INFO - codeparrot_training - Step 3315: {'lr': 0.0004990746400733225, 'samples': 1697792, 'steps': 3315, 'loss/train': 3.209573268890381} +02/24/2022 05:52:23 - INFO - codeparrot_training - Step 3316: {'lr': 0.0004990732330171943, 'samples': 1698304, 'steps': 3316, 'loss/train': 1.746667742729187} +02/24/2022 05:52:28 - INFO - codeparrot_training - Step 3317: {'lr': 0.0004990718248941154, 'samples': 1698816, 'steps': 3317, 'loss/train': 3.3051421642303467} +02/24/2022 05:52:32 - INFO - codeparrot_training - Step 3318: {'lr': 0.0004990704157040919, 'samples': 1699328, 'steps': 3318, 'loss/train': 3.486098051071167} +02/24/2022 05:52:37 - INFO - codeparrot_training - Step 3319: {'lr': 0.0004990690054471299, 'samples': 1699840, 'steps': 3319, 'loss/train': 2.2478842735290527} +02/24/2022 05:52:41 - INFO - codeparrot_training - Step 3320: {'lr': 0.0004990675941232354, 'samples': 1700352, 'steps': 3320, 'loss/train': 1.7554570436477661} +02/24/2022 05:52:46 - INFO - codeparrot_training - Step 3321: {'lr': 0.0004990661817324142, 'samples': 1700864, 'steps': 3321, 'loss/train': 3.1901450157165527} +02/24/2022 05:52:49 - INFO - codeparrot_training - Step 3322: {'lr': 0.0004990647682746727, 'samples': 1701376, 'steps': 3322, 'loss/train': 3.148627996444702} +02/24/2022 05:52:56 - INFO - codeparrot_training - Step 3323: {'lr': 0.0004990633537500169, 'samples': 1701888, 'steps': 3323, 'loss/train': 4.160163879394531} +02/24/2022 05:52:59 - INFO - codeparrot_training - Step 3324: {'lr': 0.0004990619381584527, 'samples': 1702400, 'steps': 3324, 'loss/train': 2.2061586380004883} +02/24/2022 05:53:05 - INFO - codeparrot_training - Step 3325: {'lr': 0.0004990605214999862, 'samples': 1702912, 'steps': 3325, 'loss/train': 3.1617579460144043} +02/24/2022 05:53:08 - INFO - codeparrot_training - Step 3326: {'lr': 0.0004990591037746236, 'samples': 1703424, 'steps': 3326, 'loss/train': 4.343183994293213} +02/24/2022 05:53:13 - INFO - codeparrot_training - Step 3327: {'lr': 0.0004990576849823708, 'samples': 1703936, 'steps': 3327, 'loss/train': 1.9711787700653076} +02/24/2022 05:53:17 - INFO - codeparrot_training - Step 3328: {'lr': 0.000499056265123234, 'samples': 1704448, 'steps': 3328, 'loss/train': 2.978633403778076} +02/24/2022 05:53:23 - INFO - codeparrot_training - Step 3329: {'lr': 0.0004990548441972193, 'samples': 1704960, 'steps': 3329, 'loss/train': 2.6748435497283936} +02/24/2022 05:53:26 - INFO - codeparrot_training - Step 3330: {'lr': 0.0004990534222043325, 'samples': 1705472, 'steps': 3330, 'loss/train': 0.8951675295829773} +02/24/2022 05:53:31 - INFO - codeparrot_training - Step 3331: {'lr': 0.0004990519991445803, 'samples': 1705984, 'steps': 3331, 'loss/train': 3.2934584617614746} +02/24/2022 05:53:35 - INFO - codeparrot_training - Step 3332: {'lr': 0.0004990505750179682, 'samples': 1706496, 'steps': 3332, 'loss/train': 2.81172776222229} +02/24/2022 05:53:41 - INFO - codeparrot_training - Step 3333: {'lr': 0.0004990491498245024, 'samples': 1707008, 'steps': 3333, 'loss/train': 2.932238817214966} +02/24/2022 05:53:44 - INFO - codeparrot_training - Step 3334: {'lr': 0.0004990477235641893, 'samples': 1707520, 'steps': 3334, 'loss/train': 1.2628066539764404} +02/24/2022 05:53:50 - INFO - codeparrot_training - Step 3335: {'lr': 0.0004990462962370347, 'samples': 1708032, 'steps': 3335, 'loss/train': 3.303638219833374} +02/24/2022 05:53:54 - INFO - codeparrot_training - Step 3336: {'lr': 0.0004990448678430451, 'samples': 1708544, 'steps': 3336, 'loss/train': 2.033548355102539} +02/24/2022 05:53:59 - INFO - codeparrot_training - Step 3337: {'lr': 0.0004990434383822261, 'samples': 1709056, 'steps': 3337, 'loss/train': 3.910740852355957} +02/24/2022 05:54:02 - INFO - codeparrot_training - Step 3338: {'lr': 0.0004990420078545843, 'samples': 1709568, 'steps': 3338, 'loss/train': 0.6613812446594238} +02/24/2022 05:54:08 - INFO - codeparrot_training - Step 3339: {'lr': 0.0004990405762601254, 'samples': 1710080, 'steps': 3339, 'loss/train': 4.209691524505615} +02/24/2022 05:54:11 - INFO - codeparrot_training - Step 3340: {'lr': 0.000499039143598856, 'samples': 1710592, 'steps': 3340, 'loss/train': 4.026211261749268} +02/24/2022 05:54:17 - INFO - codeparrot_training - Step 3341: {'lr': 0.0004990377098707818, 'samples': 1711104, 'steps': 3341, 'loss/train': 3.085852861404419} +02/24/2022 05:54:20 - INFO - codeparrot_training - Step 3342: {'lr': 0.0004990362750759092, 'samples': 1711616, 'steps': 3342, 'loss/train': 2.8016932010650635} +02/24/2022 05:54:26 - INFO - codeparrot_training - Step 3343: {'lr': 0.0004990348392142443, 'samples': 1712128, 'steps': 3343, 'loss/train': 1.691659927368164} +02/24/2022 05:54:29 - INFO - codeparrot_training - Step 3344: {'lr': 0.0004990334022857932, 'samples': 1712640, 'steps': 3344, 'loss/train': 1.7974783182144165} +02/24/2022 05:54:36 - INFO - codeparrot_training - Step 3345: {'lr': 0.0004990319642905619, 'samples': 1713152, 'steps': 3345, 'loss/train': 3.4584097862243652} +02/24/2022 05:54:39 - INFO - codeparrot_training - Step 3346: {'lr': 0.000499030525228557, 'samples': 1713664, 'steps': 3346, 'loss/train': 2.7353296279907227} +02/24/2022 05:54:45 - INFO - codeparrot_training - Step 3347: {'lr': 0.0004990290850997843, 'samples': 1714176, 'steps': 3347, 'loss/train': 2.4467248916625977} +02/24/2022 05:54:48 - INFO - codeparrot_training - Step 3348: {'lr': 0.0004990276439042501, 'samples': 1714688, 'steps': 3348, 'loss/train': 1.4302935600280762} +02/24/2022 05:54:54 - INFO - codeparrot_training - Step 3349: {'lr': 0.0004990262016419606, 'samples': 1715200, 'steps': 3349, 'loss/train': 3.072995901107788} +02/24/2022 05:54:57 - INFO - codeparrot_training - Step 3350: {'lr': 0.0004990247583129218, 'samples': 1715712, 'steps': 3350, 'loss/train': 2.932849884033203} +02/24/2022 05:55:03 - INFO - codeparrot_training - Step 3351: {'lr': 0.00049902331391714, 'samples': 1716224, 'steps': 3351, 'loss/train': 2.1219727993011475} +02/24/2022 05:55:06 - INFO - codeparrot_training - Step 3352: {'lr': 0.0004990218684546216, 'samples': 1716736, 'steps': 3352, 'loss/train': 3.4314424991607666} +02/24/2022 05:55:12 - INFO - codeparrot_training - Step 3353: {'lr': 0.0004990204219253724, 'samples': 1717248, 'steps': 3353, 'loss/train': 3.2598094940185547} +02/24/2022 05:55:15 - INFO - codeparrot_training - Step 3354: {'lr': 0.0004990189743293989, 'samples': 1717760, 'steps': 3354, 'loss/train': 2.1458733081817627} +02/24/2022 05:55:21 - INFO - codeparrot_training - Step 3355: {'lr': 0.0004990175256667071, 'samples': 1718272, 'steps': 3355, 'loss/train': 3.1958394050598145} +02/24/2022 05:55:24 - INFO - codeparrot_training - Step 3356: {'lr': 0.0004990160759373033, 'samples': 1718784, 'steps': 3356, 'loss/train': 2.9634346961975098} +02/24/2022 05:55:30 - INFO - codeparrot_training - Step 3357: {'lr': 0.0004990146251411938, 'samples': 1719296, 'steps': 3357, 'loss/train': 3.195547103881836} +02/24/2022 05:55:33 - INFO - codeparrot_training - Step 3358: {'lr': 0.0004990131732783846, 'samples': 1719808, 'steps': 3358, 'loss/train': 3.348626136779785} +02/24/2022 05:55:40 - INFO - codeparrot_training - Step 3359: {'lr': 0.000499011720348882, 'samples': 1720320, 'steps': 3359, 'loss/train': 2.193225383758545} +02/24/2022 05:55:43 - INFO - codeparrot_training - Step 3360: {'lr': 0.0004990102663526924, 'samples': 1720832, 'steps': 3360, 'loss/train': 0.33492591977119446} +02/24/2022 05:55:49 - INFO - codeparrot_training - Step 3361: {'lr': 0.0004990088112898219, 'samples': 1721344, 'steps': 3361, 'loss/train': 1.6221529245376587} +02/24/2022 05:55:52 - INFO - codeparrot_training - Step 3362: {'lr': 0.0004990073551602766, 'samples': 1721856, 'steps': 3362, 'loss/train': 3.003966808319092} +02/24/2022 05:55:58 - INFO - codeparrot_training - Step 3363: {'lr': 0.000499005897964063, 'samples': 1722368, 'steps': 3363, 'loss/train': 3.1551339626312256} +02/24/2022 05:56:01 - INFO - codeparrot_training - Step 3364: {'lr': 0.0004990044397011871, 'samples': 1722880, 'steps': 3364, 'loss/train': 2.333383321762085} +02/24/2022 05:56:07 - INFO - codeparrot_training - Step 3365: {'lr': 0.0004990029803716552, 'samples': 1723392, 'steps': 3365, 'loss/train': 4.322406768798828} +02/24/2022 05:56:10 - INFO - codeparrot_training - Step 3366: {'lr': 0.0004990015199754736, 'samples': 1723904, 'steps': 3366, 'loss/train': 2.2222132682800293} +02/24/2022 05:56:16 - INFO - codeparrot_training - Step 3367: {'lr': 0.0004990000585126486, 'samples': 1724416, 'steps': 3367, 'loss/train': 2.321769952774048} +02/24/2022 05:56:20 - INFO - codeparrot_training - Step 3368: {'lr': 0.0004989985959831865, 'samples': 1724928, 'steps': 3368, 'loss/train': 3.484276294708252} +02/24/2022 05:56:23 - INFO - codeparrot_training - Step 3369: {'lr': 0.0004989971323870934, 'samples': 1725440, 'steps': 3369, 'loss/train': 2.8329782485961914} +02/24/2022 05:56:30 - INFO - codeparrot_training - Step 3370: {'lr': 0.0004989956677243757, 'samples': 1725952, 'steps': 3370, 'loss/train': 4.990061283111572} +02/24/2022 05:56:36 - INFO - codeparrot_training - Step 3371: {'lr': 0.0004989942019950395, 'samples': 1726464, 'steps': 3371, 'loss/train': 2.5743296146392822} +02/24/2022 05:56:39 - INFO - codeparrot_training - Step 3372: {'lr': 0.0004989927351990912, 'samples': 1726976, 'steps': 3372, 'loss/train': 2.976681709289551} +02/24/2022 05:56:45 - INFO - codeparrot_training - Step 3373: {'lr': 0.0004989912673365373, 'samples': 1727488, 'steps': 3373, 'loss/train': 3.4696974754333496} +02/24/2022 05:56:48 - INFO - codeparrot_training - Step 3374: {'lr': 0.0004989897984073837, 'samples': 1728000, 'steps': 3374, 'loss/train': 3.0720739364624023} +02/24/2022 05:56:54 - INFO - codeparrot_training - Step 3375: {'lr': 0.000498988328411637, 'samples': 1728512, 'steps': 3375, 'loss/train': 2.514559268951416} +02/24/2022 05:56:57 - INFO - codeparrot_training - Step 3376: {'lr': 0.0004989868573493032, 'samples': 1729024, 'steps': 3376, 'loss/train': 1.5759943723678589} +02/24/2022 05:57:02 - INFO - codeparrot_training - Step 3377: {'lr': 0.0004989853852203889, 'samples': 1729536, 'steps': 3377, 'loss/train': 2.459177017211914} +02/24/2022 05:57:06 - INFO - codeparrot_training - Step 3378: {'lr': 0.0004989839120249002, 'samples': 1730048, 'steps': 3378, 'loss/train': 3.1192870140075684} +02/24/2022 05:57:12 - INFO - codeparrot_training - Step 3379: {'lr': 0.0004989824377628435, 'samples': 1730560, 'steps': 3379, 'loss/train': 2.705767869949341} +02/24/2022 05:57:15 - INFO - codeparrot_training - Step 3380: {'lr': 0.0004989809624342251, 'samples': 1731072, 'steps': 3380, 'loss/train': 1.0068100690841675} +02/24/2022 05:57:21 - INFO - codeparrot_training - Step 3381: {'lr': 0.0004989794860390513, 'samples': 1731584, 'steps': 3381, 'loss/train': 4.0767927169799805} +02/24/2022 05:57:25 - INFO - codeparrot_training - Step 3382: {'lr': 0.0004989780085773285, 'samples': 1732096, 'steps': 3382, 'loss/train': 3.0383639335632324} +02/24/2022 05:57:30 - INFO - codeparrot_training - Step 3383: {'lr': 0.0004989765300490628, 'samples': 1732608, 'steps': 3383, 'loss/train': 3.3286867141723633} +02/24/2022 05:57:34 - INFO - codeparrot_training - Step 3384: {'lr': 0.0004989750504542609, 'samples': 1733120, 'steps': 3384, 'loss/train': 1.9907475709915161} +02/24/2022 05:57:39 - INFO - codeparrot_training - Step 3385: {'lr': 0.0004989735697929289, 'samples': 1733632, 'steps': 3385, 'loss/train': 2.196045160293579} +02/24/2022 05:57:43 - INFO - codeparrot_training - Step 3386: {'lr': 0.0004989720880650731, 'samples': 1734144, 'steps': 3386, 'loss/train': 3.305495262145996} +02/24/2022 05:57:48 - INFO - codeparrot_training - Step 3387: {'lr': 0.0004989706052707, 'samples': 1734656, 'steps': 3387, 'loss/train': 2.4244720935821533} +02/24/2022 05:57:52 - INFO - codeparrot_training - Step 3388: {'lr': 0.0004989691214098158, 'samples': 1735168, 'steps': 3388, 'loss/train': 7.428947448730469} +02/24/2022 05:57:57 - INFO - codeparrot_training - Step 3389: {'lr': 0.0004989676364824271, 'samples': 1735680, 'steps': 3389, 'loss/train': 3.755610942840576} +02/24/2022 05:58:01 - INFO - codeparrot_training - Step 3390: {'lr': 0.00049896615048854, 'samples': 1736192, 'steps': 3390, 'loss/train': 2.9101481437683105} +02/24/2022 05:58:06 - INFO - codeparrot_training - Step 3391: {'lr': 0.000498964663428161, 'samples': 1736704, 'steps': 3391, 'loss/train': 1.8431017398834229} +02/24/2022 05:58:10 - INFO - codeparrot_training - Step 3392: {'lr': 0.0004989631753012964, 'samples': 1737216, 'steps': 3392, 'loss/train': 1.9650423526763916} +02/24/2022 05:58:15 - INFO - codeparrot_training - Step 3393: {'lr': 0.0004989616861079527, 'samples': 1737728, 'steps': 3393, 'loss/train': 0.8594864010810852} +02/24/2022 05:58:19 - INFO - codeparrot_training - Step 3394: {'lr': 0.0004989601958481361, 'samples': 1738240, 'steps': 3394, 'loss/train': 2.525815486907959} +02/24/2022 05:58:25 - INFO - codeparrot_training - Step 3395: {'lr': 0.000498958704521853, 'samples': 1738752, 'steps': 3395, 'loss/train': 3.5477874279022217} +02/24/2022 05:58:28 - INFO - codeparrot_training - Step 3396: {'lr': 0.00049895721212911, 'samples': 1739264, 'steps': 3396, 'loss/train': 3.293307065963745} +02/24/2022 05:58:34 - INFO - codeparrot_training - Step 3397: {'lr': 0.0004989557186699133, 'samples': 1739776, 'steps': 3397, 'loss/train': 3.0308268070220947} +02/24/2022 05:58:37 - INFO - codeparrot_training - Step 3398: {'lr': 0.0004989542241442695, 'samples': 1740288, 'steps': 3398, 'loss/train': 3.868443012237549} +02/24/2022 05:58:43 - INFO - codeparrot_training - Step 3399: {'lr': 0.0004989527285521846, 'samples': 1740800, 'steps': 3399, 'loss/train': 1.2805525064468384} +02/24/2022 05:58:46 - INFO - codeparrot_training - Step 3400: {'lr': 0.0004989512318936654, 'samples': 1741312, 'steps': 3400, 'loss/train': 2.518308162689209} +02/24/2022 05:58:52 - INFO - codeparrot_training - Step 3401: {'lr': 0.0004989497341687182, 'samples': 1741824, 'steps': 3401, 'loss/train': 4.038780689239502} +02/24/2022 05:58:55 - INFO - codeparrot_training - Step 3402: {'lr': 0.0004989482353773494, 'samples': 1742336, 'steps': 3402, 'loss/train': 2.5255954265594482} +02/24/2022 05:59:01 - INFO - codeparrot_training - Step 3403: {'lr': 0.0004989467355195653, 'samples': 1742848, 'steps': 3403, 'loss/train': 2.20237398147583} +02/24/2022 05:59:04 - INFO - codeparrot_training - Step 3404: {'lr': 0.0004989452345953725, 'samples': 1743360, 'steps': 3404, 'loss/train': 3.1351206302642822} +02/24/2022 05:59:10 - INFO - codeparrot_training - Step 3405: {'lr': 0.0004989437326047774, 'samples': 1743872, 'steps': 3405, 'loss/train': 3.515432357788086} +02/24/2022 05:59:13 - INFO - codeparrot_training - Step 3406: {'lr': 0.0004989422295477863, 'samples': 1744384, 'steps': 3406, 'loss/train': 2.930018663406372} +02/24/2022 05:59:19 - INFO - codeparrot_training - Step 3407: {'lr': 0.0004989407254244058, 'samples': 1744896, 'steps': 3407, 'loss/train': 2.9464001655578613} +02/24/2022 05:59:23 - INFO - codeparrot_training - Step 3408: {'lr': 0.0004989392202346424, 'samples': 1745408, 'steps': 3408, 'loss/train': 4.28965425491333} +02/24/2022 05:59:29 - INFO - codeparrot_training - Step 3409: {'lr': 0.0004989377139785022, 'samples': 1745920, 'steps': 3409, 'loss/train': 2.82633376121521} +02/24/2022 05:59:32 - INFO - codeparrot_training - Step 3410: {'lr': 0.000498936206655992, 'samples': 1746432, 'steps': 3410, 'loss/train': 3.0110108852386475} +02/24/2022 05:59:38 - INFO - codeparrot_training - Step 3411: {'lr': 0.0004989346982671181, 'samples': 1746944, 'steps': 3411, 'loss/train': 2.8296079635620117} +02/24/2022 05:59:41 - INFO - codeparrot_training - Step 3412: {'lr': 0.0004989331888118869, 'samples': 1747456, 'steps': 3412, 'loss/train': 2.685868740081787} +02/24/2022 05:59:47 - INFO - codeparrot_training - Step 3413: {'lr': 0.0004989316782903052, 'samples': 1747968, 'steps': 3413, 'loss/train': 2.2202298641204834} +02/24/2022 05:59:50 - INFO - codeparrot_training - Step 3414: {'lr': 0.0004989301667023791, 'samples': 1748480, 'steps': 3414, 'loss/train': 3.193492889404297} +02/24/2022 05:59:56 - INFO - codeparrot_training - Step 3415: {'lr': 0.0004989286540481152, 'samples': 1748992, 'steps': 3415, 'loss/train': 2.7336885929107666} +02/24/2022 06:00:00 - INFO - codeparrot_training - Step 3416: {'lr': 0.00049892714032752, 'samples': 1749504, 'steps': 3416, 'loss/train': 1.3939684629440308} +02/24/2022 06:00:05 - INFO - codeparrot_training - Step 3417: {'lr': 0.0004989256255406001, 'samples': 1750016, 'steps': 3417, 'loss/train': 2.98162579536438} +02/24/2022 06:00:09 - INFO - codeparrot_training - Step 3418: {'lr': 0.0004989241096873617, 'samples': 1750528, 'steps': 3418, 'loss/train': 2.2810022830963135} +02/24/2022 06:00:14 - INFO - codeparrot_training - Step 3419: {'lr': 0.0004989225927678115, 'samples': 1751040, 'steps': 3419, 'loss/train': 2.1340138912200928} +02/24/2022 06:00:18 - INFO - codeparrot_training - Step 3420: {'lr': 0.000498921074781956, 'samples': 1751552, 'steps': 3420, 'loss/train': 2.4442453384399414} +02/24/2022 06:00:23 - INFO - codeparrot_training - Step 3421: {'lr': 0.0004989195557298016, 'samples': 1752064, 'steps': 3421, 'loss/train': 1.3915042877197266} +02/24/2022 06:00:27 - INFO - codeparrot_training - Step 3422: {'lr': 0.0004989180356113549, 'samples': 1752576, 'steps': 3422, 'loss/train': 3.096195697784424} +02/24/2022 06:00:33 - INFO - codeparrot_training - Step 3423: {'lr': 0.0004989165144266224, 'samples': 1753088, 'steps': 3423, 'loss/train': 3.191877841949463} +02/24/2022 06:00:36 - INFO - codeparrot_training - Step 3424: {'lr': 0.0004989149921756105, 'samples': 1753600, 'steps': 3424, 'loss/train': 3.200476884841919} +02/24/2022 06:00:42 - INFO - codeparrot_training - Step 3425: {'lr': 0.0004989134688583259, 'samples': 1754112, 'steps': 3425, 'loss/train': 3.352710485458374} +02/24/2022 06:00:46 - INFO - codeparrot_training - Step 3426: {'lr': 0.000498911944474775, 'samples': 1754624, 'steps': 3426, 'loss/train': 3.7004177570343018} +02/24/2022 06:00:52 - INFO - codeparrot_training - Step 3427: {'lr': 0.0004989104190249643, 'samples': 1755136, 'steps': 3427, 'loss/train': 0.3849867284297943} +02/24/2022 06:00:55 - INFO - codeparrot_training - Step 3428: {'lr': 0.0004989088925089005, 'samples': 1755648, 'steps': 3428, 'loss/train': 2.8095381259918213} +02/24/2022 06:01:00 - INFO - codeparrot_training - Step 3429: {'lr': 0.00049890736492659, 'samples': 1756160, 'steps': 3429, 'loss/train': 3.5617098808288574} +02/24/2022 06:01:04 - INFO - codeparrot_training - Step 3430: {'lr': 0.0004989058362780394, 'samples': 1756672, 'steps': 3430, 'loss/train': 3.3939056396484375} +02/24/2022 06:01:09 - INFO - codeparrot_training - Step 3431: {'lr': 0.0004989043065632552, 'samples': 1757184, 'steps': 3431, 'loss/train': 3.7800133228302} +02/24/2022 06:01:13 - INFO - codeparrot_training - Step 3432: {'lr': 0.0004989027757822441, 'samples': 1757696, 'steps': 3432, 'loss/train': 1.511250376701355} +02/24/2022 06:01:18 - INFO - codeparrot_training - Step 3433: {'lr': 0.0004989012439350124, 'samples': 1758208, 'steps': 3433, 'loss/train': 0.15233869850635529} +02/24/2022 06:01:22 - INFO - codeparrot_training - Step 3434: {'lr': 0.0004988997110215668, 'samples': 1758720, 'steps': 3434, 'loss/train': 3.2571072578430176} +02/24/2022 06:01:27 - INFO - codeparrot_training - Step 3435: {'lr': 0.0004988981770419141, 'samples': 1759232, 'steps': 3435, 'loss/train': 3.0362415313720703} +02/24/2022 06:01:31 - INFO - codeparrot_training - Step 3436: {'lr': 0.0004988966419960605, 'samples': 1759744, 'steps': 3436, 'loss/train': 2.339164972305298} +02/24/2022 06:01:36 - INFO - codeparrot_training - Step 3437: {'lr': 0.0004988951058840127, 'samples': 1760256, 'steps': 3437, 'loss/train': 2.506068706512451} +02/24/2022 06:01:40 - INFO - codeparrot_training - Step 3438: {'lr': 0.0004988935687057773, 'samples': 1760768, 'steps': 3438, 'loss/train': 3.698857307434082} +02/24/2022 06:01:45 - INFO - codeparrot_training - Step 3439: {'lr': 0.0004988920304613609, 'samples': 1761280, 'steps': 3439, 'loss/train': 3.0238447189331055} +02/24/2022 06:01:49 - INFO - codeparrot_training - Step 3440: {'lr': 0.00049889049115077, 'samples': 1761792, 'steps': 3440, 'loss/train': 1.8441886901855469} +02/24/2022 06:01:55 - INFO - codeparrot_training - Step 3441: {'lr': 0.0004988889507740113, 'samples': 1762304, 'steps': 3441, 'loss/train': 1.3593950271606445} +02/24/2022 06:01:59 - INFO - codeparrot_training - Step 3442: {'lr': 0.0004988874093310914, 'samples': 1762816, 'steps': 3442, 'loss/train': 3.2507877349853516} +02/24/2022 06:02:04 - INFO - codeparrot_training - Step 3443: {'lr': 0.000498885866822017, 'samples': 1763328, 'steps': 3443, 'loss/train': 2.106030225753784} +02/24/2022 06:02:08 - INFO - codeparrot_training - Step 3444: {'lr': 0.0004988843232467944, 'samples': 1763840, 'steps': 3444, 'loss/train': 2.3151698112487793} +02/24/2022 06:02:13 - INFO - codeparrot_training - Step 3445: {'lr': 0.0004988827786054304, 'samples': 1764352, 'steps': 3445, 'loss/train': 2.7319116592407227} +02/24/2022 06:02:16 - INFO - codeparrot_training - Step 3446: {'lr': 0.0004988812328979317, 'samples': 1764864, 'steps': 3446, 'loss/train': 3.381547451019287} +02/24/2022 06:02:22 - INFO - codeparrot_training - Step 3447: {'lr': 0.0004988796861243046, 'samples': 1765376, 'steps': 3447, 'loss/train': 2.0480639934539795} +02/24/2022 06:02:26 - INFO - codeparrot_training - Step 3448: {'lr': 0.0004988781382845562, 'samples': 1765888, 'steps': 3448, 'loss/train': 2.7525269985198975} +02/24/2022 06:02:31 - INFO - codeparrot_training - Step 3449: {'lr': 0.0004988765893786929, 'samples': 1766400, 'steps': 3449, 'loss/train': 3.872993230819702} +02/24/2022 06:02:35 - INFO - codeparrot_training - Step 3450: {'lr': 0.0004988750394067211, 'samples': 1766912, 'steps': 3450, 'loss/train': 2.2978477478027344} +02/24/2022 06:02:41 - INFO - codeparrot_training - Step 3451: {'lr': 0.0004988734883686479, 'samples': 1767424, 'steps': 3451, 'loss/train': 2.334455728530884} +02/24/2022 06:02:44 - INFO - codeparrot_training - Step 3452: {'lr': 0.0004988719362644795, 'samples': 1767936, 'steps': 3452, 'loss/train': 2.5293805599212646} +02/24/2022 06:02:50 - INFO - codeparrot_training - Step 3453: {'lr': 0.0004988703830942228, 'samples': 1768448, 'steps': 3453, 'loss/train': 2.462118625640869} +02/24/2022 06:02:53 - INFO - codeparrot_training - Step 3454: {'lr': 0.0004988688288578845, 'samples': 1768960, 'steps': 3454, 'loss/train': 2.1267988681793213} +02/24/2022 06:02:59 - INFO - codeparrot_training - Step 3455: {'lr': 0.0004988672735554711, 'samples': 1769472, 'steps': 3455, 'loss/train': 1.8279513120651245} +02/24/2022 06:03:02 - INFO - codeparrot_training - Step 3456: {'lr': 0.0004988657171869893, 'samples': 1769984, 'steps': 3456, 'loss/train': 1.1528892517089844} +02/24/2022 06:03:08 - INFO - codeparrot_training - Step 3457: {'lr': 0.0004988641597524458, 'samples': 1770496, 'steps': 3457, 'loss/train': 2.45343279838562} +02/24/2022 06:03:11 - INFO - codeparrot_training - Step 3458: {'lr': 0.0004988626012518473, 'samples': 1771008, 'steps': 3458, 'loss/train': 2.931540012359619} +02/24/2022 06:03:17 - INFO - codeparrot_training - Step 3459: {'lr': 0.0004988610416852004, 'samples': 1771520, 'steps': 3459, 'loss/train': 2.7208609580993652} +02/24/2022 06:03:20 - INFO - codeparrot_training - Step 3460: {'lr': 0.0004988594810525118, 'samples': 1772032, 'steps': 3460, 'loss/train': 3.9978678226470947} +02/24/2022 06:03:26 - INFO - codeparrot_training - Step 3461: {'lr': 0.0004988579193537883, 'samples': 1772544, 'steps': 3461, 'loss/train': 3.373157262802124} +02/24/2022 06:03:29 - INFO - codeparrot_training - Step 3462: {'lr': 0.0004988563565890364, 'samples': 1773056, 'steps': 3462, 'loss/train': 3.328639507293701} +02/24/2022 06:03:35 - INFO - codeparrot_training - Step 3463: {'lr': 0.000498854792758263, 'samples': 1773568, 'steps': 3463, 'loss/train': 3.3730952739715576} +02/24/2022 06:03:39 - INFO - codeparrot_training - Step 3464: {'lr': 0.0004988532278614745, 'samples': 1774080, 'steps': 3464, 'loss/train': 2.235300302505493} +02/24/2022 06:03:44 - INFO - codeparrot_training - Step 3465: {'lr': 0.0004988516618986779, 'samples': 1774592, 'steps': 3465, 'loss/train': 2.551022529602051} +02/24/2022 06:03:48 - INFO - codeparrot_training - Step 3466: {'lr': 0.0004988500948698799, 'samples': 1775104, 'steps': 3466, 'loss/train': 2.154386043548584} +02/24/2022 06:03:53 - INFO - codeparrot_training - Step 3467: {'lr': 0.000498848526775087, 'samples': 1775616, 'steps': 3467, 'loss/train': 2.2679810523986816} +02/24/2022 06:03:57 - INFO - codeparrot_training - Step 3468: {'lr': 0.0004988469576143059, 'samples': 1776128, 'steps': 3468, 'loss/train': 3.1192033290863037} +02/24/2022 06:04:02 - INFO - codeparrot_training - Step 3469: {'lr': 0.0004988453873875437, 'samples': 1776640, 'steps': 3469, 'loss/train': 1.99477219581604} +02/24/2022 06:04:06 - INFO - codeparrot_training - Step 3470: {'lr': 0.0004988438160948068, 'samples': 1777152, 'steps': 3470, 'loss/train': 3.6434619426727295} +02/24/2022 06:04:11 - INFO - codeparrot_training - Step 3471: {'lr': 0.000498842243736102, 'samples': 1777664, 'steps': 3471, 'loss/train': 2.7385129928588867} +02/24/2022 06:04:15 - INFO - codeparrot_training - Step 3472: {'lr': 0.000498840670311436, 'samples': 1778176, 'steps': 3472, 'loss/train': 2.3991098403930664} +02/24/2022 06:04:20 - INFO - codeparrot_training - Step 3473: {'lr': 0.0004988390958208156, 'samples': 1778688, 'steps': 3473, 'loss/train': 2.373166084289551} +02/24/2022 06:04:26 - INFO - codeparrot_training - Step 3474: {'lr': 0.0004988375202642475, 'samples': 1779200, 'steps': 3474, 'loss/train': 2.6730761528015137} +02/24/2022 06:04:29 - INFO - codeparrot_training - Step 3475: {'lr': 0.0004988359436417385, 'samples': 1779712, 'steps': 3475, 'loss/train': 2.573969602584839} +02/24/2022 06:04:36 - INFO - codeparrot_training - Step 3476: {'lr': 0.0004988343659532954, 'samples': 1780224, 'steps': 3476, 'loss/train': 2.7785582542419434} +02/24/2022 06:04:39 - INFO - codeparrot_training - Step 3477: {'lr': 0.0004988327871989249, 'samples': 1780736, 'steps': 3477, 'loss/train': 2.594221830368042} +02/24/2022 06:04:45 - INFO - codeparrot_training - Step 3478: {'lr': 0.0004988312073786336, 'samples': 1781248, 'steps': 3478, 'loss/train': 3.8832502365112305} +02/24/2022 06:04:48 - INFO - codeparrot_training - Step 3479: {'lr': 0.0004988296264924286, 'samples': 1781760, 'steps': 3479, 'loss/train': 3.013744592666626} +02/24/2022 06:04:54 - INFO - codeparrot_training - Step 3480: {'lr': 0.0004988280445403164, 'samples': 1782272, 'steps': 3480, 'loss/train': 2.1859991550445557} +02/24/2022 06:04:58 - INFO - codeparrot_training - Step 3481: {'lr': 0.0004988264615223038, 'samples': 1782784, 'steps': 3481, 'loss/train': 2.1419198513031006} +02/24/2022 06:05:01 - INFO - codeparrot_training - Step 3482: {'lr': 0.0004988248774383978, 'samples': 1783296, 'steps': 3482, 'loss/train': 0.30636340379714966} +02/24/2022 06:05:07 - INFO - codeparrot_training - Step 3483: {'lr': 0.0004988232922886049, 'samples': 1783808, 'steps': 3483, 'loss/train': 2.5050477981567383} +02/24/2022 06:05:10 - INFO - codeparrot_training - Step 3484: {'lr': 0.0004988217060729321, 'samples': 1784320, 'steps': 3484, 'loss/train': 2.3946900367736816} +02/24/2022 06:05:16 - INFO - codeparrot_training - Step 3485: {'lr': 0.0004988201187913861, 'samples': 1784832, 'steps': 3485, 'loss/train': 1.218027949333191} +02/24/2022 06:05:19 - INFO - codeparrot_training - Step 3486: {'lr': 0.0004988185304439737, 'samples': 1785344, 'steps': 3486, 'loss/train': 2.8386361598968506} +02/24/2022 06:05:25 - INFO - codeparrot_training - Step 3487: {'lr': 0.0004988169410307018, 'samples': 1785856, 'steps': 3487, 'loss/train': 3.3929803371429443} +02/24/2022 06:05:28 - INFO - codeparrot_training - Step 3488: {'lr': 0.0004988153505515771, 'samples': 1786368, 'steps': 3488, 'loss/train': 2.450044631958008} +02/24/2022 06:05:35 - INFO - codeparrot_training - Step 3489: {'lr': 0.0004988137590066064, 'samples': 1786880, 'steps': 3489, 'loss/train': 2.801856756210327} +02/24/2022 06:05:40 - INFO - codeparrot_training - Step 3490: {'lr': 0.0004988121663957966, 'samples': 1787392, 'steps': 3490, 'loss/train': 2.518889904022217} +02/24/2022 06:05:44 - INFO - codeparrot_training - Step 3491: {'lr': 0.0004988105727191546, 'samples': 1787904, 'steps': 3491, 'loss/train': 1.3204587697982788} +02/24/2022 06:05:49 - INFO - codeparrot_training - Step 3492: {'lr': 0.0004988089779766869, 'samples': 1788416, 'steps': 3492, 'loss/train': 2.1671030521392822} +02/24/2022 06:05:53 - INFO - codeparrot_training - Step 3493: {'lr': 0.0004988073821684006, 'samples': 1788928, 'steps': 3493, 'loss/train': 3.2876837253570557} +02/24/2022 06:05:58 - INFO - codeparrot_training - Step 3494: {'lr': 0.0004988057852943025, 'samples': 1789440, 'steps': 3494, 'loss/train': 2.6805965900421143} +02/24/2022 06:06:02 - INFO - codeparrot_training - Step 3495: {'lr': 0.0004988041873543995, 'samples': 1789952, 'steps': 3495, 'loss/train': 2.0916295051574707} +02/24/2022 06:06:07 - INFO - codeparrot_training - Step 3496: {'lr': 0.0004988025883486983, 'samples': 1790464, 'steps': 3496, 'loss/train': 2.741621494293213} +02/24/2022 06:06:11 - INFO - codeparrot_training - Step 3497: {'lr': 0.0004988009882772058, 'samples': 1790976, 'steps': 3497, 'loss/train': 3.0412755012512207} +02/24/2022 06:06:18 - INFO - codeparrot_training - Step 3498: {'lr': 0.0004987993871399289, 'samples': 1791488, 'steps': 3498, 'loss/train': 4.121082305908203} +02/24/2022 06:06:21 - INFO - codeparrot_training - Step 3499: {'lr': 0.0004987977849368744, 'samples': 1792000, 'steps': 3499, 'loss/train': 3.4514198303222656} +02/24/2022 06:06:27 - INFO - codeparrot_training - Step 3500: {'lr': 0.0004987961816680492, 'samples': 1792512, 'steps': 3500, 'loss/train': 3.965061664581299} +02/24/2022 06:06:30 - INFO - codeparrot_training - Step 3501: {'lr': 0.0004987945773334602, 'samples': 1793024, 'steps': 3501, 'loss/train': 2.789762496948242} +02/24/2022 06:06:36 - INFO - codeparrot_training - Step 3502: {'lr': 0.0004987929719331142, 'samples': 1793536, 'steps': 3502, 'loss/train': 2.0071256160736084} +02/24/2022 06:06:39 - INFO - codeparrot_training - Step 3503: {'lr': 0.0004987913654670181, 'samples': 1794048, 'steps': 3503, 'loss/train': 3.3820815086364746} +02/24/2022 06:06:44 - INFO - codeparrot_training - Step 3504: {'lr': 0.0004987897579351787, 'samples': 1794560, 'steps': 3504, 'loss/train': 2.395979881286621} +02/24/2022 06:06:48 - INFO - codeparrot_training - Step 3505: {'lr': 0.0004987881493376032, 'samples': 1795072, 'steps': 3505, 'loss/train': 3.3991336822509766} +02/24/2022 06:06:54 - INFO - codeparrot_training - Step 3506: {'lr': 0.0004987865396742981, 'samples': 1795584, 'steps': 3506, 'loss/train': 3.1336047649383545} +02/24/2022 06:06:57 - INFO - codeparrot_training - Step 3507: {'lr': 0.0004987849289452705, 'samples': 1796096, 'steps': 3507, 'loss/train': 0.3818409740924835} +02/24/2022 06:07:04 - INFO - codeparrot_training - Step 3508: {'lr': 0.0004987833171505272, 'samples': 1796608, 'steps': 3508, 'loss/train': 3.589928388595581} +02/24/2022 06:07:07 - INFO - codeparrot_training - Step 3509: {'lr': 0.0004987817042900753, 'samples': 1797120, 'steps': 3509, 'loss/train': 2.944444417953491} +02/24/2022 06:07:13 - INFO - codeparrot_training - Step 3510: {'lr': 0.0004987800903639216, 'samples': 1797632, 'steps': 3510, 'loss/train': 3.009467124938965} +02/24/2022 06:07:16 - INFO - codeparrot_training - Step 3511: {'lr': 0.0004987784753720728, 'samples': 1798144, 'steps': 3511, 'loss/train': 1.7856380939483643} +02/24/2022 06:07:22 - INFO - codeparrot_training - Step 3512: {'lr': 0.0004987768593145362, 'samples': 1798656, 'steps': 3512, 'loss/train': 0.23413746058940887} +02/24/2022 06:07:25 - INFO - codeparrot_training - Step 3513: {'lr': 0.0004987752421913185, 'samples': 1799168, 'steps': 3513, 'loss/train': 2.859423875808716} +02/24/2022 06:07:31 - INFO - codeparrot_training - Step 3514: {'lr': 0.0004987736240024264, 'samples': 1799680, 'steps': 3514, 'loss/train': 2.9557743072509766} +02/24/2022 06:07:34 - INFO - codeparrot_training - Step 3515: {'lr': 0.0004987720047478673, 'samples': 1800192, 'steps': 3515, 'loss/train': 2.4329938888549805} +02/24/2022 06:07:40 - INFO - codeparrot_training - Step 3516: {'lr': 0.000498770384427648, 'samples': 1800704, 'steps': 3516, 'loss/train': 2.0503909587860107} +02/24/2022 06:07:43 - INFO - codeparrot_training - Step 3517: {'lr': 0.0004987687630417753, 'samples': 1801216, 'steps': 3517, 'loss/train': 3.767716646194458} +02/24/2022 06:07:49 - INFO - codeparrot_training - Step 3518: {'lr': 0.0004987671405902562, 'samples': 1801728, 'steps': 3518, 'loss/train': 4.070925712585449} +02/24/2022 06:07:52 - INFO - codeparrot_training - Step 3519: {'lr': 0.0004987655170730976, 'samples': 1802240, 'steps': 3519, 'loss/train': 2.934185266494751} +02/24/2022 06:07:58 - INFO - codeparrot_training - Step 3520: {'lr': 0.0004987638924903066, 'samples': 1802752, 'steps': 3520, 'loss/train': 2.184549331665039} +02/24/2022 06:08:01 - INFO - codeparrot_training - Step 3521: {'lr': 0.00049876226684189, 'samples': 1803264, 'steps': 3521, 'loss/train': 2.0489423274993896} +02/24/2022 06:08:07 - INFO - codeparrot_training - Step 3522: {'lr': 0.0004987606401278549, 'samples': 1803776, 'steps': 3522, 'loss/train': 3.404755115509033} +02/24/2022 06:08:10 - INFO - codeparrot_training - Step 3523: {'lr': 0.0004987590123482082, 'samples': 1804288, 'steps': 3523, 'loss/train': 3.2636003494262695} +02/24/2022 06:08:16 - INFO - codeparrot_training - Step 3524: {'lr': 0.0004987573835029569, 'samples': 1804800, 'steps': 3524, 'loss/train': 2.906745433807373} +02/24/2022 06:08:20 - INFO - codeparrot_training - Step 3525: {'lr': 0.0004987557535921079, 'samples': 1805312, 'steps': 3525, 'loss/train': 3.216729164123535} +02/24/2022 06:08:25 - INFO - codeparrot_training - Step 3526: {'lr': 0.0004987541226156683, 'samples': 1805824, 'steps': 3526, 'loss/train': 3.9713149070739746} +02/24/2022 06:08:29 - INFO - codeparrot_training - Step 3527: {'lr': 0.0004987524905736451, 'samples': 1806336, 'steps': 3527, 'loss/train': 2.592583656311035} +02/24/2022 06:08:34 - INFO - codeparrot_training - Step 3528: {'lr': 0.000498750857466045, 'samples': 1806848, 'steps': 3528, 'loss/train': 2.5117557048797607} +02/24/2022 06:08:38 - INFO - codeparrot_training - Step 3529: {'lr': 0.0004987492232928753, 'samples': 1807360, 'steps': 3529, 'loss/train': 3.4634780883789062} +02/24/2022 06:08:43 - INFO - codeparrot_training - Step 3530: {'lr': 0.000498747588054143, 'samples': 1807872, 'steps': 3530, 'loss/train': 2.570119857788086} +02/24/2022 06:08:47 - INFO - codeparrot_training - Step 3531: {'lr': 0.0004987459517498549, 'samples': 1808384, 'steps': 3531, 'loss/train': 2.5747294425964355} +02/24/2022 06:08:52 - INFO - codeparrot_training - Step 3532: {'lr': 0.0004987443143800182, 'samples': 1808896, 'steps': 3532, 'loss/train': 2.667483329772949} +02/24/2022 06:08:56 - INFO - codeparrot_training - Step 3533: {'lr': 0.0004987426759446398, 'samples': 1809408, 'steps': 3533, 'loss/train': 1.96893310546875} +02/24/2022 06:09:02 - INFO - codeparrot_training - Step 3534: {'lr': 0.0004987410364437269, 'samples': 1809920, 'steps': 3534, 'loss/train': 3.6135401725769043} +02/24/2022 06:09:05 - INFO - codeparrot_training - Step 3535: {'lr': 0.0004987393958772862, 'samples': 1810432, 'steps': 3535, 'loss/train': 1.8765099048614502} +02/24/2022 06:09:11 - INFO - codeparrot_training - Step 3536: {'lr': 0.0004987377542453251, 'samples': 1810944, 'steps': 3536, 'loss/train': 2.8146049976348877} +02/24/2022 06:09:14 - INFO - codeparrot_training - Step 3537: {'lr': 0.0004987361115478502, 'samples': 1811456, 'steps': 3537, 'loss/train': 2.16068959236145} +02/24/2022 06:09:20 - INFO - codeparrot_training - Step 3538: {'lr': 0.000498734467784869, 'samples': 1811968, 'steps': 3538, 'loss/train': 2.9196808338165283} +02/24/2022 06:09:23 - INFO - codeparrot_training - Step 3539: {'lr': 0.0004987328229563883, 'samples': 1812480, 'steps': 3539, 'loss/train': 4.4412150382995605} +02/24/2022 06:09:29 - INFO - codeparrot_training - Step 3540: {'lr': 0.0004987311770624151, 'samples': 1812992, 'steps': 3540, 'loss/train': 2.660609483718872} +02/24/2022 06:09:33 - INFO - codeparrot_training - Step 3541: {'lr': 0.0004987295301029565, 'samples': 1813504, 'steps': 3541, 'loss/train': 4.113503456115723} +02/24/2022 06:09:38 - INFO - codeparrot_training - Step 3542: {'lr': 0.0004987278820780196, 'samples': 1814016, 'steps': 3542, 'loss/train': 2.2509279251098633} +02/24/2022 06:09:42 - INFO - codeparrot_training - Step 3543: {'lr': 0.0004987262329876114, 'samples': 1814528, 'steps': 3543, 'loss/train': 4.698426246643066} +02/24/2022 06:09:48 - INFO - codeparrot_training - Step 3544: {'lr': 0.000498724582831739, 'samples': 1815040, 'steps': 3544, 'loss/train': 2.624014139175415} +02/24/2022 06:09:51 - INFO - codeparrot_training - Step 3545: {'lr': 0.0004987229316104095, 'samples': 1815552, 'steps': 3545, 'loss/train': 2.6331729888916016} +02/24/2022 06:09:57 - INFO - codeparrot_training - Step 3546: {'lr': 0.00049872127932363, 'samples': 1816064, 'steps': 3546, 'loss/train': 2.3879826068878174} +02/24/2022 06:10:00 - INFO - codeparrot_training - Step 3547: {'lr': 0.0004987196259714074, 'samples': 1816576, 'steps': 3547, 'loss/train': 2.751999855041504} +02/24/2022 06:10:06 - INFO - codeparrot_training - Step 3548: {'lr': 0.000498717971553749, 'samples': 1817088, 'steps': 3548, 'loss/train': 2.8464198112487793} +02/24/2022 06:10:09 - INFO - codeparrot_training - Step 3549: {'lr': 0.0004987163160706617, 'samples': 1817600, 'steps': 3549, 'loss/train': 1.9754754304885864} +02/24/2022 06:10:15 - INFO - codeparrot_training - Step 3550: {'lr': 0.0004987146595221527, 'samples': 1818112, 'steps': 3550, 'loss/train': 1.407125473022461} +02/24/2022 06:10:18 - INFO - codeparrot_training - Step 3551: {'lr': 0.0004987130019082291, 'samples': 1818624, 'steps': 3551, 'loss/train': 2.565162420272827} +02/24/2022 06:10:24 - INFO - codeparrot_training - Step 3552: {'lr': 0.000498711343228898, 'samples': 1819136, 'steps': 3552, 'loss/train': 3.4164676666259766} +02/24/2022 06:10:27 - INFO - codeparrot_training - Step 3553: {'lr': 0.0004987096834841665, 'samples': 1819648, 'steps': 3553, 'loss/train': 3.1859078407287598} +02/24/2022 06:10:34 - INFO - codeparrot_training - Step 3554: {'lr': 0.0004987080226740416, 'samples': 1820160, 'steps': 3554, 'loss/train': 2.8668935298919678} +02/24/2022 06:10:37 - INFO - codeparrot_training - Step 3555: {'lr': 0.0004987063607985305, 'samples': 1820672, 'steps': 3555, 'loss/train': 2.7604565620422363} +02/24/2022 06:10:43 - INFO - codeparrot_training - Step 3556: {'lr': 0.0004987046978576404, 'samples': 1821184, 'steps': 3556, 'loss/train': 2.679964303970337} +02/24/2022 06:10:46 - INFO - codeparrot_training - Step 3557: {'lr': 0.0004987030338513783, 'samples': 1821696, 'steps': 3557, 'loss/train': 2.652012348175049} +02/24/2022 06:10:52 - INFO - codeparrot_training - Step 3558: {'lr': 0.0004987013687797514, 'samples': 1822208, 'steps': 3558, 'loss/train': 3.0175700187683105} +02/24/2022 06:10:55 - INFO - codeparrot_training - Step 3559: {'lr': 0.0004986997026427668, 'samples': 1822720, 'steps': 3559, 'loss/train': 3.9427781105041504} +02/24/2022 06:11:01 - INFO - codeparrot_training - Step 3560: {'lr': 0.0004986980354404316, 'samples': 1823232, 'steps': 3560, 'loss/train': 2.6489572525024414} +02/24/2022 06:11:04 - INFO - codeparrot_training - Step 3561: {'lr': 0.000498696367172753, 'samples': 1823744, 'steps': 3561, 'loss/train': 1.2118130922317505} +02/24/2022 06:11:10 - INFO - codeparrot_training - Step 3562: {'lr': 0.0004986946978397382, 'samples': 1824256, 'steps': 3562, 'loss/train': 3.7711474895477295} +02/24/2022 06:11:13 - INFO - codeparrot_training - Step 3563: {'lr': 0.0004986930274413942, 'samples': 1824768, 'steps': 3563, 'loss/train': 3.0194289684295654} +02/24/2022 06:11:19 - INFO - codeparrot_training - Step 3564: {'lr': 0.0004986913559777283, 'samples': 1825280, 'steps': 3564, 'loss/train': 2.7172155380249023} +02/24/2022 06:11:22 - INFO - codeparrot_training - Step 3565: {'lr': 0.0004986896834487477, 'samples': 1825792, 'steps': 3565, 'loss/train': 2.6076767444610596} +02/24/2022 06:11:28 - INFO - codeparrot_training - Step 3566: {'lr': 0.0004986880098544593, 'samples': 1826304, 'steps': 3566, 'loss/train': 3.3423800468444824} +02/24/2022 06:11:31 - INFO - codeparrot_training - Step 3567: {'lr': 0.0004986863351948705, 'samples': 1826816, 'steps': 3567, 'loss/train': 5.826650619506836} +02/24/2022 06:11:37 - INFO - codeparrot_training - Step 3568: {'lr': 0.0004986846594699883, 'samples': 1827328, 'steps': 3568, 'loss/train': 5.136133193969727} +02/24/2022 06:11:40 - INFO - codeparrot_training - Step 3569: {'lr': 0.0004986829826798202, 'samples': 1827840, 'steps': 3569, 'loss/train': 2.5577759742736816} +02/24/2022 06:11:47 - INFO - codeparrot_training - Step 3570: {'lr': 0.0004986813048243729, 'samples': 1828352, 'steps': 3570, 'loss/train': 3.0491981506347656} +02/24/2022 06:11:50 - INFO - codeparrot_training - Step 3571: {'lr': 0.000498679625903654, 'samples': 1828864, 'steps': 3571, 'loss/train': 2.9680490493774414} +02/24/2022 06:11:56 - INFO - codeparrot_training - Step 3572: {'lr': 0.0004986779459176706, 'samples': 1829376, 'steps': 3572, 'loss/train': 2.868858814239502} +02/24/2022 06:11:59 - INFO - codeparrot_training - Step 3573: {'lr': 0.0004986762648664298, 'samples': 1829888, 'steps': 3573, 'loss/train': 2.118520498275757} +02/24/2022 06:12:05 - INFO - codeparrot_training - Step 3574: {'lr': 0.0004986745827499389, 'samples': 1830400, 'steps': 3574, 'loss/train': 3.633444309234619} +02/24/2022 06:12:08 - INFO - codeparrot_training - Step 3575: {'lr': 0.0004986728995682049, 'samples': 1830912, 'steps': 3575, 'loss/train': 2.089087963104248} +02/24/2022 06:12:14 - INFO - codeparrot_training - Step 3576: {'lr': 0.0004986712153212352, 'samples': 1831424, 'steps': 3576, 'loss/train': 2.3902127742767334} +02/24/2022 06:12:17 - INFO - codeparrot_training - Step 3577: {'lr': 0.0004986695300090371, 'samples': 1831936, 'steps': 3577, 'loss/train': 2.7970895767211914} +02/24/2022 06:12:23 - INFO - codeparrot_training - Step 3578: {'lr': 0.0004986678436316175, 'samples': 1832448, 'steps': 3578, 'loss/train': 3.4691977500915527} +02/24/2022 06:12:26 - INFO - codeparrot_training - Step 3579: {'lr': 0.000498666156188984, 'samples': 1832960, 'steps': 3579, 'loss/train': 2.964557647705078} +02/24/2022 06:12:32 - INFO - codeparrot_training - Step 3580: {'lr': 0.0004986644676811436, 'samples': 1833472, 'steps': 3580, 'loss/train': 1.213300108909607} +02/24/2022 06:12:36 - INFO - codeparrot_training - Step 3581: {'lr': 0.0004986627781081035, 'samples': 1833984, 'steps': 3581, 'loss/train': 3.4480254650115967} +02/24/2022 06:12:41 - INFO - codeparrot_training - Step 3582: {'lr': 0.0004986610874698712, 'samples': 1834496, 'steps': 3582, 'loss/train': 2.341282844543457} +02/24/2022 06:12:45 - INFO - codeparrot_training - Step 3583: {'lr': 0.0004986593957664536, 'samples': 1835008, 'steps': 3583, 'loss/train': 2.6536827087402344} +02/24/2022 06:12:50 - INFO - codeparrot_training - Step 3584: {'lr': 0.0004986577029978581, 'samples': 1835520, 'steps': 3584, 'loss/train': 4.221004009246826} +02/24/2022 06:12:54 - INFO - codeparrot_training - Step 3585: {'lr': 0.000498656009164092, 'samples': 1836032, 'steps': 3585, 'loss/train': 0.5307567119598389} +02/24/2022 06:12:59 - INFO - codeparrot_training - Step 3586: {'lr': 0.0004986543142651625, 'samples': 1836544, 'steps': 3586, 'loss/train': 0.7687624096870422} +02/24/2022 06:13:03 - INFO - codeparrot_training - Step 3587: {'lr': 0.0004986526183010769, 'samples': 1837056, 'steps': 3587, 'loss/train': 1.857844591140747} +02/24/2022 06:13:08 - INFO - codeparrot_training - Step 3588: {'lr': 0.0004986509212718425, 'samples': 1837568, 'steps': 3588, 'loss/train': 4.479802131652832} +02/24/2022 06:13:12 - INFO - codeparrot_training - Step 3589: {'lr': 0.0004986492231774664, 'samples': 1838080, 'steps': 3589, 'loss/train': 3.233942747116089} +02/24/2022 06:13:18 - INFO - codeparrot_training - Step 3590: {'lr': 0.0004986475240179559, 'samples': 1838592, 'steps': 3590, 'loss/train': 3.5459752082824707} +02/24/2022 06:13:24 - INFO - codeparrot_training - Step 3591: {'lr': 0.0004986458237933185, 'samples': 1839104, 'steps': 3591, 'loss/train': 3.054870843887329} +02/24/2022 06:13:27 - INFO - codeparrot_training - Step 3592: {'lr': 0.0004986441225035614, 'samples': 1839616, 'steps': 3592, 'loss/train': 2.552980899810791} +02/24/2022 06:13:33 - INFO - codeparrot_training - Step 3593: {'lr': 0.0004986424201486918, 'samples': 1840128, 'steps': 3593, 'loss/train': 3.2321536540985107} +02/24/2022 06:13:36 - INFO - codeparrot_training - Step 3594: {'lr': 0.000498640716728717, 'samples': 1840640, 'steps': 3594, 'loss/train': 3.447997808456421} +02/24/2022 06:13:42 - INFO - codeparrot_training - Step 3595: {'lr': 0.0004986390122436443, 'samples': 1841152, 'steps': 3595, 'loss/train': 2.6887683868408203} +02/24/2022 06:13:45 - INFO - codeparrot_training - Step 3596: {'lr': 0.000498637306693481, 'samples': 1841664, 'steps': 3596, 'loss/train': 2.5113518238067627} +02/24/2022 06:13:51 - INFO - codeparrot_training - Step 3597: {'lr': 0.0004986356000782345, 'samples': 1842176, 'steps': 3597, 'loss/train': 3.2579522132873535} +02/24/2022 06:13:54 - INFO - codeparrot_training - Step 3598: {'lr': 0.0004986338923979119, 'samples': 1842688, 'steps': 3598, 'loss/train': 2.677898645401001} +02/24/2022 06:14:00 - INFO - codeparrot_training - Step 3599: {'lr': 0.0004986321836525209, 'samples': 1843200, 'steps': 3599, 'loss/train': 1.6726078987121582} +02/24/2022 06:14:04 - INFO - codeparrot_training - Step 3600: {'lr': 0.0004986304738420684, 'samples': 1843712, 'steps': 3600, 'loss/train': 2.5096471309661865} +02/24/2022 06:14:09 - INFO - codeparrot_training - Step 3601: {'lr': 0.0004986287629665619, 'samples': 1844224, 'steps': 3601, 'loss/train': 3.162659168243408} +02/24/2022 06:14:13 - INFO - codeparrot_training - Step 3602: {'lr': 0.0004986270510260087, 'samples': 1844736, 'steps': 3602, 'loss/train': 3.1090872287750244} +02/24/2022 06:14:18 - INFO - codeparrot_training - Step 3603: {'lr': 0.0004986253380204163, 'samples': 1845248, 'steps': 3603, 'loss/train': 2.805504322052002} +02/24/2022 06:14:22 - INFO - codeparrot_training - Step 3604: {'lr': 0.0004986236239497918, 'samples': 1845760, 'steps': 3604, 'loss/train': 3.178154230117798} +02/24/2022 06:14:27 - INFO - codeparrot_training - Step 3605: {'lr': 0.0004986219088141426, 'samples': 1846272, 'steps': 3605, 'loss/train': 2.967597246170044} +02/24/2022 06:14:31 - INFO - codeparrot_training - Step 3606: {'lr': 0.0004986201926134761, 'samples': 1846784, 'steps': 3606, 'loss/train': 2.679542064666748} +02/24/2022 06:14:37 - INFO - codeparrot_training - Step 3607: {'lr': 0.0004986184753477998, 'samples': 1847296, 'steps': 3607, 'loss/train': 3.167675256729126} +02/24/2022 06:14:40 - INFO - codeparrot_training - Step 3608: {'lr': 0.0004986167570171208, 'samples': 1847808, 'steps': 3608, 'loss/train': 2.4734816551208496} +02/24/2022 06:14:44 - INFO - codeparrot_training - Step 3609: {'lr': 0.0004986150376214465, 'samples': 1848320, 'steps': 3609, 'loss/train': 3.0577664375305176} +02/24/2022 06:14:49 - INFO - codeparrot_training - Step 3610: {'lr': 0.0004986133171607844, 'samples': 1848832, 'steps': 3610, 'loss/train': 3.364696502685547} +02/24/2022 06:14:53 - INFO - codeparrot_training - Step 3611: {'lr': 0.0004986115956351417, 'samples': 1849344, 'steps': 3611, 'loss/train': 2.9301700592041016} +02/24/2022 06:14:58 - INFO - codeparrot_training - Step 3612: {'lr': 0.000498609873044526, 'samples': 1849856, 'steps': 3612, 'loss/train': 2.5401082038879395} +02/24/2022 06:15:02 - INFO - codeparrot_training - Step 3613: {'lr': 0.0004986081493889444, 'samples': 1850368, 'steps': 3613, 'loss/train': 3.115487813949585} +02/24/2022 06:15:07 - INFO - codeparrot_training - Step 3614: {'lr': 0.0004986064246684046, 'samples': 1850880, 'steps': 3614, 'loss/train': 3.4398844242095947} +02/24/2022 06:15:11 - INFO - codeparrot_training - Step 3615: {'lr': 0.0004986046988829136, 'samples': 1851392, 'steps': 3615, 'loss/train': 3.2407708168029785} +02/24/2022 06:15:17 - INFO - codeparrot_training - Step 3616: {'lr': 0.0004986029720324791, 'samples': 1851904, 'steps': 3616, 'loss/train': 2.397080898284912} +02/24/2022 06:15:20 - INFO - codeparrot_training - Step 3617: {'lr': 0.0004986012441171085, 'samples': 1852416, 'steps': 3617, 'loss/train': 4.38129186630249} +02/24/2022 06:15:26 - INFO - codeparrot_training - Step 3618: {'lr': 0.000498599515136809, 'samples': 1852928, 'steps': 3618, 'loss/train': 3.37368106842041} +02/24/2022 06:15:29 - INFO - codeparrot_training - Step 3619: {'lr': 0.0004985977850915882, 'samples': 1853440, 'steps': 3619, 'loss/train': 1.7670608758926392} +02/24/2022 06:15:35 - INFO - codeparrot_training - Step 3620: {'lr': 0.0004985960539814534, 'samples': 1853952, 'steps': 3620, 'loss/train': 2.857828140258789} +02/24/2022 06:15:38 - INFO - codeparrot_training - Step 3621: {'lr': 0.000498594321806412, 'samples': 1854464, 'steps': 3621, 'loss/train': 2.5764071941375732} +02/24/2022 06:15:44 - INFO - codeparrot_training - Step 3622: {'lr': 0.0004985925885664716, 'samples': 1854976, 'steps': 3622, 'loss/train': 2.8645498752593994} +02/24/2022 06:15:49 - INFO - codeparrot_training - Step 3623: {'lr': 0.0004985908542616393, 'samples': 1855488, 'steps': 3623, 'loss/train': 0.915501594543457} +02/24/2022 06:15:53 - INFO - codeparrot_training - Step 3624: {'lr': 0.0004985891188919229, 'samples': 1856000, 'steps': 3624, 'loss/train': 3.1070609092712402} +02/24/2022 06:15:58 - INFO - codeparrot_training - Step 3625: {'lr': 0.0004985873824573296, 'samples': 1856512, 'steps': 3625, 'loss/train': 3.562913179397583} +02/24/2022 06:16:02 - INFO - codeparrot_training - Step 3626: {'lr': 0.0004985856449578667, 'samples': 1857024, 'steps': 3626, 'loss/train': 0.7748192548751831} +02/24/2022 06:16:08 - INFO - codeparrot_training - Step 3627: {'lr': 0.0004985839063935421, 'samples': 1857536, 'steps': 3627, 'loss/train': 3.190570116043091} +02/24/2022 06:16:12 - INFO - codeparrot_training - Step 3628: {'lr': 0.0004985821667643628, 'samples': 1858048, 'steps': 3628, 'loss/train': 2.9273016452789307} +02/24/2022 06:16:17 - INFO - codeparrot_training - Step 3629: {'lr': 0.0004985804260703364, 'samples': 1858560, 'steps': 3629, 'loss/train': 2.5061187744140625} +02/24/2022 06:16:21 - INFO - codeparrot_training - Step 3630: {'lr': 0.0004985786843114706, 'samples': 1859072, 'steps': 3630, 'loss/train': 1.3142304420471191} +02/24/2022 06:16:26 - INFO - codeparrot_training - Step 3631: {'lr': 0.0004985769414877725, 'samples': 1859584, 'steps': 3631, 'loss/train': 1.9471021890640259} +02/24/2022 06:16:30 - INFO - codeparrot_training - Step 3632: {'lr': 0.0004985751975992497, 'samples': 1860096, 'steps': 3632, 'loss/train': 1.9094825983047485} +02/24/2022 06:16:35 - INFO - codeparrot_training - Step 3633: {'lr': 0.0004985734526459098, 'samples': 1860608, 'steps': 3633, 'loss/train': 3.0633323192596436} +02/24/2022 06:16:39 - INFO - codeparrot_training - Step 3634: {'lr': 0.0004985717066277601, 'samples': 1861120, 'steps': 3634, 'loss/train': 2.8317251205444336} +02/24/2022 06:16:44 - INFO - codeparrot_training - Step 3635: {'lr': 0.0004985699595448081, 'samples': 1861632, 'steps': 3635, 'loss/train': 2.7256269454956055} +02/24/2022 06:16:48 - INFO - codeparrot_training - Step 3636: {'lr': 0.0004985682113970613, 'samples': 1862144, 'steps': 3636, 'loss/train': 4.393418312072754} +02/24/2022 06:16:54 - INFO - codeparrot_training - Step 3637: {'lr': 0.0004985664621845273, 'samples': 1862656, 'steps': 3637, 'loss/train': 2.0420732498168945} +02/24/2022 06:16:58 - INFO - codeparrot_training - Step 3638: {'lr': 0.0004985647119072135, 'samples': 1863168, 'steps': 3638, 'loss/train': 2.6048779487609863} +02/24/2022 06:17:03 - INFO - codeparrot_training - Step 3639: {'lr': 0.0004985629605651273, 'samples': 1863680, 'steps': 3639, 'loss/train': 3.3446872234344482} +02/24/2022 06:17:07 - INFO - codeparrot_training - Step 3640: {'lr': 0.0004985612081582763, 'samples': 1864192, 'steps': 3640, 'loss/train': 0.5013619065284729} +02/24/2022 06:17:12 - INFO - codeparrot_training - Step 3641: {'lr': 0.0004985594546866682, 'samples': 1864704, 'steps': 3641, 'loss/train': 3.417056083679199} +02/24/2022 06:17:16 - INFO - codeparrot_training - Step 3642: {'lr': 0.0004985577001503102, 'samples': 1865216, 'steps': 3642, 'loss/train': 4.4992995262146} +02/24/2022 06:17:21 - INFO - codeparrot_training - Step 3643: {'lr': 0.0004985559445492099, 'samples': 1865728, 'steps': 3643, 'loss/train': 2.3566882610321045} +02/24/2022 06:17:25 - INFO - codeparrot_training - Step 3644: {'lr': 0.0004985541878833749, 'samples': 1866240, 'steps': 3644, 'loss/train': 1.6865768432617188} +02/24/2022 06:17:30 - INFO - codeparrot_training - Step 3645: {'lr': 0.0004985524301528127, 'samples': 1866752, 'steps': 3645, 'loss/train': 3.049612522125244} +02/24/2022 06:17:34 - INFO - codeparrot_training - Step 3646: {'lr': 0.0004985506713575307, 'samples': 1867264, 'steps': 3646, 'loss/train': 3.0081145763397217} +02/24/2022 06:17:40 - INFO - codeparrot_training - Step 3647: {'lr': 0.0004985489114975368, 'samples': 1867776, 'steps': 3647, 'loss/train': 2.305170774459839} +02/24/2022 06:17:44 - INFO - codeparrot_training - Step 3648: {'lr': 0.0004985471505728381, 'samples': 1868288, 'steps': 3648, 'loss/train': 2.7749903202056885} +02/24/2022 06:17:49 - INFO - codeparrot_training - Step 3649: {'lr': 0.0004985453885834423, 'samples': 1868800, 'steps': 3649, 'loss/train': 3.4093470573425293} +02/24/2022 06:17:53 - INFO - codeparrot_training - Step 3650: {'lr': 0.0004985436255293571, 'samples': 1869312, 'steps': 3650, 'loss/train': 3.6422040462493896} +02/24/2022 06:17:58 - INFO - codeparrot_training - Step 3651: {'lr': 0.0004985418614105898, 'samples': 1869824, 'steps': 3651, 'loss/train': 3.808675527572632} +02/24/2022 06:18:02 - INFO - codeparrot_training - Step 3652: {'lr': 0.0004985400962271482, 'samples': 1870336, 'steps': 3652, 'loss/train': 5.389224529266357} +02/24/2022 06:18:07 - INFO - codeparrot_training - Step 3653: {'lr': 0.0004985383299790397, 'samples': 1870848, 'steps': 3653, 'loss/train': 2.6073951721191406} +02/24/2022 06:18:11 - INFO - codeparrot_training - Step 3654: {'lr': 0.0004985365626662719, 'samples': 1871360, 'steps': 3654, 'loss/train': 2.7892062664031982} +02/24/2022 06:18:16 - INFO - codeparrot_training - Step 3655: {'lr': 0.0004985347942888524, 'samples': 1871872, 'steps': 3655, 'loss/train': 1.9611306190490723} +02/24/2022 06:18:20 - INFO - codeparrot_training - Step 3656: {'lr': 0.0004985330248467888, 'samples': 1872384, 'steps': 3656, 'loss/train': 3.443881034851074} +02/24/2022 06:18:25 - INFO - codeparrot_training - Step 3657: {'lr': 0.0004985312543400886, 'samples': 1872896, 'steps': 3657, 'loss/train': 1.3975470066070557} +02/24/2022 06:18:29 - INFO - codeparrot_training - Step 3658: {'lr': 0.0004985294827687594, 'samples': 1873408, 'steps': 3658, 'loss/train': 2.50839900970459} +02/24/2022 06:18:34 - INFO - codeparrot_training - Step 3659: {'lr': 0.0004985277101328088, 'samples': 1873920, 'steps': 3659, 'loss/train': 1.0921086072921753} +02/24/2022 06:18:37 - INFO - codeparrot_training - Step 3660: {'lr': 0.0004985259364322445, 'samples': 1874432, 'steps': 3660, 'loss/train': 2.4239721298217773} +02/24/2022 06:18:43 - INFO - codeparrot_training - Step 3661: {'lr': 0.0004985241616670739, 'samples': 1874944, 'steps': 3661, 'loss/train': 3.2412805557250977} +02/24/2022 06:18:46 - INFO - codeparrot_training - Step 3662: {'lr': 0.0004985223858373048, 'samples': 1875456, 'steps': 3662, 'loss/train': 2.599910020828247} +02/24/2022 06:18:53 - INFO - codeparrot_training - Step 3663: {'lr': 0.0004985206089429447, 'samples': 1875968, 'steps': 3663, 'loss/train': 1.2451457977294922} +02/24/2022 06:18:56 - INFO - codeparrot_training - Step 3664: {'lr': 0.0004985188309840012, 'samples': 1876480, 'steps': 3664, 'loss/train': 2.9755656719207764} +02/24/2022 06:19:02 - INFO - codeparrot_training - Step 3665: {'lr': 0.0004985170519604819, 'samples': 1876992, 'steps': 3665, 'loss/train': 3.2344369888305664} +02/24/2022 06:19:05 - INFO - codeparrot_training - Step 3666: {'lr': 0.0004985152718723944, 'samples': 1877504, 'steps': 3666, 'loss/train': 2.8349993228912354} +02/24/2022 06:19:11 - INFO - codeparrot_training - Step 3667: {'lr': 0.0004985134907197466, 'samples': 1878016, 'steps': 3667, 'loss/train': 4.3324456214904785} +02/24/2022 06:19:14 - INFO - codeparrot_training - Step 3668: {'lr': 0.0004985117085025458, 'samples': 1878528, 'steps': 3668, 'loss/train': 3.3636906147003174} +02/24/2022 06:19:20 - INFO - codeparrot_training - Step 3669: {'lr': 0.0004985099252207998, 'samples': 1879040, 'steps': 3669, 'loss/train': 3.616454839706421} +02/24/2022 06:19:24 - INFO - codeparrot_training - Step 3670: {'lr': 0.0004985081408745161, 'samples': 1879552, 'steps': 3670, 'loss/train': 3.3376147747039795} +02/24/2022 06:19:29 - INFO - codeparrot_training - Step 3671: {'lr': 0.0004985063554637025, 'samples': 1880064, 'steps': 3671, 'loss/train': 2.973696708679199} +02/24/2022 06:19:33 - INFO - codeparrot_training - Step 3672: {'lr': 0.0004985045689883665, 'samples': 1880576, 'steps': 3672, 'loss/train': 4.10275936126709} +02/24/2022 06:19:39 - INFO - codeparrot_training - Step 3673: {'lr': 0.0004985027814485159, 'samples': 1881088, 'steps': 3673, 'loss/train': 0.3154342472553253} +02/24/2022 06:19:42 - INFO - codeparrot_training - Step 3674: {'lr': 0.0004985009928441584, 'samples': 1881600, 'steps': 3674, 'loss/train': 3.094592571258545} +02/24/2022 06:19:48 - INFO - codeparrot_training - Step 3675: {'lr': 0.0004984992031753014, 'samples': 1882112, 'steps': 3675, 'loss/train': 2.868638038635254} +02/24/2022 06:19:51 - INFO - codeparrot_training - Step 3676: {'lr': 0.0004984974124419528, 'samples': 1882624, 'steps': 3676, 'loss/train': 2.966783046722412} +02/24/2022 06:19:57 - INFO - codeparrot_training - Step 3677: {'lr': 0.0004984956206441201, 'samples': 1883136, 'steps': 3677, 'loss/train': 2.5661163330078125} +02/24/2022 06:20:01 - INFO - codeparrot_training - Step 3678: {'lr': 0.0004984938277818112, 'samples': 1883648, 'steps': 3678, 'loss/train': 2.9959144592285156} +02/24/2022 06:20:06 - INFO - codeparrot_training - Step 3679: {'lr': 0.0004984920338550335, 'samples': 1884160, 'steps': 3679, 'loss/train': 3.004345178604126} +02/24/2022 06:20:09 - INFO - codeparrot_training - Step 3680: {'lr': 0.0004984902388637949, 'samples': 1884672, 'steps': 3680, 'loss/train': 2.672820568084717} +02/24/2022 06:20:15 - INFO - codeparrot_training - Step 3681: {'lr': 0.0004984884428081031, 'samples': 1885184, 'steps': 3681, 'loss/train': 2.1061084270477295} +02/24/2022 06:20:18 - INFO - codeparrot_training - Step 3682: {'lr': 0.0004984866456879657, 'samples': 1885696, 'steps': 3682, 'loss/train': 2.685220956802368} +02/24/2022 06:20:25 - INFO - codeparrot_training - Step 3683: {'lr': 0.0004984848475033903, 'samples': 1886208, 'steps': 3683, 'loss/train': 2.81203031539917} +02/24/2022 06:20:28 - INFO - codeparrot_training - Step 3684: {'lr': 0.0004984830482543847, 'samples': 1886720, 'steps': 3684, 'loss/train': 3.422428607940674} +02/24/2022 06:20:34 - INFO - codeparrot_training - Step 3685: {'lr': 0.0004984812479409568, 'samples': 1887232, 'steps': 3685, 'loss/train': 2.7525634765625} +02/24/2022 06:20:37 - INFO - codeparrot_training - Step 3686: {'lr': 0.000498479446563114, 'samples': 1887744, 'steps': 3686, 'loss/train': 2.6657323837280273} +02/24/2022 06:20:43 - INFO - codeparrot_training - Step 3687: {'lr': 0.0004984776441208642, 'samples': 1888256, 'steps': 3687, 'loss/train': 3.3297717571258545} +02/24/2022 06:20:46 - INFO - codeparrot_training - Step 3688: {'lr': 0.000498475840614215, 'samples': 1888768, 'steps': 3688, 'loss/train': 1.8983154296875} +02/24/2022 06:20:52 - INFO - codeparrot_training - Step 3689: {'lr': 0.0004984740360431742, 'samples': 1889280, 'steps': 3689, 'loss/train': 3.2670509815216064} +02/24/2022 06:20:55 - INFO - codeparrot_training - Step 3690: {'lr': 0.0004984722304077496, 'samples': 1889792, 'steps': 3690, 'loss/train': 3.821131944656372} +02/24/2022 06:21:01 - INFO - codeparrot_training - Step 3691: {'lr': 0.0004984704237079489, 'samples': 1890304, 'steps': 3691, 'loss/train': 1.655848503112793} +02/24/2022 06:21:04 - INFO - codeparrot_training - Step 3692: {'lr': 0.0004984686159437798, 'samples': 1890816, 'steps': 3692, 'loss/train': 3.352936267852783} +02/24/2022 06:21:10 - INFO - codeparrot_training - Step 3693: {'lr': 0.00049846680711525, 'samples': 1891328, 'steps': 3693, 'loss/train': 1.884761095046997} +02/24/2022 06:21:14 - INFO - codeparrot_training - Step 3694: {'lr': 0.0004984649972223673, 'samples': 1891840, 'steps': 3694, 'loss/train': 3.1419994831085205} +02/24/2022 06:21:19 - INFO - codeparrot_training - Step 3695: {'lr': 0.0004984631862651395, 'samples': 1892352, 'steps': 3695, 'loss/train': 3.305544137954712} +02/24/2022 06:21:23 - INFO - codeparrot_training - Step 3696: {'lr': 0.0004984613742435742, 'samples': 1892864, 'steps': 3696, 'loss/train': 2.4752044677734375} +02/24/2022 06:21:28 - INFO - codeparrot_training - Step 3697: {'lr': 0.0004984595611576793, 'samples': 1893376, 'steps': 3697, 'loss/train': 2.936878204345703} +02/24/2022 06:21:32 - INFO - codeparrot_training - Step 3698: {'lr': 0.0004984577470074625, 'samples': 1893888, 'steps': 3698, 'loss/train': 1.8516157865524292} +02/24/2022 06:21:37 - INFO - codeparrot_training - Step 3699: {'lr': 0.0004984559317929317, 'samples': 1894400, 'steps': 3699, 'loss/train': 2.8204853534698486} +02/24/2022 06:21:43 - INFO - codeparrot_training - Step 3700: {'lr': 0.0004984541155140946, 'samples': 1894912, 'steps': 3700, 'loss/train': 5.0451788902282715} +02/24/2022 06:21:46 - INFO - codeparrot_training - Step 3701: {'lr': 0.0004984522981709589, 'samples': 1895424, 'steps': 3701, 'loss/train': 3.1432008743286133} +02/24/2022 06:21:52 - INFO - codeparrot_training - Step 3702: {'lr': 0.0004984504797635324, 'samples': 1895936, 'steps': 3702, 'loss/train': 2.3866403102874756} +02/24/2022 06:21:55 - INFO - codeparrot_training - Step 3703: {'lr': 0.000498448660291823, 'samples': 1896448, 'steps': 3703, 'loss/train': 3.1456377506256104} +02/24/2022 06:22:01 - INFO - codeparrot_training - Step 3704: {'lr': 0.0004984468397558384, 'samples': 1896960, 'steps': 3704, 'loss/train': 1.9401761293411255} +02/24/2022 06:22:04 - INFO - codeparrot_training - Step 3705: {'lr': 0.0004984450181555864, 'samples': 1897472, 'steps': 3705, 'loss/train': 2.999276876449585} +02/24/2022 06:22:10 - INFO - codeparrot_training - Step 3706: {'lr': 0.0004984431954910749, 'samples': 1897984, 'steps': 3706, 'loss/train': 2.5989420413970947} +02/24/2022 06:22:13 - INFO - codeparrot_training - Step 3707: {'lr': 0.0004984413717623117, 'samples': 1898496, 'steps': 3707, 'loss/train': 3.1828978061676025} +02/24/2022 06:22:19 - INFO - codeparrot_training - Step 3708: {'lr': 0.0004984395469693044, 'samples': 1899008, 'steps': 3708, 'loss/train': 2.2003378868103027} +02/24/2022 06:22:23 - INFO - codeparrot_training - Step 3709: {'lr': 0.000498437721112061, 'samples': 1899520, 'steps': 3709, 'loss/train': 2.798269510269165} +02/24/2022 06:22:28 - INFO - codeparrot_training - Step 3710: {'lr': 0.0004984358941905894, 'samples': 1900032, 'steps': 3710, 'loss/train': 3.3372676372528076} +02/24/2022 06:22:32 - INFO - codeparrot_training - Step 3711: {'lr': 0.0004984340662048972, 'samples': 1900544, 'steps': 3711, 'loss/train': 3.361177682876587} +02/24/2022 06:22:37 - INFO - codeparrot_training - Step 3712: {'lr': 0.0004984322371549924, 'samples': 1901056, 'steps': 3712, 'loss/train': 3.1103322505950928} +02/24/2022 06:22:41 - INFO - codeparrot_training - Step 3713: {'lr': 0.0004984304070408828, 'samples': 1901568, 'steps': 3713, 'loss/train': 1.899722695350647} +02/24/2022 06:22:47 - INFO - codeparrot_training - Step 3714: {'lr': 0.0004984285758625761, 'samples': 1902080, 'steps': 3714, 'loss/train': 2.3745577335357666} +02/24/2022 06:22:50 - INFO - codeparrot_training - Step 3715: {'lr': 0.0004984267436200805, 'samples': 1902592, 'steps': 3715, 'loss/train': 3.5624730587005615} +02/24/2022 06:22:56 - INFO - codeparrot_training - Step 3716: {'lr': 0.0004984249103134035, 'samples': 1903104, 'steps': 3716, 'loss/train': 2.43900990486145} +02/24/2022 06:22:59 - INFO - codeparrot_training - Step 3717: {'lr': 0.000498423075942553, 'samples': 1903616, 'steps': 3717, 'loss/train': 2.644005298614502} +02/24/2022 06:23:05 - INFO - codeparrot_training - Step 3718: {'lr': 0.0004984212405075369, 'samples': 1904128, 'steps': 3718, 'loss/train': 2.683960199356079} +02/24/2022 06:23:09 - INFO - codeparrot_training - Step 3719: {'lr': 0.0004984194040083632, 'samples': 1904640, 'steps': 3719, 'loss/train': 3.2765417098999023} +02/24/2022 06:23:14 - INFO - codeparrot_training - Step 3720: {'lr': 0.0004984175664450397, 'samples': 1905152, 'steps': 3720, 'loss/train': 3.5208442211151123} +02/24/2022 06:23:18 - INFO - codeparrot_training - Step 3721: {'lr': 0.0004984157278175741, 'samples': 1905664, 'steps': 3721, 'loss/train': 3.1742918491363525} +02/24/2022 06:23:23 - INFO - codeparrot_training - Step 3722: {'lr': 0.0004984138881259744, 'samples': 1906176, 'steps': 3722, 'loss/train': 2.0669407844543457} +02/24/2022 06:23:27 - INFO - codeparrot_training - Step 3723: {'lr': 0.0004984120473702486, 'samples': 1906688, 'steps': 3723, 'loss/train': 2.4079787731170654} +02/24/2022 06:23:32 - INFO - codeparrot_training - Step 3724: {'lr': 0.0004984102055504044, 'samples': 1907200, 'steps': 3724, 'loss/train': 2.955017328262329} +02/24/2022 06:23:36 - INFO - codeparrot_training - Step 3725: {'lr': 0.0004984083626664497, 'samples': 1907712, 'steps': 3725, 'loss/train': 3.057616949081421} +02/24/2022 06:23:41 - INFO - codeparrot_training - Step 3726: {'lr': 0.0004984065187183925, 'samples': 1908224, 'steps': 3726, 'loss/train': 2.7142789363861084} +02/24/2022 06:23:44 - INFO - codeparrot_training - Step 3727: {'lr': 0.0004984046737062407, 'samples': 1908736, 'steps': 3727, 'loss/train': 1.5987069606781006} +02/24/2022 06:23:51 - INFO - codeparrot_training - Step 3728: {'lr': 0.0004984028276300021, 'samples': 1909248, 'steps': 3728, 'loss/train': 2.8336312770843506} +02/24/2022 06:23:54 - INFO - codeparrot_training - Step 3729: {'lr': 0.0004984009804896846, 'samples': 1909760, 'steps': 3729, 'loss/train': 2.7531681060791016} +02/24/2022 06:24:00 - INFO - codeparrot_training - Step 3730: {'lr': 0.0004983991322852963, 'samples': 1910272, 'steps': 3730, 'loss/train': 9.213851928710938} +02/24/2022 06:24:03 - INFO - codeparrot_training - Step 3731: {'lr': 0.000498397283016845, 'samples': 1910784, 'steps': 3731, 'loss/train': 3.1641433238983154} +02/24/2022 06:24:10 - INFO - codeparrot_training - Step 3732: {'lr': 0.0004983954326843386, 'samples': 1911296, 'steps': 3732, 'loss/train': 2.9372360706329346} +02/24/2022 06:24:13 - INFO - codeparrot_training - Step 3733: {'lr': 0.000498393581287785, 'samples': 1911808, 'steps': 3733, 'loss/train': 2.870913505554199} +02/24/2022 06:24:16 - INFO - codeparrot_training - Step 3734: {'lr': 0.0004983917288271921, 'samples': 1912320, 'steps': 3734, 'loss/train': 2.4943997859954834} +02/24/2022 06:24:22 - INFO - codeparrot_training - Step 3735: {'lr': 0.0004983898753025681, 'samples': 1912832, 'steps': 3735, 'loss/train': 2.899331569671631} +02/24/2022 06:24:25 - INFO - codeparrot_training - Step 3736: {'lr': 0.0004983880207139205, 'samples': 1913344, 'steps': 3736, 'loss/train': 2.1709070205688477} +02/24/2022 06:24:31 - INFO - codeparrot_training - Step 3737: {'lr': 0.0004983861650612577, 'samples': 1913856, 'steps': 3737, 'loss/train': 2.6429572105407715} +02/24/2022 06:24:34 - INFO - codeparrot_training - Step 3738: {'lr': 0.0004983843083445873, 'samples': 1914368, 'steps': 3738, 'loss/train': 2.3169102668762207} +02/24/2022 06:24:40 - INFO - codeparrot_training - Step 3739: {'lr': 0.0004983824505639175, 'samples': 1914880, 'steps': 3739, 'loss/train': 2.5287582874298096} +02/24/2022 06:24:43 - INFO - codeparrot_training - Step 3740: {'lr': 0.000498380591719256, 'samples': 1915392, 'steps': 3740, 'loss/train': 3.4230027198791504} +02/24/2022 06:24:49 - INFO - codeparrot_training - Step 3741: {'lr': 0.0004983787318106111, 'samples': 1915904, 'steps': 3741, 'loss/train': 3.4316139221191406} +02/24/2022 06:24:53 - INFO - codeparrot_training - Step 3742: {'lr': 0.0004983768708379905, 'samples': 1916416, 'steps': 3742, 'loss/train': 2.9466638565063477} +02/24/2022 06:24:58 - INFO - codeparrot_training - Step 3743: {'lr': 0.0004983750088014023, 'samples': 1916928, 'steps': 3743, 'loss/train': 2.963587760925293} +02/24/2022 06:25:02 - INFO - codeparrot_training - Step 3744: {'lr': 0.0004983731457008544, 'samples': 1917440, 'steps': 3744, 'loss/train': 1.5645618438720703} +02/24/2022 06:25:07 - INFO - codeparrot_training - Step 3745: {'lr': 0.0004983712815363548, 'samples': 1917952, 'steps': 3745, 'loss/train': 2.88423490524292} +02/24/2022 06:25:11 - INFO - codeparrot_training - Step 3746: {'lr': 0.0004983694163079115, 'samples': 1918464, 'steps': 3746, 'loss/train': 2.7721304893493652} +02/24/2022 06:25:16 - INFO - codeparrot_training - Step 3747: {'lr': 0.0004983675500155325, 'samples': 1918976, 'steps': 3747, 'loss/train': 2.4178385734558105} +02/24/2022 06:25:20 - INFO - codeparrot_training - Step 3748: {'lr': 0.0004983656826592258, 'samples': 1919488, 'steps': 3748, 'loss/train': 7.331582546234131} +02/24/2022 06:25:25 - INFO - codeparrot_training - Step 3749: {'lr': 0.0004983638142389993, 'samples': 1920000, 'steps': 3749, 'loss/train': 2.3395955562591553} +02/24/2022 06:25:29 - INFO - codeparrot_training - Step 3750: {'lr': 0.000498361944754861, 'samples': 1920512, 'steps': 3750, 'loss/train': 3.1834793090820312} +02/24/2022 06:25:34 - INFO - codeparrot_training - Step 3751: {'lr': 0.0004983600742068192, 'samples': 1921024, 'steps': 3751, 'loss/train': 3.083505392074585} +02/24/2022 06:25:38 - INFO - codeparrot_training - Step 3752: {'lr': 0.0004983582025948816, 'samples': 1921536, 'steps': 3752, 'loss/train': 2.9713878631591797} +02/24/2022 06:25:43 - INFO - codeparrot_training - Step 3753: {'lr': 0.0004983563299190564, 'samples': 1922048, 'steps': 3753, 'loss/train': 3.736201763153076} +02/24/2022 06:25:47 - INFO - codeparrot_training - Step 3754: {'lr': 0.0004983544561793515, 'samples': 1922560, 'steps': 3754, 'loss/train': 1.7429077625274658} +02/24/2022 06:25:53 - INFO - codeparrot_training - Step 3755: {'lr': 0.000498352581375775, 'samples': 1923072, 'steps': 3755, 'loss/train': 2.9478325843811035} +02/24/2022 06:25:56 - INFO - codeparrot_training - Step 3756: {'lr': 0.0004983507055083349, 'samples': 1923584, 'steps': 3756, 'loss/train': 2.925215244293213} +02/24/2022 06:26:02 - INFO - codeparrot_training - Step 3757: {'lr': 0.0004983488285770391, 'samples': 1924096, 'steps': 3757, 'loss/train': 3.0634958744049072} +02/24/2022 06:26:05 - INFO - codeparrot_training - Step 3758: {'lr': 0.000498346950581896, 'samples': 1924608, 'steps': 3758, 'loss/train': 2.396843671798706} +02/24/2022 06:26:11 - INFO - codeparrot_training - Step 3759: {'lr': 0.0004983450715229132, 'samples': 1925120, 'steps': 3759, 'loss/train': 2.7595198154449463} +02/24/2022 06:26:14 - INFO - codeparrot_training - Step 3760: {'lr': 0.000498343191400099, 'samples': 1925632, 'steps': 3760, 'loss/train': 3.177227020263672} +02/24/2022 06:26:20 - INFO - codeparrot_training - Step 3761: {'lr': 0.0004983413102134616, 'samples': 1926144, 'steps': 3761, 'loss/train': 3.1766610145568848} +02/24/2022 06:26:23 - INFO - codeparrot_training - Step 3762: {'lr': 0.0004983394279630088, 'samples': 1926656, 'steps': 3762, 'loss/train': 2.3948452472686768} +02/24/2022 06:26:29 - INFO - codeparrot_training - Step 3763: {'lr': 0.0004983375446487488, 'samples': 1927168, 'steps': 3763, 'loss/train': 0.9162213206291199} +02/24/2022 06:26:32 - INFO - codeparrot_training - Step 3764: {'lr': 0.0004983356602706895, 'samples': 1927680, 'steps': 3764, 'loss/train': 2.3024191856384277} +02/24/2022 06:26:38 - INFO - codeparrot_training - Step 3765: {'lr': 0.0004983337748288391, 'samples': 1928192, 'steps': 3765, 'loss/train': 3.489478349685669} +02/24/2022 06:26:41 - INFO - codeparrot_training - Step 3766: {'lr': 0.0004983318883232058, 'samples': 1928704, 'steps': 3766, 'loss/train': 3.4569692611694336} +02/24/2022 06:26:47 - INFO - codeparrot_training - Step 3767: {'lr': 0.0004983300007537974, 'samples': 1929216, 'steps': 3767, 'loss/train': 3.213865041732788} +02/24/2022 06:26:51 - INFO - codeparrot_training - Step 3768: {'lr': 0.0004983281121206222, 'samples': 1929728, 'steps': 3768, 'loss/train': 3.08313250541687} +02/24/2022 06:26:56 - INFO - codeparrot_training - Step 3769: {'lr': 0.0004983262224236882, 'samples': 1930240, 'steps': 3769, 'loss/train': 2.926205635070801} +02/24/2022 06:27:00 - INFO - codeparrot_training - Step 3770: {'lr': 0.0004983243316630035, 'samples': 1930752, 'steps': 3770, 'loss/train': 3.129277229309082} +02/24/2022 06:27:05 - INFO - codeparrot_training - Step 3771: {'lr': 0.0004983224398385762, 'samples': 1931264, 'steps': 3771, 'loss/train': 2.537529230117798} +02/24/2022 06:27:09 - INFO - codeparrot_training - Step 3772: {'lr': 0.0004983205469504144, 'samples': 1931776, 'steps': 3772, 'loss/train': 1.9506564140319824} +02/24/2022 06:27:14 - INFO - codeparrot_training - Step 3773: {'lr': 0.0004983186529985263, 'samples': 1932288, 'steps': 3773, 'loss/train': 2.6856629848480225} +02/24/2022 06:27:18 - INFO - codeparrot_training - Step 3774: {'lr': 0.00049831675798292, 'samples': 1932800, 'steps': 3774, 'loss/train': 3.698329448699951} +02/24/2022 06:27:23 - INFO - codeparrot_training - Step 3775: {'lr': 0.0004983148619036034, 'samples': 1933312, 'steps': 3775, 'loss/train': 1.9796056747436523} +02/24/2022 06:27:27 - INFO - codeparrot_training - Step 3776: {'lr': 0.0004983129647605849, 'samples': 1933824, 'steps': 3776, 'loss/train': 3.0843496322631836} +02/24/2022 06:27:33 - INFO - codeparrot_training - Step 3777: {'lr': 0.0004983110665538724, 'samples': 1934336, 'steps': 3777, 'loss/train': 2.5527446269989014} +02/24/2022 06:27:36 - INFO - codeparrot_training - Step 3778: {'lr': 0.0004983091672834742, 'samples': 1934848, 'steps': 3778, 'loss/train': 3.087131977081299} +02/24/2022 06:27:42 - INFO - codeparrot_training - Step 3779: {'lr': 0.0004983072669493985, 'samples': 1935360, 'steps': 3779, 'loss/train': 3.097358226776123} +02/24/2022 06:27:45 - INFO - codeparrot_training - Step 3780: {'lr': 0.0004983053655516531, 'samples': 1935872, 'steps': 3780, 'loss/train': 2.8714654445648193} +02/24/2022 06:27:51 - INFO - codeparrot_training - Step 3781: {'lr': 0.0004983034630902465, 'samples': 1936384, 'steps': 3781, 'loss/train': 4.090614318847656} +02/24/2022 06:27:55 - INFO - codeparrot_training - Step 3782: {'lr': 0.0004983015595651867, 'samples': 1936896, 'steps': 3782, 'loss/train': 2.7963130474090576} +02/24/2022 06:28:00 - INFO - codeparrot_training - Step 3783: {'lr': 0.0004982996549764817, 'samples': 1937408, 'steps': 3783, 'loss/train': 2.333299398422241} +02/24/2022 06:28:03 - INFO - codeparrot_training - Step 3784: {'lr': 0.0004982977493241399, 'samples': 1937920, 'steps': 3784, 'loss/train': 2.328354835510254} +02/24/2022 06:28:09 - INFO - codeparrot_training - Step 3785: {'lr': 0.0004982958426081695, 'samples': 1938432, 'steps': 3785, 'loss/train': 2.2759294509887695} +02/24/2022 06:28:12 - INFO - codeparrot_training - Step 3786: {'lr': 0.0004982939348285784, 'samples': 1938944, 'steps': 3786, 'loss/train': 3.6218342781066895} +02/24/2022 06:28:19 - INFO - codeparrot_training - Step 3787: {'lr': 0.000498292025985375, 'samples': 1939456, 'steps': 3787, 'loss/train': 2.3638393878936768} +02/24/2022 06:28:22 - INFO - codeparrot_training - Step 3788: {'lr': 0.0004982901160785675, 'samples': 1939968, 'steps': 3788, 'loss/train': 3.03568959236145} +02/24/2022 06:28:28 - INFO - codeparrot_training - Step 3789: {'lr': 0.0004982882051081639, 'samples': 1940480, 'steps': 3789, 'loss/train': 4.6451802253723145} +02/24/2022 06:28:31 - INFO - codeparrot_training - Step 3790: {'lr': 0.0004982862930741725, 'samples': 1940992, 'steps': 3790, 'loss/train': 3.393296957015991} +02/24/2022 06:28:37 - INFO - codeparrot_training - Step 3791: {'lr': 0.0004982843799766014, 'samples': 1941504, 'steps': 3791, 'loss/train': 4.070493698120117} +02/24/2022 06:28:40 - INFO - codeparrot_training - Step 3792: {'lr': 0.0004982824658154589, 'samples': 1942016, 'steps': 3792, 'loss/train': 0.31920528411865234} +02/24/2022 06:28:46 - INFO - codeparrot_training - Step 3793: {'lr': 0.000498280550590753, 'samples': 1942528, 'steps': 3793, 'loss/train': 3.0541083812713623} +02/24/2022 06:28:49 - INFO - codeparrot_training - Step 3794: {'lr': 0.0004982786343024923, 'samples': 1943040, 'steps': 3794, 'loss/train': 1.9726697206497192} +02/24/2022 06:28:55 - INFO - codeparrot_training - Step 3795: {'lr': 0.0004982767169506847, 'samples': 1943552, 'steps': 3795, 'loss/train': 3.0337398052215576} +02/24/2022 06:28:58 - INFO - codeparrot_training - Step 3796: {'lr': 0.0004982747985353384, 'samples': 1944064, 'steps': 3796, 'loss/train': 2.9903831481933594} +02/24/2022 06:29:05 - INFO - codeparrot_training - Step 3797: {'lr': 0.0004982728790564616, 'samples': 1944576, 'steps': 3797, 'loss/train': 3.065185546875} +02/24/2022 06:29:08 - INFO - codeparrot_training - Step 3798: {'lr': 0.0004982709585140629, 'samples': 1945088, 'steps': 3798, 'loss/train': 3.254495620727539} +02/24/2022 06:29:14 - INFO - codeparrot_training - Step 3799: {'lr': 0.0004982690369081501, 'samples': 1945600, 'steps': 3799, 'loss/train': 2.4238195419311523} +02/24/2022 06:29:17 - INFO - codeparrot_training - Step 3800: {'lr': 0.0004982671142387316, 'samples': 1946112, 'steps': 3800, 'loss/train': 3.1700680255889893} +02/24/2022 06:29:23 - INFO - codeparrot_training - Step 3801: {'lr': 0.0004982651905058156, 'samples': 1946624, 'steps': 3801, 'loss/train': 2.4958302974700928} +02/24/2022 06:29:26 - INFO - codeparrot_training - Step 3802: {'lr': 0.0004982632657094104, 'samples': 1947136, 'steps': 3802, 'loss/train': 2.6902425289154053} +02/24/2022 06:29:32 - INFO - codeparrot_training - Step 3803: {'lr': 0.0004982613398495241, 'samples': 1947648, 'steps': 3803, 'loss/train': 3.3472297191619873} +02/24/2022 06:29:35 - INFO - codeparrot_training - Step 3804: {'lr': 0.0004982594129261652, 'samples': 1948160, 'steps': 3804, 'loss/train': 2.234116315841675} +02/24/2022 06:29:41 - INFO - codeparrot_training - Step 3805: {'lr': 0.0004982574849393416, 'samples': 1948672, 'steps': 3805, 'loss/train': 3.7738876342773438} +02/24/2022 06:29:44 - INFO - codeparrot_training - Step 3806: {'lr': 0.000498255555889062, 'samples': 1949184, 'steps': 3806, 'loss/train': 2.6245174407958984} +02/24/2022 06:29:51 - INFO - codeparrot_training - Step 3807: {'lr': 0.0004982536257753343, 'samples': 1949696, 'steps': 3807, 'loss/train': 2.472965955734253} +02/24/2022 06:29:56 - INFO - codeparrot_training - Step 3808: {'lr': 0.0004982516945981669, 'samples': 1950208, 'steps': 3808, 'loss/train': 2.5350589752197266} +02/24/2022 06:29:59 - INFO - codeparrot_training - Step 3809: {'lr': 0.0004982497623575681, 'samples': 1950720, 'steps': 3809, 'loss/train': 3.7140450477600098} +02/24/2022 06:30:05 - INFO - codeparrot_training - Step 3810: {'lr': 0.0004982478290535461, 'samples': 1951232, 'steps': 3810, 'loss/train': 2.433718681335449} +02/24/2022 06:30:09 - INFO - codeparrot_training - Step 3811: {'lr': 0.0004982458946861093, 'samples': 1951744, 'steps': 3811, 'loss/train': 2.9738035202026367} +02/24/2022 06:30:14 - INFO - codeparrot_training - Step 3812: {'lr': 0.0004982439592552658, 'samples': 1952256, 'steps': 3812, 'loss/train': 2.861863851547241} +02/24/2022 06:30:18 - INFO - codeparrot_training - Step 3813: {'lr': 0.0004982420227610242, 'samples': 1952768, 'steps': 3813, 'loss/train': 2.92391300201416} +02/24/2022 06:30:23 - INFO - codeparrot_training - Step 3814: {'lr': 0.0004982400852033924, 'samples': 1953280, 'steps': 3814, 'loss/train': 3.3727493286132812} +02/24/2022 06:30:27 - INFO - codeparrot_training - Step 3815: {'lr': 0.000498238146582379, 'samples': 1953792, 'steps': 3815, 'loss/train': 2.72053861618042} +02/24/2022 06:30:32 - INFO - codeparrot_training - Step 3816: {'lr': 0.0004982362068979921, 'samples': 1954304, 'steps': 3816, 'loss/train': 3.413799524307251} +02/24/2022 06:30:36 - INFO - codeparrot_training - Step 3817: {'lr': 0.0004982342661502403, 'samples': 1954816, 'steps': 3817, 'loss/train': 2.691828966140747} +02/24/2022 06:30:41 - INFO - codeparrot_training - Step 3818: {'lr': 0.0004982323243391315, 'samples': 1955328, 'steps': 3818, 'loss/train': 3.3134384155273438} +02/24/2022 06:30:45 - INFO - codeparrot_training - Step 3819: {'lr': 0.0004982303814646745, 'samples': 1955840, 'steps': 3819, 'loss/train': 2.3300282955169678} +02/24/2022 06:30:50 - INFO - codeparrot_training - Step 3820: {'lr': 0.0004982284375268772, 'samples': 1956352, 'steps': 3820, 'loss/train': 2.090731143951416} +02/24/2022 06:30:54 - INFO - codeparrot_training - Step 3821: {'lr': 0.0004982264925257481, 'samples': 1956864, 'steps': 3821, 'loss/train': 3.1457366943359375} +02/24/2022 06:31:00 - INFO - codeparrot_training - Step 3822: {'lr': 0.0004982245464612955, 'samples': 1957376, 'steps': 3822, 'loss/train': 2.9802441596984863} +02/24/2022 06:31:03 - INFO - codeparrot_training - Step 3823: {'lr': 0.0004982225993335279, 'samples': 1957888, 'steps': 3823, 'loss/train': 3.245405435562134} +02/24/2022 06:31:09 - INFO - codeparrot_training - Step 3824: {'lr': 0.0004982206511424534, 'samples': 1958400, 'steps': 3824, 'loss/train': 3.7082931995391846} +02/24/2022 06:31:12 - INFO - codeparrot_training - Step 3825: {'lr': 0.0004982187018880805, 'samples': 1958912, 'steps': 3825, 'loss/train': 3.3702168464660645} +02/24/2022 06:31:18 - INFO - codeparrot_training - Step 3826: {'lr': 0.0004982167515704174, 'samples': 1959424, 'steps': 3826, 'loss/train': 2.24308705329895} +02/24/2022 06:31:21 - INFO - codeparrot_training - Step 3827: {'lr': 0.0004982148001894727, 'samples': 1959936, 'steps': 3827, 'loss/train': 3.251370668411255} +02/24/2022 06:31:27 - INFO - codeparrot_training - Step 3828: {'lr': 0.0004982128477452546, 'samples': 1960448, 'steps': 3828, 'loss/train': 2.9376633167266846} +02/24/2022 06:31:30 - INFO - codeparrot_training - Step 3829: {'lr': 0.0004982108942377713, 'samples': 1960960, 'steps': 3829, 'loss/train': 2.1045656204223633} +02/24/2022 06:31:36 - INFO - codeparrot_training - Step 3830: {'lr': 0.0004982089396670316, 'samples': 1961472, 'steps': 3830, 'loss/train': 5.327694892883301} +02/24/2022 06:31:39 - INFO - codeparrot_training - Step 3831: {'lr': 0.0004982069840330435, 'samples': 1961984, 'steps': 3831, 'loss/train': 3.1833877563476562} +02/24/2022 06:31:46 - INFO - codeparrot_training - Step 3832: {'lr': 0.0004982050273358154, 'samples': 1962496, 'steps': 3832, 'loss/train': 3.3765788078308105} +02/24/2022 06:31:49 - INFO - codeparrot_training - Step 3833: {'lr': 0.0004982030695753558, 'samples': 1963008, 'steps': 3833, 'loss/train': 3.984766960144043} +02/24/2022 06:31:52 - INFO - codeparrot_training - Step 3834: {'lr': 0.0004982011107516732, 'samples': 1963520, 'steps': 3834, 'loss/train': 3.2367210388183594} +02/24/2022 06:31:58 - INFO - codeparrot_training - Step 3835: {'lr': 0.0004981991508647757, 'samples': 1964032, 'steps': 3835, 'loss/train': 2.9056060314178467} +02/24/2022 06:32:01 - INFO - codeparrot_training - Step 3836: {'lr': 0.0004981971899146719, 'samples': 1964544, 'steps': 3836, 'loss/train': 2.7002134323120117} +02/24/2022 06:32:07 - INFO - codeparrot_training - Step 3837: {'lr': 0.0004981952279013702, 'samples': 1965056, 'steps': 3837, 'loss/train': 1.8907198905944824} +02/24/2022 06:32:13 - INFO - codeparrot_training - Step 3838: {'lr': 0.0004981932648248789, 'samples': 1965568, 'steps': 3838, 'loss/train': 3.1737170219421387} +02/24/2022 06:32:16 - INFO - codeparrot_training - Step 3839: {'lr': 0.0004981913006852065, 'samples': 1966080, 'steps': 3839, 'loss/train': 2.749114751815796} +02/24/2022 06:32:21 - INFO - codeparrot_training - Step 3840: {'lr': 0.0004981893354823614, 'samples': 1966592, 'steps': 3840, 'loss/train': 2.7215335369110107} +02/24/2022 06:32:25 - INFO - codeparrot_training - Step 3841: {'lr': 0.000498187369216352, 'samples': 1967104, 'steps': 3841, 'loss/train': 2.360414505004883} +02/24/2022 06:32:32 - INFO - codeparrot_training - Step 3842: {'lr': 0.0004981854018871867, 'samples': 1967616, 'steps': 3842, 'loss/train': 1.6500130891799927} +02/24/2022 06:32:35 - INFO - codeparrot_training - Step 3843: {'lr': 0.0004981834334948738, 'samples': 1968128, 'steps': 3843, 'loss/train': 4.215713977813721} +02/24/2022 06:32:41 - INFO - codeparrot_training - Step 3844: {'lr': 0.0004981814640394221, 'samples': 1968640, 'steps': 3844, 'loss/train': 3.047409772872925} +02/24/2022 06:32:44 - INFO - codeparrot_training - Step 3845: {'lr': 0.0004981794935208397, 'samples': 1969152, 'steps': 3845, 'loss/train': 2.663679838180542} +02/24/2022 06:32:50 - INFO - codeparrot_training - Step 3846: {'lr': 0.0004981775219391352, 'samples': 1969664, 'steps': 3846, 'loss/train': 2.523682117462158} +02/24/2022 06:32:53 - INFO - codeparrot_training - Step 3847: {'lr': 0.000498175549294317, 'samples': 1970176, 'steps': 3847, 'loss/train': 3.081430196762085} +02/24/2022 06:32:59 - INFO - codeparrot_training - Step 3848: {'lr': 0.0004981735755863934, 'samples': 1970688, 'steps': 3848, 'loss/train': 5.446176528930664} +02/24/2022 06:33:02 - INFO - codeparrot_training - Step 3849: {'lr': 0.0004981716008153732, 'samples': 1971200, 'steps': 3849, 'loss/train': 2.8046562671661377} +02/24/2022 06:33:08 - INFO - codeparrot_training - Step 3850: {'lr': 0.0004981696249812646, 'samples': 1971712, 'steps': 3850, 'loss/train': 2.7847628593444824} +02/24/2022 06:33:11 - INFO - codeparrot_training - Step 3851: {'lr': 0.0004981676480840761, 'samples': 1972224, 'steps': 3851, 'loss/train': 2.9150774478912354} +02/24/2022 06:33:17 - INFO - codeparrot_training - Step 3852: {'lr': 0.0004981656701238162, 'samples': 1972736, 'steps': 3852, 'loss/train': 1.747209072113037} +02/24/2022 06:33:20 - INFO - codeparrot_training - Step 3853: {'lr': 0.0004981636911004934, 'samples': 1973248, 'steps': 3853, 'loss/train': 2.875384569168091} +02/24/2022 06:33:25 - INFO - codeparrot_training - Step 3854: {'lr': 0.0004981617110141162, 'samples': 1973760, 'steps': 3854, 'loss/train': 2.2483558654785156} +02/24/2022 06:33:29 - INFO - codeparrot_training - Step 3855: {'lr': 0.000498159729864693, 'samples': 1974272, 'steps': 3855, 'loss/train': 2.3501102924346924} +02/24/2022 06:33:34 - INFO - codeparrot_training - Step 3856: {'lr': 0.0004981577476522323, 'samples': 1974784, 'steps': 3856, 'loss/train': 2.564826488494873} +02/24/2022 06:33:38 - INFO - codeparrot_training - Step 3857: {'lr': 0.0004981557643767426, 'samples': 1975296, 'steps': 3857, 'loss/train': 2.1798336505889893} +02/24/2022 06:33:45 - INFO - codeparrot_training - Step 3858: {'lr': 0.0004981537800382323, 'samples': 1975808, 'steps': 3858, 'loss/train': 2.6843316555023193} +02/24/2022 06:33:48 - INFO - codeparrot_training - Step 3859: {'lr': 0.0004981517946367102, 'samples': 1976320, 'steps': 3859, 'loss/train': 2.897378921508789} +02/24/2022 06:33:54 - INFO - codeparrot_training - Step 3860: {'lr': 0.0004981498081721845, 'samples': 1976832, 'steps': 3860, 'loss/train': 2.6063075065612793} +02/24/2022 06:33:57 - INFO - codeparrot_training - Step 3861: {'lr': 0.0004981478206446638, 'samples': 1977344, 'steps': 3861, 'loss/train': 4.317244529724121} +02/24/2022 06:34:03 - INFO - codeparrot_training - Step 3862: {'lr': 0.0004981458320541567, 'samples': 1977856, 'steps': 3862, 'loss/train': 0.46900343894958496} +02/24/2022 06:34:06 - INFO - codeparrot_training - Step 3863: {'lr': 0.0004981438424006716, 'samples': 1978368, 'steps': 3863, 'loss/train': 2.3372273445129395} +02/24/2022 06:34:12 - INFO - codeparrot_training - Step 3864: {'lr': 0.0004981418516842171, 'samples': 1978880, 'steps': 3864, 'loss/train': 3.0440077781677246} +02/24/2022 06:34:15 - INFO - codeparrot_training - Step 3865: {'lr': 0.0004981398599048018, 'samples': 1979392, 'steps': 3865, 'loss/train': 2.0230464935302734} +02/24/2022 06:34:20 - INFO - codeparrot_training - Step 3866: {'lr': 0.000498137867062434, 'samples': 1979904, 'steps': 3866, 'loss/train': 3.9015941619873047} +02/24/2022 06:34:24 - INFO - codeparrot_training - Step 3867: {'lr': 0.0004981358731571223, 'samples': 1980416, 'steps': 3867, 'loss/train': 2.2192654609680176} +02/24/2022 06:34:31 - INFO - codeparrot_training - Step 3868: {'lr': 0.0004981338781888755, 'samples': 1980928, 'steps': 3868, 'loss/train': 3.102003812789917} +02/24/2022 06:34:34 - INFO - codeparrot_training - Step 3869: {'lr': 0.0004981318821577018, 'samples': 1981440, 'steps': 3869, 'loss/train': 3.187795877456665} +02/24/2022 06:34:40 - INFO - codeparrot_training - Step 3870: {'lr': 0.00049812988506361, 'samples': 1981952, 'steps': 3870, 'loss/train': 2.7443923950195312} +02/24/2022 06:34:43 - INFO - codeparrot_training - Step 3871: {'lr': 0.0004981278869066085, 'samples': 1982464, 'steps': 3871, 'loss/train': 2.060483932495117} +02/24/2022 06:34:48 - INFO - codeparrot_training - Step 3872: {'lr': 0.000498125887686706, 'samples': 1982976, 'steps': 3872, 'loss/train': 2.1937296390533447} +02/24/2022 06:34:52 - INFO - codeparrot_training - Step 3873: {'lr': 0.0004981238874039109, 'samples': 1983488, 'steps': 3873, 'loss/train': 2.440709352493286} +02/24/2022 06:34:58 - INFO - codeparrot_training - Step 3874: {'lr': 0.0004981218860582319, 'samples': 1984000, 'steps': 3874, 'loss/train': 3.473883867263794} +02/24/2022 06:35:01 - INFO - codeparrot_training - Step 3875: {'lr': 0.0004981198836496775, 'samples': 1984512, 'steps': 3875, 'loss/train': 3.1413447856903076} +02/24/2022 06:35:06 - INFO - codeparrot_training - Step 3876: {'lr': 0.0004981178801782563, 'samples': 1985024, 'steps': 3876, 'loss/train': 1.9248684644699097} +02/24/2022 06:35:10 - INFO - codeparrot_training - Step 3877: {'lr': 0.000498115875643977, 'samples': 1985536, 'steps': 3877, 'loss/train': 1.1404633522033691} +02/24/2022 06:35:16 - INFO - codeparrot_training - Step 3878: {'lr': 0.0004981138700468479, 'samples': 1986048, 'steps': 3878, 'loss/train': 4.5774126052856445} +02/24/2022 06:35:22 - INFO - codeparrot_training - Step 3879: {'lr': 0.0004981118633868779, 'samples': 1986560, 'steps': 3879, 'loss/train': 2.8465540409088135} +02/24/2022 06:35:25 - INFO - codeparrot_training - Step 3880: {'lr': 0.0004981098556640755, 'samples': 1987072, 'steps': 3880, 'loss/train': 1.1227093935012817} +02/24/2022 06:35:31 - INFO - codeparrot_training - Step 3881: {'lr': 0.0004981078468784491, 'samples': 1987584, 'steps': 3881, 'loss/train': 2.5524864196777344} +02/24/2022 06:35:34 - INFO - codeparrot_training - Step 3882: {'lr': 0.0004981058370300076, 'samples': 1988096, 'steps': 3882, 'loss/train': 2.31843638420105} +02/24/2022 06:35:40 - INFO - codeparrot_training - Step 3883: {'lr': 0.0004981038261187594, 'samples': 1988608, 'steps': 3883, 'loss/train': 1.8953057527542114} +02/24/2022 06:35:43 - INFO - codeparrot_training - Step 3884: {'lr': 0.0004981018141447133, 'samples': 1989120, 'steps': 3884, 'loss/train': 1.1048979759216309} +02/24/2022 06:35:49 - INFO - codeparrot_training - Step 3885: {'lr': 0.0004980998011078776, 'samples': 1989632, 'steps': 3885, 'loss/train': 4.669902801513672} +02/24/2022 06:35:52 - INFO - codeparrot_training - Step 3886: {'lr': 0.0004980977870082613, 'samples': 1990144, 'steps': 3886, 'loss/train': 1.7005499601364136} +02/24/2022 06:35:58 - INFO - codeparrot_training - Step 3887: {'lr': 0.0004980957718458729, 'samples': 1990656, 'steps': 3887, 'loss/train': 3.654224395751953} +02/24/2022 06:36:01 - INFO - codeparrot_training - Step 3888: {'lr': 0.0004980937556207207, 'samples': 1991168, 'steps': 3888, 'loss/train': 2.9529154300689697} +02/24/2022 06:36:07 - INFO - codeparrot_training - Step 3889: {'lr': 0.0004980917383328139, 'samples': 1991680, 'steps': 3889, 'loss/train': 2.5530948638916016} +02/24/2022 06:36:11 - INFO - codeparrot_training - Step 3890: {'lr': 0.0004980897199821609, 'samples': 1992192, 'steps': 3890, 'loss/train': 3.3051698207855225} +02/24/2022 06:36:16 - INFO - codeparrot_training - Step 3891: {'lr': 0.0004980877005687701, 'samples': 1992704, 'steps': 3891, 'loss/train': 2.874004602432251} +02/24/2022 06:36:20 - INFO - codeparrot_training - Step 3892: {'lr': 0.0004980856800926506, 'samples': 1993216, 'steps': 3892, 'loss/train': 3.540424346923828} +02/24/2022 06:36:25 - INFO - codeparrot_training - Step 3893: {'lr': 0.0004980836585538107, 'samples': 1993728, 'steps': 3893, 'loss/train': 3.150780200958252} +02/24/2022 06:36:29 - INFO - codeparrot_training - Step 3894: {'lr': 0.0004980816359522592, 'samples': 1994240, 'steps': 3894, 'loss/train': 2.766648292541504} +02/24/2022 06:36:34 - INFO - codeparrot_training - Step 3895: {'lr': 0.0004980796122880048, 'samples': 1994752, 'steps': 3895, 'loss/train': 2.0121874809265137} +02/24/2022 06:36:38 - INFO - codeparrot_training - Step 3896: {'lr': 0.000498077587561056, 'samples': 1995264, 'steps': 3896, 'loss/train': 2.6432979106903076} +02/24/2022 06:36:43 - INFO - codeparrot_training - Step 3897: {'lr': 0.0004980755617714216, 'samples': 1995776, 'steps': 3897, 'loss/train': 3.193638563156128} +02/24/2022 06:36:47 - INFO - codeparrot_training - Step 3898: {'lr': 0.0004980735349191104, 'samples': 1996288, 'steps': 3898, 'loss/train': 2.426842212677002} +02/24/2022 06:36:52 - INFO - codeparrot_training - Step 3899: {'lr': 0.0004980715070041308, 'samples': 1996800, 'steps': 3899, 'loss/train': 2.622724771499634} +02/24/2022 06:36:55 - INFO - codeparrot_training - Step 3900: {'lr': 0.0004980694780264917, 'samples': 1997312, 'steps': 3900, 'loss/train': 4.063630104064941} +02/24/2022 06:37:01 - INFO - codeparrot_training - Step 3901: {'lr': 0.0004980674479862018, 'samples': 1997824, 'steps': 3901, 'loss/train': 3.0027968883514404} +02/24/2022 06:37:05 - INFO - codeparrot_training - Step 3902: {'lr': 0.0004980654168832697, 'samples': 1998336, 'steps': 3902, 'loss/train': 3.2706844806671143} +02/24/2022 06:37:12 - INFO - codeparrot_training - Step 3903: {'lr': 0.0004980633847177041, 'samples': 1998848, 'steps': 3903, 'loss/train': 2.9794161319732666} +02/24/2022 06:37:15 - INFO - codeparrot_training - Step 3904: {'lr': 0.0004980613514895135, 'samples': 1999360, 'steps': 3904, 'loss/train': 1.8317804336547852} +02/24/2022 06:37:19 - INFO - codeparrot_training - Step 3905: {'lr': 0.0004980593171987072, 'samples': 1999872, 'steps': 3905, 'loss/train': 2.109004020690918} +02/24/2022 06:37:24 - INFO - codeparrot_training - Step 3906: {'lr': 0.0004980572818452934, 'samples': 2000384, 'steps': 3906, 'loss/train': 1.9249424934387207} +02/24/2022 06:37:28 - INFO - codeparrot_training - Step 3907: {'lr': 0.0004980552454292809, 'samples': 2000896, 'steps': 3907, 'loss/train': 3.3942930698394775} +02/24/2022 06:37:33 - INFO - codeparrot_training - Step 3908: {'lr': 0.0004980532079506786, 'samples': 2001408, 'steps': 3908, 'loss/train': 1.7629261016845703} +02/24/2022 06:37:36 - INFO - codeparrot_training - Step 3909: {'lr': 0.0004980511694094951, 'samples': 2001920, 'steps': 3909, 'loss/train': 1.864051342010498} +02/24/2022 06:37:42 - INFO - codeparrot_training - Step 3910: {'lr': 0.0004980491298057392, 'samples': 2002432, 'steps': 3910, 'loss/train': 2.3030428886413574} +02/24/2022 06:37:46 - INFO - codeparrot_training - Step 3911: {'lr': 0.0004980470891394194, 'samples': 2002944, 'steps': 3911, 'loss/train': 3.462634563446045} +02/24/2022 06:37:51 - INFO - codeparrot_training - Step 3912: {'lr': 0.0004980450474105448, 'samples': 2003456, 'steps': 3912, 'loss/train': 2.4956939220428467} +02/24/2022 06:37:55 - INFO - codeparrot_training - Step 3913: {'lr': 0.000498043004619124, 'samples': 2003968, 'steps': 3913, 'loss/train': 2.379676103591919} +02/24/2022 06:38:01 - INFO - codeparrot_training - Step 3914: {'lr': 0.0004980409607651656, 'samples': 2004480, 'steps': 3914, 'loss/train': 1.6961394548416138} +02/24/2022 06:38:04 - INFO - codeparrot_training - Step 3915: {'lr': 0.0004980389158486786, 'samples': 2004992, 'steps': 3915, 'loss/train': 3.039517879486084} +02/24/2022 06:38:10 - INFO - codeparrot_training - Step 3916: {'lr': 0.0004980368698696716, 'samples': 2005504, 'steps': 3916, 'loss/train': 3.665147304534912} +02/24/2022 06:38:13 - INFO - codeparrot_training - Step 3917: {'lr': 0.0004980348228281534, 'samples': 2006016, 'steps': 3917, 'loss/train': 2.4664700031280518} +02/24/2022 06:38:19 - INFO - codeparrot_training - Step 3918: {'lr': 0.0004980327747241329, 'samples': 2006528, 'steps': 3918, 'loss/train': 4.039323329925537} +02/24/2022 06:38:22 - INFO - codeparrot_training - Step 3919: {'lr': 0.0004980307255576185, 'samples': 2007040, 'steps': 3919, 'loss/train': 3.323735475540161} +02/24/2022 06:38:28 - INFO - codeparrot_training - Step 3920: {'lr': 0.0004980286753286195, 'samples': 2007552, 'steps': 3920, 'loss/train': 3.0495383739471436} +02/24/2022 06:38:31 - INFO - codeparrot_training - Step 3921: {'lr': 0.0004980266240371443, 'samples': 2008064, 'steps': 3921, 'loss/train': 4.615071773529053} +02/24/2022 06:38:37 - INFO - codeparrot_training - Step 3922: {'lr': 0.0004980245716832018, 'samples': 2008576, 'steps': 3922, 'loss/train': 2.739562511444092} +02/24/2022 06:38:40 - INFO - codeparrot_training - Step 3923: {'lr': 0.0004980225182668008, 'samples': 2009088, 'steps': 3923, 'loss/train': 2.093564748764038} +02/24/2022 06:38:46 - INFO - codeparrot_training - Step 3924: {'lr': 0.00049802046378795, 'samples': 2009600, 'steps': 3924, 'loss/train': 3.1308810710906982} +02/24/2022 06:38:49 - INFO - codeparrot_training - Step 3925: {'lr': 0.0004980184082466583, 'samples': 2010112, 'steps': 3925, 'loss/train': 4.367579936981201} +02/24/2022 06:38:56 - INFO - codeparrot_training - Step 3926: {'lr': 0.0004980163516429346, 'samples': 2010624, 'steps': 3926, 'loss/train': 2.273790121078491} +02/24/2022 06:38:59 - INFO - codeparrot_training - Step 3927: {'lr': 0.0004980142939767876, 'samples': 2011136, 'steps': 3927, 'loss/train': 3.024556875228882} +02/24/2022 06:39:05 - INFO - codeparrot_training - Step 3928: {'lr': 0.000498012235248226, 'samples': 2011648, 'steps': 3928, 'loss/train': 2.704887866973877} +02/24/2022 06:39:08 - INFO - codeparrot_training - Step 3929: {'lr': 0.0004980101754572589, 'samples': 2012160, 'steps': 3929, 'loss/train': 1.8712055683135986} +02/24/2022 06:39:13 - INFO - codeparrot_training - Step 3930: {'lr': 0.0004980081146038948, 'samples': 2012672, 'steps': 3930, 'loss/train': 3.405402421951294} +02/24/2022 06:39:17 - INFO - codeparrot_training - Step 3931: {'lr': 0.0004980060526881429, 'samples': 2013184, 'steps': 3931, 'loss/train': 3.6324095726013184} +02/24/2022 06:39:23 - INFO - codeparrot_training - Step 3932: {'lr': 0.0004980039897100115, 'samples': 2013696, 'steps': 3932, 'loss/train': 3.024447202682495} +02/24/2022 06:39:26 - INFO - codeparrot_training - Step 3933: {'lr': 0.0004980019256695101, 'samples': 2014208, 'steps': 3933, 'loss/train': 5.835325241088867} +02/24/2022 06:39:32 - INFO - codeparrot_training - Step 3934: {'lr': 0.000497999860566647, 'samples': 2014720, 'steps': 3934, 'loss/train': 2.5996549129486084} +02/24/2022 06:39:35 - INFO - codeparrot_training - Step 3935: {'lr': 0.0004979977944014313, 'samples': 2015232, 'steps': 3935, 'loss/train': 2.718518018722534} +02/24/2022 06:39:41 - INFO - codeparrot_training - Step 3936: {'lr': 0.0004979957271738718, 'samples': 2015744, 'steps': 3936, 'loss/train': 1.3581740856170654} +02/24/2022 06:39:44 - INFO - codeparrot_training - Step 3937: {'lr': 0.0004979936588839773, 'samples': 2016256, 'steps': 3937, 'loss/train': 2.5434367656707764} +02/24/2022 06:39:50 - INFO - codeparrot_training - Step 3938: {'lr': 0.0004979915895317567, 'samples': 2016768, 'steps': 3938, 'loss/train': 2.3870959281921387} +02/24/2022 06:39:53 - INFO - codeparrot_training - Step 3939: {'lr': 0.000497989519117219, 'samples': 2017280, 'steps': 3939, 'loss/train': 2.086479425430298} +02/24/2022 06:39:59 - INFO - codeparrot_training - Step 3940: {'lr': 0.0004979874476403729, 'samples': 2017792, 'steps': 3940, 'loss/train': 2.3216536045074463} +02/24/2022 06:40:02 - INFO - codeparrot_training - Step 3941: {'lr': 0.0004979853751012273, 'samples': 2018304, 'steps': 3941, 'loss/train': 2.391488790512085} +02/24/2022 06:40:08 - INFO - codeparrot_training - Step 3942: {'lr': 0.0004979833014997911, 'samples': 2018816, 'steps': 3942, 'loss/train': 2.9961180686950684} +02/24/2022 06:40:11 - INFO - codeparrot_training - Step 3943: {'lr': 0.0004979812268360731, 'samples': 2019328, 'steps': 3943, 'loss/train': 2.9539763927459717} +02/24/2022 06:40:17 - INFO - codeparrot_training - Step 3944: {'lr': 0.0004979791511100823, 'samples': 2019840, 'steps': 3944, 'loss/train': 3.0451676845550537} +02/24/2022 06:40:20 - INFO - codeparrot_training - Step 3945: {'lr': 0.0004979770743218276, 'samples': 2020352, 'steps': 3945, 'loss/train': 1.821937918663025} +02/24/2022 06:40:26 - INFO - codeparrot_training - Step 3946: {'lr': 0.0004979749964713179, 'samples': 2020864, 'steps': 3946, 'loss/train': 2.286525249481201} +02/24/2022 06:40:29 - INFO - codeparrot_training - Step 3947: {'lr': 0.000497972917558562, 'samples': 2021376, 'steps': 3947, 'loss/train': 3.20574951171875} +02/24/2022 06:40:35 - INFO - codeparrot_training - Step 3948: {'lr': 0.0004979708375835688, 'samples': 2021888, 'steps': 3948, 'loss/train': 2.0780210494995117} +02/24/2022 06:40:38 - INFO - codeparrot_training - Step 3949: {'lr': 0.0004979687565463475, 'samples': 2022400, 'steps': 3949, 'loss/train': 3.068037748336792} +02/24/2022 06:40:45 - INFO - codeparrot_training - Step 3950: {'lr': 0.0004979666744469065, 'samples': 2022912, 'steps': 3950, 'loss/train': 2.231921911239624} +02/24/2022 06:40:48 - INFO - codeparrot_training - Step 3951: {'lr': 0.0004979645912852552, 'samples': 2023424, 'steps': 3951, 'loss/train': 1.9839613437652588} +02/24/2022 06:40:54 - INFO - codeparrot_training - Step 3952: {'lr': 0.0004979625070614022, 'samples': 2023936, 'steps': 3952, 'loss/train': 3.6623475551605225} +02/24/2022 06:40:57 - INFO - codeparrot_training - Step 3953: {'lr': 0.0004979604217753566, 'samples': 2024448, 'steps': 3953, 'loss/train': 1.5192588567733765} +02/24/2022 06:41:03 - INFO - codeparrot_training - Step 3954: {'lr': 0.0004979583354271273, 'samples': 2024960, 'steps': 3954, 'loss/train': 3.713426113128662} +02/24/2022 06:41:06 - INFO - codeparrot_training - Step 3955: {'lr': 0.0004979562480167232, 'samples': 2025472, 'steps': 3955, 'loss/train': 1.8236944675445557} +02/24/2022 06:41:12 - INFO - codeparrot_training - Step 3956: {'lr': 0.0004979541595441534, 'samples': 2025984, 'steps': 3956, 'loss/train': 1.8752506971359253} +02/24/2022 06:41:17 - INFO - codeparrot_training - Step 3957: {'lr': 0.0004979520700094265, 'samples': 2026496, 'steps': 3957, 'loss/train': 2.3672893047332764} +02/24/2022 06:41:21 - INFO - codeparrot_training - Step 3958: {'lr': 0.0004979499794125518, 'samples': 2027008, 'steps': 3958, 'loss/train': 3.0423362255096436} +02/24/2022 06:41:27 - INFO - codeparrot_training - Step 3959: {'lr': 0.0004979478877535382, 'samples': 2027520, 'steps': 3959, 'loss/train': 0.24859654903411865} +02/24/2022 06:41:31 - INFO - codeparrot_training - Step 3960: {'lr': 0.0004979457950323945, 'samples': 2028032, 'steps': 3960, 'loss/train': 3.2089052200317383} +02/24/2022 06:41:36 - INFO - codeparrot_training - Step 3961: {'lr': 0.0004979437012491297, 'samples': 2028544, 'steps': 3961, 'loss/train': 2.6857504844665527} +02/24/2022 06:41:40 - INFO - codeparrot_training - Step 3962: {'lr': 0.0004979416064037528, 'samples': 2029056, 'steps': 3962, 'loss/train': 2.964184522628784} +02/24/2022 06:41:45 - INFO - codeparrot_training - Step 3963: {'lr': 0.0004979395104962728, 'samples': 2029568, 'steps': 3963, 'loss/train': 2.4108879566192627} +02/24/2022 06:41:49 - INFO - codeparrot_training - Step 3964: {'lr': 0.0004979374135266987, 'samples': 2030080, 'steps': 3964, 'loss/train': 2.143505096435547} +02/24/2022 06:41:54 - INFO - codeparrot_training - Step 3965: {'lr': 0.0004979353154950394, 'samples': 2030592, 'steps': 3965, 'loss/train': 0.5235998630523682} +02/24/2022 06:41:58 - INFO - codeparrot_training - Step 3966: {'lr': 0.0004979332164013041, 'samples': 2031104, 'steps': 3966, 'loss/train': 1.6597570180892944} +02/24/2022 06:42:03 - INFO - codeparrot_training - Step 3967: {'lr': 0.0004979311162455015, 'samples': 2031616, 'steps': 3967, 'loss/train': 3.270280361175537} +02/24/2022 06:42:07 - INFO - codeparrot_training - Step 3968: {'lr': 0.0004979290150276407, 'samples': 2032128, 'steps': 3968, 'loss/train': 4.413817882537842} +02/24/2022 06:42:12 - INFO - codeparrot_training - Step 3969: {'lr': 0.0004979269127477308, 'samples': 2032640, 'steps': 3969, 'loss/train': 3.0075902938842773} +02/24/2022 06:42:16 - INFO - codeparrot_training - Step 3970: {'lr': 0.0004979248094057806, 'samples': 2033152, 'steps': 3970, 'loss/train': 1.8251111507415771} +02/24/2022 06:42:22 - INFO - codeparrot_training - Step 3971: {'lr': 0.0004979227050017994, 'samples': 2033664, 'steps': 3971, 'loss/train': 1.5707528591156006} +02/24/2022 06:42:26 - INFO - codeparrot_training - Step 3972: {'lr': 0.000497920599535796, 'samples': 2034176, 'steps': 3972, 'loss/train': 2.2323122024536133} +02/24/2022 06:42:31 - INFO - codeparrot_training - Step 3973: {'lr': 0.0004979184930077794, 'samples': 2034688, 'steps': 3973, 'loss/train': 1.9746849536895752} +02/24/2022 06:42:35 - INFO - codeparrot_training - Step 3974: {'lr': 0.0004979163854177588, 'samples': 2035200, 'steps': 3974, 'loss/train': 2.7524733543395996} +02/24/2022 06:42:40 - INFO - codeparrot_training - Step 3975: {'lr': 0.0004979142767657432, 'samples': 2035712, 'steps': 3975, 'loss/train': 3.455688714981079} +02/24/2022 06:42:44 - INFO - codeparrot_training - Step 3976: {'lr': 0.0004979121670517413, 'samples': 2036224, 'steps': 3976, 'loss/train': 2.497265100479126} +02/24/2022 06:42:49 - INFO - codeparrot_training - Step 3977: {'lr': 0.0004979100562757626, 'samples': 2036736, 'steps': 3977, 'loss/train': 2.372575521469116} +02/24/2022 06:42:53 - INFO - codeparrot_training - Step 3978: {'lr': 0.0004979079444378159, 'samples': 2037248, 'steps': 3978, 'loss/train': 3.387920618057251} +02/24/2022 06:42:58 - INFO - codeparrot_training - Step 3979: {'lr': 0.0004979058315379103, 'samples': 2037760, 'steps': 3979, 'loss/train': 2.5600380897521973} +02/24/2022 06:43:02 - INFO - codeparrot_training - Step 3980: {'lr': 0.0004979037175760548, 'samples': 2038272, 'steps': 3980, 'loss/train': 1.1275111436843872} +02/24/2022 06:43:08 - INFO - codeparrot_training - Step 3981: {'lr': 0.0004979016025522586, 'samples': 2038784, 'steps': 3981, 'loss/train': 5.364108085632324} +02/24/2022 06:43:11 - INFO - codeparrot_training - Step 3982: {'lr': 0.0004978994864665305, 'samples': 2039296, 'steps': 3982, 'loss/train': 3.3783178329467773} +02/24/2022 06:43:17 - INFO - codeparrot_training - Step 3983: {'lr': 0.0004978973693188797, 'samples': 2039808, 'steps': 3983, 'loss/train': 3.17097806930542} +02/24/2022 06:43:20 - INFO - codeparrot_training - Step 3984: {'lr': 0.0004978952511093155, 'samples': 2040320, 'steps': 3984, 'loss/train': 3.197195529937744} +02/24/2022 06:43:26 - INFO - codeparrot_training - Step 3985: {'lr': 0.0004978931318378465, 'samples': 2040832, 'steps': 3985, 'loss/train': 2.1542203426361084} +02/24/2022 06:43:29 - INFO - codeparrot_training - Step 3986: {'lr': 0.0004978910115044822, 'samples': 2041344, 'steps': 3986, 'loss/train': 1.6286121606826782} +02/24/2022 06:43:35 - INFO - codeparrot_training - Step 3987: {'lr': 0.0004978888901092315, 'samples': 2041856, 'steps': 3987, 'loss/train': 3.493614673614502} +02/24/2022 06:43:38 - INFO - codeparrot_training - Step 3988: {'lr': 0.0004978867676521035, 'samples': 2042368, 'steps': 3988, 'loss/train': 3.4249188899993896} +02/24/2022 06:43:44 - INFO - codeparrot_training - Step 3989: {'lr': 0.0004978846441331073, 'samples': 2042880, 'steps': 3989, 'loss/train': 3.6243088245391846} +02/24/2022 06:43:47 - INFO - codeparrot_training - Step 3990: {'lr': 0.000497882519552252, 'samples': 2043392, 'steps': 3990, 'loss/train': 2.1013896465301514} +02/24/2022 06:43:53 - INFO - codeparrot_training - Step 3991: {'lr': 0.0004978803939095466, 'samples': 2043904, 'steps': 3991, 'loss/train': 2.831549644470215} +02/24/2022 06:43:56 - INFO - codeparrot_training - Step 3992: {'lr': 0.0004978782672050004, 'samples': 2044416, 'steps': 3992, 'loss/train': 3.109680414199829} +02/24/2022 06:44:02 - INFO - codeparrot_training - Step 3993: {'lr': 0.0004978761394386224, 'samples': 2044928, 'steps': 3993, 'loss/train': 1.1819266080856323} +02/24/2022 06:44:05 - INFO - codeparrot_training - Step 3994: {'lr': 0.0004978740106104218, 'samples': 2045440, 'steps': 3994, 'loss/train': 3.6159698963165283} +02/24/2022 06:44:11 - INFO - codeparrot_training - Step 3995: {'lr': 0.0004978718807204076, 'samples': 2045952, 'steps': 3995, 'loss/train': 2.9805691242218018} +02/24/2022 06:44:14 - INFO - codeparrot_training - Step 3996: {'lr': 0.0004978697497685889, 'samples': 2046464, 'steps': 3996, 'loss/train': 3.5271191596984863} +02/24/2022 06:44:20 - INFO - codeparrot_training - Step 3997: {'lr': 0.0004978676177549749, 'samples': 2046976, 'steps': 3997, 'loss/train': 2.2478177547454834} +02/24/2022 06:44:24 - INFO - codeparrot_training - Step 3998: {'lr': 0.0004978654846795748, 'samples': 2047488, 'steps': 3998, 'loss/train': 2.3059802055358887} +02/24/2022 06:44:29 - INFO - codeparrot_training - Step 3999: {'lr': 0.0004978633505423976, 'samples': 2048000, 'steps': 3999, 'loss/train': 2.555680751800537} +02/24/2022 06:44:29 - INFO - codeparrot_training - Evaluating and saving model checkpoint