diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -25451,3 +25451,1009 @@ Use FP16 precision: False 02/25/2022 09:57:48 - INFO - codeparrot_training - Step 24998: {'lr': 0.0002663834370242611, 'samples': 12799488, 'steps': 24998, 'loss/train': 2.7944555282592773} 02/25/2022 09:57:52 - INFO - codeparrot_training - Step 24999: {'lr': 0.00026636710970095426, 'samples': 12800000, 'steps': 24999, 'loss/train': 0.8392263650894165} 02/25/2022 09:57:52 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 09:58:09 - WARNING - huggingface_hub.repository - Several commits (25) will be pushed upstream. +02/25/2022 09:58:09 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 09:58:43 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 7a862ae..5549929 floral-grass-11 -> floral-grass-11 + +02/25/2022 09:58:50 - INFO - codeparrot_training - Step 25000: {'lr': 0.0002663507823075358, 'samples': 12800512, 'steps': 25000, 'loss/train': 1.160456895828247} +02/25/2022 09:58:53 - INFO - codeparrot_training - Step 25001: {'lr': 0.00026633445484407583, 'samples': 12801024, 'steps': 25001, 'loss/train': 2.628060817718506} +02/25/2022 09:59:01 - INFO - codeparrot_training - Step 25002: {'lr': 0.0002663181273106442, 'samples': 12801536, 'steps': 25002, 'loss/train': 1.6362329721450806} +02/25/2022 09:59:04 - INFO - codeparrot_training - Step 25003: {'lr': 0.00026630179970731094, 'samples': 12802048, 'steps': 25003, 'loss/train': 1.5520554780960083} +02/25/2022 09:59:10 - INFO - codeparrot_training - Step 25004: {'lr': 0.0002662854720341459, 'samples': 12802560, 'steps': 25004, 'loss/train': 0.42571038007736206} +02/25/2022 09:59:13 - INFO - codeparrot_training - Step 25005: {'lr': 0.00026626914429121913, 'samples': 12803072, 'steps': 25005, 'loss/train': 1.7985339164733887} +02/25/2022 09:59:19 - INFO - codeparrot_training - Step 25006: {'lr': 0.00026625281647860047, 'samples': 12803584, 'steps': 25006, 'loss/train': 2.1499977111816406} +02/25/2022 09:59:22 - INFO - codeparrot_training - Step 25007: {'lr': 0.0002662364885963599, 'samples': 12804096, 'steps': 25007, 'loss/train': 1.6239352226257324} +02/25/2022 09:59:28 - INFO - codeparrot_training - Step 25008: {'lr': 0.0002662201606445674, 'samples': 12804608, 'steps': 25008, 'loss/train': 1.7747437953948975} +02/25/2022 09:59:31 - INFO - codeparrot_training - Step 25009: {'lr': 0.00026620383262329286, 'samples': 12805120, 'steps': 25009, 'loss/train': 1.72773277759552} +02/25/2022 09:59:37 - INFO - codeparrot_training - Step 25010: {'lr': 0.0002661875045326062, 'samples': 12805632, 'steps': 25010, 'loss/train': 2.1759560108184814} +02/25/2022 09:59:40 - INFO - codeparrot_training - Step 25011: {'lr': 0.0002661711763725775, 'samples': 12806144, 'steps': 25011, 'loss/train': 1.2365885972976685} +02/25/2022 09:59:48 - INFO - codeparrot_training - Step 25012: {'lr': 0.0002661548481432766, 'samples': 12806656, 'steps': 25012, 'loss/train': 2.0251924991607666} +02/25/2022 09:59:51 - INFO - codeparrot_training - Step 25013: {'lr': 0.00026613851984477337, 'samples': 12807168, 'steps': 25013, 'loss/train': 1.984740972518921} +02/25/2022 09:59:57 - INFO - codeparrot_training - Step 25014: {'lr': 0.00026612219147713795, 'samples': 12807680, 'steps': 25014, 'loss/train': 2.4250524044036865} +02/25/2022 10:00:01 - INFO - codeparrot_training - Step 25015: {'lr': 0.00026610586304044015, 'samples': 12808192, 'steps': 25015, 'loss/train': 0.9071022272109985} +02/25/2022 10:00:06 - INFO - codeparrot_training - Step 25016: {'lr': 0.0002660895345347499, 'samples': 12808704, 'steps': 25016, 'loss/train': 1.991665005683899} +02/25/2022 10:00:10 - INFO - codeparrot_training - Step 25017: {'lr': 0.0002660732059601372, 'samples': 12809216, 'steps': 25017, 'loss/train': 2.069371461868286} +02/25/2022 10:00:15 - INFO - codeparrot_training - Step 25018: {'lr': 0.0002660568773166721, 'samples': 12809728, 'steps': 25018, 'loss/train': 2.193485975265503} +02/25/2022 10:00:19 - INFO - codeparrot_training - Step 25019: {'lr': 0.00026604054860442436, 'samples': 12810240, 'steps': 25019, 'loss/train': 2.2914202213287354} +02/25/2022 10:00:24 - INFO - codeparrot_training - Step 25020: {'lr': 0.000266024219823464, 'samples': 12810752, 'steps': 25020, 'loss/train': 1.6088972091674805} +02/25/2022 10:00:28 - INFO - codeparrot_training - Step 25021: {'lr': 0.00026600789097386095, 'samples': 12811264, 'steps': 25021, 'loss/train': 0.6779914498329163} +02/25/2022 10:00:35 - INFO - codeparrot_training - Step 25022: {'lr': 0.00026599156205568523, 'samples': 12811776, 'steps': 25022, 'loss/train': 1.6544021368026733} +02/25/2022 10:00:38 - INFO - codeparrot_training - Step 25023: {'lr': 0.00026597523306900675, 'samples': 12812288, 'steps': 25023, 'loss/train': 2.0362980365753174} +02/25/2022 10:00:44 - INFO - codeparrot_training - Step 25024: {'lr': 0.0002659589040138954, 'samples': 12812800, 'steps': 25024, 'loss/train': 1.6817017793655396} +02/25/2022 10:00:47 - INFO - codeparrot_training - Step 25025: {'lr': 0.00026594257489042115, 'samples': 12813312, 'steps': 25025, 'loss/train': 1.5721395015716553} +02/25/2022 10:00:53 - INFO - codeparrot_training - Step 25026: {'lr': 0.00026592624569865396, 'samples': 12813824, 'steps': 25026, 'loss/train': 2.136380672454834} +02/25/2022 10:00:56 - INFO - codeparrot_training - Step 25027: {'lr': 0.00026590991643866393, 'samples': 12814336, 'steps': 25027, 'loss/train': 1.866845965385437} +02/25/2022 10:01:02 - INFO - codeparrot_training - Step 25028: {'lr': 0.0002658935871105207, 'samples': 12814848, 'steps': 25028, 'loss/train': 2.767653226852417} +02/25/2022 10:01:05 - INFO - codeparrot_training - Step 25029: {'lr': 0.0002658772577142945, 'samples': 12815360, 'steps': 25029, 'loss/train': 0.9264286756515503} +02/25/2022 10:01:11 - INFO - codeparrot_training - Step 25030: {'lr': 0.0002658609282500551, 'samples': 12815872, 'steps': 25030, 'loss/train': 1.4567736387252808} +02/25/2022 10:01:14 - INFO - codeparrot_training - Step 25031: {'lr': 0.0002658445987178726, 'samples': 12816384, 'steps': 25031, 'loss/train': 2.3768463134765625} +02/25/2022 10:01:20 - INFO - codeparrot_training - Step 25032: {'lr': 0.00026582826911781675, 'samples': 12816896, 'steps': 25032, 'loss/train': 2.361173152923584} +02/25/2022 10:01:23 - INFO - codeparrot_training - Step 25033: {'lr': 0.0002658119394499577, 'samples': 12817408, 'steps': 25033, 'loss/train': 2.2039384841918945} +02/25/2022 10:01:29 - INFO - codeparrot_training - Step 25034: {'lr': 0.0002657956097143653, 'samples': 12817920, 'steps': 25034, 'loss/train': 2.3813908100128174} +02/25/2022 10:01:32 - INFO - codeparrot_training - Step 25035: {'lr': 0.0002657792799111095, 'samples': 12818432, 'steps': 25035, 'loss/train': 1.2311429977416992} +02/25/2022 10:01:40 - INFO - codeparrot_training - Step 25036: {'lr': 0.00026576295004026034, 'samples': 12818944, 'steps': 25036, 'loss/train': 0.7064680457115173} +02/25/2022 10:01:43 - INFO - codeparrot_training - Step 25037: {'lr': 0.00026574662010188767, 'samples': 12819456, 'steps': 25037, 'loss/train': 2.1231205463409424} +02/25/2022 10:01:49 - INFO - codeparrot_training - Step 25038: {'lr': 0.0002657302900960615, 'samples': 12819968, 'steps': 25038, 'loss/train': 1.8891308307647705} +02/25/2022 10:01:52 - INFO - codeparrot_training - Step 25039: {'lr': 0.00026571396002285174, 'samples': 12820480, 'steps': 25039, 'loss/train': 1.2767367362976074} +02/25/2022 10:01:58 - INFO - codeparrot_training - Step 25040: {'lr': 0.0002656976298823284, 'samples': 12820992, 'steps': 25040, 'loss/train': 0.625769853591919} +02/25/2022 10:02:02 - INFO - codeparrot_training - Step 25041: {'lr': 0.00026568129967456135, 'samples': 12821504, 'steps': 25041, 'loss/train': 2.32356595993042} +02/25/2022 10:02:07 - INFO - codeparrot_training - Step 25042: {'lr': 0.0002656649693996206, 'samples': 12822016, 'steps': 25042, 'loss/train': 1.1074837446212769} +02/25/2022 10:02:11 - INFO - codeparrot_training - Step 25043: {'lr': 0.00026564863905757606, 'samples': 12822528, 'steps': 25043, 'loss/train': 2.6152665615081787} +02/25/2022 10:02:16 - INFO - codeparrot_training - Step 25044: {'lr': 0.00026563230864849784, 'samples': 12823040, 'steps': 25044, 'loss/train': 2.2992584705352783} +02/25/2022 10:02:20 - INFO - codeparrot_training - Step 25045: {'lr': 0.0002656159781724557, 'samples': 12823552, 'steps': 25045, 'loss/train': 1.691097617149353} +02/25/2022 10:02:25 - INFO - codeparrot_training - Step 25046: {'lr': 0.0002655996476295196, 'samples': 12824064, 'steps': 25046, 'loss/train': 0.944744884967804} +02/25/2022 10:02:29 - INFO - codeparrot_training - Step 25047: {'lr': 0.0002655833170197597, 'samples': 12824576, 'steps': 25047, 'loss/train': 1.5726711750030518} +02/25/2022 10:02:36 - INFO - codeparrot_training - Step 25048: {'lr': 0.0002655669863432457, 'samples': 12825088, 'steps': 25048, 'loss/train': 2.2993552684783936} +02/25/2022 10:02:40 - INFO - codeparrot_training - Step 25049: {'lr': 0.00026555065560004775, 'samples': 12825600, 'steps': 25049, 'loss/train': 1.6567405462265015} +02/25/2022 10:02:46 - INFO - codeparrot_training - Step 25050: {'lr': 0.0002655343247902357, 'samples': 12826112, 'steps': 25050, 'loss/train': 2.1947453022003174} +02/25/2022 10:02:49 - INFO - codeparrot_training - Step 25051: {'lr': 0.00026551799391387953, 'samples': 12826624, 'steps': 25051, 'loss/train': 1.6891319751739502} +02/25/2022 10:02:52 - INFO - codeparrot_training - Step 25052: {'lr': 0.0002655016629710492, 'samples': 12827136, 'steps': 25052, 'loss/train': 2.944282293319702} +02/25/2022 10:02:58 - INFO - codeparrot_training - Step 25053: {'lr': 0.0002654853319618147, 'samples': 12827648, 'steps': 25053, 'loss/train': 0.4813360571861267} +02/25/2022 10:03:02 - INFO - codeparrot_training - Step 25054: {'lr': 0.00026546900088624594, 'samples': 12828160, 'steps': 25054, 'loss/train': 3.6540863513946533} +02/25/2022 10:03:07 - INFO - codeparrot_training - Step 25055: {'lr': 0.0002654526697444129, 'samples': 12828672, 'steps': 25055, 'loss/train': 1.298710584640503} +02/25/2022 10:03:11 - INFO - codeparrot_training - Step 25056: {'lr': 0.0002654363385363855, 'samples': 12829184, 'steps': 25056, 'loss/train': 1.1219193935394287} +02/25/2022 10:03:17 - INFO - codeparrot_training - Step 25057: {'lr': 0.0002654200072622337, 'samples': 12829696, 'steps': 25057, 'loss/train': 1.8067675828933716} +02/25/2022 10:03:20 - INFO - codeparrot_training - Step 25058: {'lr': 0.00026540367592202754, 'samples': 12830208, 'steps': 25058, 'loss/train': 1.7024645805358887} +02/25/2022 10:03:24 - INFO - codeparrot_training - Step 25059: {'lr': 0.0002653873445158369, 'samples': 12830720, 'steps': 25059, 'loss/train': 2.1636903285980225} +02/25/2022 10:03:31 - INFO - codeparrot_training - Step 25060: {'lr': 0.0002653710130437318, 'samples': 12831232, 'steps': 25060, 'loss/train': 2.718723773956299} +02/25/2022 10:03:35 - INFO - codeparrot_training - Step 25061: {'lr': 0.0002653546815057821, 'samples': 12831744, 'steps': 25061, 'loss/train': 2.3811492919921875} +02/25/2022 10:03:40 - INFO - codeparrot_training - Step 25062: {'lr': 0.0002653383499020579, 'samples': 12832256, 'steps': 25062, 'loss/train': 8.567477226257324} +02/25/2022 10:03:44 - INFO - codeparrot_training - Step 25063: {'lr': 0.00026532201823262897, 'samples': 12832768, 'steps': 25063, 'loss/train': 1.0298140048980713} +02/25/2022 10:03:49 - INFO - codeparrot_training - Step 25064: {'lr': 0.00026530568649756547, 'samples': 12833280, 'steps': 25064, 'loss/train': 1.4718226194381714} +02/25/2022 10:03:53 - INFO - codeparrot_training - Step 25065: {'lr': 0.0002652893546969373, 'samples': 12833792, 'steps': 25065, 'loss/train': 2.306047201156616} +02/25/2022 10:03:58 - INFO - codeparrot_training - Step 25066: {'lr': 0.0002652730228308143, 'samples': 12834304, 'steps': 25066, 'loss/train': 2.2164902687072754} +02/25/2022 10:04:02 - INFO - codeparrot_training - Step 25067: {'lr': 0.00026525669089926657, 'samples': 12834816, 'steps': 25067, 'loss/train': 2.468912363052368} +02/25/2022 10:04:07 - INFO - codeparrot_training - Step 25068: {'lr': 0.00026524035890236397, 'samples': 12835328, 'steps': 25068, 'loss/train': 1.9229764938354492} +02/25/2022 10:04:11 - INFO - codeparrot_training - Step 25069: {'lr': 0.0002652240268401766, 'samples': 12835840, 'steps': 25069, 'loss/train': 2.1440024375915527} +02/25/2022 10:04:18 - INFO - codeparrot_training - Step 25070: {'lr': 0.00026520769471277423, 'samples': 12836352, 'steps': 25070, 'loss/train': 2.160217523574829} +02/25/2022 10:04:21 - INFO - codeparrot_training - Step 25071: {'lr': 0.000265191362520227, 'samples': 12836864, 'steps': 25071, 'loss/train': 1.2895935773849487} +02/25/2022 10:04:27 - INFO - codeparrot_training - Step 25072: {'lr': 0.00026517503026260474, 'samples': 12837376, 'steps': 25072, 'loss/train': 1.391907811164856} +02/25/2022 10:04:30 - INFO - codeparrot_training - Step 25073: {'lr': 0.00026515869793997755, 'samples': 12837888, 'steps': 25073, 'loss/train': 2.3654892444610596} +02/25/2022 10:04:36 - INFO - codeparrot_training - Step 25074: {'lr': 0.0002651423655524152, 'samples': 12838400, 'steps': 25074, 'loss/train': 1.4736377000808716} +02/25/2022 10:04:40 - INFO - codeparrot_training - Step 25075: {'lr': 0.00026512603309998783, 'samples': 12838912, 'steps': 25075, 'loss/train': 2.848262071609497} +02/25/2022 10:04:45 - INFO - codeparrot_training - Step 25076: {'lr': 0.00026510970058276533, 'samples': 12839424, 'steps': 25076, 'loss/train': 1.776846170425415} +02/25/2022 10:04:49 - INFO - codeparrot_training - Step 25077: {'lr': 0.00026509336800081774, 'samples': 12839936, 'steps': 25077, 'loss/train': 4.0198974609375} +02/25/2022 10:04:54 - INFO - codeparrot_training - Step 25078: {'lr': 0.00026507703535421485, 'samples': 12840448, 'steps': 25078, 'loss/train': 0.8756927847862244} +02/25/2022 10:04:58 - INFO - codeparrot_training - Step 25079: {'lr': 0.0002650607026430268, 'samples': 12840960, 'steps': 25079, 'loss/train': 1.9865827560424805} +02/25/2022 10:05:03 - INFO - codeparrot_training - Step 25080: {'lr': 0.0002650443698673234, 'samples': 12841472, 'steps': 25080, 'loss/train': 1.4674131870269775} +02/25/2022 10:05:07 - INFO - codeparrot_training - Step 25081: {'lr': 0.0002650280370271747, 'samples': 12841984, 'steps': 25081, 'loss/train': 1.5579302310943604} +02/25/2022 10:05:13 - INFO - codeparrot_training - Step 25082: {'lr': 0.00026501170412265076, 'samples': 12842496, 'steps': 25082, 'loss/train': 1.2404475212097168} +02/25/2022 10:05:16 - INFO - codeparrot_training - Step 25083: {'lr': 0.00026499537115382135, 'samples': 12843008, 'steps': 25083, 'loss/train': 1.9711076021194458} +02/25/2022 10:05:19 - INFO - codeparrot_training - Step 25084: {'lr': 0.0002649790381207566, 'samples': 12843520, 'steps': 25084, 'loss/train': 0.4490146338939667} +02/25/2022 10:05:27 - INFO - codeparrot_training - Step 25085: {'lr': 0.0002649627050235263, 'samples': 12844032, 'steps': 25085, 'loss/train': 2.262723922729492} +02/25/2022 10:05:30 - INFO - codeparrot_training - Step 25086: {'lr': 0.00026494637186220065, 'samples': 12844544, 'steps': 25086, 'loss/train': 1.8037751913070679} +02/25/2022 10:05:36 - INFO - codeparrot_training - Step 25087: {'lr': 0.00026493003863684944, 'samples': 12845056, 'steps': 25087, 'loss/train': 2.374884605407715} +02/25/2022 10:05:39 - INFO - codeparrot_training - Step 25088: {'lr': 0.0002649137053475427, 'samples': 12845568, 'steps': 25088, 'loss/train': 4.739823818206787} +02/25/2022 10:05:45 - INFO - codeparrot_training - Step 25089: {'lr': 0.00026489737199435036, 'samples': 12846080, 'steps': 25089, 'loss/train': 1.5920213460922241} +02/25/2022 10:05:50 - INFO - codeparrot_training - Step 25090: {'lr': 0.0002648810385773424, 'samples': 12846592, 'steps': 25090, 'loss/train': 2.736701726913452} +02/25/2022 10:05:54 - INFO - codeparrot_training - Step 25091: {'lr': 0.0002648647050965888, 'samples': 12847104, 'steps': 25091, 'loss/train': 1.961225986480713} +02/25/2022 10:05:59 - INFO - codeparrot_training - Step 25092: {'lr': 0.00026484837155215957, 'samples': 12847616, 'steps': 25092, 'loss/train': 1.0222641229629517} +02/25/2022 10:06:03 - INFO - codeparrot_training - Step 25093: {'lr': 0.00026483203794412457, 'samples': 12848128, 'steps': 25093, 'loss/train': 1.9658944606781006} +02/25/2022 10:06:08 - INFO - codeparrot_training - Step 25094: {'lr': 0.00026481570427255385, 'samples': 12848640, 'steps': 25094, 'loss/train': 1.1277885437011719} +02/25/2022 10:06:12 - INFO - codeparrot_training - Step 25095: {'lr': 0.0002647993705375174, 'samples': 12849152, 'steps': 25095, 'loss/train': 2.2916805744171143} +02/25/2022 10:06:19 - INFO - codeparrot_training - Step 25096: {'lr': 0.00026478303673908507, 'samples': 12849664, 'steps': 25096, 'loss/train': 1.7349023818969727} +02/25/2022 10:06:23 - INFO - codeparrot_training - Step 25097: {'lr': 0.00026476670287732697, 'samples': 12850176, 'steps': 25097, 'loss/train': 2.313267707824707} +02/25/2022 10:06:28 - INFO - codeparrot_training - Step 25098: {'lr': 0.000264750368952313, 'samples': 12850688, 'steps': 25098, 'loss/train': 2.211817741394043} +02/25/2022 10:06:32 - INFO - codeparrot_training - Step 25099: {'lr': 0.00026473403496411307, 'samples': 12851200, 'steps': 25099, 'loss/train': 1.7130725383758545} +02/25/2022 10:06:37 - INFO - codeparrot_training - Step 25100: {'lr': 0.00026471770091279724, 'samples': 12851712, 'steps': 25100, 'loss/train': 2.3953895568847656} +02/25/2022 10:06:41 - INFO - codeparrot_training - Step 25101: {'lr': 0.0002647013667984355, 'samples': 12852224, 'steps': 25101, 'loss/train': 2.012718915939331} +02/25/2022 10:06:46 - INFO - codeparrot_training - Step 25102: {'lr': 0.00026468503262109774, 'samples': 12852736, 'steps': 25102, 'loss/train': 1.41632080078125} +02/25/2022 10:06:50 - INFO - codeparrot_training - Step 25103: {'lr': 0.00026466869838085393, 'samples': 12853248, 'steps': 25103, 'loss/train': 2.2961106300354004} +02/25/2022 10:06:55 - INFO - codeparrot_training - Step 25104: {'lr': 0.0002646523640777741, 'samples': 12853760, 'steps': 25104, 'loss/train': 1.8013173341751099} +02/25/2022 10:06:58 - INFO - codeparrot_training - Step 25105: {'lr': 0.00026463602971192824, 'samples': 12854272, 'steps': 25105, 'loss/train': 1.9846426248550415} +02/25/2022 10:07:06 - INFO - codeparrot_training - Step 25106: {'lr': 0.0002646196952833862, 'samples': 12854784, 'steps': 25106, 'loss/train': 2.1611316204071045} +02/25/2022 10:07:09 - INFO - codeparrot_training - Step 25107: {'lr': 0.0002646033607922181, 'samples': 12855296, 'steps': 25107, 'loss/train': 1.252687931060791} +02/25/2022 10:07:15 - INFO - codeparrot_training - Step 25108: {'lr': 0.0002645870262384938, 'samples': 12855808, 'steps': 25108, 'loss/train': 1.8214967250823975} +02/25/2022 10:07:18 - INFO - codeparrot_training - Step 25109: {'lr': 0.00026457069162228337, 'samples': 12856320, 'steps': 25109, 'loss/train': 1.959747076034546} +02/25/2022 10:07:24 - INFO - codeparrot_training - Step 25110: {'lr': 0.0002645543569436566, 'samples': 12856832, 'steps': 25110, 'loss/train': 2.5783262252807617} +02/25/2022 10:07:27 - INFO - codeparrot_training - Step 25111: {'lr': 0.0002645380222026837, 'samples': 12857344, 'steps': 25111, 'loss/train': 2.056340217590332} +02/25/2022 10:07:33 - INFO - codeparrot_training - Step 25112: {'lr': 0.0002645216873994345, 'samples': 12857856, 'steps': 25112, 'loss/train': 0.7776860594749451} +02/25/2022 10:07:36 - INFO - codeparrot_training - Step 25113: {'lr': 0.00026450535253397895, 'samples': 12858368, 'steps': 25113, 'loss/train': 1.2803047895431519} +02/25/2022 10:07:42 - INFO - codeparrot_training - Step 25114: {'lr': 0.0002644890176063872, 'samples': 12858880, 'steps': 25114, 'loss/train': 0.702643096446991} +02/25/2022 10:07:45 - INFO - codeparrot_training - Step 25115: {'lr': 0.00026447268261672895, 'samples': 12859392, 'steps': 25115, 'loss/train': 2.390354633331299} +02/25/2022 10:07:51 - INFO - codeparrot_training - Step 25116: {'lr': 0.0002644563475650744, 'samples': 12859904, 'steps': 25116, 'loss/train': 0.7148985266685486} +02/25/2022 10:07:54 - INFO - codeparrot_training - Step 25117: {'lr': 0.0002644400124514934, 'samples': 12860416, 'steps': 25117, 'loss/train': 1.1707137823104858} +02/25/2022 10:08:00 - INFO - codeparrot_training - Step 25118: {'lr': 0.000264423677276056, 'samples': 12860928, 'steps': 25118, 'loss/train': 1.7243993282318115} +02/25/2022 10:08:03 - INFO - codeparrot_training - Step 25119: {'lr': 0.0002644073420388322, 'samples': 12861440, 'steps': 25119, 'loss/train': 1.88140070438385} +02/25/2022 10:08:09 - INFO - codeparrot_training - Step 25120: {'lr': 0.00026439100673989184, 'samples': 12861952, 'steps': 25120, 'loss/train': 0.8988780975341797} +02/25/2022 10:08:12 - INFO - codeparrot_training - Step 25121: {'lr': 0.00026437467137930507, 'samples': 12862464, 'steps': 25121, 'loss/train': 1.7940598726272583} +02/25/2022 10:08:20 - INFO - codeparrot_training - Step 25122: {'lr': 0.00026435833595714163, 'samples': 12862976, 'steps': 25122, 'loss/train': 1.2678987979888916} +02/25/2022 10:08:23 - INFO - codeparrot_training - Step 25123: {'lr': 0.0002643420004734718, 'samples': 12863488, 'steps': 25123, 'loss/train': 2.009763479232788} +02/25/2022 10:08:29 - INFO - codeparrot_training - Step 25124: {'lr': 0.00026432566492836523, 'samples': 12864000, 'steps': 25124, 'loss/train': 1.8347705602645874} +02/25/2022 10:08:32 - INFO - codeparrot_training - Step 25125: {'lr': 0.00026430932932189225, 'samples': 12864512, 'steps': 25125, 'loss/train': 1.8948403596878052} +02/25/2022 10:08:38 - INFO - codeparrot_training - Step 25126: {'lr': 0.0002642929936541225, 'samples': 12865024, 'steps': 25126, 'loss/train': 2.1912829875946045} +02/25/2022 10:08:41 - INFO - codeparrot_training - Step 25127: {'lr': 0.00026427665792512614, 'samples': 12865536, 'steps': 25127, 'loss/train': 2.6858644485473633} +02/25/2022 10:08:47 - INFO - codeparrot_training - Step 25128: {'lr': 0.0002642603221349731, 'samples': 12866048, 'steps': 25128, 'loss/train': 1.4050952196121216} +02/25/2022 10:08:50 - INFO - codeparrot_training - Step 25129: {'lr': 0.0002642439862837334, 'samples': 12866560, 'steps': 25129, 'loss/train': 1.9154839515686035} +02/25/2022 10:08:56 - INFO - codeparrot_training - Step 25130: {'lr': 0.00026422765037147696, 'samples': 12867072, 'steps': 25130, 'loss/train': 2.706439971923828} +02/25/2022 10:09:00 - INFO - codeparrot_training - Step 25131: {'lr': 0.0002642113143982738, 'samples': 12867584, 'steps': 25131, 'loss/train': 1.623214840888977} +02/25/2022 10:09:07 - INFO - codeparrot_training - Step 25132: {'lr': 0.0002641949783641939, 'samples': 12868096, 'steps': 25132, 'loss/train': 2.114255428314209} +02/25/2022 10:09:10 - INFO - codeparrot_training - Step 25133: {'lr': 0.00026417864226930713, 'samples': 12868608, 'steps': 25133, 'loss/train': 2.5305700302124023} +02/25/2022 10:09:16 - INFO - codeparrot_training - Step 25134: {'lr': 0.00026416230611368363, 'samples': 12869120, 'steps': 25134, 'loss/train': 1.8685394525527954} +02/25/2022 10:09:19 - INFO - codeparrot_training - Step 25135: {'lr': 0.00026414596989739333, 'samples': 12869632, 'steps': 25135, 'loss/train': 2.091590166091919} +02/25/2022 10:09:25 - INFO - codeparrot_training - Step 25136: {'lr': 0.0002641296336205062, 'samples': 12870144, 'steps': 25136, 'loss/train': 1.804237723350525} +02/25/2022 10:09:28 - INFO - codeparrot_training - Step 25137: {'lr': 0.0002641132972830921, 'samples': 12870656, 'steps': 25137, 'loss/train': 1.5900814533233643} +02/25/2022 10:09:34 - INFO - codeparrot_training - Step 25138: {'lr': 0.00026409696088522123, 'samples': 12871168, 'steps': 25138, 'loss/train': 1.7084108591079712} +02/25/2022 10:09:37 - INFO - codeparrot_training - Step 25139: {'lr': 0.00026408062442696335, 'samples': 12871680, 'steps': 25139, 'loss/train': 1.98611319065094} +02/25/2022 10:09:43 - INFO - codeparrot_training - Step 25140: {'lr': 0.00026406428790838864, 'samples': 12872192, 'steps': 25140, 'loss/train': 2.0422236919403076} +02/25/2022 10:09:46 - INFO - codeparrot_training - Step 25141: {'lr': 0.00026404795132956694, 'samples': 12872704, 'steps': 25141, 'loss/train': 1.176757574081421} +02/25/2022 10:09:53 - INFO - codeparrot_training - Step 25142: {'lr': 0.00026403161469056827, 'samples': 12873216, 'steps': 25142, 'loss/train': 0.999722957611084} +02/25/2022 10:09:57 - INFO - codeparrot_training - Step 25143: {'lr': 0.0002640152779914626, 'samples': 12873728, 'steps': 25143, 'loss/train': 2.1472854614257812} +02/25/2022 10:10:02 - INFO - codeparrot_training - Step 25144: {'lr': 0.00026399894123232, 'samples': 12874240, 'steps': 25144, 'loss/train': 1.7768681049346924} +02/25/2022 10:10:06 - INFO - codeparrot_training - Step 25145: {'lr': 0.00026398260441321035, 'samples': 12874752, 'steps': 25145, 'loss/train': 2.6299691200256348} +02/25/2022 10:10:11 - INFO - codeparrot_training - Step 25146: {'lr': 0.0002639662675342036, 'samples': 12875264, 'steps': 25146, 'loss/train': 1.055228590965271} +02/25/2022 10:10:15 - INFO - codeparrot_training - Step 25147: {'lr': 0.00026394993059536995, 'samples': 12875776, 'steps': 25147, 'loss/train': 6.421290397644043} +02/25/2022 10:10:20 - INFO - codeparrot_training - Step 25148: {'lr': 0.00026393359359677904, 'samples': 12876288, 'steps': 25148, 'loss/train': 1.5929527282714844} +02/25/2022 10:10:24 - INFO - codeparrot_training - Step 25149: {'lr': 0.0002639172565385012, 'samples': 12876800, 'steps': 25149, 'loss/train': 1.9797353744506836} +02/25/2022 10:10:29 - INFO - codeparrot_training - Step 25150: {'lr': 0.00026390091942060613, 'samples': 12877312, 'steps': 25150, 'loss/train': 0.9531596302986145} +02/25/2022 10:10:33 - INFO - codeparrot_training - Step 25151: {'lr': 0.00026388458224316404, 'samples': 12877824, 'steps': 25151, 'loss/train': 1.5998244285583496} +02/25/2022 10:10:40 - INFO - codeparrot_training - Step 25152: {'lr': 0.0002638682450062448, 'samples': 12878336, 'steps': 25152, 'loss/train': 2.1040232181549072} +02/25/2022 10:10:44 - INFO - codeparrot_training - Step 25153: {'lr': 0.0002638519077099183, 'samples': 12878848, 'steps': 25153, 'loss/train': 2.62042498588562} +02/25/2022 10:10:49 - INFO - codeparrot_training - Step 25154: {'lr': 0.0002638355703542547, 'samples': 12879360, 'steps': 25154, 'loss/train': 3.3423335552215576} +02/25/2022 10:10:53 - INFO - codeparrot_training - Step 25155: {'lr': 0.00026381923293932385, 'samples': 12879872, 'steps': 25155, 'loss/train': 1.4967269897460938} +02/25/2022 10:10:58 - INFO - codeparrot_training - Step 25156: {'lr': 0.0002638028954651959, 'samples': 12880384, 'steps': 25156, 'loss/train': 0.7448533177375793} +02/25/2022 10:11:02 - INFO - codeparrot_training - Step 25157: {'lr': 0.0002637865579319406, 'samples': 12880896, 'steps': 25157, 'loss/train': 2.8340001106262207} +02/25/2022 10:11:07 - INFO - codeparrot_training - Step 25158: {'lr': 0.0002637702203396282, 'samples': 12881408, 'steps': 25158, 'loss/train': 1.175994873046875} +02/25/2022 10:11:11 - INFO - codeparrot_training - Step 25159: {'lr': 0.00026375388268832844, 'samples': 12881920, 'steps': 25159, 'loss/train': 1.030189871788025} +02/25/2022 10:11:16 - INFO - codeparrot_training - Step 25160: {'lr': 0.00026373754497811147, 'samples': 12882432, 'steps': 25160, 'loss/train': 2.870720148086548} +02/25/2022 10:11:19 - INFO - codeparrot_training - Step 25161: {'lr': 0.0002637212072090472, 'samples': 12882944, 'steps': 25161, 'loss/train': 2.220973014831543} +02/25/2022 10:11:25 - INFO - codeparrot_training - Step 25162: {'lr': 0.0002637048693812056, 'samples': 12883456, 'steps': 25162, 'loss/train': 1.8070130348205566} +02/25/2022 10:11:29 - INFO - codeparrot_training - Step 25163: {'lr': 0.00026368853149465676, 'samples': 12883968, 'steps': 25163, 'loss/train': 0.9473209977149963} +02/25/2022 10:11:35 - INFO - codeparrot_training - Step 25164: {'lr': 0.00026367219354947056, 'samples': 12884480, 'steps': 25164, 'loss/train': 2.1047441959381104} +02/25/2022 10:11:38 - INFO - codeparrot_training - Step 25165: {'lr': 0.000263655855545717, 'samples': 12884992, 'steps': 25165, 'loss/train': 0.9830390810966492} +02/25/2022 10:11:44 - INFO - codeparrot_training - Step 25166: {'lr': 0.00026363951748346615, 'samples': 12885504, 'steps': 25166, 'loss/train': 0.845406711101532} +02/25/2022 10:11:47 - INFO - codeparrot_training - Step 25167: {'lr': 0.0002636231793627879, 'samples': 12886016, 'steps': 25167, 'loss/train': 1.1542160511016846} +02/25/2022 10:11:55 - INFO - codeparrot_training - Step 25168: {'lr': 0.00026360684118375226, 'samples': 12886528, 'steps': 25168, 'loss/train': 1.5078092813491821} +02/25/2022 10:11:58 - INFO - codeparrot_training - Step 25169: {'lr': 0.0002635905029464293, 'samples': 12887040, 'steps': 25169, 'loss/train': 2.3469903469085693} +02/25/2022 10:12:04 - INFO - codeparrot_training - Step 25170: {'lr': 0.0002635741646508889, 'samples': 12887552, 'steps': 25170, 'loss/train': 1.9248242378234863} +02/25/2022 10:12:07 - INFO - codeparrot_training - Step 25171: {'lr': 0.0002635578262972011, 'samples': 12888064, 'steps': 25171, 'loss/train': 2.318403959274292} +02/25/2022 10:12:13 - INFO - codeparrot_training - Step 25172: {'lr': 0.0002635414878854359, 'samples': 12888576, 'steps': 25172, 'loss/train': 2.0437896251678467} +02/25/2022 10:12:16 - INFO - codeparrot_training - Step 25173: {'lr': 0.0002635251494156632, 'samples': 12889088, 'steps': 25173, 'loss/train': 1.870969533920288} +02/25/2022 10:12:22 - INFO - codeparrot_training - Step 25174: {'lr': 0.00026350881088795306, 'samples': 12889600, 'steps': 25174, 'loss/train': 1.3610689640045166} +02/25/2022 10:12:25 - INFO - codeparrot_training - Step 25175: {'lr': 0.0002634924723023755, 'samples': 12890112, 'steps': 25175, 'loss/train': 0.3429269790649414} +02/25/2022 10:12:29 - INFO - codeparrot_training - Step 25176: {'lr': 0.0002634761336590005, 'samples': 12890624, 'steps': 25176, 'loss/train': 0.12544433772563934} +02/25/2022 10:12:34 - INFO - codeparrot_training - Step 25177: {'lr': 0.00026345979495789797, 'samples': 12891136, 'steps': 25177, 'loss/train': 1.1430953741073608} +02/25/2022 10:12:41 - INFO - codeparrot_training - Step 25178: {'lr': 0.00026344345619913796, 'samples': 12891648, 'steps': 25178, 'loss/train': 1.7195504903793335} +02/25/2022 10:12:45 - INFO - codeparrot_training - Step 25179: {'lr': 0.0002634271173827904, 'samples': 12892160, 'steps': 25179, 'loss/train': 2.5008950233459473} +02/25/2022 10:12:50 - INFO - codeparrot_training - Step 25180: {'lr': 0.0002634107785089254, 'samples': 12892672, 'steps': 25180, 'loss/train': 1.8234107494354248} +02/25/2022 10:12:54 - INFO - codeparrot_training - Step 25181: {'lr': 0.0002633944395776129, 'samples': 12893184, 'steps': 25181, 'loss/train': 1.1687787771224976} +02/25/2022 10:12:59 - INFO - codeparrot_training - Step 25182: {'lr': 0.00026337810058892286, 'samples': 12893696, 'steps': 25182, 'loss/train': 1.5779426097869873} +02/25/2022 10:13:03 - INFO - codeparrot_training - Step 25183: {'lr': 0.0002633617615429252, 'samples': 12894208, 'steps': 25183, 'loss/train': 0.3335775136947632} +02/25/2022 10:13:08 - INFO - codeparrot_training - Step 25184: {'lr': 0.0002633454224396901, 'samples': 12894720, 'steps': 25184, 'loss/train': 1.4372093677520752} +02/25/2022 10:13:12 - INFO - codeparrot_training - Step 25185: {'lr': 0.00026332908327928746, 'samples': 12895232, 'steps': 25185, 'loss/train': 1.827237606048584} +02/25/2022 10:13:17 - INFO - codeparrot_training - Step 25186: {'lr': 0.00026331274406178726, 'samples': 12895744, 'steps': 25186, 'loss/train': 0.3919581174850464} +02/25/2022 10:13:21 - INFO - codeparrot_training - Step 25187: {'lr': 0.00026329640478725937, 'samples': 12896256, 'steps': 25187, 'loss/train': 2.2953128814697266} +02/25/2022 10:13:28 - INFO - codeparrot_training - Step 25188: {'lr': 0.00026328006545577406, 'samples': 12896768, 'steps': 25188, 'loss/train': 2.5481157302856445} +02/25/2022 10:13:32 - INFO - codeparrot_training - Step 25189: {'lr': 0.000263263726067401, 'samples': 12897280, 'steps': 25189, 'loss/train': 1.0703096389770508} +02/25/2022 10:13:37 - INFO - codeparrot_training - Step 25190: {'lr': 0.0002632473866222104, 'samples': 12897792, 'steps': 25190, 'loss/train': 2.317592144012451} +02/25/2022 10:13:41 - INFO - codeparrot_training - Step 25191: {'lr': 0.0002632310471202723, 'samples': 12898304, 'steps': 25191, 'loss/train': 2.086141586303711} +02/25/2022 10:13:44 - INFO - codeparrot_training - Step 25192: {'lr': 0.0002632147075616566, 'samples': 12898816, 'steps': 25192, 'loss/train': 1.1146897077560425} +02/25/2022 10:13:50 - INFO - codeparrot_training - Step 25193: {'lr': 0.0002631983679464332, 'samples': 12899328, 'steps': 25193, 'loss/train': 1.962989330291748} +02/25/2022 10:13:53 - INFO - codeparrot_training - Step 25194: {'lr': 0.0002631820282746722, 'samples': 12899840, 'steps': 25194, 'loss/train': 2.2453997135162354} +02/25/2022 10:13:59 - INFO - codeparrot_training - Step 25195: {'lr': 0.0002631656885464436, 'samples': 12900352, 'steps': 25195, 'loss/train': 0.8386679291725159} +02/25/2022 10:14:03 - INFO - codeparrot_training - Step 25196: {'lr': 0.00026314934876181734, 'samples': 12900864, 'steps': 25196, 'loss/train': 2.2704102993011475} +02/25/2022 10:14:08 - INFO - codeparrot_training - Step 25197: {'lr': 0.0002631330089208634, 'samples': 12901376, 'steps': 25197, 'loss/train': 2.329030990600586} +02/25/2022 10:14:12 - INFO - codeparrot_training - Step 25198: {'lr': 0.00026311666902365194, 'samples': 12901888, 'steps': 25198, 'loss/train': 1.9652130603790283} +02/25/2022 10:14:17 - INFO - codeparrot_training - Step 25199: {'lr': 0.00026310032907025276, 'samples': 12902400, 'steps': 25199, 'loss/train': 2.265171766281128} +02/25/2022 10:14:21 - INFO - codeparrot_training - Step 25200: {'lr': 0.000263083989060736, 'samples': 12902912, 'steps': 25200, 'loss/train': 2.438291072845459} +02/25/2022 10:14:28 - INFO - codeparrot_training - Step 25201: {'lr': 0.00026306764899517154, 'samples': 12903424, 'steps': 25201, 'loss/train': 1.3084348440170288} +02/25/2022 10:14:31 - INFO - codeparrot_training - Step 25202: {'lr': 0.0002630513088736294, 'samples': 12903936, 'steps': 25202, 'loss/train': 2.0516445636749268} +02/25/2022 10:14:37 - INFO - codeparrot_training - Step 25203: {'lr': 0.0002630349686961796, 'samples': 12904448, 'steps': 25203, 'loss/train': 1.963873267173767} +02/25/2022 10:14:40 - INFO - codeparrot_training - Step 25204: {'lr': 0.00026301862846289214, 'samples': 12904960, 'steps': 25204, 'loss/train': 2.644075393676758} +02/25/2022 10:14:46 - INFO - codeparrot_training - Step 25205: {'lr': 0.00026300228817383703, 'samples': 12905472, 'steps': 25205, 'loss/train': 2.1028757095336914} +02/25/2022 10:14:50 - INFO - codeparrot_training - Step 25206: {'lr': 0.00026298594782908424, 'samples': 12905984, 'steps': 25206, 'loss/train': 3.740410327911377} +02/25/2022 10:14:55 - INFO - codeparrot_training - Step 25207: {'lr': 0.00026296960742870374, 'samples': 12906496, 'steps': 25207, 'loss/train': 1.3810604810714722} +02/25/2022 10:14:59 - INFO - codeparrot_training - Step 25208: {'lr': 0.00026295326697276563, 'samples': 12907008, 'steps': 25208, 'loss/train': 1.3692246675491333} +02/25/2022 10:15:04 - INFO - codeparrot_training - Step 25209: {'lr': 0.00026293692646133974, 'samples': 12907520, 'steps': 25209, 'loss/train': 1.5162711143493652} +02/25/2022 10:15:08 - INFO - codeparrot_training - Step 25210: {'lr': 0.00026292058589449626, 'samples': 12908032, 'steps': 25210, 'loss/train': 2.312471866607666} +02/25/2022 10:15:15 - INFO - codeparrot_training - Step 25211: {'lr': 0.000262904245272305, 'samples': 12908544, 'steps': 25211, 'loss/train': 1.8811147212982178} +02/25/2022 10:15:19 - INFO - codeparrot_training - Step 25212: {'lr': 0.0002628879045948361, 'samples': 12909056, 'steps': 25212, 'loss/train': 1.6245062351226807} +02/25/2022 10:15:24 - INFO - codeparrot_training - Step 25213: {'lr': 0.0002628715638621595, 'samples': 12909568, 'steps': 25213, 'loss/train': 1.7548400163650513} +02/25/2022 10:15:28 - INFO - codeparrot_training - Step 25214: {'lr': 0.00026285522307434517, 'samples': 12910080, 'steps': 25214, 'loss/train': 2.358091354370117} +02/25/2022 10:15:33 - INFO - codeparrot_training - Step 25215: {'lr': 0.0002628388822314632, 'samples': 12910592, 'steps': 25215, 'loss/train': 2.141122817993164} +02/25/2022 10:15:37 - INFO - codeparrot_training - Step 25216: {'lr': 0.0002628225413335836, 'samples': 12911104, 'steps': 25216, 'loss/train': 1.2398630380630493} +02/25/2022 10:15:42 - INFO - codeparrot_training - Step 25217: {'lr': 0.0002628062003807762, 'samples': 12911616, 'steps': 25217, 'loss/train': 0.9681863784790039} +02/25/2022 10:15:46 - INFO - codeparrot_training - Step 25218: {'lr': 0.0002627898593731111, 'samples': 12912128, 'steps': 25218, 'loss/train': 1.0605509281158447} +02/25/2022 10:15:51 - INFO - codeparrot_training - Step 25219: {'lr': 0.0002627735183106584, 'samples': 12912640, 'steps': 25219, 'loss/train': 1.580093502998352} +02/25/2022 10:15:55 - INFO - codeparrot_training - Step 25220: {'lr': 0.00026275717719348793, 'samples': 12913152, 'steps': 25220, 'loss/train': 0.6143463850021362} +02/25/2022 10:16:00 - INFO - codeparrot_training - Step 25221: {'lr': 0.0002627408360216698, 'samples': 12913664, 'steps': 25221, 'loss/train': 0.4043956696987152} +02/25/2022 10:16:04 - INFO - codeparrot_training - Step 25222: {'lr': 0.00026272449479527386, 'samples': 12914176, 'steps': 25222, 'loss/train': 0.45767590403556824} +02/25/2022 10:16:09 - INFO - codeparrot_training - Step 25223: {'lr': 0.0002627081535143704, 'samples': 12914688, 'steps': 25223, 'loss/train': 1.9036891460418701} +02/25/2022 10:16:13 - INFO - codeparrot_training - Step 25224: {'lr': 0.0002626918121790291, 'samples': 12915200, 'steps': 25224, 'loss/train': 2.329961061477661} +02/25/2022 10:16:20 - INFO - codeparrot_training - Step 25225: {'lr': 0.0002626754707893202, 'samples': 12915712, 'steps': 25225, 'loss/train': 0.49185848236083984} +02/25/2022 10:16:24 - INFO - codeparrot_training - Step 25226: {'lr': 0.00026265912934531353, 'samples': 12916224, 'steps': 25226, 'loss/train': 1.6422491073608398} +02/25/2022 10:16:29 - INFO - codeparrot_training - Step 25227: {'lr': 0.00026264278784707924, 'samples': 12916736, 'steps': 25227, 'loss/train': 2.4964475631713867} +02/25/2022 10:16:33 - INFO - codeparrot_training - Step 25228: {'lr': 0.00026262644629468726, 'samples': 12917248, 'steps': 25228, 'loss/train': 1.4067500829696655} +02/25/2022 10:16:38 - INFO - codeparrot_training - Step 25229: {'lr': 0.0002626101046882076, 'samples': 12917760, 'steps': 25229, 'loss/train': 2.486905336380005} +02/25/2022 10:16:42 - INFO - codeparrot_training - Step 25230: {'lr': 0.00026259376302771023, 'samples': 12918272, 'steps': 25230, 'loss/train': 1.2278902530670166} +02/25/2022 10:16:47 - INFO - codeparrot_training - Step 25231: {'lr': 0.0002625774213132651, 'samples': 12918784, 'steps': 25231, 'loss/train': 1.9769572019577026} +02/25/2022 10:16:51 - INFO - codeparrot_training - Step 25232: {'lr': 0.0002625610795449424, 'samples': 12919296, 'steps': 25232, 'loss/train': 1.467758059501648} +02/25/2022 10:16:56 - INFO - codeparrot_training - Step 25233: {'lr': 0.00026254473772281196, 'samples': 12919808, 'steps': 25233, 'loss/train': 1.4294638633728027} +02/25/2022 10:17:00 - INFO - codeparrot_training - Step 25234: {'lr': 0.0002625283958469439, 'samples': 12920320, 'steps': 25234, 'loss/train': 0.9792097806930542} +02/25/2022 10:17:07 - INFO - codeparrot_training - Step 25235: {'lr': 0.00026251205391740815, 'samples': 12920832, 'steps': 25235, 'loss/train': 2.814891815185547} +02/25/2022 10:17:10 - INFO - codeparrot_training - Step 25236: {'lr': 0.00026249571193427473, 'samples': 12921344, 'steps': 25236, 'loss/train': 1.8181837797164917} +02/25/2022 10:17:16 - INFO - codeparrot_training - Step 25237: {'lr': 0.0002624793698976136, 'samples': 12921856, 'steps': 25237, 'loss/train': 1.7550987005233765} +02/25/2022 10:17:19 - INFO - codeparrot_training - Step 25238: {'lr': 0.0002624630278074949, 'samples': 12922368, 'steps': 25238, 'loss/train': 2.3792221546173096} +02/25/2022 10:17:25 - INFO - codeparrot_training - Step 25239: {'lr': 0.0002624466856639885, 'samples': 12922880, 'steps': 25239, 'loss/train': 1.0399821996688843} +02/25/2022 10:17:28 - INFO - codeparrot_training - Step 25240: {'lr': 0.0002624303434671645, 'samples': 12923392, 'steps': 25240, 'loss/train': 1.0097362995147705} +02/25/2022 10:17:34 - INFO - codeparrot_training - Step 25241: {'lr': 0.00026241400121709287, 'samples': 12923904, 'steps': 25241, 'loss/train': 2.1287190914154053} +02/25/2022 10:17:37 - INFO - codeparrot_training - Step 25242: {'lr': 0.0002623976589138435, 'samples': 12924416, 'steps': 25242, 'loss/train': 1.6061726808547974} +02/25/2022 10:17:43 - INFO - codeparrot_training - Step 25243: {'lr': 0.00026238131655748656, 'samples': 12924928, 'steps': 25243, 'loss/train': 1.814828872680664} +02/25/2022 10:17:46 - INFO - codeparrot_training - Step 25244: {'lr': 0.0002623649741480919, 'samples': 12925440, 'steps': 25244, 'loss/train': 2.7730724811553955} +02/25/2022 10:17:54 - INFO - codeparrot_training - Step 25245: {'lr': 0.00026234863168572974, 'samples': 12925952, 'steps': 25245, 'loss/train': 2.665889263153076} +02/25/2022 10:17:57 - INFO - codeparrot_training - Step 25246: {'lr': 0.0002623322891704699, 'samples': 12926464, 'steps': 25246, 'loss/train': 2.974480390548706} +02/25/2022 10:18:03 - INFO - codeparrot_training - Step 25247: {'lr': 0.0002623159466023825, 'samples': 12926976, 'steps': 25247, 'loss/train': 0.7214266061782837} +02/25/2022 10:18:06 - INFO - codeparrot_training - Step 25248: {'lr': 0.0002622996039815374, 'samples': 12927488, 'steps': 25248, 'loss/train': 1.5576725006103516} +02/25/2022 10:18:12 - INFO - codeparrot_training - Step 25249: {'lr': 0.0002622832613080048, 'samples': 12928000, 'steps': 25249, 'loss/train': 2.1740376949310303} +02/25/2022 10:18:15 - INFO - codeparrot_training - Step 25250: {'lr': 0.00026226691858185456, 'samples': 12928512, 'steps': 25250, 'loss/train': 2.48518967628479} +02/25/2022 10:18:21 - INFO - codeparrot_training - Step 25251: {'lr': 0.0002622505758031567, 'samples': 12929024, 'steps': 25251, 'loss/train': 2.667180299758911} +02/25/2022 10:18:24 - INFO - codeparrot_training - Step 25252: {'lr': 0.00026223423297198136, 'samples': 12929536, 'steps': 25252, 'loss/train': 2.1488027572631836} +02/25/2022 10:18:30 - INFO - codeparrot_training - Step 25253: {'lr': 0.00026221789008839836, 'samples': 12930048, 'steps': 25253, 'loss/train': 1.2392905950546265} +02/25/2022 10:18:33 - INFO - codeparrot_training - Step 25254: {'lr': 0.00026220154715247783, 'samples': 12930560, 'steps': 25254, 'loss/train': 1.8072820901870728} +02/25/2022 10:18:39 - INFO - codeparrot_training - Step 25255: {'lr': 0.00026218520416428976, 'samples': 12931072, 'steps': 25255, 'loss/train': 2.21661376953125} +02/25/2022 10:18:42 - INFO - codeparrot_training - Step 25256: {'lr': 0.00026216886112390413, 'samples': 12931584, 'steps': 25256, 'loss/train': 2.3472323417663574} +02/25/2022 10:18:49 - INFO - codeparrot_training - Step 25257: {'lr': 0.00026215251803139093, 'samples': 12932096, 'steps': 25257, 'loss/train': 1.6080751419067383} +02/25/2022 10:18:53 - INFO - codeparrot_training - Step 25258: {'lr': 0.0002621361748868203, 'samples': 12932608, 'steps': 25258, 'loss/train': 1.6127243041992188} +02/25/2022 10:18:58 - INFO - codeparrot_training - Step 25259: {'lr': 0.00026211983169026207, 'samples': 12933120, 'steps': 25259, 'loss/train': 1.5720276832580566} +02/25/2022 10:19:02 - INFO - codeparrot_training - Step 25260: {'lr': 0.0002621034884417864, 'samples': 12933632, 'steps': 25260, 'loss/train': 1.5244604349136353} +02/25/2022 10:19:07 - INFO - codeparrot_training - Step 25261: {'lr': 0.0002620871451414631, 'samples': 12934144, 'steps': 25261, 'loss/train': 1.9730055332183838} +02/25/2022 10:19:13 - INFO - codeparrot_training - Step 25262: {'lr': 0.00026207080178936243, 'samples': 12934656, 'steps': 25262, 'loss/train': 1.9861608743667603} +02/25/2022 10:19:16 - INFO - codeparrot_training - Step 25263: {'lr': 0.0002620544583855542, 'samples': 12935168, 'steps': 25263, 'loss/train': 0.6329479813575745} +02/25/2022 10:19:22 - INFO - codeparrot_training - Step 25264: {'lr': 0.00026203811493010854, 'samples': 12935680, 'steps': 25264, 'loss/train': 2.566152572631836} +02/25/2022 10:19:25 - INFO - codeparrot_training - Step 25265: {'lr': 0.0002620217714230954, 'samples': 12936192, 'steps': 25265, 'loss/train': 1.6782448291778564} +02/25/2022 10:19:31 - INFO - codeparrot_training - Step 25266: {'lr': 0.0002620054278645848, 'samples': 12936704, 'steps': 25266, 'loss/train': 1.4395540952682495} +02/25/2022 10:19:34 - INFO - codeparrot_training - Step 25267: {'lr': 0.00026198908425464684, 'samples': 12937216, 'steps': 25267, 'loss/train': 2.275939464569092} +02/25/2022 10:19:40 - INFO - codeparrot_training - Step 25268: {'lr': 0.00026197274059335137, 'samples': 12937728, 'steps': 25268, 'loss/train': 1.917593002319336} +02/25/2022 10:19:43 - INFO - codeparrot_training - Step 25269: {'lr': 0.0002619563968807685, 'samples': 12938240, 'steps': 25269, 'loss/train': 1.9233434200286865} +02/25/2022 10:19:49 - INFO - codeparrot_training - Step 25270: {'lr': 0.00026194005311696824, 'samples': 12938752, 'steps': 25270, 'loss/train': 1.6113722324371338} +02/25/2022 10:19:52 - INFO - codeparrot_training - Step 25271: {'lr': 0.0002619237093020206, 'samples': 12939264, 'steps': 25271, 'loss/train': 0.8846839666366577} +02/25/2022 10:20:00 - INFO - codeparrot_training - Step 25272: {'lr': 0.00026190736543599545, 'samples': 12939776, 'steps': 25272, 'loss/train': 1.7859965562820435} +02/25/2022 10:20:03 - INFO - codeparrot_training - Step 25273: {'lr': 0.0002618910215189631, 'samples': 12940288, 'steps': 25273, 'loss/train': 1.5689723491668701} +02/25/2022 10:20:08 - INFO - codeparrot_training - Step 25274: {'lr': 0.0002618746775509933, 'samples': 12940800, 'steps': 25274, 'loss/train': 2.146951913833618} +02/25/2022 10:20:12 - INFO - codeparrot_training - Step 25275: {'lr': 0.0002618583335321561, 'samples': 12941312, 'steps': 25275, 'loss/train': 2.030069351196289} +02/25/2022 10:20:18 - INFO - codeparrot_training - Step 25276: {'lr': 0.00026184198946252164, 'samples': 12941824, 'steps': 25276, 'loss/train': 1.7635247707366943} +02/25/2022 10:20:21 - INFO - codeparrot_training - Step 25277: {'lr': 0.00026182564534215985, 'samples': 12942336, 'steps': 25277, 'loss/train': 0.9114202857017517} +02/25/2022 10:20:27 - INFO - codeparrot_training - Step 25278: {'lr': 0.00026180930117114076, 'samples': 12942848, 'steps': 25278, 'loss/train': 2.3035519123077393} +02/25/2022 10:20:30 - INFO - codeparrot_training - Step 25279: {'lr': 0.0002617929569495343, 'samples': 12943360, 'steps': 25279, 'loss/train': 1.680801510810852} +02/25/2022 10:20:36 - INFO - codeparrot_training - Step 25280: {'lr': 0.00026177661267741067, 'samples': 12943872, 'steps': 25280, 'loss/train': 1.2134040594100952} +02/25/2022 10:20:39 - INFO - codeparrot_training - Step 25281: {'lr': 0.00026176026835483975, 'samples': 12944384, 'steps': 25281, 'loss/train': 0.7380163669586182} +02/25/2022 10:20:46 - INFO - codeparrot_training - Step 25282: {'lr': 0.00026174392398189153, 'samples': 12944896, 'steps': 25282, 'loss/train': 2.2398416996002197} +02/25/2022 10:20:50 - INFO - codeparrot_training - Step 25283: {'lr': 0.00026172757955863606, 'samples': 12945408, 'steps': 25283, 'loss/train': 1.9276384115219116} +02/25/2022 10:20:55 - INFO - codeparrot_training - Step 25284: {'lr': 0.0002617112350851434, 'samples': 12945920, 'steps': 25284, 'loss/train': 1.5832734107971191} +02/25/2022 10:20:59 - INFO - codeparrot_training - Step 25285: {'lr': 0.00026169489056148353, 'samples': 12946432, 'steps': 25285, 'loss/train': 2.6555488109588623} +02/25/2022 10:21:04 - INFO - codeparrot_training - Step 25286: {'lr': 0.0002616785459877264, 'samples': 12946944, 'steps': 25286, 'loss/train': 2.1728549003601074} +02/25/2022 10:21:08 - INFO - codeparrot_training - Step 25287: {'lr': 0.00026166220136394214, 'samples': 12947456, 'steps': 25287, 'loss/train': 2.062012195587158} +02/25/2022 10:21:13 - INFO - codeparrot_training - Step 25288: {'lr': 0.0002616458566902007, 'samples': 12947968, 'steps': 25288, 'loss/train': 2.2282443046569824} +02/25/2022 10:21:17 - INFO - codeparrot_training - Step 25289: {'lr': 0.00026162951196657215, 'samples': 12948480, 'steps': 25289, 'loss/train': 2.558828830718994} +02/25/2022 10:21:22 - INFO - codeparrot_training - Step 25290: {'lr': 0.00026161316719312637, 'samples': 12948992, 'steps': 25290, 'loss/train': 2.0352771282196045} +02/25/2022 10:21:26 - INFO - codeparrot_training - Step 25291: {'lr': 0.0002615968223699336, 'samples': 12949504, 'steps': 25291, 'loss/train': 2.081367015838623} +02/25/2022 10:21:33 - INFO - codeparrot_training - Step 25292: {'lr': 0.0002615804774970636, 'samples': 12950016, 'steps': 25292, 'loss/train': 1.9572584629058838} +02/25/2022 10:21:36 - INFO - codeparrot_training - Step 25293: {'lr': 0.0002615641325745866, 'samples': 12950528, 'steps': 25293, 'loss/train': 1.195730209350586} +02/25/2022 10:21:42 - INFO - codeparrot_training - Step 25294: {'lr': 0.00026154778760257246, 'samples': 12951040, 'steps': 25294, 'loss/train': 0.3673703670501709} +02/25/2022 10:21:45 - INFO - codeparrot_training - Step 25295: {'lr': 0.0002615314425810913, 'samples': 12951552, 'steps': 25295, 'loss/train': 1.7312372922897339} +02/25/2022 10:21:51 - INFO - codeparrot_training - Step 25296: {'lr': 0.0002615150975102131, 'samples': 12952064, 'steps': 25296, 'loss/train': 2.1584668159484863} +02/25/2022 10:21:54 - INFO - codeparrot_training - Step 25297: {'lr': 0.00026149875239000786, 'samples': 12952576, 'steps': 25297, 'loss/train': 2.6947379112243652} +02/25/2022 10:22:00 - INFO - codeparrot_training - Step 25298: {'lr': 0.0002614824072205456, 'samples': 12953088, 'steps': 25298, 'loss/train': 1.9793378114700317} +02/25/2022 10:22:03 - INFO - codeparrot_training - Step 25299: {'lr': 0.00026146606200189637, 'samples': 12953600, 'steps': 25299, 'loss/train': 2.1179208755493164} +02/25/2022 10:22:09 - INFO - codeparrot_training - Step 25300: {'lr': 0.0002614497167341302, 'samples': 12954112, 'steps': 25300, 'loss/train': 2.183959484100342} +02/25/2022 10:22:12 - INFO - codeparrot_training - Step 25301: {'lr': 0.00026143337141731703, 'samples': 12954624, 'steps': 25301, 'loss/train': 1.6719461679458618} +02/25/2022 10:22:20 - INFO - codeparrot_training - Step 25302: {'lr': 0.000261417026051527, 'samples': 12955136, 'steps': 25302, 'loss/train': 1.7023910284042358} +02/25/2022 10:22:23 - INFO - codeparrot_training - Step 25303: {'lr': 0.00026140068063683, 'samples': 12955648, 'steps': 25303, 'loss/train': 0.7320264577865601} +02/25/2022 10:22:29 - INFO - codeparrot_training - Step 25304: {'lr': 0.00026138433517329616, 'samples': 12956160, 'steps': 25304, 'loss/train': 2.1170542240142822} +02/25/2022 10:22:32 - INFO - codeparrot_training - Step 25305: {'lr': 0.0002613679896609954, 'samples': 12956672, 'steps': 25305, 'loss/train': 1.7571150064468384} +02/25/2022 10:22:38 - INFO - codeparrot_training - Step 25306: {'lr': 0.0002613516440999978, 'samples': 12957184, 'steps': 25306, 'loss/train': 1.5068050622940063} +02/25/2022 10:22:41 - INFO - codeparrot_training - Step 25307: {'lr': 0.0002613352984903733, 'samples': 12957696, 'steps': 25307, 'loss/train': 2.0442020893096924} +02/25/2022 10:22:47 - INFO - codeparrot_training - Step 25308: {'lr': 0.0002613189528321921, 'samples': 12958208, 'steps': 25308, 'loss/train': 2.2437171936035156} +02/25/2022 10:22:51 - INFO - codeparrot_training - Step 25309: {'lr': 0.00026130260712552394, 'samples': 12958720, 'steps': 25309, 'loss/train': 1.404790997505188} +02/25/2022 10:22:56 - INFO - codeparrot_training - Step 25310: {'lr': 0.00026128626137043913, 'samples': 12959232, 'steps': 25310, 'loss/train': 1.9766829013824463} +02/25/2022 10:22:59 - INFO - codeparrot_training - Step 25311: {'lr': 0.0002612699155670075, 'samples': 12959744, 'steps': 25311, 'loss/train': 1.4593075513839722} +02/25/2022 10:23:05 - INFO - codeparrot_training - Step 25312: {'lr': 0.0002612535697152991, 'samples': 12960256, 'steps': 25312, 'loss/train': 1.157163381576538} +02/25/2022 10:23:08 - INFO - codeparrot_training - Step 25313: {'lr': 0.0002612372238153841, 'samples': 12960768, 'steps': 25313, 'loss/train': 1.974704384803772} +02/25/2022 10:23:14 - INFO - codeparrot_training - Step 25314: {'lr': 0.00026122087786733226, 'samples': 12961280, 'steps': 25314, 'loss/train': 2.5316545963287354} +02/25/2022 10:23:17 - INFO - codeparrot_training - Step 25315: {'lr': 0.00026120453187121386, 'samples': 12961792, 'steps': 25315, 'loss/train': 1.7827507257461548} +02/25/2022 10:23:23 - INFO - codeparrot_training - Step 25316: {'lr': 0.00026118818582709875, 'samples': 12962304, 'steps': 25316, 'loss/train': 1.0745261907577515} +02/25/2022 10:23:27 - INFO - codeparrot_training - Step 25317: {'lr': 0.000261171839735057, 'samples': 12962816, 'steps': 25317, 'loss/train': 0.8575783371925354} +02/25/2022 10:23:34 - INFO - codeparrot_training - Step 25318: {'lr': 0.00026115549359515867, 'samples': 12963328, 'steps': 25318, 'loss/train': 2.160360813140869} +02/25/2022 10:23:37 - INFO - codeparrot_training - Step 25319: {'lr': 0.00026113914740747373, 'samples': 12963840, 'steps': 25319, 'loss/train': 1.8212852478027344} +02/25/2022 10:23:43 - INFO - codeparrot_training - Step 25320: {'lr': 0.00026112280117207223, 'samples': 12964352, 'steps': 25320, 'loss/train': 2.2596120834350586} +02/25/2022 10:23:46 - INFO - codeparrot_training - Step 25321: {'lr': 0.00026110645488902417, 'samples': 12964864, 'steps': 25321, 'loss/train': 1.876841425895691} +02/25/2022 10:23:52 - INFO - codeparrot_training - Step 25322: {'lr': 0.00026109010855839953, 'samples': 12965376, 'steps': 25322, 'loss/train': 2.182778835296631} +02/25/2022 10:23:55 - INFO - codeparrot_training - Step 25323: {'lr': 0.00026107376218026846, 'samples': 12965888, 'steps': 25323, 'loss/train': 1.8846172094345093} +02/25/2022 10:24:01 - INFO - codeparrot_training - Step 25324: {'lr': 0.0002610574157547009, 'samples': 12966400, 'steps': 25324, 'loss/train': 1.6929960250854492} +02/25/2022 10:24:04 - INFO - codeparrot_training - Step 25325: {'lr': 0.00026104106928176684, 'samples': 12966912, 'steps': 25325, 'loss/train': 1.2628182172775269} +02/25/2022 10:24:10 - INFO - codeparrot_training - Step 25326: {'lr': 0.0002610247227615364, 'samples': 12967424, 'steps': 25326, 'loss/train': 3.2269182205200195} +02/25/2022 10:24:13 - INFO - codeparrot_training - Step 25327: {'lr': 0.00026100837619407954, 'samples': 12967936, 'steps': 25327, 'loss/train': 2.1335484981536865} +02/25/2022 10:24:21 - INFO - codeparrot_training - Step 25328: {'lr': 0.0002609920295794662, 'samples': 12968448, 'steps': 25328, 'loss/train': 1.5020781755447388} +02/25/2022 10:24:25 - INFO - codeparrot_training - Step 25329: {'lr': 0.00026097568291776665, 'samples': 12968960, 'steps': 25329, 'loss/train': 1.9993840456008911} +02/25/2022 10:24:30 - INFO - codeparrot_training - Step 25330: {'lr': 0.0002609593362090507, 'samples': 12969472, 'steps': 25330, 'loss/train': 2.5293819904327393} +02/25/2022 10:24:34 - INFO - codeparrot_training - Step 25331: {'lr': 0.0002609429894533884, 'samples': 12969984, 'steps': 25331, 'loss/train': 1.8078157901763916} +02/25/2022 10:24:39 - INFO - codeparrot_training - Step 25332: {'lr': 0.00026092664265084983, 'samples': 12970496, 'steps': 25332, 'loss/train': 2.028881788253784} +02/25/2022 10:24:43 - INFO - codeparrot_training - Step 25333: {'lr': 0.0002609102958015049, 'samples': 12971008, 'steps': 25333, 'loss/train': 2.8448386192321777} +02/25/2022 10:24:48 - INFO - codeparrot_training - Step 25334: {'lr': 0.0002608939489054239, 'samples': 12971520, 'steps': 25334, 'loss/train': 1.7702529430389404} +02/25/2022 10:24:52 - INFO - codeparrot_training - Step 25335: {'lr': 0.00026087760196267655, 'samples': 12972032, 'steps': 25335, 'loss/train': 1.3746795654296875} +02/25/2022 10:24:58 - INFO - codeparrot_training - Step 25336: {'lr': 0.000260861254973333, 'samples': 12972544, 'steps': 25336, 'loss/train': 0.6420924067497253} +02/25/2022 10:25:01 - INFO - codeparrot_training - Step 25337: {'lr': 0.00026084490793746337, 'samples': 12973056, 'steps': 25337, 'loss/train': 1.5441663265228271} +02/25/2022 10:25:07 - INFO - codeparrot_training - Step 25338: {'lr': 0.00026082856085513756, 'samples': 12973568, 'steps': 25338, 'loss/train': 2.481992721557617} +02/25/2022 10:25:11 - INFO - codeparrot_training - Step 25339: {'lr': 0.0002608122137264257, 'samples': 12974080, 'steps': 25339, 'loss/train': 2.3662033081054688} +02/25/2022 10:25:17 - INFO - codeparrot_training - Step 25340: {'lr': 0.0002607958665513976, 'samples': 12974592, 'steps': 25340, 'loss/train': 0.8369866013526917} +02/25/2022 10:25:20 - INFO - codeparrot_training - Step 25341: {'lr': 0.00026077951933012355, 'samples': 12975104, 'steps': 25341, 'loss/train': 1.852104902267456} +02/25/2022 10:25:26 - INFO - codeparrot_training - Step 25342: {'lr': 0.0002607631720626734, 'samples': 12975616, 'steps': 25342, 'loss/train': 1.4531915187835693} +02/25/2022 10:25:29 - INFO - codeparrot_training - Step 25343: {'lr': 0.0002607468247491173, 'samples': 12976128, 'steps': 25343, 'loss/train': 1.8628367185592651} +02/25/2022 10:25:36 - INFO - codeparrot_training - Step 25344: {'lr': 0.00026073047738952513, 'samples': 12976640, 'steps': 25344, 'loss/train': 0.22304286062717438} +02/25/2022 10:25:40 - INFO - codeparrot_training - Step 25345: {'lr': 0.0002607141299839671, 'samples': 12977152, 'steps': 25345, 'loss/train': 1.4542042016983032} +02/25/2022 10:25:45 - INFO - codeparrot_training - Step 25346: {'lr': 0.0002606977825325131, 'samples': 12977664, 'steps': 25346, 'loss/train': 3.1794064044952393} +02/25/2022 10:25:49 - INFO - codeparrot_training - Step 25347: {'lr': 0.0002606814350352331, 'samples': 12978176, 'steps': 25347, 'loss/train': 1.249501347541809} +02/25/2022 10:25:54 - INFO - codeparrot_training - Step 25348: {'lr': 0.00026066508749219734, 'samples': 12978688, 'steps': 25348, 'loss/train': 2.259679079055786} +02/25/2022 10:25:58 - INFO - codeparrot_training - Step 25349: {'lr': 0.0002606487399034757, 'samples': 12979200, 'steps': 25349, 'loss/train': 2.393623113632202} +02/25/2022 10:26:03 - INFO - codeparrot_training - Step 25350: {'lr': 0.0002606323922691383, 'samples': 12979712, 'steps': 25350, 'loss/train': 2.1388514041900635} +02/25/2022 10:26:07 - INFO - codeparrot_training - Step 25351: {'lr': 0.000260616044589255, 'samples': 12980224, 'steps': 25351, 'loss/train': 2.245131254196167} +02/25/2022 10:26:12 - INFO - codeparrot_training - Step 25352: {'lr': 0.00026059969686389605, 'samples': 12980736, 'steps': 25352, 'loss/train': 1.0605303049087524} +02/25/2022 10:26:16 - INFO - codeparrot_training - Step 25353: {'lr': 0.0002605833490931313, 'samples': 12981248, 'steps': 25353, 'loss/train': 1.6841325759887695} +02/25/2022 10:26:22 - INFO - codeparrot_training - Step 25354: {'lr': 0.00026056700127703085, 'samples': 12981760, 'steps': 25354, 'loss/train': 1.1205074787139893} +02/25/2022 10:26:26 - INFO - codeparrot_training - Step 25355: {'lr': 0.0002605506534156647, 'samples': 12982272, 'steps': 25355, 'loss/train': 1.686244010925293} +02/25/2022 10:26:31 - INFO - codeparrot_training - Step 25356: {'lr': 0.00026053430550910297, 'samples': 12982784, 'steps': 25356, 'loss/train': 2.243374824523926} +02/25/2022 10:26:35 - INFO - codeparrot_training - Step 25357: {'lr': 0.0002605179575574155, 'samples': 12983296, 'steps': 25357, 'loss/train': 1.817054033279419} +02/25/2022 10:26:40 - INFO - codeparrot_training - Step 25358: {'lr': 0.0002605016095606726, 'samples': 12983808, 'steps': 25358, 'loss/train': 1.4486634731292725} +02/25/2022 10:26:44 - INFO - codeparrot_training - Step 25359: {'lr': 0.00026048526151894395, 'samples': 12984320, 'steps': 25359, 'loss/train': 0.8246952295303345} +02/25/2022 10:26:49 - INFO - codeparrot_training - Step 25360: {'lr': 0.0002604689134322999, 'samples': 12984832, 'steps': 25360, 'loss/train': 1.6542701721191406} +02/25/2022 10:26:53 - INFO - codeparrot_training - Step 25361: {'lr': 0.00026045256530081033, 'samples': 12985344, 'steps': 25361, 'loss/train': 2.278477907180786} +02/25/2022 10:26:58 - INFO - codeparrot_training - Step 25362: {'lr': 0.00026043621712454524, 'samples': 12985856, 'steps': 25362, 'loss/train': 1.476204514503479} +02/25/2022 10:27:02 - INFO - codeparrot_training - Step 25363: {'lr': 0.00026041986890357476, 'samples': 12986368, 'steps': 25363, 'loss/train': 2.968668222427368} +02/25/2022 10:27:08 - INFO - codeparrot_training - Step 25364: {'lr': 0.00026040352063796886, 'samples': 12986880, 'steps': 25364, 'loss/train': 1.6224629878997803} +02/25/2022 10:27:12 - INFO - codeparrot_training - Step 25365: {'lr': 0.00026038717232779765, 'samples': 12987392, 'steps': 25365, 'loss/train': 0.9365993738174438} +02/25/2022 10:27:17 - INFO - codeparrot_training - Step 25366: {'lr': 0.00026037082397313094, 'samples': 12987904, 'steps': 25366, 'loss/train': 2.612879991531372} +02/25/2022 10:27:21 - INFO - codeparrot_training - Step 25367: {'lr': 0.000260354475574039, 'samples': 12988416, 'steps': 25367, 'loss/train': 1.475862979888916} +02/25/2022 10:27:26 - INFO - codeparrot_training - Step 25368: {'lr': 0.0002603381271305918, 'samples': 12988928, 'steps': 25368, 'loss/train': 2.3788938522338867} +02/25/2022 10:27:30 - INFO - codeparrot_training - Step 25369: {'lr': 0.0002603217786428593, 'samples': 12989440, 'steps': 25369, 'loss/train': 1.380410075187683} +02/25/2022 10:27:35 - INFO - codeparrot_training - Step 25370: {'lr': 0.0002603054301109117, 'samples': 12989952, 'steps': 25370, 'loss/train': 2.6743950843811035} +02/25/2022 10:27:39 - INFO - codeparrot_training - Step 25371: {'lr': 0.00026028908153481875, 'samples': 12990464, 'steps': 25371, 'loss/train': 2.0259957313537598} +02/25/2022 10:27:44 - INFO - codeparrot_training - Step 25372: {'lr': 0.0002602727329146507, 'samples': 12990976, 'steps': 25372, 'loss/train': 1.5719716548919678} +02/25/2022 10:27:48 - INFO - codeparrot_training - Step 25373: {'lr': 0.00026025638425047746, 'samples': 12991488, 'steps': 25373, 'loss/train': 1.8571712970733643} +02/25/2022 10:27:55 - INFO - codeparrot_training - Step 25374: {'lr': 0.00026024003554236925, 'samples': 12992000, 'steps': 25374, 'loss/train': 2.4841511249542236} +02/25/2022 10:27:58 - INFO - codeparrot_training - Step 25375: {'lr': 0.0002602236867903959, 'samples': 12992512, 'steps': 25375, 'loss/train': 3.207655191421509} +02/25/2022 10:28:04 - INFO - codeparrot_training - Step 25376: {'lr': 0.00026020733799462755, 'samples': 12993024, 'steps': 25376, 'loss/train': 2.3679535388946533} +02/25/2022 10:28:07 - INFO - codeparrot_training - Step 25377: {'lr': 0.0002601909891551342, 'samples': 12993536, 'steps': 25377, 'loss/train': 2.1880011558532715} +02/25/2022 10:28:13 - INFO - codeparrot_training - Step 25378: {'lr': 0.0002601746402719859, 'samples': 12994048, 'steps': 25378, 'loss/train': 2.410015106201172} +02/25/2022 10:28:16 - INFO - codeparrot_training - Step 25379: {'lr': 0.0002601582913452526, 'samples': 12994560, 'steps': 25379, 'loss/train': 1.9797877073287964} +02/25/2022 10:28:22 - INFO - codeparrot_training - Step 25380: {'lr': 0.0002601419423750045, 'samples': 12995072, 'steps': 25380, 'loss/train': 1.0747326612472534} +02/25/2022 10:28:25 - INFO - codeparrot_training - Step 25381: {'lr': 0.00026012559336131147, 'samples': 12995584, 'steps': 25381, 'loss/train': 2.887636423110962} +02/25/2022 10:28:31 - INFO - codeparrot_training - Step 25382: {'lr': 0.0002601092443042437, 'samples': 12996096, 'steps': 25382, 'loss/train': 1.7689247131347656} +02/25/2022 10:28:34 - INFO - codeparrot_training - Step 25383: {'lr': 0.000260092895203871, 'samples': 12996608, 'steps': 25383, 'loss/train': 2.6680214405059814} +02/25/2022 10:28:40 - INFO - codeparrot_training - Step 25384: {'lr': 0.0002600765460602636, 'samples': 12997120, 'steps': 25384, 'loss/train': 1.4685745239257812} +02/25/2022 10:28:44 - INFO - codeparrot_training - Step 25385: {'lr': 0.0002600601968734915, 'samples': 12997632, 'steps': 25385, 'loss/train': 2.89216685295105} +02/25/2022 10:28:49 - INFO - codeparrot_training - Step 25386: {'lr': 0.00026004384764362473, 'samples': 12998144, 'steps': 25386, 'loss/train': 2.2213428020477295} +02/25/2022 10:28:53 - INFO - codeparrot_training - Step 25387: {'lr': 0.0002600274983707333, 'samples': 12998656, 'steps': 25387, 'loss/train': 1.8447260856628418} +02/25/2022 10:28:58 - INFO - codeparrot_training - Step 25388: {'lr': 0.0002600111490548872, 'samples': 12999168, 'steps': 25388, 'loss/train': 1.543138861656189} +02/25/2022 10:29:02 - INFO - codeparrot_training - Step 25389: {'lr': 0.00025999479969615656, 'samples': 12999680, 'steps': 25389, 'loss/train': 2.214895725250244} +02/25/2022 10:29:07 - INFO - codeparrot_training - Step 25390: {'lr': 0.00025997845029461134, 'samples': 13000192, 'steps': 25390, 'loss/train': 1.2128826379776} +02/25/2022 10:29:11 - INFO - codeparrot_training - Step 25391: {'lr': 0.00025996210085032167, 'samples': 13000704, 'steps': 25391, 'loss/train': 2.0411376953125} +02/25/2022 10:29:16 - INFO - codeparrot_training - Step 25392: {'lr': 0.00025994575136335747, 'samples': 13001216, 'steps': 25392, 'loss/train': 0.09121988713741302} +02/25/2022 10:29:20 - INFO - codeparrot_training - Step 25393: {'lr': 0.0002599294018337889, 'samples': 13001728, 'steps': 25393, 'loss/train': 1.353220820426941} +02/25/2022 10:29:25 - INFO - codeparrot_training - Step 25394: {'lr': 0.0002599130522616858, 'samples': 13002240, 'steps': 25394, 'loss/train': 1.7172094583511353} +02/25/2022 10:29:29 - INFO - codeparrot_training - Step 25395: {'lr': 0.00025989670264711846, 'samples': 13002752, 'steps': 25395, 'loss/train': 2.8422091007232666} +02/25/2022 10:29:34 - INFO - codeparrot_training - Step 25396: {'lr': 0.0002598803529901567, 'samples': 13003264, 'steps': 25396, 'loss/train': 1.205527901649475} +02/25/2022 10:29:38 - INFO - codeparrot_training - Step 25397: {'lr': 0.0002598640032908706, 'samples': 13003776, 'steps': 25397, 'loss/train': 1.7367963790893555} +02/25/2022 10:29:43 - INFO - codeparrot_training - Step 25398: {'lr': 0.0002598476535493304, 'samples': 13004288, 'steps': 25398, 'loss/train': 1.991734504699707} +02/25/2022 10:29:47 - INFO - codeparrot_training - Step 25399: {'lr': 0.0002598313037656058, 'samples': 13004800, 'steps': 25399, 'loss/train': 2.408982992172241} +02/25/2022 10:29:53 - INFO - codeparrot_training - Step 25400: {'lr': 0.00025981495393976716, 'samples': 13005312, 'steps': 25400, 'loss/train': 2.187908887863159} +02/25/2022 10:29:57 - INFO - codeparrot_training - Step 25401: {'lr': 0.0002597986040718843, 'samples': 13005824, 'steps': 25401, 'loss/train': 1.5372228622436523} +02/25/2022 10:30:02 - INFO - codeparrot_training - Step 25402: {'lr': 0.0002597822541620274, 'samples': 13006336, 'steps': 25402, 'loss/train': 1.9885507822036743} +02/25/2022 10:30:06 - INFO - codeparrot_training - Step 25403: {'lr': 0.00025976590421026636, 'samples': 13006848, 'steps': 25403, 'loss/train': 2.3181846141815186} +02/25/2022 10:30:11 - INFO - codeparrot_training - Step 25404: {'lr': 0.00025974955421667134, 'samples': 13007360, 'steps': 25404, 'loss/train': 2.372411012649536} +02/25/2022 10:30:15 - INFO - codeparrot_training - Step 25405: {'lr': 0.00025973320418131227, 'samples': 13007872, 'steps': 25405, 'loss/train': 1.2195004224777222} +02/25/2022 10:30:20 - INFO - codeparrot_training - Step 25406: {'lr': 0.00025971685410425933, 'samples': 13008384, 'steps': 25406, 'loss/train': 8.741043090820312} +02/25/2022 10:30:24 - INFO - codeparrot_training - Step 25407: {'lr': 0.00025970050398558235, 'samples': 13008896, 'steps': 25407, 'loss/train': 0.9202667474746704} +02/25/2022 10:30:29 - INFO - codeparrot_training - Step 25408: {'lr': 0.00025968415382535153, 'samples': 13009408, 'steps': 25408, 'loss/train': 2.025219440460205} +02/25/2022 10:30:33 - INFO - codeparrot_training - Step 25409: {'lr': 0.000259667803623637, 'samples': 13009920, 'steps': 25409, 'loss/train': 2.1659293174743652} +02/25/2022 10:30:39 - INFO - codeparrot_training - Step 25410: {'lr': 0.00025965145338050855, 'samples': 13010432, 'steps': 25410, 'loss/train': 2.5328400135040283} +02/25/2022 10:30:43 - INFO - codeparrot_training - Step 25411: {'lr': 0.00025963510309603635, 'samples': 13010944, 'steps': 25411, 'loss/train': 2.10746431350708} +02/25/2022 10:30:48 - INFO - codeparrot_training - Step 25412: {'lr': 0.0002596187527702904, 'samples': 13011456, 'steps': 25412, 'loss/train': 1.6793832778930664} +02/25/2022 10:30:51 - INFO - codeparrot_training - Step 25413: {'lr': 0.0002596024024033408, 'samples': 13011968, 'steps': 25413, 'loss/train': 1.3334057331085205} +02/25/2022 10:30:57 - INFO - codeparrot_training - Step 25414: {'lr': 0.00025958605199525756, 'samples': 13012480, 'steps': 25414, 'loss/train': 1.6516681909561157} +02/25/2022 10:31:01 - INFO - codeparrot_training - Step 25415: {'lr': 0.0002595697015461107, 'samples': 13012992, 'steps': 25415, 'loss/train': 1.3304407596588135} +02/25/2022 10:31:07 - INFO - codeparrot_training - Step 25416: {'lr': 0.00025955335105597036, 'samples': 13013504, 'steps': 25416, 'loss/train': 1.3077303171157837} +02/25/2022 10:31:10 - INFO - codeparrot_training - Step 25417: {'lr': 0.0002595370005249064, 'samples': 13014016, 'steps': 25417, 'loss/train': 2.26249361038208} +02/25/2022 10:31:14 - INFO - codeparrot_training - Step 25418: {'lr': 0.00025952064995298895, 'samples': 13014528, 'steps': 25418, 'loss/train': 0.6702812910079956} +02/25/2022 10:31:19 - INFO - codeparrot_training - Step 25419: {'lr': 0.0002595042993402882, 'samples': 13015040, 'steps': 25419, 'loss/train': 0.7806688547134399} +02/25/2022 10:31:23 - INFO - codeparrot_training - Step 25420: {'lr': 0.0002594879486868739, 'samples': 13015552, 'steps': 25420, 'loss/train': 1.027009129524231} +02/25/2022 10:31:29 - INFO - codeparrot_training - Step 25421: {'lr': 0.00025947159799281623, 'samples': 13016064, 'steps': 25421, 'loss/train': 2.146104335784912} +02/25/2022 10:31:33 - INFO - codeparrot_training - Step 25422: {'lr': 0.0002594552472581853, 'samples': 13016576, 'steps': 25422, 'loss/train': 2.3594746589660645} +02/25/2022 10:31:38 - INFO - codeparrot_training - Step 25423: {'lr': 0.00025943889648305106, 'samples': 13017088, 'steps': 25423, 'loss/train': 2.610564947128296} +02/25/2022 10:31:42 - INFO - codeparrot_training - Step 25424: {'lr': 0.0002594225456674837, 'samples': 13017600, 'steps': 25424, 'loss/train': 0.26570653915405273} +02/25/2022 10:31:47 - INFO - codeparrot_training - Step 25425: {'lr': 0.000259406194811553, 'samples': 13018112, 'steps': 25425, 'loss/train': 1.6636470556259155} +02/25/2022 10:31:51 - INFO - codeparrot_training - Step 25426: {'lr': 0.0002593898439153293, 'samples': 13018624, 'steps': 25426, 'loss/train': 1.9744212627410889} +02/25/2022 10:31:56 - INFO - codeparrot_training - Step 25427: {'lr': 0.00025937349297888235, 'samples': 13019136, 'steps': 25427, 'loss/train': 1.8284608125686646} +02/25/2022 10:32:00 - INFO - codeparrot_training - Step 25428: {'lr': 0.0002593571420022824, 'samples': 13019648, 'steps': 25428, 'loss/train': 1.9696407318115234} +02/25/2022 10:32:05 - INFO - codeparrot_training - Step 25429: {'lr': 0.0002593407909855994, 'samples': 13020160, 'steps': 25429, 'loss/train': 1.9532917737960815} +02/25/2022 10:32:09 - INFO - codeparrot_training - Step 25430: {'lr': 0.00025932443992890343, 'samples': 13020672, 'steps': 25430, 'loss/train': 0.7508223652839661} +02/25/2022 10:32:16 - INFO - codeparrot_training - Step 25431: {'lr': 0.0002593080888322645, 'samples': 13021184, 'steps': 25431, 'loss/train': 1.1213818788528442} +02/25/2022 10:32:19 - INFO - codeparrot_training - Step 25432: {'lr': 0.00025929173769575266, 'samples': 13021696, 'steps': 25432, 'loss/train': 0.7965890169143677} +02/25/2022 10:32:25 - INFO - codeparrot_training - Step 25433: {'lr': 0.000259275386519438, 'samples': 13022208, 'steps': 25433, 'loss/train': 1.337225317955017} +02/25/2022 10:32:28 - INFO - codeparrot_training - Step 25434: {'lr': 0.0002592590353033905, 'samples': 13022720, 'steps': 25434, 'loss/train': 1.908464789390564} +02/25/2022 10:32:34 - INFO - codeparrot_training - Step 25435: {'lr': 0.0002592426840476803, 'samples': 13023232, 'steps': 25435, 'loss/train': 0.9522150754928589} +02/25/2022 10:32:37 - INFO - codeparrot_training - Step 25436: {'lr': 0.0002592263327523773, 'samples': 13023744, 'steps': 25436, 'loss/train': 2.873217821121216} +02/25/2022 10:32:43 - INFO - codeparrot_training - Step 25437: {'lr': 0.0002592099814175517, 'samples': 13024256, 'steps': 25437, 'loss/train': 1.4420586824417114} +02/25/2022 10:32:46 - INFO - codeparrot_training - Step 25438: {'lr': 0.00025919363004327337, 'samples': 13024768, 'steps': 25438, 'loss/train': 2.89656138420105} +02/25/2022 10:32:52 - INFO - codeparrot_training - Step 25439: {'lr': 0.0002591772786296125, 'samples': 13025280, 'steps': 25439, 'loss/train': 1.6785086393356323} +02/25/2022 10:32:55 - INFO - codeparrot_training - Step 25440: {'lr': 0.0002591609271766391, 'samples': 13025792, 'steps': 25440, 'loss/train': 1.2279216051101685} +02/25/2022 10:33:01 - INFO - codeparrot_training - Step 25441: {'lr': 0.0002591445756844232, 'samples': 13026304, 'steps': 25441, 'loss/train': 2.342471122741699} +02/25/2022 10:33:04 - INFO - codeparrot_training - Step 25442: {'lr': 0.0002591282241530348, 'samples': 13026816, 'steps': 25442, 'loss/train': 1.7308307886123657} +02/25/2022 10:33:10 - INFO - codeparrot_training - Step 25443: {'lr': 0.000259111872582544, 'samples': 13027328, 'steps': 25443, 'loss/train': 2.144907236099243} +02/25/2022 10:33:13 - INFO - codeparrot_training - Step 25444: {'lr': 0.0002590955209730208, 'samples': 13027840, 'steps': 25444, 'loss/train': 1.9072288274765015} +02/25/2022 10:33:19 - INFO - codeparrot_training - Step 25445: {'lr': 0.0002590791693245353, 'samples': 13028352, 'steps': 25445, 'loss/train': 1.843860149383545} +02/25/2022 10:33:22 - INFO - codeparrot_training - Step 25446: {'lr': 0.00025906281763715753, 'samples': 13028864, 'steps': 25446, 'loss/train': 2.8474838733673096} +02/25/2022 10:33:29 - INFO - codeparrot_training - Step 25447: {'lr': 0.0002590464659109575, 'samples': 13029376, 'steps': 25447, 'loss/train': 2.140130043029785} +02/25/2022 10:33:32 - INFO - codeparrot_training - Step 25448: {'lr': 0.00025903011414600536, 'samples': 13029888, 'steps': 25448, 'loss/train': 1.7169541120529175} +02/25/2022 10:33:38 - INFO - codeparrot_training - Step 25449: {'lr': 0.00025901376234237103, 'samples': 13030400, 'steps': 25449, 'loss/train': 2.234224796295166} +02/25/2022 10:33:41 - INFO - codeparrot_training - Step 25450: {'lr': 0.0002589974105001246, 'samples': 13030912, 'steps': 25450, 'loss/train': 2.063070297241211} +02/25/2022 10:33:47 - INFO - codeparrot_training - Step 25451: {'lr': 0.00025898105861933614, 'samples': 13031424, 'steps': 25451, 'loss/train': 1.272411823272705} +02/25/2022 10:33:50 - INFO - codeparrot_training - Step 25452: {'lr': 0.00025896470670007567, 'samples': 13031936, 'steps': 25452, 'loss/train': 2.5773468017578125} +02/25/2022 10:33:56 - INFO - codeparrot_training - Step 25453: {'lr': 0.0002589483547424132, 'samples': 13032448, 'steps': 25453, 'loss/train': 1.8566616773605347} +02/25/2022 10:33:59 - INFO - codeparrot_training - Step 25454: {'lr': 0.0002589320027464189, 'samples': 13032960, 'steps': 25454, 'loss/train': 0.7591987252235413} +02/25/2022 10:34:05 - INFO - codeparrot_training - Step 25455: {'lr': 0.00025891565071216267, 'samples': 13033472, 'steps': 25455, 'loss/train': 2.1075520515441895} +02/25/2022 10:34:08 - INFO - codeparrot_training - Step 25456: {'lr': 0.0002588992986397146, 'samples': 13033984, 'steps': 25456, 'loss/train': 1.6278977394104004} +02/25/2022 10:34:14 - INFO - codeparrot_training - Step 25457: {'lr': 0.0002588829465291448, 'samples': 13034496, 'steps': 25457, 'loss/train': 2.203155279159546} +02/25/2022 10:34:18 - INFO - codeparrot_training - Step 25458: {'lr': 0.0002588665943805233, 'samples': 13035008, 'steps': 25458, 'loss/train': 3.2736563682556152} +02/25/2022 10:34:23 - INFO - codeparrot_training - Step 25459: {'lr': 0.00025885024219392017, 'samples': 13035520, 'steps': 25459, 'loss/train': 1.5789657831192017} +02/25/2022 10:34:27 - INFO - codeparrot_training - Step 25460: {'lr': 0.00025883388996940533, 'samples': 13036032, 'steps': 25460, 'loss/train': 1.5168770551681519} +02/25/2022 10:34:32 - INFO - codeparrot_training - Step 25461: {'lr': 0.00025881753770704897, 'samples': 13036544, 'steps': 25461, 'loss/train': 1.689923882484436} +02/25/2022 10:34:36 - INFO - codeparrot_training - Step 25462: {'lr': 0.000258801185406921, 'samples': 13037056, 'steps': 25462, 'loss/train': 1.134519100189209} +02/25/2022 10:34:41 - INFO - codeparrot_training - Step 25463: {'lr': 0.0002587848330690916, 'samples': 13037568, 'steps': 25463, 'loss/train': 1.8639558553695679} +02/25/2022 10:34:45 - INFO - codeparrot_training - Step 25464: {'lr': 0.0002587684806936307, 'samples': 13038080, 'steps': 25464, 'loss/train': 1.2560575008392334} +02/25/2022 10:34:50 - INFO - codeparrot_training - Step 25465: {'lr': 0.0002587521282806085, 'samples': 13038592, 'steps': 25465, 'loss/train': 1.5340209007263184} +02/25/2022 10:34:54 - INFO - codeparrot_training - Step 25466: {'lr': 0.00025873577583009495, 'samples': 13039104, 'steps': 25466, 'loss/train': 1.7172353267669678} +02/25/2022 10:35:00 - INFO - codeparrot_training - Step 25467: {'lr': 0.0002587194233421601, 'samples': 13039616, 'steps': 25467, 'loss/train': 0.3781517446041107} +02/25/2022 10:35:04 - INFO - codeparrot_training - Step 25468: {'lr': 0.00025870307081687395, 'samples': 13040128, 'steps': 25468, 'loss/train': 1.8215484619140625} +02/25/2022 10:35:09 - INFO - codeparrot_training - Step 25469: {'lr': 0.0002586867182543066, 'samples': 13040640, 'steps': 25469, 'loss/train': 2.0719873905181885} +02/25/2022 10:35:13 - INFO - codeparrot_training - Step 25470: {'lr': 0.0002586703656545282, 'samples': 13041152, 'steps': 25470, 'loss/train': 2.7393150329589844} +02/25/2022 10:35:18 - INFO - codeparrot_training - Step 25471: {'lr': 0.00025865401301760867, 'samples': 13041664, 'steps': 25471, 'loss/train': 1.7830309867858887} +02/25/2022 10:35:22 - INFO - codeparrot_training - Step 25472: {'lr': 0.00025863766034361815, 'samples': 13042176, 'steps': 25472, 'loss/train': 1.3046330213546753} +02/25/2022 10:35:27 - INFO - codeparrot_training - Step 25473: {'lr': 0.00025862130763262646, 'samples': 13042688, 'steps': 25473, 'loss/train': 1.2730748653411865} +02/25/2022 10:35:31 - INFO - codeparrot_training - Step 25474: {'lr': 0.00025860495488470403, 'samples': 13043200, 'steps': 25474, 'loss/train': 3.5933492183685303} +02/25/2022 10:35:36 - INFO - codeparrot_training - Step 25475: {'lr': 0.00025858860209992057, 'samples': 13043712, 'steps': 25475, 'loss/train': 1.6586380004882812} +02/25/2022 10:35:40 - INFO - codeparrot_training - Step 25476: {'lr': 0.00025857224927834633, 'samples': 13044224, 'steps': 25476, 'loss/train': 1.2413740158081055} +02/25/2022 10:35:45 - INFO - codeparrot_training - Step 25477: {'lr': 0.0002585558964200513, 'samples': 13044736, 'steps': 25477, 'loss/train': 1.6203744411468506} +02/25/2022 10:35:49 - INFO - codeparrot_training - Step 25478: {'lr': 0.0002585395435251055, 'samples': 13045248, 'steps': 25478, 'loss/train': 2.581801652908325} +02/25/2022 10:35:55 - INFO - codeparrot_training - Step 25479: {'lr': 0.000258523190593579, 'samples': 13045760, 'steps': 25479, 'loss/train': 2.717923641204834} +02/25/2022 10:36:00 - INFO - codeparrot_training - Step 25480: {'lr': 0.0002585068376255418, 'samples': 13046272, 'steps': 25480, 'loss/train': 8.688414573669434} +02/25/2022 10:36:04 - INFO - codeparrot_training - Step 25481: {'lr': 0.00025849048462106414, 'samples': 13046784, 'steps': 25481, 'loss/train': 2.695458173751831} +02/25/2022 10:36:09 - INFO - codeparrot_training - Step 25482: {'lr': 0.00025847413158021587, 'samples': 13047296, 'steps': 25482, 'loss/train': 2.860206127166748} +02/25/2022 10:36:13 - INFO - codeparrot_training - Step 25483: {'lr': 0.00025845777850306716, 'samples': 13047808, 'steps': 25483, 'loss/train': 2.300733804702759} +02/25/2022 10:36:18 - INFO - codeparrot_training - Step 25484: {'lr': 0.000258441425389688, 'samples': 13048320, 'steps': 25484, 'loss/train': 2.0404443740844727} +02/25/2022 10:36:22 - INFO - codeparrot_training - Step 25485: {'lr': 0.0002584250722401484, 'samples': 13048832, 'steps': 25485, 'loss/train': 1.8768339157104492} +02/25/2022 10:36:27 - INFO - codeparrot_training - Step 25486: {'lr': 0.00025840871905451847, 'samples': 13049344, 'steps': 25486, 'loss/train': 2.9098217487335205} +02/25/2022 10:36:31 - INFO - codeparrot_training - Step 25487: {'lr': 0.0002583923658328683, 'samples': 13049856, 'steps': 25487, 'loss/train': 1.714423418045044} +02/25/2022 10:36:36 - INFO - codeparrot_training - Step 25488: {'lr': 0.00025837601257526786, 'samples': 13050368, 'steps': 25488, 'loss/train': 1.2151185274124146} +02/25/2022 10:36:40 - INFO - codeparrot_training - Step 25489: {'lr': 0.0002583596592817873, 'samples': 13050880, 'steps': 25489, 'loss/train': 2.2535359859466553} +02/25/2022 10:36:45 - INFO - codeparrot_training - Step 25490: {'lr': 0.00025834330595249654, 'samples': 13051392, 'steps': 25490, 'loss/train': 0.9353277087211609} +02/25/2022 10:36:49 - INFO - codeparrot_training - Step 25491: {'lr': 0.0002583269525874658, 'samples': 13051904, 'steps': 25491, 'loss/train': 1.742711067199707} +02/25/2022 10:36:55 - INFO - codeparrot_training - Step 25492: {'lr': 0.00025831059918676497, 'samples': 13052416, 'steps': 25492, 'loss/train': 1.9470714330673218} +02/25/2022 10:36:59 - INFO - codeparrot_training - Step 25493: {'lr': 0.00025829424575046414, 'samples': 13052928, 'steps': 25493, 'loss/train': 1.2629696130752563} +02/25/2022 10:37:04 - INFO - codeparrot_training - Step 25494: {'lr': 0.0002582778922786335, 'samples': 13053440, 'steps': 25494, 'loss/train': 0.8298726677894592} +02/25/2022 10:37:07 - INFO - codeparrot_training - Step 25495: {'lr': 0.0002582615387713429, 'samples': 13053952, 'steps': 25495, 'loss/train': 2.4662070274353027} +02/25/2022 10:37:13 - INFO - codeparrot_training - Step 25496: {'lr': 0.00025824518522866253, 'samples': 13054464, 'steps': 25496, 'loss/train': 2.401947498321533} +02/25/2022 10:37:16 - INFO - codeparrot_training - Step 25497: {'lr': 0.0002582288316506624, 'samples': 13054976, 'steps': 25497, 'loss/train': 1.985283374786377} +02/25/2022 10:37:22 - INFO - codeparrot_training - Step 25498: {'lr': 0.0002582124780374126, 'samples': 13055488, 'steps': 25498, 'loss/train': 1.9059538841247559} +02/25/2022 10:37:26 - INFO - codeparrot_training - Step 25499: {'lr': 0.00025819612438898314, 'samples': 13056000, 'steps': 25499, 'loss/train': 2.1930618286132812} +02/25/2022 10:37:31 - INFO - codeparrot_training - Step 25500: {'lr': 0.00025817977070544405, 'samples': 13056512, 'steps': 25500, 'loss/train': 2.1014113426208496} +02/25/2022 10:37:34 - INFO - codeparrot_training - Step 25501: {'lr': 0.00025816341698686545, 'samples': 13057024, 'steps': 25501, 'loss/train': 2.0710501670837402} +02/25/2022 10:37:41 - INFO - codeparrot_training - Step 25502: {'lr': 0.0002581470632333173, 'samples': 13057536, 'steps': 25502, 'loss/train': 1.6427345275878906} +02/25/2022 10:37:44 - INFO - codeparrot_training - Step 25503: {'lr': 0.00025813070944486985, 'samples': 13058048, 'steps': 25503, 'loss/train': 2.5381133556365967} +02/25/2022 10:37:50 - INFO - codeparrot_training - Step 25504: {'lr': 0.0002581143556215929, 'samples': 13058560, 'steps': 25504, 'loss/train': 1.2489573955535889} +02/25/2022 10:37:53 - INFO - codeparrot_training - Step 25505: {'lr': 0.0002580980017635567, 'samples': 13059072, 'steps': 25505, 'loss/train': 2.8317482471466064} +02/25/2022 10:37:59 - INFO - codeparrot_training - Step 25506: {'lr': 0.0002580816478708312, 'samples': 13059584, 'steps': 25506, 'loss/train': 3.079897403717041} +02/25/2022 10:38:02 - INFO - codeparrot_training - Step 25507: {'lr': 0.0002580652939434865, 'samples': 13060096, 'steps': 25507, 'loss/train': 2.1846425533294678} +02/25/2022 10:38:08 - INFO - codeparrot_training - Step 25508: {'lr': 0.0002580489399815926, 'samples': 13060608, 'steps': 25508, 'loss/train': 1.4465380907058716} +02/25/2022 10:38:11 - INFO - codeparrot_training - Step 25509: {'lr': 0.00025803258598521966, 'samples': 13061120, 'steps': 25509, 'loss/train': 2.062087059020996} +02/25/2022 10:38:17 - INFO - codeparrot_training - Step 25510: {'lr': 0.0002580162319544376, 'samples': 13061632, 'steps': 25510, 'loss/train': 0.6560931205749512} +02/25/2022 10:38:20 - INFO - codeparrot_training - Step 25511: {'lr': 0.00025799987788931666, 'samples': 13062144, 'steps': 25511, 'loss/train': 2.174177885055542} +02/25/2022 10:38:27 - INFO - codeparrot_training - Step 25512: {'lr': 0.0002579835237899267, 'samples': 13062656, 'steps': 25512, 'loss/train': 2.0699105262756348} +02/25/2022 10:38:30 - INFO - codeparrot_training - Step 25513: {'lr': 0.00025796716965633787, 'samples': 13063168, 'steps': 25513, 'loss/train': 1.134191870689392} +02/25/2022 10:38:36 - INFO - codeparrot_training - Step 25514: {'lr': 0.0002579508154886202, 'samples': 13063680, 'steps': 25514, 'loss/train': 2.9658265113830566} +02/25/2022 10:38:39 - INFO - codeparrot_training - Step 25515: {'lr': 0.00025793446128684377, 'samples': 13064192, 'steps': 25515, 'loss/train': 0.21962374448776245} +02/25/2022 10:38:45 - INFO - codeparrot_training - Step 25516: {'lr': 0.00025791810705107866, 'samples': 13064704, 'steps': 25516, 'loss/train': 0.7573524117469788} +02/25/2022 10:38:48 - INFO - codeparrot_training - Step 25517: {'lr': 0.00025790175278139483, 'samples': 13065216, 'steps': 25517, 'loss/train': 2.088284730911255} +02/25/2022 10:38:54 - INFO - codeparrot_training - Step 25518: {'lr': 0.0002578853984778625, 'samples': 13065728, 'steps': 25518, 'loss/train': 2.1003429889678955} +02/25/2022 10:38:58 - INFO - codeparrot_training - Step 25519: {'lr': 0.0002578690441405515, 'samples': 13066240, 'steps': 25519, 'loss/train': 2.253528356552124} +02/25/2022 10:39:01 - INFO - codeparrot_training - Step 25520: {'lr': 0.00025785268976953206, 'samples': 13066752, 'steps': 25520, 'loss/train': 1.3939143419265747} +02/25/2022 10:39:07 - INFO - codeparrot_training - Step 25521: {'lr': 0.0002578363353648742, 'samples': 13067264, 'steps': 25521, 'loss/train': 0.7397488355636597} +02/25/2022 10:39:11 - INFO - codeparrot_training - Step 25522: {'lr': 0.000257819980926648, 'samples': 13067776, 'steps': 25522, 'loss/train': 2.4867022037506104} +02/25/2022 10:39:16 - INFO - codeparrot_training - Step 25523: {'lr': 0.00025780362645492344, 'samples': 13068288, 'steps': 25523, 'loss/train': 2.077606678009033} +02/25/2022 10:39:20 - INFO - codeparrot_training - Step 25524: {'lr': 0.0002577872719497707, 'samples': 13068800, 'steps': 25524, 'loss/train': 2.550997257232666} +02/25/2022 10:39:26 - INFO - codeparrot_training - Step 25525: {'lr': 0.0002577709174112597, 'samples': 13069312, 'steps': 25525, 'loss/train': 1.8374128341674805} +02/25/2022 10:39:29 - INFO - codeparrot_training - Step 25526: {'lr': 0.0002577545628394606, 'samples': 13069824, 'steps': 25526, 'loss/train': 1.7259000539779663} +02/25/2022 10:39:35 - INFO - codeparrot_training - Step 25527: {'lr': 0.00025773820823444334, 'samples': 13070336, 'steps': 25527, 'loss/train': 2.8765740394592285} +02/25/2022 10:39:38 - INFO - codeparrot_training - Step 25528: {'lr': 0.0002577218535962781, 'samples': 13070848, 'steps': 25528, 'loss/train': 1.0596556663513184} +02/25/2022 10:39:44 - INFO - codeparrot_training - Step 25529: {'lr': 0.00025770549892503486, 'samples': 13071360, 'steps': 25529, 'loss/train': 1.5723588466644287} +02/25/2022 10:39:47 - INFO - codeparrot_training - Step 25530: {'lr': 0.0002576891442207837, 'samples': 13071872, 'steps': 25530, 'loss/train': 3.7963387966156006} +02/25/2022 10:39:53 - INFO - codeparrot_training - Step 25531: {'lr': 0.00025767278948359473, 'samples': 13072384, 'steps': 25531, 'loss/train': 1.7266310453414917} +02/25/2022 10:39:56 - INFO - codeparrot_training - Step 25532: {'lr': 0.00025765643471353794, 'samples': 13072896, 'steps': 25532, 'loss/train': 1.6487746238708496} +02/25/2022 10:40:02 - INFO - codeparrot_training - Step 25533: {'lr': 0.00025764007991068344, 'samples': 13073408, 'steps': 25533, 'loss/train': 1.7094881534576416} +02/25/2022 10:40:05 - INFO - codeparrot_training - Step 25534: {'lr': 0.0002576237250751012, 'samples': 13073920, 'steps': 25534, 'loss/train': 0.9303370714187622} +02/25/2022 10:40:11 - INFO - codeparrot_training - Step 25535: {'lr': 0.0002576073702068615, 'samples': 13074432, 'steps': 25535, 'loss/train': 1.7391544580459595} +02/25/2022 10:40:15 - INFO - codeparrot_training - Step 25536: {'lr': 0.00025759101530603405, 'samples': 13074944, 'steps': 25536, 'loss/train': 2.6711604595184326} +02/25/2022 10:40:20 - INFO - codeparrot_training - Step 25537: {'lr': 0.00025757466037268925, 'samples': 13075456, 'steps': 25537, 'loss/train': 1.4803251028060913} +02/25/2022 10:40:24 - INFO - codeparrot_training - Step 25538: {'lr': 0.0002575583054068969, 'samples': 13075968, 'steps': 25538, 'loss/train': 1.5431030988693237} +02/25/2022 10:40:29 - INFO - codeparrot_training - Step 25539: {'lr': 0.00025754195040872727, 'samples': 13076480, 'steps': 25539, 'loss/train': 2.096067190170288} +02/25/2022 10:40:33 - INFO - codeparrot_training - Step 25540: {'lr': 0.00025752559537825027, 'samples': 13076992, 'steps': 25540, 'loss/train': 2.3013758659362793} +02/25/2022 10:40:38 - INFO - codeparrot_training - Step 25541: {'lr': 0.00025750924031553603, 'samples': 13077504, 'steps': 25541, 'loss/train': 2.5413990020751953} +02/25/2022 10:40:42 - INFO - codeparrot_training - Step 25542: {'lr': 0.0002574928852206545, 'samples': 13078016, 'steps': 25542, 'loss/train': 3.4210944175720215} +02/25/2022 10:40:48 - INFO - codeparrot_training - Step 25543: {'lr': 0.00025747653009367596, 'samples': 13078528, 'steps': 25543, 'loss/train': 2.311305284500122} +02/25/2022 10:40:51 - INFO - codeparrot_training - Step 25544: {'lr': 0.0002574601749346702, 'samples': 13079040, 'steps': 25544, 'loss/train': 2.3376216888427734} +02/25/2022 10:40:57 - INFO - codeparrot_training - Step 25545: {'lr': 0.0002574438197437076, 'samples': 13079552, 'steps': 25545, 'loss/train': 2.332993268966675} +02/25/2022 10:41:00 - INFO - codeparrot_training - Step 25546: {'lr': 0.00025742746452085794, 'samples': 13080064, 'steps': 25546, 'loss/train': 0.3118111491203308} +02/25/2022 10:41:06 - INFO - codeparrot_training - Step 25547: {'lr': 0.00025741110926619134, 'samples': 13080576, 'steps': 25547, 'loss/train': 2.3225464820861816} +02/25/2022 10:41:09 - INFO - codeparrot_training - Step 25548: {'lr': 0.00025739475397977795, 'samples': 13081088, 'steps': 25548, 'loss/train': 2.4193713665008545} +02/25/2022 10:41:15 - INFO - codeparrot_training - Step 25549: {'lr': 0.0002573783986616877, 'samples': 13081600, 'steps': 25549, 'loss/train': 1.776007056236267} +02/25/2022 10:41:18 - INFO - codeparrot_training - Step 25550: {'lr': 0.00025736204331199084, 'samples': 13082112, 'steps': 25550, 'loss/train': 2.2091004848480225} +02/25/2022 10:41:24 - INFO - codeparrot_training - Step 25551: {'lr': 0.00025734568793075725, 'samples': 13082624, 'steps': 25551, 'loss/train': 1.9936878681182861} +02/25/2022 10:41:28 - INFO - codeparrot_training - Step 25552: {'lr': 0.00025732933251805713, 'samples': 13083136, 'steps': 25552, 'loss/train': 2.126516342163086} +02/25/2022 10:41:33 - INFO - codeparrot_training - Step 25553: {'lr': 0.00025731297707396047, 'samples': 13083648, 'steps': 25553, 'loss/train': 1.8014073371887207} +02/25/2022 10:41:37 - INFO - codeparrot_training - Step 25554: {'lr': 0.00025729662159853725, 'samples': 13084160, 'steps': 25554, 'loss/train': 2.103489398956299} +02/25/2022 10:41:42 - INFO - codeparrot_training - Step 25555: {'lr': 0.0002572802660918577, 'samples': 13084672, 'steps': 25555, 'loss/train': 1.1406726837158203} +02/25/2022 10:41:46 - INFO - codeparrot_training - Step 25556: {'lr': 0.0002572639105539918, 'samples': 13085184, 'steps': 25556, 'loss/train': 1.2123444080352783} +02/25/2022 10:41:51 - INFO - codeparrot_training - Step 25557: {'lr': 0.0002572475549850096, 'samples': 13085696, 'steps': 25557, 'loss/train': 1.4706628322601318} +02/25/2022 10:41:55 - INFO - codeparrot_training - Step 25558: {'lr': 0.00025723119938498115, 'samples': 13086208, 'steps': 25558, 'loss/train': 1.7352365255355835} +02/25/2022 10:42:00 - INFO - codeparrot_training - Step 25559: {'lr': 0.0002572148437539766, 'samples': 13086720, 'steps': 25559, 'loss/train': 0.4805527329444885} +02/25/2022 10:42:04 - INFO - codeparrot_training - Step 25560: {'lr': 0.0002571984880920659, 'samples': 13087232, 'steps': 25560, 'loss/train': 2.853137493133545} +02/25/2022 10:42:10 - INFO - codeparrot_training - Step 25561: {'lr': 0.0002571821323993192, 'samples': 13087744, 'steps': 25561, 'loss/train': 2.0273165702819824} +02/25/2022 10:42:13 - INFO - codeparrot_training - Step 25562: {'lr': 0.00025716577667580647, 'samples': 13088256, 'steps': 25562, 'loss/train': 1.693200945854187} +02/25/2022 10:42:19 - INFO - codeparrot_training - Step 25563: {'lr': 0.0002571494209215979, 'samples': 13088768, 'steps': 25563, 'loss/train': 1.1780664920806885} +02/25/2022 10:42:22 - INFO - codeparrot_training - Step 25564: {'lr': 0.0002571330651367634, 'samples': 13089280, 'steps': 25564, 'loss/train': 2.799821376800537} +02/25/2022 10:42:28 - INFO - codeparrot_training - Step 25565: {'lr': 0.0002571167093213731, 'samples': 13089792, 'steps': 25565, 'loss/train': 1.9448604583740234} +02/25/2022 10:42:31 - INFO - codeparrot_training - Step 25566: {'lr': 0.0002571003534754972, 'samples': 13090304, 'steps': 25566, 'loss/train': 0.9562610387802124} +02/25/2022 10:42:37 - INFO - codeparrot_training - Step 25567: {'lr': 0.0002570839975992055, 'samples': 13090816, 'steps': 25567, 'loss/train': 2.371833562850952} +02/25/2022 10:42:40 - INFO - codeparrot_training - Step 25568: {'lr': 0.00025706764169256837, 'samples': 13091328, 'steps': 25568, 'loss/train': 2.011049270629883} +02/25/2022 10:42:46 - INFO - codeparrot_training - Step 25569: {'lr': 0.0002570512857556556, 'samples': 13091840, 'steps': 25569, 'loss/train': 0.7666170597076416} +02/25/2022 10:42:49 - INFO - codeparrot_training - Step 25570: {'lr': 0.0002570349297885374, 'samples': 13092352, 'steps': 25570, 'loss/train': 1.8144721984863281} +02/25/2022 10:42:56 - INFO - codeparrot_training - Step 25571: {'lr': 0.00025701857379128366, 'samples': 13092864, 'steps': 25571, 'loss/train': 1.5858922004699707} +02/25/2022 10:42:59 - INFO - codeparrot_training - Step 25572: {'lr': 0.0002570022177639648, 'samples': 13093376, 'steps': 25572, 'loss/train': 2.163553476333618} +02/25/2022 10:43:05 - INFO - codeparrot_training - Step 25573: {'lr': 0.0002569858617066505, 'samples': 13093888, 'steps': 25573, 'loss/train': 2.0017151832580566} +02/25/2022 10:43:08 - INFO - codeparrot_training - Step 25574: {'lr': 0.00025696950561941104, 'samples': 13094400, 'steps': 25574, 'loss/train': 2.418836832046509} +02/25/2022 10:43:14 - INFO - codeparrot_training - Step 25575: {'lr': 0.00025695314950231643, 'samples': 13094912, 'steps': 25575, 'loss/train': 2.1170639991760254} +02/25/2022 10:43:17 - INFO - codeparrot_training - Step 25576: {'lr': 0.0002569367933554367, 'samples': 13095424, 'steps': 25576, 'loss/train': 1.3791933059692383} +02/25/2022 10:43:23 - INFO - codeparrot_training - Step 25577: {'lr': 0.000256920437178842, 'samples': 13095936, 'steps': 25577, 'loss/train': 2.83950138092041} +02/25/2022 10:43:26 - INFO - codeparrot_training - Step 25578: {'lr': 0.00025690408097260234, 'samples': 13096448, 'steps': 25578, 'loss/train': 1.680790901184082} +02/25/2022 10:43:32 - INFO - codeparrot_training - Step 25579: {'lr': 0.0002568877247367878, 'samples': 13096960, 'steps': 25579, 'loss/train': 2.302349090576172} +02/25/2022 10:43:35 - INFO - codeparrot_training - Step 25580: {'lr': 0.0002568713684714684, 'samples': 13097472, 'steps': 25580, 'loss/train': 2.396230459213257} +02/25/2022 10:43:41 - INFO - codeparrot_training - Step 25581: {'lr': 0.00025685501217671423, 'samples': 13097984, 'steps': 25581, 'loss/train': 2.152153253555298} +02/25/2022 10:43:44 - INFO - codeparrot_training - Step 25582: {'lr': 0.00025683865585259533, 'samples': 13098496, 'steps': 25582, 'loss/train': 2.87414288520813} +02/25/2022 10:43:50 - INFO - codeparrot_training - Step 25583: {'lr': 0.0002568222994991819, 'samples': 13099008, 'steps': 25583, 'loss/train': 0.6613590121269226} +02/25/2022 10:43:53 - INFO - codeparrot_training - Step 25584: {'lr': 0.0002568059431165438, 'samples': 13099520, 'steps': 25584, 'loss/train': 0.7476187348365784} +02/25/2022 10:43:59 - INFO - codeparrot_training - Step 25585: {'lr': 0.00025678958670475135, 'samples': 13100032, 'steps': 25585, 'loss/train': 1.3253560066223145} +02/25/2022 10:44:02 - INFO - codeparrot_training - Step 25586: {'lr': 0.0002567732302638744, 'samples': 13100544, 'steps': 25586, 'loss/train': 1.8754878044128418} +02/25/2022 10:44:09 - INFO - codeparrot_training - Step 25587: {'lr': 0.00025675687379398304, 'samples': 13101056, 'steps': 25587, 'loss/train': 0.08983694016933441} +02/25/2022 10:44:12 - INFO - codeparrot_training - Step 25588: {'lr': 0.0002567405172951474, 'samples': 13101568, 'steps': 25588, 'loss/train': 1.6174893379211426} +02/25/2022 10:44:18 - INFO - codeparrot_training - Step 25589: {'lr': 0.0002567241607674375, 'samples': 13102080, 'steps': 25589, 'loss/train': 0.8111506104469299} +02/25/2022 10:44:21 - INFO - codeparrot_training - Step 25590: {'lr': 0.0002567078042109235, 'samples': 13102592, 'steps': 25590, 'loss/train': 1.9698349237442017} +02/25/2022 10:44:27 - INFO - codeparrot_training - Step 25591: {'lr': 0.0002566914476256753, 'samples': 13103104, 'steps': 25591, 'loss/train': 0.5227755308151245} +02/25/2022 10:44:30 - INFO - codeparrot_training - Step 25592: {'lr': 0.00025667509101176317, 'samples': 13103616, 'steps': 25592, 'loss/train': 1.7507622241973877} +02/25/2022 10:44:36 - INFO - codeparrot_training - Step 25593: {'lr': 0.00025665873436925697, 'samples': 13104128, 'steps': 25593, 'loss/train': 2.006484270095825} +02/25/2022 10:44:39 - INFO - codeparrot_training - Step 25594: {'lr': 0.000256642377698227, 'samples': 13104640, 'steps': 25594, 'loss/train': 1.7868516445159912} +02/25/2022 10:44:45 - INFO - codeparrot_training - Step 25595: {'lr': 0.000256626020998743, 'samples': 13105152, 'steps': 25595, 'loss/train': 2.7415452003479004} +02/25/2022 10:44:48 - INFO - codeparrot_training - Step 25596: {'lr': 0.0002566096642708754, 'samples': 13105664, 'steps': 25596, 'loss/train': 2.0796501636505127} +02/25/2022 10:44:55 - INFO - codeparrot_training - Step 25597: {'lr': 0.00025659330751469394, 'samples': 13106176, 'steps': 25597, 'loss/train': 2.1918537616729736} +02/25/2022 10:44:58 - INFO - codeparrot_training - Step 25598: {'lr': 0.000256576950730269, 'samples': 13106688, 'steps': 25598, 'loss/train': 1.1648391485214233} +02/25/2022 10:45:04 - INFO - codeparrot_training - Step 25599: {'lr': 0.0002565605939176704, 'samples': 13107200, 'steps': 25599, 'loss/train': 1.7362256050109863} +02/25/2022 10:45:07 - INFO - codeparrot_training - Step 25600: {'lr': 0.00025654423707696834, 'samples': 13107712, 'steps': 25600, 'loss/train': 2.187987804412842} +02/25/2022 10:45:13 - INFO - codeparrot_training - Step 25601: {'lr': 0.0002565278802082328, 'samples': 13108224, 'steps': 25601, 'loss/train': 1.9063791036605835} +02/25/2022 10:45:16 - INFO - codeparrot_training - Step 25602: {'lr': 0.00025651152331153393, 'samples': 13108736, 'steps': 25602, 'loss/train': 1.6950427293777466} +02/25/2022 10:45:22 - INFO - codeparrot_training - Step 25603: {'lr': 0.00025649516638694174, 'samples': 13109248, 'steps': 25603, 'loss/train': 2.131453037261963} +02/25/2022 10:45:25 - INFO - codeparrot_training - Step 25604: {'lr': 0.00025647880943452633, 'samples': 13109760, 'steps': 25604, 'loss/train': 2.075115919113159} +02/25/2022 10:45:32 - INFO - codeparrot_training - Step 25605: {'lr': 0.0002564624524543578, 'samples': 13110272, 'steps': 25605, 'loss/train': 1.14596426486969} +02/25/2022 10:45:35 - INFO - codeparrot_training - Step 25606: {'lr': 0.0002564460954465061, 'samples': 13110784, 'steps': 25606, 'loss/train': 3.5504369735717773} +02/25/2022 10:45:41 - INFO - codeparrot_training - Step 25607: {'lr': 0.0002564297384110414, 'samples': 13111296, 'steps': 25607, 'loss/train': 2.123778820037842} +02/25/2022 10:45:45 - INFO - codeparrot_training - Step 25608: {'lr': 0.00025641338134803376, 'samples': 13111808, 'steps': 25608, 'loss/train': 1.2274852991104126} +02/25/2022 10:45:50 - INFO - codeparrot_training - Step 25609: {'lr': 0.0002563970242575533, 'samples': 13112320, 'steps': 25609, 'loss/train': 1.6785922050476074} +02/25/2022 10:45:54 - INFO - codeparrot_training - Step 25610: {'lr': 0.0002563806671396699, 'samples': 13112832, 'steps': 25610, 'loss/train': 1.8916770219802856} +02/25/2022 10:45:59 - INFO - codeparrot_training - Step 25611: {'lr': 0.0002563643099944538, 'samples': 13113344, 'steps': 25611, 'loss/train': 1.98416006565094} +02/25/2022 10:46:03 - INFO - codeparrot_training - Step 25612: {'lr': 0.000256347952821975, 'samples': 13113856, 'steps': 25612, 'loss/train': 0.3088620603084564} +02/25/2022 10:46:08 - INFO - codeparrot_training - Step 25613: {'lr': 0.0002563315956223036, 'samples': 13114368, 'steps': 25613, 'loss/train': 1.8856010437011719} +02/25/2022 10:46:12 - INFO - codeparrot_training - Step 25614: {'lr': 0.0002563152383955096, 'samples': 13114880, 'steps': 25614, 'loss/train': 2.6615824699401855} +02/25/2022 10:46:17 - INFO - codeparrot_training - Step 25615: {'lr': 0.0002562988811416632, 'samples': 13115392, 'steps': 25615, 'loss/train': 1.9493383169174194} +02/25/2022 10:46:21 - INFO - codeparrot_training - Step 25616: {'lr': 0.0002562825238608344, 'samples': 13115904, 'steps': 25616, 'loss/train': 1.2353770732879639} +02/25/2022 10:46:27 - INFO - codeparrot_training - Step 25617: {'lr': 0.0002562661665530932, 'samples': 13116416, 'steps': 25617, 'loss/train': 1.5191400051116943} +02/25/2022 10:46:30 - INFO - codeparrot_training - Step 25618: {'lr': 0.00025624980921850975, 'samples': 13116928, 'steps': 25618, 'loss/train': 1.785825490951538} +02/25/2022 10:46:36 - INFO - codeparrot_training - Step 25619: {'lr': 0.00025623345185715415, 'samples': 13117440, 'steps': 25619, 'loss/train': 1.1005932092666626} +02/25/2022 10:46:39 - INFO - codeparrot_training - Step 25620: {'lr': 0.00025621709446909644, 'samples': 13117952, 'steps': 25620, 'loss/train': 2.3417887687683105} +02/25/2022 10:46:45 - INFO - codeparrot_training - Step 25621: {'lr': 0.00025620073705440654, 'samples': 13118464, 'steps': 25621, 'loss/train': 2.985455274581909} +02/25/2022 10:46:48 - INFO - codeparrot_training - Step 25622: {'lr': 0.0002561843796131548, 'samples': 13118976, 'steps': 25622, 'loss/train': 2.6752543449401855} +02/25/2022 10:46:54 - INFO - codeparrot_training - Step 25623: {'lr': 0.000256168022145411, 'samples': 13119488, 'steps': 25623, 'loss/train': 1.1425602436065674} +02/25/2022 10:46:58 - INFO - codeparrot_training - Step 25624: {'lr': 0.0002561516646512454, 'samples': 13120000, 'steps': 25624, 'loss/train': 1.9599618911743164} +02/25/2022 10:47:03 - INFO - codeparrot_training - Step 25625: {'lr': 0.0002561353071307281, 'samples': 13120512, 'steps': 25625, 'loss/train': 1.5613296031951904} +02/25/2022 10:47:07 - INFO - codeparrot_training - Step 25626: {'lr': 0.000256118949583929, 'samples': 13121024, 'steps': 25626, 'loss/train': 1.5623118877410889} +02/25/2022 10:47:12 - INFO - codeparrot_training - Step 25627: {'lr': 0.0002561025920109183, 'samples': 13121536, 'steps': 25627, 'loss/train': 2.1120214462280273} +02/25/2022 10:47:16 - INFO - codeparrot_training - Step 25628: {'lr': 0.0002560862344117661, 'samples': 13122048, 'steps': 25628, 'loss/train': 2.0736422538757324} +02/25/2022 10:47:21 - INFO - codeparrot_training - Step 25629: {'lr': 0.0002560698767865423, 'samples': 13122560, 'steps': 25629, 'loss/train': 2.025491237640381} +02/25/2022 10:47:25 - INFO - codeparrot_training - Step 25630: {'lr': 0.0002560535191353171, 'samples': 13123072, 'steps': 25630, 'loss/train': 3.3473308086395264} +02/25/2022 10:47:30 - INFO - codeparrot_training - Step 25631: {'lr': 0.00025603716145816056, 'samples': 13123584, 'steps': 25631, 'loss/train': 1.3574202060699463} +02/25/2022 10:47:34 - INFO - codeparrot_training - Step 25632: {'lr': 0.00025602080375514277, 'samples': 13124096, 'steps': 25632, 'loss/train': 0.2443811148405075} +02/25/2022 10:47:40 - INFO - codeparrot_training - Step 25633: {'lr': 0.0002560044460263337, 'samples': 13124608, 'steps': 25633, 'loss/train': 1.4887213706970215} +02/25/2022 10:47:44 - INFO - codeparrot_training - Step 25634: {'lr': 0.0002559880882718035, 'samples': 13125120, 'steps': 25634, 'loss/train': 1.1063514947891235} +02/25/2022 10:47:49 - INFO - codeparrot_training - Step 25635: {'lr': 0.00025597173049162234, 'samples': 13125632, 'steps': 25635, 'loss/train': 1.3391457796096802} +02/25/2022 10:47:52 - INFO - codeparrot_training - Step 25636: {'lr': 0.0002559553726858601, 'samples': 13126144, 'steps': 25636, 'loss/train': 2.2346572875976562} +02/25/2022 10:47:58 - INFO - codeparrot_training - Step 25637: {'lr': 0.0002559390148545869, 'samples': 13126656, 'steps': 25637, 'loss/train': 1.7385978698730469} +02/25/2022 10:48:01 - INFO - codeparrot_training - Step 25638: {'lr': 0.0002559226569978729, 'samples': 13127168, 'steps': 25638, 'loss/train': 1.7197811603546143} +02/25/2022 10:48:07 - INFO - codeparrot_training - Step 25639: {'lr': 0.00025590629911578805, 'samples': 13127680, 'steps': 25639, 'loss/train': 0.9838075041770935} +02/25/2022 10:48:10 - INFO - codeparrot_training - Step 25640: {'lr': 0.0002558899412084026, 'samples': 13128192, 'steps': 25640, 'loss/train': 0.9118172526359558} +02/25/2022 10:48:16 - INFO - codeparrot_training - Step 25641: {'lr': 0.0002558735832757864, 'samples': 13128704, 'steps': 25641, 'loss/train': 0.9940716028213501} +02/25/2022 10:48:19 - INFO - codeparrot_training - Step 25642: {'lr': 0.0002558572253180097, 'samples': 13129216, 'steps': 25642, 'loss/train': 1.4485918283462524} +02/25/2022 10:48:25 - INFO - codeparrot_training - Step 25643: {'lr': 0.0002558408673351425, 'samples': 13129728, 'steps': 25643, 'loss/train': 2.0387818813323975} +02/25/2022 10:48:29 - INFO - codeparrot_training - Step 25644: {'lr': 0.0002558245093272549, 'samples': 13130240, 'steps': 25644, 'loss/train': 1.5015616416931152} +02/25/2022 10:48:35 - INFO - codeparrot_training - Step 25645: {'lr': 0.0002558081512944169, 'samples': 13130752, 'steps': 25645, 'loss/train': 1.6553453207015991} +02/25/2022 10:48:38 - INFO - codeparrot_training - Step 25646: {'lr': 0.00025579179323669866, 'samples': 13131264, 'steps': 25646, 'loss/train': 1.626403570175171} +02/25/2022 10:48:42 - INFO - codeparrot_training - Step 25647: {'lr': 0.00025577543515417015, 'samples': 13131776, 'steps': 25647, 'loss/train': 1.8631216287612915} +02/25/2022 10:48:47 - INFO - codeparrot_training - Step 25648: {'lr': 0.0002557590770469016, 'samples': 13132288, 'steps': 25648, 'loss/train': 1.6985031366348267} +02/25/2022 10:48:51 - INFO - codeparrot_training - Step 25649: {'lr': 0.0002557427189149629, 'samples': 13132800, 'steps': 25649, 'loss/train': 2.2670018672943115} +02/25/2022 10:48:56 - INFO - codeparrot_training - Step 25650: {'lr': 0.0002557263607584243, 'samples': 13133312, 'steps': 25650, 'loss/train': 1.4436798095703125} +02/25/2022 10:49:00 - INFO - codeparrot_training - Step 25651: {'lr': 0.0002557100025773558, 'samples': 13133824, 'steps': 25651, 'loss/train': 2.4478187561035156} +02/25/2022 10:49:05 - INFO - codeparrot_training - Step 25652: {'lr': 0.00025569364437182736, 'samples': 13134336, 'steps': 25652, 'loss/train': 1.841557502746582} +02/25/2022 10:49:09 - INFO - codeparrot_training - Step 25653: {'lr': 0.00025567728614190926, 'samples': 13134848, 'steps': 25653, 'loss/train': 1.109722375869751} +02/25/2022 10:49:15 - INFO - codeparrot_training - Step 25654: {'lr': 0.0002556609278876714, 'samples': 13135360, 'steps': 25654, 'loss/train': 1.647121548652649} +02/25/2022 10:49:19 - INFO - codeparrot_training - Step 25655: {'lr': 0.00025564456960918395, 'samples': 13135872, 'steps': 25655, 'loss/train': 1.0943329334259033} +02/25/2022 10:49:24 - INFO - codeparrot_training - Step 25656: {'lr': 0.0002556282113065169, 'samples': 13136384, 'steps': 25656, 'loss/train': 1.7512942552566528} +02/25/2022 10:49:30 - INFO - codeparrot_training - Step 25657: {'lr': 0.0002556118529797405, 'samples': 13136896, 'steps': 25657, 'loss/train': 0.0709896832704544} +02/25/2022 10:49:33 - INFO - codeparrot_training - Step 25658: {'lr': 0.00025559549462892463, 'samples': 13137408, 'steps': 25658, 'loss/train': 1.7430973052978516} +02/25/2022 10:49:39 - INFO - codeparrot_training - Step 25659: {'lr': 0.00025557913625413945, 'samples': 13137920, 'steps': 25659, 'loss/train': 1.395140528678894} +02/25/2022 10:49:42 - INFO - codeparrot_training - Step 25660: {'lr': 0.000255562777855455, 'samples': 13138432, 'steps': 25660, 'loss/train': 3.4155142307281494} +02/25/2022 10:49:48 - INFO - codeparrot_training - Step 25661: {'lr': 0.00025554641943294145, 'samples': 13138944, 'steps': 25661, 'loss/train': 1.91545832157135} +02/25/2022 10:49:51 - INFO - codeparrot_training - Step 25662: {'lr': 0.0002555300609866687, 'samples': 13139456, 'steps': 25662, 'loss/train': 1.1756900548934937} +02/25/2022 10:49:57 - INFO - codeparrot_training - Step 25663: {'lr': 0.00025551370251670694, 'samples': 13139968, 'steps': 25663, 'loss/train': 1.3493777513504028} +02/25/2022 10:50:01 - INFO - codeparrot_training - Step 25664: {'lr': 0.0002554973440231263, 'samples': 13140480, 'steps': 25664, 'loss/train': 2.425305128097534} +02/25/2022 10:50:07 - INFO - codeparrot_training - Step 25665: {'lr': 0.0002554809855059967, 'samples': 13140992, 'steps': 25665, 'loss/train': 1.7686724662780762} +02/25/2022 10:50:10 - INFO - codeparrot_training - Step 25666: {'lr': 0.00025546462696538836, 'samples': 13141504, 'steps': 25666, 'loss/train': 3.03287672996521} +02/25/2022 10:50:15 - INFO - codeparrot_training - Step 25667: {'lr': 0.00025544826840137125, 'samples': 13142016, 'steps': 25667, 'loss/train': 2.772830009460449} +02/25/2022 10:50:19 - INFO - codeparrot_training - Step 25668: {'lr': 0.00025543190981401555, 'samples': 13142528, 'steps': 25668, 'loss/train': 0.9455603361129761} +02/25/2022 10:50:24 - INFO - codeparrot_training - Step 25669: {'lr': 0.0002554155512033912, 'samples': 13143040, 'steps': 25669, 'loss/train': 1.3380768299102783} +02/25/2022 10:50:28 - INFO - codeparrot_training - Step 25670: {'lr': 0.00025539919256956843, 'samples': 13143552, 'steps': 25670, 'loss/train': 1.609128713607788} +02/25/2022 10:50:34 - INFO - codeparrot_training - Step 25671: {'lr': 0.00025538283391261714, 'samples': 13144064, 'steps': 25671, 'loss/train': 1.4452869892120361} +02/25/2022 10:50:37 - INFO - codeparrot_training - Step 25672: {'lr': 0.0002553664752326076, 'samples': 13144576, 'steps': 25672, 'loss/train': 2.1352651119232178} +02/25/2022 10:50:42 - INFO - codeparrot_training - Step 25673: {'lr': 0.0002553501165296097, 'samples': 13145088, 'steps': 25673, 'loss/train': 2.2420411109924316} +02/25/2022 10:50:46 - INFO - codeparrot_training - Step 25674: {'lr': 0.00025533375780369366, 'samples': 13145600, 'steps': 25674, 'loss/train': 1.3607232570648193} +02/25/2022 10:50:52 - INFO - codeparrot_training - Step 25675: {'lr': 0.00025531739905492953, 'samples': 13146112, 'steps': 25675, 'loss/train': 1.0559757947921753} +02/25/2022 10:50:55 - INFO - codeparrot_training - Step 25676: {'lr': 0.0002553010402833872, 'samples': 13146624, 'steps': 25676, 'loss/train': 0.0966961681842804} +02/25/2022 10:51:01 - INFO - codeparrot_training - Step 25677: {'lr': 0.00025528468148913703, 'samples': 13147136, 'steps': 25677, 'loss/train': 2.1042630672454834} +02/25/2022 10:51:04 - INFO - codeparrot_training - Step 25678: {'lr': 0.00025526832267224885, 'samples': 13147648, 'steps': 25678, 'loss/train': 1.3894081115722656} +02/25/2022 10:51:10 - INFO - codeparrot_training - Step 25679: {'lr': 0.000255251963832793, 'samples': 13148160, 'steps': 25679, 'loss/train': 2.479046106338501} +02/25/2022 10:51:13 - INFO - codeparrot_training - Step 25680: {'lr': 0.00025523560497083924, 'samples': 13148672, 'steps': 25680, 'loss/train': 1.7690578699111938} +02/25/2022 10:51:20 - INFO - codeparrot_training - Step 25681: {'lr': 0.00025521924608645796, 'samples': 13149184, 'steps': 25681, 'loss/train': 2.472615957260132} +02/25/2022 10:51:23 - INFO - codeparrot_training - Step 25682: {'lr': 0.00025520288717971896, 'samples': 13149696, 'steps': 25682, 'loss/train': 1.6594858169555664} +02/25/2022 10:51:29 - INFO - codeparrot_training - Step 25683: {'lr': 0.00025518652825069256, 'samples': 13150208, 'steps': 25683, 'loss/train': 2.097919464111328} +02/25/2022 10:51:32 - INFO - codeparrot_training - Step 25684: {'lr': 0.00025517016929944863, 'samples': 13150720, 'steps': 25684, 'loss/train': 1.5367844104766846} +02/25/2022 10:51:39 - INFO - codeparrot_training - Step 25685: {'lr': 0.0002551538103260574, 'samples': 13151232, 'steps': 25685, 'loss/train': 1.544798493385315} +02/25/2022 10:51:42 - INFO - codeparrot_training - Step 25686: {'lr': 0.0002551374513305888, 'samples': 13151744, 'steps': 25686, 'loss/train': 1.5396851301193237} +02/25/2022 10:51:45 - INFO - codeparrot_training - Step 25687: {'lr': 0.000255121092313113, 'samples': 13152256, 'steps': 25687, 'loss/train': 0.9987378716468811} +02/25/2022 10:51:51 - INFO - codeparrot_training - Step 25688: {'lr': 0.00025510473327370014, 'samples': 13152768, 'steps': 25688, 'loss/train': 1.808637022972107} +02/25/2022 10:51:55 - INFO - codeparrot_training - Step 25689: {'lr': 0.00025508837421242016, 'samples': 13153280, 'steps': 25689, 'loss/train': 0.5900411605834961} +02/25/2022 10:52:01 - INFO - codeparrot_training - Step 25690: {'lr': 0.00025507201512934327, 'samples': 13153792, 'steps': 25690, 'loss/train': 1.537652611732483} +02/25/2022 10:52:05 - INFO - codeparrot_training - Step 25691: {'lr': 0.0002550556560245394, 'samples': 13154304, 'steps': 25691, 'loss/train': 1.1949650049209595} +02/25/2022 10:52:10 - INFO - codeparrot_training - Step 25692: {'lr': 0.0002550392968980788, 'samples': 13154816, 'steps': 25692, 'loss/train': 1.3613845109939575} +02/25/2022 10:52:14 - INFO - codeparrot_training - Step 25693: {'lr': 0.0002550229377500313, 'samples': 13155328, 'steps': 25693, 'loss/train': 2.4665143489837646} +02/25/2022 10:52:20 - INFO - codeparrot_training - Step 25694: {'lr': 0.00025500657858046734, 'samples': 13155840, 'steps': 25694, 'loss/train': 2.102137804031372} +02/25/2022 10:52:23 - INFO - codeparrot_training - Step 25695: {'lr': 0.0002549902193894566, 'samples': 13156352, 'steps': 25695, 'loss/train': 1.3893893957138062} +02/25/2022 10:52:29 - INFO - codeparrot_training - Step 25696: {'lr': 0.00025497386017706947, 'samples': 13156864, 'steps': 25696, 'loss/train': 1.0654795169830322} +02/25/2022 10:52:32 - INFO - codeparrot_training - Step 25697: {'lr': 0.00025495750094337586, 'samples': 13157376, 'steps': 25697, 'loss/train': 2.170686721801758} +02/25/2022 10:52:38 - INFO - codeparrot_training - Step 25698: {'lr': 0.00025494114168844576, 'samples': 13157888, 'steps': 25698, 'loss/train': 2.075395345687866} +02/25/2022 10:52:41 - INFO - codeparrot_training - Step 25699: {'lr': 0.00025492478241234955, 'samples': 13158400, 'steps': 25699, 'loss/train': 2.4617505073547363} +02/25/2022 10:52:47 - INFO - codeparrot_training - Step 25700: {'lr': 0.00025490842311515704, 'samples': 13158912, 'steps': 25700, 'loss/train': 2.340740203857422} +02/25/2022 10:52:50 - INFO - codeparrot_training - Step 25701: {'lr': 0.00025489206379693854, 'samples': 13159424, 'steps': 25701, 'loss/train': 1.5465506315231323} +02/25/2022 10:52:57 - INFO - codeparrot_training - Step 25702: {'lr': 0.0002548757044577638, 'samples': 13159936, 'steps': 25702, 'loss/train': 1.6311246156692505} +02/25/2022 10:53:00 - INFO - codeparrot_training - Step 25703: {'lr': 0.00025485934509770326, 'samples': 13160448, 'steps': 25703, 'loss/train': 2.338343620300293} +02/25/2022 10:53:05 - INFO - codeparrot_training - Step 25704: {'lr': 0.00025484298571682676, 'samples': 13160960, 'steps': 25704, 'loss/train': 1.9064362049102783} +02/25/2022 10:53:09 - INFO - codeparrot_training - Step 25705: {'lr': 0.00025482662631520444, 'samples': 13161472, 'steps': 25705, 'loss/train': 1.0886472463607788} +02/25/2022 10:53:15 - INFO - codeparrot_training - Step 25706: {'lr': 0.0002548102668929064, 'samples': 13161984, 'steps': 25706, 'loss/train': 1.0186115503311157} +02/25/2022 10:53:18 - INFO - codeparrot_training - Step 25707: {'lr': 0.0002547939074500027, 'samples': 13162496, 'steps': 25707, 'loss/train': 2.081252098083496} +02/25/2022 10:53:24 - INFO - codeparrot_training - Step 25708: {'lr': 0.0002547775479865634, 'samples': 13163008, 'steps': 25708, 'loss/train': 2.5716304779052734} +02/25/2022 10:53:27 - INFO - codeparrot_training - Step 25709: {'lr': 0.00025476118850265863, 'samples': 13163520, 'steps': 25709, 'loss/train': 2.8330962657928467} +02/25/2022 10:53:33 - INFO - codeparrot_training - Step 25710: {'lr': 0.0002547448289983584, 'samples': 13164032, 'steps': 25710, 'loss/train': 2.475098133087158} +02/25/2022 10:53:36 - INFO - codeparrot_training - Step 25711: {'lr': 0.00025472846947373286, 'samples': 13164544, 'steps': 25711, 'loss/train': 1.94191575050354} +02/25/2022 10:53:42 - INFO - codeparrot_training - Step 25712: {'lr': 0.0002547121099288521, 'samples': 13165056, 'steps': 25712, 'loss/train': 2.3849785327911377} +02/25/2022 10:53:46 - INFO - codeparrot_training - Step 25713: {'lr': 0.00025469575036378607, 'samples': 13165568, 'steps': 25713, 'loss/train': 0.8719168305397034} +02/25/2022 10:53:51 - INFO - codeparrot_training - Step 25714: {'lr': 0.00025467939077860496, 'samples': 13166080, 'steps': 25714, 'loss/train': 2.9024498462677} +02/25/2022 10:53:55 - INFO - codeparrot_training - Step 25715: {'lr': 0.0002546630311733789, 'samples': 13166592, 'steps': 25715, 'loss/train': 1.0577749013900757} +02/25/2022 10:54:00 - INFO - codeparrot_training - Step 25716: {'lr': 0.00025464667154817777, 'samples': 13167104, 'steps': 25716, 'loss/train': 2.32977032661438} +02/25/2022 10:54:04 - INFO - codeparrot_training - Step 25717: {'lr': 0.0002546303119030719, 'samples': 13167616, 'steps': 25717, 'loss/train': 2.5572292804718018} +02/25/2022 10:54:10 - INFO - codeparrot_training - Step 25718: {'lr': 0.00025461395223813117, 'samples': 13168128, 'steps': 25718, 'loss/train': 0.13165880739688873} +02/25/2022 10:54:13 - INFO - codeparrot_training - Step 25719: {'lr': 0.0002545975925534258, 'samples': 13168640, 'steps': 25719, 'loss/train': 2.654447078704834} +02/25/2022 10:54:19 - INFO - codeparrot_training - Step 25720: {'lr': 0.00025458123284902574, 'samples': 13169152, 'steps': 25720, 'loss/train': 2.6887803077697754} +02/25/2022 10:54:22 - INFO - codeparrot_training - Step 25721: {'lr': 0.00025456487312500116, 'samples': 13169664, 'steps': 25721, 'loss/train': 2.5403947830200195} +02/25/2022 10:54:28 - INFO - codeparrot_training - Step 25722: {'lr': 0.00025454851338142207, 'samples': 13170176, 'steps': 25722, 'loss/train': 2.2072603702545166} +02/25/2022 10:54:31 - INFO - codeparrot_training - Step 25723: {'lr': 0.0002545321536183586, 'samples': 13170688, 'steps': 25723, 'loss/train': 2.1591055393218994} +02/25/2022 10:54:37 - INFO - codeparrot_training - Step 25724: {'lr': 0.00025451579383588084, 'samples': 13171200, 'steps': 25724, 'loss/train': 1.5913097858428955} +02/25/2022 10:54:40 - INFO - codeparrot_training - Step 25725: {'lr': 0.00025449943403405896, 'samples': 13171712, 'steps': 25725, 'loss/train': 0.959050714969635} +02/25/2022 10:54:46 - INFO - codeparrot_training - Step 25726: {'lr': 0.00025448307421296277, 'samples': 13172224, 'steps': 25726, 'loss/train': 2.009406328201294} +02/25/2022 10:54:50 - INFO - codeparrot_training - Step 25727: {'lr': 0.0002544667143726626, 'samples': 13172736, 'steps': 25727, 'loss/train': 2.427276134490967} +02/25/2022 10:54:55 - INFO - codeparrot_training - Step 25728: {'lr': 0.0002544503545132284, 'samples': 13173248, 'steps': 25728, 'loss/train': 1.9406293630599976} +02/25/2022 10:54:59 - INFO - codeparrot_training - Step 25729: {'lr': 0.0002544339946347303, 'samples': 13173760, 'steps': 25729, 'loss/train': 2.9963250160217285} +02/25/2022 10:55:04 - INFO - codeparrot_training - Step 25730: {'lr': 0.00025441763473723846, 'samples': 13174272, 'steps': 25730, 'loss/train': 2.4323890209198} +02/25/2022 10:55:08 - INFO - codeparrot_training - Step 25731: {'lr': 0.0002544012748208228, 'samples': 13174784, 'steps': 25731, 'loss/train': 2.374079704284668} +02/25/2022 10:55:13 - INFO - codeparrot_training - Step 25732: {'lr': 0.0002543849148855534, 'samples': 13175296, 'steps': 25732, 'loss/train': 2.516752243041992} +02/25/2022 10:55:17 - INFO - codeparrot_training - Step 25733: {'lr': 0.0002543685549315006, 'samples': 13175808, 'steps': 25733, 'loss/train': 2.384999990463257} +02/25/2022 10:55:22 - INFO - codeparrot_training - Step 25734: {'lr': 0.0002543521949587341, 'samples': 13176320, 'steps': 25734, 'loss/train': 0.9762688279151917} +02/25/2022 10:55:26 - INFO - codeparrot_training - Step 25735: {'lr': 0.0002543358349673243, 'samples': 13176832, 'steps': 25735, 'loss/train': 1.6819815635681152} +02/25/2022 10:55:31 - INFO - codeparrot_training - Step 25736: {'lr': 0.00025431947495734117, 'samples': 13177344, 'steps': 25736, 'loss/train': 1.8829540014266968} +02/25/2022 10:55:35 - INFO - codeparrot_training - Step 25737: {'lr': 0.00025430311492885473, 'samples': 13177856, 'steps': 25737, 'loss/train': 2.006132125854492} +02/25/2022 10:55:41 - INFO - codeparrot_training - Step 25738: {'lr': 0.00025428675488193517, 'samples': 13178368, 'steps': 25738, 'loss/train': 1.673203706741333} +02/25/2022 10:55:44 - INFO - codeparrot_training - Step 25739: {'lr': 0.0002542703948166524, 'samples': 13178880, 'steps': 25739, 'loss/train': 1.9815729856491089} +02/25/2022 10:55:50 - INFO - codeparrot_training - Step 25740: {'lr': 0.00025425403473307675, 'samples': 13179392, 'steps': 25740, 'loss/train': 1.899364948272705} +02/25/2022 10:55:53 - INFO - codeparrot_training - Step 25741: {'lr': 0.00025423767463127807, 'samples': 13179904, 'steps': 25741, 'loss/train': 1.8814778327941895} +02/25/2022 10:55:59 - INFO - codeparrot_training - Step 25742: {'lr': 0.00025422131451132665, 'samples': 13180416, 'steps': 25742, 'loss/train': 2.028073310852051} +02/25/2022 10:56:02 - INFO - codeparrot_training - Step 25743: {'lr': 0.0002542049543732923, 'samples': 13180928, 'steps': 25743, 'loss/train': 2.6303043365478516} +02/25/2022 10:56:08 - INFO - codeparrot_training - Step 25744: {'lr': 0.00025418859421724537, 'samples': 13181440, 'steps': 25744, 'loss/train': 0.8574888110160828} +02/25/2022 10:56:11 - INFO - codeparrot_training - Step 25745: {'lr': 0.0002541722340432558, 'samples': 13181952, 'steps': 25745, 'loss/train': 2.7316718101501465} +02/25/2022 10:56:17 - INFO - codeparrot_training - Step 25746: {'lr': 0.00025415587385139364, 'samples': 13182464, 'steps': 25746, 'loss/train': 2.671398162841797} +02/25/2022 10:56:20 - INFO - codeparrot_training - Step 25747: {'lr': 0.00025413951364172915, 'samples': 13182976, 'steps': 25747, 'loss/train': 0.6429460048675537} +02/25/2022 10:56:26 - INFO - codeparrot_training - Step 25748: {'lr': 0.0002541231534143322, 'samples': 13183488, 'steps': 25748, 'loss/train': 2.8606956005096436} +02/25/2022 10:56:30 - INFO - codeparrot_training - Step 25749: {'lr': 0.0002541067931692731, 'samples': 13184000, 'steps': 25749, 'loss/train': 1.670526146888733} +02/25/2022 10:56:35 - INFO - codeparrot_training - Step 25750: {'lr': 0.0002540904329066217, 'samples': 13184512, 'steps': 25750, 'loss/train': 1.62129807472229} +02/25/2022 10:56:39 - INFO - codeparrot_training - Step 25751: {'lr': 0.0002540740726264482, 'samples': 13185024, 'steps': 25751, 'loss/train': 1.9914284944534302} +02/25/2022 10:56:44 - INFO - codeparrot_training - Step 25752: {'lr': 0.0002540577123288227, 'samples': 13185536, 'steps': 25752, 'loss/train': 1.3021748065948486} +02/25/2022 10:56:48 - INFO - codeparrot_training - Step 25753: {'lr': 0.0002540413520138152, 'samples': 13186048, 'steps': 25753, 'loss/train': 1.5482739210128784} +02/25/2022 10:56:54 - INFO - codeparrot_training - Step 25754: {'lr': 0.00025402499168149587, 'samples': 13186560, 'steps': 25754, 'loss/train': 0.9066489934921265} +02/25/2022 10:56:57 - INFO - codeparrot_training - Step 25755: {'lr': 0.00025400863133193477, 'samples': 13187072, 'steps': 25755, 'loss/train': 1.965298056602478} +02/25/2022 10:57:02 - INFO - codeparrot_training - Step 25756: {'lr': 0.0002539922709652019, 'samples': 13187584, 'steps': 25756, 'loss/train': 2.1001808643341064} +02/25/2022 10:57:06 - INFO - codeparrot_training - Step 25757: {'lr': 0.0002539759105813675, 'samples': 13188096, 'steps': 25757, 'loss/train': 2.9081852436065674} +02/25/2022 10:57:12 - INFO - codeparrot_training - Step 25758: {'lr': 0.0002539595501805014, 'samples': 13188608, 'steps': 25758, 'loss/train': 2.226444721221924} +02/25/2022 10:57:16 - INFO - codeparrot_training - Step 25759: {'lr': 0.000253943189762674, 'samples': 13189120, 'steps': 25759, 'loss/train': 2.1923131942749023} +02/25/2022 10:57:21 - INFO - codeparrot_training - Step 25760: {'lr': 0.0002539268293279552, 'samples': 13189632, 'steps': 25760, 'loss/train': 1.6481832265853882} +02/25/2022 10:57:25 - INFO - codeparrot_training - Step 25761: {'lr': 0.00025391046887641506, 'samples': 13190144, 'steps': 25761, 'loss/train': 2.0524251461029053} +02/25/2022 10:57:30 - INFO - codeparrot_training - Step 25762: {'lr': 0.00025389410840812374, 'samples': 13190656, 'steps': 25762, 'loss/train': 1.869503140449524} +02/25/2022 10:57:33 - INFO - codeparrot_training - Step 25763: {'lr': 0.0002538777479231513, 'samples': 13191168, 'steps': 25763, 'loss/train': 0.918232262134552} +02/25/2022 10:57:39 - INFO - codeparrot_training - Step 25764: {'lr': 0.0002538613874215679, 'samples': 13191680, 'steps': 25764, 'loss/train': 2.697035551071167} +02/25/2022 10:57:43 - INFO - codeparrot_training - Step 25765: {'lr': 0.0002538450269034434, 'samples': 13192192, 'steps': 25765, 'loss/train': 2.201368808746338} +02/25/2022 10:57:48 - INFO - codeparrot_training - Step 25766: {'lr': 0.0002538286663688481, 'samples': 13192704, 'steps': 25766, 'loss/train': 3.3679611682891846} +02/25/2022 10:57:52 - INFO - codeparrot_training - Step 25767: {'lr': 0.00025381230581785196, 'samples': 13193216, 'steps': 25767, 'loss/train': 2.0616753101348877} +02/25/2022 10:57:57 - INFO - codeparrot_training - Step 25768: {'lr': 0.00025379594525052517, 'samples': 13193728, 'steps': 25768, 'loss/train': 1.1646887063980103} +02/25/2022 10:58:01 - INFO - codeparrot_training - Step 25769: {'lr': 0.00025377958466693773, 'samples': 13194240, 'steps': 25769, 'loss/train': 0.4731895625591278} +02/25/2022 10:58:06 - INFO - codeparrot_training - Step 25770: {'lr': 0.0002537632240671598, 'samples': 13194752, 'steps': 25770, 'loss/train': 1.464052438735962} +02/25/2022 10:58:10 - INFO - codeparrot_training - Step 25771: {'lr': 0.0002537468634512613, 'samples': 13195264, 'steps': 25771, 'loss/train': 2.3432819843292236} +02/25/2022 10:58:16 - INFO - codeparrot_training - Step 25772: {'lr': 0.00025373050281931247, 'samples': 13195776, 'steps': 25772, 'loss/train': 1.3197146654129028} +02/25/2022 10:58:19 - INFO - codeparrot_training - Step 25773: {'lr': 0.0002537141421713834, 'samples': 13196288, 'steps': 25773, 'loss/train': 1.645582675933838} +02/25/2022 10:58:25 - INFO - codeparrot_training - Step 25774: {'lr': 0.00025369778150754406, 'samples': 13196800, 'steps': 25774, 'loss/train': 1.7776597738265991} +02/25/2022 10:58:28 - INFO - codeparrot_training - Step 25775: {'lr': 0.00025368142082786465, 'samples': 13197312, 'steps': 25775, 'loss/train': 1.6517839431762695} +02/25/2022 10:58:34 - INFO - codeparrot_training - Step 25776: {'lr': 0.0002536650601324152, 'samples': 13197824, 'steps': 25776, 'loss/train': 0.1663322001695633} +02/25/2022 10:58:37 - INFO - codeparrot_training - Step 25777: {'lr': 0.0002536486994212658, 'samples': 13198336, 'steps': 25777, 'loss/train': 1.4063405990600586} +02/25/2022 10:58:43 - INFO - codeparrot_training - Step 25778: {'lr': 0.00025363233869448647, 'samples': 13198848, 'steps': 25778, 'loss/train': 2.351147413253784} +02/25/2022 10:58:46 - INFO - codeparrot_training - Step 25779: {'lr': 0.00025361597795214743, 'samples': 13199360, 'steps': 25779, 'loss/train': 1.40060293674469} +02/25/2022 10:58:52 - INFO - codeparrot_training - Step 25780: {'lr': 0.0002535996171943186, 'samples': 13199872, 'steps': 25780, 'loss/train': 1.617052674293518} +02/25/2022 10:58:55 - INFO - codeparrot_training - Step 25781: {'lr': 0.0002535832564210703, 'samples': 13200384, 'steps': 25781, 'loss/train': 1.1847692728042603} +02/25/2022 10:59:01 - INFO - codeparrot_training - Step 25782: {'lr': 0.0002535668956324723, 'samples': 13200896, 'steps': 25782, 'loss/train': 0.6098423600196838} +02/25/2022 10:59:04 - INFO - codeparrot_training - Step 25783: {'lr': 0.0002535505348285949, 'samples': 13201408, 'steps': 25783, 'loss/train': 1.836410641670227} +02/25/2022 10:59:11 - INFO - codeparrot_training - Step 25784: {'lr': 0.0002535341740095082, 'samples': 13201920, 'steps': 25784, 'loss/train': 2.2008070945739746} +02/25/2022 10:59:14 - INFO - codeparrot_training - Step 25785: {'lr': 0.0002535178131752822, 'samples': 13202432, 'steps': 25785, 'loss/train': 1.6727392673492432} +02/25/2022 10:59:20 - INFO - codeparrot_training - Step 25786: {'lr': 0.000253501452325987, 'samples': 13202944, 'steps': 25786, 'loss/train': 0.9830268025398254} +02/25/2022 10:59:23 - INFO - codeparrot_training - Step 25787: {'lr': 0.0002534850914616926, 'samples': 13203456, 'steps': 25787, 'loss/train': 1.4031126499176025} +02/25/2022 10:59:29 - INFO - codeparrot_training - Step 25788: {'lr': 0.0002534687305824693, 'samples': 13203968, 'steps': 25788, 'loss/train': 1.7922031879425049} +02/25/2022 10:59:32 - INFO - codeparrot_training - Step 25789: {'lr': 0.00025345236968838704, 'samples': 13204480, 'steps': 25789, 'loss/train': 1.715986728668213} +02/25/2022 10:59:38 - INFO - codeparrot_training - Step 25790: {'lr': 0.0002534360087795159, 'samples': 13204992, 'steps': 25790, 'loss/train': 2.3838462829589844} +02/25/2022 10:59:41 - INFO - codeparrot_training - Step 25791: {'lr': 0.000253419647855926, 'samples': 13205504, 'steps': 25791, 'loss/train': 2.225346088409424} +02/25/2022 10:59:47 - INFO - codeparrot_training - Step 25792: {'lr': 0.0002534032869176874, 'samples': 13206016, 'steps': 25792, 'loss/train': 0.12485877424478531} +02/25/2022 10:59:50 - INFO - codeparrot_training - Step 25793: {'lr': 0.0002533869259648702, 'samples': 13206528, 'steps': 25793, 'loss/train': 1.1403968334197998} +02/25/2022 10:59:56 - INFO - codeparrot_training - Step 25794: {'lr': 0.00025337056499754446, 'samples': 13207040, 'steps': 25794, 'loss/train': 2.668804883956909} +02/25/2022 11:00:00 - INFO - codeparrot_training - Step 25795: {'lr': 0.0002533542040157803, 'samples': 13207552, 'steps': 25795, 'loss/train': 1.8052493333816528} +02/25/2022 11:00:06 - INFO - codeparrot_training - Step 25796: {'lr': 0.0002533378430196478, 'samples': 13208064, 'steps': 25796, 'loss/train': 1.5465325117111206} +02/25/2022 11:00:09 - INFO - codeparrot_training - Step 25797: {'lr': 0.0002533214820092171, 'samples': 13208576, 'steps': 25797, 'loss/train': 1.316029667854309} +02/25/2022 11:00:15 - INFO - codeparrot_training - Step 25798: {'lr': 0.0002533051209845581, 'samples': 13209088, 'steps': 25798, 'loss/train': 2.12951922416687} +02/25/2022 11:00:18 - INFO - codeparrot_training - Step 25799: {'lr': 0.0002532887599457411, 'samples': 13209600, 'steps': 25799, 'loss/train': 1.361382246017456} +02/25/2022 11:00:24 - INFO - codeparrot_training - Step 25800: {'lr': 0.0002532723988928361, 'samples': 13210112, 'steps': 25800, 'loss/train': 1.951144814491272} +02/25/2022 11:00:27 - INFO - codeparrot_training - Step 25801: {'lr': 0.0002532560378259132, 'samples': 13210624, 'steps': 25801, 'loss/train': 2.426379680633545} +02/25/2022 11:00:33 - INFO - codeparrot_training - Step 25802: {'lr': 0.0002532396767450424, 'samples': 13211136, 'steps': 25802, 'loss/train': 1.969315767288208} +02/25/2022 11:00:36 - INFO - codeparrot_training - Step 25803: {'lr': 0.00025322331565029395, 'samples': 13211648, 'steps': 25803, 'loss/train': 1.8718268871307373} +02/25/2022 11:00:43 - INFO - codeparrot_training - Step 25804: {'lr': 0.00025320695454173777, 'samples': 13212160, 'steps': 25804, 'loss/train': 2.588383197784424} +02/25/2022 11:00:46 - INFO - codeparrot_training - Step 25805: {'lr': 0.000253190593419444, 'samples': 13212672, 'steps': 25805, 'loss/train': 2.333868980407715} +02/25/2022 11:00:51 - INFO - codeparrot_training - Step 25806: {'lr': 0.0002531742322834828, 'samples': 13213184, 'steps': 25806, 'loss/train': 0.33307725191116333} +02/25/2022 11:00:55 - INFO - codeparrot_training - Step 25807: {'lr': 0.0002531578711339241, 'samples': 13213696, 'steps': 25807, 'loss/train': 2.445645332336426} +02/25/2022 11:01:00 - INFO - codeparrot_training - Step 25808: {'lr': 0.0002531415099708382, 'samples': 13214208, 'steps': 25808, 'loss/train': 1.2026604413986206} +02/25/2022 11:01:04 - INFO - codeparrot_training - Step 25809: {'lr': 0.000253125148794295, 'samples': 13214720, 'steps': 25809, 'loss/train': 1.6700819730758667} +02/25/2022 11:01:09 - INFO - codeparrot_training - Step 25810: {'lr': 0.0002531087876043647, 'samples': 13215232, 'steps': 25810, 'loss/train': 1.4728336334228516} +02/25/2022 11:01:13 - INFO - codeparrot_training - Step 25811: {'lr': 0.0002530924264011172, 'samples': 13215744, 'steps': 25811, 'loss/train': 0.7189762592315674} +02/25/2022 11:01:18 - INFO - codeparrot_training - Step 25812: {'lr': 0.00025307606518462297, 'samples': 13216256, 'steps': 25812, 'loss/train': 2.2786972522735596} +02/25/2022 11:01:22 - INFO - codeparrot_training - Step 25813: {'lr': 0.00025305970395495165, 'samples': 13216768, 'steps': 25813, 'loss/train': 1.4087247848510742} +02/25/2022 11:01:27 - INFO - codeparrot_training - Step 25814: {'lr': 0.00025304334271217363, 'samples': 13217280, 'steps': 25814, 'loss/train': 1.1288540363311768} +02/25/2022 11:01:31 - INFO - codeparrot_training - Step 25815: {'lr': 0.00025302698145635884, 'samples': 13217792, 'steps': 25815, 'loss/train': 1.194296956062317} +02/25/2022 11:01:36 - INFO - codeparrot_training - Step 25816: {'lr': 0.0002530106201875775, 'samples': 13218304, 'steps': 25816, 'loss/train': 2.638526678085327} +02/25/2022 11:01:40 - INFO - codeparrot_training - Step 25817: {'lr': 0.0002529942589058995, 'samples': 13218816, 'steps': 25817, 'loss/train': 1.6297551393508911} +02/25/2022 11:01:45 - INFO - codeparrot_training - Step 25818: {'lr': 0.0002529778976113952, 'samples': 13219328, 'steps': 25818, 'loss/train': 1.6843180656433105} +02/25/2022 11:01:49 - INFO - codeparrot_training - Step 25819: {'lr': 0.00025296153630413435, 'samples': 13219840, 'steps': 25819, 'loss/train': 1.806922197341919} +02/25/2022 11:01:55 - INFO - codeparrot_training - Step 25820: {'lr': 0.00025294517498418727, 'samples': 13220352, 'steps': 25820, 'loss/train': 1.7788069248199463} +02/25/2022 11:01:58 - INFO - codeparrot_training - Step 25821: {'lr': 0.00025292881365162413, 'samples': 13220864, 'steps': 25821, 'loss/train': 1.8311032056808472} +02/25/2022 11:02:04 - INFO - codeparrot_training - Step 25822: {'lr': 0.0002529124523065147, 'samples': 13221376, 'steps': 25822, 'loss/train': 1.8440948724746704} +02/25/2022 11:02:07 - INFO - codeparrot_training - Step 25823: {'lr': 0.0002528960909489294, 'samples': 13221888, 'steps': 25823, 'loss/train': 1.6727509498596191} +02/25/2022 11:02:13 - INFO - codeparrot_training - Step 25824: {'lr': 0.0002528797295789381, 'samples': 13222400, 'steps': 25824, 'loss/train': 1.6291556358337402} +02/25/2022 11:02:16 - INFO - codeparrot_training - Step 25825: {'lr': 0.000252863368196611, 'samples': 13222912, 'steps': 25825, 'loss/train': 1.479705810546875} +02/25/2022 11:02:22 - INFO - codeparrot_training - Step 25826: {'lr': 0.00025284700680201804, 'samples': 13223424, 'steps': 25826, 'loss/train': 1.7157994508743286} +02/25/2022 11:02:25 - INFO - codeparrot_training - Step 25827: {'lr': 0.0002528306453952295, 'samples': 13223936, 'steps': 25827, 'loss/train': 3.061528205871582} +02/25/2022 11:02:31 - INFO - codeparrot_training - Step 25828: {'lr': 0.00025281428397631536, 'samples': 13224448, 'steps': 25828, 'loss/train': 2.138091802597046} +02/25/2022 11:02:34 - INFO - codeparrot_training - Step 25829: {'lr': 0.0002527979225453457, 'samples': 13224960, 'steps': 25829, 'loss/train': 1.6424891948699951} +02/25/2022 11:02:41 - INFO - codeparrot_training - Step 25830: {'lr': 0.0002527815611023906, 'samples': 13225472, 'steps': 25830, 'loss/train': 2.46722412109375} +02/25/2022 11:02:44 - INFO - codeparrot_training - Step 25831: {'lr': 0.00025276519964752015, 'samples': 13225984, 'steps': 25831, 'loss/train': 1.6439441442489624} +02/25/2022 11:02:50 - INFO - codeparrot_training - Step 25832: {'lr': 0.00025274883818080456, 'samples': 13226496, 'steps': 25832, 'loss/train': 1.950255274772644} +02/25/2022 11:02:53 - INFO - codeparrot_training - Step 25833: {'lr': 0.0002527324767023138, 'samples': 13227008, 'steps': 25833, 'loss/train': 1.8935997486114502} +02/25/2022 11:02:59 - INFO - codeparrot_training - Step 25834: {'lr': 0.00025271611521211795, 'samples': 13227520, 'steps': 25834, 'loss/train': 2.3201212882995605} +02/25/2022 11:03:02 - INFO - codeparrot_training - Step 25835: {'lr': 0.0002526997537102871, 'samples': 13228032, 'steps': 25835, 'loss/train': 3.44230318069458} +02/25/2022 11:03:08 - INFO - codeparrot_training - Step 25836: {'lr': 0.0002526833921968914, 'samples': 13228544, 'steps': 25836, 'loss/train': 0.8415111899375916} +02/25/2022 11:03:11 - INFO - codeparrot_training - Step 25837: {'lr': 0.00025266703067200084, 'samples': 13229056, 'steps': 25837, 'loss/train': 1.803125262260437} +02/25/2022 11:03:17 - INFO - codeparrot_training - Step 25838: {'lr': 0.0002526506691356857, 'samples': 13229568, 'steps': 25838, 'loss/train': 1.0776190757751465} +02/25/2022 11:03:20 - INFO - codeparrot_training - Step 25839: {'lr': 0.00025263430758801583, 'samples': 13230080, 'steps': 25839, 'loss/train': 2.6610002517700195} +02/25/2022 11:03:26 - INFO - codeparrot_training - Step 25840: {'lr': 0.00025261794602906147, 'samples': 13230592, 'steps': 25840, 'loss/train': 2.604844331741333} +02/25/2022 11:03:30 - INFO - codeparrot_training - Step 25841: {'lr': 0.00025260158445889266, 'samples': 13231104, 'steps': 25841, 'loss/train': 2.0738120079040527} +02/25/2022 11:03:35 - INFO - codeparrot_training - Step 25842: {'lr': 0.00025258522287757945, 'samples': 13231616, 'steps': 25842, 'loss/train': 2.1464474201202393} +02/25/2022 11:03:39 - INFO - codeparrot_training - Step 25843: {'lr': 0.000252568861285192, 'samples': 13232128, 'steps': 25843, 'loss/train': 1.261229157447815} +02/25/2022 11:03:44 - INFO - codeparrot_training - Step 25844: {'lr': 0.00025255249968180035, 'samples': 13232640, 'steps': 25844, 'loss/train': 1.1351053714752197} +02/25/2022 11:03:48 - INFO - codeparrot_training - Step 25845: {'lr': 0.00025253613806747454, 'samples': 13233152, 'steps': 25845, 'loss/train': 1.0389478206634521} +02/25/2022 11:03:54 - INFO - codeparrot_training - Step 25846: {'lr': 0.0002525197764422848, 'samples': 13233664, 'steps': 25846, 'loss/train': 1.6159627437591553} +02/25/2022 11:03:57 - INFO - codeparrot_training - Step 25847: {'lr': 0.00025250341480630113, 'samples': 13234176, 'steps': 25847, 'loss/train': 1.8487048149108887} +02/25/2022 11:04:03 - INFO - codeparrot_training - Step 25848: {'lr': 0.0002524870531595936, 'samples': 13234688, 'steps': 25848, 'loss/train': 1.6222976446151733} +02/25/2022 11:04:06 - INFO - codeparrot_training - Step 25849: {'lr': 0.00025247069150223235, 'samples': 13235200, 'steps': 25849, 'loss/train': 1.6385929584503174} +02/25/2022 11:04:12 - INFO - codeparrot_training - Step 25850: {'lr': 0.0002524543298342875, 'samples': 13235712, 'steps': 25850, 'loss/train': 0.9132198691368103} +02/25/2022 11:04:16 - INFO - codeparrot_training - Step 25851: {'lr': 0.00025243796815582894, 'samples': 13236224, 'steps': 25851, 'loss/train': 1.9199039936065674} +02/25/2022 11:04:21 - INFO - codeparrot_training - Step 25852: {'lr': 0.000252421606466927, 'samples': 13236736, 'steps': 25852, 'loss/train': 2.31599497795105} +02/25/2022 11:04:25 - INFO - codeparrot_training - Step 25853: {'lr': 0.00025240524476765166, 'samples': 13237248, 'steps': 25853, 'loss/train': 2.0127487182617188} +02/25/2022 11:04:30 - INFO - codeparrot_training - Step 25854: {'lr': 0.00025238888305807294, 'samples': 13237760, 'steps': 25854, 'loss/train': 1.9955291748046875} +02/25/2022 11:04:34 - INFO - codeparrot_training - Step 25855: {'lr': 0.00025237252133826103, 'samples': 13238272, 'steps': 25855, 'loss/train': 3.0744364261627197} +02/25/2022 11:04:39 - INFO - codeparrot_training - Step 25856: {'lr': 0.0002523561596082861, 'samples': 13238784, 'steps': 25856, 'loss/train': 1.3415467739105225} +02/25/2022 11:04:43 - INFO - codeparrot_training - Step 25857: {'lr': 0.00025233979786821797, 'samples': 13239296, 'steps': 25857, 'loss/train': 1.6282848119735718} +02/25/2022 11:04:48 - INFO - codeparrot_training - Step 25858: {'lr': 0.00025232343611812695, 'samples': 13239808, 'steps': 25858, 'loss/train': 1.8890135288238525} +02/25/2022 11:04:52 - INFO - codeparrot_training - Step 25859: {'lr': 0.00025230707435808307, 'samples': 13240320, 'steps': 25859, 'loss/train': 1.5632286071777344} +02/25/2022 11:05:00 - INFO - codeparrot_training - Step 25860: {'lr': 0.0002522907125881565, 'samples': 13240832, 'steps': 25860, 'loss/train': 2.3817331790924072} +02/25/2022 11:05:03 - INFO - codeparrot_training - Step 25861: {'lr': 0.0002522743508084171, 'samples': 13241344, 'steps': 25861, 'loss/train': 1.6820285320281982} +02/25/2022 11:05:09 - INFO - codeparrot_training - Step 25862: {'lr': 0.0002522579890189352, 'samples': 13241856, 'steps': 25862, 'loss/train': 2.2996323108673096} +02/25/2022 11:05:12 - INFO - codeparrot_training - Step 25863: {'lr': 0.0002522416272197808, 'samples': 13242368, 'steps': 25863, 'loss/train': 1.5716899633407593} +02/25/2022 11:05:18 - INFO - codeparrot_training - Step 25864: {'lr': 0.0002522252654110239, 'samples': 13242880, 'steps': 25864, 'loss/train': 1.6403475999832153} +02/25/2022 11:05:22 - INFO - codeparrot_training - Step 25865: {'lr': 0.00025220890359273473, 'samples': 13243392, 'steps': 25865, 'loss/train': 2.276691198348999} +02/25/2022 11:05:27 - INFO - codeparrot_training - Step 25866: {'lr': 0.00025219254176498334, 'samples': 13243904, 'steps': 25866, 'loss/train': 3.263749122619629} +02/25/2022 11:05:31 - INFO - codeparrot_training - Step 25867: {'lr': 0.0002521761799278398, 'samples': 13244416, 'steps': 25867, 'loss/train': 0.7852783799171448} +02/25/2022 11:05:38 - INFO - codeparrot_training - Step 25868: {'lr': 0.0002521598180813741, 'samples': 13244928, 'steps': 25868, 'loss/train': 2.3589694499969482} +02/25/2022 11:05:41 - INFO - codeparrot_training - Step 25869: {'lr': 0.0002521434562256565, 'samples': 13245440, 'steps': 25869, 'loss/train': 0.21988695859909058} +02/25/2022 11:05:47 - INFO - codeparrot_training - Step 25870: {'lr': 0.000252127094360757, 'samples': 13245952, 'steps': 25870, 'loss/train': 1.8397248983383179} +02/25/2022 11:05:51 - INFO - codeparrot_training - Step 25871: {'lr': 0.00025211073248674573, 'samples': 13246464, 'steps': 25871, 'loss/train': 1.4425493478775024} +02/25/2022 11:05:57 - INFO - codeparrot_training - Step 25872: {'lr': 0.00025209437060369266, 'samples': 13246976, 'steps': 25872, 'loss/train': 2.2448723316192627} +02/25/2022 11:06:00 - INFO - codeparrot_training - Step 25873: {'lr': 0.0002520780087116681, 'samples': 13247488, 'steps': 25873, 'loss/train': 1.781081199645996} +02/25/2022 11:06:04 - INFO - codeparrot_training - Step 25874: {'lr': 0.0002520616468107419, 'samples': 13248000, 'steps': 25874, 'loss/train': 0.39445847272872925} +02/25/2022 11:06:09 - INFO - codeparrot_training - Step 25875: {'lr': 0.0002520452849009843, 'samples': 13248512, 'steps': 25875, 'loss/train': 1.5589098930358887} +02/25/2022 11:06:13 - INFO - codeparrot_training - Step 25876: {'lr': 0.0002520289229824653, 'samples': 13249024, 'steps': 25876, 'loss/train': 1.2916417121887207} +02/25/2022 11:06:19 - INFO - codeparrot_training - Step 25877: {'lr': 0.0002520125610552552, 'samples': 13249536, 'steps': 25877, 'loss/train': 2.270116090774536} +02/25/2022 11:06:22 - INFO - codeparrot_training - Step 25878: {'lr': 0.0002519961991194238, 'samples': 13250048, 'steps': 25878, 'loss/train': 1.5044080018997192} +02/25/2022 11:06:28 - INFO - codeparrot_training - Step 25879: {'lr': 0.0002519798371750413, 'samples': 13250560, 'steps': 25879, 'loss/train': 2.329695224761963} +02/25/2022 11:06:31 - INFO - codeparrot_training - Step 25880: {'lr': 0.0002519634752221778, 'samples': 13251072, 'steps': 25880, 'loss/train': 2.5209453105926514} +02/25/2022 11:06:38 - INFO - codeparrot_training - Step 25881: {'lr': 0.00025194711326090346, 'samples': 13251584, 'steps': 25881, 'loss/train': 2.630035638809204} +02/25/2022 11:06:42 - INFO - codeparrot_training - Step 25882: {'lr': 0.0002519307512912883, 'samples': 13252096, 'steps': 25882, 'loss/train': 1.6330238580703735} +02/25/2022 11:06:47 - INFO - codeparrot_training - Step 25883: {'lr': 0.00025191438931340237, 'samples': 13252608, 'steps': 25883, 'loss/train': 1.349502444267273} +02/25/2022 11:06:51 - INFO - codeparrot_training - Step 25884: {'lr': 0.0002518980273273159, 'samples': 13253120, 'steps': 25884, 'loss/train': 2.3931283950805664} +02/25/2022 11:06:56 - INFO - codeparrot_training - Step 25885: {'lr': 0.00025188166533309875, 'samples': 13253632, 'steps': 25885, 'loss/train': 2.9504313468933105} +02/25/2022 11:07:00 - INFO - codeparrot_training - Step 25886: {'lr': 0.0002518653033308212, 'samples': 13254144, 'steps': 25886, 'loss/train': 1.3233942985534668} +02/25/2022 11:07:06 - INFO - codeparrot_training - Step 25887: {'lr': 0.0002518489413205533, 'samples': 13254656, 'steps': 25887, 'loss/train': 2.4223709106445312} +02/25/2022 11:07:09 - INFO - codeparrot_training - Step 25888: {'lr': 0.00025183257930236515, 'samples': 13255168, 'steps': 25888, 'loss/train': 2.405268430709839} +02/25/2022 11:07:15 - INFO - codeparrot_training - Step 25889: {'lr': 0.00025181621727632673, 'samples': 13255680, 'steps': 25889, 'loss/train': 0.924758791923523} +02/25/2022 11:07:18 - INFO - codeparrot_training - Step 25890: {'lr': 0.0002517998552425083, 'samples': 13256192, 'steps': 25890, 'loss/train': 1.5506850481033325} +02/25/2022 11:07:25 - INFO - codeparrot_training - Step 25891: {'lr': 0.00025178349320097984, 'samples': 13256704, 'steps': 25891, 'loss/train': 0.4020494818687439} +02/25/2022 11:07:29 - INFO - codeparrot_training - Step 25892: {'lr': 0.00025176713115181143, 'samples': 13257216, 'steps': 25892, 'loss/train': 2.080244779586792} +02/25/2022 11:07:35 - INFO - codeparrot_training - Step 25893: {'lr': 0.0002517507690950733, 'samples': 13257728, 'steps': 25893, 'loss/train': 2.1497750282287598} +02/25/2022 11:07:38 - INFO - codeparrot_training - Step 25894: {'lr': 0.0002517344070308353, 'samples': 13258240, 'steps': 25894, 'loss/train': 4.769745349884033} +02/25/2022 11:07:44 - INFO - codeparrot_training - Step 25895: {'lr': 0.0002517180449591677, 'samples': 13258752, 'steps': 25895, 'loss/train': 1.1782888174057007} +02/25/2022 11:07:47 - INFO - codeparrot_training - Step 25896: {'lr': 0.00025170168288014046, 'samples': 13259264, 'steps': 25896, 'loss/train': 1.297042727470398} +02/25/2022 11:07:53 - INFO - codeparrot_training - Step 25897: {'lr': 0.0002516853207938238, 'samples': 13259776, 'steps': 25897, 'loss/train': 1.8245781660079956} +02/25/2022 11:07:56 - INFO - codeparrot_training - Step 25898: {'lr': 0.0002516689587002878, 'samples': 13260288, 'steps': 25898, 'loss/train': 2.8517932891845703} +02/25/2022 11:08:02 - INFO - codeparrot_training - Step 25899: {'lr': 0.00025165259659960256, 'samples': 13260800, 'steps': 25899, 'loss/train': 1.672888994216919} +02/25/2022 11:08:05 - INFO - codeparrot_training - Step 25900: {'lr': 0.00025163623449183796, 'samples': 13261312, 'steps': 25900, 'loss/train': 2.4763741493225098} +02/25/2022 11:08:11 - INFO - codeparrot_training - Step 25901: {'lr': 0.00025161987237706443, 'samples': 13261824, 'steps': 25901, 'loss/train': 0.5543332099914551} +02/25/2022 11:08:14 - INFO - codeparrot_training - Step 25902: {'lr': 0.00025160351025535173, 'samples': 13262336, 'steps': 25902, 'loss/train': 0.34986764192581177} +02/25/2022 11:08:20 - INFO - codeparrot_training - Step 25903: {'lr': 0.0002515871481267702, 'samples': 13262848, 'steps': 25903, 'loss/train': 2.4872703552246094} +02/25/2022 11:08:23 - INFO - codeparrot_training - Step 25904: {'lr': 0.00025157078599138976, 'samples': 13263360, 'steps': 25904, 'loss/train': 1.520838737487793} +02/25/2022 11:08:29 - INFO - codeparrot_training - Step 25905: {'lr': 0.00025155442384928057, 'samples': 13263872, 'steps': 25905, 'loss/train': 2.6658027172088623} +02/25/2022 11:08:32 - INFO - codeparrot_training - Step 25906: {'lr': 0.0002515380617005128, 'samples': 13264384, 'steps': 25906, 'loss/train': 2.430997848510742} +02/25/2022 11:08:40 - INFO - codeparrot_training - Step 25907: {'lr': 0.0002515216995451564, 'samples': 13264896, 'steps': 25907, 'loss/train': 0.6682925820350647} +02/25/2022 11:08:43 - INFO - codeparrot_training - Step 25908: {'lr': 0.0002515053373832816, 'samples': 13265408, 'steps': 25908, 'loss/train': 1.8046756982803345} +02/25/2022 11:08:49 - INFO - codeparrot_training - Step 25909: {'lr': 0.0002514889752149583, 'samples': 13265920, 'steps': 25909, 'loss/train': 0.4461532533168793} +02/25/2022 11:08:52 - INFO - codeparrot_training - Step 25910: {'lr': 0.00025147261304025683, 'samples': 13266432, 'steps': 25910, 'loss/train': 2.2284903526306152} +02/25/2022 11:08:58 - INFO - codeparrot_training - Step 25911: {'lr': 0.000251456250859247, 'samples': 13266944, 'steps': 25911, 'loss/train': 2.7277920246124268} +02/25/2022 11:09:01 - INFO - codeparrot_training - Step 25912: {'lr': 0.00025143988867199916, 'samples': 13267456, 'steps': 25912, 'loss/train': 4.894824981689453} +02/25/2022 11:09:07 - INFO - codeparrot_training - Step 25913: {'lr': 0.00025142352647858324, 'samples': 13267968, 'steps': 25913, 'loss/train': 1.703282356262207} +02/25/2022 11:09:10 - INFO - codeparrot_training - Step 25914: {'lr': 0.00025140716427906945, 'samples': 13268480, 'steps': 25914, 'loss/train': 2.0290658473968506} +02/25/2022 11:09:16 - INFO - codeparrot_training - Step 25915: {'lr': 0.00025139080207352776, 'samples': 13268992, 'steps': 25915, 'loss/train': 1.3233006000518799} +02/25/2022 11:09:19 - INFO - codeparrot_training - Step 25916: {'lr': 0.00025137443986202827, 'samples': 13269504, 'steps': 25916, 'loss/train': 1.7200279235839844} +02/25/2022 11:09:27 - INFO - codeparrot_training - Step 25917: {'lr': 0.00025135807764464123, 'samples': 13270016, 'steps': 25917, 'loss/train': 2.1949801445007324} +02/25/2022 11:09:30 - INFO - codeparrot_training - Step 25918: {'lr': 0.0002513417154214366, 'samples': 13270528, 'steps': 25918, 'loss/train': 0.7022582292556763} +02/25/2022 11:09:36 - INFO - codeparrot_training - Step 25919: {'lr': 0.0002513253531924844, 'samples': 13271040, 'steps': 25919, 'loss/train': 3.2486822605133057} +02/25/2022 11:09:39 - INFO - codeparrot_training - Step 25920: {'lr': 0.0002513089909578549, 'samples': 13271552, 'steps': 25920, 'loss/train': 1.7495293617248535} +02/25/2022 11:09:43 - INFO - codeparrot_training - Step 25921: {'lr': 0.0002512926287176181, 'samples': 13272064, 'steps': 25921, 'loss/train': 1.521016001701355} +02/25/2022 11:09:48 - INFO - codeparrot_training - Step 25922: {'lr': 0.000251276266471844, 'samples': 13272576, 'steps': 25922, 'loss/train': 1.9212186336517334} +02/25/2022 11:09:52 - INFO - codeparrot_training - Step 25923: {'lr': 0.00025125990422060284, 'samples': 13273088, 'steps': 25923, 'loss/train': 2.0351526737213135} +02/25/2022 11:09:57 - INFO - codeparrot_training - Step 25924: {'lr': 0.00025124354196396465, 'samples': 13273600, 'steps': 25924, 'loss/train': 1.6816900968551636} +02/25/2022 11:10:03 - INFO - codeparrot_training - Step 25925: {'lr': 0.00025122717970199956, 'samples': 13274112, 'steps': 25925, 'loss/train': 2.1449434757232666} +02/25/2022 11:10:07 - INFO - codeparrot_training - Step 25926: {'lr': 0.0002512108174347775, 'samples': 13274624, 'steps': 25926, 'loss/train': 2.3340272903442383} +02/25/2022 11:10:10 - INFO - codeparrot_training - Step 25927: {'lr': 0.0002511944551623688, 'samples': 13275136, 'steps': 25927, 'loss/train': 1.4073207378387451} +02/25/2022 11:10:17 - INFO - codeparrot_training - Step 25928: {'lr': 0.00025117809288484333, 'samples': 13275648, 'steps': 25928, 'loss/train': 1.188170075416565} +02/25/2022 11:10:21 - INFO - codeparrot_training - Step 25929: {'lr': 0.0002511617306022713, 'samples': 13276160, 'steps': 25929, 'loss/train': 2.3164262771606445} +02/25/2022 11:10:26 - INFO - codeparrot_training - Step 25930: {'lr': 0.0002511453683147229, 'samples': 13276672, 'steps': 25930, 'loss/train': 1.4057706594467163} +02/25/2022 11:10:30 - INFO - codeparrot_training - Step 25931: {'lr': 0.000251129006022268, 'samples': 13277184, 'steps': 25931, 'loss/train': 1.5364357233047485} +02/25/2022 11:10:35 - INFO - codeparrot_training - Step 25932: {'lr': 0.00025111264372497686, 'samples': 13277696, 'steps': 25932, 'loss/train': 2.583714008331299} +02/25/2022 11:10:39 - INFO - codeparrot_training - Step 25933: {'lr': 0.0002510962814229195, 'samples': 13278208, 'steps': 25933, 'loss/train': 2.0228497982025146} +02/25/2022 11:10:44 - INFO - codeparrot_training - Step 25934: {'lr': 0.000251079919116166, 'samples': 13278720, 'steps': 25934, 'loss/train': 0.3589111864566803} +02/25/2022 11:10:48 - INFO - codeparrot_training - Step 25935: {'lr': 0.00025106355680478646, 'samples': 13279232, 'steps': 25935, 'loss/train': 1.6998690366744995} +02/25/2022 11:10:54 - INFO - codeparrot_training - Step 25936: {'lr': 0.000251047194488851, 'samples': 13279744, 'steps': 25936, 'loss/train': 2.191589117050171} +02/25/2022 11:10:57 - INFO - codeparrot_training - Step 25937: {'lr': 0.0002510308321684297, 'samples': 13280256, 'steps': 25937, 'loss/train': 2.3912060260772705} +02/25/2022 11:11:03 - INFO - codeparrot_training - Step 25938: {'lr': 0.0002510144698435926, 'samples': 13280768, 'steps': 25938, 'loss/train': 1.7736722230911255} +02/25/2022 11:11:06 - INFO - codeparrot_training - Step 25939: {'lr': 0.00025099810751440986, 'samples': 13281280, 'steps': 25939, 'loss/train': 1.5722779035568237} +02/25/2022 11:11:14 - INFO - codeparrot_training - Step 25940: {'lr': 0.0002509817451809515, 'samples': 13281792, 'steps': 25940, 'loss/train': 1.761673092842102} +02/25/2022 11:11:17 - INFO - codeparrot_training - Step 25941: {'lr': 0.00025096538284328775, 'samples': 13282304, 'steps': 25941, 'loss/train': 2.2593841552734375} +02/25/2022 11:11:23 - INFO - codeparrot_training - Step 25942: {'lr': 0.0002509490205014885, 'samples': 13282816, 'steps': 25942, 'loss/train': 1.6592916250228882} +02/25/2022 11:11:26 - INFO - codeparrot_training - Step 25943: {'lr': 0.00025093265815562406, 'samples': 13283328, 'steps': 25943, 'loss/train': 0.317325621843338} +02/25/2022 11:11:32 - INFO - codeparrot_training - Step 25944: {'lr': 0.00025091629580576433, 'samples': 13283840, 'steps': 25944, 'loss/train': 1.5023730993270874} +02/25/2022 11:11:35 - INFO - codeparrot_training - Step 25945: {'lr': 0.00025089993345197946, 'samples': 13284352, 'steps': 25945, 'loss/train': 1.326545238494873} +02/25/2022 11:11:41 - INFO - codeparrot_training - Step 25946: {'lr': 0.0002508835710943396, 'samples': 13284864, 'steps': 25946, 'loss/train': 2.2905256748199463} +02/25/2022 11:11:44 - INFO - codeparrot_training - Step 25947: {'lr': 0.00025086720873291485, 'samples': 13285376, 'steps': 25947, 'loss/train': 0.7675917148590088} +02/25/2022 11:11:49 - INFO - codeparrot_training - Step 25948: {'lr': 0.0002508508463677752, 'samples': 13285888, 'steps': 25948, 'loss/train': 2.64031720161438} +02/25/2022 11:11:53 - INFO - codeparrot_training - Step 25949: {'lr': 0.0002508344839989908, 'samples': 13286400, 'steps': 25949, 'loss/train': 1.8297836780548096} +02/25/2022 11:12:00 - INFO - codeparrot_training - Step 25950: {'lr': 0.0002508181216266317, 'samples': 13286912, 'steps': 25950, 'loss/train': 1.6454377174377441} +02/25/2022 11:12:04 - INFO - codeparrot_training - Step 25951: {'lr': 0.0002508017592507681, 'samples': 13287424, 'steps': 25951, 'loss/train': 1.8232076168060303} +02/25/2022 11:12:09 - INFO - codeparrot_training - Step 25952: {'lr': 0.0002507853968714699, 'samples': 13287936, 'steps': 25952, 'loss/train': 3.2949485778808594} +02/25/2022 11:12:13 - INFO - codeparrot_training - Step 25953: {'lr': 0.0002507690344888074, 'samples': 13288448, 'steps': 25953, 'loss/train': 0.8985562324523926} +02/25/2022 11:12:19 - INFO - codeparrot_training - Step 25954: {'lr': 0.00025075267210285056, 'samples': 13288960, 'steps': 25954, 'loss/train': 1.9168519973754883} +02/25/2022 11:12:22 - INFO - codeparrot_training - Step 25955: {'lr': 0.0002507363097136696, 'samples': 13289472, 'steps': 25955, 'loss/train': 2.2396509647369385} +02/25/2022 11:12:28 - INFO - codeparrot_training - Step 25956: {'lr': 0.0002507199473213344, 'samples': 13289984, 'steps': 25956, 'loss/train': 1.6588709354400635} +02/25/2022 11:12:31 - INFO - codeparrot_training - Step 25957: {'lr': 0.00025070358492591523, 'samples': 13290496, 'steps': 25957, 'loss/train': 1.9297422170639038} +02/25/2022 11:12:37 - INFO - codeparrot_training - Step 25958: {'lr': 0.00025068722252748215, 'samples': 13291008, 'steps': 25958, 'loss/train': 1.6260007619857788} +02/25/2022 11:12:40 - INFO - codeparrot_training - Step 25959: {'lr': 0.0002506708601261052, 'samples': 13291520, 'steps': 25959, 'loss/train': 3.115778923034668} +02/25/2022 11:12:46 - INFO - codeparrot_training - Step 25960: {'lr': 0.00025065449772185456, 'samples': 13292032, 'steps': 25960, 'loss/train': 2.6047306060791016} +02/25/2022 11:12:49 - INFO - codeparrot_training - Step 25961: {'lr': 0.0002506381353148002, 'samples': 13292544, 'steps': 25961, 'loss/train': 2.872133493423462} +02/25/2022 11:12:55 - INFO - codeparrot_training - Step 25962: {'lr': 0.00025062177290501226, 'samples': 13293056, 'steps': 25962, 'loss/train': 1.9665627479553223} +02/25/2022 11:12:58 - INFO - codeparrot_training - Step 25963: {'lr': 0.0002506054104925608, 'samples': 13293568, 'steps': 25963, 'loss/train': 2.273334503173828} +02/25/2022 11:13:06 - INFO - codeparrot_training - Step 25964: {'lr': 0.00025058904807751604, 'samples': 13294080, 'steps': 25964, 'loss/train': 2.1211843490600586} +02/25/2022 11:13:09 - INFO - codeparrot_training - Step 25965: {'lr': 0.00025057268565994794, 'samples': 13294592, 'steps': 25965, 'loss/train': 1.1610602140426636} +02/25/2022 11:13:15 - INFO - codeparrot_training - Step 25966: {'lr': 0.00025055632323992666, 'samples': 13295104, 'steps': 25966, 'loss/train': 1.9810941219329834} +02/25/2022 11:13:18 - INFO - codeparrot_training - Step 25967: {'lr': 0.00025053996081752226, 'samples': 13295616, 'steps': 25967, 'loss/train': 2.5076656341552734} +02/25/2022 11:13:24 - INFO - codeparrot_training - Step 25968: {'lr': 0.0002505235983928048, 'samples': 13296128, 'steps': 25968, 'loss/train': 1.9034626483917236} +02/25/2022 11:13:27 - INFO - codeparrot_training - Step 25969: {'lr': 0.00025050723596584454, 'samples': 13296640, 'steps': 25969, 'loss/train': 1.7650463581085205} +02/25/2022 11:13:33 - INFO - codeparrot_training - Step 25970: {'lr': 0.0002504908735367113, 'samples': 13297152, 'steps': 25970, 'loss/train': 3.1255433559417725} +02/25/2022 11:13:36 - INFO - codeparrot_training - Step 25971: {'lr': 0.0002504745111054754, 'samples': 13297664, 'steps': 25971, 'loss/train': 1.3024663925170898} +02/25/2022 11:13:42 - INFO - codeparrot_training - Step 25972: {'lr': 0.00025045814867220677, 'samples': 13298176, 'steps': 25972, 'loss/train': 1.6699652671813965} +02/25/2022 11:13:46 - INFO - codeparrot_training - Step 25973: {'lr': 0.00025044178623697565, 'samples': 13298688, 'steps': 25973, 'loss/train': 2.245182991027832} +02/25/2022 11:13:53 - INFO - codeparrot_training - Step 25974: {'lr': 0.000250425423799852, 'samples': 13299200, 'steps': 25974, 'loss/train': 1.8966944217681885} +02/25/2022 11:13:56 - INFO - codeparrot_training - Step 25975: {'lr': 0.00025040906136090604, 'samples': 13299712, 'steps': 25975, 'loss/train': 1.5828367471694946} +02/25/2022 11:14:02 - INFO - codeparrot_training - Step 25976: {'lr': 0.00025039269892020773, 'samples': 13300224, 'steps': 25976, 'loss/train': 1.1790169477462769} +02/25/2022 11:14:05 - INFO - codeparrot_training - Step 25977: {'lr': 0.0002503763364778273, 'samples': 13300736, 'steps': 25977, 'loss/train': 1.0677714347839355} +02/25/2022 11:14:11 - INFO - codeparrot_training - Step 25978: {'lr': 0.0002503599740338347, 'samples': 13301248, 'steps': 25978, 'loss/train': 2.010186195373535} +02/25/2022 11:14:14 - INFO - codeparrot_training - Step 25979: {'lr': 0.00025034361158830006, 'samples': 13301760, 'steps': 25979, 'loss/train': 1.9555803537368774} +02/25/2022 11:14:20 - INFO - codeparrot_training - Step 25980: {'lr': 0.0002503272491412936, 'samples': 13302272, 'steps': 25980, 'loss/train': 3.3068318367004395} +02/25/2022 11:14:24 - INFO - codeparrot_training - Step 25981: {'lr': 0.00025031088669288514, 'samples': 13302784, 'steps': 25981, 'loss/train': 1.796302318572998} +02/25/2022 11:14:29 - INFO - codeparrot_training - Step 25982: {'lr': 0.00025029452424314507, 'samples': 13303296, 'steps': 25982, 'loss/train': 1.937954068183899} +02/25/2022 11:14:33 - INFO - codeparrot_training - Step 25983: {'lr': 0.0002502781617921433, 'samples': 13303808, 'steps': 25983, 'loss/train': 1.954081416130066} +02/25/2022 11:14:38 - INFO - codeparrot_training - Step 25984: {'lr': 0.00025026179933995, 'samples': 13304320, 'steps': 25984, 'loss/train': 1.7263826131820679} +02/25/2022 11:14:42 - INFO - codeparrot_training - Step 25985: {'lr': 0.0002502454368866352, 'samples': 13304832, 'steps': 25985, 'loss/train': 0.9804202318191528} +02/25/2022 11:14:49 - INFO - codeparrot_training - Step 25986: {'lr': 0.0002502290744322691, 'samples': 13305344, 'steps': 25986, 'loss/train': 1.898422360420227} +02/25/2022 11:14:52 - INFO - codeparrot_training - Step 25987: {'lr': 0.00025021271197692163, 'samples': 13305856, 'steps': 25987, 'loss/train': 1.9202721118927002} +02/25/2022 11:14:58 - INFO - codeparrot_training - Step 25988: {'lr': 0.000250196349520663, 'samples': 13306368, 'steps': 25988, 'loss/train': 2.741056442260742} +02/25/2022 11:15:01 - INFO - codeparrot_training - Step 25989: {'lr': 0.0002501799870635633, 'samples': 13306880, 'steps': 25989, 'loss/train': 1.5168230533599854} +02/25/2022 11:15:07 - INFO - codeparrot_training - Step 25990: {'lr': 0.00025016362460569255, 'samples': 13307392, 'steps': 25990, 'loss/train': 0.5687598586082458} +02/25/2022 11:15:11 - INFO - codeparrot_training - Step 25991: {'lr': 0.0002501472621471209, 'samples': 13307904, 'steps': 25991, 'loss/train': 0.9630624651908875} +02/25/2022 11:15:16 - INFO - codeparrot_training - Step 25992: {'lr': 0.0002501308996879184, 'samples': 13308416, 'steps': 25992, 'loss/train': 1.777406930923462} +02/25/2022 11:15:20 - INFO - codeparrot_training - Step 25993: {'lr': 0.00025011453722815526, 'samples': 13308928, 'steps': 25993, 'loss/train': 1.7278110980987549} +02/25/2022 11:15:25 - INFO - codeparrot_training - Step 25994: {'lr': 0.0002500981747679014, 'samples': 13309440, 'steps': 25994, 'loss/train': 2.7779786586761475} +02/25/2022 11:15:28 - INFO - codeparrot_training - Step 25995: {'lr': 0.00025008181230722705, 'samples': 13309952, 'steps': 25995, 'loss/train': 3.2408502101898193} +02/25/2022 11:15:36 - INFO - codeparrot_training - Step 25996: {'lr': 0.00025006544984620217, 'samples': 13310464, 'steps': 25996, 'loss/train': 2.555023193359375} +02/25/2022 11:15:39 - INFO - codeparrot_training - Step 25997: {'lr': 0.00025004908738489697, 'samples': 13310976, 'steps': 25997, 'loss/train': 0.3586549162864685} +02/25/2022 11:15:45 - INFO - codeparrot_training - Step 25998: {'lr': 0.0002500327249233815, 'samples': 13311488, 'steps': 25998, 'loss/train': 1.2565412521362305} +02/25/2022 11:15:48 - INFO - codeparrot_training - Step 25999: {'lr': 0.0002500163624617258, 'samples': 13312000, 'steps': 25999, 'loss/train': 1.7097930908203125} +02/25/2022 11:15:48 - INFO - codeparrot_training - Evaluating and saving model checkpoint